American Seymour Genealogy

This is an old revision of the document!

Ged2Wiki

Ged2Wiki is a project to convert a GEDCOM file into DokuWiki pages so that they can be added to this (or any other DokuWiki-based) site.

I intend to write this program in the Perl programming language. While I had initially thought that the GEDCOM module from CPAN would be a good starting point, I have since decided that starting from the ground up will be easier (for me).

As of 18 Nov 2007, I have written only the skeleton parser. This version will read a GEDCOM file and report the “XREF” labels for each Individual and Family found. However, as of now, it does nothing with the data.

Here's what it looks like so far:

#
# Ged2Wiki.pl - Perl program to convert a GEDCOM file to plain text Wiki pages
#
# The input format is based on "The GEDCOM Standard" Release 5.5, as published
# by the Family History Department of The Church of Jesus Christ of Latter-day
# Saints, 2 January 1996.
#
# The output format is a series of plain-text files based on DokuWiki
#
# For questions, comments, bug reports, etc., please visit this site:
#    http://s560.com/ged2wiki
#

use strict;
use warnings;

# Parser state shared with the subroutines below.  The parser keeps one line
# of look-ahead: fetch_line() fills the $next* pair and read_line() promotes
# it to the $curr* pair.
my $currline;    # Text of the current line, level number removed
my $nextline;    # Text of the look-ahead line, level number removed
my $currlevel;   # Level number of the current line
my $nextlevel;   # Level number of the look-ahead line

# Records that have a dedicated parser; each one is called with the XREF label
my %parser_for = (
   FAM  => \&parse_family,
   INDI => \&parse_individual,
   NOTE => \&parse_note,
);

# Records that are accepted at the top level but skipped
my %is_ignored = map { $_ => 1 } qw(OBJE REPO SOUR SUBM);

# Prime the parser by reading the first line
fetch_line();

# "Read" the first line
read_line();

# The first line must be "0 HEAD" (check the level as well as the tag)
if ($currlevel != 0 || $currline !~ /^HEAD\b/) {
   print "Input does not appear to be a GEDCOM file\n";
   exit 1;   # Non-zero status so that callers can detect the failure
}

# First line is good.  Parse the rest of the header
parse_header();

# If it's a Submission record, parse it here
if ($currline =~ /^\@(.*?)\@\s+SUBN/) {
   parse_subn($1);
}

# Loop here, reading the "Record" entries until we find the Trailer.
# Every parser leaves $currline on the first line of the following record.
#
# Note: $. is the number of the most recently fetched (look-ahead) line, so
# the line number in the messages below can be past the offending record.
RECORD:
while ($currline !~ /^TRLR/) {

   # Each Record must be in this format: @XREF@ TAG
   if ($currline !~ /^\@(.*?)\@\s+(\w+)/) {
      print "Unexpected Record at line $.:\n>> $currline\n";
      exit 1;
   }
   my ($xref, $tag) = ($1, $2);

   # FAM, INDI and NOTE records go to their own parser
   if (my $parser = $parser_for{$tag}) {
      $parser->($xref);
      next RECORD;
   }

   # If it's an unknown record, it's an error
   if (!$is_ignored{$tag}) {
      print "Unexpected Record at line $.:\n>> $currline\n";
      exit 1;
   }

   # Ignore this type of record
   parse_ignore();
}

print "GEDCOM file parsed successfully\n";

# Generate all the output here ...

exit;

# Read one physical line from the input into the look-ahead pair.
#
# Reads the next line from <>, trims leading and trailing whitespace, and
# splits it into:
#    $nextlevel - the leading level number
#    $nextline  - everything after the level number and its delimiter
#
# Prints a diagnostic and exits with a non-zero status when the input ends
# before a line could be read, or when the line does not start with a level
# number followed by whitespace.
sub fetch_line {
   $nextline = <>;

   # End of input.  read_line() never fetches beyond the TRLR record, so
   # arriving here means the input is empty, truncated, or lacks a trailer.
   if (!defined $nextline) {
      print "Unexpected end of GEDCOM data after line $. (missing TRLR record?)\n";
      exit 1;
   }

   # A byte-order mark in front of the first line would otherwise hide the
   # level number.  Handle both the raw UTF-8 bytes and the decoded character.
   $nextline =~ s/^(?:\xEF\xBB\xBF|\x{FEFF})// if $. == 1;

   $nextline =~ s/^\s+//;    # Strip leading whitespace
   $nextline =~ s/\s+\z//;   # Strip trailing whitespace (and newline)

   # Capture the remainder explicitly instead of relying on $'
   if ($nextline !~ /^(\d+)\s+(.*)\z/s) {
      print "Invalid GEDCOM data at line $.:\n>> $nextline\n";
      exit 1;
   }
   ($nextlevel, $nextline) = ($1, $2);
}

# Advance the parser by one logical line.
#
# Promotes the look-ahead line to the current line ($currline / $currlevel),
# fetches a new look-ahead with fetch_line(), and folds any continuation
# lines that follow into $currline:
#    CONC - the text is appended directly
#    CONT - the text is appended after a newline
# A continuation line is only folded in when its level is exactly one deeper
# than the level of the current line.
#
# When the current line is the trailer (TRLR) no further input is read and
# the look-ahead pair is set to empty strings.
sub read_line {

   $currline  = $nextline;
   $currlevel = $nextlevel;

   # Special case: Do not read beyond the Trailer
   if ($currline =~ /^TRLR/) {
      $nextline  = "";
      $nextlevel = "";
      return;
   }

   fetch_line();

   # The text after the tag is optional.  fetch_line() strips trailing
   # whitespace, so an empty continuation line arrives as a bare "CONT" or
   # "CONC"; it must still be folded in, otherwise the concatenation stops
   # early and the rest of the text is left behind as separate lines.
   while ($nextlevel == $currlevel + 1
          && $nextline =~ /^CON([CT])(?:\s+(.*))?\z/s) {
      my $kind = $1;
      my $text = defined $2 ? $2 : "";

      $currline .= "\n" if $kind eq "T";   # CONT starts a new line
      $currline .= $text;

      fetch_line();
   }
}

# Handle the header record.
# Announces the record on the output, then skips over its contents with
# parse_ignore(); nothing from the header is kept yet.
sub parse_header {
   print ">> Header\n";
   return parse_ignore();   # Placeholder until the header is really parsed
}

# Handle the submission (SUBN) record.
# The caller passes the record's XREF label, but it is not used here: the
# record is announced on the output and then skipped with parse_ignore().
sub parse_subn {
   print ">> Submission\n";
   return parse_ignore();   # Placeholder until the record is really parsed
}

# Handle a Family (FAM) record.
# Argument: the record's XREF label, which is reported on the output.
# The contents of the record are then skipped with parse_ignore().
sub parse_family {
   my ($xref) = @_;
   print ">> Family $xref\n";
   return parse_ignore();   # Placeholder until the record is really parsed
}

# Handle an Individual (INDI) record.
# Argument: the record's XREF label, which is reported on the output.
# The contents of the record are then skipped with parse_ignore().
sub parse_individual {
   my ($xref) = @_;
   print ">> Individual $xref\n";
   return parse_ignore();   # Placeholder until the record is really parsed
}

# Handle a Note (NOTE) record.
# The caller passes the record's XREF label, but it is not reported: only a
# fixed marker is printed before the record is skipped with parse_ignore().
sub parse_note {
   print ">> Note\n";
   return parse_ignore();   # Placeholder until the record is really parsed
}

# Skip the remainder of the current record.
# Remembers the level of the current line and keeps calling read_line()
# (at least once) until the current line is at that level or a shallower
# one, i.e. until the first line that is not subordinate to the record.
sub parse_ignore {
   my $record_level = $currlevel;
   do {
      read_line();
   } until ($currlevel <= $record_level);
}