[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]
Subject: Socat to XML Catalog program
This is a Perl program to convert a 9401-compliant socat to an XML Catalog. It handles the entire socat syntax, including unknown keywords (which are mapped to a new namespace, urn:oasis:names:tc:entity:xmlns:unknown). There is a slight problem with the placement of comments, since comments cannot appear inside tags. I have tested this using the four socats distributed with SP. Please hammer on it, and send me any socat that fails to translate or translates incorrectly. Thanks. -- There is / one art || John Cowan <jcowan@reutershealth.com> no more / no less || http://www.reutershealth.com to do / all things || http://www.ccil.org/~cowan with art- / lessness \\ -- Piet Hein
#!/usr/bin/perl -w # Converts OASIS 9401 socats to XML catalogs # No copyright, no warranty, use as you will # John Cowan asserts the moral right to be known as the author of this software use strict; $/ = undef; my $lines = 1; # current line number my $token; # current token type my $value; # current token value my @commentq; # queue of comments my $kw; # current keyword my @args; # arguments to current keyword my $groups; # number of groups entered $_ = <>; init(); recover: scan(); while ($token ne "EOF") { parse(); emit(); dequeue(); } fin(); # Scan the next token into $token, putting its value in $value sub scan { rescan: if (($value) = /^([ \t\r\n]+)/) { # whitespace $_ = $'; $lines++ while $value =~ s/\n//; } if (/^--/) { # comment ($value) = /^--([^-]+(?:-[^-]+)*)--/; $_ = $'; push @commentq, $value; $lines++ while $value =~ s/\n//; goto rescan; } if ($_ eq "") { # end of input $token = "EOF"; return; } if (($value) = /^([^"' \t\r\n]+)/) { # non-string $_ = $'; $token = ($value =~ /[\\\/.<>]/) ? "NONSYM" : "SYM"; my $uct = uc($value); if ($uct eq "OVERRIDE" || $uct eq "SYSTEM" || $uct eq "DELEGATE" || $uct eq "PUBLIC" || $uct eq "DTDDECL" || $uct eq "ENTITY" || $uct eq "DOCTYPE" || $uct eq "LINKTYPE" || $uct eq "NOTATION" || $uct eq "SGMLDECL" || $uct eq "DOCUMENT" || $uct eq "BASE" || $uct eq "CATALOG") { $value = $uct; $token = "KW"; } return; } if (($value) = /^"([^"]*)"/) { # double-quoted string $_ = $'; $token = "LIT"; return; } if (($value) = /^'([^']*)'/) { # single-quoted string $_ = $'; $token = "LIT"; return; } die "can't happen"; } # Syntax error in input sub yammer { my ($msg) = @_; warn "$msg at line $lines\n"; goto recover; } # Parse tokens into xcatalog entries sub parse { $kw = $value; @args = (); if ($token eq "SYM") { # unknown keyword while (1) { scan(); last if $token eq "KW" || $token eq "EOF"; last if $token eq "SYM" && @args != 0; push @args, $value; } return; } yammer "$value not a valid keyword" unless $token eq "KW"; scan(); if ($kw eq "PUBLIC" || $kw eq "DTDDECL") { yammer "$value not a public id" unless $token eq "LIT"; push @args, $value; scan(); push @args, $value; scan(); return; } if ($kw eq "ENTITY" || $kw eq "DOCTYPE" || $kw eq "LINKTYPE" || $kw eq "NOTATION") { push @args, $value; scan(); push @args, $value; scan(); return; } if ($kw eq "SGMLDECL" || $kw eq "DOCUMENT" || $kw eq "BASE" || $kw eq "CATALOG") { push @args, $value; scan(); return; } if ($kw eq "SYSTEM") { yammer "$value not a system id" unless $token eq "LIT"; push @args, $value; scan(); push @args, $value; scan(); return; } if ($kw eq "DELEGATE") { yammer "$value not a partial public id" unless $token eq "LIT"; push @args, $value; scan(); push @args, $value; scan(); return; } if ($kw eq "OVERRIDE") { $value = uc($value); yammer "OVERRIDE requires YES or NO" unless $value eq "YES" || $value eq "NO"; push @args, $value; scan(); return; } die "can't happen"; } # Emit the XML catalog entry sub emit { my $arg; foreach $arg (@args) { $arg = ($arg =~ /"/) ? "'$arg'" : "\"$arg\""; } if ($kw eq "SYSTEM") { print "<system systemId=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "DELEGATE") { print "<delegatePublic publicIdStartString=$args[0] "; print "catalog=$args[1]/>\n"; } elsif ($kw eq "OVERRIDE") { my $prefer = ($args[0] =~ /YES/) ? "\"public\"" : "\"system\""; print "<group prefer=$prefer>\n"; $groups++; } elsif ($kw eq "PUBLIC") { print "<public publicId=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "DTDDECL") { print "<soc:dtddecl publicId=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "ENTITY") { print "<soc:entity name=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "DOCTYPE") { print "<soc:doctype name=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "LINKTYPE") { print "<soc:linktype name=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "NOTATION") { print "<soc:notation name=$args[0] uri=$args[1]/>\n"; } elsif ($kw eq "SGMLDECL") { print "<soc:sgmldecl uri=$args[0]/>\n"; } elsif ($kw eq "DOCUMENT") { print "<soc:document uri=$args[0]/>\n"; } elsif ($kw eq "BASE") { print "<group xml:base=$args[0]>\n"; $groups++; } elsif ($kw eq "CATALOG") { print "<nextCatalog catalog=$args[0]/>\n"; } else { print "<unk:$kw "; my $i; for ($i = 0; $i <= $#args; $i++) { print " arg$i=$args[$i]\n"; } print " />\n"; } } # dequeue comments sub dequeue { my $comment; foreach $comment (@commentq) { print "<!--$comment-->\n"; } @commentq = (); } # start xml catalog sub init { print "<catalog xmlns=\"urn:oasis:names:tc:entity:xmlns:xml:catalog\"\n"; print " xmlns:soc=\"urn:oasis:names:tc:entity:xmlns:tr9401:catalog\"\n"; print " xmlns:unk=\"urn:oasis:names:tc:entity:xmlns:unknown\"\n"; print " >\n"; } # wrap up xml catalog sub fin { while ($groups--) { print "</group>\n"; } print "</catalog>\n"; }
[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]
Powered by eList eXpress LLC