[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]
Subject: Socat to XML Catalog program
This is a Perl program to convert a 9401-compliant socat to an XML Catalog. It handles the entire socat syntax, including unknown keywords (which are mapped to a new namespace, urn:oasis:names:tc:entity:xmlns:unknown). There is a slight problem with the placement of comments, since comments cannot appear inside tags. I have tested this using the four socats distributed with SP. Please hammer on it, and send me any socat that fails to translate or translates incorrectly. Thanks. -- There is / one art || John Cowan <jcowan@reutershealth.com> no more / no less || http://www.reutershealth.com to do / all things || http://www.ccil.org/~cowan with art- / lessness \\ -- Piet Hein
#!/usr/bin/perl -w
# Converts OASIS 9401 socats to XML catalogs
# No copyright, no warranty, use as you will
# John Cowan asserts the moral right to be known as the author of this software
use strict;
$/ = undef;
my $lines = 1; # current line number
my $token; # current token type
my $value; # current token value
my @commentq; # queue of comments
my $kw; # current keyword
my @args; # arguments to current keyword
my $groups; # number of groups entered
$_ = <>;
init();
recover:
scan();
while ($token ne "EOF") {
parse();
emit();
dequeue();
}
fin();
# Scan the next token into $token, putting its value in $value
sub scan {
rescan:
if (($value) = /^([ \t\r\n]+)/) { # whitespace
$_ = $';
$lines++ while $value =~ s/\n//;
}
if (/^--/) { # comment
($value) = /^--([^-]+(?:-[^-]+)*)--/;
$_ = $';
push @commentq, $value;
$lines++ while $value =~ s/\n//;
goto rescan;
}
if ($_ eq "") { # end of input
$token = "EOF";
return;
}
if (($value) = /^([^"' \t\r\n]+)/) { # non-string
$_ = $';
$token = ($value =~ /[\\\/.<>]/) ? "NONSYM" : "SYM";
my $uct = uc($value);
if ($uct eq "OVERRIDE" || $uct eq "SYSTEM"
|| $uct eq "DELEGATE" || $uct eq "PUBLIC"
|| $uct eq "DTDDECL" || $uct eq "ENTITY"
|| $uct eq "DOCTYPE" || $uct eq "LINKTYPE"
|| $uct eq "NOTATION" || $uct eq "SGMLDECL"
|| $uct eq "DOCUMENT" || $uct eq "BASE"
|| $uct eq "CATALOG") {
$value = $uct;
$token = "KW";
}
return;
}
if (($value) = /^"([^"]*)"/) { # double-quoted string
$_ = $';
$token = "LIT";
return;
}
if (($value) = /^'([^']*)'/) { # single-quoted string
$_ = $';
$token = "LIT";
return;
}
die "can't happen";
}
# Syntax error in input
sub yammer {
my ($msg) = @_;
warn "$msg at line $lines\n";
goto recover;
}
# Parse tokens into xcatalog entries
sub parse {
$kw = $value;
@args = ();
if ($token eq "SYM") { # unknown keyword
while (1) {
scan();
last if $token eq "KW" || $token eq "EOF";
last if $token eq "SYM" && @args != 0;
push @args, $value;
}
return;
}
yammer "$value not a valid keyword" unless $token eq "KW";
scan();
if ($kw eq "PUBLIC" || $kw eq "DTDDECL") {
yammer "$value not a public id" unless $token eq "LIT";
push @args, $value;
scan();
push @args, $value;
scan();
return;
}
if ($kw eq "ENTITY" || $kw eq "DOCTYPE" || $kw eq "LINKTYPE"
|| $kw eq "NOTATION") {
push @args, $value;
scan();
push @args, $value;
scan();
return;
}
if ($kw eq "SGMLDECL" || $kw eq "DOCUMENT" || $kw eq "BASE"
|| $kw eq "CATALOG") {
push @args, $value;
scan();
return;
}
if ($kw eq "SYSTEM") {
yammer "$value not a system id" unless $token eq "LIT";
push @args, $value;
scan();
push @args, $value;
scan();
return;
}
if ($kw eq "DELEGATE") {
yammer "$value not a partial public id"
unless $token eq "LIT";
push @args, $value;
scan();
push @args, $value;
scan();
return;
}
if ($kw eq "OVERRIDE") {
$value = uc($value);
yammer "OVERRIDE requires YES or NO"
unless $value eq "YES" || $value eq "NO";
push @args, $value;
scan();
return;
}
die "can't happen";
}
# Emit the XML catalog entry
sub emit {
my $arg;
foreach $arg (@args) {
$arg = ($arg =~ /"/) ? "'$arg'" : "\"$arg\"";
}
if ($kw eq "SYSTEM") {
print "<system systemId=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "DELEGATE") {
print "<delegatePublic publicIdStartString=$args[0] ";
print "catalog=$args[1]/>\n";
}
elsif ($kw eq "OVERRIDE") {
my $prefer = ($args[0] =~ /YES/) ? "\"public\""
: "\"system\"";
print "<group prefer=$prefer>\n";
$groups++;
}
elsif ($kw eq "PUBLIC") {
print "<public publicId=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "DTDDECL") {
print "<soc:dtddecl publicId=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "ENTITY") {
print "<soc:entity name=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "DOCTYPE") {
print "<soc:doctype name=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "LINKTYPE") {
print "<soc:linktype name=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "NOTATION") {
print "<soc:notation name=$args[0] uri=$args[1]/>\n";
}
elsif ($kw eq "SGMLDECL") {
print "<soc:sgmldecl uri=$args[0]/>\n";
}
elsif ($kw eq "DOCUMENT") {
print "<soc:document uri=$args[0]/>\n";
}
elsif ($kw eq "BASE") {
print "<group xml:base=$args[0]>\n";
$groups++;
}
elsif ($kw eq "CATALOG") {
print "<nextCatalog catalog=$args[0]/>\n";
}
else {
print "<unk:$kw ";
my $i;
for ($i = 0; $i <= $#args; $i++) {
print " arg$i=$args[$i]\n";
}
print " />\n";
}
}
# dequeue comments
sub dequeue {
my $comment;
foreach $comment (@commentq) {
print "<!--$comment-->\n";
}
@commentq = ();
}
# start xml catalog
sub init {
print "<catalog xmlns=\"urn:oasis:names:tc:entity:xmlns:xml:catalog\"\n";
print " xmlns:soc=\"urn:oasis:names:tc:entity:xmlns:tr9401:catalog\"\n";
print " xmlns:unk=\"urn:oasis:names:tc:entity:xmlns:unknown\"\n";
print " >\n";
}
# wrap up xml catalog
sub fin {
while ($groups--) {
print "</group>\n";
}
print "</catalog>\n";
}
[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]
Powered by eList eXpress LLC