OASIS Mailing List ArchivesView the OASIS mailing list archive below
or browse/search using MarkMail.

 


Help: OASIS Mailing Lists Help | MarkMail Help

docbook-apps message

[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]


Subject: DOCBOOK-APPS: Re: conditionalization of XML


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

/ "Eric S. Raymond" <esr@thyrsus.com> was heard to say:
| Norman Walsh <ndw@nwalsh.com>:
|> That's valid when the PIs are left in, but results in a non-XML
|> document when profiled. My model forces the input to be well-formed
|> XML and guarantees that the result will be well-formed.
|
| Good answer.  Same one I anticipated several messages up-thgread :-)
|
| So, how would *you* implement this "specialized vocabulary"?  XSLT
| doesn't have the marbles for it.

I think the right answer is a specialized XML parser that performs a
variant of the identity transformation. In fact, it does exactly what
Jirka's profiling code does except that it has a funky serializer that
outputs the <!DOCTYPE declaration and the internal subset (or ideally
only the necessary parts of it).

In fact, that's just what my code does, by way of an egregious hack.

#!/usr/bin/perl -w -- # -*- Perl -*-

use strict;
use XML::Parser::PerlSAX;

my $host = $ENV{'HTTP_HOST'} || "";
my $uri = $ENV{'REQUEST_URI'} || "";

if ($host ne 'localhost') {
    &forbidden();
}

my %profile = ();
my $xmlfile = "";
my $options = "";

if ($uri =~ /^.*?profile(\/.*?)\?(.*)$/) {
    $xmlfile = $1;
    $options = $2;
} elsif ($uri =~ /^.*?profile(\/.*)$/) {
    $xmlfile = $1;
} else {
    &forbidden();
}

my @args = split(/&/, $options);
foreach $_ (@args) {
    tr/+/ /;
    s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
}

foreach my $cond (@args) {
    next if $cond !~ /^(\S+)=(\S+)/;
    if (exists $profile{$1}) {
	$profile{$1} .= "|$2";
    } else {
	$profile{$1} = $2;
    }
}

print "Content-type: application/xml\n\n";

my $xmlDecl = "<?xml version='1.0'?>";
my $internalSubset = "";

if (open (F, $xmlfile)) {
    # THIS IS A HACK
    read (F, $_, 16384);

    if (/^\s*(\<\?xml.*?\?>)/is) {
	$xmlDecl = $1;
    }

    if (/<!DOCTYPE\s/is) {
	$_ = $& . $'; # '
	if (/\]\>/is) {
	    $_ = $` . $&;
	}
	$internalSubset = $_;
    }
    close (F);
}

my $shandler = new SerializeHandler($xmlDecl, $internalSubset);
my $handler = new ProfileHandler($shandler, %profile);
my $parser = new XML::Parser::PerlSAX (Handler => $handler);

$parser->parse (Source => { 'SystemId' => $xmlfile });

close (STDOUT);
exit;

sub forbidden {
    # FIXME: make this work on my server
#    print "HTTP/1.1 403 Forbidden\n";
#    print "Connection: close\n";
    print "Content-Type: text/html; charset=iso-8859-1\n\n";

    print "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n";
    print "<HTML><HEAD>\n";
    print "<TITLE>403 Forbidden</TITLE>\n";
    print "</HEAD><BODY>\n";
    print "<H1>Forbidden</H1>\n";
    print "You don't have permission to access that resource\n";
    print "on this server.<P>\n";
    print "</BODY></HTML>\n";
    exit 0;
}

package ProfileHandler;

sub new {
    my $type = shift;
    my $chain = shift;
    my %profile = @_;
    my @stack = ();

    my $self = { 'chain' => $chain,
		 'profile' => \%profile,
		 'stack' => \@stack };

    return bless $self, $type;
}

sub start_document {
    my $self = shift;

    $self->{'chain'}->start_document() if $self->{'chain'};
    $self->include();
}

sub start_element {
    my $self = shift;
    my $element = shift;

#    print $element->{'Name'}, ": ", $self->context(), "\n";

    if ($self->context()) {
	my %profile = %{$self->{'profile'}};
	my %attrs = %{$element->{'Attributes'}};
	my $match = 1;

	foreach my $attr (keys %attrs) {
	    if (exists $profile{$attr}) {
		my $value = $attrs{$attr};
		my $prof = $profile{$attr};
		$match = $match && $self->profileMatch($value,$prof);
	    }
	}

	if ($match) {
	    $self->include();
	} else {
	    $self->ignore();
	}
    } else {
	$self->ignore();
    }

    if ($self->context()) {
	$self->{'chain'}->start_element($element) if $self->{'chain'};
    }
}

sub end_element {
    my $self = shift;

    if ($self->context()) {
	$self->{'chain'}->end_element(@_) if $self->{'chain'};
    }
    $self->pop();
}

sub characters {
    my $self = shift;

    if ($self->context()) {
	$self->{'chain'}->characters(@_) if $self->{'chain'};
    }
}

sub processing_instruction {
    my $self = shift;
    $self->{'chain'}->processing_instruction(@_) if $self->{'chain'};
}

sub comment {
    my $self = shift;

    if ($self->context()) {
	$self->{'chain'}->comment(@_) if $self->{'chain'};
    }
}

sub ignore {
    my $self = shift;

#    print "IGNORE\n";
    push (@{$self->{'stack'}}, 0);
}

sub include {
    my $self = shift;

#    print "INCLUDE\n";
    push (@{$self->{'stack'}}, 1);
}

sub pop {
    my $self = shift;

#    print "POP\n";
    pop (@{$self->{'stack'}});
}

sub context {
    my $self = shift;

    my @stack = @{$self->{'stack'}};
#    print "CONTEXT: ", $stack[$#stack], "\n";
    return $stack[$#stack];
}

sub profileMatch {
    my $self = shift;
    my $values= shift;
    my $profiles = shift;
    my %profs = ();

    my @profiles = split(/\|/, $profiles);
    foreach my $profile (@profiles) {
	$profs{$profile} = 1;
    }

    foreach my $value (split(/\|/, $values)) {
	return 1 if $profs{$value};
    }

    return 0;
}

package SerializeHandler;

sub new {
    my $type = shift;
    my $xmlDecl = shift;
    my $internalSubset = shift;

    return bless { 'xmldecl' => $xmlDecl, 'subset' => $internalSubset}, $type;
}

sub start_document {
    my $self = shift;

    print $self->{'xmldecl'};
    print "\n";
    print $self->{'subset'};
    print "\n" if $self->{'subset'} ne '';
}

sub start_element {
    my $self = shift;
    my $element = shift;

    print "<", $element->{'Name'};

    my %attr = %{$element->{'Attributes'}};
    if (%attr) {
	foreach my $name (keys %attr) {
	    my $value = $attr{$name};
	    my $quot = '"';

	    if ($value =~ /\"/) {
		$quot = "'";
		$value =~ s/\'/\&apos;/sg;
	    }

	    print " $name=$quot$value$quot";
	}
    }

    print ">";
}

sub end_element {
    my $self = shift;
    my $element = shift;

    print "</", $element->{'Name'}, ">";
}

sub characters {
    my $self = shift;
    my $data = shift;

    print $data->{'Data'};
}

sub processing_instruction {
    my $self = shift;
    my $pi = shift;

    print "<?", $pi->{'Target'}, " ", $pi->{'Data'}, "?>";
}

sub comment {
    my $self = shift;
    my $comment = shift;

    print "<--", $comment->{'Data'}, "-->";
}

                                        Be seeing you,
                                          norm

- -- 
Norman Walsh <ndw@nwalsh.com>      | The art of living is more like
http://www.oasis-open.org/docbook/ | wrestling than dancing.--Marcus
Chair, DocBook Technical Committee | Aurelius
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.6 (GNU/Linux)
Comment: Processed by Mailcrypt 3.5.7 <http://mailcrypt.sourceforge.net/>

iD8DBQE9pz4kOyltUcwYWjsRAh3ZAJ9qfOS5Lr3C9XFmeYKdmEgSie1XJgCgr5v9
ucfZl6KWRHnBPSPsDTti56A=
=SBtk
-----END PGP SIGNATURE-----


[Date Prev] | [Thread Prev] | [Thread Next] | [Date Next] -- [Date Index] | [Thread Index] | [Elist Home]


Powered by eList eXpress LLC