Adding more perl modules

Adding more support modules that are needed by XML::Simple to read the
conf/conf.xml.example.
Adding these files into the repository means that users don't need to
install them on their computers.
This commit is contained in:
Bernard Davison
2012-03-05 19:50:44 +11:00
parent 567f51345b
commit 3ef96b1003
27 changed files with 8423 additions and 5 deletions
+1 -1
View File
@@ -24,7 +24,7 @@ my $upload_output = run_program(
"Attempting to build and upload the firmware.",
$ENV{'PAPARAZZI_SRC'},
"make $make_upload_options",
1,1);
0,1);
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
+583
View File
File diff suppressed because it is too large Load Diff
+379
View File
@@ -0,0 +1,379 @@
# $Id$
package XML::SAX;
use strict;
use vars qw($VERSION @ISA @EXPORT_OK);
$VERSION = '0.99';
use Exporter ();
@ISA = ('Exporter');
@EXPORT_OK = qw(Namespaces Validation);
use File::Basename qw(dirname);
use File::Spec ();
use Symbol qw(gensym);
use XML::SAX::ParserFactory (); # loaded for simplicity
use constant PARSER_DETAILS => "ParserDetails.ini";
use constant Namespaces => "http://xml.org/sax/features/namespaces";
use constant Validation => "http://xml.org/sax/features/validation";
my $known_parsers = undef;
# load_parsers takes the ParserDetails.ini file out of the same directory
# that XML::SAX is in, and looks at it. Format in POD below
=begin EXAMPLE
[XML::SAX::PurePerl]
http://xml.org/sax/features/namespaces = 1
http://xml.org/sax/features/validation = 0
# a comment
# blank lines ignored
[XML::SAX::AnotherParser]
http://xml.org/sax/features/namespaces = 0
http://xml.org/sax/features/validation = 1
=end EXAMPLE
=cut
# Load the list of known parsers from the ParserDetails.ini file.
#
# Arguments:
#   $class - invocant; returned so that calls can be chained.
#   $dir   - optional directory containing the "SAX" subdirectory that holds
#            ParserDetails.ini. When false, the directory of the loaded
#            XML/SAX.pm (taken from %INC) is used.
#
# The cached parser list is always reset to an empty list first. When the
# file cannot be opened a warning is issued through do_warn() and the list
# stays empty. _parse_ini_file() dies on a malformed line.
sub load_parsers {
    my $class = shift;
    my $dir = shift;

    # reset parsers
    $known_parsers = [];

    # get directory from wherever XML::SAX is installed
    if (!$dir) {
        $dir = dirname($INC{'XML/SAX.pm'});
    }

    my $file = File::Spec->catfile($dir, "SAX", PARSER_DETAILS);
    my $fh = gensym();

    # Three-argument open: the path can never be mistaken for a mode or pipe.
    if (!open($fh, '<', $file)) {
        XML::SAX->do_warn("could not find " . PARSER_DETAILS . " in $dir/SAX\n");
        return $class;
    }

    $known_parsers = $class->_parse_ini_file($fh);
    close($fh);

    return $class;
}
# Parse an INI-style stream into a list of sections.
#
# Arguments:
#   $class - invocant (unused).
#   $fh    - readable filehandle positioned at the start of the INI data.
#
# Returns an array reference of hashes, one per "[heading]", each of the
# form { Name => $heading, Features => { $key => $value, ... } }. Keys that
# appear before any heading are collected under a section whose Name is the
# empty string.
#
# Dies with "Invalid line in ini: ..." on a line that is neither blank, a
# comment, a heading nor a "key = value" pair.
sub _parse_ini_file {
    my $class = shift;
    my ($fh) = @_;

    my @config;
    my $lineno = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        my $original = $line;

        # Strip comments BEFORE whitespace: otherwise the blanks between a
        # value (or heading) and a trailing comment survive, leaving values
        # such as "Foo " and making "[Foo] # note" an invalid line.
        $line =~ s/[#;].*$//m;

        # strip whitespace
        $line =~ s/\s+\z//;
        $line =~ s/\A\s+//;

        # ignore blanks
        next if $line eq '';

        # heading (non-greedy capture so "[ Foo ]" yields "Foo", not "Foo ")
        if ($line =~ /\A\[\s*(.*?)\s*\]\z/) {
            push @config, { Name => $1 };
            next;
        }
        # instruction: split on the first "="
        elsif ($line =~ /\A(.*?)\s*=\s*(.*)\z/) {
            unless (@config) {
                push @config, { Name => '' };
            }
            $config[-1]{Features}{$1} = $2;
        }
        # not whitespace, comment, or instruction
        else {
            die "Invalid line in ini: $lineno\n>>> $original\n";
        }
    }

    return \@config;
}
# Return the array reference of known parsers, loading the list from
# ParserDetails.ini on first use.
sub parsers {
    my ($class) = @_;
    $class->load_parsers() unless $known_parsers;
    return $known_parsers;
}
# Drop every entry whose Name equals $parser_module from the known-parser
# list (loading the list first if necessary). Returns the invocant.
sub remove_parser {
    my ($class, $parser_module) = @_;

    $class->load_parsers() unless $known_parsers;

    # Rebuild the shared list in place so that existing references to it
    # observe the removal.
    my @kept;
    foreach my $entry (@$known_parsers) {
        push @kept, $entry if $entry->{Name} ne $parser_module;
    }
    @$known_parsers = @kept;

    return $class;
}
# Register (or re-register) a parser module.
#
# The module is loaded, its supported_features() are recorded with the value
# 1, and the resulting entry is placed at the end of the known-parser list.
# If the module is already listed, its last existing entry is removed first.
# Returns the invocant.
sub add_parser {
    my ($class, $parser_module) = @_;

    $class->load_parsers() unless $known_parsers;

    # Load the driver first so that its feature list can be queried.
    (my $parser_file = $parser_module) =~ s/::/\//g;
    $parser_file .= ".pm";
    require $parser_file;

    my %entry = (Name => $parser_module);
    foreach my $feature ($parser_module->supported_features()) {
        $entry{Features}{$feature} = 1;
    }

    # Remember the index of the last entry carrying the same name, if any.
    my $existing;
    foreach my $index (0 .. $#$known_parsers) {
        $existing = $index
            if $known_parsers->[$index]{Name} eq $parser_module;
    }

    # Either way the new entry ends up last; an old one is dropped first.
    splice(@$known_parsers, $existing, 1) if defined $existing;
    push @$known_parsers, \%entry;

    return $class;
}
# Write the current known-parser list back to ParserDetails.ini, located in
# the "SAX" directory next to the loaded XML/SAX.pm.
#
# The existing file is made writable and removed before being rewritten.
# Each parser becomes a "[Name]" section followed by "feature = value"
# lines (sorted, so repeated saves produce identical files).
#
# Returns the invocant. Dies if the file cannot be opened for writing or if
# the buffered data cannot be flushed on close.
sub save_parsers {
    my $class = shift;

    # get directory from wherever XML::SAX is installed
    my $dir = dirname($INC{'XML/SAX.pm'});

    my $file = File::Spec->catfile($dir, "SAX", PARSER_DETAILS);
    chmod 0644, $file;
    unlink($file);

    my $fh = gensym();
    # Three-argument open: $file is always treated as a plain path.
    open($fh, '>', $file) ||
        die "Cannot write to $file: $!";

    foreach my $p (@$known_parsers) {
        print $fh "[$p->{Name}]\n";
        foreach my $key (sort keys %{$p->{Features}}) {
            print $fh "$key = $p->{Features}{$key}\n";
        }
        print $fh "\n";
    }

    print $fh "\n";

    # Write errors on a buffered handle only surface at close time.
    close($fh) ||
        die "Cannot write to $file: $!";

    return $class;
}
# Emit a warning built from the given message list, except when the
# HARNESS_ACTIVE environment variable is true (i.e. under Test::Harness).
sub do_warn {
    my ($class, @messages) = @_;
    $ENV{HARNESS_ACTIVE} or warn(@messages);
}
1;
__END__
=head1 NAME
XML::SAX - Simple API for XML
=head1 SYNOPSIS
use XML::SAX;
# get a list of known parsers
my $parsers = XML::SAX->parsers();
# add/update a parser
XML::SAX->add_parser(q(XML::SAX::PurePerl));
# remove parser
XML::SAX->remove_parser(q(XML::SAX::Foodelberry));
# save parsers
XML::SAX->save_parsers();
=head1 DESCRIPTION
XML::SAX is a SAX parser access API for Perl. It includes classes
and APIs required for implementing SAX drivers, along with a factory
class for returning any SAX parser installed on the user's system.
=head1 USING A SAX2 PARSER
The factory class is XML::SAX::ParserFactory. Please see the
documentation of that module for how to instantiate a SAX parser:
L<XML::SAX::ParserFactory>. However if you don't want to load up
another manual page, here's a short synopsis:
use XML::SAX::ParserFactory;
use XML::SAX::XYZHandler;
my $handler = XML::SAX::XYZHandler->new();
my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
$p->parse_uri("foo.xml");
# or $p->parse_string("<foo/>") or $p->parse_file($fh);
This will automatically load a SAX2 parser (defaulting to
XML::SAX::PurePerl if no others are found) and return it to you.
In order to learn how to use SAX to parse XML, you will need to read
L<XML::SAX::Intro> and for reference, L<XML::SAX::Specification>.
=head1 WRITING A SAX2 PARSER
The first thing to remember in writing a SAX2 parser is to subclass
XML::SAX::Base. This will make your life infinitely easier, by providing
a number of methods automagically for you. See L<XML::SAX::Base> for more
details.
When writing a SAX2 parser that is compatible with XML::SAX, you need
to inform XML::SAX of the presence of that driver when you install it.
In order to do that, XML::SAX contains methods for saving the fact that
the parser exists on your system to an "INI" file, which is then loaded
to determine which parsers are installed.
The best way to do this is to follow these rules:
=over 4
=item * Add XML::SAX as a prerequisite in Makefile.PL:
WriteMakefile(
...
PREREQ_PM => { 'XML::SAX' => 0 },
...
);
Alternatively you may wish to check for it in other ways that will
cause more than just a warning.
=item * Add the following code snippet to your Makefile.PL:
sub MY::install {
package MY;
my $script = shift->SUPER::install(@_);
if (ExtUtils::MakeMaker::prompt(
"Do you want to modify ParserDetails.ini?", 'Y')
=~ /^y/i) {
$script =~ s/install :: (.*)$/install :: $1 install_sax_driver/m;
$script .= <<"INSTALL";
install_sax_driver :
\t\@\$(PERL) -MXML::SAX -e "XML::SAX->add_parser(q(\$(NAME)))->save_parsers()"
INSTALL
}
return $script;
}
Note that you should check the output of this - \$(NAME) will use the name of
your distribution, which may not be exactly what you want. For example XML::LibXML
has a driver called XML::LibXML::SAX::Generator, which is used in place of
\$(NAME) in the above.
=item * Add an XML::SAX test:
A test file should be added to your t/ directory containing something like the
following:
use Test;
BEGIN { plan tests => 4 }
use XML::SAX;
use XML::SAX::PurePerl::DebugHandler;
XML::SAX->add_parser(q(XML::SAX::MyDriver));
local $XML::SAX::ParserPackage = 'XML::SAX::MyDriver';
eval {
my $handler = XML::SAX::PurePerl::DebugHandler->new();
ok($handler);
my $parser = XML::SAX::ParserFactory->parser(Handler => $handler);
ok($parser);
ok($parser->isa('XML::SAX::MyDriver'));
$parser->parse_string("<tag/>");
ok($handler->{seen}{start_element});
};
=back
=head1 EXPORTS
By default, XML::SAX exports nothing into the caller's namespace. However you
can request the symbols C<Namespaces> and C<Validation> which are the
URIs for those features, allowing an easier way to request those features
via ParserFactory:
use XML::SAX qw(Namespaces Validation);
my $factory = XML::SAX::ParserFactory->new();
$factory->require_feature(Namespaces);
$factory->require_feature(Validation);
my $parser = $factory->parser();
=head1 AUTHOR
Current maintainer: Grant McLean, grantm@cpan.org
Originally written by:
Matt Sergeant, matt@sergeant.org
Kip Hampton, khampton@totalcinema.com
Robin Berjon, robin@knowscape.com
=head1 LICENSE
This is free software, you may use it and distribute it under
the same terms as Perl itself.
=head1 SEE ALSO
L<XML::SAX::Base> for writing SAX Filters and Parsers
L<XML::SAX::PurePerl> for an XML parser written in 100%
pure perl.
L<XML::SAX::Exception> for details on exception handling
=cut
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+134
View File
@@ -0,0 +1,134 @@
# $Id$
package XML::SAX::DocumentLocator;
use strict;
# Construct a locator: a hash tied to $class (the remaining arguments are
# handed to TIEHASH) whose reference is itself blessed into $class.
sub new {
    my ($class, @closures) = @_;
    my $locator = {};
    tie %$locator, $class, @closures;
    return bless $locator, $class;
}
# Tie constructor. Stores up to six positional arguments, in order, under
# the keys pubmeth, sysmeth, linemeth, colmeth, encmeth and xmlvmeth;
# missing arguments are stored as undef.
sub TIEHASH {
    my ($class, @callbacks) = @_;
    my %self;
    @self{qw(pubmeth sysmeth linemeth colmeth encmeth xmlvmeth)}
        = @callbacks[0 .. 5];
    return bless \%self, $class;
}
# Tie read hook. Maps the requested key to the callback stored for it and
# returns that callback's result (called with the key as its only
# argument). Unknown keys, and keys with no callback, yield undef.
sub FETCH {
    my ($self, $key) = @_;

    my %slot_for = (
        PublicId     => 'pubmeth',
        SystemId     => 'sysmeth',
        LineNumber   => 'linemeth',
        ColumnNumber => 'colmeth',
        Encoding     => 'encmeth',
        XMLVersion   => 'xmlvmeth',
    );

    my $slot = $slot_for{$key};
    my $method = defined $slot ? $self->{$slot} : undef;
    return undef unless $method;

    my $value = $method->($key);
    return $value;
}
# Tie existence hook. Returns 1 for exactly the six keys FETCH understands
# and 0 for anything else.
sub EXISTS {
    my ($self, $key) = @_;

    # \A...\z rather than ^...$: "$" would also accept "PublicId\n", a key
    # that FETCH cannot resolve.
    if (defined $key
        && $key =~ /\A(?:PublicId|SystemId|LineNumber|ColumnNumber|Encoding|XMLVersion)\z/) {
        return 1;
    }
    return 0;
}
# Tie write hook. No-op: the arguments are unpacked and discarded, so
# assigning through the tied hash changes nothing.
sub STORE {
my ($self, $key, $value) = @_;
}
# Tie delete hook. No-op: the arguments are unpacked and discarded, so
# deleting a key through the tied hash removes nothing.
sub DELETE {
my ($self, $key) = @_;
}
# Tie clear hook. No-op: emptying the tied hash leaves the stored callbacks
# untouched.
sub CLEAR {
my ($self) = @_;
}
# Tie iteration hook: start a new pass over the six locator keys.
# A freshly built key table is stored in $self->{keys} (which gives `each`
# a clean iterator) and its first entry is returned.
sub FIRSTKEY {
    my $self = shift;

    my %names = map { $_ => 1 }
        qw(PublicId SystemId LineNumber ColumnNumber Encoding XMLVersion);
    $self->{keys} = \%names;

    return each %{ $self->{keys} };
}
# Tie iteration hook: continue the pass begun by FIRSTKEY by advancing the
# `each` iterator of the key table in $self->{keys}. $lastkey is ignored.
sub NEXTKEY {
    my $self = $_[0];
    return each %{ $self->{keys} };
}
1;
__END__
=head1 NAME
XML::SAX::DocumentLocator - Helper class for document locators
=head1 SYNOPSIS
my $locator = XML::SAX::DocumentLocator->new(
sub { $object->get_public_id },
sub { $object->get_system_id },
sub { $reader->current_line },
sub { $reader->current_column },
sub { $reader->get_encoding },
sub { $reader->get_xml_version },
);
=head1 DESCRIPTION
This module gives you a tied hash reference that calls the
specified closures when asked for PublicId, SystemId,
LineNumber, ColumnNumber, Encoding and XMLVersion.
It is useful for writing SAX Parsers so that you don't have
to constantly update the line numbers in a hash reference on
the object you pass to set_document_locator(). See the source
code for XML::SAX::PurePerl for a usage example.
=head1 API
There is only 1 method: C<new>. Simply pass it a list of
closures that when called will return the PublicId, the
SystemId, the LineNumber, the ColumnNumber, the Encoding
and the XMLVersion respectively.
The closures are passed a single parameter, the key being
requested. But you're free to ignore that.
=cut
+129
View File
@@ -0,0 +1,129 @@
package XML::SAX::Exception;
BEGIN {
$XML::SAX::Exception::VERSION = '1.08';
}
use strict;
use overload '""' => "stringify",
'fallback' => 1;
use vars qw($StackTrace);
use Carp;
# Stack-trace capture is switched on when the XML_DEBUG environment variable
# is true at the time this module is loaded; otherwise it defaults to 0.
$StackTrace = $ENV{XML_DEBUG} || 0;
# Other exception classes:
# (declared by assigning @ISA only; they inherit everything from the base
# class and define no code of their own here)
@XML::SAX::Exception::NotRecognized::ISA = ('XML::SAX::Exception');
@XML::SAX::Exception::NotSupported::ISA = ('XML::SAX::Exception');
@XML::SAX::Exception::Parse::ISA = ('XML::SAX::Exception');
# Raise an exception. Called on an existing object, that object is re-thrown
# unchanged; called on a class name, a new instance is built from the
# remaining arguments and thrown.
sub throw {
    my ($invocant, @args) = @_;
    die(ref($invocant) ? $invocant : $invocant->new(@args));
}
# Build an exception object from key/value options.
# A Message option is mandatory; without it the constructor confesses with
# the list of option names it did receive. When $StackTrace is true a
# StackTrace entry is captured first, so a caller-supplied StackTrace
# option overrides it.
sub new {
    my ($class, %opts) = @_;

    if (!exists $opts{Message}) {
        confess "Invalid options: " . join(', ', keys %opts);
    }

    my %self = $StackTrace ? (StackTrace => stacktrace()) : ();
    %self = (%self, %opts);

    return bless \%self, $class;
}
# String overload: the Message, followed by " [Ln: L, Col: C]" when a
# LineNumber entry exists, then the formatted stack trace when $StackTrace
# is true, and finally a newline. Warnings are silenced for the duration so
# that undefined fields do not complain.
sub stringify {
    my ($self) = @_;
    local $^W;

    my $error = $self->{Message};
    if (exists $self->{LineNumber}) {
        $error .= " [Ln: " . $self->{LineNumber} .
            ", Col: " . $self->{ColumnNumber} . "]";
    }

    $error .= stackstring($self->{StackTrace}) if $StackTrace;

    return $error . "\n";
}
# Capture the call stack starting two frames above this sub. Returns an
# array reference of { Package, Filename, Line } hashes, innermost first.
sub stacktrace {
    my @frames;
    my $depth = 2;

    while (1) {
        my @info = caller($depth);
        last unless @info;
        push @frames, {
            Package  => $info[0],
            Filename => $info[1],
            Line     => $info[2],
        };
        $depth++;
    }

    return \@frames;
}
# Format a captured stack trace as text: a "From:" header followed by one
# "<Filename> Line: <Line>" row per frame.
sub stackstring {
    my ($frames) = @_;

    my @lines = ("\nFrom:\n");
    foreach my $frame (@$frames) {
        push @lines, $frame->{Filename} . " Line: " . $frame->{Line} . "\n";
    }

    return join('', @lines);
}
1;
__END__
=head1 NAME
XML::SAX::Exception - Exception classes for XML::SAX
=head1 SYNOPSIS
throw XML::SAX::Exception::NotSupported(
Message => "The foo feature is not supported",
);
=head1 DESCRIPTION
This module is the base class for all SAX Exceptions, those defined in
the spec as well as those that one may create for one's own SAX errors.
There are three subclasses included, corresponding to those of the SAX
spec:
XML::SAX::Exception::NotSupported
XML::SAX::Exception::NotRecognized
XML::SAX::Exception::Parse
Use them wherever you want, and as much as possible when you encounter
such errors. SAX is meant to use exceptions as much as possible to
flag problems.
=head1 CREATING NEW EXCEPTION CLASSES
All you need to do to create a new exception class is:
@XML::SAX::Exception::MyException::ISA = ('XML::SAX::Exception')
The given package doesn't need to exist, it'll behave correctly this
way. If your exception refines an existing exception class, then you
may also inherit from that instead of from the base class.
=head1 THROWING EXCEPTIONS
This is as simple as exemplified in the SYNOPSIS. In fact, there's
nothing more to know. All you have to do is:
throw XML::SAX::Exception::MyException( Message => 'Something went wrong' );
and voila, you've thrown an exception which can be caught in an eval block.
=cut
+407
View File
@@ -0,0 +1,407 @@
=head1 NAME
XML::SAX::Intro - An Introduction to SAX Parsing with Perl
=head1 Introduction
XML::SAX is a new way to work with XML Parsers in Perl. In this article
we'll discuss why you should be using SAX, why you should be using
XML::SAX, and we'll see some of the finer implementation details. The
text below assumes some familiarity with callback, or push based
parsing, but if you are unfamiliar with these techniques then a good
place to start is Kip Hampton's excellent series of articles on XML.com.
=head1 Replacing XML::Parser
The de-facto way of parsing XML under perl is to use Larry Wall and
Clark Cooper's XML::Parser. This module is a Perl and XS wrapper around
the expat XML parser library by James Clark. It has been a hugely
successful project, but suffers from a couple of rather major flaws.
Firstly it is a proprietary API, designed before the SAX API was
conceived, which means that it is not easily replaceable by other
streaming parsers. Secondly its callbacks are subrefs. This doesn't
sound like much of an issue, but unfortunately leads to code like:
sub handle_start {
my ($e, $el, %attrs) = @_;
if ($el eq 'foo') {
$e->{inside_foo}++; # BAD! $e is an XML::Parser::Expat object.
}
}
As you can see, we're using the $e object to hold our state
information, which is a bad idea because we don't own that object - we
didn't create it. It's an internal object of XML::Parser, that happens
to be a hashref. We could all too easily overwrite XML::Parser internal
state variables by using this, or Clark could change it to an array ref
(not that he would, because it would break so much code, but he could).
The only way currently with XML::Parser to safely maintain state is to
use a closure:
my $state = MyState->new();
$parser->setHandlers(Start => sub { handle_start($state, @_) });
This closure traps the $state variable, which now gets passed as the
first parameter to your callback. Unfortunately very few people use
this technique, as it is not documented in the XML::Parser POD files.
Another reason you might not want to use XML::Parser is because you
need some feature that it doesn't provide (such as validation), or you
might need to use a library that doesn't use expat, due to it not being
installed on your system, or due to having a restrictive ISP. Using SAX
allows you to work around these restrictions.
=head1 Introducing SAX
SAX stands for the Simple API for XML. And simple it really is.
Constructing a SAX parser and passing events to handlers is done as
simply as:
use XML::SAX;
use MySAXHandler;
my $parser = XML::SAX::ParserFactory->parser(
Handler => MySAXHandler->new
);
$parser->parse_uri("foo.xml");
The important concept to grasp here is that SAX uses a factory class
called XML::SAX::ParserFactory to create a new parser instance. The
reason for this is so that you can support other underlying
parser implementations for different feature sets. This is one thing
that XML::Parser has always sorely lacked.
In the code above we see the parse_uri method used, but we could
have equally well
called parse_file, parse_string, or parse(). Please see XML::SAX::Base
for what these methods take as parameters, but don't be fooled into
believing parse_file takes a filename. No, it takes a file handle, a
glob, or a subclass of IO::Handle. Beware.
SAX works very similarly to XML::Parser's default callback method,
except it has one major difference: rather than setting individual
callbacks, you create a new class in which to receive the callbacks.
Each callback is called as a method call on an instance of that handler
class. An example will best demonstrate this:
package MySAXHandler;
use base qw(XML::SAX::Base);
sub start_document {
my ($self, $doc) = @_;
# process document start event
}
sub start_element {
my ($self, $el) = @_;
# process element start event
}
Now, when we instantiate this as above, and parse some XML with this as
the handler, the methods start_document and start_element will be
called as method calls, so this would be the equivalent of directly
calling:
$object->start_element($el);
Notice how this is different to XML::Parser's calling style, which
calls:
start_element($e, $name, %attribs);
It's the difference between function calling and method calling which
allows you to subclass SAX handlers which contributes to SAX being a
powerful solution.
As you can see, unlike XML::Parser, we have to define a new package in
which to do our processing (there are hacks you can do to make this
unnecessary, but I'll leave figuring those out to the experts). The
biggest benefit of this is that you maintain your own state variable
($self in the above example) thus freeing you of the concerns listed
above. It is also an improvement in maintainability - you can place the
code in a separate file if you wish to, and your callback methods are
always called the same thing, rather than having to choose a suitable
name for them as you had to with XML::Parser. This is an obvious win.
SAX parsers are also very flexible in how you pass a handler to them.
You can use a constructor parameter as we saw above, or we can pass the
handler directly in the call to one of the parse methods:
$parser->parse(Handler => $handler,
Source => { SystemId => "foo.xml" });
# or...
$parser->parse_file($fh, Handler => $handler);
This flexibility allows for one parser to be used in many different
scenarios throughout your script (though one shouldn't feel pressure to
use this method, as parser construction is generally not a time
consuming process).
=head1 Callback Parameters
The only other thing you need to know to understand basic SAX is the
structure of the parameters passed to each of the callbacks. In
XML::Parser, all parameters are passed as multiple options to the
callbacks, so for example the Start callback would be called as
my_start($e, $name, %attributes), and the PI callback would be called
as my_processing_instruction($e, $target, $data). In SAX, every
callback is passed a hash reference, containing entries that define our
"node". The key callbacks and the structures they receive are:
=head2 start_element
The start_element handler is called whenever a parser sees an opening
tag. It is passed an element structure consisting of:
=over 4
=item LocalName
The name of the element minus any namespace prefix it may
have come with in the document.
=item NamespaceURI
The URI of the namespace associated with this element,
or the empty string for none.
=item Attributes
A set of attributes as described below.
=item Name
The name of the element as it was seen in the document (i.e.
including any prefix associated with it)
=item Prefix
The prefix used to qualify this element's namespace, or the
empty string if none.
=back
The B<Attributes> are a hash reference, keyed by what we have called
"James Clark" notation. This means that the attribute name has been
expanded to include any associated namespace URI, and put together as
{ns}name, where "ns" is the expanded namespace URI of the attribute if
and only if the attribute had a prefix, and "name" is the LocalName of
the attribute.
The value of each entry in the attributes hash is another hash
structure consisting of:
=over 4
=item LocalName
The name of the attribute minus any namespace prefix it may have
come with in the document.
=item NamespaceURI
The URI of the namespace associated with this attribute. If the
attribute had no prefix, then this consists of just the empty string.
=item Name
The attribute's name as it appeared in the document, including any
namespace prefix.
=item Prefix
The prefix used to qualify this attribute's namespace, or the
empty string if none.
=item Value
The value of the attribute.
=back
So a full example, as output by Data::Dumper might be:
....
=head2 end_element
The end_element handler is called either when a parser sees a closing
tag, or after start_element has been called for an empty element (do
note however that a parser may, if it is so inclined, call characters
with an empty string when it sees an empty element). There is no simple
way in SAX to determine whether the parser in fact saw an empty element
or a start and end element with no content.
The end_element handler receives exactly the same structure as
start_element, minus the Attributes entry. One must note though that it
should not be a reference to the same data as start_element receives,
so you may change the values in start_element but this will not affect
the values later seen by end_element.
=head2 characters
The characters callback may be called in several circumstances. The
most obvious one is when seeing ordinary character data in the markup.
But it is also called for text in a CDATA section, and is also called
in other situations. A SAX parser has to make no guarantees whatsoever
about how many times it may call characters for a stretch of text in an
XML document - it may call once, or it may call once for every
character in the text. In order to work around this it is often
important for the SAX developer to use a bundling technique, where text
is gathered up and processed in one of the other callbacks. This is not
always necessary, but it is a worthwhile technique to learn, which we
will cover in XML::SAX::Advanced (when I get around to writing it).
The characters handler is called with a very simple structure - a hash
reference consisting of just one entry:
=over 4
=item Data
The text data that was received.
=back
=head2 comment
The comment callback is called for comment text. Unlike with
C<characters()>, the comment callback *must* be invoked just once for an
entire comment string. It receives a single simple structure - a hash
reference containing just one entry:
=over 4
=item Data
The text of the comment.
=back
=head2 processing_instruction
The processing instruction handler is called for all processing
instructions in the document. Note that these processing instructions
may appear before the document root element, or after it, or anywhere
where text and elements would normally appear within the document,
according to the XML specification.
The handler is passed a structure containing just two entries:
=over 4
=item Target
The target of the processing instruction
=item Data
The text data in the processing instruction. Can be an empty
string for a processing instruction that has no data element.
For example E<lt>?wiggle?E<gt> is a perfectly valid processing instruction.
=back
=head1 Tip of the iceberg
What we have discussed above is really the tip of the SAX iceberg. And
so far it looks like there's not much of interest to SAX beyond what we
have seen with XML::Parser. But it does go much further than that, I
promise.
People who hate Object Oriented code for the sake of it may be thinking
here that creating a new package just to parse something is a waste
when they've been parsing things just fine up to now using procedural
code. But there's reason to all this madness. And that reason is SAX
Filters.
As you saw right at the very start, to let the parser know about our
class, we pass it an instance of our class as the Handler to the
parser. But now imagine what would happen if our class could also take
a Handler option, and simply do some processing and pass on our data
further down the line? That in a nutshell is how SAX filters work. It's
Unix pipes for the 21st century!
There are two downsides to this. Number 1 - writing SAX filters can be
tricky. If you look into the future and read the advanced tutorial I'm
writing, you'll see that Handler can come in several shapes and sizes.
So making sure your filter does the right thing can be tricky.
Secondly, constructing complex filter chains can be difficult, and
simple thinking tells us that we only get one pass at our document,
when often we'll need more than that.
Luckily though, those downsides have been fixed by the release of two
very cool modules. What's even better is that I didn't write either of
them!
The first module is XML::SAX::Base. This is a VITAL SAX module that
acts as a base class for all SAX parsers and filters. It provides an
abstraction away from calling the handler methods, that makes sure your
filter or parser does the right thing, and it does it FAST. So, if you
ever need to write a SAX filter, which if you're processing XML -> XML,
or XML -> HTML, then you probably do, then you need to be writing it as
a subclass of XML::SAX::Base. Really - this is advice not to ignore
lightly. I will not go into the details of writing a SAX filter here.
Kip Hampton, the author of XML::SAX::Base has covered this nicely in
his article on XML.com here <URI>.
To construct SAX pipelines, Barrie Slaymaker, a long time Perl hacker
whose modules you will probably have heard of or used, wrote a very
clever module called XML::SAX::Machines. This combines some really
clever SAX filter-type modules, with a construction toolkit for filters
that makes building pipelines easy. But before we see how it makes
things easy, first let's see how tricky it looks to build complex SAX
filter pipelines.
use XML::SAX::ParserFactory;
use XML::Filter::Filter1;
use XML::Filter::Filter2;
use XML::SAX::Writer;
my $output_string;
my $writer = XML::SAX::Writer->new(Output => \$output_string);
my $filter2 = XML::SAX::Filter2->new(Handler => $writer);
my $filter1 = XML::SAX::Filter1->new(Handler => $filter2);
my $parser = XML::SAX::ParserFactory->parser(Handler => $filter1);
$parser->parse_uri("foo.xml");
This is a lot easier with XML::SAX::Machines:
use XML::SAX::Machines qw(Pipeline);
my $output_string;
my $parser = Pipeline(
XML::SAX::Filter1 => XML::SAX::Filter2 => \$output_string
);
$parser->parse_uri("foo.xml");
One of the main benefits of XML::SAX::Machines is that the pipelines
are constructed in natural order, rather than the reverse order we saw
with manual pipeline construction. XML::SAX::Machines takes care of all
the internals of pipe construction, providing you at the end with just
a parser you can use (and you can re-use the same parser as many times
as you need to).
Just a final tip. If you ever get stuck and are confused about what is
being passed from one SAX filter or parser to the next, then
Devel::TraceSAX will come to your rescue. This perl debugger plugin
will allow you to dump the SAX stream of events as it goes by. Usage is
really very simple: just call your perl script that uses SAX as follows:
$ perl -d:TraceSAX <scriptname>
And preferably pipe the output to a pager of some sort, such as more or
less. The output is extremely verbose, but should help clear some
issues up.
=head1 AUTHOR
Matt Sergeant, matt@sergeant.org
$Id$
=cut
+230
View File
@@ -0,0 +1,230 @@
# $Id$
package XML::SAX::ParserFactory;
use strict;
use vars qw($VERSION);
$VERSION = '1.01';
use Symbol qw(gensym);
use XML::SAX;
use XML::SAX::Exception;
# Construct a factory from key/value parameters. The parameters become the
# object's fields, and KnownParsers is (re)set to the list returned by
# XML::SAX->parsers().
sub new {
    my ($class, %params) = @_; # TODO : Fix this in spec.
    my $self = bless \%params, $class;
    $self->{KnownParsers} = XML::SAX->parsers();
    return $self;
}
# Return a new parser instance.
#
# May be called on a factory object or directly on the class (in which case
# a factory is created on the fly). All arguments are passed through to the
# chosen parser class's new().
#
# The class name comes from _parser_class() and may carry a minimum version
# in parentheses, e.g. "XML::SAX::Expat (0.72)". When the class does not yet
# provide new(), its module is loaded and, if a version was given, checked
# with ->VERSION.
#
# Dies if the class name is not a valid package name, if the module cannot
# be loaded, or if it is older than the requested version.
sub parser {
    my $self = shift;
    my @parser_params = @_;
    if (!ref($self)) {
        $self = $self->new();
    }

    my $parser_class = $self->_parser_class();

    my $version;
    if ($parser_class =~ s/\s*\(([\d\.]+)\)\s*$//) {
        $version = $1;
    }
    # Names read from an ini file may carry stray surrounding blanks.
    $parser_class =~ s/\A\s+//;
    $parser_class =~ s/\s+\z//;

    if (!$parser_class->can('new')) {
        # The name can originate from SAX.ini or a package variable, so it
        # is validated and loaded by file name instead of being
        # interpolated into a string eval.
        if ($parser_class !~ /\A\w+(?:::\w+)*\z/) {
            die "Invalid parser class name: $parser_class\n";
        }
        (my $file = "$parser_class.pm") =~ s{::}{/}g;
        require $file;
        $parser_class->VERSION($version) if defined $version;
    }

    return $parser_class->new(@parser_params);
}
# Mark a feature as required for parsers produced by this factory by
# incrementing its counter in $self->{RequiredFeatures}.
# Returns the factory so that calls can be chained.
sub require_feature {
    my $self = shift;
    my $feature = shift;
    $self->{RequiredFeatures}{$feature}++;
    return $self;
}
# Decide which parser class to use. In order of precedence:
#   1. $XML::SAX::ParserPackage, if set.
#   2. The most recently registered known parser supporting every feature
#      in $self->{RequiredFeatures}; throws an XML::SAX::Exception when no
#      parser qualifies (we never fall through to SAX.ini in that case).
#   3. The first SAX.ini found in @INC: either its ParserPackage value, or
#      the most recently registered parser supporting every feature listed
#      there with a true value. If none matches, a warning is issued.
#   4. The last known parser, or "XML::SAX::PurePerl" when none are known.
#
# Returns the class name, possibly followed by "(version)".
sub _parser_class {
    my $self = shift;

    # First try ParserPackage
    if ($XML::SAX::ParserPackage) {
        return $XML::SAX::ParserPackage;
    }

    # Now check if required/preferred is there
    if ($self->{RequiredFeatures}) {
        my $name = _newest_parser_with(
            $self->{KnownParsers}, keys %{ $self->{RequiredFeatures} });
        return $name if defined $name;

        # TODO : should this be NotSupported() ?
        XML::SAX::Exception->throw(
            Message => "Unable to provide required features",
        );
    }

    # Next try SAX.ini
    for my $dir (@INC) {
        next if ref $dir;    # @INC hooks are not directories

        my $fh = gensym();
        next unless open($fh, '<', "$dir/SAX.ini");

        my $param_list = XML::SAX->_parse_ini_file($fh);
        close($fh);

        my $params = $param_list->[0]{Features} || {};
        if ($params->{ParserPackage}) {
            return $params->{ParserPackage};
        }

        # Only features given a true value are required.
        my $name = _newest_parser_with(
            $self->{KnownParsers}, grep { $params->{$_} } keys %$params);
        return $name if defined $name;

        XML::SAX->do_warn("Unable to provide SAX.ini required features. Using fallback\n");
        last; # stop after first INI found
    }

    if (@{$self->{KnownParsers}}) {
        return $self->{KnownParsers}[-1]{Name};
    }
    else {
        return "XML::SAX::PurePerl"; # backup plan!
    }
}

# Return the Name of the last parser in @$known whose Features hash has a
# true value for every name in @features, or nothing when none qualifies.
# A feature recorded with a false value (e.g. "= 0") counts as unsupported.
sub _newest_parser_with {
    my ($known, @features) = @_;

    PARSER:
    foreach my $parser (reverse @{ $known || [] }) {
        foreach my $feature (@features) {
            next PARSER unless $parser->{Features}{$feature};
        }
        # got here - all features are supported
        return $parser->{Name};
    }

    return;
}
1;
__END__
=head1 NAME
XML::SAX::ParserFactory - Obtain a SAX parser
=head1 SYNOPSIS
use XML::SAX::ParserFactory;
use XML::SAX::XYZHandler;
my $handler = XML::SAX::XYZHandler->new();
my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
$p->parse_uri("foo.xml");
# or $p->parse_string("<foo/>") or $p->parse_file($fh);
=head1 DESCRIPTION
XML::SAX::ParserFactory is a factory class for providing an application
with a Perl SAX2 XML parser. It is akin to DBI - a front end for other
parser classes. Each new SAX2 parser installed will register itself
with XML::SAX, and then it will become available to all applications
that use XML::SAX::ParserFactory to obtain a SAX parser.
Unlike DBI however, XML/SAX parsers almost all work alike (especially
if they subclass XML::SAX::Base, as they should), so rather than
specifying the parser you want in the call to C<parser()>, XML::SAX
has several ways to automatically choose which parser to use:
=over 4
=item * $XML::SAX::ParserPackage
If this package variable is set, then this package is C<require()>d
and an instance of this package is returned by calling the C<new()>
class method in that package. If it cannot be loaded or there is
an error, an exception will be thrown. The variable can also contain
a version number:
$XML::SAX::ParserPackage = "XML::SAX::Expat (0.72)";
And the number will be treated as a minimum version number.
=item * Required features
It is possible to require features from the parsers. For example, you
may wish for a parser that supports validation via a DTD. To do that,
use the following code:
use XML::SAX::ParserFactory;
my $factory = XML::SAX::ParserFactory->new();
$factory->require_feature('http://xml.org/sax/features/validation');
my $parser = $factory->parser(...);
Alternatively, specify the required features in the call to the
ParserFactory constructor:
my $factory = XML::SAX::ParserFactory->new(
RequiredFeatures => {
'http://xml.org/sax/features/validation' => 1,
}
);
If the features you have asked for are unavailable (for example the
user might not have a validating parser installed), then an
exception will be thrown.
The list of known parsers is searched in reverse order, so it will
always return the last installed parser that supports all of your
requested features (Note: this is subject to change if someone
comes up with a better way of making this work).
=item * SAX.ini
ParserFactory will search @INC for a file called SAX.ini, which
is in a simple format:
# a comment looks like this,
; or like this, and are stripped anywhere in the file
key = value # SAX.ini contains key/value pairs.
All whitespace is non-significant.
This file can contain either a line:
ParserPackage = MyParserModule (1.02)
Where MyParserModule is the module to load and use for the parser,
and the number in brackets is a minimum version to load.
Or you can list required features:
http://xml.org/sax/features/validation = 1
And each feature with a true value will be required.
=item * Fallback
If none of the above works, the last parser installed on the user's
system will be used. The XML::SAX package ships with a pure perl
XML parser, XML::SAX::PurePerl, so that there will always be a
fallback parser.
=back
=head1 AUTHOR
Matt Sergeant, matt@sergeant.org
=head1 LICENSE
This is free software, you may use it and distribute it under the same
terms as Perl itself.
=cut
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,95 @@
# $Id$

package XML::SAX::PurePerl::DebugHandler;

use strict;

# A minimal SAX handler for debugging.  Every callback increments a counter
# under $self->{seen} and, when $ENV{DEBUG_XML} is true, prints the name of
# the event followed by a newline.

# Constructor: the key/value arguments become the object's hash verbatim.
sub new {
    my ($class, %fields) = @_;
    return bless \%fields, $class;
}

# Shared bookkeeping for all callbacks.
#   $event   - text printed when $ENV{DEBUG_XML} is set
#   $counter - key incremented under $self->{seen}
# Returns the counter's value from before the increment.
sub _saw {
    my ($self, $event, $counter) = @_;
    print "$event\n" if $ENV{DEBUG_XML};
    return $self->{seen}{$counter}++;
}

# DocumentHandler

sub set_document_locator {
    return _saw($_[0], 'set_document_locator', 'set_document_locator');
}

sub start_document {
    return _saw($_[0], 'start_document', 'start_document');
}

sub end_document {
    return _saw($_[0], 'end_document', 'end_document');
}

sub start_element {
    return _saw($_[0], 'start_element', 'start_element');
}

sub end_element {
    return _saw($_[0], 'end_element', 'end_element');
}

sub characters {
    # warn "Char: ", $_[1]->{Data}, "\n";
    return _saw($_[0], 'characters', 'characters');
}

sub processing_instruction {
    return _saw($_[0], 'processing_instruction', 'processing_instruction');
}

sub ignorable_whitespace {
    return _saw($_[0], 'ignorable_whitespace', 'ignorable_whitespace');
}

# LexHandler

sub comment {
    return _saw($_[0], 'comment', 'comment');
}

# DTDHandler

sub notation_decl {
    return _saw($_[0], 'notation_decl', 'notation_decl');
}

# Note: this event is counted under the "entity_decl" key, not under its
# own name.
sub unparsed_entity_decl {
    return _saw($_[0], 'unparsed_entity_decl', 'entity_decl');
}

# EntityResolver

# Counts the call and always answers with the empty string.
sub resolve_entity {
    _saw($_[0], 'resolve_entity', 'resolve_entity');
    return '';
}

1;
+180
View File
@@ -0,0 +1,180 @@
# $Id$
package XML::SAX::PurePerl;
use strict;
use XML::SAX::PurePerl::Productions qw($PubidChar);
# Parse a document type declaration ("<!DOCTYPE ...>") at the reader's
# current position.
#
# Returns 1 once a complete declaration has been consumed, or 0 (consuming
# nothing) when the input does not begin with "<!DOCTYPE".  Malformed
# declarations are reported through $self->parser_error.
sub doctypedecl {
    my ($self, $reader) = @_;

    my $lookahead = $reader->data(9);
    return 0 unless $lookahead =~ /^<!DOCTYPE/;

    $reader->move_along(9);
    $self->skip_whitespace($reader)
        or $self->parser_error("No whitespace after doctype declaration", $reader);

    my $root_name = $self->Name($reader)
        || $self->parser_error("Doctype declaration has no root element name", $reader);

    if ($self->skip_whitespace($reader)) {
        # Whitespace after the root name might introduce an external ID.
        my %external_id = $self->ExternalID($reader);
        # TODO: Call SAX event
    }

    $self->skip_whitespace($reader);
    $self->InternalSubset($reader);

    $reader->match('>')
        or $self->parser_error("Doctype not closed", $reader);

    return 1;
}
# Parse an external identifier at the reader's current position.
#
# Returns, as a flat key/value list:
#   (SYSTEM => $system_literal)                     for "SYSTEM ..."
#   (PUBLIC => $pubid, SYSTEM => $system_literal)   for "PUBLIC ..."
# and an empty return when neither keyword is present (nothing is consumed
# in that case).  Syntax problems are reported via $self->parser_error.
sub ExternalID {
    my ($self, $reader) = @_;

    my $keyword = $reader->data(6);

    if ($keyword =~ /^SYSTEM/) {
        $reader->move_along(6);
        $self->skip_whitespace($reader)
            or $self->parser_error("No whitespace after SYSTEM identifier", $reader);
        return (SYSTEM => $self->SystemLiteral($reader));
    }

    # Neither SYSTEM nor PUBLIC: there is no external ID here.
    return unless $keyword =~ /^PUBLIC/;

    $reader->move_along(6);
    $self->skip_whitespace($reader)
        or $self->parser_error("No whitespace after PUBLIC identifier", $reader);

    my $quote = $self->quote($reader)
        || $self->parser_error("Not a quote character in PUBLIC identifier", $reader);

    # Accumulate buffer contents until the closing quote shows up.
    my $pubid = '';
    my $chunk = $reader->data;
    for (;;) {
        length($chunk)
            or $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader);

        if ($chunk =~ /^([^$quote]*)$quote/) {
            $pubid .= $1;
            $reader->move_along(length($1) + 1);    # +1 skips the quote
            last;
        }

        # No closing quote yet: take everything and ask for more.
        $pubid .= $chunk;
        $reader->move_along(length($chunk));
        $chunk = $reader->data;
    }

    $pubid =~ /^($PubidChar)+$/
        or $self->parser_error("Invalid characters in PUBLIC identifier", $reader);

    $self->skip_whitespace($reader)
        or $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader);

    return (PUBLIC => $pubid,
            SYSTEM => $self->SystemLiteral($reader));
}
# Parse a quoted system literal and return its contents without the quotes.
#
# The opening quote is obtained from $self->quote; text is then gathered
# from the reader until the matching quote is found.  Running out of data
# first is reported through $self->parser_error.
sub SystemLiteral {
    my ($self, $reader) = @_;

    my $quote   = $self->quote($reader);
    my $literal = '';
    my $chunk   = $reader->data;

    for (;;) {
        $self->parser_error("EOF found while looking for end of Sytem Literal", $reader)
            if !length($chunk);

        if ($chunk =~ /^([^$quote]*)$quote/) {
            my $head = $1;
            $reader->move_along(length($head) + 1);    # +1 skips the quote
            return $literal . $head;
        }

        # Closing quote not in this chunk: keep it all and fetch more.
        $literal .= $chunk;
        $reader->move_along(length($chunk));
        $chunk = $reader->data;
    }
}
# Parse a bracketed internal DTD subset ("[ ... ]").
#
# Returns 0 without consuming anything when the next character is not "[".
# Otherwise consumes declarations until none match, requires the closing
# "]", skips trailing whitespace and returns 1.
sub InternalSubset {
    my ($self, $reader) = @_;

    if (!$reader->match('[')) {
        return 0;
    }

    # Consume declarations for as long as one is recognised.
    while ($self->IntSubsetDecl($reader)) { }

    unless ($reader->match(']')) {
        $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader);
    }

    $self->skip_whitespace($reader);

    return 1;
}
# Try to parse one item of the internal subset: a declaration separator
# first, then a markup declaration.  Returns the first true result, or the
# markup declaration's (false) result when neither matches.
sub IntSubsetDecl {
    my ($self, $reader) = @_;

    my $separator = $self->DeclSep($reader);
    return $separator if $separator;

    return $self->markupdecl($reader);
}
# Recognise a declaration separator: either a run of whitespace or a
# parameter-entity reference.  Returns 1 when one was consumed, else 0.
sub DeclSep {
    my ($self, $reader) = @_;

    return 1 if $self->skip_whitespace($reader);
    return 1 if $self->PEReference($reader);

    # A ParsedExtSubset alternative is not attempted here:
    #    return 1 if $self->ParsedExtSubset($reader);

    return 0;
}
# Parse a parameter-entity reference of the form "%Name;".
#
# Returns 0 without consuming anything when the next character is not "%".
# Otherwise the name and the terminating ";" are required (errors go
# through $self->parser_error) and 1 is returned.
sub PEReference {
    my ($self, $reader) = @_;

    if (!$reader->match('%')) {
        return 0;
    }

    my $entity_name = $self->Name($reader)
        || $self->parser_error("PEReference did not find a Name", $reader);

    # TODO - load/parse the peref

    unless ($reader->match(';')) {
        $self->parser_error("Invalid token in PEReference", $reader);
    }

    return 1;
}
# Try each markup-declaration production in turn and stop at the first one
# that succeeds.  Returns 1 when one matched, 0 when none did.
sub markupdecl {
    my ($self, $reader) = @_;

    # Order matters: productions are attempted exactly in this sequence.
    my @productions = qw(
        elementdecl
        AttlistDecl
        EntityDecl
        NotationDecl
        PI
        Comment
    );

    for my $production (@productions) {
        return 1 if $self->$production($reader);
    }

    return 0;
}
1;
@@ -0,0 +1,105 @@
# $Id$
package XML::SAX::PurePerl; # NB, not ::EncodingDetect!
use strict;
# Inspect the first bytes of the document and tell the reader which
# encoding to use.
#
# The signatures below are tried in order; the first one that matches
# decides.  For a match, the given number of leading bytes (a byte-order
# mark) is skipped via $reader->move_along, and the given encoding is
# selected via $reader->set_encoding.  Entries without an encoding leave the
# reader untouched.  If nothing matches, a warning is emitted.
sub encoding_detect {
    my ($parser, $reader) = @_;

    my $data = $reader->data;

    # [ signature, bytes to skip, encoding (undef = leave reader alone) ]
    my @signatures = (
        [ qr/^\x00\x00\xFE\xFF/, 4, 'UCS-4BE'    ],    # BO-UCS4-be
        [ qr/^\x00\x00\xFF\xFE/, 4, 'UCS-4-2143' ],    # BO-UCS-4-2143
        [ qr/^\x00\x00\x00\x3C/, 0, 'UCS-4BE'    ],
        [ qr/^\x00\x00\x3C\x00/, 0, 'UCS-4-2143' ],
        [ qr/^\x00\x3C\x00\x00/, 0, 'UCS-4-3412' ],
        [ qr/^\x00\x3C\x00\x3F/, 0, 'UTF-16BE'   ],
        [ qr/^\xFF\xFE\x00\x00/, 4, 'UCS-4LE'    ],    # BO-UCS-4LE
        [ qr/^\xFF\xFE/,         2, 'UTF-16LE'   ],
        [ qr/^\xFE\xFF\x00\x00/, 4, 'UCS-4-3412' ],
        [ qr/^\xFE\xFF/,         2, 'UTF-16BE'   ],
        [ qr/^\xEF\xBB\xBF/,     3, 'UTF-8'      ],    # UTF-8 BOM
        [ qr/^\x3C\x00\x00\x00/, 0, 'UCS-4LE'    ],
        [ qr/^\x3C\x00\x3F\x00/, 0, 'UTF-16LE'   ],
        # Recognised starts for which no encoding is set here.
        [ qr/^\x3C\x3F\x78\x6D/, 0, undef ],
        [ qr/^\x3C\x3F\x78/,     0, undef ],
        [ qr/^\x3C\x3F/,         0, undef ],
        [ qr/^\x3C/,             0, undef ],
        [ qr/^[\x20\x09\x0A\x0D]+\x3C[^\x3F]/, 0, undef ],
        [ qr/^\x4C\x6F\xA7\x94/, 0, 'EBCDIC' ],
    );

    for my $entry (@signatures) {
        my ($signature, $skip, $encoding) = @$entry;
        next unless $data =~ $signature;

        $reader->move_along($skip)       if $skip;
        $reader->set_encoding($encoding) if defined $encoding;
        return;
    }

    warn("Unable to recognise encoding of this document");
    return;
}
1;
+67
View File
@@ -0,0 +1,67 @@
# $Id$

package XML::SAX::PurePerl::Exception;

use strict;

# Exception objects stringify through stringify() below.
use overload '""' => "stringify";

use vars qw/$StackTrace/;

# When true, exceptions built with a reader record the call stack, and
# stringification appends it.
$StackTrace = $ENV{XML_DEBUG} || 0;

# Class method: build an exception from the given options and die with it.
sub throw {
    my ($class, @options) = @_;
    die $class->new(@options);
}

# Constructor.
#   Message - required; dies with "Invalid options" when the key is absent
#   reader  - optional; when true, its column, line, public_id and
#             system_id are copied into the exception
sub new {
    my ($class, %opts) = @_;

    exists $opts{Message} or die "Invalid options";

    my %self = (
        Message   => $opts{Message},
        Exception => undef,    # not sure what this is for!!!
    );

    if (my $reader = $opts{reader}) {
        %self = (
            %self,
            ColumnNumber => $reader->column,
            LineNumber   => $reader->line,
            PublicId     => $reader->public_id,
            SystemId     => $reader->system_id,
            # stacktrace() must be called directly from here: it skips a
            # fixed number of frames.
            $StackTrace ? (StackTrace => stacktrace()) : (),
        );
    }

    return bless \%self, $class;
}

# Render as "Message [Ln: L, Col: C]", plus the stack listing when
# $StackTrace is on, plus a newline.  Missing fields render as empty.
sub stringify {
    my ($self) = @_;
    local $^W;    # fields may be undef
    my $trace = $StackTrace ? stackstring($self->{StackTrace}) : "";
    return join '',
        $self->{Message},
        " [Ln: ", $self->{LineNumber},
        ", Col: ", $self->{ColumnNumber}, "]",
        $trace, "\n";
}

# Collect Package/Filename/Line for every call frame from depth 2 outwards.
# Returns an array reference of hash references.
sub stacktrace {
    my @frames;
    my $depth = 2;
    while (1) {
        my @frame = caller($depth);
        last unless @frame;
        push @frames, {
            Package  => $frame[0],
            Filename => $frame[1],
            Line     => $frame[2],
        };
        $depth++;
    }
    return \@frames;
}

# Format the frames produced by stacktrace() as a "From:" listing.
sub stackstring {
    my ($frames) = @_;
    my @lines = ("\nFrom:\n");
    # foreach (rather than map) so that an undefined $frames is tolerated.
    foreach my $frame (@$frames) {
        push @lines, $frame->{Filename} . " Line: " . $frame->{Line} . "\n";
    }
    return join '', @lines;
}

1;
@@ -0,0 +1,28 @@
# $Id$
package XML::SAX::PurePerl;
use strict;
# Convert a code point number into its UTF-8 byte sequence.
#
# Values below 0x80 come straight from chr(); larger values up to 0x10FFFF
# are packed as two, three or four bytes.  Anything from 0x110000 upwards
# yields undef.
sub chr_ref {
    my $n = shift;

    return chr ($n) if $n < 0x80;

    # Marker bits of the lead byte and the number of continuation bytes.
    my ($lead_mark, $trailing);
    if ($n < 0x800) {
        ($lead_mark, $trailing) = (0xc0, 1);
    }
    elsif ($n < 0x10000) {
        ($lead_mark, $trailing) = (0xe0, 2);
    }
    elsif ($n < 0x110000) {
        ($lead_mark, $trailing) = (0xf0, 3);
    }
    else {
        return undef;
    }

    # Lead byte carries the top bits; every continuation byte carries six.
    my @octets = (($n >> (6 * $trailing)) | $lead_mark);
    for my $group (reverse 0 .. $trailing - 1) {
        push @octets, (($n >> (6 * $group)) & 0x3f) | 0x80;
    }

    return pack ("C*", @octets);
}
1;
+147
View File
@@ -0,0 +1,147 @@
# $Id$
package XML::SAX::PurePerl::Productions;
use Exporter;
@ISA = ('Exporter');
@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Ideographic
$Extender $Digit $CombiningChar $EncNameStart $EncNameEnd $NameChar $CharMinusDash
$PubidChar $Any $SingleChar);
### WARNING!!! All productions here must *only* match a *single* character!!! ###
# Define the exported character-class patterns at compile time.
#
# The patterns that need Unicode escapes are built inside string evals so
# that each branch's source text is only compiled on a perl it is meant for;
# a failure in either eval is re-thrown with die.
BEGIN {
# Productions that are the same on every perl version.
$S = qr/[\x20\x09\x0D\x0A]/;
$CharMinusDash = qr/[^-]/x;
$Any = qr/ . /xms;
$VersionNum = qr/ [a-zA-Z0-9_.:-]+ /x;
$EncNameStart = qr/ [A-Za-z] /x;
$EncNameEnd = qr/ [A-Za-z0-9\._-] /x;
$PubidChar = qr/ [\x20\x0D\x0Aa-zA-Z0-9'()\+,.\/:=\?;!*\#@\$_\%-] /x;
# perl older than 5.6: byte-oriented approximations.  $CombiningChar and
# $Ideographic are not assigned in this branch.
if ($] < 5.006) {
eval <<' PERL';
$Char = qr/^ [\x09\x0A\x0D\x20-\x7F]|([\xC0-\xFD][\x80-\xBF]+) $/x;
$SingleChar = qr/^$Char$/;
$BaseChar = qr/ [\x41-\x5A\x61-\x7A]|([\xC0-\xFD][\x80-\xBF]+) /x;
$Extender = qr/ \xB7 /x;
$Digit = qr/ [\x30-\x39] /x;
# can't do this one without unicode
# $CombiningChar = qr/^$/msx;
$NameChar = qr/^ (?: $BaseChar | $Digit | [._:-] | $Extender )+ $/x;
PERL
die $@ if $@;
}
# perl 5.6 and later: code-point ranges written with \x{...} escapes.
# NOTE(review): the ranges look like the XML 1.0 character-class tables -
# confirm against the specification before editing them.
else {
eval <<' PERL';
use utf8; # for 5.6
$Char = qr/^ [\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}] $/x;
$SingleChar = qr/^$Char$/;
$BaseChar = qr/
[\x{0041}-\x{005A}\x{0061}-\x{007A}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}] |
[\x{00F8}-\x{00FF}\x{0100}-\x{0131}\x{0134}-\x{013E}\x{0141}-\x{0148}] |
[\x{014A}-\x{017E}\x{0180}-\x{01C3}\x{01CD}-\x{01F0}\x{01F4}-\x{01F5}] |
[\x{01FA}-\x{0217}\x{0250}-\x{02A8}\x{02BB}-\x{02C1}\x{0386}\x{0388}-\x{038A}] |
[\x{038C}\x{038E}-\x{03A1}\x{03A3}-\x{03CE}\x{03D0}-\x{03D6}\x{03DA}] |
[\x{03DC}\x{03DE}\x{03E0}\x{03E2}-\x{03F3}\x{0401}-\x{040C}\x{040E}-\x{044F}] |
[\x{0451}-\x{045C}\x{045E}-\x{0481}\x{0490}-\x{04C4}\x{04C7}-\x{04C8}] |
[\x{04CB}-\x{04CC}\x{04D0}-\x{04EB}\x{04EE}-\x{04F5}\x{04F8}-\x{04F9}] |
[\x{0531}-\x{0556}\x{0559}\x{0561}-\x{0586}\x{05D0}-\x{05EA}\x{05F0}-\x{05F2}] |
[\x{0621}-\x{063A}\x{0641}-\x{064A}\x{0671}-\x{06B7}\x{06BA}-\x{06BE}] |
[\x{06C0}-\x{06CE}\x{06D0}-\x{06D3}\x{06D5}\x{06E5}-\x{06E6}\x{0905}-\x{0939}] |
[\x{093D}\x{0958}-\x{0961}\x{0985}-\x{098C}\x{098F}-\x{0990}] |
[\x{0993}-\x{09A8}\x{09AA}-\x{09B0}\x{09B2}\x{09B6}-\x{09B9}\x{09DC}-\x{09DD}] |
[\x{09DF}-\x{09E1}\x{09F0}-\x{09F1}\x{0A05}-\x{0A0A}\x{0A0F}-\x{0A10}] |
[\x{0A13}-\x{0A28}\x{0A2A}-\x{0A30}\x{0A32}-\x{0A33}\x{0A35}-\x{0A36}] |
[\x{0A38}-\x{0A39}\x{0A59}-\x{0A5C}\x{0A5E}\x{0A72}-\x{0A74}\x{0A85}-\x{0A8B}] |
[\x{0A8D}\x{0A8F}-\x{0A91}\x{0A93}-\x{0AA8}\x{0AAA}-\x{0AB0}] |
[\x{0AB2}-\x{0AB3}\x{0AB5}-\x{0AB9}\x{0ABD}\x{0AE0}\x{0B05}-\x{0B0C}] |
[\x{0B0F}-\x{0B10}\x{0B13}-\x{0B28}\x{0B2A}-\x{0B30}\x{0B32}-\x{0B33}] |
[\x{0B36}-\x{0B39}\x{0B3D}\x{0B5C}-\x{0B5D}\x{0B5F}-\x{0B61}\x{0B85}-\x{0B8A}] |
[\x{0B8E}-\x{0B90}\x{0B92}-\x{0B95}\x{0B99}-\x{0B9A}\x{0B9C}] |
[\x{0B9E}-\x{0B9F}\x{0BA3}-\x{0BA4}\x{0BA8}-\x{0BAA}\x{0BAE}-\x{0BB5}] |
[\x{0BB7}-\x{0BB9}\x{0C05}-\x{0C0C}\x{0C0E}-\x{0C10}\x{0C12}-\x{0C28}] |
[\x{0C2A}-\x{0C33}\x{0C35}-\x{0C39}\x{0C60}-\x{0C61}\x{0C85}-\x{0C8C}] |
[\x{0C8E}-\x{0C90}\x{0C92}-\x{0CA8}\x{0CAA}-\x{0CB3}\x{0CB5}-\x{0CB9}\x{0CDE}] |
[\x{0CE0}-\x{0CE1}\x{0D05}-\x{0D0C}\x{0D0E}-\x{0D10}\x{0D12}-\x{0D28}] |
[\x{0D2A}-\x{0D39}\x{0D60}-\x{0D61}\x{0E01}-\x{0E2E}\x{0E30}\x{0E32}-\x{0E33}] |
[\x{0E40}-\x{0E45}\x{0E81}-\x{0E82}\x{0E84}\x{0E87}-\x{0E88}\x{0E8A}] |
[\x{0E8D}\x{0E94}-\x{0E97}\x{0E99}-\x{0E9F}\x{0EA1}-\x{0EA3}\x{0EA5}\x{0EA7}] |
[\x{0EAA}-\x{0EAB}\x{0EAD}-\x{0EAE}\x{0EB0}\x{0EB2}-\x{0EB3}\x{0EBD}] |
[\x{0EC0}-\x{0EC4}\x{0F40}-\x{0F47}\x{0F49}-\x{0F69}\x{10A0}-\x{10C5}] |
[\x{10D0}-\x{10F6}\x{1100}\x{1102}-\x{1103}\x{1105}-\x{1107}\x{1109}] |
[\x{110B}-\x{110C}\x{110E}-\x{1112}\x{113C}\x{113E}\x{1140}\x{114C}\x{114E}] |
[\x{1150}\x{1154}-\x{1155}\x{1159}\x{115F}-\x{1161}\x{1163}\x{1165}] |
[\x{1167}\x{1169}\x{116D}-\x{116E}\x{1172}-\x{1173}\x{1175}\x{119E}\x{11A8}] |
[\x{11AB}\x{11AE}-\x{11AF}\x{11B7}-\x{11B8}\x{11BA}\x{11BC}-\x{11C2}] |
[\x{11EB}\x{11F0}\x{11F9}\x{1E00}-\x{1E9B}\x{1EA0}-\x{1EF9}\x{1F00}-\x{1F15}] |
[\x{1F18}-\x{1F1D}\x{1F20}-\x{1F45}\x{1F48}-\x{1F4D}\x{1F50}-\x{1F57}] |
[\x{1F59}\x{1F5B}\x{1F5D}\x{1F5F}-\x{1F7D}\x{1F80}-\x{1FB4}\x{1FB6}-\x{1FBC}] |
[\x{1FBE}\x{1FC2}-\x{1FC4}\x{1FC6}-\x{1FCC}\x{1FD0}-\x{1FD3}] |
[\x{1FD6}-\x{1FDB}\x{1FE0}-\x{1FEC}\x{1FF2}-\x{1FF4}\x{1FF6}-\x{1FFC}] |
[\x{2126}\x{212A}-\x{212B}\x{212E}\x{2180}-\x{2182}\x{3041}-\x{3094}] |
[\x{30A1}-\x{30FA}\x{3105}-\x{312C}\x{AC00}-\x{D7A3}]
/x;
$Extender = qr/
[\x{00B7}\x{02D0}\x{02D1}\x{0387}\x{0640}\x{0E46}\x{0EC6}\x{3005}\x{3031}-\x{3035}\x{309D}-\x{309E}\x{30FC}-\x{30FE}]
/x;
$Digit = qr/
[\x{0030}-\x{0039}\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{0966}-\x{096F}] |
[\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}] |
[\x{0BE7}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}] |
[\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}]
/x;
$CombiningChar = qr/
[\x{0300}-\x{0345}\x{0360}-\x{0361}\x{0483}-\x{0486}\x{0591}-\x{05A1}] |
[\x{05A3}-\x{05B9}\x{05BB}-\x{05BD}\x{05BF}\x{05C1}-\x{05C2}\x{05C4}] |
[\x{064B}-\x{0652}\x{0670}\x{06D6}-\x{06DC}\x{06DD}-\x{06DF}\x{06E0}-\x{06E4}] |
[\x{06E7}-\x{06E8}\x{06EA}-\x{06ED}\x{0901}-\x{0903}\x{093C}] |
[\x{093E}-\x{094C}\x{094D}\x{0951}-\x{0954}\x{0962}-\x{0963}\x{0981}-\x{0983}] |
[\x{09BC}\x{09BE}\x{09BF}\x{09C0}-\x{09C4}\x{09C7}-\x{09C8}] |
[\x{09CB}-\x{09CD}\x{09D7}\x{09E2}-\x{09E3}\x{0A02}\x{0A3C}\x{0A3E}\x{0A3F}] |
[\x{0A40}-\x{0A42}\x{0A47}-\x{0A48}\x{0A4B}-\x{0A4D}\x{0A70}-\x{0A71}] |
[\x{0A81}-\x{0A83}\x{0ABC}\x{0ABE}-\x{0AC5}\x{0AC7}-\x{0AC9}\x{0ACB}-\x{0ACD}] |
[\x{0B01}-\x{0B03}\x{0B3C}\x{0B3E}-\x{0B43}\x{0B47}-\x{0B48}] |
[\x{0B4B}-\x{0B4D}\x{0B56}-\x{0B57}\x{0B82}-\x{0B83}\x{0BBE}-\x{0BC2}] |
[\x{0BC6}-\x{0BC8}\x{0BCA}-\x{0BCD}\x{0BD7}\x{0C01}-\x{0C03}\x{0C3E}-\x{0C44}] |
[\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}-\x{0C56}\x{0C82}-\x{0C83}] |
[\x{0CBE}-\x{0CC4}\x{0CC6}-\x{0CC8}\x{0CCA}-\x{0CCD}\x{0CD5}-\x{0CD6}] |
[\x{0D02}-\x{0D03}\x{0D3E}-\x{0D43}\x{0D46}-\x{0D48}\x{0D4A}-\x{0D4D}\x{0D57}] |
[\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}] |
[\x{0EBB}-\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}-\x{0F19}\x{0F35}\x{0F37}\x{0F39}] |
[\x{0F3E}\x{0F3F}\x{0F71}-\x{0F84}\x{0F86}-\x{0F8B}\x{0F90}-\x{0F95}] |
[\x{0F97}\x{0F99}-\x{0FAD}\x{0FB1}-\x{0FB7}\x{0FB9}\x{20D0}-\x{20DC}\x{20E1}] |
[\x{302A}-\x{302F}\x{3099}\x{309A}]
/x;
$Ideographic = qr/
[\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}]
/x;
$NameChar = qr/^ (?: $BaseChar | $Ideographic | $Digit | [._:-] | $CombiningChar | $Extender )+ $/x;
PERL
die $@ if $@;
}
}
1;
+136
View File
@@ -0,0 +1,136 @@
# $Id$
package XML::SAX::PurePerl::Reader;
use strict;
use XML::SAX::PurePerl::Reader::URI;
use Exporter ();
use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter);
@EXPORT_OK = qw(
EOF
BUFFER
LINE
COLUMN
ENCODING
XML_VERSION
);
# Reader objects are array references; these constants name the slots.
# Only some of them are listed in @EXPORT_OK above (SYSTEM_ID and PUBLIC_ID
# are not).
use constant EOF => 0;
use constant BUFFER => 1;
use constant LINE => 2;
use constant COLUMN => 3;
use constant ENCODING => 4;
use constant SYSTEM_ID => 5;
use constant PUBLIC_ID => 6;
use constant XML_VERSION => 7;
# Concrete reader implementations returned by new() below.
require XML::SAX::PurePerl::Reader::Stream;
require XML::SAX::PurePerl::Reader::String;
# Pick the encoding-support module according to the running perl version.
if ($] >= 5.007002) {
require XML::SAX::PurePerl::Reader::UnicodeExt;
}
else {
require XML::SAX::PurePerl::Reader::NoUnicodeExt;
}
# Factory: inspect $thing and return an initialised reader for it.
#
# The checks run in this order:
#   1. a reference that isa IO::Handle      -> Stream reader
#   2. something that yields an IO slot (or a tied value whose class
#      defines TIEHANDLE)                    -> Stream reader
#   3. a value containing "<"                -> String reader
#   4. anything else is handed to the URI reader
# In every case init() is called on the new object before it is returned.
sub new {
my $class = shift;
my $thing = shift;
# try to figure if this $thing is a handle of some sort
if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) {
return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
}
my $ioref;
# NOTE(review): tied() is applied to the local copy $thing here - confirm
# that this branch can actually be reached for tied handles.
if (tied($thing)) {
my $class = ref($thing);
no strict 'refs';
$ioref = $thing if defined &{"${class}::TIEHANDLE"};
}
else {
# Probe for a glob's IO slot; a failure inside the eval is deliberately
# ignored and $@ is cleared afterwards.
eval {
$ioref = *{$thing}{IO};
};
undef $@;
}
if ($ioref) {
return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
}
if ($thing =~ /</) {
# assume it's a string
return XML::SAX::PurePerl::Reader::String->new($thing)->init;
}
# assume it is a uri
return XML::SAX::PurePerl::Reader::URI->new($thing)->init;
}
# Reset the position to line 1, column 1, pull in the first chunk of input
# and return the reader itself so the call can be chained.
sub init {
    my ($self) = @_;
    @{$self}[LINE, COLUMN] = (1, 1);
    $self->read_more;
    return $self;
}
# Return the current buffer contents.
#
# $min_length (default 1) is the amount of buffered data the caller would
# like to see; when the buffer is shorter than that, read_more is called
# once before the buffer is returned.
sub data {
    my $self = shift;
    my $min_length = @_ ? shift : 1;

    $self->read_more if length($self->[BUFFER]) < $min_length;

    return $self->[BUFFER];
}
# Consume one character if, and only if, the input starts with $char.
# Returns 1 when the character was consumed, 0 otherwise.
sub match {
    my ($self, $char) = @_;

    return 0 if substr($self->data, 0, 1) ne $char;

    $self->move_along(1);
    return 1;
}
# Combined getter/setter for the public identifier: stores the argument
# when one is given, and always returns the current value.
sub public_id {
    my ($self, @new_value) = @_;
    $self->[PUBLIC_ID] = $new_value[0] if @new_value;
    return $self->[PUBLIC_ID];
}
# Combined getter/setter for the system identifier: stores the argument
# when one is given, and always returns the current value.
sub system_id {
    my ($self, @new_value) = @_;
    $self->[SYSTEM_ID] = $new_value[0] if @new_value;
    return $self->[SYSTEM_ID];
}
# Current line number as tracked by the reader.
sub line {
    my ($self) = @_;
    return $self->[LINE];
}
# Current column number as tracked by the reader.
sub column {
    my ($self) = @_;
    return $self->[COLUMN];
}
# Encoding recorded for this reader (undef until one has been set).
sub get_encoding {
    return $_[0][ENCODING];
}
# XML version recorded for this reader.
sub get_xml_version {
    return $_[0][XML_VERSION];
}
1;
__END__
=head1 NAME
XML::SAX::PurePerl::Reader - Abstract Reader factory class
=cut
@@ -0,0 +1,25 @@
# $Id$
package XML::SAX::PurePerl::Reader;
use strict;
# Hook invoked by the Stream reader's constructor on each handle it wraps.
# This variant intentionally leaves the handle untouched.
sub set_raw_stream {
# no-op
}
# Encoding switch for a stream when no transcoding support is available.
#
#   $fh       - the stream (not touched here)
#   $encoding - name of the requested encoding
#
# Any name matching ASCII or UTF-8 / UTF8 (case-insensitively) is accepted
# and nothing happens; every other encoding raises an
# XML::SAX::Exception::Parse.
sub switch_encoding_stream {
    my ($fh, $encoding) = @_;
    # Explicit Class->method call instead of the ambiguous indirect-object
    # form "throw Class (...)".
    XML::SAX::Exception::Parse->throw(
        Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
    ) if $encoding !~ /(ASCII|UTF\-?8)/i;
}
# Encoding switch for an in-memory buffer when no transcoding support is
# available.
#
#   first argument - the buffer (ignored here)
#   $encoding      - name of the requested encoding
#
# Any name matching ASCII or UTF-8 / UTF8 (case-insensitively) is accepted
# and the buffer is left as it is; every other encoding raises an
# XML::SAX::Exception::Parse.
sub switch_encoding_string {
    my (undef, $encoding) = @_;
    # Explicit Class->method call instead of the ambiguous indirect-object
    # form "throw Class (...)".
    XML::SAX::Exception::Parse->throw(
        Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
    ) if $encoding !~ /(ASCII|UTF\-?8)/i;
}
1;
@@ -0,0 +1,84 @@
# $Id$

package XML::SAX::PurePerl::Reader::Stream;

use strict;
use vars qw(@ISA);

use XML::SAX::PurePerl::Reader qw(
    EOF
    BUFFER
    LINE
    COLUMN
    ENCODING
    XML_VERSION
);
use XML::SAX::Exception;

@ISA = ('XML::SAX::PurePerl::Reader');

# subclassed by adding 1 to last element
use constant FH => 8;
use constant BUFFER_SIZE => 4096;

# Wrap the filehandle $ioref in a reader object.
#
# The handle is first passed to the Reader's set_raw_stream hook.  The new
# object starts with an empty buffer, line 1, column 0, EOF cleared and XML
# version '1.0'.
sub new {
    my $class = shift;
    my $ioref = shift;
    XML::SAX::PurePerl::Reader::set_raw_stream($ioref);
    my @parts;
    @parts[FH, LINE, COLUMN, BUFFER, EOF, XML_VERSION] =
        ($ioref, 1, 0, '', 0, '1.0');
    return bless \@parts, $class;
}

# Read up to BUFFER_SIZE more units from the handle and append them to the
# buffer.
#
# Returns 1 when data was read and 0 at end of input (the EOF slot is then
# incremented).  A read error raises XML::SAX::Exception::Parse.
sub read_more {
    my $self = shift;
    my $buf;
    my $bytesread = read($self->[FH], $buf, BUFFER_SIZE);
    if ($bytesread) {
        $self->[BUFFER] .= $buf;
        return 1;
    }
    elsif (defined($bytesread)) {
        $self->[EOF]++;
        return 0;
    }
    else {
        # Explicit Class->method call instead of the ambiguous
        # indirect-object form "throw Class(...)".
        XML::SAX::Exception::Parse->throw(
            Message => "Error reading from filehandle: $!",
        );
    }
}

# Discard the first $_[0] characters of the buffer and update the line and
# column bookkeeping accordingly.
sub move_along {
    my $self = shift;
    my $discarded = substr($self->[BUFFER], 0, $_[0], '');

    # Wish I could skip this lot - tells us where we are in the file
    my $lines = $discarded =~ tr/\n//;
    $self->[LINE] += $lines;
    if ($lines) {
        # Anchor with \z, not $: "$" also matches just before a trailing
        # newline, so text ending in "\n" would report the length of the
        # line before that newline instead of column 0.
        $discarded =~ /\n([^\n]*)\z/;
        $self->[COLUMN] = length($1);
    }
    else {
        # Advance by what was actually removed from the buffer.
        $self->[COLUMN] += length($discarded);
    }
}

# Record $encoding for this reader and switch both the underlying handle
# and the already-buffered data over to it.
sub set_encoding {
    my $self = shift;
    my ($encoding) = @_;
    # warn("set encoding to: $encoding\n");
    XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding);
    # The buffer slot itself is passed so that it can be converted in place.
    XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding);
    $self->[ENCODING] = $encoding;
}

# Position of the underlying handle as reported by tell().
sub bytepos {
    my $self = shift;
    tell($self->[FH]);
}

1;
@@ -0,0 +1,78 @@
# $Id$

package XML::SAX::PurePerl::Reader::String;

use strict;
use vars qw(@ISA);

use XML::SAX::PurePerl::Reader qw(
    LINE
    COLUMN
    BUFFER
    ENCODING
    EOF
);

@ISA = ('XML::SAX::PurePerl::Reader');

# Extra slots used by this subclass on top of the Reader's own.
use constant DISCARDED => 8;     # amount of input consumed so far
use constant STRING => 9;        # the complete source string
use constant USED => 10;         # how much of STRING has been buffered
use constant CHUNK_SIZE => 2048;

# Wrap $string in a reader object.  The new object starts with an empty
# buffer, line 1, column 0 and nothing consumed.
sub new {
    my $class = shift;
    my $string = shift;
    my @parts;
    @parts[BUFFER, EOF, LINE, COLUMN, DISCARDED, STRING, USED] =
        ('', 0, 1, 0, 0, $string, 0);
    return bless \@parts, $class;
}

# Append the next chunk (at most CHUNK_SIZE characters) of the source
# string to the buffer.
#
# Returns 1 when data was added and 0 once the whole string has been used
# (the EOF slot is then incremented).
#
# Declared without a prototype: prototypes are ignored for method calls and
# an empty one would only reject a plain function-style call.
sub read_more {
    my $self = shift;
    if ($self->[USED] >= length($self->[STRING])) {
        $self->[EOF]++;
        return 0;
    }
    my $bytes = CHUNK_SIZE;
    if ($bytes > (length($self->[STRING]) - $self->[USED])) {
        $bytes = (length($self->[STRING]) - $self->[USED]);
    }
    $self->[BUFFER] .= substr($self->[STRING], $self->[USED], $bytes);
    $self->[USED] += $bytes;
    return 1;
}

# Discard the first $bytes characters of the buffer and update the
# consumed-count, line and column bookkeeping accordingly.
sub move_along {
    my ($self, $bytes) = @_;
    my $discarded = substr($self->[BUFFER], 0, $bytes, '');
    my $consumed = length($discarded);
    $self->[DISCARDED] += $consumed;

    # Wish I could skip this lot - tells us where we are in the file
    my $lines = $discarded =~ tr/\n//;
    $self->[LINE] += $lines;
    if ($lines) {
        # Anchor with \z, not $: "$" also matches just before a trailing
        # newline, so text ending in "\n" would report the length of the
        # line before that newline instead of column 0.
        $discarded =~ /\n([^\n]*)\z/;
        $self->[COLUMN] = length($1);
    }
    else {
        # Advance by the number of characters removed.  ($_[0] is the
        # object itself here because @_ was not shifted, so it must not be
        # used as the count.)
        $self->[COLUMN] += $consumed;
    }
}

# Record $encoding for this reader and convert the buffered data.
sub set_encoding {
    my $self = shift;
    my ($encoding) = @_;
    # The buffer slot itself is passed so that it can be converted in place.
    XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding, "utf-8");
    $self->[ENCODING] = $encoding;
}

# Amount of input consumed so far.
sub bytepos {
    my $self = shift;
    $self->[DISCARDED];
}

1;
+57
View File
@@ -0,0 +1,57 @@
# $Id$
package XML::SAX::PurePerl::Reader::URI;
use strict;
use XML::SAX::PurePerl::Reader;
use File::Temp qw(tempfile);
use Symbol;
## NOTE: This is *not* a subclass of Reader. It just returns Stream or String
## Reader objects depending on what its capabilities are.
# Build a Stream reader for the document that $uri refers to.
#
#   * an existing regular file is opened directly;
#   * "file:PATH", where PATH is an existing regular file, is opened the
#     same way;
#   * anything else is fetched with LWP::UserAgent into a temporary file,
#     which is then rewound and wrapped.
#
# Dies when a file cannot be opened or the LWP request is unsuccessful.
sub new {
    my $class = shift;
    my $uri = shift;
    # request the URI
    if (-e $uri && -f _) {
        my $fh = gensym;
        # Three-argument open: the name is taken literally, so leading or
        # trailing whitespace and characters such as "<", ">" or "|" in
        # $uri cannot alter the open mode or the file that gets opened.
        open($fh, '<', $uri) || die "Cannot open file $uri : $!";
        return XML::SAX::PurePerl::Reader::Stream->new($fh);
    }
    elsif ($uri =~ /^file:(.*)$/ && -e $1 && -f _) {
        my $file = $1;
        my $fh = gensym;
        open($fh, '<', $file) || die "Cannot open file $file : $!";
        return XML::SAX::PurePerl::Reader::Stream->new($fh);
    }
    else {
        # request URI, return String reader
        require LWP::UserAgent;
        my $ua = LWP::UserAgent->new;
        $ua->agent("Perl/XML/SAX/PurePerl/1.0 " . $ua->agent);
        my $req = HTTP::Request->new(GET => $uri);
        # Spool the response body into an anonymous temporary file.
        my $fh = tempfile();
        my $callback = sub {
            my ($data, $response, $protocol) = @_;
            print $fh $data;
        };
        my $res = $ua->request($req, $callback, 4096);
        if ($res->is_success) {
            seek($fh, 0, 0);
            return XML::SAX::PurePerl::Reader::Stream->new($fh);
        }
        else {
            die "LWP Request Failed";
        }
    }
}
1;
@@ -0,0 +1,23 @@
# $Id$
package XML::SAX::PurePerl::Reader;
use strict;
use Encode ();
# Put the handle into ":bytes" mode.  Returns what binmode returns.
sub set_raw_stream {
    my $handle = shift;
    return binmode($handle, ":bytes");
}
# Push an ":encoding(...)" layer for $encoding onto the handle.  Returns
# what binmode returns.
sub switch_encoding_stream {
    my $handle   = shift;
    my $encoding = shift;
    my $layer    = ":encoding($encoding)";
    return binmode($handle, $layer);
}
# Decode the caller's buffer in place.
#
#   $_[0] - the buffer; it is used through @_ on purpose, because @_
#           aliases the caller's variable and the decoded text has to be
#           stored back into it
#   $_[1] - name of the encoding the buffer is currently in
sub switch_encoding_string {
    my $encoding = $_[1];
    $_[0] = Encode::decode($encoding, $_[0]);
}
1;
+22
View File
@@ -0,0 +1,22 @@
# $Id$
package XML::SAX::PurePerl;
use strict;
no warnings 'utf8';
# Turn a code point number into the corresponding character using perl's
# own chr().
sub chr_ref {
    my $codepoint = shift;
    return chr($codepoint);
}
# On perl 5.7.2 and later, load Encode and register aliases so that the
# UTF-16 names used by the parser resolve to Encode's UCS-2 encodings.
# NOTE(review): mapping the UTF-16 names onto UCS-2 presumably means
# surrogate pairs are not handled - confirm that this is intended.
if ($] >= 5.007002) {
require Encode;
Encode::define_alias( "UTF-16" => "UCS-2" );
Encode::define_alias( "UTF-16BE" => "UCS-2" );
Encode::define_alias( "UTF-16LE" => "ucs-2le" );
Encode::define_alias( "UTF16LE" => "ucs-2le" );
}
1;
+129
View File
@@ -0,0 +1,129 @@
# $Id$
package XML::SAX::PurePerl;
use strict;
use XML::SAX::PurePerl::Productions qw($S $VersionNum $EncNameStart $EncNameEnd);
# Parse an optional XML declaration ("<?xml version=... ?>") at the start
# of the document.
#
# When a declaration is present, the version attribute is mandatory; an
# encoding declaration and a standalone declaration may follow, and the
# closing "?>" is consumed.  When there is no declaration and the reader
# has no encoding yet, the reader is switched to UTF-8.
sub XMLDecl {
    my ($self, $reader) = @_;

    my $head = $reader->data(5);
    # warn("Looking for xmldecl in: $head");
    if ($head =~ /^<\?xml$S/o) {
        # Require and consume the closing "?>" of the declaration.
        my $end_of_decl = sub {
            my $tail = $reader->data(2);
            $tail =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
            $reader->move_along(2);
        };

        $reader->move_along(5);
        $self->skip_whitespace($reader);

        # get version attribute
        $self->VersionInfo($reader)
            or $self->parser_error("XML Declaration lacks required version attribute, or version attribute does not match XML specification", $reader);

        # Without whitespace after the version the declaration must end.
        unless ($self->skip_whitespace($reader)) {
            $end_of_decl->();
            return;
        }

        # The same holds after an encoding declaration.
        if ($self->EncodingDecl($reader) && !$self->skip_whitespace($reader)) {
            $end_of_decl->();
            return;
        }

        $self->SDDecl($reader);
        $self->skip_whitespace($reader);

        $end_of_decl->();
    }
    else {
        # warn("first 5 bytes: ", join(',', unpack("CCCCC", $head)), "\n");
        # no xml decl
        if (!$reader->get_encoding) {
            $reader->set_encoding("UTF-8");
        }
    }
}
# Parse a version attribute (version="..." with either quote style).
#
# Returns 0 without consuming anything when the input does not start with
# one.  Otherwise the attribute is consumed, any version other than "1.0"
# is reported through $self->parser_error, and 1 is returned.
sub VersionInfo {
    my ($self, $reader) = @_;

    my $data = $reader->data(11);
    # warn("Looking for version in $data");
    return 0 unless $data =~ /^(version$S*=$S*(["'])($VersionNum)\2)/o;

    my ($attribute, $vernum) = ($1, $3);
    $reader->move_along(length($attribute));

    $self->parser_error("Only XML version 1.0 supported. Saw: '$vernum'", $reader)
        if $vernum ne "1.0";

    return 1;
}
# Parse a standalone declaration (standalone="yes" or "no").
#
# Returns 0 without consuming anything when the input does not start with
# one.  Otherwise the declaration is consumed, $self->{standalone} is set
# to 1 for "yes" and 0 for "no", and 1 is returned.
sub SDDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(15);
    return 0 unless $data =~ /^(standalone$S*=$S*(["'])(yes|no)\2)/o;

    my ($declaration, $answer) = ($1, $3);
    $reader->move_along(length($declaration));

    $self->{standalone} = ($answer eq 'yes') ? 1 : 0;

    return 1;
}
# Parse an encoding declaration (encoding="...").
#
# Returns 0 without consuming anything when the input does not start with
# one.  Otherwise the declaration is consumed, the named encoding is
# handed to $reader->set_encoding, and 1 is returned.
sub EncodingDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(12);
    if ($data =~ /^(encoding$S*=$S*(["'])($EncNameStart$EncNameEnd*)\2)/o) {
        my ($declaration, $encoding) = ($1, $3);
        $reader->move_along(length($declaration));
        $reader->set_encoding($encoding);
        return 1;
    }

    return 0;
}
# Parse a text declaration ("<?xml version=... encoding=... ?>").
#
# Returns nothing (and consumes nothing) when the input does not start with
# "<?xml" followed by whitespace.  Otherwise the version attribute is
# optional, the encoding declaration is mandatory, the closing "?>" is
# required and consumed, and 1 is returned.  Violations are reported
# through $self->parser_error.
sub TextDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(6);
    $data =~ /^<\?xml$S+/ or return;
    $reader->move_along(5);
    $self->skip_whitespace($reader);

    if ($self->VersionInfo($reader)) {
        $self->skip_whitespace($reader) ||
            $self->parser_error("Lack of whitespace after version attribute in text declaration", $reader);
    }

    $self->EncodingDecl($reader) ||
        $self->parser_error("Encoding declaration missing from external entity text declaration", $reader);

    $self->skip_whitespace($reader);

    $data = $reader->data(2);
    $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
    # The closing "?>" is part of the declaration: consume it, as XMLDecl
    # does, so that it is not left in the buffer for whatever is parsed
    # next.
    $reader->move_along(2);

    return 1;
}
1;