mirror of
https://github.com/paparazzi/paparazzi.git
synced 2026-05-09 22:49:53 +08:00
Merge branch 'hardware_test' of github.com:paparazzi/paparazzi into hardware_test
This commit is contained in:
@@ -175,3 +175,7 @@ sw/ground_segment/lpc21iap/lpc21iap.dSYM/Contents/Info.plist
|
|||||||
sw/ground_segment/lpc21iap/lpc21iap.dSYM/Contents/Resources/DWARF/lpc21iap
|
sw/ground_segment/lpc21iap/lpc21iap.dSYM/Contents/Resources/DWARF/lpc21iap
|
||||||
|
|
||||||
tests/results/*
|
tests/results/*
|
||||||
|
|
||||||
|
sw/logalizer/plotprofile.dSYM/Contents/Resources/DWARF/plotprofile
|
||||||
|
|
||||||
|
sw/logalizer/plotprofile.dSYM/Contents/Info.plist
|
||||||
|
|||||||
@@ -259,9 +259,6 @@ dist_clean_irreversible: clean
|
|||||||
ab_clean:
|
ab_clean:
|
||||||
find sw/airborne -name '*~' -exec rm -f {} \;
|
find sw/airborne -name '*~' -exec rm -f {} \;
|
||||||
|
|
||||||
test_all_example_airframes: replace_current_conf_xml
|
|
||||||
for ap in `grep name conf/conf.xml.example | sed -e 's/.*name=\"//' | sed -e 's/".*//'`; do for airframe in `grep $$ap conf/conf.xml.example | sed -e 's/.*airframe=\"//' | sed -e 's/".*//'`; do for target in `grep target conf/$$airframe | grep name | sed -e 's/.*name=\"//' | sed -e 's/\".*//'`; do echo "Making $$ap $$target"; make -C ./ AIRCRAFT=$$ap clean_ac $$target.compile || exit 1; done; done; done
|
|
||||||
|
|
||||||
replace_current_conf_xml:
|
replace_current_conf_xml:
|
||||||
test conf/conf.xml || mv conf/conf.xml conf/conf.xml.backup.`date +%Y%m%d-%H%M%s`
|
test conf/conf.xml || mv conf/conf.xml conf/conf.xml.backup.`date +%Y%m%d-%H%M%s`
|
||||||
cp conf/conf.xml.example conf/conf.xml
|
cp conf/conf.xml.example conf/conf.xml
|
||||||
@@ -276,7 +273,9 @@ sw/simulator/launchsitl:
|
|||||||
cat src/$(@F) | sed s#OCAMLRUN#$(OCAMLRUN)# | sed s#OCAML#$(OCAML)# > $@
|
cat src/$(@F) | sed s#OCAMLRUN#$(OCAMLRUN)# | sed s#OCAML#$(OCAML)# > $@
|
||||||
chmod a+x $@
|
chmod a+x $@
|
||||||
|
|
||||||
|
|
||||||
test: all replace_current_conf_xml
|
test: all replace_current_conf_xml
|
||||||
cd tests; $(MAKE) $(@)
|
cd tests; $(MAKE) $(@)
|
||||||
|
|
||||||
|
test_all_example_airframes: replace_current_conf_xml
|
||||||
|
cd tests; $(MAKE) $(@) TARGET_BOARD=examples
|
||||||
|
|
||||||
|
|||||||
@@ -10,8 +10,7 @@
|
|||||||
radio="radios/cockpitSX.xml"
|
radio="radios/cockpitSX.xml"
|
||||||
telemetry="telemetry/telemetry_booz2.xml"
|
telemetry="telemetry/telemetry_booz2.xml"
|
||||||
flight_plan="flight_plans/dummy.xml"
|
flight_plan="flight_plans/dummy.xml"
|
||||||
settings="settings/settings_booz2.xml
|
settings="settings/settings_booz2.xml settings/settings_booz2_ahrs_cmpl.xml"
|
||||||
settings/settings_booz2_ahrs_cmpl.xml"
|
|
||||||
gui_color="white"
|
gui_color="white"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
@@ -33,8 +32,7 @@
|
|||||||
radio="radios/cockpitSX.xml"
|
radio="radios/cockpitSX.xml"
|
||||||
telemetry="telemetry/telemetry_booz2.xml"
|
telemetry="telemetry/telemetry_booz2.xml"
|
||||||
flight_plan="flight_plans/dummy.xml"
|
flight_plan="flight_plans/dummy.xml"
|
||||||
settings="settings/settings_booz2.xml
|
settings="settings/settings_booz2.xml settings/settings_booz2_ahrs_cmpl.xml"
|
||||||
settings/settings_booz2_ahrs_cmpl.xml"
|
|
||||||
gui_color="white"
|
gui_color="white"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
@@ -66,8 +64,7 @@
|
|||||||
radio="radios/cockpitMM.xml"
|
radio="radios/cockpitMM.xml"
|
||||||
telemetry="telemetry/default.xml"
|
telemetry="telemetry/default.xml"
|
||||||
flight_plan="flight_plans/versatile.xml"
|
flight_plan="flight_plans/versatile.xml"
|
||||||
settings="settings/tuning.xml
|
settings="settings/tuning.xml settings/infrared.xml"
|
||||||
settings/infrared.xml"
|
|
||||||
gui_color="#ba6293"
|
gui_color="#ba6293"
|
||||||
/>
|
/>
|
||||||
<aircraft
|
<aircraft
|
||||||
@@ -77,8 +74,7 @@
|
|||||||
radio="radios/cockpitSX.xml"
|
radio="radios/cockpitSX.xml"
|
||||||
telemetry="telemetry/default.xml"
|
telemetry="telemetry/default.xml"
|
||||||
flight_plan="flight_plans/versatile.xml"
|
flight_plan="flight_plans/versatile.xml"
|
||||||
settings="settings/tuning.xml
|
settings="settings/tuning.xml settings/infrared.xml"
|
||||||
settings/infrared.xml"
|
|
||||||
gui_color="red"
|
gui_color="red"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ my $upload_output = run_program(
|
|||||||
"Attempting to build and upload the firmware.",
|
"Attempting to build and upload the firmware.",
|
||||||
$ENV{'PAPARAZZI_SRC'},
|
$ENV{'PAPARAZZI_SRC'},
|
||||||
"make $make_upload_options",
|
"make $make_upload_options",
|
||||||
1,1);
|
0,1);
|
||||||
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
|
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,26 +4,52 @@ use Test::More;
|
|||||||
use lib "$ENV{'PAPARAZZI_SRC'}/tests/lib";
|
use lib "$ENV{'PAPARAZZI_SRC'}/tests/lib";
|
||||||
use XML::Simple;
|
use XML::Simple;
|
||||||
use Program;
|
use Program;
|
||||||
|
use Data::Dumper;
|
||||||
|
|
||||||
$|++;
|
$|++;
|
||||||
my $examples = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/conf.xml.example");
|
my $examples = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/conf.xml.example");
|
||||||
|
|
||||||
|
use Data::Dumper;
|
||||||
|
|
||||||
ok(1, "Parsed the example file");
|
ok(1, "Parsed the example file");
|
||||||
foreach my $example (sort keys%{$examples->{'aircraft'}})
|
foreach my $example (sort keys%{$examples->{'aircraft'}})
|
||||||
{
|
{
|
||||||
|
#next unless $example =~ m#easystar#i;
|
||||||
my $airframe = $examples->{'aircraft'}->{$example}->{'airframe'};
|
my $airframe = $examples->{'aircraft'}->{$example}->{'airframe'};
|
||||||
my $airframe_config = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/$airframe");
|
my $airframe_config = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/$airframe");
|
||||||
foreach my $process (sort keys %{$airframe_config->{'firmware'}})
|
foreach my $process (sort keys %{$airframe_config->{'firmware'}})
|
||||||
{
|
{
|
||||||
foreach my $target (sort keys %{$airframe_config->{'firmware'}->{$process}->{'target'}})
|
if ($process =~ m#setup|fixedwing|rotorcraft|lisa_test_progs#)
|
||||||
{
|
{
|
||||||
my $make_upload_options = "AIRCRAFT=$example $target.compile";
|
#warn "EX: [$example] ". Dumper($airframe_config->{'firmware'}->{$process}->{'target'});
|
||||||
my $upload_output = run_program(
|
foreach my $target (sort keys %{$airframe_config->{'firmware'}->{$process}->{'target'}})
|
||||||
"Attempting to build the firmware $target for the airframe $example.",
|
{
|
||||||
$ENV{'PAPARAZZI_SRC'},
|
next unless scalar $airframe_config->{'firmware'}->{$process}->{'target'}->{$target}->{'board'};
|
||||||
"make $make_upload_options",
|
#warn "EXAMPLE: [$example] TARGET: [$target]\n";
|
||||||
0,1);
|
my $make_upload_options = "AIRCRAFT=$example clean_ac $target.compile";
|
||||||
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
|
# Compile this target and check the captured output for errors.
# The fourth argument is read from the TEST_VERBOSE environment variable.
# It must be $ENV{...} (the %ENV hash); $ENV->{...} would dereference an
# unrelated, undefined scalar and always yield undef.
# NOTE(review): the fourth argument is presumably run_program's verbosity
# flag -- confirm against tests/lib/Program.pm.
my $upload_output = run_program(
    "Attempting to build the firmware $target for the airframe $example.",
    $ENV{'PAPARAZZI_SRC'},
    "make $make_upload_options",
    $ENV{'TEST_VERBOSE'},1);
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elsif ($process =~ m#target#)
|
||||||
|
{
|
||||||
|
#warn "EXT: [$example] ". Dumper($airframe_config->{'firmware'}->{$process});
|
||||||
|
foreach my $target (sort keys %{$airframe_config->{'firmware'}->{$process}})
|
||||||
|
{
|
||||||
|
next unless scalar $airframe_config->{'firmware'}->{$process}->{$target}->{'board'};
|
||||||
|
#warn "EXAMPLET: [$example] TARGET: [$target]\n";
|
||||||
|
my $make_upload_options = "AIRCRAFT=$example clean_ac $target.compile";
|
||||||
|
# Compile this target and check the captured output for errors.
# The fourth argument is read from the TEST_VERBOSE environment variable.
# It must be $ENV{...} (the %ENV hash); $ENV->{...} would dereference an
# unrelated, undefined scalar and always yield undef.
# NOTE(review): the fourth argument is presumably run_program's verbosity
# flag -- confirm against tests/lib/Program.pm.
my $upload_output = run_program(
    "Attempting to build the firmware $target for the airframe $example.",
    $ENV{'PAPARAZZI_SRC'},
    "make $make_upload_options",
    $ENV{'TEST_VERBOSE'},1);
unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\"");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Executable
+583
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,379 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use vars qw($VERSION @ISA @EXPORT_OK);
|
||||||
|
|
||||||
|
$VERSION = '0.99';
|
||||||
|
|
||||||
|
use Exporter ();
|
||||||
|
@ISA = ('Exporter');
|
||||||
|
|
||||||
|
@EXPORT_OK = qw(Namespaces Validation);
|
||||||
|
|
||||||
|
use File::Basename qw(dirname);
|
||||||
|
use File::Spec ();
|
||||||
|
use Symbol qw(gensym);
|
||||||
|
use XML::SAX::ParserFactory (); # loaded for simplicity
|
||||||
|
|
||||||
|
use constant PARSER_DETAILS => "ParserDetails.ini";
|
||||||
|
|
||||||
|
use constant Namespaces => "http://xml.org/sax/features/namespaces";
|
||||||
|
use constant Validation => "http://xml.org/sax/features/validation";
|
||||||
|
|
||||||
|
my $known_parsers = undef;
|
||||||
|
|
||||||
|
# load_parsers takes the ParserDetails.ini file out of the same directory
|
||||||
|
# that XML::SAX is in, and looks at it. Format in POD below
|
||||||
|
|
||||||
|
=begin EXAMPLE
|
||||||
|
|
||||||
|
[XML::SAX::PurePerl]
|
||||||
|
http://xml.org/sax/features/namespaces = 1
|
||||||
|
http://xml.org/sax/features/validation = 0
|
||||||
|
# a comment
|
||||||
|
|
||||||
|
# blank lines ignored
|
||||||
|
|
||||||
|
[XML::SAX::AnotherParser]
|
||||||
|
http://xml.org/sax/features/namespaces = 0
|
||||||
|
http://xml.org/sax/features/validation = 1
|
||||||
|
|
||||||
|
=end EXAMPLE
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
# Populate the list of known parsers from the ParserDetails.ini file.
#
# Arguments:
#   $class - invocant (class name)
#   $dir   - optional directory containing the "SAX" sub-directory; when
#            false, the directory XML/SAX.pm was loaded from is used
#
# Returns $class so calls can be chained. If the ini file cannot be opened
# a warning is issued through do_warn() and the parser list is left empty.
sub load_parsers {
    my $class = shift;
    my $dir   = shift;

    # reset parsers
    $known_parsers = [];

    # get directory from wherever XML::SAX is installed
    if (!$dir) {
        $dir = dirname($INC{'XML/SAX.pm'});
    }

    my $ini_path = File::Spec->catfile($dir, "SAX", PARSER_DETAILS);

    # Three-argument open: the path is never interpreted as a mode or pipe.
    if (!open(my $fh, '<', $ini_path)) {
        XML::SAX->do_warn("could not find " . PARSER_DETAILS . " in $dir/SAX\n");
        return $class;
    }
    else {
        $known_parsers = $class->_parse_ini_file($fh);
        close($fh);
    }

    return $class;
}
|
||||||
|
|
||||||
|
# Parse an ini-style parser description from an open filehandle.
#
# Arguments:
#   $class - invocant (unused)
#   $fh    - readable filehandle positioned at the start of the ini data
#
# Returns an array reference of hashes, one per "[Section]" heading, each
# shaped as { Name => $section, Features => { $key => $value, ... } }.
# "key = value" lines seen before any heading are stored under a section
# whose Name is the empty string.
#
# Dies with the line number and the original text on any line that is not
# blank, a comment, a heading or a "key = value" instruction.
sub _parse_ini_file {
    my $class = shift;
    my ($fh) = @_;

    my @config;

    my $lineno = 0;
    while (defined(my $line = <$fh>)) {
        $lineno++;
        my $original = $line;

        # Strip comments first, then surrounding whitespace, so that blanks
        # left in front of a trailing comment do not end up in a value or
        # prevent a heading from being recognised.
        $line =~ s/[#;].*$//m;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # ignore blanks
        next if $line eq '';

        # heading (non-greedy capture keeps inner padding out of the name)
        if ($line =~ /^\[\s*(.*?)\s*\]$/) {
            push @config, { Name => $1 };
            next;
        }

        # instruction
        elsif ($line =~ /^(.*?)\s*?=\s*(.*)$/) {
            unless (@config) {
                push @config, { Name => '' };
            }
            $config[-1]{Features}{$1} = $2;
        }

        # not whitespace, comment, or instruction
        else {
            die "Invalid line in ini: $lineno\n>>> $original\n";
        }
    }

    return \@config;
}
|
||||||
|
|
||||||
|
# Return the array reference of known parser descriptions, loading them
# via load_parsers() on first use.
sub parsers {
    my ($class) = @_;

    $class->load_parsers() unless $known_parsers;

    return $known_parsers;
}
|
||||||
|
|
||||||
|
# Drop every entry named $parser_module from the in-memory parser list
# (loading the list first if it has not been loaded yet).
# Returns $class so calls can be chained.
sub remove_parser {
    my ($class, $parser_module) = @_;

    $class->load_parsers() unless $known_parsers;

    my @kept;
    for my $entry (@$known_parsers) {
        push @kept, $entry if $entry->{Name} ne $parser_module;
    }
    @$known_parsers = @kept;

    return $class;
}
|
||||||
|
|
||||||
|
# Register (or refresh) $parser_module in the in-memory parser list.
#
# The module is loaded with require, its supported_features() are recorded
# with a value of 1 each, and the resulting entry is appended to the list.
# If the module is already listed, its last existing entry is removed
# first, so the module effectively moves to the end.
# Returns $class so calls can be chained.
sub add_parser {
    my ($class, $parser_module) = @_;

    $class->load_parsers() unless $known_parsers;

    # Turn Some::Module into Some/Module.pm and load it.
    (my $parser_file = $parser_module) =~ s{::}{/}g;
    $parser_file .= ".pm";
    require $parser_file;

    my $entry = { Name => $parser_module };
    $entry->{Features}{$_} = 1 for $parser_module->supported_features();

    # Locate the last entry already carrying this name, if any.
    my $last_match;
    for my $idx (0 .. $#$known_parsers) {
        $last_match = $idx if $known_parsers->[$idx]{Name} eq $parser_module;
    }

    # Present already: take the old entry out. Either way the fresh entry
    # goes to the end of the list.
    splice(@$known_parsers, $last_match, 1) if defined $last_match;
    push @$known_parsers, $entry;

    return $class;
}
|
||||||
|
|
||||||
|
# Write the in-memory parser list back to ParserDetails.ini, located in the
# "SAX" directory next to the loaded XML/SAX.pm.
#
# The existing file is made writable and removed before being rewritten.
# Each parser becomes a "[Name]" section followed by "key = value" lines
# (keys sorted so the output is reproducible) and a blank line.
# Dies if the file cannot be opened or if the final close reports an error.
# Returns $class so calls can be chained.
sub save_parsers {
    my $class = shift;

    # get directory from wherever XML::SAX is installed
    my $dir = dirname($INC{'XML/SAX.pm'});

    my $file = File::Spec->catfile($dir, "SAX", PARSER_DETAILS);
    chmod 0644, $file;
    unlink($file);

    # Three-argument open: $file is never interpreted as a mode or pipe.
    open(my $fh, '>', $file)
        or die "Cannot write to $file: $!";

    foreach my $p (@$known_parsers) {
        print $fh "[$p->{Name}]\n";
        foreach my $key (sort keys %{$p->{Features}}) {
            print $fh "$key = $p->{Features}{$key}\n";
        }
        print $fh "\n";
    }

    print $fh "\n";

    # Buffered write errors only surface at close time, so check it.
    close($fh)
        or die "Cannot write to $file: $!";

    return $class;
}
|
||||||
|
|
||||||
|
# Emit the given message(s) with warn(), except when the HARNESS_ACTIVE
# environment variable is true (i.e. when running under Test::Harness).
sub do_warn {
    my ($class, @message) = @_;
    $ENV{HARNESS_ACTIVE} or warn(@message);
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX - Simple API for XML
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
use XML::SAX;
|
||||||
|
|
||||||
|
# get a list of known parsers
|
||||||
|
my $parsers = XML::SAX->parsers();
|
||||||
|
|
||||||
|
# add/update a parser
|
||||||
|
XML::SAX->add_parser(q(XML::SAX::PurePerl));
|
||||||
|
|
||||||
|
# remove parser
|
||||||
|
XML::SAX->remove_parser(q(XML::SAX::Foodelberry));
|
||||||
|
|
||||||
|
# save parsers
|
||||||
|
XML::SAX->save_parsers();
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
XML::SAX is a SAX parser access API for Perl. It includes classes
|
||||||
|
and APIs required for implementing SAX drivers, along with a factory
|
||||||
|
class for returning any SAX parser installed on the user's system.
|
||||||
|
|
||||||
|
=head1 USING A SAX2 PARSER
|
||||||
|
|
||||||
|
The factory class is XML::SAX::ParserFactory. Please see the
|
||||||
|
documentation of that module for how to instantiate a SAX parser:
|
||||||
|
L<XML::SAX::ParserFactory>. However if you don't want to load up
|
||||||
|
another manual page, here's a short synopsis:
|
||||||
|
|
||||||
|
use XML::SAX::ParserFactory;
|
||||||
|
use XML::SAX::XYZHandler;
|
||||||
|
my $handler = XML::SAX::XYZHandler->new();
|
||||||
|
my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
|
||||||
|
$p->parse_uri("foo.xml");
|
||||||
|
# or $p->parse_string("<foo/>") or $p->parse_file($fh);
|
||||||
|
|
||||||
|
This will automatically load a SAX2 parser (defaulting to
|
||||||
|
XML::SAX::PurePerl if no others are found) and return it to you.
|
||||||
|
|
||||||
|
In order to learn how to use SAX to parse XML, you will need to read
|
||||||
|
L<XML::SAX::Intro> and for reference, L<XML::SAX::Specification>.
|
||||||
|
|
||||||
|
=head1 WRITING A SAX2 PARSER
|
||||||
|
|
||||||
|
The first thing to remember in writing a SAX2 parser is to subclass
|
||||||
|
XML::SAX::Base. This will make your life infinitely easier, by providing
|
||||||
|
a number of methods automagically for you. See L<XML::SAX::Base> for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
When writing a SAX2 parser that is compatible with XML::SAX, you need
|
||||||
|
to inform XML::SAX of the presence of that driver when you install it.
|
||||||
|
In order to do that, XML::SAX contains methods for saving the fact that
|
||||||
|
the parser exists on your system to an "INI" file, which is then loaded
|
||||||
|
to determine which parsers are installed.
|
||||||
|
|
||||||
|
The best way to do this is to follow these rules:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item * Add XML::SAX as a prerequisite in Makefile.PL:
|
||||||
|
|
||||||
|
WriteMakefile(
|
||||||
|
...
|
||||||
|
PREREQ_PM => { 'XML::SAX' => 0 },
|
||||||
|
...
|
||||||
|
);
|
||||||
|
|
||||||
|
Alternatively you may wish to check for it in other ways that will
|
||||||
|
cause more than just a warning.
|
||||||
|
|
||||||
|
=item * Add the following code snippet to your Makefile.PL:
|
||||||
|
|
||||||
|
sub MY::install {
|
||||||
|
package MY;
|
||||||
|
my $script = shift->SUPER::install(@_);
|
||||||
|
if (ExtUtils::MakeMaker::prompt(
|
||||||
|
"Do you want to modify ParserDetails.ini?", 'Y')
|
||||||
|
=~ /^y/i) {
|
||||||
|
$script =~ s/install :: (.*)$/install :: $1 install_sax_driver/m;
|
||||||
|
$script .= <<"INSTALL";
|
||||||
|
|
||||||
|
install_sax_driver :
|
||||||
|
\t\@\$(PERL) -MXML::SAX -e "XML::SAX->add_parser(q(\$(NAME)))->save_parsers()"
|
||||||
|
|
||||||
|
INSTALL
|
||||||
|
}
|
||||||
|
return $script;
|
||||||
|
}
|
||||||
|
|
||||||
|
Note that you should check the output of this - \$(NAME) will use the name of
|
||||||
|
your distribution, which may not be exactly what you want. For example XML::LibXML
|
||||||
|
has a driver called XML::LibXML::SAX::Generator, which is used in place of
|
||||||
|
\$(NAME) in the above.
|
||||||
|
|
||||||
|
=item * Add an XML::SAX test:
|
||||||
|
|
||||||
|
A test file should be added to your t/ directory containing something like the
|
||||||
|
following:
|
||||||
|
|
||||||
|
use Test;
|
||||||
|
BEGIN { plan tests => 4 }
|
||||||
|
use XML::SAX;
|
||||||
|
use XML::SAX::PurePerl::DebugHandler;
|
||||||
|
XML::SAX->add_parser(q(XML::SAX::MyDriver));
|
||||||
|
local $XML::SAX::ParserPackage = 'XML::SAX::MyDriver';
|
||||||
|
eval {
|
||||||
|
my $handler = XML::SAX::PurePerl::DebugHandler->new();
|
||||||
|
ok($handler);
|
||||||
|
my $parser = XML::SAX::ParserFactory->parser(Handler => $handler);
|
||||||
|
ok($parser);
|
||||||
|
ok($parser->isa('XML::SAX::MyDriver'));
|
||||||
|
$parser->parse_string("<tag/>");
|
||||||
|
ok($handler->{seen}{start_element});
|
||||||
|
};
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 EXPORTS
|
||||||
|
|
||||||
|
By default, XML::SAX exports nothing into the caller's namespace. However you
|
||||||
|
can request the symbols C<Namespaces> and C<Validation> which are the
|
||||||
|
URIs for those features, allowing an easier way to request those features
|
||||||
|
via ParserFactory:
|
||||||
|
|
||||||
|
use XML::SAX qw(Namespaces Validation);
|
||||||
|
my $factory = XML::SAX::ParserFactory->new();
|
||||||
|
$factory->require_feature(Namespaces);
|
||||||
|
$factory->require_feature(Validation);
|
||||||
|
my $parser = $factory->parser();
|
||||||
|
|
||||||
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
Current maintainer: Grant McLean, grantm@cpan.org
|
||||||
|
|
||||||
|
Originally written by:
|
||||||
|
|
||||||
|
Matt Sergeant, matt@sergeant.org
|
||||||
|
|
||||||
|
Kip Hampton, khampton@totalcinema.com
|
||||||
|
|
||||||
|
Robin Berjon, robin@knowscape.com
|
||||||
|
|
||||||
|
=head1 LICENSE
|
||||||
|
|
||||||
|
This is free software, you may use it and distribute it under
|
||||||
|
the same terms as Perl itself.
|
||||||
|
|
||||||
|
=head1 SEE ALSO
|
||||||
|
|
||||||
|
L<XML::SAX::Base> for writing SAX Filters and Parsers
|
||||||
|
|
||||||
|
L<XML::SAX::PurePerl> for an XML parser written in 100%
|
||||||
|
pure perl.
|
||||||
|
|
||||||
|
L<XML::SAX::Exception> for details on exception handling
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,134 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::DocumentLocator;
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
# Build a locator: a hash tied to this class (the arguments are handed on
# to TIEHASH) and itself blessed into the class.
sub new {
    my ($class, @closures) = @_;

    my %locator;
    tie %locator, $class, @closures;

    return bless \%locator, $class;
}
|
||||||
|
|
||||||
|
# Tie constructor. Stores the six positional arguments under the slots
# pubmeth, sysmeth, linemeth, colmeth, encmeth and xmlvmeth (in that
# order) and blesses the resulting hash into $class.
sub TIEHASH {
    my ($class, @callbacks) = @_;

    my %slots;
    @slots{qw(pubmeth sysmeth linemeth colmeth encmeth xmlvmeth)}
        = @callbacks[0 .. 5];

    return bless \%slots, $class;
}
|
||||||
|
|
||||||
|
# Tie read accessor. Maps the requested key to the stored closure, calls
# it with the key as its only argument and returns the (scalar) result.
# Returns undef for unknown keys or when no closure was supplied.
sub FETCH {
    my ($self, $key) = @_;

    my %slot_for = (
        PublicId     => 'pubmeth',
        SystemId     => 'sysmeth',
        LineNumber   => 'linemeth',
        ColumnNumber => 'colmeth',
        Encoding     => 'encmeth',
        XMLVersion   => 'xmlvmeth',
    );

    my $slot     = $slot_for{$key};
    my $callback = defined $slot ? $self->{$slot} : undef;

    return undef unless $callback;
    return scalar $callback->($key);
}
|
||||||
|
|
||||||
|
# Tie existence test. Returns 1 when $key is exactly one of the six
# locator keys, 0 otherwise.
# The match is anchored with \A and \z so that a key with a trailing
# newline is not reported as existing; FETCH compares keys with eq and
# could not serve such a key.
sub EXISTS {
    my ($self, $key) = @_;

    return 0 unless defined $key;

    return $key =~ /\A(?:PublicId|SystemId|LineNumber|ColumnNumber|Encoding|XMLVersion)\z/
        ? 1
        : 0;
}
|
||||||
|
|
||||||
|
# Tie write hook: intentionally does nothing, so assignments to the
# locator hash are discarded.
sub STORE {
    my ($locator, $name, $ignored_value) = @_;
}
|
||||||
|
|
||||||
|
# Tie delete hook: intentionally does nothing, so locator keys cannot be
# removed. Returns nothing explicitly; without the explicit return the
# value of the argument assignment would become the result of delete().
sub DELETE {
    my ($self, $key) = @_;
    return;
}
|
||||||
|
|
||||||
|
# Tie clear hook: intentionally does nothing, so emptying the locator
# hash has no effect.
sub CLEAR {
    my ($locator) = @_;
}
|
||||||
|
|
||||||
|
# Tie iteration start. Installs a fresh hash of the six locator keys in
# $self->{keys} (a new hash has a fresh iterator) and returns the first
# result of each() on it.
sub FIRSTKEY {
    my ($self) = @_;

    my %pending = map { $_ => 1 }
        qw(PublicId SystemId LineNumber ColumnNumber Encoding XMLVersion);
    $self->{keys} = \%pending;

    return each %pending;
}
|
||||||
|
|
||||||
|
# Tie iteration step. Continues the each() iteration over the key table
# stored in $self->{keys}; the previous key argument is not consulted.
sub NEXTKEY {
    my ($self) = @_;
    return each %{ $self->{keys} };
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX::DocumentLocator - Helper class for document locators
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
my $locator = XML::SAX::DocumentLocator->new(
|
||||||
|
sub { $object->get_public_id },
|
||||||
|
sub { $object->get_system_id },
|
||||||
|
sub { $reader->current_line },
|
||||||
|
sub { $reader->current_column },
|
||||||
|
sub { $reader->get_encoding },
|
||||||
|
sub { $reader->get_xml_version },
|
||||||
|
);
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
This module gives you a tied hash reference that calls the
|
||||||
|
specified closures when asked for PublicId, SystemId,
|
||||||
|
LineNumber, ColumnNumber, Encoding and XMLVersion.
|
||||||
|
|
||||||
|
It is useful for writing SAX Parsers so that you don't have
|
||||||
|
to constantly update the line numbers in a hash reference on
|
||||||
|
the object you pass to set_document_locator(). See the source
|
||||||
|
code for XML::SAX::PurePerl for a usage example.
|
||||||
|
|
||||||
|
=head1 API
|
||||||
|
|
||||||
|
There is only 1 method: C<new>. Simply pass it a list of
|
||||||
|
closures that when called will return the PublicId, the
|
||||||
|
SystemId, the LineNumber, the ColumnNumber, the Encoding
|
||||||
|
and the XMLVersion respectively.
|
||||||
|
|
||||||
|
The closures are passed a single parameter, the key being
|
||||||
|
requested. But you're free to ignore that.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
package XML::SAX::Exception;
|
||||||
|
BEGIN {
|
||||||
|
$XML::SAX::Exception::VERSION = '1.08';
|
||||||
|
}
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
use overload '""' => "stringify",
|
||||||
|
'fallback' => 1;
|
||||||
|
|
||||||
|
use vars qw($StackTrace);
|
||||||
|
|
||||||
|
use Carp;
|
||||||
|
|
||||||
|
$StackTrace = $ENV{XML_DEBUG} || 0;
|
||||||
|
|
||||||
|
# Other exception classes:
|
||||||
|
|
||||||
|
@XML::SAX::Exception::NotRecognized::ISA = ('XML::SAX::Exception');
|
||||||
|
@XML::SAX::Exception::NotSupported::ISA = ('XML::SAX::Exception');
|
||||||
|
@XML::SAX::Exception::Parse::ISA = ('XML::SAX::Exception');
|
||||||
|
|
||||||
|
|
||||||
|
# Raise an exception. Called on an existing exception object, that object
# is re-thrown as is; called on a class name, a new exception is built
# from the remaining arguments via new() and thrown.
sub throw {
    my $invocant = shift;
    die(ref($invocant) ? $invocant : $invocant->new(@_));
}
|
||||||
|
|
||||||
|
# Construct an exception object from key/value options.
# A Message option is mandatory; without it the constructor confesses,
# listing the option names that were passed.
# When $StackTrace is true, a StackTrace entry from stacktrace() is stored
# first, so a StackTrace key among the caller's options overrides it.
sub new {
    my ($class, %opts) = @_;

    exists $opts{Message}
        or confess "Invalid options: " . join(', ', keys %opts);

    my %fields = $StackTrace ? (StackTrace => stacktrace()) : ();
    %fields = (%fields, %opts);

    bless \%fields, $class;
}
|
||||||
|
|
||||||
|
# String overload handler. Produces the Message, followed by
# " [Ln: <LineNumber>, Col: <ColumnNumber>]" when a LineNumber key exists,
# followed by the formatted stack trace when $StackTrace is true, and a
# final newline. Warnings ($^W) are switched off for the duration.
sub stringify {
    my ($self) = @_;
    local $^W;

    my $text = $self->{Message};

    $text .= " [Ln: " . $self->{LineNumber} . ", Col: " . $self->{ColumnNumber} . "]"
        if exists $self->{LineNumber};

    $text .= stackstring($self->{StackTrace}) if $StackTrace;

    return $text . "\n";
}
|
||||||
|
|
||||||
|
# Collect the call stack starting two frames above this sub.
# Returns an array reference of hashes with the keys Package, Filename
# and Line, innermost frame first.
sub stacktrace {
    my @frames;
    my $depth = 2;

    while (my ($package, $filename, $line) = caller($depth)) {
        push @frames, {
            Package  => $package,
            Filename => $filename,
            Line     => $line,
        };
        $depth++;
    }

    return \@frames;
}
|
||||||
|
|
||||||
|
# Format a stack trace (array reference of frames as produced by
# stacktrace()) as text: a "From:" header followed by one
# "<Filename> Line: <Line>" line per frame.
sub stackstring {
    my ($frames) = @_;

    my $text = "\nFrom:\n";
    for my $frame (@$frames) {
        $text .= "$frame->{Filename} Line: $frame->{Line}\n";
    }

    return $text;
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX::Exception - Exception classes for XML::SAX
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
throw XML::SAX::Exception::NotSupported(
|
||||||
|
Message => "The foo feature is not supported",
|
||||||
|
);
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
This module is the base class for all SAX Exceptions, those defined in
|
||||||
|
the spec as well as those that one may create for one's own SAX errors.
|
||||||
|
|
||||||
|
There are three subclasses included, corresponding to those of the SAX
|
||||||
|
spec:
|
||||||
|
|
||||||
|
XML::SAX::Exception::NotSupported
|
||||||
|
XML::SAX::Exception::NotRecognized
|
||||||
|
XML::SAX::Exception::Parse
|
||||||
|
|
||||||
|
Use them wherever you want, and as much as possible when you encounter
|
||||||
|
such errors. SAX is meant to use exceptions as much as possible to
|
||||||
|
flag problems.
|
||||||
|
|
||||||
|
=head1 CREATING NEW EXCEPTION CLASSES
|
||||||
|
|
||||||
|
All you need to do to create a new exception class is:
|
||||||
|
|
||||||
|
@XML::SAX::Exception::MyException::ISA = ('XML::SAX::Exception')
|
||||||
|
|
||||||
|
The given package doesn't need to exist, it'll behave correctly this
|
||||||
|
way. If your exception refines an existing exception class, then you
|
||||||
|
may also inherit from that instead of from the base class.
|
||||||
|
|
||||||
|
=head1 THROWING EXCEPTIONS
|
||||||
|
|
||||||
|
This is as simple as exemplified in the SYNOPSIS. In fact, there's
|
||||||
|
nothing more to know. All you have to do is:
|
||||||
|
|
||||||
|
throw XML::SAX::Exception::MyException( Message => 'Something went wrong' );
|
||||||
|
|
||||||
|
and voila, you've thrown an exception which can be caught in an eval block.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
@@ -0,0 +1,407 @@
|
|||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX::Intro - An Introduction to SAX Parsing with Perl
|
||||||
|
|
||||||
|
=head1 Introduction
|
||||||
|
|
||||||
|
XML::SAX is a new way to work with XML Parsers in Perl. In this article
|
||||||
|
we'll discuss why you should be using SAX, why you should be using
|
||||||
|
XML::SAX, and we'll see some of the finer implementation details. The
|
||||||
|
text below assumes some familiarity with callback, or push based
|
||||||
|
parsing, but if you are unfamiliar with these techniques then a good
|
||||||
|
place to start is Kip Hampton's excellent series of articles on XML.com.
|
||||||
|
|
||||||
|
=head1 Replacing XML::Parser
|
||||||
|
|
||||||
|
The de-facto way of parsing XML under perl is to use Larry Wall and
|
||||||
|
Clark Cooper's XML::Parser. This module is a Perl and XS wrapper around
|
||||||
|
the expat XML parser library by James Clark. It has been a hugely
|
||||||
|
successful project, but suffers from a couple of rather major flaws.
|
||||||
|
Firstly it is a proprietary API, designed before the SAX API was
|
||||||
|
conceived, which means that it is not easily replaceable by other
|
||||||
|
streaming parsers. Secondly its callbacks are subrefs. This doesn't
|
||||||
|
sound like much of an issue, but unfortunately leads to code like:
|
||||||
|
|
||||||
|
sub handle_start {
|
||||||
|
my ($e, $el, %attrs) = @_;
|
||||||
|
if ($el eq 'foo') {
|
||||||
|
$e->{inside_foo}++; # BAD! $e is an XML::Parser::Expat object.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
As you can see, we're using the $e object to hold our state
|
||||||
|
information, which is a bad idea because we don't own that object - we
|
||||||
|
didn't create it. It's an internal object of XML::Parser, that happens
|
||||||
|
to be a hashref. We could all too easily overwrite XML::Parser internal
|
||||||
|
state variables by using this, or Clark could change it to an array ref
|
||||||
|
(not that he would, because it would break so much code, but he could).
|
||||||
|
|
||||||
|
The only way currently with XML::Parser to safely maintain state is to
|
||||||
|
use a closure:
|
||||||
|
|
||||||
|
my $state = MyState->new();
|
||||||
|
$parser->setHandlers(Start => sub { handle_start($state, @_) });
|
||||||
|
|
||||||
|
This closure traps the $state variable, which now gets passed as the
|
||||||
|
first parameter to your callback. Unfortunately very few people use
|
||||||
|
this technique, as it is not documented in the XML::Parser POD files.
|
||||||
|
|
||||||
|
Another reason you might not want to use XML::Parser is because you
|
||||||
|
need some feature that it doesn't provide (such as validation), or you
|
||||||
|
might need to use a library that doesn't use expat, due to it not being
|
||||||
|
installed on your system, or due to having a restrictive ISP. Using SAX
|
||||||
|
allows you to work around these restrictions.
|
||||||
|
|
||||||
|
=head1 Introducing SAX
|
||||||
|
|
||||||
|
SAX stands for the Simple API for XML. And simple it really is.
|
||||||
|
Constructing a SAX parser and passing events to handlers is done as
|
||||||
|
simply as:
|
||||||
|
|
||||||
|
use XML::SAX;
|
||||||
|
use MySAXHandler;
|
||||||
|
|
||||||
|
my $parser = XML::SAX::ParserFactory->parser(
|
||||||
|
Handler => MySAXHandler->new
|
||||||
|
);
|
||||||
|
|
||||||
|
$parser->parse_uri("foo.xml");
|
||||||
|
|
||||||
|
The important concept to grasp here is that SAX uses a factory class
|
||||||
|
called XML::SAX::ParserFactory to create a new parser instance. The
|
||||||
|
reason for this is so that you can support other underlying
|
||||||
|
parser implementations for different feature sets. This is one thing
|
||||||
|
that XML::Parser has always sorely lacked.
|
||||||
|
|
||||||
|
In the code above we see the parse_uri method used, but we could
|
||||||
|
have equally well
|
||||||
|
called parse_file, parse_string, or parse(). Please see XML::SAX::Base
|
||||||
|
for what these methods take as parameters, but don't be fooled into
|
||||||
|
believing parse_file takes a filename. No, it takes a file handle, a
|
||||||
|
glob, or a subclass of IO::Handle. Beware.
|
||||||
|
|
||||||
|
SAX works very similarly to XML::Parser's default callback method,
|
||||||
|
except it has one major difference: rather than setting individual
|
||||||
|
callbacks, you create a new class in which to receive the callbacks.
|
||||||
|
Each callback is called as a method call on an instance of that handler
|
||||||
|
class. An example will best demonstrate this:
|
||||||
|
|
||||||
|
package MySAXHandler;
|
||||||
|
use base qw(XML::SAX::Base);
|
||||||
|
|
||||||
|
sub start_document {
|
||||||
|
my ($self, $doc) = @_;
|
||||||
|
# process document start event
|
||||||
|
}
|
||||||
|
|
||||||
|
sub start_element {
|
||||||
|
my ($self, $el) = @_;
|
||||||
|
# process element start event
|
||||||
|
}
|
||||||
|
|
||||||
|
Now, when we instantiate this as above, and parse some XML with this as
|
||||||
|
the handler, the methods start_document and start_element will be
|
||||||
|
called as method calls, so this would be the equivalent of directly
|
||||||
|
calling:
|
||||||
|
|
||||||
|
$object->start_element($el);
|
||||||
|
|
||||||
|
Notice how this is different to XML::Parser's calling style, which
|
||||||
|
calls:
|
||||||
|
|
||||||
|
start_element($e, $name, %attribs);
|
||||||
|
|
||||||
|
It's the difference between function calling and method calling which
|
||||||
|
allows you to subclass SAX handlers which contributes to SAX being a
|
||||||
|
powerful solution.
|
||||||
|
|
||||||
|
As you can see, unlike XML::Parser, we have to define a new package in
|
||||||
|
which to do our processing (there are hacks you can do to make this
|
||||||
|
unnecessary, but I'll leave figuring those out to the experts). The
|
||||||
|
biggest benefit of this is that you maintain your own state variable
|
||||||
|
($self in the above example) thus freeing you of the concerns listed
|
||||||
|
above. It is also an improvement in maintainability - you can place the
|
||||||
|
code in a separate file if you wish to, and your callback methods are
|
||||||
|
always called the same thing, rather than having to choose a suitable
|
||||||
|
name for them as you had to with XML::Parser. This is an obvious win.
|
||||||
|
|
||||||
|
SAX parsers are also very flexible in how you pass a handler to them.
|
||||||
|
You can use a constructor parameter as we saw above, or we can pass the
|
||||||
|
handler directly in the call to one of the parse methods:
|
||||||
|
|
||||||
|
$parser->parse(Handler => $handler,
|
||||||
|
Source => { SystemId => "foo.xml" });
|
||||||
|
# or...
|
||||||
|
$parser->parse_file($fh, Handler => $handler);
|
||||||
|
|
||||||
|
This flexibility allows for one parser to be used in many different
|
||||||
|
scenarios throughout your script (though one shouldn't feel pressure to
|
||||||
|
use this method, as parser construction is generally not a time
|
||||||
|
consuming process).
|
||||||
|
|
||||||
|
=head1 Callback Parameters
|
||||||
|
|
||||||
|
The only other thing you need to know to understand basic SAX is the
|
||||||
|
structure of the parameters passed to each of the callbacks. In
|
||||||
|
XML::Parser, all parameters are passed as multiple options to the
|
||||||
|
callbacks, so for example the Start callback would be called as
|
||||||
|
my_start($e, $name, %attributes), and the PI callback would be called
|
||||||
|
as my_processing_instruction($e, $target, $data). In SAX, every
|
||||||
|
callback is passed a hash reference, containing entries that define our
|
||||||
|
"node". The key callbacks and the structures they receive are:
|
||||||
|
|
||||||
|
=head2 start_element
|
||||||
|
|
||||||
|
The start_element handler is called whenever a parser sees an opening
|
||||||
|
tag. It is passed an element structure consisting of:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item LocalName
|
||||||
|
|
||||||
|
The name of the element minus any namespace prefix it may
|
||||||
|
have come with in the document.
|
||||||
|
|
||||||
|
=item NamespaceURI
|
||||||
|
|
||||||
|
The URI of the namespace associated with this element,
|
||||||
|
or the empty string for none.
|
||||||
|
|
||||||
|
=item Attributes
|
||||||
|
|
||||||
|
A set of attributes as described below.
|
||||||
|
|
||||||
|
=item Name
|
||||||
|
|
||||||
|
The name of the element as it was seen in the document (i.e.
|
||||||
|
including any prefix associated with it)
|
||||||
|
|
||||||
|
=item Prefix
|
||||||
|
|
||||||
|
The prefix used to qualify this element's namespace, or the
|
||||||
|
empty string if none.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
The B<Attributes> are a hash reference, keyed by what we have called
|
||||||
|
"James Clark" notation. This means that the attribute name has been
|
||||||
|
expanded to include any associated namespace URI, and put together as
|
||||||
|
{ns}name, where "ns" is the expanded namespace URI of the attribute if
|
||||||
|
and only if the attribute had a prefix, and "name" is the LocalName of
|
||||||
|
the attribute.
|
||||||
|
|
||||||
|
The value of each entry in the attributes hash is another hash
|
||||||
|
structure consisting of:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item LocalName
|
||||||
|
|
||||||
|
The name of the attribute minus any namespace prefix it may have
|
||||||
|
come with in the document.
|
||||||
|
|
||||||
|
=item NamespaceURI
|
||||||
|
|
||||||
|
The URI of the namespace associated with this attribute. If the
|
||||||
|
attribute had no prefix, then this consists of just the empty string.
|
||||||
|
|
||||||
|
=item Name
|
||||||
|
|
||||||
|
The attribute's name as it appeared in the document, including any
|
||||||
|
namespace prefix.
|
||||||
|
|
||||||
|
=item Prefix
|
||||||
|
|
||||||
|
The prefix used to qualify this attribute's namespace, or the
|
||||||
|
empty string if none.
|
||||||
|
|
||||||
|
=item Value
|
||||||
|
|
||||||
|
The value of the attribute.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
So a full example, as output by Data::Dumper might be:
|
||||||
|
|
||||||
|
....
|
||||||
|
|
||||||
|
=head2 end_element
|
||||||
|
|
||||||
|
The end_element handler is called either when a parser sees a closing
|
||||||
|
tag, or after start_element has been called for an empty element (do
|
||||||
|
note however that a parser may if it is so inclined call characters
|
||||||
|
with an empty string when it sees an empty element. There is no simple
|
||||||
|
way in SAX to determine if the parser in fact saw an empty element, a
|
||||||
|
start and end element with no content).
|
||||||
|
|
||||||
|
The end_element handler receives exactly the same structure as
|
||||||
|
start_element, minus the Attributes entry. One must note though that it
|
||||||
|
should not be a reference to the same data as start_element receives,
|
||||||
|
so you may change the values in start_element but this will not affect
|
||||||
|
the values later seen by end_element.
|
||||||
|
|
||||||
|
=head2 characters
|
||||||
|
|
||||||
|
The characters callback may be called in several circumstances. The
|
||||||
|
most obvious one is when seeing ordinary character data in the markup.
|
||||||
|
But it is also called for text in a CDATA section, and is also called
|
||||||
|
in other situations. A SAX parser has to make no guarantees whatsoever
|
||||||
|
about how many times it may call characters for a stretch of text in an
|
||||||
|
XML document - it may call once, or it may call once for every
|
||||||
|
character in the text. In order to work around this it is often
|
||||||
|
important for the SAX developer to use a bundling technique, where text
|
||||||
|
is gathered up and processed in one of the other callbacks. This is not
|
||||||
|
always necessary, but it is a worthwhile technique to learn, which we
|
||||||
|
will cover in XML::SAX::Advanced (when I get around to writing it).
|
||||||
|
|
||||||
|
The characters handler is called with a very simple structure - a hash
|
||||||
|
reference consisting of just one entry:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item Data
|
||||||
|
|
||||||
|
The text data that was received.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 comment
|
||||||
|
|
||||||
|
The comment callback is called for comment text. Unlike with
|
||||||
|
C<characters()>, the comment callback *must* be invoked just once for an
|
||||||
|
entire comment string. It receives a single simple structure - a hash
|
||||||
|
reference containing just one entry:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item Data
|
||||||
|
|
||||||
|
The text of the comment.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 processing_instruction
|
||||||
|
|
||||||
|
The processing instruction handler is called for all processing
|
||||||
|
instructions in the document. Note that these processing instructions
|
||||||
|
may appear before the document root element, or after it, or anywhere
|
||||||
|
where text and elements would normally appear within the document,
|
||||||
|
according to the XML specification.
|
||||||
|
|
||||||
|
The handler is passed a structure containing just two entries:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item Target
|
||||||
|
|
||||||
|
The target of the processing instruction
|
||||||
|
|
||||||
|
=item Data
|
||||||
|
|
||||||
|
The text data in the processing instruction. Can be an empty
|
||||||
|
string for a processing instruction that has no data element.
|
||||||
|
For example E<lt>?wiggle?E<gt> is a perfectly valid processing instruction.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 Tip of the iceberg
|
||||||
|
|
||||||
|
What we have discussed above is really the tip of the SAX iceberg. And
|
||||||
|
so far it looks like there's not much of interest to SAX beyond what we
|
||||||
|
have seen with XML::Parser. But it does go much further than that, I
|
||||||
|
promise.
|
||||||
|
|
||||||
|
People who hate Object Oriented code for the sake of it may be thinking
|
||||||
|
here that creating a new package just to parse something is a waste
|
||||||
|
when they've been parsing things just fine up to now using procedural
|
||||||
|
code. But there's reason to all this madness. And that reason is SAX
|
||||||
|
Filters.
|
||||||
|
|
||||||
|
As you saw right at the very start, to let the parser know about our
|
||||||
|
class, we pass it an instance of our class as the Handler to the
|
||||||
|
parser. But now imagine what would happen if our class could also take
|
||||||
|
a Handler option, and simply do some processing and pass on our data
|
||||||
|
further down the line? That in a nutshell is how SAX filters work. It's
|
||||||
|
Unix pipes for the 21st century!
|
||||||
|
|
||||||
|
There are two downsides to this. Number 1 - writing SAX filters can be
|
||||||
|
tricky. If you look into the future and read the advanced tutorial I'm
|
||||||
|
writing, you'll see that Handler can come in several shapes and sizes.
|
||||||
|
So making sure your filter does the right thing can be tricky.
|
||||||
|
Secondly, constructing complex filter chains can be difficult, and
|
||||||
|
simple thinking tells us that we only get one pass at our document,
|
||||||
|
when often we'll need more than that.
|
||||||
|
|
||||||
|
Luckily though, those downsides have been fixed by the release of two
|
||||||
|
very cool modules. What's even better is that I didn't write either of
|
||||||
|
them!
|
||||||
|
|
||||||
|
The first module is XML::SAX::Base. This is a VITAL SAX module that
|
||||||
|
acts as a base class for all SAX parsers and filters. It provides an
|
||||||
|
abstraction away from calling the handler methods, that makes sure your
|
||||||
|
filter or parser does the right thing, and it does it FAST. So, if you
|
||||||
|
ever need to write a SAX filter, which if you're processing XML -> XML,
|
||||||
|
or XML -> HTML, then you probably do, then you need to be writing it as
|
||||||
|
a subclass of XML::SAX::Base. Really - this is advice not to ignore
|
||||||
|
lightly. I will not go into the details of writing a SAX filter here.
|
||||||
|
Kip Hampton, the author of XML::SAX::Base has covered this nicely in
|
||||||
|
his article on XML.com here <URI>.
|
||||||
|
|
||||||
|
To construct SAX pipelines, Barrie Slaymaker, a long time Perl hacker
|
||||||
|
whose modules you will probably have heard of or used, wrote a very
|
||||||
|
clever module called XML::SAX::Machines. This combines some really
|
||||||
|
clever SAX filter-type modules, with a construction toolkit for filters
|
||||||
|
that makes building pipelines easy. But before we see how it makes
|
||||||
|
things easy, first let's see how tricky it looks to build complex SAX
|
||||||
|
filter pipelines.
|
||||||
|
|
||||||
|
use XML::SAX::ParserFactory;
|
||||||
|
use XML::SAX::Filter1;
|
||||||
|
use XML::SAX::Filter2;
|
||||||
|
use XML::SAX::Writer;
|
||||||
|
|
||||||
|
my $output_string;
|
||||||
|
my $writer = XML::SAX::Writer->new(Output => \$output_string);
|
||||||
|
my $filter2 = XML::SAX::Filter2->new(Handler => $writer);
|
||||||
|
my $filter1 = XML::SAX::Filter1->new(Handler => $filter2);
|
||||||
|
my $parser = XML::SAX::ParserFactory->parser(Handler => $filter1);
|
||||||
|
|
||||||
|
$parser->parse_uri("foo.xml");
|
||||||
|
|
||||||
|
This is a lot easier with XML::SAX::Machines:
|
||||||
|
|
||||||
|
use XML::SAX::Machines qw(Pipeline);
|
||||||
|
|
||||||
|
my $output_string;
|
||||||
|
my $parser = Pipeline(
|
||||||
|
XML::SAX::Filter1 => XML::SAX::Filter2 => \$output_string
|
||||||
|
);
|
||||||
|
|
||||||
|
$parser->parse_uri("foo.xml");
|
||||||
|
|
||||||
|
One of the main benefits of XML::SAX::Machines is that the pipelines
|
||||||
|
are constructed in natural order, rather than the reverse order we saw
|
||||||
|
with manual pipeline construction. XML::SAX::Machines takes care of all
|
||||||
|
the internals of pipe construction, providing you at the end with just
|
||||||
|
a parser you can use (and you can re-use the same parser as many times
|
||||||
|
as you need to).
|
||||||
|
|
||||||
|
Just a final tip. If you ever get stuck and are confused about what is
|
||||||
|
being passed from one SAX filter or parser to the next, then
|
||||||
|
Devel::TraceSAX will come to your rescue. This perl debugger plugin
|
||||||
|
will allow you to dump the SAX stream of events as it goes by. Usage is
|
||||||
|
really very simple: just call your perl script that uses SAX as follows:
|
||||||
|
|
||||||
|
$ perl -d:TraceSAX <scriptname>
|
||||||
|
|
||||||
|
And preferably pipe the output to a pager of some sort, such as more or
|
||||||
|
less. The output is extremely verbose, but should help clear some
|
||||||
|
issues up.
|
||||||
|
|
||||||
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
Matt Sergeant, matt@sergeant.org
|
||||||
|
|
||||||
|
$Id$
|
||||||
|
|
||||||
|
=cut
|
||||||
@@ -0,0 +1,230 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::ParserFactory;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use vars qw($VERSION);
|
||||||
|
|
||||||
|
$VERSION = '1.01';
|
||||||
|
|
||||||
|
use Symbol qw(gensym);
|
||||||
|
use XML::SAX;
|
||||||
|
use XML::SAX::Exception;
|
||||||
|
|
||||||
|
# Build a parser factory object.
#
# Arguments: the class name followed by a flat list of key/value options,
# which become the fields of the new object.  The KnownParsers field is
# always (re)set to whatever XML::SAX->parsers() returns.
#
# Returns the blessed factory.
sub new {
    my ($class, %fields) = @_;    # TODO : Fix this in spec.

    my $factory = bless \%fields, $class;
    $factory->{KnownParsers} = XML::SAX->parsers();

    return $factory;
}
|
||||||
|
|
||||||
|
# Instantiate a SAX parser.
#
# May be called on a factory object or directly on the class; in the
# latter case a factory is created first via new().  Every argument after
# the invocant is handed unchanged to the parser class's constructor.
#
# The class name comes from _parser_class() and may end in "(1.23)"; that
# suffix is removed from the name and kept as a version string.  When the
# class has no new() method yet, it is loaded through a string eval of a
# require statement built from the class name and that version string;
# any error from the eval is rethrown.
#
# Returns the object produced by the parser class's new().
sub parser {
    my ($factory, @ctor_args) = @_;

    # Class-method call: build a factory to work with.
    $factory = $factory->new() unless ref($factory);

    my $class = $factory->_parser_class();

    # Split "Some::Parser (1.23)" into the bare class and " 1.23".
    my $version_suffix = '';
    if ($class =~ s/\s*\(([\d\.]+)\)\s*$//) {
        $version_suffix = " $1";
    }

    unless ($class->can('new')) {
        eval "require $class $version_suffix;";
        die $@ if $@;
    }

    return $class->new(@ctor_args);
}
|
||||||
|
|
||||||
|
# Mark a feature as required for the parser this factory will hand out.
#
# Takes the feature name (only the first argument after the invocant is
# used) and bumps its counter in $self->{RequiredFeatures}.
# Returns the factory itself, so calls can be chained.
sub require_feature {
    my ($factory, $feature) = @_;

    ++$factory->{RequiredFeatures}{$feature};

    return $factory;
}
|
||||||
|
|
||||||
|
# Decide which parser class parser() should instantiate.
#
# The candidates are tried in this order:
#   1. $XML::SAX::ParserPackage, when it holds a true value;
#   2. when $self->{RequiredFeatures} is set: the last entry of
#      $self->{KnownParsers} whose Features hash has every required key
#      (an XML::SAX::Exception is thrown when no entry qualifies);
#   3. the first SAX.ini that can be opened in an @INC directory: its
#      ParserPackage entry, or else the last known parser whose Features
#      hash has every key listed in the file;
#   4. the last entry of $self->{KnownParsers}, or "XML::SAX::PurePerl"
#      when that list is empty.
#
# Returns the chosen class name as a string.
sub _parser_class {
    my $self = shift;

    # First try ParserPackage
    if ($XML::SAX::ParserPackage) {
        return $XML::SAX::ParserPackage;
    }

    # Now check if required/preferred is there
    if ($self->{RequiredFeatures}) {
        my %required = %{$self->{RequiredFeatures}};
        # note - we never go onto the next try (ParserDetails.ini),
        # because if we can't provide the requested feature
        # we need to throw an exception.
        PARSER:
        foreach my $parser (reverse @{$self->{KnownParsers}}) {
            foreach my $feature (keys %required) {
                if (!exists $parser->{Features}{$feature}) {
                    next PARSER;
                }
            }
            # got here - all features must exist!
            return $parser->{Name};
        }
        # TODO : should this be NotSupported() ?
        XML::SAX::Exception->throw(
            Message => "Unable to provide required features",
        );
    }

    # Next try SAX.ini
    for my $dir (@INC) {
        # @INC may contain hooks (code refs, objects); those are not
        # directories and cannot hold a SAX.ini.
        next if ref $dir;

        my $fh = gensym();
        # Three-argument open, so that nothing in the directory name can
        # be interpreted as an open mode.
        next unless open($fh, '<', "$dir/SAX.ini");

        my $param_list = XML::SAX->_parse_ini_file($fh);
        close($fh);

        my $params = $param_list->[0]->{Features};
        if ($params->{ParserPackage}) {
            return $params->{ParserPackage};
        }
        else {
            # we have required features (or nothing?)
            PARSER:
            foreach my $parser (reverse @{$self->{KnownParsers}}) {
                foreach my $feature (keys %$params) {
                    if (!exists $parser->{Features}{$feature}) {
                        next PARSER;
                    }
                }
                return $parser->{Name};
            }
            XML::SAX->do_warn("Unable to provide SAX.ini required features. Using fallback\n");
        }
        last; # stop after first INI found
    }

    if (@{$self->{KnownParsers}}) {
        return $self->{KnownParsers}[-1]{Name};
    }
    else {
        return "XML::SAX::PurePerl"; # backup plan!
    }
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX::ParserFactory - Obtain a SAX parser
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
use XML::SAX::ParserFactory;
|
||||||
|
use XML::SAX::XYZHandler;
|
||||||
|
my $handler = XML::SAX::XYZHandler->new();
|
||||||
|
my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
|
||||||
|
$p->parse_uri("foo.xml");
|
||||||
|
# or $p->parse_string("<foo/>") or $p->parse_file($fh);
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
XML::SAX::ParserFactory is a factory class for providing an application
|
||||||
|
with a Perl SAX2 XML parser. It is akin to DBI - a front end for other
|
||||||
|
parser classes. Each new SAX2 parser installed will register itself
|
||||||
|
with XML::SAX, and then it will become available to all applications
|
||||||
|
that use XML::SAX::ParserFactory to obtain a SAX parser.
|
||||||
|
|
||||||
|
Unlike DBI however, XML/SAX parsers almost all work alike (especially
|
||||||
|
if they subclass XML::SAX::Base, as they should), so rather than
|
||||||
|
specifying the parser you want in the call to C<parser()>, XML::SAX
|
||||||
|
has several ways to automatically choose which parser to use:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item * $XML::SAX::ParserPackage
|
||||||
|
|
||||||
|
If this package variable is set, then this package is C<require()>d
|
||||||
|
and an instance of this package is returned by calling the C<new()>
|
||||||
|
class method in that package. If it cannot be loaded or there is
|
||||||
|
an error, an exception will be thrown. The variable can also contain
|
||||||
|
a version number:
|
||||||
|
|
||||||
|
$XML::SAX::ParserPackage = "XML::SAX::Expat (0.72)";
|
||||||
|
|
||||||
|
And the number will be treated as a minimum version number.
|
||||||
|
|
||||||
|
=item * Required features
|
||||||
|
|
||||||
|
It is possible to require features from the parsers. For example, you
|
||||||
|
may wish for a parser that supports validation via a DTD. To do that,
|
||||||
|
use the following code:
|
||||||
|
|
||||||
|
use XML::SAX::ParserFactory;
|
||||||
|
my $factory = XML::SAX::ParserFactory->new();
|
||||||
|
$factory->require_feature('http://xml.org/sax/features/validation');
|
||||||
|
my $parser = $factory->parser(...);
|
||||||
|
|
||||||
|
Alternatively, specify the required features in the call to the
|
||||||
|
ParserFactory constructor:
|
||||||
|
|
||||||
|
my $factory = XML::SAX::ParserFactory->new(
|
||||||
|
RequiredFeatures => {
|
||||||
|
'http://xml.org/sax/features/validation' => 1,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
If the features you have asked for are unavailable (for example the
|
||||||
|
user might not have a validating parser installed), then an
|
||||||
|
exception will be thrown.
|
||||||
|
|
||||||
|
The list of known parsers is searched in reverse order, so it will
|
||||||
|
always return the last installed parser that supports all of your
|
||||||
|
requested features (Note: this is subject to change if someone
|
||||||
|
comes up with a better way of making this work).
|
||||||
|
|
||||||
|
=item * SAX.ini
|
||||||
|
|
||||||
|
ParserFactory will search @INC for a file called SAX.ini, which
|
||||||
|
is in a simple format:
|
||||||
|
|
||||||
|
# a comment looks like this,
|
||||||
|
; or like this, and are stripped anywhere in the file
|
||||||
|
key = value # SAX.ini contains key/value pairs.
|
||||||
|
|
||||||
|
All whitespace is non-significant.
|
||||||
|
|
||||||
|
This file can contain either a line:
|
||||||
|
|
||||||
|
ParserPackage = MyParserModule (1.02)
|
||||||
|
|
||||||
|
Where MyParserModule is the module to load and use for the parser,
|
||||||
|
and the number in brackets is a minimum version to load.
|
||||||
|
|
||||||
|
Or you can list required features:
|
||||||
|
|
||||||
|
http://xml.org/sax/features/validation = 1
|
||||||
|
|
||||||
|
And each feature with a true value will be required.
|
||||||
|
|
||||||
|
=item * Fallback
|
||||||
|
|
||||||
|
If none of the above works, the last parser installed on the user's
|
||||||
|
system will be used. The XML::SAX package ships with a pure perl
|
||||||
|
XML parser, XML::SAX::PurePerl, so that there will always be a
|
||||||
|
fallback parser.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
Matt Sergeant, matt@sergeant.org
|
||||||
|
|
||||||
|
=head1 LICENSE
|
||||||
|
|
||||||
|
This is free software, you may use it and distribute it under the same
|
||||||
|
terms as Perl itself.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,95 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::DebugHandler;

use strict;

# A SAX handler for debugging and testing.  Each callback counts its
# invocations in $self->{seen}{<key>} and, when the DEBUG_XML environment
# variable is true, prints the event name followed by a newline.

# Constructor: the key/value arguments become the fields of the handler.
sub new {
    my $class = shift;
    my %opts = @_;
    return bless \%opts, $class;
}

# DocumentHandler

# Counts the event under 'set_document_locator'.
sub set_document_locator {
    my $self = shift;
    print "set_document_locator\n" if $ENV{DEBUG_XML};
    $self->{seen}{set_document_locator}++;
}

# Counts the event under 'start_document'.
sub start_document {
    my $self = shift;
    print "start_document\n" if $ENV{DEBUG_XML};
    $self->{seen}{start_document}++;
}

# Counts the event under 'end_document'.
sub end_document {
    my $self = shift;
    print "end_document\n" if $ENV{DEBUG_XML};
    $self->{seen}{end_document}++;
}

# Counts the event under 'start_element'.
sub start_element {
    my $self = shift;
    print "start_element\n" if $ENV{DEBUG_XML};
    $self->{seen}{start_element}++;
}

# Counts the event under 'end_element'.
sub end_element {
    my $self = shift;
    print "end_element\n" if $ENV{DEBUG_XML};
    $self->{seen}{end_element}++;
}

# Counts the event under 'characters'.
sub characters {
    my $self = shift;
    print "characters\n" if $ENV{DEBUG_XML};
#    warn "Char: ", $_[0]->{Data}, "\n";
    $self->{seen}{characters}++;
}

# Counts the event under 'processing_instruction'.
sub processing_instruction {
    my $self = shift;
    print "processing_instruction\n" if $ENV{DEBUG_XML};
    $self->{seen}{processing_instruction}++;
}

# Counts the event under 'ignorable_whitespace'.
sub ignorable_whitespace {
    my $self = shift;
    print "ignorable_whitespace\n" if $ENV{DEBUG_XML};
    $self->{seen}{ignorable_whitespace}++;
}

# LexHandler

# Counts the event under 'comment'.
sub comment {
    my $self = shift;
    print "comment\n" if $ENV{DEBUG_XML};
    $self->{seen}{comment}++;
}

# DTDHandler

# Counts the event under 'notation_decl'.
sub notation_decl {
    my $self = shift;
    print "notation_decl\n" if $ENV{DEBUG_XML};
    $self->{seen}{notation_decl}++;
}

# Counts the event under 'unparsed_entity_decl', the handler's own name,
# as every other callback in this package does.  The 'entity_decl'
# counter this method has always maintained is kept as well, so code
# that reads the old key keeps working.
sub unparsed_entity_decl {
    my $self = shift;
    print "unparsed_entity_decl\n" if $ENV{DEBUG_XML};
    $self->{seen}{unparsed_entity_decl}++;
    # Legacy key; kept last so the return value is unchanged.
    $self->{seen}{entity_decl}++;
}

# EntityResolver

# Counts the event under 'resolve_entity' and always returns the empty
# string.
sub resolve_entity {
    my $self = shift;
    print "resolve_entity\n" if $ENV{DEBUG_XML};
    $self->{seen}{resolve_entity}++;
    return '';
}

1;
|
||||||
@@ -0,0 +1,180 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use XML::SAX::PurePerl::Productions qw($PubidChar);
|
||||||
|
|
||||||
|
# Parse a document type declaration ("<!DOCTYPE ...>").
#
# Returns 0 when the next nine characters offered by the reader do not
# start with "<!DOCTYPE"; otherwise works through the declaration (root
# element name, optional ExternalID, optional internal subset, closing
# ">") and returns 1.  Syntax problems are reported through
# $self->parser_error.
sub doctypedecl {
    my ($self, $reader) = @_;

    my $lookahead = $reader->data(9);
    return 0 if $lookahead !~ /^<!DOCTYPE/;

    $reader->move_along(9);

    unless ($self->skip_whitespace($reader)) {
        $self->parser_error("No whitespace after doctype declaration", $reader);
    }

    my $root_name = $self->Name($reader);
    unless ($root_name) {
        $self->parser_error("Doctype declaration has no root element name", $reader);
    }

    if ($self->skip_whitespace($reader)) {
        # might be externalid...
        my %external_id = $self->ExternalID($reader);
        # TODO: Call SAX event
    }

    $self->skip_whitespace($reader);

    $self->InternalSubset($reader);

    unless ($reader->match('>')) {
        $self->parser_error("Doctype not closed", $reader);
    }

    return 1;
}
|
||||||
|
|
||||||
|
# Parse an ExternalID ("SYSTEM <literal>" or "PUBLIC <pubid> <literal>").
#
# Returns:
#   (SYSTEM => $system_literal)                    after the SYSTEM keyword,
#   (PUBLIC => $pubid, SYSTEM => $system_literal)  after the PUBLIC keyword,
#   nothing (a bare return) when neither keyword is next in the input.
# Syntax problems are reported through $self->parser_error.
sub ExternalID {
    my ($self, $reader) = @_;

    my $data = $reader->data(6);

    if ($data =~ /^SYSTEM/) {
        $reader->move_along(6);
        $self->skip_whitespace($reader) ||
            $self->parser_error("No whitespace after SYSTEM identifier", $reader);
        return (SYSTEM => $self->SystemLiteral($reader));
    }
    elsif ($data =~ /^PUBLIC/) {
        $reader->move_along(6);
        $self->skip_whitespace($reader) ||
            $self->parser_error("No whitespace after PUBLIC identifier", $reader);

        my $quote = $self->quote($reader) ||
            $self->parser_error("Not a quote character in PUBLIC identifier", $reader);

        # Collect everything up to the matching quote, one reader chunk
        # at a time.
        my $chunk = $reader->data;
        my $pubid = '';
        while (1) {
            $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader)
                unless length($chunk);

            if ($chunk =~ /^([^$quote]*)$quote/) {
                $pubid .= $1;
                $reader->move_along(length($1) + 1);
                last;
            }
            else {
                $pubid .= $chunk;
                $reader->move_along(length($chunk));
                $chunk = $reader->data;
            }
        }

        # XML 1.0 production [12]: PubidLiteral is a quoted run of zero or
        # more PubidChar, so an empty public identifier is legal.
        if ($pubid !~ /^(?:$PubidChar)*$/) {
            $self->parser_error("Invalid characters in PUBLIC identifier", $reader);
        }

        $self->skip_whitespace($reader) ||
            $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader);

        return (PUBLIC => $pubid,
                SYSTEM => $self->SystemLiteral($reader));
    }

    # Neither keyword: there is no ExternalID here.
    return;
}
|
||||||
|
|
||||||
|
# Parse a quoted system literal and return its text without the quotes.
#
# The opening quote is obtained from $self->quote; everything up to the
# next occurrence of that same character is collected, one reader chunk
# at a time, and the reader is moved past the closing quote.
# A missing opening quote or end of input before the closing quote is
# reported through $self->parser_error.
sub SystemLiteral {
    my ($self, $reader) = @_;

    # Same check ExternalID applies to its PUBLIC literal: without it a
    # false value from quote() would be interpolated into the pattern
    # below and produce an invalid regex instead of a parse error.
    # NOTE(review): assumes quote() returns a false value when the next
    # character is not a quote, as its use in ExternalID suggests.
    my $quote = $self->quote($reader) ||
        $self->parser_error("Not a quote character in SYSTEM identifier", $reader);

    my $data = $reader->data;
    my $systemid = '';
    while (1) {
        $self->parser_error("EOF found while looking for end of Sytem Literal", $reader)
            unless length($data);
        if ($data =~ /^([^$quote]*)$quote/) {
            $systemid .= $1;
            $reader->move_along(length($1) + 1);
            return $systemid;
        }
        else {
            # No closing quote in this chunk: keep it all and fetch more.
            $systemid .= $data;
            $reader->move_along(length($data));
            $data = $reader->data;
        }
    }
}
|
||||||
|
|
||||||
|
# Parse an internal DTD subset: "[", any number of IntSubsetDecl items,
# "]", then optional whitespace.
#
# Returns 0 when the reader does not match "[", 1 otherwise.  A missing
# "]" is reported through $self->parser_error, with the reader's current
# data appended to the message.
sub InternalSubset {
    my ($self, $reader) = @_;

    if (!$reader->match('[')) {
        return 0;
    }

    # Keep going for as long as IntSubsetDecl reports a match.
    while ($self->IntSubsetDecl($reader)) {
    }

    unless ($reader->match(']')) {
        my $found = $reader->data;
        $self->parser_error("No close bracket on internal subset (found: " . $found, $reader);
    }

    $self->skip_whitespace($reader);

    return 1;
}
|
||||||
|
|
||||||
|
# Try to parse one item of the internal subset: a declaration separator
# first, and a markup declaration only if that did not match.
#
# Returns the true value from DeclSep when it matched, otherwise whatever
# markupdecl returns.
sub IntSubsetDecl {
    my ($self, $reader) = @_;

    my $separator = $self->DeclSep($reader);
    return $separator if $separator;

    return $self->markupdecl($reader);
}
|
||||||
|
|
||||||
|
# Try to parse a declaration separator: whitespace, or failing that a
# parameter-entity reference.
#
# Returns 1 when either matched, 0 otherwise.
sub DeclSep {
    my ($self, $reader) = @_;

    return 1 if $self->skip_whitespace($reader);
    return 1 if $self->PEReference($reader);

    # A third alternative is disabled in the code:
    #    if ($self->ParsedExtSubset($reader)) {
    #        return 1;
    #    }

    return 0;
}
|
||||||
|
|
||||||
|
# Parse a parameter-entity reference: "%", a Name, then ";".
#
# Returns 0 when the reader does not match "%", 1 otherwise.  A missing
# name or a missing ";" is reported through $self->parser_error.  The
# referenced entity itself is not loaded (see the TODO below).
sub PEReference {
    my ($self, $reader) = @_;

    if (!$reader->match('%')) {
        return 0;
    }

    my $peref = $self->Name($reader);
    unless ($peref) {
        $peref = $self->parser_error("PEReference did not find a Name", $reader);
    }
    # TODO - load/parse the peref

    unless ($reader->match(';')) {
        $self->parser_error("Invalid token in PEReference", $reader);
    }

    return 1;
}
|
||||||
|
|
||||||
|
# Try each markup-declaration production in turn and stop at the first
# that matches.
#
# Order: elementdecl, AttlistDecl, EntityDecl, NotationDecl, PI, Comment.
# Returns 1 when one of them returned a true value, 0 otherwise.
sub markupdecl {
    my ($self, $reader) = @_;

    for my $production (qw(elementdecl AttlistDecl EntityDecl NotationDecl PI Comment)) {
        return 1 if $self->$production($reader);
    }

    return 0;
}
|
||||||
|
|
||||||
|
1;
|
||||||
@@ -0,0 +1,105 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl; # NB, not ::EncodingDetect!
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
# Inspect the first bytes of the reader's buffer and, when they form a
# recognised signature, skip any byte-order mark and tell the reader
# which encoding to use.
#
#   $parser - unused here
#   $reader - object providing data(), move_along() and set_encoding()
#
# Always returns nothing. Emits a warning when no signature matches.
sub encoding_detect {
    my ($parser, $reader) = @_;

    # Each entry is [ pattern, bytes to skip, encoding ]. An undefined
    # encoding means the start is recognised but the reader's encoding
    # is left alone. Entries are tried in order, and longer signatures
    # must precede the shorter ones they begin with.
    my @signatures = (
        [ qr/^\x00\x00\xFE\xFF/, 4, 'UCS-4BE'    ],    # BO-UCS4-be
        [ qr/^\x00\x00\xFF\xFE/, 4, 'UCS-4-2143' ],    # BO-UCS-4-2143
        [ qr/^\x00\x00\x00\x3C/, 0, 'UCS-4BE'    ],
        [ qr/^\x00\x00\x3C\x00/, 0, 'UCS-4-2143' ],
        [ qr/^\x00\x3C\x00\x00/, 0, 'UCS-4-3412' ],
        [ qr/^\x00\x3C\x00\x3F/, 0, 'UTF-16BE'   ],
        [ qr/^\xFF\xFE\x00\x00/, 4, 'UCS-4LE'    ],    # BO-UCS-4LE
        [ qr/^\xFF\xFE/,         2, 'UTF-16LE'   ],
        [ qr/^\xFE\xFF\x00\x00/, 4, 'UCS-4-3412' ],
        [ qr/^\xFE\xFF/,         2, 'UTF-16BE'   ],
        [ qr/^\xEF\xBB\xBF/,     3, 'UTF-8'      ],    # UTF-8 BOM
        [ qr/^\x3C\x00\x00\x00/, 0, 'UCS-4LE'    ],
        [ qr/^\x3C\x00\x3F\x00/, 0, 'UTF-16LE'   ],
        [ qr/^\x3C\x3F\x78\x6D/, 0, undef        ],
        [ qr/^\x3C\x3F\x78/,     0, undef        ],
        [ qr/^\x3C\x3F/,         0, undef        ],
        [ qr/^\x3C/,             0, undef        ],
        [ qr/^[\x20\x09\x0A\x0D]+\x3C[^\x3F]/, 0, undef ],
        [ qr/^\x4C\x6F\xA7\x94/, 0, 'EBCDIC'     ],
    );

    my $data = $reader->data;

    for my $signature (@signatures) {
        my ($pattern, $skip, $encoding) = @$signature;
        next unless $data =~ $pattern;

        $reader->move_along($skip) if $skip;
        $reader->set_encoding($encoding) if defined $encoding;
        return;
    }

    warn("Unable to recognise encoding of this document");
    return;
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Exception;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
use overload '""' => "stringify";
|
||||||
|
|
||||||
|
use vars qw/$StackTrace/;
|
||||||
|
|
||||||
|
$StackTrace = $ENV{XML_DEBUG} || 0;
|
||||||
|
|
||||||
|
# Build an exception of the invoking class from the given options and
# raise it with die.
sub throw {
    my ($class, @options) = @_;
    my $exception = $class->new(@options);
    die $exception;
}
|
||||||
|
|
||||||
|
# Construct an exception object.
#
# Options (key/value list):
#   Message - required; dies with "Invalid options" when the key is absent
#   reader  - optional; when true, its column, line, public_id and
#             system_id are recorded, plus a stack trace if the package
#             variable $StackTrace is true
#
# Returns a hash blessed into $class.
sub new {
    my ($class, %opts) = @_;
    exists $opts{Message} or die "Invalid options";

    # Position details are only available when a reader was supplied.
    my @position;
    if (my $reader = $opts{reader}) {
        @position = (
            ColumnNumber => $reader->column,
            LineNumber   => $reader->line,
            PublicId     => $reader->public_id,
            SystemId     => $reader->system_id,
            $StackTrace ? (StackTrace => stacktrace()) : (),
        );
    }

    return bless {
        Message   => $opts{Message},
        Exception => undef, # not sure what this is for!!!
        @position,
    }, $class;
}
|
||||||
|
|
||||||
|
# String overload: "<Message> [Ln: <line>, Col: <column>]" followed by
# the formatted stack trace when $StackTrace is true, then a newline.
sub stringify {
    my ($self) = @_;
    local $^W;    # line/column may be undefined; silence -w while joining

    my $trace = $StackTrace ? stackstring($self->{StackTrace}) : "";

    return join '',
        $self->{Message},
        " [Ln: ", $self->{LineNumber},
        ", Col: ", $self->{ColumnNumber}, "]",
        $trace,
        "\n";
}
|
||||||
|
|
||||||
|
# Collect the call stack starting two frames above this sub.
# Returns a reference to an array of hashes with the keys Package,
# Filename and Line, ordered from the innermost frame outwards.
sub stacktrace {
    my @frames;

    for (my $depth = 2; my @info = caller($depth); $depth++) {
        push @frames, {
            Package  => $info[0],
            Filename => $info[1],
            Line     => $info[2],
        };
    }

    return \@frames;
}
|
||||||
|
|
||||||
|
# Render a stack trace (array ref of frames with Filename and Line
# keys) as a "From:" header followed by one "<file> Line: <n>" line
# per frame.
sub stackstring {
    my ($stacktrace) = @_;

    my @lines = ("\nFrom:\n");
    foreach my $frame (@$stacktrace) {
        push @lines, $frame->{Filename}, " Line: ", $frame->{Line}, "\n";
    }

    return join '', @lines;
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl;
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
# Encode a numeric character reference by hand as a UTF-8 byte string
# (used where Perl's own Unicode support is unavailable).
#
# Returns the 1-4 byte sequence for code points below 0x110000 and
# undef for anything larger.
sub chr_ref {
    my ($codepoint) = @_;

    return chr($codepoint) if $codepoint < 0x80;

    # Continuation bytes: 10xxxxxx, six payload bits each.
    my $low  = ($codepoint & 0x3f) | 0x80;
    my $mid  = (($codepoint >> 6) & 0x3f) | 0x80;
    my $high = (($codepoint >> 12) & 0x3f) | 0x80;

    if ($codepoint < 0x800) {
        return pack("CC", (($codepoint >> 6) | 0xc0), $low);
    }
    if ($codepoint < 0x10000) {
        return pack("CCC", (($codepoint >> 12) | 0xe0), $mid, $low);
    }
    if ($codepoint < 0x110000) {
        return pack("CCCC", (($codepoint >> 18) | 0xf0), $high, $mid, $low);
    }

    return undef;
}
|
||||||
|
|
||||||
|
1;
|
||||||
@@ -0,0 +1,147 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Productions;
|
||||||
|
|
||||||
|
use Exporter;
|
||||||
|
@ISA = ('Exporter');
|
||||||
|
@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Ideographic
|
||||||
|
$Extender $Digit $CombiningChar $EncNameStart $EncNameEnd $NameChar $CharMinusDash
|
||||||
|
$PubidChar $Any $SingleChar);
|
||||||
|
|
||||||
|
### WARNING!!! All productions here must *only* match a *single* character!!! ###
|
||||||
|
|
||||||
|
# Compile the character-class productions at load time.
#
# The version-independent patterns are assigned directly. The Unicode
# tables are compiled through a string eval so that the \x{...} syntax
# is only parsed on perls that understand it; older perls get
# byte-oriented approximations instead. Any compile error in the
# evaluated text is re-raised.
#
# The here-documents use the terminator PERL at the start of a line;
# the opening tag must match it exactly (no padding inside the quotes).
BEGIN {
    $S = qr/[\x20\x09\x0D\x0A]/;

    $CharMinusDash = qr/[^-]/x;

    $Any = qr/ . /xms;

    $VersionNum = qr/ [a-zA-Z0-9_.:-]+ /x;

    $EncNameStart = qr/ [A-Za-z] /x;
    $EncNameEnd = qr/ [A-Za-z0-9\._-] /x;

    $PubidChar = qr/ [\x20\x0D\x0Aa-zA-Z0-9'()\+,.\/:=\?;!*\#@\$_\%-] /x;

    if ($] < 5.006) {
        eval <<'PERL';
    $Char = qr/^ [\x09\x0A\x0D\x20-\x7F]|([\xC0-\xFD][\x80-\xBF]+) $/x;

    $SingleChar = qr/^$Char$/;

    $BaseChar = qr/ [\x41-\x5A\x61-\x7A]|([\xC0-\xFD][\x80-\xBF]+) /x;

    $Extender = qr/ \xB7 /x;

    $Digit = qr/ [\x30-\x39] /x;

    # can't do this one without unicode
    # $CombiningChar = qr/^$/msx;

    $NameChar = qr/^ (?: $BaseChar | $Digit | [._:-] | $Extender )+ $/x;
PERL
        die $@ if $@;
    }
    else {
        eval <<'PERL';

    use utf8; # for 5.6

    $Char = qr/^ [\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}] $/x;

    $SingleChar = qr/^$Char$/;

    $BaseChar = qr/
[\x{0041}-\x{005A}\x{0061}-\x{007A}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}] |
[\x{00F8}-\x{00FF}\x{0100}-\x{0131}\x{0134}-\x{013E}\x{0141}-\x{0148}] |
[\x{014A}-\x{017E}\x{0180}-\x{01C3}\x{01CD}-\x{01F0}\x{01F4}-\x{01F5}] |
[\x{01FA}-\x{0217}\x{0250}-\x{02A8}\x{02BB}-\x{02C1}\x{0386}\x{0388}-\x{038A}] |
[\x{038C}\x{038E}-\x{03A1}\x{03A3}-\x{03CE}\x{03D0}-\x{03D6}\x{03DA}] |
[\x{03DC}\x{03DE}\x{03E0}\x{03E2}-\x{03F3}\x{0401}-\x{040C}\x{040E}-\x{044F}] |
[\x{0451}-\x{045C}\x{045E}-\x{0481}\x{0490}-\x{04C4}\x{04C7}-\x{04C8}] |
[\x{04CB}-\x{04CC}\x{04D0}-\x{04EB}\x{04EE}-\x{04F5}\x{04F8}-\x{04F9}] |
[\x{0531}-\x{0556}\x{0559}\x{0561}-\x{0586}\x{05D0}-\x{05EA}\x{05F0}-\x{05F2}] |
[\x{0621}-\x{063A}\x{0641}-\x{064A}\x{0671}-\x{06B7}\x{06BA}-\x{06BE}] |
[\x{06C0}-\x{06CE}\x{06D0}-\x{06D3}\x{06D5}\x{06E5}-\x{06E6}\x{0905}-\x{0939}] |
[\x{093D}\x{0958}-\x{0961}\x{0985}-\x{098C}\x{098F}-\x{0990}] |
[\x{0993}-\x{09A8}\x{09AA}-\x{09B0}\x{09B2}\x{09B6}-\x{09B9}\x{09DC}-\x{09DD}] |
[\x{09DF}-\x{09E1}\x{09F0}-\x{09F1}\x{0A05}-\x{0A0A}\x{0A0F}-\x{0A10}] |
[\x{0A13}-\x{0A28}\x{0A2A}-\x{0A30}\x{0A32}-\x{0A33}\x{0A35}-\x{0A36}] |
[\x{0A38}-\x{0A39}\x{0A59}-\x{0A5C}\x{0A5E}\x{0A72}-\x{0A74}\x{0A85}-\x{0A8B}] |
[\x{0A8D}\x{0A8F}-\x{0A91}\x{0A93}-\x{0AA8}\x{0AAA}-\x{0AB0}] |
[\x{0AB2}-\x{0AB3}\x{0AB5}-\x{0AB9}\x{0ABD}\x{0AE0}\x{0B05}-\x{0B0C}] |
[\x{0B0F}-\x{0B10}\x{0B13}-\x{0B28}\x{0B2A}-\x{0B30}\x{0B32}-\x{0B33}] |
[\x{0B36}-\x{0B39}\x{0B3D}\x{0B5C}-\x{0B5D}\x{0B5F}-\x{0B61}\x{0B85}-\x{0B8A}] |
[\x{0B8E}-\x{0B90}\x{0B92}-\x{0B95}\x{0B99}-\x{0B9A}\x{0B9C}] |
[\x{0B9E}-\x{0B9F}\x{0BA3}-\x{0BA4}\x{0BA8}-\x{0BAA}\x{0BAE}-\x{0BB5}] |
[\x{0BB7}-\x{0BB9}\x{0C05}-\x{0C0C}\x{0C0E}-\x{0C10}\x{0C12}-\x{0C28}] |
[\x{0C2A}-\x{0C33}\x{0C35}-\x{0C39}\x{0C60}-\x{0C61}\x{0C85}-\x{0C8C}] |
[\x{0C8E}-\x{0C90}\x{0C92}-\x{0CA8}\x{0CAA}-\x{0CB3}\x{0CB5}-\x{0CB9}\x{0CDE}] |
[\x{0CE0}-\x{0CE1}\x{0D05}-\x{0D0C}\x{0D0E}-\x{0D10}\x{0D12}-\x{0D28}] |
[\x{0D2A}-\x{0D39}\x{0D60}-\x{0D61}\x{0E01}-\x{0E2E}\x{0E30}\x{0E32}-\x{0E33}] |
[\x{0E40}-\x{0E45}\x{0E81}-\x{0E82}\x{0E84}\x{0E87}-\x{0E88}\x{0E8A}] |
[\x{0E8D}\x{0E94}-\x{0E97}\x{0E99}-\x{0E9F}\x{0EA1}-\x{0EA3}\x{0EA5}\x{0EA7}] |
[\x{0EAA}-\x{0EAB}\x{0EAD}-\x{0EAE}\x{0EB0}\x{0EB2}-\x{0EB3}\x{0EBD}] |
[\x{0EC0}-\x{0EC4}\x{0F40}-\x{0F47}\x{0F49}-\x{0F69}\x{10A0}-\x{10C5}] |
[\x{10D0}-\x{10F6}\x{1100}\x{1102}-\x{1103}\x{1105}-\x{1107}\x{1109}] |
[\x{110B}-\x{110C}\x{110E}-\x{1112}\x{113C}\x{113E}\x{1140}\x{114C}\x{114E}] |
[\x{1150}\x{1154}-\x{1155}\x{1159}\x{115F}-\x{1161}\x{1163}\x{1165}] |
[\x{1167}\x{1169}\x{116D}-\x{116E}\x{1172}-\x{1173}\x{1175}\x{119E}\x{11A8}] |
[\x{11AB}\x{11AE}-\x{11AF}\x{11B7}-\x{11B8}\x{11BA}\x{11BC}-\x{11C2}] |
[\x{11EB}\x{11F0}\x{11F9}\x{1E00}-\x{1E9B}\x{1EA0}-\x{1EF9}\x{1F00}-\x{1F15}] |
[\x{1F18}-\x{1F1D}\x{1F20}-\x{1F45}\x{1F48}-\x{1F4D}\x{1F50}-\x{1F57}] |
[\x{1F59}\x{1F5B}\x{1F5D}\x{1F5F}-\x{1F7D}\x{1F80}-\x{1FB4}\x{1FB6}-\x{1FBC}] |
[\x{1FBE}\x{1FC2}-\x{1FC4}\x{1FC6}-\x{1FCC}\x{1FD0}-\x{1FD3}] |
[\x{1FD6}-\x{1FDB}\x{1FE0}-\x{1FEC}\x{1FF2}-\x{1FF4}\x{1FF6}-\x{1FFC}] |
[\x{2126}\x{212A}-\x{212B}\x{212E}\x{2180}-\x{2182}\x{3041}-\x{3094}] |
[\x{30A1}-\x{30FA}\x{3105}-\x{312C}\x{AC00}-\x{D7A3}]
    /x;

    $Extender = qr/
[\x{00B7}\x{02D0}\x{02D1}\x{0387}\x{0640}\x{0E46}\x{0EC6}\x{3005}\x{3031}-\x{3035}\x{309D}-\x{309E}\x{30FC}-\x{30FE}]
    /x;

    $Digit = qr/
[\x{0030}-\x{0039}\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{0966}-\x{096F}] |
[\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}] |
[\x{0BE7}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}] |
[\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}]
    /x;

    $CombiningChar = qr/
[\x{0300}-\x{0345}\x{0360}-\x{0361}\x{0483}-\x{0486}\x{0591}-\x{05A1}] |
[\x{05A3}-\x{05B9}\x{05BB}-\x{05BD}\x{05BF}\x{05C1}-\x{05C2}\x{05C4}] |
[\x{064B}-\x{0652}\x{0670}\x{06D6}-\x{06DC}\x{06DD}-\x{06DF}\x{06E0}-\x{06E4}] |
[\x{06E7}-\x{06E8}\x{06EA}-\x{06ED}\x{0901}-\x{0903}\x{093C}] |
[\x{093E}-\x{094C}\x{094D}\x{0951}-\x{0954}\x{0962}-\x{0963}\x{0981}-\x{0983}] |
[\x{09BC}\x{09BE}\x{09BF}\x{09C0}-\x{09C4}\x{09C7}-\x{09C8}] |
[\x{09CB}-\x{09CD}\x{09D7}\x{09E2}-\x{09E3}\x{0A02}\x{0A3C}\x{0A3E}\x{0A3F}] |
[\x{0A40}-\x{0A42}\x{0A47}-\x{0A48}\x{0A4B}-\x{0A4D}\x{0A70}-\x{0A71}] |
[\x{0A81}-\x{0A83}\x{0ABC}\x{0ABE}-\x{0AC5}\x{0AC7}-\x{0AC9}\x{0ACB}-\x{0ACD}] |
[\x{0B01}-\x{0B03}\x{0B3C}\x{0B3E}-\x{0B43}\x{0B47}-\x{0B48}] |
[\x{0B4B}-\x{0B4D}\x{0B56}-\x{0B57}\x{0B82}-\x{0B83}\x{0BBE}-\x{0BC2}] |
[\x{0BC6}-\x{0BC8}\x{0BCA}-\x{0BCD}\x{0BD7}\x{0C01}-\x{0C03}\x{0C3E}-\x{0C44}] |
[\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}-\x{0C56}\x{0C82}-\x{0C83}] |
[\x{0CBE}-\x{0CC4}\x{0CC6}-\x{0CC8}\x{0CCA}-\x{0CCD}\x{0CD5}-\x{0CD6}] |
[\x{0D02}-\x{0D03}\x{0D3E}-\x{0D43}\x{0D46}-\x{0D48}\x{0D4A}-\x{0D4D}\x{0D57}] |
[\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}] |
[\x{0EBB}-\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}-\x{0F19}\x{0F35}\x{0F37}\x{0F39}] |
[\x{0F3E}\x{0F3F}\x{0F71}-\x{0F84}\x{0F86}-\x{0F8B}\x{0F90}-\x{0F95}] |
[\x{0F97}\x{0F99}-\x{0FAD}\x{0FB1}-\x{0FB7}\x{0FB9}\x{20D0}-\x{20DC}\x{20E1}] |
[\x{302A}-\x{302F}\x{3099}\x{309A}]
    /x;

    $Ideographic = qr/
[\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}]
    /x;

    $NameChar = qr/^ (?: $BaseChar | $Ideographic | $Digit | [._:-] | $CombiningChar | $Extender )+ $/x;
PERL

        die $@ if $@;
    }

}
|
||||||
|
|
||||||
|
1;
|
||||||
@@ -0,0 +1,136 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use XML::SAX::PurePerl::Reader::URI;
|
||||||
|
use Exporter ();
|
||||||
|
|
||||||
|
use vars qw(@ISA @EXPORT_OK);
|
||||||
|
@ISA = qw(Exporter);
|
||||||
|
@EXPORT_OK = qw(
|
||||||
|
EOF
|
||||||
|
BUFFER
|
||||||
|
LINE
|
||||||
|
COLUMN
|
||||||
|
ENCODING
|
||||||
|
XML_VERSION
|
||||||
|
);
|
||||||
|
|
||||||
|
use constant EOF => 0;
|
||||||
|
use constant BUFFER => 1;
|
||||||
|
use constant LINE => 2;
|
||||||
|
use constant COLUMN => 3;
|
||||||
|
use constant ENCODING => 4;
|
||||||
|
use constant SYSTEM_ID => 5;
|
||||||
|
use constant PUBLIC_ID => 6;
|
||||||
|
use constant XML_VERSION => 7;
|
||||||
|
|
||||||
|
require XML::SAX::PurePerl::Reader::Stream;
|
||||||
|
require XML::SAX::PurePerl::Reader::String;
|
||||||
|
|
||||||
|
if ($] >= 5.007002) {
|
||||||
|
require XML::SAX::PurePerl::Reader::UnicodeExt;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
require XML::SAX::PurePerl::Reader::NoUnicodeExt;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Factory: pick a concrete reader for $thing and return it initialised.
#
#   * an object derived from IO::Handle      -> Stream reader
#   * a tied handle, or anything with an IO
#     slot when treated as a glob            -> Stream reader
#   * a string containing '<'                -> String reader
#   * anything else is treated as a URI      -> URI reader
sub new {
    my ($class, $thing) = @_;

    # try to figure if this $thing is a handle of some sort
    return XML::SAX::PurePerl::Reader::Stream->new($thing)->init
        if ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle');

    my $ioref;
    if (tied($thing)) {
        my $tie_class = ref($thing);
        no strict 'refs';
        if (defined &{"${tie_class}::TIEHANDLE"}) {
            $ioref = $thing;
        }
    }
    else {
        # Glob dereference fails for plain strings; swallow that error.
        eval { $ioref = *{$thing}{IO}; };
        undef $@;
    }

    return XML::SAX::PurePerl::Reader::Stream->new($thing)->init
        if $ioref;

    # assume it's a string when it contains markup
    return XML::SAX::PurePerl::Reader::String->new($thing)->init
        if $thing =~ /</;

    # assume it is a uri
    return XML::SAX::PurePerl::Reader::URI->new($thing)->init;
}
|
||||||
|
|
||||||
|
# Reset the position to line 1, column 1, pull in the first chunk of
# input via read_more, and return the reader itself.
sub init {
    my ($self) = @_;

    @{$self}[LINE, COLUMN] = (1, 1);
    $self->read_more;

    return $self;
}
|
||||||
|
|
||||||
|
# Return the current buffer. When it holds fewer than $min_length
# characters (default 1), read_more is called once first.
sub data {
    my $self = shift;
    my $min_length = @_ ? shift : 1;

    $self->read_more
        if length($self->[BUFFER]) < $min_length;

    return $self->[BUFFER];
}
|
||||||
|
|
||||||
|
# Consume the next character when it equals $char.
# Returns 1 if it was consumed, 0 (leaving the input alone) otherwise.
sub match {
    my ($self, $char) = @_;

    my $next = substr($self->data, 0, 1);
    return 0 unless $next eq $char;

    $self->move_along(1);
    return 1;
}
|
||||||
|
|
||||||
|
# Combined getter/setter for the public identifier: stores the first
# argument when one is passed, then returns the current value.
sub public_id {
    my ($self, @new_value) = @_;
    if (@new_value) {
        $self->[PUBLIC_ID] = $new_value[0];
    }
    return $self->[PUBLIC_ID];
}
|
||||||
|
|
||||||
|
# Combined getter/setter for the system identifier: stores the first
# argument when one is passed, then returns the current value.
sub system_id {
    my ($self, @new_value) = @_;
    if (@new_value) {
        $self->[SYSTEM_ID] = $new_value[0];
    }
    return $self->[SYSTEM_ID];
}
|
||||||
|
|
||||||
|
# Current line number as tracked by move_along.
sub line {
    my ($self) = @_;
    return $self->[LINE];
}
|
||||||
|
|
||||||
|
# Current column number as tracked by move_along.
sub column {
    my ($self) = @_;
    return $self->[COLUMN];
}
|
||||||
|
|
||||||
|
# Encoding name recorded in the ENCODING slot (undefined until set).
sub get_encoding {
    my ($self) = @_;
    return $self->[ENCODING];
}
|
||||||
|
|
||||||
|
# XML version string held in the XML_VERSION slot.
sub get_xml_version {
    my ($self) = @_;
    return $self->[XML_VERSION];
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
XML::SAX::PurePerl::Reader - Abstract Reader factory class
|
||||||
|
|
||||||
|
=cut
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader;
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
# Nothing to do in this variant: the handle is left exactly as given.
sub set_raw_stream {
    return;
}
|
||||||
|
|
||||||
|
# Without Encode support no stream can be transcoded, so this only
# validates the request: any encoding name that does not contain
# "ASCII", "UTF8" or "UTF-8" (case-insensitively) raises an
# XML::SAX::Exception::Parse.
#
#   $fh       - the handle (unused here)
#   $encoding - requested encoding name
sub switch_encoding_stream {
    my ($fh, $encoding) = @_;
    # Arrow call rather than indirect-object syntax: this file does not
    # load the exception class itself, so "throw CLASS (...)" would rely
    # on the class already being known when this line is compiled.
    XML::SAX::Exception::Parse->throw(
        Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
    ) if $encoding !~ /(ASCII|UTF\-?8)/i;
}
|
||||||
|
|
||||||
|
# Without Encode support no string can be transcoded, so this only
# validates the request: any encoding name that does not contain
# "ASCII", "UTF8" or "UTF-8" (case-insensitively) raises an
# XML::SAX::Exception::Parse. The string argument is left untouched.
sub switch_encoding_string {
    my (undef, $encoding) = @_;
    # Arrow call rather than indirect-object syntax: this file does not
    # load the exception class itself, so "throw CLASS (...)" would rely
    # on the class already being known when this line is compiled.
    XML::SAX::Exception::Parse->throw(
        Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
    ) if $encoding !~ /(ASCII|UTF\-?8)/i;
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader::Stream;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use vars qw(@ISA);
|
||||||
|
|
||||||
|
use XML::SAX::PurePerl::Reader qw(
|
||||||
|
EOF
|
||||||
|
BUFFER
|
||||||
|
LINE
|
||||||
|
COLUMN
|
||||||
|
ENCODING
|
||||||
|
XML_VERSION
|
||||||
|
);
|
||||||
|
use XML::SAX::Exception;
|
||||||
|
|
||||||
|
@ISA = ('XML::SAX::PurePerl::Reader');
|
||||||
|
|
||||||
|
# subclassed by adding 1 to last element
|
||||||
|
use constant FH => 8;
|
||||||
|
use constant BUFFER_SIZE => 4096;
|
||||||
|
|
||||||
|
# Wrap a filehandle in a stream reader.
#
# The handle is first passed to set_raw_stream, then stored together
# with the initial state: line 1, column 0, empty buffer, EOF flag
# clear and XML version '1.0'.
sub new {
    my ($class, $ioref) = @_;

    XML::SAX::PurePerl::Reader::set_raw_stream($ioref);

    my $self = [];
    $self->[FH]          = $ioref;
    $self->[LINE]        = 1;
    $self->[COLUMN]      = 0;
    $self->[BUFFER]      = '';
    $self->[EOF]         = 0;
    $self->[XML_VERSION] = '1.0';

    return bless $self, $class;
}
|
||||||
|
|
||||||
|
# Append up to BUFFER_SIZE more units from the filehandle to the
# buffer.
#
# Returns 1 when data was read and 0 at end of input (the EOF slot is
# incremented). Throws XML::SAX::Exception::Parse when read() reports
# an error.
sub read_more {
    my $self = shift;

    my $buf;
    my $bytesread = read($self->[FH], $buf, BUFFER_SIZE);

    if ($bytesread) {
        $self->[BUFFER] .= $buf;
        return 1;
    }
    elsif (defined($bytesread)) {
        # read() returned 0: nothing left.
        $self->[EOF]++;
        return 0;
    }
    else {
        # read() returned undef: $! holds the reason. Use an explicit
        # method call instead of indirect-object syntax.
        XML::SAX::Exception::Parse->throw(
            Message => "Error reading from filehandle: $!",
        );
    }
}
|
||||||
|
|
||||||
|
# Discard the first N characters of the buffer (N is the single
# argument) and update the line/column position accordingly.
sub move_along {
    my $self = shift;
    my $discarded = substr($self->[BUFFER], 0, $_[0], '');

    # Wish I could skip this lot - tells us where we are in the file
    my $lines = $discarded =~ tr/\n//;
    $self->[LINE] += $lines;
    if ($lines) {
        # Column is the number of characters after the last newline.
        # rindex is used instead of /\n([^\n]*)$/ because "$" also
        # matches just before a trailing newline, which made a chunk
        # such as "a\nb\n" report column 1 instead of 0.
        $self->[COLUMN] = length($discarded) - rindex($discarded, "\n") - 1;
    }
    else {
        $self->[COLUMN] += $_[0];
    }
}
|
||||||
|
|
||||||
|
# Switch the reader to $encoding: the filehandle is switched first,
# then the text already buffered, and finally the name is recorded in
# the ENCODING slot.
sub set_encoding {
    my ($self, $encoding) = @_;
    # warn("set encoding to: $encoding\n");

    XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding);
    XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding);

    $self->[ENCODING] = $encoding;
}
|
||||||
|
|
||||||
|
# Position of the underlying filehandle as reported by tell().
sub bytepos {
    my ($self) = @_;
    return tell($self->[FH]);
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader::String;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use vars qw(@ISA);
|
||||||
|
|
||||||
|
use XML::SAX::PurePerl::Reader qw(
|
||||||
|
LINE
|
||||||
|
COLUMN
|
||||||
|
BUFFER
|
||||||
|
ENCODING
|
||||||
|
EOF
|
||||||
|
);
|
||||||
|
|
||||||
|
@ISA = ('XML::SAX::PurePerl::Reader');
|
||||||
|
|
||||||
|
use constant DISCARDED => 8;
|
||||||
|
use constant STRING => 9;
|
||||||
|
use constant USED => 10;
|
||||||
|
use constant CHUNK_SIZE => 2048;
|
||||||
|
|
||||||
|
# Wrap an in-memory string in a reader.
#
# Initial state: empty buffer, EOF flag clear, line 1, column 0,
# nothing discarded and nothing yet taken from the string.
sub new {
    my ($class, $string) = @_;

    my $self = [];
    $self->[BUFFER]    = '';
    $self->[EOF]       = 0;
    $self->[LINE]      = 1;
    $self->[COLUMN]    = 0;
    $self->[DISCARDED] = 0;
    $self->[STRING]    = $string;
    $self->[USED]      = 0;

    return bless $self, $class;
}
|
||||||
|
|
||||||
|
# Move the next chunk (at most CHUNK_SIZE characters) from the source
# string into the buffer.
#
# Returns 1 when something was appended, or 0 once the whole string
# has been used (the EOF slot is incremented in that case).
#
# Note: declared without a prototype. The previous empty prototype
# claimed the sub takes no arguments although it reads $self from @_.
sub read_more {
    my $self = shift;

    my $remaining = length($self->[STRING]) - $self->[USED];
    if ($remaining <= 0) {
        $self->[EOF]++;
        return 0;
    }

    # Take a full chunk, or whatever is left when that is less.
    my $bytes = $remaining < CHUNK_SIZE ? $remaining : CHUNK_SIZE;

    $self->[BUFFER] .= substr($self->[STRING], $self->[USED], $bytes);
    $self->[USED] += $bytes;

    return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
# Discard the first $bytes characters of the buffer, add their count
# to the DISCARDED total and update the line/column position.
sub move_along {
    my ($self, $bytes) = @_;
    my $discarded = substr($self->[BUFFER], 0, $bytes, '');
    $self->[DISCARDED] += length($discarded);

    # Wish I could skip this lot - tells us where we are in the file
    my $lines = $discarded =~ tr/\n//;
    $self->[LINE] += $lines;
    if ($lines) {
        # Column is the number of characters after the last newline.
        # rindex is used instead of /\n([^\n]*)$/ because "$" also
        # matches just before a trailing newline, which made a chunk
        # such as "a\nb\n" report column 1 instead of 0.
        $self->[COLUMN] = length($discarded) - rindex($discarded, "\n") - 1;
    }
    else {
        # @_ was not shifted here, so $_[0] is the reader object itself;
        # the amount to advance by is $bytes.
        $self->[COLUMN] += $bytes;
    }
}
|
||||||
|
|
||||||
|
# Switch the buffered text to $encoding via switch_encoding_string and
# record the name in the ENCODING slot.
sub set_encoding {
    my ($self, $encoding) = @_;

    XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding, "utf-8");

    $self->[ENCODING] = $encoding;
}
|
||||||
|
|
||||||
|
# Running total of characters discarded from the buffer so far.
sub bytepos {
    my ($self) = @_;
    return $self->[DISCARDED];
}
|
||||||
|
|
||||||
|
1;
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader::URI;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
use XML::SAX::PurePerl::Reader;
|
||||||
|
use File::Temp qw(tempfile);
|
||||||
|
use Symbol;
|
||||||
|
|
||||||
|
## NOTE: This is *not* a subclass of Reader. It just returns Stream or String
|
||||||
|
## Reader objects depending on what its capabilities are.
|
||||||
|
|
||||||
|
# Resolve $uri to a Stream reader.
#
#   * an existing plain file (given directly or as "file:<path>") is
#     opened for reading;
#   * anything else is fetched with LWP::UserAgent into a temporary
#     file, which is then rewound and wrapped.
#
# Dies when the file cannot be opened or the request fails. The
# returned reader has not had init called on it.
sub new {
    my $class = shift;
    my $uri = shift;
    # request the URI
    if (-e $uri && -f _) {
        my $fh = gensym;
        # Three-argument open: the name is taken literally, so a file
        # called ">x" or "cmd |" cannot change the open mode.
        open($fh, '<', $uri) || die "Cannot open file $uri : $!";
        return XML::SAX::PurePerl::Reader::Stream->new($fh);
    }
    elsif ($uri =~ /^file:(.*)$/ && -e $1 && -f _) {
        my $file = $1;
        my $fh = gensym;
        open($fh, '<', $file) || die "Cannot open file $file : $!";
        return XML::SAX::PurePerl::Reader::Stream->new($fh);
    }
    else {
        # request URI, return String reader
        require LWP::UserAgent;
        my $ua = LWP::UserAgent->new;
        $ua->agent("Perl/XML/SAX/PurePerl/1.0 " . $ua->agent);

        my $req = HTTP::Request->new(GET => $uri);

        # Scalar context: only the handle of the temporary file is kept.
        my $fh = tempfile();

        # Spool the response body to the temporary file as it arrives.
        my $callback = sub {
            my ($data, $response, $protocol) = @_;
            print $fh $data;
        };

        my $res = $ua->request($req, $callback, 4096);

        if ($res->is_success) {
            seek($fh, 0, 0);
            return XML::SAX::PurePerl::Reader::Stream->new($fh);
        }
        else {
            die "LWP Request Failed";
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
1;
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl::Reader;
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
use Encode ();
|
||||||
|
|
||||||
|
# Put the handle into the ":bytes" layer.
sub set_raw_stream {
    my ($handle) = @_;
    return binmode($handle, ":bytes");
}
|
||||||
|
|
||||||
|
# Apply an ":encoding(NAME)" layer for $encoding to the handle.
sub switch_encoding_stream {
    my ($handle, $encoding) = @_;
    return binmode($handle, ":encoding($encoding)");
}
|
||||||
|
|
||||||
|
# Decode the caller's string in place.
#
# Arguments: (string, encoding name). The first argument is modified
# through the @_ alias, so the caller's own variable receives the
# result of Encode::decode.
sub switch_encoding_string {
    my $encoding = $_[1];
    $_[0] = Encode::decode($encoding, $_[0]);
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl;
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
no warnings 'utf8';
|
||||||
|
|
||||||
|
# Turn a numeric character reference into the corresponding character
# using the built-in chr.
sub chr_ref {
    my ($codepoint) = @_;
    return chr($codepoint);
}
|
||||||
|
|
||||||
|
# On perls that ship Encode (5.7.2 and later), register the encoding
# name aliases this parser relies on. Each pair is alias => target and
# the pairs are registered in the order listed.
if ($] >= 5.007002) {
    require Encode;

    my @aliases = (
        [ "UTF-16"   => "UCS-2"   ],
        [ "UTF-16BE" => "UCS-2"   ],
        [ "UTF-16LE" => "ucs-2le" ],
        [ "UTF16LE"  => "ucs-2le" ],
    );

    foreach my $pair (@aliases) {
        Encode::define_alias( $pair->[0] => $pair->[1] );
    }
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
package XML::SAX::PurePerl;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use XML::SAX::PurePerl::Productions qw($S $VersionNum $EncNameStart $EncNameEnd);
|
||||||
|
|
||||||
|
# Parse an optional XML declaration at the current position:
#
#   <?xml version=... [encoding=...] [standalone=...] ?>
#
# A version attribute is required once "<?xml" plus whitespace is
# seen. Further attributes are only attempted after whitespace. When
# there is no declaration at all, the reader is switched to UTF-8
# unless it already has an encoding.
sub XMLDecl {
    my ($self, $reader) = @_;

    my $head = $reader->data(5);
    # warn("Looking for xmldecl in: $head");
    if ($head =~ /^<\?xml$S/o) {
        $reader->move_along(5);
        $self->skip_whitespace($reader);

        # get version attribute
        unless ($self->VersionInfo($reader)) {
            $self->parser_error("XML Declaration lacks required version attribute, or version attribute does not match XML specification", $reader);
        }

        # $more stays true only while whitespace separates us from a
        # possible further pseudo-attribute.
        my $more = $self->skip_whitespace($reader);

        if ($more && $self->EncodingDecl($reader)) {
            $more = $self->skip_whitespace($reader);
        }

        if ($more) {
            $self->SDDecl($reader);
            $self->skip_whitespace($reader);
        }

        # Whatever was present, the declaration must now close.
        my $tail = $reader->data(2);
        $tail =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
        $reader->move_along(2);
    }
    else {
        # warn("first 5 bytes: ", join(',', unpack("CCCCC", $head)), "\n");
        # no xml decl
        unless ($reader->get_encoding) {
            $reader->set_encoding("UTF-8");
        }
    }
}
|
||||||
|
|
||||||
|
# Match and consume a version="..." pseudo-attribute.
#
# Returns 0 (consuming nothing) when the input does not start with
# one. Otherwise consumes it, reports a parser error for any version
# other than "1.0", and returns 1.
sub VersionInfo {
    my ($self, $reader) = @_;

    my $data = $reader->data(11);

    # warn("Looking for version in $data");

    return 0 unless $data =~ /^(version$S*=$S*(["'])($VersionNum)\2)/o;
    my ($attribute, $vernum) = ($1, $3);

    $reader->move_along(length($attribute));

    $self->parser_error("Only XML version 1.0 supported. Saw: '$vernum'", $reader)
        if $vernum ne "1.0";

    return 1;
}
|
||||||
|
|
||||||
|
# Match and consume a standalone="yes|no" pseudo-attribute.
#
# Returns 0 (consuming nothing) when the input does not start with
# one. Otherwise consumes it, stores 1 or 0 in $self->{standalone}
# and returns 1.
sub SDDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(15);

    return 0 unless $data =~ /^(standalone$S*=$S*(["'])(yes|no)\2)/o;
    my ($attribute, $yesno) = ($1, $3);

    $reader->move_along(length($attribute));

    $self->{standalone} = ($yesno eq 'yes') ? 1 : 0;

    return 1;
}
|
||||||
|
|
||||||
|
# Match and consume an encoding="..." pseudo-attribute.
#
# Returns 0 (consuming nothing) when the input does not start with
# one. Otherwise consumes it, passes the declared name to the
# reader's set_encoding and returns 1.
sub EncodingDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(12);

    return 0 unless $data =~ /^(encoding$S*=$S*(["'])($EncNameStart$EncNameEnd*)\2)/o;
    my ($attribute, $encoding) = ($1, $3);

    $reader->move_along(length($attribute));
    $reader->set_encoding($encoding);

    return 1;
}
|
||||||
|
|
||||||
|
# Parse the text declaration of an external entity:
#
#   <?xml [version=...] encoding=... ?>
#
# Returns nothing when the input does not start with "<?xml" followed
# by whitespace. Otherwise the version is optional (but must be
# followed by whitespace), the encoding is required, and the closing
# "?>" is checked and consumed. Returns 1 in that case.
sub TextDecl {
    my ($self, $reader) = @_;

    my $data = $reader->data(6);
    $data =~ /^<\?xml$S+/ or return;
    $reader->move_along(5);
    $self->skip_whitespace($reader);

    if ($self->VersionInfo($reader)) {
        $self->skip_whitespace($reader) ||
            $self->parser_error("Lack of whitespace after version attribute in text declaration", $reader);
    }

    $self->EncodingDecl($reader) ||
        $self->parser_error("Encoding declaration missing from external entity text declaration", $reader);

    $self->skip_whitespace($reader);

    $data = $reader->data(2);
    $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
    # Consume the terminator, as XMLDecl does; otherwise "?>" would be
    # left in the buffer for whatever is parsed next.
    $reader->move_along(2);

    return 1;
}
|
||||||
|
|
||||||
|
1;
|
||||||
Reference in New Issue
Block a user