diff --git a/.gitignore b/.gitignore index 4f632c572e..8555dcf4f3 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,13 @@ # Mac OS X .DS_Store + +sw/ground_segment/lpc21iap/lpc21iap.dSYM/Contents/Info.plist + +sw/ground_segment/lpc21iap/lpc21iap.dSYM/Contents/Resources/DWARF/lpc21iap + +tests/results/* + +sw/logalizer/plotprofile.dSYM/Contents/Resources/DWARF/plotprofile + +sw/logalizer/plotprofile.dSYM/Contents/Info.plist diff --git a/Makefile b/Makefile index 9285339d7b..e61763d022 100644 --- a/Makefile +++ b/Makefile @@ -243,6 +243,7 @@ clean: $(Q)make -C sw/ext clean $(Q)find . -name '*~' -exec rm -f {} \; $(Q)rm -f paparazzi sw/simulator/launchsitl + $(Q)rm -rf tests/results/* cleanspaces: find ./sw/airborne -name '*.[ch]' -exec sed -i {} -e 's/[ \t]*$$//' \; @@ -263,9 +264,6 @@ dist_clean_irreversible: clean ab_clean: find sw/airborne -name '*~' -exec rm -f {} \; -test_all_example_airframes: replace_current_conf_xml - for ap in `grep name conf/conf.xml.example | sed -e 's/.*name=\"//' | sed -e 's/".*//'`; do for airframe in `grep $$ap conf/conf.xml.example | sed -e 's/.*airframe=\"//' | sed -e 's/".*//'`; do for target in `grep target conf/$$airframe | grep name | sed -e 's/.*name=\"//' | sed -e 's/\".*//'`; do echo "Making $$ap $$target"; make -C ./ AIRCRAFT=$$ap clean_ac $$target.compile || exit 1; done; done; done - replace_current_conf_xml: test conf/conf.xml || mv conf/conf.xml conf/conf.xml.backup.`date +%Y%m%d-%H%M%s` cp conf/conf.xml.example conf/conf.xml @@ -280,4 +278,9 @@ sw/simulator/launchsitl: cat src/$(@F) | sed s#OCAMLRUN#$(OCAMLRUN)# | sed s#OCAML#$(OCAML)# > $@ chmod a+x $@ +test: all replace_current_conf_xml + cd tests; $(MAKE) $(@) + +test_all_example_airframes: replace_current_conf_xml + cd tests; $(MAKE) $(@) TARGET_BOARD=examples diff --git a/conf/Makefile.stm32 b/conf/Makefile.stm32 index 92aa55d708..0a6d4e3263 100644 --- a/conf/Makefile.stm32 +++ b/conf/Makefile.stm32 @@ -78,6 +78,9 @@ OOCD = $(shell if test -e $(TOOLCHAIN_DIR)/bin/openocd ; then echo $(TOOLCHAIN_D endif endif +ifneq ($(BOARD_SERIAL),) +OOCD_OPTIONS = -c "ft2232_serial $(BOARD_SERIAL)" +endif LOADER=/home/poine/work/stm32/stm32loader-a3c51c26ad6c/stm32loader.py @@ -257,7 +260,7 @@ upload: $(OBJDIR)/$(TARGET).elf @echo "Using OOCD = $(OOCD)" @echo " OOCD\t$<" $(Q)$(OOCD) -f interface/$(OOCD_INTERFACE).cfg \ - -f board/$(OOCD_BOARD).cfg \ + -f board/$(OOCD_BOARD).cfg $(OOCD_OPTIONS) \ -c init \ -c "reset halt" \ -c "reset init" \ diff --git a/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_fw.xml b/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_fw.xml index ab07cfb2b8..0769b226a4 100644 --- a/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_fw.xml +++ b/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_fw.xml @@ -1,2 +1,192 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + +
+ + +
+ +
+ + + +
+ +
+ + + + + +
+ +
+ + + + + + + +
+ + +
+ + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+ + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + +
+
diff --git a/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_rc.xml b/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_rc.xml index ab07cfb2b8..fe79b77174 100644 --- a/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_rc.xml +++ b/conf/airframes/TestHardware/LisaL_v1.1_aspirin_v1.5_overo_rc.xml @@ -1,2 +1,23 @@ + + + + + + + + + + + + + diff --git a/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_fw.xml b/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_fw.xml index ab07cfb2b8..3486e98270 100644 --- a/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_fw.xml +++ b/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_fw.xml @@ -1,2 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + +
+ + +
+
diff --git a/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_rc.xml b/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_rc.xml index ab07cfb2b8..3d0592f223 100644 --- a/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_rc.xml +++ b/conf/airframes/TestHardware/LisaL_v1.1_b2_v1.2_rc.xml @@ -1,2 +1,303 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + +
+ + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +
+ + + + +
+ +
+ + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ +
+ +
+ +
+ + + + + + + + + + + + + + + + +
+ + +
+ + + + +
+ +
+ +
+ +
+ + + +
+
diff --git a/conf/conf.xml.example b/conf/conf.xml.example index 84058ae592..6554a44879 100644 --- a/conf/conf.xml.example +++ b/conf/conf.xml.example @@ -1,22 +1,92 @@ - + - + + - + + + - - - - - - - - - - - - + + + + + + + diff --git a/tests/LisaL/01_upload.t b/tests/LisaL/01_upload.t new file mode 100644 index 0000000000..c32d5b62b5 --- /dev/null +++ b/tests/LisaL/01_upload.t @@ -0,0 +1,95 @@ +#!/usr/bin/perl -w + +use Test::More tests => 7; +use lib "$ENV{'PAPARAZZI_SRC'}/tests/lib"; +use Program; +use Proc::Background; +use Ivy; + +$|++; + +#################### +# Make the airframe +my $make_compile_options = "AIRCRAFT=LisaLv11_Booz2v12_RC clean_ac ap.compile"; +my $compile_output = run_program( + "Attempting to build and upload the firmware.", + $ENV{'PAPARAZZI_SRC'}, + "make $make_compile_options", + 0,1); +unlike($compile_output, '/Aircraft \'LisaLv11_Booz2v12_RC\' not found in/', "The compile output does not contain the message \"Aircraft \'LisaLv11_Booz2v12_RC\' not found in\""); +unlike($compile_output, '/Error/i', "The compile output does not contain the word \"Error\""); + +#################### +# Upload the airframe +my $make_upload_options = "AIRCRAFT=LisaLv11_Booz2v12_RC BOARD_SERIAL=LISA-L-000156 ap.upload"; +my $upload_output = run_program( + "Attempting to build and upload the firmware.", + $ENV{'PAPARAZZI_SRC'}, + "make $make_upload_options", + 0,1); +unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\""); + +# Start the server process +my $server_command = "$ENV{'PAPARAZZI_HOME'}/sw/ground_segment/tmtc/server"; +my $server_options = ""; +my $server = Proc::Background->new($server_command, $server_options); +sleep 2; # The service should die in this time if there's an error +ok($server->alive(), "The server started successfully"); + +# Start the link process +my $link_command = "$ENV{'PAPARAZZI_HOME'}/sw/ground_segment/tmtc/link"; +my @link_options = qw(-d /dev/tty.usbserial-000013FD -s 57600 -transport xbee -xbee_addr 123); +#my @link_options = qw(-d /dev/tty.usbserial-000013FD -s 57600); +sleep 2; # The service should die in this time if there's an error +my $link = Proc::Background->new($link_command, @link_options); +ok($link->alive(), "The link started successfully"); + +# Open the Ivy bus and read from it... +# TODO: learn how to read and write to the Ivy bus + +# Shutdown the server and link processes +ok($server->die(), "The server shutdown successfully."); +ok($link->die(), "The link shutdown successfully."); + +################################################################################ +# functions used by this test script. +sub run_program +{ + my $message = shift; + my $dir = shift; + my $command = shift; + my $verbose = shift; + my $dont_fail_on_error = shift; + + warn "$message\n" if $verbose; + if (defined $dir) + { + $command = "cd $dir;" . $command; + } + my $prog = new Program("bash"); + my $fh = $prog->open("-c \"$command\""); + warn "Running command: \"". $prog->last_command() ."\"\n" if $verbose; + $fh->autoflush(1); + my @output; + while (<$fh>) + { + warn $_ if $verbose; + chomp $_; + push @output, $_; + } + $fh->close; + my $exit_status = $?/256; + unless ($exit_status == 0) + { + if ($dont_fail_on_error) + { + warn "Error: The command \"". $prog->last_command() ."\" failed to complete successfully. Exit status: $exit_status\n" if $verbose; + } + else + { + die "Error: The command \"". $prog->last_command() ."\" failed to complete successfully. Exit status: $exit_status\n"; + } + } + return wantarray ? @output : join "\n", @output; +} + diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000000..f6eb2bd4b5 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,19 @@ +Q = @ +PERL = /usr/bin/perl +TEST_VERBOSE = 0 +ifeq ($(TARGET_BOARD),) + TARGET_BOARD = * +endif +TEST_FILES := $(shell ls $(TARGET_BOARD)/*.t) + +ifneq ($(JUNIT),) + PERLENV=PERL_TEST_HARNESS_DUMP_TAP=$(PAPARAZZI_SRC)/tests/results + RUNTESTS=use TAP::Harness; TAP::Harness->new({ formatter_class => 'TAP::Formatter::JUnit', verbosity => $(TEST_VERBOSE), merge => 1, } )->runtests(qw($(TEST_FILES))) +else + PERLENV= + RUNTESTS=use TAP::Harness;TAP::Harness->new( { verbosity => $(TEST_VERBOSE) } )->runtests(qw($(TEST_FILES))) +endif + +test: + $(Q)$(PERLENV) $(PERL) "-e" "$(RUNTESTS)" + diff --git a/tests/examples/01_compile_all_example_targets.t b/tests/examples/01_compile_all_example_targets.t new file mode 100644 index 0000000000..d8cb760458 --- /dev/null +++ b/tests/examples/01_compile_all_example_targets.t @@ -0,0 +1,109 @@ +#!/usr/bin/perl -w + +use Test::More; +use lib "$ENV{'PAPARAZZI_SRC'}/tests/lib"; +use XML::Simple; +use Program; +use Data::Dumper; +use Config; + +$|++; +my $examples = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/conf.xml.example"); + +use Data::Dumper; + +ok(1, "Parsed the example file"); +foreach my $example (sort keys%{$examples->{'aircraft'}}) +{ + #next unless $example =~ m#easystar#i; + my $airframe = $examples->{'aircraft'}->{$example}->{'airframe'}; + my $airframe_config = XMLin("$ENV{'PAPARAZZI_SRC'}/conf/$airframe"); + foreach my $process (sort keys %{$airframe_config->{'firmware'}}) + { + if ($process =~ m#setup|fixedwing|rotorcraft|lisa_test_progs#) + { + #warn "EX: [$example] ". Dumper($airframe_config->{'firmware'}->{$process}->{'target'}); + foreach my $target (sort keys %{$airframe_config->{'firmware'}->{$process}->{'target'}}) + { + next unless scalar $airframe_config->{'firmware'}->{$process}->{'target'}->{$target}->{'board'}; + + # Exclude some builds on Mac as they are currently broken. + next if ( ($Config{'osname'} =~ m#darwin#i) and ($example =~ m#LISA_ASCTEC_PIOTR|LisaLv11_Booz2v12_RC|BOOZ2_A1#i) and ($target =~ m#sim#i) ); + + #warn "EXAMPLE: [$example] TARGET: [$target]\n"; + my $make_upload_options = "AIRCRAFT=$example clean_ac $target.compile"; + my $upload_output = run_program( + "Attempting to build the firmware $target for the airframe $example.", + $ENV{'PAPARAZZI_SRC'}, + "make $make_upload_options", + $ENV->{'TEST_VERBOSE'},1); + unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\""); + } + } + elsif ($process =~ m#target#) + { + #warn "EXT: [$example] ". Dumper($airframe_config->{'firmware'}->{$process}); + foreach my $target (sort keys %{$airframe_config->{'firmware'}->{$process}}) + { + next unless scalar $airframe_config->{'firmware'}->{$process}->{$target}->{'board'}; + + # Exclude some builds on Mac as they are currently broken. + next if ( ($Config{'osname'} =~ m#darwin#i) and ($example =~ m#LISA_ASCTEC_PIOTR|LisaLv11_Booz2v12_RC|BOOZ2_A1#i) and ($target =~ m#sim#i) ); + + #warn "EXAMPLET: [$example] TARGET: [$target]\n"; + my $make_upload_options = "AIRCRAFT=$example clean_ac $target.compile"; + my $upload_output = run_program( + "Attempting to build the firmware $target for the airframe $example.", + $ENV{'PAPARAZZI_SRC'}, + "make $make_upload_options", + $ENV->{'TEST_VERBOSE'},1); + unlike($upload_output, '/Error/i', "The upload output does not contain the word \"Error\""); + } + } + } +} + +done_testing(); + +################################################################################ +# functions used by this test script. +sub run_program +{ + my $message = shift; + my $dir = shift; + my $command = shift; + my $verbose = shift; + my $dont_fail_on_error = shift; + + warn "$message\n" if $verbose; + if (defined $dir) + { + $command = "cd $dir;" . $command; + } + my $prog = new Program("bash"); + my $fh = $prog->open("-c \"$command\""); + warn "Running command: \"". $prog->last_command() ."\"\n" if $verbose; + $fh->autoflush(1); + my @output; + while (<$fh>) + { + warn $_ if $verbose; + chomp $_; + push @output, $_; + } + $fh->close; + my $exit_status = $?/256; + unless ($exit_status == 0) + { + if ($dont_fail_on_error) + { + warn "Error: The command \"". $prog->last_command() ."\" failed to complete successfully. Exit status: $exit_status\n" if $verbose; + } + else + { + die "Error: The command \"". $prog->last_command() ."\" failed to complete successfully. Exit status: $exit_status\n"; + } + } + return wantarray ? @output : join "\n", @output; +} + diff --git a/tests/lib/Ivy.pm b/tests/lib/Ivy.pm new file mode 100644 index 0000000000..a580b6e891 --- /dev/null +++ b/tests/lib/Ivy.pm @@ -0,0 +1,3091 @@ +# +# Ivy, Perl interface +# +# Copyright 1997-2009 +# Centre d'Études de la Navigation Aérienne +# +# Authors: Alexandre Bustico +# Stéphane Chatty +# Hervé Damiano +# Christophe Mertz +# +# All functions +# +# $Id: Ivy.pm 3491 2011-06-20 09:35:58Z bustico $ +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU LGPL Libray General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA, +# or refer to http://www.gnu.org/copyleft/lgpl.html +# +################################################################## + +package Ivy ; + +use Sys::Hostname; +use IO::Socket; +use strict; +use Time::HiRes qw(gettimeofday); +use Carp; +use IO::Socket::Multicast; +use File::Temp; + +use vars qw($VERSION); +use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); +use Socket qw(TCP_NODELAY); + +# to compute the VERSION from the CVS tag (or if no tag, as the cvs file revision) +my $TAG= q$Name: $; +my $REVISION = q$Revision: 3491 $ ; +$VERSION = '1.49' ; # for Makefile.PL +($VERSION) = $TAG =~ /^\D*([\d_]+)/ ; +if (defined $VERSION and $VERSION ne "_") { + $VERSION =~ s/_/\./g; +} +else { + $VERSION = $REVISION; +} + +############################################################################# +#### PROTOTYPES ##### +############################################################################# +sub init; # methode de classe, permet de renseigner + # tous les parametres globaux. Ces parametres + # seront utilises par new si ils ne sont pas + # donnes lors de l'appel de new. + + +sub new ($%); # verifie la validite de tous les parametres, + # cree et retourne un objet Ivy. Les parametres + # appName, networks, messWhenReady, peuvent + # etre donnes, meme si ils ont deja ete + # donnes dans init, dans ce cas, ce sont ceux + # de new qui prevalent + +sub start; # debut de l'integration au bus : + # - cree la socket d'application, recupere le no + # de port + # - cree la socket supervision + # - envoie le "no de port" + # - bind le file descriptor de la socket de + # supervision a la fonction getBonjour pour + # traiter les bonjours + # - bind le fd de connection sur la fonction + # getConnections + # pour etablir les connections "application" + +sub DESTROY ($); # - envoie un BYE et clôt les connections + +sub bindRegexp ($$$;$$) ; # permet d'associer une regexp avec un callBack + # ou d'annuler une precedente association +sub bindRegexpOneShot ($$$); # permet d'associer une regexp avec un callBack avec + # desabonnement automatique après reception du premier + # message qui matche + +sub changeRegexp ($$$); # permet de changer une regexp d'un abonnement + # precedemment fait avec bindRegexp + +sub bindDirect; # permet d'associer un identifiant de msg direct + # avec une fonction de callBack, ou de l'annuler + +sub sendMsgs; # envoie une liste de messages +sub sendAppNameMsgs; # envoie une liste de messages precedes + # du nom de l'application +sub sendDirectMsgs; # envoie une liste de messages directs a une appli +sub sendDieTo; # envoie un <> a une appli +sub ping ($$\&); # teste qu'une appli soit encore vivante +sub dumpTable ($$\&); # demande à une appli de dumper sa table de regexps dans + # un fichier à des fins de debug +sub mainLoop (;$); # la mainloop locale (sans tk) +sub stop (); # methode de classe : on delete les bus, mais + # on reste dans la mainloop +sub exit (); # methode de classe : on delete tous les + # bus (donc on ferme proprement toutes les + # connexions). + # Si on est en mainloop locale on sort de la + # mainloop, le code qui suit l'appel mainLoop + # sera execute. + # par contre si on est en mainloop Tk, + # il faut en plus detruire la mainwindow + # pour sortir de la mainloop; +sub after ($$;$); # temps en millisecondes, callback +sub repeat ($$;$); # temps en millisecondes, callback +sub afterCancel ($;$); # l'id d'un cancel ou d'un repeat +sub afterResetTimer ($;$); # pour ré-armer un timer non-encore déclenché à sa valeur initiale +sub fileEvent ($$;$); # associe un fd a un callback pour la mainloop locale +sub getUuid ($); # rend un identifiant unique d'application sur le bus, utile + # pour faire des genres de rpc avec un abonnement desabonnement dynamique + +################ PRIVEE #################################################### +sub _getBonjour ($); # lit le (ou les) bonjour(s) sur le canal de supervision + # et se connecte, verifie qu'il ne se reponds pas lui + # meme, ni qu'il ne repond pas a un service deja connecte + +sub _getConnections ($); # est appele lors d'une demande de connection : + # accepte la connection et mets a jour @sendRegList + # rajoute le fd du canal aux fd a scruter dans la + # boucle d'evenements + +sub _getMessages ($$); # est appele lorqu'un message arrive + +sub _sendWantedRegexp ($$); # envoie les regexp a l'appli distante + +sub _sendLastRegexpToAllreadyConnected ($$) ; # envoie la derniere regexp + # pushee dans @recCbList + # a toutes les applis deja + # connectees +sub _removeFileDescriptor ($$$); # on vire un fd et les structures associees +sub _sendErrorTo ($$$); #(fd, error) envoie un message d'erreur a un fd +sub _sendDieTo ($$); #(fd) envoie un message de demande de suicide a un fd +sub _sendMsgTo ($$\$); # (fd, message) +sub _pong ($$$); # (fd) +sub _dumpTable ($$$); # (fd) +sub _tkFileEvent ($$); # associe un fd a un callback pour la mainloop tk +sub _scanAfter () ; # parse si il faut appeler un callback associe a un after +sub _myCanRead (); # interface au select +sub _scanConnStatus ($$$@); # verifie les connections effectuees et + # appelle la fonction $statusFunc +sub _inetAdrByName ($$); # transforme une adresse inet native en chaine + # $host:$port +sub _getHostByAddr ($); +sub _toBePruned ($$$); +sub _parseIvyBusParam ($); # prends une adresse de bus de la forme + # 143.196.53,DGAC-CENATLS:2010 et + # renvoie une liste de deux elements : + # un numero de port et une ref sur une + # liste d'adresses addr_inet + +sub _substituteEscapedChar ($$); #permet de transformer une regexp etendue + # 'perl' en regexp de base + +sub _callCongestionCb ($$$); # appelle la callback de notification de congestion, + # si elle a été définie par l'utilisateur + +sub _getNameByFileDes ($$); # retourne le nom de l'appi en fonction du filedes + # de la socket +sub _univSend ($$$); # effectue les send de manière bloquante ou non bloquante + # et accumule les messages si la socket est bloquée + +sub _regexpGen ($$$); # routines for generating regexps wich matches +sub _strictPosRegexpGen ($$$$); # numerical interval using the special syntax +sub _genAtRank ($$$); # (?I-20#-10) or (?I-20#-10i) +sub _genPreRank ($$$); +sub _genRank ($$$); +sub _genPostRank ($); +sub _nextMax ($$); +sub _max ($$); +sub _min ($$); + +############################################################################# +#### CONSTANTES ##### +############################################################################# +use constant MSG_FMT => "%d %d\002%s\n"; + +# par defaut, on diffuse le bonjour en local +use constant BROADCAST_ADDRS => "127.255.255.255" ; +use constant BROADCAST_PORT => "2010"; + +use constant BYE => 0; +use constant REGEXP => 1; +use constant MSG => 2; +use constant ERROR => 3; +use constant DELREGEXP => 4; +use constant ENDREGEXP => 5; +use constant APP_NAME => 6; +use constant DIRECT_MSG => 7; +use constant DIE => 8; +use constant PING => 9; +use constant PONG => 10; +use constant DUMP_TABLES => 11; +use constant DUMP_TABLES_FILE => 12; +use constant IVY_PROTOCOLE_VERSION => 3; + +use constant AFTER => 0; +use constant REPEAT => 1; +use constant TK_MAINLOOP => 0; +use constant LOCAL_MAINLOOP => 1; +use constant CALL_BY_VALUE => 0; +use constant CALL_BY_REF => 1; +use constant BIND_ONCE => 2; +use constant MAX_TIMOUT => 1000; + + +# TCP_NODELAY is for a specific purpose; to disable the Nagle buffering +# algorithm. It should only be set for applications that send frequent +# small bursts of information without getting an immediate response, +# where timely delivery of data is required (the canonical example is +# mouse movements). +# Since Ivy is most of the time used to send events, we will priviligiate +# lag over throughtput, so _TCP_NO_DELAY_ACTIVATED is set to 1 +use constant TCP_NO_DELAY_ACTIVATED => 1; + + +# pour pouvoir employer les regexps perl. Attention lors de l'utilisation +# ne pas mettre un \n dans une chaine entre "" car l'\n sera interprete. +use constant REG_PERLISSISME => ('w' => '[a-zA-Z0-9_]', + 'W' => '[^a-zA-Z0-9_]', + 's' => "[\t ]", + 'S' => "[^\t ]", + 'd' => '[0-9]', + 'D' => '[^0-9]', + 'n' => '', # Il ne faut pas mettre d'\n : + # c'est un delimiteur pour le bus + 'e' => '[]') ; + +############################################################################# +#### VARIABLES de CLASSE ##### +############################################################################# + +# l'objet Ivy utilise par defaut quand le programmeur +# utilise le mode de compatibilite de la version 3, et ne +# manipule pas d'objets +my $globalIvy ; + +# optimisation : si l'on connait les sujets des messages +# qu'on envoie, cette variable contient une liste de +# sujets qui doivent matcher les regexps d'abonnement +# pour que celle ci se soient pas eliminees +my @topicRegexps; + +# les adresses de reseau sur lesquelles ont broadcaste +# suivies du No de port : +# exemples : "143.196.1.255,143.196.2.255:2010" +# "DGAC-CENATLS-PII:DGAC-CENATLS:2010" +# ":2010" <= dans ce cas c'est la valeur +# de reseau de broadcast par defaut qui est prise : 127.255.255.255 +# c.a.d local a la machine +my $ivyBus ; + +# le nom de l'appli pour le bus +my $appName ; + +# message a envoyer a un canal lorsqu'on +# a recu le message endregexp. +my $messWhenReady ; + +# fonction de cb appelee lorsque l'appli a recu l'ordre +# de quitter, on peut dans ce callback fermer +# proprement les ressources avant de sortir. +# ps : ne pas fasire d'exit dans le callback, +# c'est le bus qui s'en charge +my $onDieFunc; + + +# permet de donner des valeurs successives aux constantes permettant +# d'acceder aux differents champs de l'objet +my $constantIndexer =0; + +# pointeur sur la fonction permettant d'associer +# des callbacks a un file desc, (ainsi que de les enlever) +my $fileEventFunc; + +# dans le cas ou l'on soit dans une mainLoop +# locale, cette var pointe une un objet +# de type IO::Select, qui est l'ensemble des file descriptor +# des sockets que l'on scrute en lecture (attente des messages) +my $localLoopSelRead; + +# dans le cas ou l'on soit dans une mainLoop +# locale, cette var pointe une un objet +# de type IO::Select qui est l'ensemble des file descriptor +# des sockets que l'on scrute en ecriture : les fd congestionnés +# des "slow agent" que l'on surveille pour ecrire dedans dès que +# possible (mode non bloquant) +my $localLoopSelWrite; + +# table d'ass. handle -> callback +my %localBindByHandle; + +# table d'ass. fhd -> nom appli +my %nameByHandle; + +# tableau d'ass [AFTER ou REPEAT, +# timeTotal, deadLine, [callback, arg, arg, ...]] +my %afterList=(); + +my $afterId = 0; + +# timeout le plus petit pour le select +my $selectTimout = MAX_TIMOUT; +my $loopMode; + +# liste des bus actifs +my %allBuses = (); + +# cache des nom retournés par gethostbyaddr pour _getHostByAddr +my %hostNameByAddr = (); + +my $pingId = 1; # identifiant d'un ping (renvoyé par le pong) + +#my $trace; + +############################################################################# +#### CLEFS DES VARIABLES D'INSTANCE ##### +#### ##### +#### l'objet Ivy sera 'blessed' sur une reference sur un array et non ##### +#### sur une table de hash comme pratique courament de facon a ##### +#### 1/ optimiser au niveau vitesse ##### +#### 2/ avoir des clefs sous forme de symboles (use constant...) ##### +#### et nom des clefs sous forme de chaines de caracteres ##### +#### de facon a eviter des erreurs ##### +#### ##### +#### ##### +############################################################################# +use constant servPort => $constantIndexer++; +use constant neededApp => $constantIndexer++; +use constant statusFunc => $constantIndexer++; +use constant slowAgentFunc => $constantIndexer++; +use constant blockOnSlowAgent => $constantIndexer++; +use constant supSock => $constantIndexer++; +use constant connSock => $constantIndexer++; +use constant sockList => $constantIndexer++; +use constant threadList => $constantIndexer++; +use constant appliList => $constantIndexer++; +use constant sendRegList => $constantIndexer++; +use constant sendRegListSrc => $constantIndexer++; +use constant topicRegexps => $constantIndexer++; +use constant recCbList => $constantIndexer++; +use constant directCbList => $constantIndexer++; +use constant cnnxion => $constantIndexer++; +use constant connectedUuid => $constantIndexer++; +use constant bufRecByCnnx => $constantIndexer++; +use constant bufEmiByCnnx => $constantIndexer++; +use constant broadcastPort => $constantIndexer++; +use constant broadcastBuses => $constantIndexer++; +use constant useMulticast => $constantIndexer++; +use constant appName => $constantIndexer++; +use constant messWhenReady => $constantIndexer++; +use constant uuid => $constantIndexer++; +use constant pongQueue => $constantIndexer++; +use constant readyToSend => $constantIndexer++; + +############################################################################# +#### METHODES PUBLIQUES ##### +############################################################################# +sub init +{ + if (defined $fileEventFunc) { + print STDERR "Ivy warning: init has already been called\n"; + return; + } + + + srand(); # initialisation du generateur aléatoire qui sert à produire un UUID + my $class = shift if (@_ and $_[0] eq __PACKAGE__); + my (%options) = @_; + + # valeurs par defaut pour le parametre : variable d'environnement + # ou valeur cablee, a defaut + my $default_ivyBus = defined $ENV{"IVYBUS"} ? + $ENV{"IVYBUS"} : + BROADCAST_ADDRS.':'.BROADCAST_PORT; + + my %optionsAndDefaults = ( #PARAMETRES OBLIGATOIRES + -loopMode => undef, + # TK ou LOCAL + + -appName => undef, + # nom de l'appli + + # PARAMETRES FACULTATIFS (avec valeurs par defaut) + + # les adresses de reseau sur lesquelles ont broadcaste + # suivies du No de port : + # exemples : "143.196.1.255,143.196.2.255:2010" + # "DGAC-CENATLS-PII:DGAC-CENATLS:2010" + # ":2010" <= dans ce cas c'est la valeur + # de reseau de broadcast par defaut qui est prise : + # 127.255.255.255 c.a.d local a la machine + -ivyBus => $default_ivyBus, + + -messWhenReady => "_APP NAME READY", + # message de synchro a envoyer quand pret + + -onDieFunc => [sub {}], + # fonction de cb appelee lorsque l'appli a recu l'ordre + # de quitter, on peut dans ce callback fermer + # proprement les ressources avant de sortir. + # ps : ne pas faire d'exit dans le callback, + # c'est le bus qui s'en charge + + + -filterRegexp => [],# nouvelle api cohérente avec ivy-c, c++, java + -pruneRegexp => [], # obsolete + # optimisation : si l'on connait les sujets des messages + # qu'on envoie, on fournit la liste des sujets + # et les regexps qui ne matchent pas + # ces sujets sont eliminees. + ) ; + + # on examine toutes les options possibles + foreach my $opt (keys %optionsAndDefaults) { + # si un parametre a ete fourni, ignorer les valeurs par defaut + next if defined $options{$opt} ; + # sinon, prendre la valeur par defaut si elle existe + if (defined $optionsAndDefaults{$opt}) { + $options{$opt} = $optionsAndDefaults{$opt} ; + # sinon, on jette l'eponge + } else { + croak "Error in Ivy::init: option $opt is mandatory\n"; + } + } + + # on examine toutes les options fournies, pour detecter les inutiles + foreach my $opt (keys %options) { + unless (exists ($optionsAndDefaults{$opt})) { + carp "Warning in Ivy::init: option $opt is unknown"; + } + } + + $ivyBus = $options{-ivyBus}; + $appName = $options{-appName} ; + $onDieFunc = $options{-onDieFunc} ; + + + # trace pour le debug + my $verFile = 0; +# while (!open ($trace, , ">>", "/tmp/Ivy_$appName:$verFile.log")) {$verFile++}; +# syswrite ($trace, "DEBUT\n"); + + + if (scalar (@{$options{-pruneRegexp}})) { + carp "-pruneRegexp is *OBSOLETE*. -filterRegexp should be used instead\n"; + $options{-filterRegexp} = $options{-pruneRegexp} unless defined $options{-filterRegexp}; + } + + @topicRegexps = @{$options{-filterRegexp}}; + $messWhenReady = $options{-messWhenReady} eq "_APP NAME READY" ? + "$appName READY" : + $options{-messWhenReady}; + + + if ($options{-loopMode} =~ /local/i) { + # mode boucle d'evenement locale + use IO::Select; + $fileEventFunc = \&fileEvent ; + $localLoopSelRead = IO::Select->new (); + $localLoopSelWrite = IO::Select->new (); + $loopMode = LOCAL_MAINLOOP; + } elsif ($options{-loopMode} =~ /tk/i) { + # mode boucle d'evenement de TK + $fileEventFunc = \&_tkFileEvent ; + $loopMode = TK_MAINLOOP; + } else { + croak "Error in Ivy::init, argument loopMode must be either TK or LOCAL\n"; + } + + $SIG{'PIPE'} = 'IGNORE' ; +} # end init + +############# METHODE DE CLASSE NEW +sub new ($%) +{ + my ($class, %options) = @_; + my $self = []; + $#{$self} = $constantIndexer; # on predimensionne le tableau + bless ($self, $class); + + # on verifie que la methode de classe init ait ete appelee + unless ((defined $appName) && ($appName ne '')) { + croak "Error in Ivy::new, you should have called Ivy->init () first."; + } + + # No de port tcp du serveur + $self->[servPort] = ''; + + # liste des applis necessaires a l'appli locale + $self->[neededApp] = []; + + # callback prenant en param 3 refs sur des listes : + # [applis presentes, appli absentes, hash_applis_present] + # cette fonction est appelee : + # - tout les pollingTime tant que toutes les applis + # ne sont pas presentes + # - des qu'une appli se connecte + # - lorsqu'une appli se deconnecte + $self->[statusFunc] = ''; + $self->[slowAgentFunc] = ''; + + # callback prenant en param 1 refs sur une liste : + # [ref sur fonction, parametres] + + # socket de supervision en lecture/ecriture + $self->[supSock] = ''; + + # socket de connexion tcp + $self->[connSock] = ''; + + # tab ass : nom du fd => fd + $self->[sockList] = {}; + + # tab ass : nom de l'appli => fd + $self->[appliList] = {}; + + # tableau ass de liste du type + # sockId => [fonction, fonction, ...] + # pour savoir quoi envoyer a qui + # les fonctions anonymes sont compilees + # dynamiquement a la reception des messages REGEXP + # et filtrent les mess a envoyer et les envoient + # au besoin + $self->[sendRegList] = {}; + + # tableau ass de liste du type + # sockId => ["regexp"...] + # pour connaitre la valeur des regexp meme apres compilation + $self->[sendRegListSrc] = {}; + + # liste des topics qu'on envoie si on + # filtre les regexps + $self->[topicRegexps] = []; + + # liste de ref sur des couples + # (regexp,callBack) les callbacks + # sont appeles lors de + # la reception de messages en fonction + # du numero de regexp. + $self->[recCbList] = []; + + # liste de callBack pour les messages directs + $self->[directCbList] = []; + + # tableau ass : clef = nom:numero_de port + # permet de verifier qu'on ne se connecte pas + # sur nous meme et qu'on ne se reconnecte + # pas sur un service en cas de bonjours repetes + # valeur : nom de l'application + $self->[cnnxion] = {}; + $self->[connectedUuid] = {}; + + + # tableau associatif, clef => file desc, + # valeur :buffer au cas ou la lecture ne se termine + # pas par \n, de maniere a resegmenter les messages + $self->[bufRecByCnnx] = {}; + + # tableau associatif, clef => file desc, + # valeur :buffer au cas ou l'écriture bloque sur un fd + # pour eviter de bloquer la mainloop sur un send on bufferise + # les données dans le process + $self->[bufEmiByCnnx] = {}; + + # identifiant unique + $self->[uuid] = sprintf ("%d%d", time(), rand()*(2**31)); + + # queue de gestion des pings et des dumpTable: + # clef : socket fd, valeur :liste [timestamp, machine:port, callBack] + $self->[pongQueue] = {}; + + + + + my %optionsAndDefaults = ( + -appName => $appName, + # nom de l'appli + + # PARAMETRES FACULTATIFS (avec valeurs par defaut) + -messWhenReady => $messWhenReady, + # message de synchro a envoyer quand pret + + + # PARAMETRES FACULTATIFS (avec valeurs par defaut) + + # les adresses de reseau sur lesquelles ont broadcaste + # suivies du No de port : + # exemples : "143.196.1.255,143.196.2.255:2010" + # "DGAC-CENATLS-PII:DGAC-CENATLS:2010" + # ":2010" <= dans ce cas c'est la valeur + # de reseau de broadcast par defaut qui est prise : + # 127.255.255.255 c.a.d local a la machine + -ivyBus => $ivyBus, + + -neededApp => [], + # liste des appplis necessaires + + -statusFunc => sub {}, + # fonction de callBack qui sera appelee tant que + # toutes les applis necessaires ne sont pas presentes, + # et des que toutes les applis necessaires sont + # presentes, et si une appli necessaire se deconnecte + # les parametres passes à la callback sont : + # °[liste des applis presentes], + # °[liste des applis absentes], + # °[table de hash, clefs = applis presentes, valeurs = nombre d'applis . + # normalement ce nombre devrait etre 1, sinon + # ca veut dire que plus d'une appli de meme nom + # tourne sur le meme bus : danger !! + # ° nom de l'appli qui genere l'evenement + # ° evenement : 'subscribing'|'filtered'|'unsubscribing'|'died'|'new' + # ° adresse + # ° regexp si c'est un abonnement, un desabonnement ou un filtered + + -blockOnSlowAgent => 1, + # comportement lorque un ou plusieurs des agents connectés + # ne consomment pas suffisement rapidement les messages. Ou bien on laisse + # le send bloquer, ou bien on accumule les messages localement + # en attendant que l'agent congestionné soit capable de les traiter. + # cette méthode a l'avantage de ne pas bloquer la mainloop locale, + # par contre la consomation mémoire peut devenir problématique. + + -slowAgentFunc => sub {}, + # fonction de callBack qui sera appelee si l'envoi de messages + # à un agent n'est plus possible parce que l'agent en question ne + # consomme pas ses messages assez vite, ou si un agent qui etait + # dans cette etat retrouve sa capacité à lire les messages. + # les paramètres passés à la callback sont + # nom de l'appli, + # adresse + # etat (congestion = 1, decongestion = 0) + + -filterRegexp => [@topicRegexps], + -pruneRegexp => [], # obsolete + # optimisation : si l'on connait les sujets des messages + # qu'on envoie, on fournit la liste des sujets + # et les regexps qui ne matchent pas + # ces sujets sont eliminees. + ) ; + + + # on examine toutes les options possibles + foreach my $opt (keys %optionsAndDefaults) { + # si un parametre a ete fourni, ignorer les valeurs par defaut + next if defined $options{$opt} ; + # sinon, prendre la valeur par defaut si elle existe + if (defined $optionsAndDefaults{$opt}) { + $options{$opt} = $optionsAndDefaults{$opt} ; + # sinon, on jette l'eponge + } else { + croak "Error in Ivy::new: option $opt is mandatory\n"; + } + } + + if (scalar (@{$options{-pruneRegexp}})) { + carp "-pruneRegexp is *OBSOLETE*. -filterRegexp should be used instead\n"; + $options{-filterRegexp} = $options{-pruneRegexp} unless defined $options{-filterRegexp}; + } + + # on examine toutes les options fournies, pour detecter les inutiles + foreach my $opt (keys %options) { + unless (exists ($optionsAndDefaults{$opt})) { + carp "Warning in Ivy::new, option $opt is unknown"; + } + } + + $self->[appName] = $options{-appName} ; + $self->[messWhenReady] = $options{-messWhenReady} ; + @{$self->[neededApp]} = @{$options{-neededApp}} ; + $self->[statusFunc] = $options{-statusFunc} ; + $self->[slowAgentFunc] = $options{-slowAgentFunc} ; + $self->[blockOnSlowAgent] = $options{-blockOnSlowAgent} ; + + $self->[topicRegexps] = $options{-filterRegexp} ; + $allBuses{$self} = $self; + + ($self->[useMulticast], $self->[broadcastPort], $self->[broadcastBuses]) = + _parseIvyBusParam ($options{-ivyBus}); + $self->[readyToSend] = {}; + + + return ($self); +} # end new + +############### METHODE IVY DESTROY +sub DESTROY ($) +{ + my $self = shift; + return unless exists $allBuses{$self}; + # print ("DBG> DESTROY appele sur l'objet $self\n"); + + # pour toutes les connections + foreach my $fd (values %{$self->[sockList]}) { + # send ($fd, sprintf (MSG_FMT, BYE, 0, ""), 0) + # or $self->_removeFileDescriptor ($fd); + # the 2 previous lines seems to works with other ivy-perl applis + # but DO NOT work with ivy-c api. + # the 2 next lines works. This has to been validated! CM 21/12/2000 + if (defined $fd) { + _univSend ($self, $fd, sprintf (MSG_FMT, BYE, 0, "")); + $self->_removeFileDescriptor ($fd, 'DESTROY'); + } + } + + # on clot la socket de signalisation (UDP) + # print "DBG> fermeture de supSock ", $self->[supSock] ,"\n"; + # the following test has been expanded to avoid some nasty bug + # which appeared when upgrading from perl-tk 800.023 to 800.024 + $self->[supSock]->close() if ($self->[supSock] and $self->[supSock]->connected()); + delete $allBuses{$self}; + + # on clot la socket de connection + # print "DBG> fermeture de connSock ", $self->[connSock], "\n"; + # the following test has been expanded to avoid some nasty bug + # which appeared when upgrading from perl-tk 800.023 to 800.024 + $self->[connSock]->close() if ($self->[connSock] and $self->[connSock]->connected()); + undef (@$self); +# close ($trace); +} # end DESTROY + +############### METHODE DE CLASSE STOP +sub stop () +{ + foreach my $bus (values %allBuses) { + $bus->DESTROY(); + } # pour toutes les connections +} # end stop + + +############## METHODE DE CLASSE EXIT +sub exit () +{ + Ivy::stop (); + if (defined $localLoopSelRead) { + undef $localLoopSelRead ; + undef $localLoopSelWrite ; + } else { + Tk::exit (); + } +} # end exit + +############### PROCEDURE BUS START +sub start +{ + my $self; + + # compatibility for version 3 interface, ie. no objects manipulated by programmer + if (not @_ or ref ($_[0]) ne __PACKAGE__) { + init (@_); + $self = $globalIvy = new Ivy; + } else { + $globalIvy = $self = shift; + } + + if ($self->[connSock]) { + print "*** the Ivy bus is already started\n"; + return; + } + # cree la socket de connexion, recupere le no de port + my $connSock = $self->[connSock] = IO::Socket::INET->new(Listen => 128, + Proto => 'tcp', + ReuseAddr => 1) ; + # on memorise tout ca, ce qui evitera par la suite de se + # repondre a soi-meme. On le fait sous nos deux noms : + # le nom de machine et 'localhost' + my ($n, $al, $t, $l, @hostAddrs) = gethostbyname (hostname()); + foreach my $a (@hostAddrs) { +# syswrite ($trace, ("DBG> I am " . unpack ('CCCC', $a) . $connSock->sockport . "\n")); + $self->[cnnxion]->{"$a:". $connSock->sockport} = "\004"; + } + + my $localhostAddr = (gethostbyname ('localhost'))[4] ; + $self->[cnnxion]->{"$localhostAddr:". $connSock->sockport} = "\004"; + + # le message de bonjour à envoyer: "no de version no de port" + my $bonjourMsg = sprintf ("%d %d %s %s\n", IVY_PROTOCOLE_VERSION, $connSock->sockport(), + $self->[uuid], $self->[appName]); + + if (!$self->[useMulticast]) { + # cree la socket de broadcast + $self->[supSock] = IO::Socket::INET->new + (LocalPort => $self->[broadcastPort], + Proto => 'udp', + Type => SOCK_DGRAM, + ReuseAddr => 1); + + $self->[supSock]->sockopt (SO_BROADCAST, 1); + foreach my $netBroadcastAddr (@{$self->[broadcastBuses]}) { +# print "BroadcastBus: --", $netBroadcastAddr, "--\n"; + send ($self->[supSock], $bonjourMsg, 0, $netBroadcastAddr) or + carp "Warning in Ivy::start, broadcast of Hello message failed: $!"; + } + } + else { + # creating the multicast socket + $self->[supSock] = IO::Socket::Multicast->new + (LocalPort => $self->[broadcastPort], + ReuseAddr => 1); + + # Multicast datagrams with initial TTL 0 are restricted to the same host. + # Multicast datagrams with initial TTL 1 are restricted to the same subnet. + # Multicast datagrams with initial TTL 32 are restricted to the same site. + # Multicast datagrams with initial TTL 64 are restricted to the same region. + # Multicast datagrams with initial TTL 128 are restricted to the same continent. + # Multicast datagrams with initial TTL 255 are unrestricted in scope. + $self->[supSock]->mcast_ttl(64); + # $self->[supSock]->mcast_loopback(1); must be 1, which is the default + + foreach my $netMulticastAddr (@{$self->[broadcastBuses]}) { + my ($port,$multicastGroupI) = sockaddr_in ($netMulticastAddr); + my $multicastGroup = inet_ntoa($multicastGroupI); + # print "DBG> MulticastBus: --", $multicastGroup,":$port", "--\n"; + $self->[supSock]->mcast_add($multicastGroup); + $self->[supSock]->mcast_send($bonjourMsg, $multicastGroup.":".$port) or + carp "Warning in Ivy::start, multicast of Hello message failed: $!"; + } + } + # callback pour traiter la reception des bonjours + &$fileEventFunc ($self->[supSock], [\&_getBonjour, $self]) ; + + # callback pour traiter les demandes de cxion + &$fileEventFunc ($self->[connSock], [\&_getConnections, $self]) ; + + return $self; +} # end start + + +############### PROCEDURE BIND REGEXP +sub bindRegexp ($$$;$$) +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($regexp, $cb, $callByRef, $bindOnce) = @_; + my $id; + + $callByRef = 0 unless defined $callByRef ; + $bindOnce = 0 unless defined $bindOnce ; + + my $extraParam; + if ($bindOnce) { + $extraParam = BIND_ONCE; + } else { + $extraParam = $callByRef ? CALL_BY_REF : CALL_BY_VALUE; + } + +# print ("DBG> bindRegexp:: self=$self, regexp=$regexp, extraParam=$extraParam\n"); +# my $original_regexp = $regexp; +# # on substitue les meta caracteres des regexps perl : \d, \w, \s, \e +# # par les classes de caracteres corespondantes de maniere a ce +# # qu'une appli distante non perl comprenne ces regexp. +# $regexp =~ s| +# ( +# (? regexp = $regexp\n"); + if ($^W) { + eval {my $test = "a" =~ /$regexp/ } ; # testing the regexp for avoiding + if ($@) { + carp "Warning in Ivy::bindRegexp, ill-formed regexp: '$regexp'" ; + return; + }; + } + + + if ($cb) { + # on rajoute le couple $regexp, $cb dans la liste des messages + # qu'on prend + + # on teste la validité de l'argument + if (ref ($cb) ne 'ARRAY') { + carp ("Warning binRegexp on $regexp :\nargument 3 (callback) is not correct and will be ignored\n"); + return (); + } + if ((ref ($cb->[0]) ne 'CODE') && (ref ($cb->[1]) ne 'CODE')) { + carp ("Warning binRegexp on $regexp :\nargument 3 (callback) is not correct and will be ignored\n"); + return (); + } + + # on commence par tester si on a un id libere dans le tableau + for ($id=0; $id <= ($#{$self->[recCbList]}+1); $id++) { + last unless (defined $self->[recCbList][$id]) && @{$self->[recCbList][$id]->[1]}; + } + $self->[recCbList][$id] = [$regexp, $cb, $extraParam]; + + # on envoie les messages regexps aux processus deja connectes + _sendLastRegexpToAllreadyConnected ($self, $id) ; + } + else { + # on vire le callback, et on se desabonne de cette regexp + for (my $id=0; $id <= $#{$self->[recCbList]}; $id++) { + + next unless (defined $self->[recCbList][$id]) && + @{$self->[recCbList][$id]->[1]}; + + if ($self->[recCbList][$id]->[0] eq $regexp) { + + $self->[recCbList][$id]->[1] = []; + # on envoie le mesage delregexp + foreach my $fd (values %{$self->[sockList]}) { + _univSend ($self, $fd, sprintf (MSG_FMT, DELREGEXP, $id, "")); + } + } + } + } + return ($id); +} # end bindRegexp + +############### PROCEDURE BIND REGEXP ONCE +sub bindRegexpOneShot ($$$) +{ + Ivy::bindRegexp ($_[0], $_[1], $_[2], 0, 1); +} + +############### PROCEDURE CHANGE REGEXP +sub changeRegexp ($$$) +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($regexpId, $regexp) = @_; + + $regexp =~ s| + \(\?I # l'extension (?I + ([\d-]+) # la borne inférieure + \# # l'operateur d'intervalle + ([\d-]+) # la borne supérieure + ([if]?) # le caractère de codage f pour flottant, i pour integer, flottant par defaut + \) # la parenthèse fermante + | + _regexpGen ($1, $2, $3); + |xge; + + # print ("DBG> regexp = $regexp\n"); + if ($^W) { + eval {my $test = "a" =~ /$regexp/ } ; # testing the regexp for avoiding + if ($@) { + carp "Warning in Ivy::changeRegexp, ill-formed regexp: '$regexp'" ; + return; + }; + } + + unless (exists $self->[recCbList][$regexpId]) { + warn ("Warning in Ivy::changeRegexp, invalid regexpId\n"); + return (-1); + } else { + $self->[recCbList][$regexpId]->[0] = $regexp; + _sendLastRegexpToAllreadyConnected ($self, $regexpId) ; + return ($regexpId); + } +} # end changeRegexp + +############### METHODE BIND REGEXP +sub bindDirect +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($id, $cb) = @_; + + if ($cb) { + # on rajoute la $cb dans la liste des messages + # qu'on prend + $self->[directCbList][$id] = $cb; + } else { + # on vire le callback + undef $self->[directCbList][$id]; + } +} # end bindDirect + + +############### PROCEDURE SEND MSGS +sub sendMsgs +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my $appSock; + my $total = 0; + + # pour tous les messages + foreach my $msg (@_) { + study ($msg); + carp "Warning in Ivy::sendMsgs, a message contains a '\\n'. " . + "You should correct it:\n'$msg'" if ($^W && ($msg =~ /\n/)) ; + + # pour routes les connections + foreach $appSock (values %{$self->[sockList]}) { + # pour toutes les fonctions de filtrage de regexp + $total += _sendMsgTo ($self, $appSock, $msg); + } + } + # print "DBG> sended $total times\n"; + return $total; +} # end sendMsgs + + + + + + + +############### PROCEDURE SEND MSGS +sub sendAppNameMsgs +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + return $self->sendMsgs (map ("$self->[appName] $_", @_)); +} + + +# sub sendAppNameMsgs +# { +# my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; +# my @msgs = @_; +# my $total = 0; + +# # pour tous les messages +# foreach (@msgs) { +# carp "Warning in Ivy::sendAppNameMsgs, a message contains a '\\n'. Skipping it:\n'$_'" if ($_ =~ /\n/); + +# my $msg = "$self->[appName] $_"; +# study ($msg); + +# # pour toutes les connections +# foreach my $fd (keys %{$self->[sockList]}) { + +# # pour toutes les fonctions de filtrage de regexp +# foreach my $regexpFunc (@{$self->[sendRegList]{$fd}}) { +# $total += &{$regexpFunc} (\$msg) if defined $regexpFunc; +# } +# } +# } +# # print "DBG> sended $total times\n"; +# return $total; +# } # end sendAppNameMsgs + + + +############### PROCEDURE SEND DIRECT MSGS +sub sendDirectMsgs +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($to, $id, @msgs) = @_; + + if (defined $to and defined ($self->[appliList]{$to})) { + my @fds = @{$self->[appliList]{$to}}; + # pour tous les messages + foreach my $msg (@msgs) { + carp "Warning in Ivy::sendDirectMsgs, a message contains a '\\n'. Skipping it:\n'$msg'" if ($msg =~ /\n/); + + foreach my $fd (@fds) { + _univSend ($self, $fd, sprintf (MSG_FMT, DIRECT_MSG, $id, "$msg"));; + } + } + return 1; + } + else { + my $to_appli = (defined $to) ? $to : ''; + carp "Warning in Ivy::sendDirectMsgs, application $to_appli unknown"; + return 0; + } +} # end sendDirectMsgs + + +############### METHOD SEND DIE TO +sub sendDieTo +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($to) = @_; + + if (defined $to and defined $self->[appliList]{$to}) { + my @fds = @{$self->[appliList]{$to}}; + + carp "Attention : in Ivy::sendDieTo big BUG \@fds is empty" + if (scalar (@fds) == 0); + + # pour tous les messages + foreach my $fd (@fds) { + $self->_sendDieTo($fd); + } + return 1; + } + else { + my $to_appli = (defined $to) ? $to : ''; + carp "Warning in Ivy::sendDieTo, application '$to_appli' is unknown" if $^W; + return 0; + } +} # end sendDieTo + + +############### METHOD PING +sub ping ($$\&) +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($to, $pongCbRef) = @_; + my @fds; + + return unless defined $to; + + if (defined ($self->[appliList]{$to})) { + @fds = @{$self->[appliList]{$to}}; + } else { + my %handleByName = reverse %nameByHandle; +# printf "DBG>>> all names : %s\n", join (', ', keys %handleByName); + if (exists $handleByName{$to}) { + @fds = ($handleByName{$to}); + } else { + carp "Warning in Ivy::ping, application '$to' is unknown" if $^W; + return 0; + } + } + + # pour tous les messages + foreach my $fd (@fds) { +# print ("DBG>> ping : send to fd $fd\n"); + $self->[pongQueue]->{$fd} = [$pingId, Time::HiRes::time(), $pongCbRef]; + _univSend ($self, $self->[sockList]->{$fd}, sprintf (MSG_FMT, PING, $pingId, "")); + } + return ($pingId++); +} # end ping + +############### METHOD DUMP_TABLE +sub dumpTable ($$\&) +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($to, $dumpTableCbRef) = @_; + my @fds; + + return unless defined $to; + + if (defined ($self->[appliList]{$to})) { + @fds = @{$self->[appliList]{$to}}; + } else { + my %handleByName = reverse %nameByHandle; +# printf "DBG>>> all names : %s\n", join (', ', keys %handleByName); + if (exists $handleByName{$to}) { + @fds = ($handleByName{$to}); + } else { + carp "Warning in Ivy::ping, application '$to' is unknown" if $^W; + return 0; + } + } + + # pour tous les messages + foreach my $fd (@fds) { +# print ("DBG>> dumpTable : send to fd $fd\n"); + $self->[pongQueue]->{$fd} = [$pingId, $dumpTableCbRef]; + _univSend ($self, $self->[sockList]->{$fd}, sprintf (MSG_FMT, DUMP_TABLES, $pingId, "")); + } + return ($pingId++); +} # end ping +############### METHODE MAINLOOP +sub mainLoop (;$) +{ + my $self = ref($_[0]) eq __PACKAGE__ ? shift : $globalIvy; + my ($fd, @selRes, @allDesc); + + + if ($loopMode == TK_MAINLOOP) { + eval {Tk::MainLoop ()}; + return; + } + + croak "Error in Ivy::mainLoop, Ivy not properly initialized\n" + unless defined $localLoopSelRead; + + while (defined $localLoopSelRead) { +# READ + @selRes = IO::Select::select ($localLoopSelRead, $localLoopSelWrite, undef , + $selectTimout) ; + _scanAfter () ; + + foreach $fd (@{$selRes[0]}) { + if (ref $localBindByHandle{$fd} eq 'CODE') { + &{$localBindByHandle{$fd}} ; + } + else { + my ($cb, @arg) = @{$localBindByHandle{$fd}} ; + &$cb (@arg) + } + } + +#WRITE + my $bufEmiRef; + my $sent; + + foreach $fd (@{$selRes[1]}) { + $bufEmiRef = \($self->[bufEmiByCnnx]->{$fd}); + $sent = send ($fd, $$bufEmiRef, 0); + unless (defined $sent) { + # y a rien à faire + } elsif ($sent == length ($$bufEmiRef)) { + $$bufEmiRef = ""; + _callCongestionCb ($self, $fd, 0); + } elsif ($sent >= 0) { + substr ($$bufEmiRef, 0, $sent, ''); + } else { + $self->_removeFileDescriptor ($fd, 'mainLoop[WRITE]') unless ($!{EAGAIN} || $!{EWOULDBLOCK}|| $!{EINTR} || $!{EMSGSIZE} || $!{ENOBUFS}) + } + } + } +} # end mainLoop + + +############### METHODE AFTER +sub after ($$;$) +{ + # test du premier argument au cas où la fonction est + # appelee de maniere objet : premier argument = class ou une instance + # de classe + shift if ((ref ($_[0]) eq __PACKAGE__) || ($_[0] eq __PACKAGE__)) ; + + my ($timeAfter, $cbListRef) = @_; + $timeAfter /= 1000; + $selectTimout = $timeAfter if $timeAfter < $selectTimout; + + # si la valeur de timout est negative : c'est un after sinon + # c'est un repeat + $afterList{++$afterId} = [AFTER, $timeAfter, + gettimeofday()+$timeAfter, $cbListRef]; + + return ($afterId); +} # end after + +############### METHODE REPEAT +sub repeat ($$;$) +{ + # test du premier argument au cas où la fonction est + # appelee de maniere objet : premier argument = class ou une instance + # de classe + shift if ((ref ($_[0]) eq __PACKAGE__) || ($_[0] eq __PACKAGE__)) ; + + # on passe le temps en secondes pour le select + my ($timeAfter, $cbListRef) = @_; + $timeAfter /= 1000; + $selectTimout = $timeAfter if $timeAfter < $selectTimout; + + $afterList{++$afterId}= [REPEAT, $timeAfter, gettimeofday()+$timeAfter, + $cbListRef]; + return ($afterId); +} # end repeat + +############### METHODE AFTER CANCEL +sub afterCancel ($;$) +{ + # test du premier argument au cas où la fonction est + # appelee de maniere objet : premier argument = class ou une instance + # de classe + shift if ((ref ($_[0]) eq __PACKAGE__) || ($_[0] eq __PACKAGE__)) ; + + my $id = shift; + + if (defined ($id) && defined $afterList{$id}) { + if ($afterList{$id}->[1] <= $selectTimout) { + delete $afterList{$id} ; + # le timout de l'after/repeat etait le plus petit des timout + # on cherche donc le plus petit parmi ceux qui restent; + $selectTimout = MAX_TIMOUT; + foreach my $af (values %afterList) { + $selectTimout = $af->[1] if $af->[1] < $selectTimout ; + } + } + else { + delete $afterList{$id} ; + } + } +} # end afterCancel + +############### METHODE AFTER RESET TIMER +# permet de gérer des timout plus facilement en permettant de +# réarmer un after non encore déclenché à sa valeur initiale +# cela évite le aftercancel suivi d'un nouvel after +sub afterResetTimer ($;$) +{ + # test du premier argument au cas où la fonction est + # appelee de maniere objet : premier argument = class ou une instance + # de classe + shift if ((ref ($_[0]) eq __PACKAGE__) || ($_[0] eq __PACKAGE__)) ; + + my $id = shift; + if (defined ($id) && defined $afterList{$id}) { + $afterList{$id}->[2] = $afterList{$id}->[1] + gettimeofday(); + } +} # end afterResetTimer + + +############### METHODE FILE EVENT +sub fileEvent ($$;$) +{ + # test du premier argument au cas où la fonction est + # appelee de maniere objet : premier argument = class ou une instance + # de classe + shift if ((ref ($_[0]) eq __PACKAGE__) || ($_[0] eq __PACKAGE__)) ; + + my ($fd, $cb) = @_; + + + unless (defined $localLoopSelRead) { + croak ("Error in Ivy::fileEvent, Ivy should have been initialised in LOCAL loop mode\n"); + } + + if ($cb) { + # adding the handler + $localBindByHandle{$fd} = $cb; + $localLoopSelRead->add ($fd); + } else { + # deleting the handler + delete $localBindByHandle{$fd}; +# print ("DBG> Ivy::fileEvent : removing fd from the select\n"); + $localLoopSelRead->remove ($fd); + $localLoopSelWrite->remove ($fd); + } +} # end fileEvent + + + + +sub getUuid ($) +{ + my $self = shift; + return $self->[uuid]; +} +############################################################################# +#### METHODES PRIVEES ##### +############################################################################# + + +############### METHODE GET BONJOUR +sub _getBonjour ($) +{ + my $self = shift; +# my $DTS = sprintf ("%2d:%2d:%2d", (localtime())[2,1,0]); + + my $bonjourMsg = ''; + + # l'hote distant + my $inetAddr = $self->[supSock]->recv ($bonjourMsg, 1024, 0); + + unless (length $inetAddr) { + carp "Warning in Ivy::_getBonjour, recv error, Hello message discarded"; + return; + } + + my $addr = (unpack_sockaddr_in ($inetAddr))[1]; + + my $peerName = _getHostByAddr ($addr); + + # on force $peerPort a etre vu comme une valeur numerique + my ($version, $peerPort, $uuid, $udpAppName) = +# $bonjourMsg =~ /^(\d+)\s+(\d+)\s+(?:(\w+)\s+(.*))?/; + $bonjourMsg =~ /^(\d+)\s+(\d+)(?:\s+(\S+)\s+(.*))?\n/; + + $udpAppName = 1 unless defined $udpAppName; +# syswrite ($trace, "DBG<$DTS>[$appName]> bonjourMsg = '$bonjourMsg'\n"); +# syswrite ($trace, "DBG<$DTS>[$appName]> reception de $peerName : bonjour $peerPort uuid = $uuid\n"); + + unless (defined ($version) && defined ($peerPort)) { + carp "Warning[$appName] in Ivy::_getBonjour, ill-formed Hello message \"$bonjourMsg\"" ; + return; + } + + if ($version != IVY_PROTOCOLE_VERSION) { + carp "Warning[$appName] in Ivy::_getBonjour, connection request from ". + "$peerName with protocol version $version,\ncurrent version is " . + IVY_PROTOCOLE_VERSION ; + return; + } + + + # on verifie qu'on ne se repond pas et qu'on ne + # se reconnecte pas a un process deja connecte + if (exists ($self->[cnnxion]{"$addr:$peerPort"})) { +# syswrite ($trace, "DBG<$DTS>[$appName]> from $self->[appName] DISCARD bonjour de $peerName:$peerPort [$udpAppName]: DEJA CONNECTE\n") ; + return ; + } elsif ((defined $uuid) && ($uuid eq $self->[uuid])) { +# syswrite ($trace, "DBG<$DTS>[$appName]> from $self->[appName] DISCARD bonjour de $peerName:$peerPort [$udpAppName]: $uuid c'est MOI\n") ; + return; + } elsif ((defined $uuid) && (exists ($self->[connectedUuid]->{$uuid}))) { +# syswrite ($trace, "DBG<$DTS>[$appName]> from $self->[appName] DISCARD bonjour de $peerName:$peerPort:$uuid [$udpAppName] DEJA CONNECTE\n") ; + return; + } else { +# syswrite ($trace, "DBG<$DTS>[$appName]> reception de $peerName : bonjour $udpAppName:$peerPort") ; +# syswrite ($trace, " uuid=$uuid") if (defined $uuid); +# syswrite ($trace, "\n"); +# syswrite ($trace, "DBG<$DTS>>[$appName] from $self->[appName] ACCEPT bonjour de $peerName:$peerPort:$uuid [$udpAppName]\n") ; + $self->[connectedUuid]->{$uuid} = 1 if (defined $uuid); + } + + # on verifie que l'adresse fasse partie de l'ensemble de reseau + # definis par ivybus + my $addrInIvyBus = 0; + my @ivyBusAddrList = map ( (unpack_sockaddr_in ($_))[1], + @{$self->[broadcastBuses]}); + # Bon dans cette version on reponds aux bonjour emis par + # la machine locale, on ne peut donc pas avoir + # une appli qui ne causerait qu'a des machines sur une + # autre reseau, si ca embete qqun, qu'il me le dise + push (@ivyBusAddrList, pack ("CCCC", 127,255,255,255)); + push (@ivyBusAddrList, (gethostbyname (hostname()))[4]); + + use bytes; + foreach my $ivyBusAddr (@ivyBusAddrList) { + $addrInIvyBus = 1 unless (grep ($_ != 0, unpack ("C4", + ($addr & $ivyBusAddr) ^ $addr))); + } + no bytes; + + if ($addrInIvyBus == 0) { + carp "Warning[$appName]: Hello message from $peerName ignored,\n". + "this guy is outside our emission zone\n" if $^W; + return; + } + + # ouverture du canal de communication + my $appSock = IO::Socket::INET->new (PeerAddr => inet_ntoa ($addr),#$peerName, + PeerPort => $peerPort, + Proto => 'tcp'); + + if ($appSock) { + my $flags = fcntl($appSock, F_GETFL, 0); + unless (fcntl($appSock, F_SETFL, $flags | O_NONBLOCK)) { + carp "Warning[$appName] Can't set flags for the socket: $!\n"; + return; + } + $appSock->sockopt(Socket::TCP_NODELAY, TCP_NO_DELAY_ACTIVATED); + + + binmode ($appSock); + # on cree une entree pour $appSock dans la liste des regexp + $nameByHandle{$appSock}=_getHostByAddr($addr) .":$peerPort"; + $self->[cnnxion]{"$addr:$peerPort"} = $udpAppName; + $self->[sendRegList]{$appSock} = []; + $self->[sendRegListSrc]{$appSock} = []; + $self->[bufRecByCnnx]{$appSock} = ''; + $self->[bufEmiByCnnx]{$appSock} = ''; + $self->[sockList]{$appSock} = $appSock; + + +# syswrite ($trace, sprintf ("_getBonjour : ajout dans le fdset de %s[%s]:%d\n", +# (gethostbyaddr ($appSock->peeraddr(),AF_INET))[0], +# join (':', unpack ('C4', $appSock->peeraddr())), +# $appSock->peerport())); + + &$fileEventFunc ($appSock, [\&_getMessages, $self, $appSock]) ; + + # on balance les regexps qui nous interessent a l'appli distante + $self->_sendWantedRegexp ($appSock); + } else { + carp "Warning[$appName] in Ivy::_getBonjour, connection to " . + "$peerName:$peerPort is impossible" ; + } +} # end _getBonjour + + +############### PROCEDURE GET CONNECTIONS +sub _getConnections ($) +{ + my $self = shift; + + my $appSock = $self->[connSock]->accept(); + + unless (defined $appSock) { + carp "Warning in Ivy::_getConnections, \$appSock not defined"; + return; + } else { + my $flags = fcntl($appSock, F_GETFL, 0); + unless (fcntl($appSock, F_SETFL, $flags | O_NONBLOCK)) { + carp "Can't set flags for the socket: $!\n"; + return; + } + + $appSock->sockopt(Socket::TCP_NODELAY, TCP_NO_DELAY_ACTIVATED); + binmode ($appSock); + } + + +# syswrite ($trace, sprintf ("_getConnections : ajout dans le fdset de %s[%s]:%d\n", +# (gethostbyaddr ($appSock->peeraddr(),AF_INET))[0], +# join (':', unpack ('C4', $appSock->peeraddr())), +# $appSock->peerport())); + + + # callback pour traiter la reception des messages + &$fileEventFunc ($appSock, [\&_getMessages, $self, $appSock]) ; + + # on cree une entree pour $appSock dans la liste des regexp + $self->[sendRegList]{$appSock} = []; + $self->[sendRegListSrc]{$appSock} = []; + $self->[bufRecByCnnx]{$appSock} = ''; + $self->[bufEmiByCnnx]{$appSock} = ''; + $self->[sockList]{$appSock} = $appSock; + # on balance les regexps qui nous interessent a l'appli distante + $self->_sendWantedRegexp ($appSock); +} # end _getConnections + + +############### METHODE GET MESSAGES +sub _getMessages ($$) +{ + my ($self, $appSock) = @_; + + unless (defined $appSock) { + carp "Warning in Ivy::_getMessages : *UN*inititialized appSock, don't do anything\n" if $^W; + return; + } + + my $bufferRef = \$self->[bufRecByCnnx]{$appSock}; + my ($addr, $peerPort, $senderName); + my $nlIndex; + my $mess; + + +# syswrite ($trace, sprintf ("_getMessages from %s[%s]:%d\n", +# (gethostbyaddr ($appSock->peeraddr(),AF_INET))[0], +# join (':', unpack ('C4', $appSock->peeraddr())), +# $appSock->peerport())); + + # on recupere le message + unless (sysread ($appSock, $$bufferRef, 65536, length ($$bufferRef))) { + # message null : broken pipe, ça s'est deconnecte a l'autre bout + # on vire ce fd de la boucle d'evenements + # print ("DBG> _getMessages, recv err, calling removeFileDesc.\n"); + # Bon la il faudra un jour clarifier ce bordel, lister toutes + # les facons dont un couple d'applis connectées peuvent sortir et + # eviter les dead lock qui doivent subsister. +# syswrite ($trace, sprintf ("_getMessage : bad FD[%d] detected errno=%d\n", $appSock->peerport(), $!)); + $self->_removeFileDescriptor ($appSock, '_getMessages') unless ($!{EAGAIN}); + return; + } + + $addr = $appSock->peeraddr(); + $peerPort = $appSock->peerport() ; + $senderName = $self->[cnnxion]{"$addr:$peerPort"} ; + $senderName = "NONAME" unless $senderName; + $senderName =~ s/^\004//g; + + while (($nlIndex= index ($$bufferRef, "\n")) > 0) { + $mess = substr ($$bufferRef, 0, $nlIndex, ''); + substr ($$bufferRef, 0, 1, ''); + # on recupere les 3 champs : le type, le numero de regexp, les valeurs + my ($type, $id, $valeurs) = $mess =~ /^(\d+) + \s+ + (\d+) + \002 + (.*)/x ; + + # si ca a chie on rale + (carp "Warning in Ivy::_getMessages, ill-formated message \'$mess\'" and return) unless defined $type ; + +# syswrite ($trace, "_getMessage type = $type\n"); + + # sinon on fait en fonction du type de message + if ($type == MSG) { # M S G + # on recupere le couple call back, regexp correspondant + # a l'identifiant et on appelle la fonction avec les parametres + # traites par la regexp + +# if ((ref ($self->[recCbList][$id]) eq 'ARRAY') && +# (my @cb = @{$self->[recCbList][$id]->[1]})) { + + + if (my @cb = @{$self->[recCbList][$id]->[1]}) { + my $cb = shift @cb; + + # cleaning $sendername with previous \004 used for connection status + # bindRegexp avancé : on envoie une liste nom adresse port au lieu du nom + $senderName = [$senderName, _getHostByAddr ($addr), $peerPort] + if ($self->[recCbList][$id]->[2] == CALL_BY_REF); + + if (ref($cb) ne 'CODE') { + my $method = shift @cb; + # on split sur ETX + $cb->$method($senderName, @cb, split ("\003", $valeurs)) ; + } else { + &$cb ($senderName, @cb, split ("\003", $valeurs)) ; + } + + if ($self->[recCbList][$id]->[2] == BIND_ONCE) { + # on vire la regexp des regexps verifiées + # print ("DBG> receive BIND ONCE message\n"); + $self->[recCbList][$id]->[1] = []; + # on envoie le mesage delregexp + foreach my $fd (values %{$self->[sockList]}) { + _univSend ($self, $fd, sprintf (MSG_FMT, DELREGEXP, $id, "")); + } + } + } else { + #_sendErrorTo ($appSock, "REEGXP ID $id inconnue"); + carp ("Warning in Ivy::_getMessages, received an unknown message or double one shot message ". + "with id $id from $senderName :\n\"$mess\"") if $^W; + } + } + + elsif ($type == BYE) { +# syswrite ($trace, "reception d'un bye\n"); + $self->_removeFileDescriptor ($appSock, '_getMessages[BYE]'); # B Y E + } + + elsif ($type == REGEXP) { # R E G E X P + # on ajoute une fonction traitant la regexp et envoyant le + # message sur le bon fd dans la liste des fonctions de filtrage + # ca permet de compiler les regexp avec 'once' donc une + # fois pour toute, et ainsi optimiser la vitesse de + # filtrage des messages a envoyer +# print "DBG> REGEXP from $senderName '$id' '$valeurs'\n"; + my $host = _getHostByAddr ($addr); + if ($self->_toBePruned ($senderName, $valeurs)) { + &_scanConnStatus ($self, $senderName, 'filtered', "$host:$peerPort" , $valeurs); + next; + } else { + &_scanConnStatus ($self, $senderName, 'subscribing', "$host:$peerPort" , $valeurs); + } + # on affecte la nouvelle regexp a un id + $self->[sendRegListSrc]{$appSock}->[$id] = $valeurs; +# printf ("DBG> add id $id regexps=[$valeurs]\n"); + $self->[sendRegList]{$appSock}->[$id] = + eval ('sub {@{$_[1]} = ${$_[0]} =~ /($valeurs)/io;}'); + + +# $self->[sendRegList]{$appSock}->[$id] = eval <<'_EOL_'; +# sub { +# use strict; +# if (my @args = ${$_[0]} =~ /($valeurs)/io) { +# shift @args; +# $args[$#args] .= "\003" if @args; +# $self->[bufEmiByCnnx]->{$appSock} .= +# sprintf (MSG_FMT, MSG, $id, join ("\003",@args)); +# my $sent = send ($appSock, $self->[bufEmiByCnnx]->{$appSock}, 0); +# unless (defined $sent) { +# # y a rien à faire +# } elsif ($sent == length ($self->[bufEmiByCnnx]->{$appSock})) { +# $self->[bufEmiByCnnx]->{$appSock} = ""; +# } elsif ($sent >= 0) { +# substr ($self->[bufEmiByCnnx]->{$appSock}, 0, $sent, ''); +# } else { +# $self->_removeFileDescriptor ($appSock) ; +# } +# } +# return 1; +# } +# _EOL_ + } + + elsif ($type == ERROR) { # E R R O R + carp ("Warning in Ivy::_getMessages, error message received from ". + "$senderName : \"$valeurs\""); + } + + elsif ($type == DELREGEXP) { # D E L R E G E X P + # on vire la regexp des regexps verifiées +# printf ("DBG> delete id $id\n"); + $self->[sendRegList]{$appSock}->[$id] = undef ; + my $regexp = $self->[sendRegListSrc]{$appSock}->[$id]; + $self->[sendRegListSrc]{$appSock}->[$id] = undef; + my $host = _getHostByAddr ($addr); + &_scanConnStatus ($self, $senderName, 'unsubscribing', "$host:$peerPort" , $regexp); + } + + elsif ($type == ENDREGEXP) { # E N D R E G E X P + # on envoie le message ready uniquement a celui qui nous + # a envoye le message endregexp, et uniquement si on a + # à la fois envoyé le end regexp, et reçu le endregexp de l'autre + $self->[readyToSend]->{"$addr:$peerPort"} = 0 unless + exists $self->[readyToSend]->{"$addr:$peerPort"}; + if (++($self->[readyToSend]->{"$addr:$peerPort"}) == 2) { + $self->_sendMsgTo ($appSock, \$self->[messWhenReady]); + } + + # on passe de l'etat Connecte a l'etat Ready + $self->[cnnxion]{"$addr:$peerPort"} =~ s/^\004//g; + $senderName = $self->[cnnxion]{"$addr:$peerPort"}; + unless (exists $self->[appliList]{$senderName}) { + $self->[appliList]{$senderName} = [$appSock]; + } + else { + push @{$self->[appliList]{$senderName}}, $appSock; + } + + my $host = _getHostByAddr ($addr); + $self->_scanConnStatus ($senderName, "new", "$host:$peerPort", undef); + } + + elsif ($type == APP_NAME) { + # etat Connecte1558 + if (($self->[appName] eq $valeurs) && $^W) { + carp "\033[1mWarning in Ivy::_getMessages, there is already an instance of ". + "$self->[appName] \033[m" ; + } + + $senderName = $valeurs; + $self->[cnnxion]{"$addr:$peerPort"} = "\004$valeurs"; + $nameByHandle{$appSock}=_getHostByAddr($addr) .":$peerPort"; + } + + elsif ($type == DIRECT_MSG) { + + if (defined $self->[directCbList][$id]) { + my @cb = @{$self->[directCbList][$id]}; + my $cb = shift @cb; + if (ref($cb) ne 'CODE') { + my $method = shift @cb; + $cb->$method(@cb, $valeurs); + } + else { + &$cb (@cb, $valeurs); + } + } + else { + $self->_sendErrorTo ($appSock, "DIRECT ID $id inconnue"); + carp "Warning in Ivy::_getMessages, received a DIRECT message with ". + "unknown id $id from $senderName :\n\"$mess\""; + } + } + + elsif ($type == DIE) { + # il faut quitter + # on commence par appeler la callback de fin + my @cb = @{$onDieFunc}; + my $cb = shift @cb; + if (ref($cb) ne 'CODE') { + my $method = shift @cb; + $cb->$method(@cb); + } + else { + &$cb (@cb); + } + # on avertit les autres qu'on se barre +# my $adr = $self->_inetAdrByName ($senderName) ; +# carp "Notice in Ivy::_getMessages, received a suicide request from " . "$senderName ($adr) ... exiting" if $^W; + # adios + Ivy::exit (); + + } + + elsif ($type == PING) { + # si on recois un ping, on envoie un pong + $self->_pong ($appSock, $id); + } + + elsif ($type == PONG) { + if (exists $self->[pongQueue]->{$appSock}) { + my ($pingid, $time, $funcRef) = @{$self->[pongQueue]->{$appSock}}; +# printf ("DBG>>> stocked Id = $pingid;; message id = $id\n"); + &$funcRef ((Time::HiRes::time()-$time)*1000, $nameByHandle{$appSock}) + if ($pingid == $id); + delete $self->[pongQueue]->{$appSock}; + } + } + + elsif ($type == DUMP_TABLES) { + # si on recois un ping, on envoie un pong + $self->_dumpTable ($appSock, $id); + } + + elsif ($type == DUMP_TABLES_FILE) { + if (exists $self->[pongQueue]->{$appSock}) { + my ($pingid, $funcRef) = @{$self->[pongQueue]->{$appSock}}; +# printf ("DBG>>> stocked Id = $pingid;; message id = $id valeur=$valeurs\n"); + &$funcRef ($valeurs, $nameByHandle{$appSock}) + if ($pingid == $id); + delete $self->[pongQueue]->{$appSock}; + } + } + + + else { + _$self->sendErrorTo ($appSock, "TYPE DE MESS $type inconnu"); + warn ("Warning in Ivy::_getMessages, received a message of unknown ". + " type $type from $senderName :\n\"$mess\""); + } + } + return 0; +} # end _getMessages + +############### METHODE SEND WANTED REGEXP +sub _sendWantedRegexp ($$) +{ + my ($self, $appSock) = @_; + my $connSock = $self->[connSock] ; + my $msg; + # on envoie le message "Nom appli" + _univSend ($self, $appSock, sprintf (MSG_FMT, APP_NAME, $connSock->sockport, + $self->[appName])); + # on envoie les regexps + for (my $id = 0; $id <= $#{$self->[recCbList]}; $id++) { + next unless defined $self->[recCbList][$id]->[1]->[0]; + $msg = sprintf (MSG_FMT, REGEXP, $id, $self->[recCbList][$id]->[0]); + _univSend ($self, $appSock, \$msg); + # print sprintf ("DBG> %s %d %s\n", + # 'REGEXP', $id, $self->[recCbList][$id]->[0]); + } + # on envoie le message de fin d'envoi de regexps + _univSend ($self, $appSock, sprintf (MSG_FMT, ENDREGEXP, 0, "")); + + # on envoie le message ready uniquement a celui qui nous + # a envoye le message endregexp, et uniquement si on a + # à la fois envoyé le end regexp, et reçu le endregexp de l'autre + my $addr = $appSock->peeraddr(); + my $peerPort = $appSock->peerport() ; + $self->[readyToSend]->{"$addr:$peerPort"} = 0 unless + exists $self->[readyToSend]->{"$addr:$peerPort"}; + if (++($self->[readyToSend]->{"$addr:$peerPort"}) == 2) { + $self->_sendMsgTo ($appSock, \$self->[messWhenReady]); + } +} # end _sendWantedRegexp + +############### METHODE SEND LAST REGEXP TO ALLREADY CONNECTED +sub _sendLastRegexpToAllreadyConnected ($$) +{ + my ($self, $id) = @_; + my $msg = sprintf (MSG_FMT, REGEXP, $id, $self->[recCbList][$id]->[0]); + foreach my $fd (values %{$self->[sockList]}) { + _univSend ($self, $fd, \$msg); + } +} # end _sendLastRegexpToAllreadyConnected + +############### METHODE INET ADR BY NAME +sub _inetAdrByName ($$) { + + my ($self, $appName) = @_; + + my $addrInet = (grep ($self->[cnnxion]{$_} eq $appName, + keys %{$self->[cnnxion]}))[0]; + + return ("unknow") unless defined $addrInet; + + my ($port) = $addrInet =~ /:(.*)/; + my $addr = substr ($addrInet,0,4); + my $host = _getHostByAddr ($addr); + return "$host:$port"; +} # end _inetAdrByName + + +############### PROCEDURE REMOVE FILE DESCRIPTOR +sub _removeFileDescriptor ($$$) +{ + my ($self, $fd, $callBy) = @_; + + unless (defined $fd) { +# syswrite ($trace, "_removeFileDescriptor : *UN*inititialized fd, don't do anything\n"); + return; + } + + + + # on s'est deja occupe de lui + return unless exists $self->[sockList]->{$fd}; + my $diedAppName = _getNameByFileDes ($self, $fd); + + # on efface les structures de donnees associees au fd + # on vire ce fd des fd a scruter dans la bcle d'evenements + # uniquement si on est dans le thread principal + # sinon le select merde salement sur ce coup + my $peerPort = $fd->peerport() ; + $peerPort = 0 unless defined $peerPort; + +# syswrite ($trace, sprintf ("_removeFileDescriptor : suppression dans le fdset de %s[%s]:%d\n", +# (gethostbyaddr ($fd->peeraddr(),AF_INET))[0], +# join (':', unpack ('C4', $fd->peeraddr())), +# $fd->peerport())); + &$fileEventFunc ($fd, '') ; + delete $self->[sendRegList]->{$fd}; + delete $self->[sendRegListSrc]->{$fd}; + delete $self->[sockList]->{$fd}; + delete $self->[bufRecByCnnx]->{$fd}; + delete $self->[bufEmiByCnnx]->{$fd}; + + $fd->close(); + + # remove all occurence of fd from $self->[appliList] + foreach my $name (keys %{$self->[appliList]}) { + for (my $i=0; $i < scalar (@{$self->[appliList]{$name}}); $i++) { + my $fdp = $self->[appliList]{$name}->[$i]; + if ($fd eq $fdp) { + delete ($self->[appliList]{$name}->[$i]); + } + } + } + + unless (defined $diedAppName) { + warn "Ivy::__removeFileDescriptor (called by $callBy) : disconnection of NONAME\n" if $^W; + return; + } + + my $addrInet = (grep ($self->[cnnxion]{$_} eq $diedAppName, + keys %{$self->[cnnxion]}))[0]; + + unless (defined $addrInet) { + carp "Warning in Ivy::_removeFileDescriptor (called by $callBy) : disconnection of $diedAppName with ". + "addrInet not defined\n"; + return; + } + +# syswrite ($trace, +# sprintf ("DBG> _removeFileDescriptor : deconnection de %s ($diedAppName)\n", $self->_inetAdrByName ($diedAppName))); + + delete $self->[cnnxion]{$addrInet}; + delete $nameByHandle{$fd}; + + # on vire l'entree correspondant a ce canal dans la liste des + # regexps par canal + + my $addr = substr ($addrInet,0,4); + my $host = _getHostByAddr ($addr); + $self->_scanConnStatus ($diedAppName, "died", "$host:$peerPort", undef) ; +} # end _removeFileDescriptor + + +############### METHODE SEND ERROR TO +sub _sendErrorTo ($$$) +{ + my ($self, $fd, $error) = @_; + + _univSend ($self, $fd, join (' ', ERROR, "0\002$error\n")); +} # end _sendErrorTo + + +############### METHODE PONG +sub _pong ($$$) +{ + my ($self, $fd, $pongId) = @_; + +# printf ("DBG>>> PONG Id = $pongId\n"); + _univSend ($self, $fd, sprintf (MSG_FMT, PONG, $pongId, "")); +} # end _pong + +############### METHODE _DUMP_TABLE +sub _dumpTable ($$$) +{ + my ($self, $fd, $pongId) = @_; + my ($fh, $tmpFileName) = File::Temp::tempfile("IvyTable_XXXXX", SUFFIX => '.txt', DIR => $ENV{'TMPDIR'}); +# print ("DBG> tmpFileName=$tmpFileName\n"); + my $ofs = $,; + $, = ', '; + print $fh <[appName] +selectTimout = $selectTimout +loopMode = $loopMode +messWhenReady = $self->[messWhenReady] +blockOnSlowAgent = $self->[blockOnSlowAgent] +topicRegexps = @{$self->[topicRegexps]} +useMulticast = $self->[useMulticast] +broadcastPort = $self->[broadcastPort] +readyToSend = %{$self->[readyToSend]} +neededApp = @{$self->[neededApp]} + +TABLE DE REGEXPS : + +EOF + + foreach my $appSock (keys %{$self->[sendRegListSrc]}) { + my $appName = $self->_getNameByFileDes ($appSock); +# my $ap2 = $self->[cnnxion]{$nameByHandle{$appSock}}; +# $ap2 =~ s/^\004//g; + print $fh "POUR l'application ${appName}[$nameByHandle{$appSock}] : \n"; + for (my $idx=0; $idx < scalar (@{$self->[sendRegListSrc]{$appSock}}); $idx++) { + printf $fh ("id[$idx] => '%s'\n", + defined ($self->[sendRegListSrc]{$appSock}->[$idx]) + ? $self->[sendRegListSrc]{$appSock}->[$idx] + : "DELETED or UNDEFINED"); + } + # print $fh join ("\n", @{$self->[sendRegListSrc]{$appSock}}); + print $fh "\n-----------------------------------\n"; + } + + close ($fh); + $, = $ofs; + + +# printf ("DBG>>> DUMP_TABLE Id = $pongId tmpFile= $tmpFileName\n"); + _univSend ($self, $fd, sprintf (MSG_FMT, DUMP_TABLES_FILE, $pongId, $tmpFileName)); +} # end _pong + + +############### METHODE SEND ERROR TO +sub _sendDieTo ($$) +{ + my ($self, $fd) = @_; + + _univSend ($self, $fd, join (' ', DIE, "0\002\n")); +} # end _sendDieTo + + +############### METHODE SEND MSG TO +sub _sendMsgTo ($$\$) +{ + my ($self, $fd, $msg) = @_; + my $id = -1; + my $total = 0; + my @args = (); # tableau passé en reference aux fonctions + # anonymes compilées par eval pour receuillir les arguments filtrés + # par les regexp à envoyer sur le tuyau + my $sent; + my $regexpFunc; + my $bufEmiRef; + + # pour toutes les fonctions de filtrage de regexp + foreach $regexpFunc (@{$self->[sendRegList]{$fd}}) { + $id++; + # $total += &{$regexpFunc} (\$msg) if defined $regexpFunc; + next unless defined $regexpFunc; + next unless &{$regexpFunc} ($msg, \@args); + next unless @args; + $total ++; + shift @args; + $args[$#args] .= "\003" if @args; + $bufEmiRef = \($self->[bufEmiByCnnx]->{$fd}); + my $enCongestion = $$bufEmiRef ? 1 : 0; + $$bufEmiRef .= sprintf (MSG_FMT, MSG, $id, join ("\003",@args)); + next if $enCongestion; + $sent = send ($fd, $$bufEmiRef, 0); + unless (defined $sent) { + # y a rien à faire + } elsif ($sent == length ($$bufEmiRef)) { + $$bufEmiRef = ""; + } elsif ($sent >= 0) { + substr ($$bufEmiRef, 0, $sent, ''); + _callCongestionCb ($self, $fd, 1); + if ($self->[blockOnSlowAgent]) { + my $win = ''; + vec($win, fileno ($fd), 1) = 1; + select (undef, $win, undef, undef); + } + } else { + $self->_removeFileDescriptor ($fd, '_sendMsgTo') ; + } + } + + return ($total); +} # end _sendMsgTo + + + +############### METHODE UNIV SEND +sub _univSend ($$$) +{ + my ($self, $fd, $msg) = @_; + + unless (defined $fd) { + carp "WARN _univSend fd is undefined, message will not be sent\n" if $^W; + afficher la call stack + } + my $bufEmiRef = \($self->[bufEmiByCnnx]->{$fd}); + my $enCongestion = $$bufEmiRef ? 1 : 0; + if (ref $msg) { + $$bufEmiRef .= $$msg; + } else { + $$bufEmiRef .= $msg; + } + return if $enCongestion; + my $sent = send ($fd, $$bufEmiRef, 0); + unless (defined $sent) { + # y a rien à faire + } elsif ($sent == length ($$bufEmiRef)) { + $$bufEmiRef = ""; + } elsif ($sent >= 0) { + substr ($$bufEmiRef, 0, $sent, ''); + _callCongestionCb ($self, $fd, 1); + if ($self->[blockOnSlowAgent]) { + my $win = ''; + vec($win, fileno ($fd), 1) = 1; + select (undef, $win, undef, undef); + } + } else { + if ($!{EWOULDBLOCK}) { + # Aucun octet n'a été envoyé, mais le send ne rend pas 0 + # car 0 peut être une longueur passée au send, donc dans ce cas + # send renvoie -1 et met errno a EWOULDBLOCK + _callCongestionCb ($self, $fd, 1); + if ($self->[blockOnSlowAgent]) { + my $win = ''; + vec($win, fileno ($fd), 1) = 1; + select (undef, $win, undef, undef); + } + } + $self->_removeFileDescriptor ($fd, '_univSend') unless ($!{EAGAIN} || $!{EWOULDBLOCK}|| $!{EINTR} || $!{EMSGSIZE} || $!{ENOBUFS}) + } +} + + +############### PROCEDURE TK FILE EVENT +sub _tkFileEvent ($$) +{ + my ($fd, $cb) = @_; + + Tk::fileevent ('', $fd, 'readable', $cb) ; +} # end _tkFileEvent + + +############### PROCEDURE SCAN AFTER +sub _scanAfter () +{ + my $stamp = gettimeofday (); + $selectTimout = MAX_TIMOUT; + foreach my $afk (keys %afterList) { + my $af = $afterList{$afk}; + # si ce timer est a declencher + if ($af->[2] <= $stamp) { + # on traite : le temps de declencher le cb est arrive + if (ref $af->[3] eq 'CODE') { + &{$af->[3]}; + } + else { + my ($cb, @args) = @{$af->[3]}; + &$cb (@args); + } + # si c'est un repeat on le reconduit + if ($af->[0]) { + $af->[2] = $stamp + $af->[1] ; + $selectTimout = $af->[1] if $af->[1] < $selectTimout; + } + else { + # si c'est un after on le vire + afterCancel ($afk); + } + } + else { + my $timeTotrigg = $af->[2] - $stamp; + $selectTimout = $timeTotrigg if $timeTotrigg < $selectTimout; + } + } +} # end _scanAfter + + +############### METHODE SCAN CONN STATUS +sub _scanConnStatus ($$$@) +{ + my ($self, $appname, $status, @addr) = @_; + + my (%readyApp, @nonReadyApp); + + foreach (values %{$self->[cnnxion]}) { + next if $_ eq "1"; + $readyApp{$_}++ unless /^\004/; # connecte mais pas ready + } + + foreach (@{$self->[neededApp]}) { + push (@nonReadyApp, $_) unless exists $readyApp{$_}; + } + + # par compatibilite avec l'ancienne version, on envoie comme + # deux premiers arguments une ref sur la liste des applis presentes, + # une ref de la liste des applis absentes, mais on rajoute comme troisieme + # argument une ref sur une table de hash : comme clef les + # applis presentes, comme valeur le nombre d'applis ayant ce nom, + # de facon a detecter plus facilement quand il y a trop d'applis + # de meme nom sur le meme bus. + # les nouveaux arguments sont: + # le 4eme arg est l'appli nouvelle, deconnecté, qui s'abonne ou se desabonne + # le 5eme arg est le statut (actuellement: 'subscribing'|'filtered'|'unsubscribing'|'died'|'new') + # le 6eme arg est l'addresse de la machine sur laquelle tourne l'agent + # le 7eme arg est la regexp si c'est un abonnement ou desabonnement + &{$self->[statusFunc]} ([keys %readyApp], \@nonReadyApp, \%readyApp, $appname, $status, @addr); +} # end _scanConnStatus + + +############### METHODE TO BE PRUNED +sub _toBePruned ($$$) +{ + my ($self, $from, $regexp) = @_; + + # for message purposes, removing the \004 which indicates the connection status + + my ($cleaned_from) = $from =~ /\004?(.*)/ ; + # print "DBG> $from s'abonne à nouvelle regexp '$regexp'\n"; + + # testing the received regexp for avoiding illformed regexp + eval {my $test = "a" =~ /$regexp/ } ; + if ($@) { + warn "Warning in Ivy::_toBePruned, receiving ill-formed regexp: '$regexp' from '$cleaned_from'" ; + return 1}; + + + # si il n'y a pas de liste de sujets, on ne + # filtre pas + return 0 unless @{$self->[topicRegexps]}; + + unless ($regexp =~ /^\^/) { + #print "DBG> regexp non ANCREE de $from : $regexp\n"; + return (0); + } + + if ($regexp =~ /^\^(\w+)/) { + my $topic = $1; + if (grep (/$topic/, @{$self->[topicRegexps]})) { + # on a trouve ce topic : on ne filtre pas la regexp + #print "DBG> on garde de $from : $regexp\n"; + return (0); + } + #print "DBG> on ELIMINE de $from : $regexp\n"; + return (1); + } + else { + #print "DBG> on garde de $from : $regexp\n"; + return (0); + } +} # end _toBePruned + + +############### PROCEDURE PARSE IVY BUS PARAM +sub _parseIvyBusParam ($) +{ + my $ivyBus = shift; + + my ($ivyNetworks, $ivyPort) = $ivyBus =~ /^(.*):(.*)/; + + my $useMulticast = 0; + + croak ("Error in Ivy::_parseIvyBusParam, illegal bus address format: $ivyBus\n") + unless $ivyPort =~ /^\d+$/; + + my @ivyAddrInet = (); + + $ivyNetworks =~ s/ //g; + my @broadcastAddrs = split (',', $ivyNetworks); + + foreach my $netAddr (@broadcastAddrs) { + $netAddr = BROADCAST_ADDRS if + (($netAddr eq '') || ($netAddr =~ /^127/) || ($netAddr =~ /^loopback/)); + + # deux cas de figure : on a un nom de sous reseau, ou + # une adresse ip de la forme \d+.\d+.... + my $netAddrInet; + + if ($netAddr !~ /^\d+(.\d+)+/) { + # on a un nom de reseau, il faut trouver son adresse ip + # on contourne un bug : Si les adresses sont incompletes + # dans la map network : 143.196.53 au lieu de 143.196.53.255 + # getbyname renvoie une adresse de type 0.143.196.53. + # on doit donc faire un decalage des octets vers la gauche, + # chaque 0 qui sort a gauche est remplace par un 255 a droite. + if ($useMulticast) { + carp "Warning in Ivy::_parseIvyBusParam, cannot mix broadcast and multicast.\n\tSkipping broadcast address $netAddr"; + next; + } + my $networkAddr = getnetbyname ($netAddr); + unless (defined $networkAddr) { + warn ("Warning in Ivy::_parseIvyBusParam, network $netAddr is unknown\n"); + next; + } + + my @dummyNetAddr = unpack ("CCCC", pack ('N', $networkAddr)); + while (!$dummyNetAddr[0]) { + # tant que le premier octet est 0, on decale vers la gauche et + # ont fait rentrer un 255 sur la droite + shift @dummyNetAddr; + push (@dummyNetAddr, 255); + } + $netAddrInet = pack ("CCCC", @dummyNetAddr); + } else { + # on a deja une adresse ip, on rajoute les .255 + # a la fin s'ils ont ete omis. + ($netAddr .= ".255.255.255") =~ s/^((\d+\.){3}\d+).*/$1/; + my ($dClass) = $netAddr =~ /^(\d*)/ ; + if ($dClass >= 224 and $dClass <= 239) { # this adress is for multicast + if (!$useMulticast) { # removing all broadcast addresses + carp "Warning in Ivy::_parseIvyBusParam, cannot mix broadcast and multicast.\n\tSkipping previous broadcast address" if (scalar @ivyAddrInet); + @ivyAddrInet = (); + $useMulticast = 1; + } + } + else { # this adress is for broadcast + if ($useMulticast) { # we are already in multicast, forget this new address + carp "Warning in Ivy::_parseIvyBusParam, cannot mix broadcast and multicast.\n\tSkipping broadcast address $netAddr"; + next; + } + } + $netAddrInet = inet_aton ($netAddr); + push (@ivyAddrInet, pack_sockaddr_in ($ivyPort, $netAddrInet)); + } +} + unless (scalar @ivyAddrInet) { + push (@ivyAddrInet, pack_sockaddr_in ($ivyPort, inet_aton(BROADCAST_ADDRS))); + } + + foreach my $ivyAddr (@ivyAddrInet) { + my ($port, $iaddr) = unpack_sockaddr_in($ivyAddr); + my $iaddrH = unpack ("H8",$iaddr); + my $iaddrInt = inet_ntoa($iaddr); + if ($useMulticast) {print "Multicasting"} + else {print "Broadcasting"} + print " on network $iaddrInt ($iaddrH) on port $ivyPort\n"; + } + + return ($useMulticast, $ivyPort, \@ivyAddrInet); +} # end _parseIvyBusParam + +############# Procedure _SUBSTITUTE ESCAPED CHAR +sub _substituteEscapedChar ($$) +{ + my ($scope, $reg) = @_; + + my %escapeRegexp = REG_PERLISSISME; + # Si on fait la substitution dans une classe de caractere + # on elimine les crochets. + grep ($escapeRegexp{$_} =~ s/[\[\]]//g, keys %escapeRegexp) + if ($scope eq 'inside') ; + + $reg =~ s/\\([wWsSdDne])/$escapeRegexp{$1}/ge; + return $reg; +} # end _substituteEscapedChar + +############# Procedure __CALL CONGESTION CALLBACK +sub _callCongestionCb ($$$) +{ + my ($self, $fd, $congestion) = @_; + my $addrInet; + my $appName = _getNameByFileDes ($self, $fd); + + if ($loopMode == LOCAL_MAINLOOP) { + if ($congestion) { + $localLoopSelWrite->add ($fd); + } else { + $localLoopSelWrite->remove ($fd); + } + } else { + if ($congestion) { + Tk::fileevent ('', $fd, 'writable', + sub { + my $bufEmiRef = \($self->[bufEmiByCnnx]->{$fd}); + my $sent = send ($fd, $$bufEmiRef, 0); + unless (defined $sent) { + # y a rien à faire + } elsif ($sent == length ($$bufEmiRef)) { + $$bufEmiRef = ""; + _callCongestionCb ($self, $fd, 0); + } elsif ($sent >= 0) { + substr ($$bufEmiRef, 0, $sent, ''); + } else { + $self->_removeFileDescriptor ($fd, '_callCongestionCb') unless ($!{EAGAIN} || $!{EWOULDBLOCK}|| + $!{EINTR} || $!{EMSGSIZE} || $!{ENOBUFS}); + } + }); + } else { + Tk::fileevent ('', $fd, 'writable', ''); + } + } + + if (defined $appName) { + $addrInet = $self->_inetAdrByName ($appName); + } else { + $appName = 'NONAME'; $addrInet = 'undef'; + } + + &{$self->[slowAgentFunc]} ($appName, $addrInet, $congestion); +} + +############# Procedure __GET NAME BY FILEDES + +sub _getNameByFileDes ($$) +{ + my ($self, $fd) = @_; + my $appName = 'NONAME'; + EXT_LOOP: + foreach my $name (keys %{$self->[appliList]}) { + foreach my $fdp (@{$self->[appliList]{$name}}) { + if ($fd eq $fdp) { + $appName = $name; + last EXT_LOOP; + } + } + } + return $appName; +} + + +sub _getHostByAddr ($) +{ + my $addr = shift; + + unless (defined $addr) { + warn "_getHostByAddr : no argument\n"; + return "EMPTY_ADDR"; + } elsif ((length ($addr)) != 4) { + warn "_getHostByAddr : bad argument (len != 4)\n"; + return "BAD_ADDR"; + } + + $hostNameByAddr{$addr} = (gethostbyaddr ($addr, AF_INET))[0] || inet_ntoa($addr) + unless exists $hostNameByAddr{$addr}; + + return $hostNameByAddr{$addr}; +} + + + +sub _regexpGen ($$$) +{ + my ($min, $max, $withDecimal) = @_; + +# print ("DBG> min=$min max=$max withDecimal=$withDecimal\n"); + + ($min, $max) = ($max, $min) if $min > $max; + + $min = int ($min); + $max = int ($max); + + my ($decimalPart,$boundDecimalPart, $reg) = ('') x 3; + + if ((!defined $withDecimal) || ($withDecimal ne 'i')) { + $decimalPart = '(?:\.\d+)?'; + $boundDecimalPart = '(?:\.0+)?'; + } + + if ($min == $max) { + $reg= $min; + } elsif ($min < 0) { + if ($max < 0) { + $reg = '\-(?:' . _strictPosRegexpGen (-$max, -$min, $decimalPart, $boundDecimalPart). ')'; + } elsif ($max == 0) { + $reg = "(?:0${boundDecimalPart})|(?:-0${decimalPart})|-(?:" . + _strictPosRegexpGen (1, -$min, $decimalPart,$boundDecimalPart ) . ')'; + } else { + $reg ='(?:' . regexpGen ($min, 0,$withDecimal) . '|' . regexpGen (0, $max, $withDecimal). ')' ; + } + } elsif ($min == 0) { + $reg = "(?:0${decimalPart})|" . _strictPosRegexpGen (1, $max, $decimalPart,$boundDecimalPart) ; + } else { + $reg = _strictPosRegexpGen ($min, $max, $decimalPart,$boundDecimalPart); + } + return ("(?:$reg)(?![\\d.])"); +} + + + +sub _strictPosRegexpGen ($$$$) +{ + my ($min, $max, $decimalPart,$boundDecimalPart) = @_; + carp "min[$min] sould be <= max[$max]\n " unless ($min <= $max); + carp "min[$min] and max[$max] should be strictly positive\n " unless (($min >0) && ($max > 0)); + +# my $fixBound ; +# $max -- if ($fixBound = ($decimalPart ne '') && ((int ($max /10) *10) != $max)); + + if ($min == $max) { + return ($min); + } + + $max -- ; + + my @regexps ; + my $nbRank = length ($max); + my ($rank, $lmax) ; + + do { + ($lmax, $rank) = _nextMax ($min, $max); + push (@regexps, _genAtRank ($min, $lmax, $rank)); + $min = $lmax+1; + } while ($lmax != $max) ; + + my $regexp = join ('|', map ("(?:$_$decimalPart)", @regexps)); + $max ++; + $regexp .= "|(?:${max}$boundDecimalPart)"; + + return ($regexp); +} + + +sub _genAtRank ($$$) +{ + my ($min, $max, $rank) = @_; + my $reg = _genPreRank ($min, $max, $rank); + $reg .= _genRank ($min, $max, $rank); + $reg .= _genPostRank ($rank); + return ($reg); +} + + +sub _nextMax ($$) +{ + my ($min, $max) = @_; + my $nextMax; + + # on a les unités au debut + my (@min) = reverse split ('', $min); + my (@max) = reverse split ('', $max); + my $nbDigit = scalar (@max); + my ($rankRev, $rankForw, $rank) = (0, $nbDigit-1, 0) ; + + # on rajoute des 0 en face si min n'a pas le même nombre de digits que max + push (@min, ('0') x ($#max - $#min)) if ($#min != $#max); + + # on calcule le rang concerné par le prochain intervale + + # en partant des unitées (digit de poids faible), premier champ de min != 0 + while (($min[$rankRev] == 0) && $rankRev < $nbDigit) {$rankRev++} ; + # printf ("DBG> min = $min[0]|$min[1]|$min[2] rankRev=$rankRev, nbDigit=$nbDigit\n"); + + # en partant du digit de poids fort, premier champ de max != du même champ + while (($min[$rankForw] == $max[$rankForw]) && $rankForw > 0) {$rankForw--}; +# printf ("DBG> min = $min[0]|$min[1]|$min[2] rankForw=$rankForw, nbDigit=$nbDigit\n"); + + if ($rankForw <= $rankRev) { + $rank = $rankForw; + $min[$rankForw]= $max[$rankForw] - ($rankForw ? 1 : 0); + @min[0 .. $rankForw-1]= (9) x ($rankForw); + } else { + $rank = $rankRev; + @min[0 .. $rankRev]= (9) x ($rankRev+1); + } + +# print ("DBG> NEWmin = $min[0]|$min[1]|$min[2]\n"); + $nextMax = join ('',reverse @min); + $nextMax = $max if $nextMax > $max; + + return ($nextMax, $rank+1); +} + + + +sub _genPreRank ($$$) +# les invariants du min +{ + my ($min, $max, $rank) = @_; + + $min = $min + 0; # force scalar to be evaluated as numérical + $max = $max + 0; # instead string (eliminate leading zeroes) + my $a = substr ($min, 0, (length ($min) - $rank)); + my $b = substr ($max, 0, (length ($max) - $rank)); + carp "genPreRank error $min, $max are not invariant @ rank $rank\n" if $a ne $b; + return $a; +} + + +sub _genRank ($$$) +{ + my ($min, $max, $rank) = @_; + my $syl ; + + my $a = substr ($min, (length ($min) - $rank), 1); + my $b = substr ($max, (length ($max) - $rank), 1); + + $min = _min ($a, $b); + $max = _max ($a, $b); + + if (($min == 0) && ($max == 9)) { + $syl = '\d'; + } elsif ($min == $max) { + $syl = $min; + } elsif ($max == $min+1) { + $syl = "[${min}${max}]" + } else { + $syl = "[$min-$max]"; + } + + return ($syl); +} + + +sub _genPostRank ($) +{ + my $rank = shift; + + return "" if ($rank <= 1); + return ($rank == 2) ? '\d' : sprintf ('\d{%d}', $rank -1); +} + +sub _max ($$) +{ + my ($a,$b) = @_; + return ($a > $b) ? $a : $b; +} + +sub _min ($$) +{ + my ($a,$b) = @_; + return ($a > $b) ? $b : $a; +} + + +1; + +__END__ + +=head1 NAME + +Ivy - Perl extension for implementing a software bus + +=head1 SYNOPSIS + +use Ivy; + +=head1 DESCRIPTION + +The Ivy perl module implements a software bus that provides easy +communication between applications. Messages are broadcast as ASCII strings +over a network defined by a list of domains and a port. +Messages are received if they match a regular expressions and if your application +is on the same network as remote ones. +Before receive or send message you must call 'init', and 'new' class methods, +followed by 'start' method. +When you quit your application don't forget to call 'exit' class methods. + +=head1 CLASS METHODS + +=over 2 + +=item B + + Ivy->init(...); + Ivy::init(...); + +Allows one to define global parameters which may be used as default ones +at object creation time. + +Parameters are : + +=over 4 + +=item B<-loopMode =E 'TK'|'LOCAL'> + +Mode of events loop among TK or LOCAL. According to this mode, you must +use Ivy->mainLoop or Tk::MainLoop(3) + +=item B<-appName =E 'your app ivy name'> + +Name of your application used to identify on ivy bus. + +=item B<-ivyBus =E 'domain 1,...,domain n:port number'> + +A list of domains (may be empty), followed by a port number where to broadcast messages. +If the domain list is empty (i.e. parameter is ':port number'), broadcast will be done +on localhost (i.e. '127:port number'). Default is the value of the environment variable +IVYBUS and if it is not defined the default is 127:2010. + +Since V4.12, it is possible to use multicast (ie. with a domain between 224.0.0.0 and 239.255.255.255). You must be aware than when multicast is used, udp broadcast (defined in the B<-ivyBus> paramter) are skipped. You should also probably avoid using the 244.x.x.x domain often used for networking management. + +=item B<-messWhenReady =E 'your message when ready'> + +Synchronisation message sent when application is ready to receive and send +messages. + +=item B<-onDieFunc =E [\&yourdiefunc, @parameters]> + +=item B<-onDieFunc =E [$an_object, \&a_method, @parameters]> + +A callback or method to call when your application receive a suicide request. +Do not call exit() in the callback, Ivy will do it for you. + +The prototype of your callback must be as follows: + + sub MyCallback { + my @parameters = @_; + ... + } + +The prototype of your method must be as follows: + + sub MyMethod { + my ($self, @parameters) = @_; + ... + } + +=item B<-filterRegexp =E ['subject 1', ..., 'subject n']> + +Optimize communication using this option. Regexps +which don't match these subjects are removed. + +=item B + + Ivy->init(-loopMode => 'TK', + -appName => 'MyWonderfulApp', + -onDieFunc => [\&restorecontext] , + -filterRegexp => ['MyWonderfulApp', 'ClockStart', 'ClockStop']); + +=back + +=item B + + Ivy::new(...); + Ivy->new(...); + +Check parameters, and create an Ivy bus object. You must call +Ivy->init before creating a bus. + +Parameters are : + +=over 4 + +=item B<-appName =E 'your application name'> + +Name of your application used to identify it with other applications +connected on the same bus. + +=item B<-ivyBus =E 'domain 1,...,domain n:port number'> + +A list of domains, followed by port number where to broadcast messages. +Default is 127:2010 + +=item B<-messWhenReady =E 'your message when ready'> + +Synchronisation message sent when your application is ready to receive and send +messages. + +=item B<-onDieFunc =E [\&yourdiefunc, @parameters]> + +=item B<-onDieFunc =E [$an_object, \&a_method, @parameters]> + +A callback or method called when your application receives a suicide request. +DO NOT CALL exit() in the callback, Ivy will do it for you. +The prototype of your callback must be as follows: + + sub MyCallback { + my @parameters = @_; + ... + } + +The prototype of your method must be as follows: + + sub MyMethod { + my ($self, @parameters) = @_; + ... + } + +=item B<-filterRegexp =E ['subject 1', ..., 'subject n']> + +Optimize communication using this option. Regexps which don't match these subjects are removed. + +=item B<-neededApp =E ['app 1', ..., 'app n']> + +A list of applications that your application needs present on the bus +before running. + +=item B<-statusFunc =E sub {}> + +A callback which is called every time an agent C connects on the bus, +disconnects from the bus, subscribes to a regexp, or unsubscribes to a +regexp. When the agent A is stopping, this function is also called +inside the agent A for every other agents C on the bus, as they are +disconnecting. The first 3 parameters are a reference to an array of +connected agents Ci, a reference to an array of not connected agents +(according to the "-neededApp" argument of the new method / function), +a reference to a hash table of connected agents Ci (giving the number +of each agent). These 3 parameters are maintained for upwards +compatibility but should no more be used, since the following four +parameters are much easier to use: the name of an appearing / +disapearing or subscribing / filtered / unsubscribing agent C, its status either +"new" or "died" or "subscribing" or "unsubscribing", and the hostname +where this agent C is running / dying OR the subscribed / unsubscribed +regexp. If the hostname of this agent C is not known, it will be +replaced by its IP address. + + + +Your callback could be: + + sub MyCallback { + my ($ref_array_present, $ref_array_absent, $ref_hash_present, + $appname, $status, $host, $regexp) = @_; + + # $status is either new or died + + my %present=%$ref_hash_present; + foreach my $remoteapp (keys %present) { + if ($present{$remoteapp} > 1) { + print "n apps $remoteapp are presents on bus\n"; + } + } + if ($status eq "new") { + print "$appname connected from $host\n"; + } + elsif ($status eq "died") { + print "$appname disconnected from $host\n"; + } + elsif ($status eq "subscribing") { + print "$appname subscribes to $regexp\n"; + } + elsif ($status eq "filtered") { + print "$appname subscribes to FILTERED $regexp check -filterRegexp option\n"; + } + elsif ($status eq "unsubscribing") { + print "$appname unsubscribed to $regexp\n"; + } + } + + + + +=item B<-blockOnSlowAgent =E 0 or 1> + +Behavior when the bus is being congested due to an ivy agent which +doesn't read messages sufficiently quickly. In blocking mode the local +app will block on a send, so it won't be interactive until the send +return, and it will at his turn don't read his pending message, +leading to a global sluggishness of the entire ivy bus. In non +blocking mode the messages are stocked until they could be sent, so +the problem is the uncontrolled memory consumption. + + +=item B<-slowAgentFunc=E \&congestionFunc > + + A callback which is called every time a congestion event occurs. A + congestion event is emitted each time an agent is being congested, + or, after being congested is able to read his messages again. The + parameters are the name of the app, his address (hostname+port + number), and the state, 1 for congested, 0 for able to read. + + Your callback could be: + +sub congestionFunc ($$$) +{ + my ($name, $addr, $state) = @_; + printf ("$name [$addr] %s\n", $state ? "CONGESTION" : "OK"); +} + + + +=item B + + Ivy->new(-ivyBus => '156,157:2204', + -onDieFunc => [\&restorecontext], + -neededApp => ["DataServer", "HMI"], + -slowAgentFunc=> \&congestionFunc, + -blockOnSlowAgent => 1, + -statusFunc => \&MyCallback); + +=back + + + + + + +=item B + + Ivy->mainLoop; + Ivy::mainLoop; + $ivyobj->mainLoop; + + main events loop, call local mainloop or Tk::MainLoop according so specified mode + +=item B + + $ivyobj->stop; + Ivy::stop; + +To stop the Ivy main loop. + +=back + +=head1 OBJECT METHODS + +=over 2 + +=item B + + $ivyobj->start; + Ivy::start; + +You must call this after you are ready to communicate through an Ivy bus +and before you really communicate. The method returns the $ivyobj. + +=item B + + $ivyobj->sendMsgs(@messages); + Ivy::sendMsgs(@messages); + +Send a list of messages. A message should not contain a '\n' or it will not be delivered. + + Example : + $ivyobj->sendMsgs("Hello", "Don't Bother", "Y2K is behind us"); + +=item B + + $ivyobj->sendAppNameMsgs(@messages); + Ivy::sendAppNameMsgs(@messages); + +Send a list of messages preceded by your application's name. A message should not contain a '\n' or it will not be delivered. + + Example : + $ivyobj->sendMsgs("Hello World"); + # it will send "$appName Hello World" over the Ivy bus + +=item B + + $ivyobject->bindRegexp($regexp, [\&callback, @cb_parameters]); + Ivy::bindRegexp($regexp, [\&callback, @cb_parameters]); + + $ivyobject->bindRegexp($regexp, [$an_obj, \&method, @cb_parameters]); + Ivy::bindRegexp($regexp, [$an_obj, \&method, @cb_parameters]); + +This allows you to bind a regular expression to a +callback or method. The callback or method will be called for every +message that matches the regexp (case insensitive). +See perlre(1) to find how to write regexps. +Use the bracketing construct ( ... ) so that your callback is +called with the captured bits of text as parameters. +To unbind callback(s) associated to a regexp use bindRegexp with only +one argument, the regexp. Note that doing the same binding more than +once will induce multiple call of the same callback (this is usually a bug). + +there is a special syntax for specifying numeric interval, in this case +the interval is locally transformed in a pcre regexp. +syntax is (?Imin#max[fi]). min and max are the bounds, +by default the regexp match decimal number, but if max bound is +followed by 'i', the regexp match only integers ex : (?I-10#20), (?I20#25i) +Note that due to the regexp generator limitation (which will perhaps be raised eventually) +the bounds are always integer. + +Return value : regexpId + + Example : + $ivyobject->bindRegexp("\w+ (\d+)", [\&callback, @cb_parameters]); + $ivyobject->bindRegexp("\w+ ((?I-10#20i))", [\&callback, @cb_parameters]); + + # Your callback will be called with one more parameter which will be + # the name of appli which send the message + + # Your callback and method must be like: + sub callback { + my ($sendername, @cb_parameters, + @matched_regexps_in_brackets) = @_; + ... + } + + sub method { + my ($self, $sendername, @cb_parameters, + @matched_regexps_in_brackets) = @_; + ... + } + + # to unbind: + $ivyobject->bindRegexp("\w+ (\d+)"); + +=item B + + $ivyobject->bindRegexpOneShot($regexp, [\&callback, @cb_parameters]); + Ivy::bindRegexpOneShot($regexp, [\&callback, @cb_parameters]); + + bindRegexpOneShot behavior is similar at bindRegexp one, except that + the callback is called once, it is similar as a bindRegexp with an unbind in the callback + but is simpler to write. + +=item B + + $regexpId = $ivyobject->bindRegexp("initialRegexp", [\&callback, @cb_parameters]); + $ivyobject->changeRegexp($regexpId, "newRegexp"); + or + Ivy::changeRegexp($regexpId, "newRegexp"); + +This allow you to change the regexp of a previously made bindRegexp, the callback +will remain the same. This is equivalent to unbinding current regexp and binding the new regexp, +but in this last case the 2 op are not done in atomic manner, and you can miss a message +or receive it twice. + + +=item B + + $ivyobj->sendDirectMsgs($to, $id, @msgs); + Ivy::sendDirectMsgs($to, $id, @msgs); + +Send a message a message to appli $to. This appli must have done a bindDirect before to accept this message. regexp matching is not used with direct Messages. A message should not contain a '\n' or it will not be delivered. + +=item B + + $ivyobj->bindDirect($regexp, $id, [\&callback, @cb_parameters]); + Ivy::bindDirect($id, [\&callback, @cb_parameters]); + +The callback will be called with both the @msgs and the @cb_parameters. + + Example : + $ivyobject->bindDirectMessage("id1", [\&callback, @cb_parameters]); + + # Your callback and method must be like: + sub cb { + my (@cb_parameters, @msgs) = @_; + ... + } + + sub method { + my ($self, @cb_parameters, @msgs) = @_; + ... + } + +=item B + + $ivyobj->sendDieTo($to); + Ivy::sendDieTo($to); + +Send a suicide to the application named $to. + +=item B + + $ivyobj->ping($to, $timeout); + Ivy::ping($to, \&callBack); + +Send a ping message, callBack will be called on response. + +=item B + + $after_id = $ivyobj->after($timeAfter, \@callbacks_list); + $after_id = Ivy::after($timeAfter, \@callbacks_list); + +Call a list of callbacks after $timeAfter milliseconds. To be used only in conjonction with the 'LOCAL' Mainloop (see the Init method). When using the TK mainloop, you must use the Tk::after method. + +=item B + + $repeat_id = $ivyobj->repeat($timeAfter, \@callbacks_list); + $repeat_id = Ivy:repeat($timeAfter, \@callbacks_list); + +Have a list of callbacks repeatedly called every $timeAfter milliseconds. To be used only in conjonction with the 'LOCAL' Mainloop (see the Init method). When using the TK mainloop, you must use the Tk::repeat method. + +=item B + + $ivyobj->afterCancel($after_or_repeat_id); + Ivy::afterCancel($after_or_repeat_id); + +Cancel an after callback call. To be used only in conjonction with the 'LOCAL' Mainloop (see the Init method). When using the TK mainloop, you must use the Tk::afterCancel method. + +=item B + + $ivyobj->afterResetTimer($after_id); + Ivy::afterResetTimer($after_id); + +Reset a timer if this timer has not yet been triggered. To be used only in conjonction with the 'LOCAL' Mainloop (see the Init method). + +=item B + + $ivyobj->fileEvent($fd, $cb); + Ivy::fileEvent($fd, $cb); + +Add a fileEvent handler (or remove any handler associated to $fd if $cb paramter is omitted). +The callback $cb will get the filehandle $fd as parameter. To be used only in conjonction with the 'LOCAL' Mainloop (see the Init method). When using the TK mainloop, you must use the Tk::fileevent method. + +=item B + + $ivyobj->DESTROY; + +Destroy the $ivyobj object. No other method should be applied to the reference of this deleted object. This method should not be used directly. + +=back + +=head1 BUGS + +The stop method does not work! + +In the statusFunc, an agent is identified by its name which is not garantted as unique + +A message to be sent should not contain '\n' char, because the '\n' is the message separator. Ivy.pm will detect and skip such messages. + +No other known bugs at this time. If you find one, please report them to the authors. + +=head1 SEE ALSO + +perl(1), perlre(1), ivyprobe.pl(1) + +=head1 AUTHORS + Alexandre Bustico + Stéphane Chatty + Hervé Damiano + Christophe Mertz + +=head1 COPYRIGHT + +CENA (C) 1997-2006 + +=head1 HISTORY + +=cut diff --git a/tests/lib/Proc/Background.pm b/tests/lib/Proc/Background.pm new file mode 100644 index 0000000000..aeac81550b --- /dev/null +++ b/tests/lib/Proc/Background.pm @@ -0,0 +1,477 @@ +# Proc::Background: Generic interface to background process management. +# +# Copyright (C) 1998-2005 Blair Zajac. All rights reserved. + +package Proc::Background; + +require 5.004_04; + +use strict; +use Exporter; +use Carp; +use Cwd; + +use vars qw(@ISA $VERSION @EXPORT_OK); +@ISA = qw(Exporter); +@EXPORT_OK = qw(timeout_system); +$VERSION = sprintf '%d.%02d', '$Revision: 1.10 $' =~ /(\d+)\.(\d+)/; + +# Determine if the operating system is Windows. +my $is_windows = $^O eq 'MSWin32'; + +# Set up a regular expression that tests if the path is absolute and +# if it has a directory separator in it. Also create a list of file +# extensions of append to the programs name to look for the real +# executable. +my $is_absolute_re; +my $has_dir_element_re; +my @extensions = (''); +if ($is_windows) { + $is_absolute_re = '^(?:(?:[a-zA-Z]:[\\\\/])|(?:[\\\\/]{2}\w+[\\\\/]))'; + $has_dir_element_re = "[\\\\/]"; + push(@extensions, '.exe'); +} else { + $is_absolute_re = "^/"; + $has_dir_element_re = "/"; +} + +# Make this class a subclass of Proc::Win32 or Proc::Unix. Any +# unresolved method calls will go to either of these classes. +if ($is_windows) { + require Proc::Background::Win32; + unshift(@ISA, 'Proc::Background::Win32'); +} else { + require Proc::Background::Unix; + unshift(@ISA, 'Proc::Background::Unix'); +} + +# Take either a relative or absolute path to a command and make it an +# absolute path. +sub _resolve_path { + my $command = shift; + + return unless length $command; + + # Make the path to the progam absolute if it isn't already. If the + # path is not absolute and if the path contains a directory element + # separator, then only prepend the current working to it. If the + # path is not absolute, then look through the PATH environment to + # find the executable. In all cases, look for the programs with any + # extensions added to the original path name. + my $path; + if ($command =~ /$is_absolute_re/o) { + foreach my $ext (@extensions) { + my $p = "$command$ext"; + if (-f $p and -x _) { + $path = $p; + last; + } + } + unless (defined $path) { + warn "$0: no executable program located at $command\n"; + } + } else { + my $cwd = cwd; + if ($command =~ /$has_dir_element_re/o) { + my $p1 = "$cwd/$command"; + foreach my $ext (@extensions) { + my $p2 = "$p1$ext"; + if (-f $p2 and -x _) { + $path = $p2; + last; + } + } + } else { + foreach my $dir (split($is_windows ? ';' : ':', $ENV{PATH})) { + next unless length $dir; + $dir = "$cwd/$dir" unless $dir =~ /$is_absolute_re/o; + my $p1 = "$dir/$command"; + foreach my $ext (@extensions) { + my $p2 = "$p1$ext"; + if (-f $p2 and -x _) { + $path = $p2; + last; + } + } + last if defined $path; + } + } + unless (defined $path) { + warn "$0: cannot find absolute location of $command\n"; + } + } + + $path; +} + +# We want the created object to live in Proc::Background instead of +# the OS specific class so that generic method calls can be used. +sub new { + my $class = shift; + + my $options; + if (@_ and defined $_[0] and UNIVERSAL::isa($_[0], 'HASH')) { + $options = shift; + } + + unless (@_ > 0) { + confess "Proc::Background::new called with insufficient number of arguments"; + } + + return unless defined $_[0]; + + my $self = $class->SUPER::_new(@_) or return; + + # Save the start time of the class. + $self->{_start_time} = time; + + # Handle the specific options. + if ($options) { + $self->{_die_upon_destroy} = $options->{die_upon_destroy}; + } + + bless $self, $class; +} + +sub DESTROY { + my $self = shift; + if ($self->{_die_upon_destroy}) { + $self->die; + } +} + +# Reap the child. If the first argument is 0 the wait should return +# immediately, 1 if it should wait forever. If this number is +# non-zero, then wait. If the wait was sucessful, then delete +# $self->{_os_obj} and set $self->{_exit_value} to the OS specific +# class return of _reap. Return 1 if we sucessfully waited, 0 +# otherwise. +sub _reap { + my $self = shift; + my $timeout = shift || 0; + + return 0 unless exists($self->{_os_obj}); + + # Try to wait on the process. Use the OS dependent wait call using + # the Proc::Background::*::waitpid call, which returns one of three + # values. + # (0, exit_value) : sucessfully waited on. + # (1, undef) : process already reaped and exist value lost. + # (2, undef) : process still running. + my ($result, $exit_value) = $self->_waitpid($timeout); + if ($result == 0 or $result == 1) { + $self->{_exit_value} = defined($exit_value) ? $exit_value : 0; + delete $self->{_os_obj}; + # Save the end time of the class. + $self->{_end_time} = time; + return 1; + } + return 0; +} + +sub alive { + my $self = shift; + + # If $self->{_os_obj} is not set, then the process is definitely + # not running. + return 0 unless exists($self->{_os_obj}); + + # If $self->{_exit_value} is set, then the process has already finished. + return 0 if exists($self->{_exit_value}); + + # Try to reap the child. If it doesn't reap, then it's alive. + !$self->_reap(0); +} + +sub wait { + my $self = shift; + + # If neither _os_obj or _exit_value are set, then something is wrong. + if (!exists($self->{_exit_value}) and !exists($self->{_os_obj})) { + return; + } + + # If $self->{_exit_value} exists, then we already waited. + return $self->{_exit_value} if exists($self->{_exit_value}); + + # Otherwise, wait forever for the process to finish. + $self->_reap(1); + return $self->{_exit_value}; +} + +sub die { + my $self = shift; + + # See if the process has already died. + return 1 unless $self->alive; + + # Kill the process using the OS specific method. + $self->_die; + + # See if the process is still alive. + !$self->alive; +} + +sub start_time { + $_[0]->{_start_time}; +} + +sub end_time { + $_[0]->{_end_time}; +} + +sub pid { + $_[0]->{_pid}; +} + +sub timeout_system { + unless (@_ > 1) { + confess "$0: timeout_system passed too few arguments.\n"; + } + + my $timeout = shift; + unless ($timeout =~ /^\d+(?:\.\d*)?$/ or $timeout =~ /^\.\d+$/) { + confess "$0: timeout_system passed a non-positive number first argument.\n"; + } + + my $proc = Proc::Background->new(@_) or return; + my $end_time = $proc->start_time + $timeout; + while ($proc->alive and time < $end_time) { + sleep(1); + } + + my $alive = $proc->alive; + if ($alive) { + $proc->die; + } + + if (wantarray) { + return ($proc->wait, $alive); + } else { + return $proc->wait; + } +} + +1; + +__END__ + +=pod + +=head1 NAME + +Proc::Background - Generic interface to Unix and Win32 background process management + +=head1 SYNOPSIS + + use Proc::Background; + timeout_system($seconds, $command, $arg1); + timeout_system($seconds, "$command $arg1"); + + my $proc1 = Proc::Background->new($command, $arg1, $arg2); + my $proc2 = Proc::Background->new("$command $arg1 1>&2"); + $proc1->alive; + $proc1->die; + $proc1->wait; + my $time1 = $proc1->start_time; + my $time2 = $proc1->end_time; + + # Add an option to kill the process with die when the variable is + # DETROYed. + my $opts = {'die_upon_destroy' => 1}; + my $proc3 = Proc::Background->new($opts, $command, $arg1, $arg2); + $proc3 = undef; + +=head1 DESCRIPTION + +This is a generic interface for placing processes in the background on +both Unix and Win32 platforms. This module lets you start, kill, wait +on, retrieve exit values, and see if background processes still exist. + +=head1 METHODS + +=over 4 + +=item B [options] I, [I, [I, ...]] + +=item B [options] 'I [I [I ...]]' + +This creates a new background process. As exec() or system() may be +passed an array with a single single string element containing a +command to be passed to the shell or an array with more than one +element to be run without calling the shell, B has the same +behavior. + +In certain cases B will attempt to find I on the system +and fail if it cannot be found. + +For Win32 operating systems: + + The Win32::Process module is always used to spawn background + processes on the Win32 platform. This module always takes a + single string argument containing the executable's name and + any option arguments. In addition, it requires that the + absolute path to the executable is also passed to it. If + only a single argument is passed to new, then it is split on + whitespace into an array and the first element of the split + array is used at the executable's name. If multiple + arguments are passed to new, then the first element is used + as the executable's name. + + If the executable's name is an absolute path, then new + checks to see if the executable exists in the given location + or fails otherwise. If the executable's name is not + absolute, then the executable is searched for using the PATH + environmental variable. The input executable name is always + replaced with the absolute path determined by this process. + + In addition, when searching for the executable, the + executable is searched for using the unchanged executable + name and if that is not found, then it is checked by + appending `.exe' to the name in case the name was passed + without the `.exe' suffix. + + Finally, the argument array is placed back into a single + string and passed to Win32::Process::Create. + +For non-Win32 operating systems, such as Unix: + + If more than one argument is passed to new, then new + assumes that the command will not be passed through the + shell and the first argument is the executable's relative + or absolute path. If the first argument is an absolute + path, then it is checked to see if it exists and can be + run, otherwise new fails. If the path is not absolute, + then the PATH environmental variable is checked to see if + the executable can be found. If the executable cannot be + found, then new fails. These steps are taking to prevent + exec() from failing after an fork() without the caller of + new knowing that something failed. + +The first argument to B I may be a reference to a hash +which contains key/value pairs to modify Proc::Background's behavior. +Currently the only key understood by B is I. +When this value is set to true, then when the Proc::Background object +is being DESTROY'ed for any reason (i.e. the variable goes out of +scope) the process is killed via the die() method. + +If anything fails, then new returns an empty list in a list context, +an undefined value in a scalar context, or nothing in a void context. + +=item B + +Returns the process ID of the created process. This value is saved +even if the process has already finished. + +=item B + +Return 1 if the process is still active, 0 otherwise. + +=item B + +Reliably try to kill the process. Returns 1 if the process no longer +exists once B has completed, 0 otherwise. This will also return +1 if the process has already died. On Unix, the following signals are +sent to the process in one second intervals until the process dies: +HUP, QUIT, INT, KILL. + +=item B + +Wait for the process to exit. Return the exit status of the command +as returned by wait() on the system. To get the actual exit value, +divide by 256 or right bit shift by 8, regardless of the operating +system being used. If the process never existed, then return an empty +list in a list context, an undefined value in a scalar context, or +nothing in a void context. This function may be called multiple times +even after the process has exited and it will return the same exit +status. + +=item B + +Return the value that the Perl function time() returned when the +process was started. + +=item B + +Return the value that the Perl function time() returned when the exit +status was obtained from the process. + +=back + +=head1 FUNCTIONS + +=over 4 + +=item B I, I, [I, [I...]] + +=item B 'I I [I [I...]]' + +Run a command for I seconds and if the process did not exit, +then kill it. While the timeout is implemented using sleep(), this +function makes sure that the full I is reached before killing +the process. B does not wait for the complete +I number of seconds before checking if the process has +exited. Rather, it sleeps repeatidly for 1 second and checks to see +if the process still exists. + +In a scalar context, B returns the exit status from +the process. In an array context, B returns a two +element array, where the first element is the exist status from the +process and the second is set to 1 if the process was killed by +B or 0 if the process exited by itself. + +The exit status is the value returned from the wait() call. If the +process was killed, then the return value will include the killing of +it. To get the actual exit value, divide by 256. + +If something failed in the creation of the process, the subroutine +returns an empty list in a list context, an undefined value in a +scalar context, or nothing in a void context. + +=back + +=head1 IMPLEMENTATION + +I comes with two modules, I +and I. Currently, on Unix platforms +I uses the I class and on +Win32 platforms it uses I, which makes use of +I. + +The I assigns to @ISA either +I or I, which does +the OS dependent work. The OS independent work is done in +I. + +Proc::Background uses two variables to keep track of the process. +$self->{_os_obj} contains the operating system object to reference the +process. On a Unix systems this is the process id (pid). On Win32, +it is an object returned from the I class. When +$self->{_os_obj} exists, then the process is running. When the +process dies, this is recorded by deleting $self->{_os_obj} and saving +the exit value $self->{_exit_value}. + +Anytime I is called, a waitpid() is called on the process and +the return status, if any, is gathered and saved for a call to +I. This module does not install a signal handler for SIGCHLD. +If for some reason, the user has installed a signal handler for +SIGCHLD, then, then when this module calls waitpid(), the failure will +be noticed and taken as the exited child, but it won't be able to +gather the exit status. In this case, the exit status will be set to +0. + +=head1 SEE ALSO + +See also L and L. + +=head1 AUTHOR + +Blair Zajac + +=head1 COPYRIGHT + +Copyright (C) 1998-2005 Blair Zajac. All rights reserved. This +package is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/tests/lib/Proc/Background/Unix.pm b/tests/lib/Proc/Background/Unix.pm new file mode 100644 index 0000000000..07f2ea7328 --- /dev/null +++ b/tests/lib/Proc/Background/Unix.pm @@ -0,0 +1,138 @@ +# Proc::Background::Unix: Unix interface to background process management. +# +# Copyright (C) 1998-2005 Blair Zajac. All rights reserved. + +package Proc::Background::Unix; + +require 5.004_04; + +use strict; +use Exporter; +use Carp; +use POSIX qw(:errno_h :sys_wait_h); + +use vars qw(@ISA $VERSION); +@ISA = qw(Exporter); +$VERSION = sprintf '%d.%02d', '$Revision: 1.10 $' =~ /(\d+)\.(\d+)/; + +# Start the background process. If it is started sucessfully, then record +# the process id in $self->{_os_obj}. +sub _new { + my $class = shift; + + unless (@_ > 0) { + confess "Proc::Background::Unix::_new called with insufficient number of arguments"; + } + + return unless defined $_[0]; + + # If there is only one element in the @_ array, then it may be a + # command to be passed to the shell and should not be checked, in + # case the command sets environmental variables in the beginning, + # i.e. 'VAR=arg ls -l'. If there is more than one element in the + # array, then check that the first element is a valid executable + # that can be found through the PATH and find the absolute path to + # the executable. If the executable is found, then replace the + # first element it with the absolute path. + my @args = @_; + if (@_ > 1) { + $args[0] = Proc::Background::_resolve_path($args[0]) or return; + } + + my $self = bless {}, $class; + + # Fork a child process. + my $pid; + { + if ($pid = fork()) { + # parent + $self->{_os_obj} = $pid; + $self->{_pid} = $pid; + last; + } elsif (defined $pid) { + # child + exec @_ or croak "$0: exec failed: $!\n"; + } elsif ($! == EAGAIN) { + sleep 5; + redo; + } else { + return; + } + } + + $self; +} + +# Wait for the child. +sub _waitpid { + my $self = shift; + my $timeout = shift; + + { + # Try to wait on the process. + my $result = waitpid($self->{_os_obj}, $timeout ? 0 : WNOHANG); + # Process finished. Grab the exit value. + if ($result == $self->{_os_obj}) { + return (0, $?); + } + # Process already reaped. We don't know the exist status. + elsif ($result == -1 and $! == ECHILD) { + return (1, 0); + } + # Process still running. + elsif ($result == 0) { + return (2, 0); + } + # If we reach here, then waitpid caught a signal, so let's retry it. + redo; + } + return 0; +} + +sub _die { + my $self = shift; + + # Try to kill the process with different signals. Calling alive() will + # collect the exit status of the program. + SIGNAL: { + foreach my $signal (qw(HUP QUIT INT KILL)) { + my $count = 5; + while ($count and $self->alive) { + --$count; + kill($signal, $self->{_os_obj}); + last SIGNAL unless $self->alive; + sleep 1; + } + } + } +} + +1; + +__END__ + +=head1 NAME + +Proc::Background::Unix - Unix interface to process mangement + +=head1 SYNOPSIS + +Do not use this module directly. + +=head1 DESCRIPTION + +This is a process management class designed specifically for Unix +operating systems. It is not meant used except through the +I class. See L for more information. + +=head1 AUTHOR + +Blair Zajac + +=head1 COPYRIGHT + +Copyright (C) 1998-2005 Blair Zajac. All rights reserved. This +package is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/tests/lib/Proc/Background/Win32.pm b/tests/lib/Proc/Background/Win32.pm new file mode 100644 index 0000000000..62b7949c1c --- /dev/null +++ b/tests/lib/Proc/Background/Win32.pm @@ -0,0 +1,157 @@ +# Proc::Background::Win32 Windows interface to background process management. +# +# Copyright (C) 1998-2005 Blair Zajac. All rights reserved. + +package Proc::Background::Win32; + +require 5.004_04; + +use strict; +use Exporter; +use Carp; + +use vars qw(@ISA $VERSION); +@ISA = qw(Exporter); +$VERSION = sprintf '%d.%02d', '$Revision: 1.10 $' =~ /(\d+)\.(\d+)/; + +BEGIN { + eval "use Win32"; + $@ and die "Proc::Background::Win32 needs Win32 from libwin32-?.??.zip to run.\n"; + eval "use Win32::Process"; + $@ and die "Proc::Background::Win32 needs Win32::Process from libwin32-?.??.zip to run.\n"; +} + +sub _new { + my $class = shift; + + unless (@_ > 0) { + confess "Proc::Background::Win32::_new called with insufficient number of arguments"; + } + + return unless defined $_[0]; + + # If there is only one element in the @_ array, then just split the + # argument by whitespace. If there is more than one element in @_, + # then assume that each argument should be properly protected from + # the shell so that whitespace and special characters are passed + # properly to the program, just as it would be in a Unix + # environment. This will ensure that a single argument with + # whitespace will not be split into multiple arguments by the time + # the program is run. Make sure that any arguments that are already + # protected stay protected. Then convert unquoted "'s into \"'s. + # Finally, check for whitespace and protect it. + my @args; + if (@_ == 1) { + @args = split(' ', $_[0]); + } else { + @args = @_; + for (my $i=1; $i<@args; ++$i) { + my $arg = $args[$i]; + $arg =~ s#\\\\#\200#g; + $arg =~ s#\\"#\201#g; + $arg =~ s#"#\\"#g; + $arg =~ s#\200#\\\\#g; + $arg =~ s#\201#\\"#g; + if (length($arg) == 0 or $arg =~ /\s/) { + $arg = "\"$arg\""; + } + $args[$i] = $arg; + } + } + + # Find the absolute path to the program. If it cannot be found, + # then return. To work around a problem where + # Win32::Process::Create cannot start a process when the full + # pathname has a space in it, convert the full pathname to the + # Windows short 8.3 format which contains no spaces. + $args[0] = Proc::Background::_resolve_path($args[0]) or return; + $args[0] = Win32::GetShortPathName($args[0]); + + my $self = bless {}, $class; + + # Perl 5.004_04 cannot run Win32::Process::Create on a nonexistant + # hash key. + my $os_obj = 0; + + # Create the process. + if (Win32::Process::Create($os_obj, + $args[0], + "@args", + 0, + NORMAL_PRIORITY_CLASS, + '.')) { + $self->{_pid} = $os_obj->GetProcessID; + $self->{_os_obj} = $os_obj; + return $self; + } else { + return; + } +} + +# Reap the child. +sub _waitpid { + my ($self, $timeout) = @_; + + # Try to wait on the process. + my $result = $self->{_os_obj}->Wait($timeout ? INFINITE : 0); + # Process finished. Grab the exit value. + if ($result == 1) { + my $_exit_status; + $self->{_os_obj}->GetExitCode($_exit_status); + return (0, $_exit_status<<8); + } + # Process still running. + elsif ($result == 0) { + return (2, 0); + } + # If we reach here, then something odd happened. + return (0, 1<<8); +} + +sub _die { + my $self = shift; + + # Try the kill the process several times. Calling alive() will + # collect the exit status of the program. + my $count = 5; + while ($count and $self->alive) { + --$count; + $self->{_os_obj}->Kill(1<<8); + last unless $self->alive; + sleep 1; + } +} + +1; + +__END__ + +=head1 NAME + +Proc::Background::Win32 - Interface to process mangement on Win32 systems + +=head1 SYNOPSIS + +Do not use this module directly. + +=head1 DESCRIPTION + +This is a process management class designed specifically for Win32 +operating systems. It is not meant used except through the +I class. See L for more information. + +=head1 IMPLEMENTATION + +This package uses the Win32::Process class to manage the objects. + +=head1 AUTHOR + +Blair Zajac + +=head1 COPYRIGHT + +Copyright (C) 1998-2005 Blair Zajac. All rights reserved. This +package is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/tests/lib/Program.pm b/tests/lib/Program.pm new file mode 100644 index 0000000000..4ad5f6567d --- /dev/null +++ b/tests/lib/Program.pm @@ -0,0 +1,304 @@ +=head1 SYNOPSIS + + use Program; + my $m_program = new Program('ls'); + +=head1 DESCRIPTION + +Program is a generic program wrapper that allows easy use of programs on +multipul platforms with the correct file handle redirection. + +=head1 FUNCTIONS + +=cut + +package Program; + +################### +# Standard Modules +use strict; +use Config; +use FileHandle; + +################### +# Variables +use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); +@ISA = qw(); +@EXPORT = qw(); +@EXPORT_OK = qw(); +$VERSION = 0.04; + +# This hash contains the values for redirection. +my %m_redirect_values; + +# Standard values +$m_redirect_values{'none'} = ''; +$m_redirect_values{'stdout&stderr2stdout'} = '2>&1'; +$m_redirect_values{'stdout&stderr2stderr'} = '1>&2'; +$m_redirect_values{'stdout2stderr&stderr2stdout'} = '3>&1 1>&2 2>&3 3>&-'; + +# Platform specific values +if ($Config{'osname'} eq 'MSWin32') +{ + if ($Config{'osvers'} ge 4.0) + { + $m_redirect_values{'stdout2stdnull'} = '1>NUL'; + $m_redirect_values{'stderr2stdnull'} = '2>NUL'; + } + else + { + $m_redirect_values{'stdout2stdnull'} = '1>'; + $m_redirect_values{'stderr2stdnull'} = '2>'; + } +} +else +{ + $m_redirect_values{'stdout2stdnull'} = '1>/dev/null'; + $m_redirect_values{'stderr2stdnull'} = '2>/dev/null'; +} + +################### +# Functions + +################################################################################ +# Function: new() + +=head2 B + + Description: This method returns an objective interface the the passed program. + Arguments: $program + Return: class + Usage: my $ls = new Program('ls'); + +=cut + +sub new +{ + my $class = shift; + my $program = shift; + + my $self = bless {}, $class; + $self->{'PROGRAM'} = $program; + $self->{'CHOMP'} = 1; + $self->{'REDIRECT'} = 'stdout&stderr2stdout'; + $self->{'LAST_COMMAND'} = "No commands have been executed yet."; + $self->{'EXIT_STATUS'} = 0; + + return $self; +} + +################################################################################ +# Function: strip_new_lines() + +=head2 B + + Description: This functions is used to set wether the output from a command + has trailing new lines removed. + Arguments: 0 to turn chomping off + 1 to turn chomping on + Default: 1 + Return: true if chomping is on else undef + Usage: $ls->strip_new_lines(1); + $ls->strip_new_lines(0); + +=cut + +sub strip_new_lines +{ + my $self = shift; + my $value = shift; + + if (scalar $value) + { + $self->{'CHOMP'} = $value; + } + else + { + return $self->{'CHOMP'}; + } +} + + +################################################################################ +# Function: redirect() + +=head2 B + + Description: This functions is used to set the STDOUT and STDERR redirection + for commands executed by the program. + Arguments: possible values are stdout&stderr2stdout, stdout&stderr2stderr + stdout2stderr&stderr2stdout, stdout2stdnull, stderr2stdnull + Default: sdtout&stderr2stdout + Return: redirection option if nothing passed else set the redirection + Usage: $ls->redirect('stderr2sdtnull'); + +=cut + +sub redirect +{ + my $self = shift; + my $value = shift; + + if (scalar $value) + { + $self->{'REDIRECT'} = $value; + } + else + { + return $m_redirect_values{$self->{'REDIRECT'}}; + } +} + +################################################################################ +# Function: last_command() + +=head2 B + + Description: This function returns the last command executed. + Arguments: None. + Return: The last command + Usage: print $ls->last_command(); + +=cut + +sub last_command +{ + my $self = shift; + return $self->{'LAST_COMMAND'}; +} + +################################################################################ +# Function: output() + +=head2 B + + Description: This function returns the output from a program. + Arguments: command to execute. + Return: array if called in an array context else string + Usage: my $output = $ls->output("-l"); + my @output = $ls->output("-l"); + +=cut + +sub output +{ + my $self = shift; + my $command = shift; + my $exec_command = "$self->{'PROGRAM'} $command " . $self->redirect(); + $self->{'LAST_COMMAND'} = $exec_command; + + my @output_list; + my $output_line; + if (wantarray) + { + @output_list = `$exec_command`; + foreach my $line (@output_list) + { + chomp $line if $self->strip_new_lines(); + } + } + else + { + $output_line = `$exec_command`; + chomp $output_line if $self->strip_new_lines(); + } + $self->{'EXIT_STATUS'} = $?/256; + + return wantarray ? @output_list : $output_line; +} + +################################################################################ +# Function: status() + +=head2 B + + Description: This function returns the exit status of the last + command. + Arguments: None. + Return: exit status. + Usage: my $status = $ls->status(); + +=cut + +sub status +{ + my $self = shift; + return $self->{'EXIT_STATUS'}; +} + +################################################################################ +# Function: success() + +=head2 B + + Description: This function returns true if the last command was successful + Arguments: None. + Return: true for success else undef. + Usage: if ($ls->success()) + { + Print "Success\n"; + } + else + { + Print "Failure\n"; + } + +=cut + +sub success +{ + my $self = shift; + return 1 if $self->status() eq 0; + return undef +} + +################################################################################ +# Function: open() + +=head2 B + + Description: This function returns an open file handle for the passed command + NOTE: Since the exit status of the file handle cannot be + retrieved by this module the user must check the exit status of + the file handle using the $fh->error() method. See IO::Handle. + Arguments: command to execute. + Return: open file handle. + Usage: my $fh = $ls->open("-l) + while (<$fh>) + { + print "$_\n"; + } + +=cut + +sub open +{ + my $self = shift; + my $command = shift; + + my $exec_command = "$self->{'PROGRAM'} $command " . $self->redirect() ." |"; + $self->{'LAST_COMMAND'} = $exec_command; + $self->{'EXIT_STATUS'} = 0; + + my $fh = new FileHandle($exec_command); + return $fh; +} + +1; + +__END__ + +=head1 SEE ALSO + + FileHandle + +=head1 AUTHOR + + Bernard Davison bernard@gondwana.com.au + +=head1 COPYRIGHT + + Copyright (C) 2000, Gondwanatech. + +=cut + diff --git a/tests/lib/XML/NamespaceSupport.pm b/tests/lib/XML/NamespaceSupport.pm new file mode 100755 index 0000000000..c653e35d21 --- /dev/null +++ b/tests/lib/XML/NamespaceSupport.pm @@ -0,0 +1,583 @@ + +### +# XML::NamespaceSupport - a simple generic namespace processor +# Robin Berjon +### + +package XML::NamespaceSupport; +use strict; +use constant FATALS => 0; # root object +use constant NSMAP => 1; +use constant UNKNOWN_PREF => 2; +use constant AUTO_PREFIX => 3; +use constant XMLNS_11 => 4; +use constant DEFAULT => 0; # maps +use constant PREFIX_MAP => 1; +use constant DECLARATIONS => 2; + +use vars qw($VERSION $NS_XMLNS $NS_XML); +$VERSION = '1.11'; +$NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; +$NS_XML = 'http://www.w3.org/XML/1998/namespace'; + + +# add the ns stuff that baud wants based on Java's xml-writer + + +#-------------------------------------------------------------------# +# constructor +#-------------------------------------------------------------------# +sub new { + my $class = ref($_[0]) ? ref(shift) : shift; + my $options = shift; + my $self = [ + 1, # FATALS + [[ # NSMAP + undef, # DEFAULT + { xml => $NS_XML }, # PREFIX_MAP + undef, # DECLARATIONS + ]], + 'aaa', # UNKNOWN_PREF + 0, # AUTO_PREFIX + 1, # XML_11 + ]; + $self->[NSMAP]->[0]->[PREFIX_MAP]->{xmlns} = $NS_XMLNS if $options->{xmlns}; + $self->[FATALS] = $options->{fatal_errors} if defined $options->{fatal_errors}; + $self->[AUTO_PREFIX] = $options->{auto_prefix} if defined $options->{auto_prefix}; + $self->[XMLNS_11] = $options->{xmlns_11} if defined $options->{xmlns_11}; + return bless $self, $class; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# reset() - return to the original state (for reuse) +#-------------------------------------------------------------------# +sub reset { + my $self = shift; + $#{$self->[NSMAP]} = 0; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# push_context() - add a new empty context to the stack +#-------------------------------------------------------------------# +sub push_context { + my $self = shift; + push @{$self->[NSMAP]}, [ + $self->[NSMAP]->[-1]->[DEFAULT], + { %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} }, + [], + ]; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# pop_context() - remove the topmost context fromt the stack +#-------------------------------------------------------------------# +sub pop_context { + my $self = shift; + die 'Trying to pop context without push context' unless @{$self->[NSMAP]} > 1; + pop @{$self->[NSMAP]}; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# declare_prefix() - declare a prefix in the current scope +#-------------------------------------------------------------------# +sub declare_prefix { + my $self = shift; + my $prefix = shift; + my $value = shift; + + warn <<' EOWARN' unless defined $prefix or $self->[AUTO_PREFIX]; + Prefix was undefined. + If you wish to set the default namespace, use the empty string ''. + If you wish to autogenerate prefixes, set the auto_prefix option + to a true value. + EOWARN + + no warnings 'uninitialized'; + if ($prefix eq 'xml' and $value ne $NS_XML) { + die "The xml prefix can only be bound to the $NS_XML namespace." + } + elsif ($value eq $NS_XML and $prefix ne 'xml') { + die "the $NS_XML namespace can only be bound to the xml prefix."; + } + elsif ($value eq $NS_XML and $prefix eq 'xml') { + return 1; + } + return 0 if index(lc($prefix), 'xml') == 0; + use warnings 'uninitialized'; + + if (defined $prefix and $prefix eq '') { + $self->[NSMAP]->[-1]->[DEFAULT] = $value; + } + else { + die "Cannot undeclare prefix $prefix" if $value eq '' and not $self->[XMLNS_11]; + if (not defined $prefix and $self->[AUTO_PREFIX]) { + while (1) { + $prefix = $self->[UNKNOWN_PREF]++; + last if not exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; + } + } + elsif (not defined $prefix and not $self->[AUTO_PREFIX]) { + return 0; + } + $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} = $value; + } + push @{$self->[NSMAP]->[-1]->[DECLARATIONS]}, $prefix; + return 1; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# declare_prefixes() - declare several prefixes in the current scope +#-------------------------------------------------------------------# +sub declare_prefixes { + my $self = shift; + my %prefixes = @_; + while (my ($k,$v) = each %prefixes) { + $self->declare_prefix($k,$v); + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# undeclare_prefix +#-------------------------------------------------------------------# +sub undeclare_prefix { + my $self = shift; + my $prefix = shift; + return unless not defined $prefix or $prefix eq ''; + return unless exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; + + my ( $tfix ) = grep { $_ eq $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]}; + if ( not defined $tfix ) { + die "prefix $prefix not declared in this context\n"; + } + + @{$self->[NSMAP]->[-1]->[DECLARATIONS]} = grep { $_ ne $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]}; + delete $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_prefix() - get a (random) prefix for a given URI +#-------------------------------------------------------------------# +sub get_prefix { + my $self = shift; + my $uri = shift; + + # we have to iterate over the whole hash here because if we don't + # the iterator isn't reset and the next pass will fail + my $pref; + while (my ($k, $v) = each %{$self->[NSMAP]->[-1]->[PREFIX_MAP]}) { + $pref = $k if $v eq $uri; + } + return $pref; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_prefixes() - get all the prefixes for a given URI +#-------------------------------------------------------------------# +sub get_prefixes { + my $self = shift; + my $uri = shift; + + return keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} unless defined $uri; + return grep { $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$_} eq $uri } keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]}; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_declared_prefixes() - get all prefixes declared in the last context +#-------------------------------------------------------------------# +sub get_declared_prefixes { + return @{$_[0]->[NSMAP]->[-1]->[DECLARATIONS]}; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_uri() - get an URI given a prefix +#-------------------------------------------------------------------# +sub get_uri { + my $self = shift; + my $prefix = shift; + + warn "Prefix must not be undef in get_uri(). The emtpy prefix must be ''" unless defined $prefix; + + return $self->[NSMAP]->[-1]->[DEFAULT] if $prefix eq ''; + return $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} if exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; + return undef; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# process_name() - provide details on a name +#-------------------------------------------------------------------# +sub process_name { + my $self = shift; + my $qname = shift; + my $aflag = shift; + + if ($self->[FATALS]) { + return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname ); + } + else { + eval { return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname ); } + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# process_element_name() - provide details on a element's name +#-------------------------------------------------------------------# +sub process_element_name { + my $self = shift; + my $qname = shift; + + if ($self->[FATALS]) { + return $self->_get_ns_details($qname, 0); + } + else { + eval { return $self->_get_ns_details($qname, 0); } + } +} +#-------------------------------------------------------------------# + + +#-------------------------------------------------------------------# +# process_attribute_name() - provide details on a attribute's name +#-------------------------------------------------------------------# +sub process_attribute_name { + my $self = shift; + my $qname = shift; + + if ($self->[FATALS]) { + return $self->_get_ns_details($qname, 1); + } + else { + eval { return $self->_get_ns_details($qname, 1); } + } +} +#-------------------------------------------------------------------# + + +#-------------------------------------------------------------------# +# ($ns, $prefix, $lname) = $self->_get_ns_details($qname, $f_attr) +# returns ns, prefix, and lname for a given attribute name +# >> the $f_attr flag, if set to one, will work for an attribute +#-------------------------------------------------------------------# +sub _get_ns_details { + my $self = shift; + my $qname = shift; + my $aflag = shift; + + my ($ns, $prefix, $lname); + (my ($tmp_prefix, $tmp_lname) = split /:/, $qname, 3) + < 3 or die "Invalid QName: $qname"; + + # no prefix + my $cur_map = $self->[NSMAP]->[-1]; + if (not defined($tmp_lname)) { + $prefix = undef; + $lname = $qname; + # attr don't have a default namespace + $ns = ($aflag) ? undef : $cur_map->[DEFAULT]; + } + + # prefix + else { + if (exists $cur_map->[PREFIX_MAP]->{$tmp_prefix}) { + $prefix = $tmp_prefix; + $lname = $tmp_lname; + $ns = $cur_map->[PREFIX_MAP]->{$prefix} + } + else { # no ns -> lname == name, all rest undef + die "Undeclared prefix: $tmp_prefix"; + } + } + + return ($ns, $prefix, $lname); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# parse_jclark_notation() - parse the Clarkian notation +#-------------------------------------------------------------------# +sub parse_jclark_notation { + shift; + my $jc = shift; + $jc =~ m/^\{(.*)\}([^}]+)$/; + return $1, $2; +} +#-------------------------------------------------------------------# + + +#-------------------------------------------------------------------# +# Java names mapping +#-------------------------------------------------------------------# +*XML::NamespaceSupport::pushContext = \&push_context; +*XML::NamespaceSupport::popContext = \&pop_context; +*XML::NamespaceSupport::declarePrefix = \&declare_prefix; +*XML::NamespaceSupport::declarePrefixes = \&declare_prefixes; +*XML::NamespaceSupport::getPrefix = \&get_prefix; +*XML::NamespaceSupport::getPrefixes = \&get_prefixes; +*XML::NamespaceSupport::getDeclaredPrefixes = \&get_declared_prefixes; +*XML::NamespaceSupport::getURI = \&get_uri; +*XML::NamespaceSupport::processName = \&process_name; +*XML::NamespaceSupport::processElementName = \&process_element_name; +*XML::NamespaceSupport::processAttributeName = \&process_attribute_name; +*XML::NamespaceSupport::parseJClarkNotation = \&parse_jclark_notation; +*XML::NamespaceSupport::undeclarePrefix = \&undeclare_prefix; +#-------------------------------------------------------------------# + + +1; +#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,# +#`,`, Documentation `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,# +#```````````````````````````````````````````````````````````````````# + +=pod + +=head1 NAME + +XML::NamespaceSupport - a simple generic namespace support class + +=head1 SYNOPSIS + + use XML::NamespaceSupport; + my $nsup = XML::NamespaceSupport->new; + + # add a new empty context + $nsup->push_context; + # declare a few prefixes + $nsup->declare_prefix($prefix1, $uri1); + $nsup->declare_prefix($prefix2, $uri2); + # the same shorter + $nsup->declare_prefixes($prefix1 => $uri1, $prefix2 => $uri2); + + # get a single prefix for a URI (randomly) + $prefix = $nsup->get_prefix($uri); + # get all prefixes for a URI (probably better) + @prefixes = $nsup->get_prefixes($uri); + # get all prefixes in scope + @prefixes = $nsup->get_prefixes(); + # get all prefixes that were declared for the current scope + @prefixes = $nsup->get_declared_prefixes; + # get a URI for a given prefix + $uri = $nsup->get_uri($prefix); + + # get info on a qname (java-ish way, it's a bit weird) + ($ns_uri, $local_name, $qname) = $nsup->process_name($qname, $is_attr); + # the same, more perlish + ($ns_uri, $prefix, $local_name) = $nsup->process_element_name($qname); + ($ns_uri, $prefix, $local_name) = $nsup->process_attribute_name($qname); + + # remove the current context + $nsup->pop_context; + + # reset the object for reuse in another document + $nsup->reset; + + # a simple helper to process Clarkian Notation + my ($ns, $lname) = $nsup->parse_jclark_notation('{http://foo}bar'); + # or (given that it doesn't care about the object + my ($ns, $lname) = XML::NamespaceSupport->parse_jclark_notation('{http://foo}bar'); + + +=head1 DESCRIPTION + +This module offers a simple to process namespaced XML names (unames) +from within any application that may need them. It also helps maintain +a prefix to namespace URI map, and provides a number of basic checks. + +The model for this module is SAX2's NamespaceSupport class, readable at +http://www.megginson.com/SAX/Java/javadoc/org/xml/sax/helpers/NamespaceSupport.html. +It adds a few perlisations where we thought it appropriate. + +=head1 METHODS + +=over 4 + +=item * XML::NamespaceSupport->new(\%options) + +A simple constructor. + +The options are C, C, and C + +If C is turned on (it is off by default) the mapping from the +xmlns prefix to the URI defined for it in DOM level 2 is added to the +list of predefined mappings (which normally only contains the xml +prefix mapping). + +If C is turned off (it is on by default) a number of +validity errors will simply be flagged as failures, instead of +die()ing. + +If C is turned on (it is off by default) when one +provides a prefix of C to C it will generate a +random prefix mapped to that namespace. Otherwise an undef prefix will +trigger a warning (you should probably know what you're doing if you +turn this option on). + +If C us turned off, it becomes illegal to undeclare namespace +prefixes. It is on by default. This behaviour is compliant with Namespaces +in XML 1.1, turning it off reverts you to version 1.0. + +=item * $nsup->push_context + +Adds a new empty context to the stack. You can then populate it with +new prefixes defined at this level. + +=item * $nsup->pop_context + +Removes the topmost context in the stack and reverts to the previous +one. It will die() if you try to pop more than you have pushed. + +=item * $nsup->declare_prefix($prefix, $uri) + +Declares a mapping of $prefix to $uri, at the current level. + +Note that with C turned on, if you declare a prefix +mapping in which $prefix is undef(), you will get an automatic prefix +selected for you. If it is off you will get a warning. + +This is useful when you deal with code that hasn't kept prefixes around +and need to reserialize the nodes. It also means that if you want to +set the default namespace (ie with an empty prefix) you must use the +empty string instead of undef. This behaviour is consistent with the +SAX 2.0 specification. + +=item * $nsup->declare_prefixes(%prefixes2uris) + +Declares a mapping of several prefixes to URIs, at the current level. + +=item * $nsup->get_prefix($uri) + +Returns a prefix given an URI. Note that as several prefixes may be +mapped to the same URI, it returns an arbitrary one. It'll return +undef on failure. + +=item * $nsup->get_prefixes($uri) + +Returns an array of prefixes given an URI. It'll return all the +prefixes if the uri is undef. + +=item * $nsup->get_declared_prefixes + +Returns an array of all the prefixes that have been declared within +this context, ie those that were declared on the last element, not +those that were declared above and are simply in scope. + +=item * $nsup->get_uri($prefix) + +Returns a URI for a given prefix. Returns undef on failure. + +=item * $nsup->process_name($qname, $is_attr) + +Given a qualified name and a boolean indicating whether this is an +attribute or another type of name (those are differently affected by +default namespaces), it returns a namespace URI, local name, qualified +name tuple. I know that that is a rather abnormal list to return, but +it is so for compatibility with the Java spec. See below for more +Perlish alternatives. + +If the prefix is not declared, or if the name is not valid, it'll +either die or return undef depending on the current setting of +C. + +=item * $nsup->undeclare_prefix($prefix); + +Removes a namespace prefix from the current context. This function may +be used in SAX's end_prefix_mapping when there is fear that a namespace +declaration might be available outside their scope (which shouldn't +normally happen, but you never know ;). This may be needed in order to +properly support Namespace 1.1. + +=item * $nsup->process_element_name($qname) + +Given a qualified name, it returns a namespace URI, prefix, and local +name tuple. This method applies to element names. + +If the prefix is not declared, or if the name is not valid, it'll +either die or return undef depending on the current setting of +C. + +=item * $nsup->process_attribute_name($qname) + +Given a qualified name, it returns a namespace URI, prefix, and local +name tuple. This method applies to attribute names. + +If the prefix is not declared, or if the name is not valid, it'll +either die or return undef depending on the current setting of +C. + +=item * $nsup->reset + +Resets the object so that it can be reused on another document. + +=back + +All methods of the interface have an alias that is the name used in +the original Java specification. You can use either name +interchangeably. Here is the mapping: + + Java name Perl name + --------------------------------------------------- + pushContext push_context + popContext pop_context + declarePrefix declare_prefix + declarePrefixes declare_prefixes + getPrefix get_prefix + getPrefixes get_prefixes + getDeclaredPrefixes get_declared_prefixes + getURI get_uri + processName process_name + processElementName process_element_name + processAttributeName process_attribute_name + parseJClarkNotation parse_jclark_notation + undeclarePrefix undeclare_prefix + +=head1 VARIABLES + +Two global variables are made available to you. They used to be constants but +simple scalars are easier to use in a number of contexts. They are not +exported but can easily be accessed from any package, or copied into it. + +=over 4 + +=item * C<$NS_XMLNS> + +The namespace for xmlns prefixes, http://www.w3.org/2000/xmlns/. + +=item * C<$NS_XML> + +The namespace for xml prefixes, http://www.w3.org/XML/1998/namespace. + +=back + +=head1 TODO + + - add more tests + - optimise here and there + +=head1 AUTHOR + +Robin Berjon, robin@knowscape.com, with lots of it having been done +by Duncan Cameron, and a number of suggestions from the perl-xml +list. + +=head1 COPYRIGHT + +Copyright (c) 2001-2005 Robin Berjon. All rights reserved. This program is +free software; you can redistribute it and/or modify it under the same terms +as Perl itself. + +=head1 SEE ALSO + +XML::Parser::PerlSAX + +=cut + diff --git a/tests/lib/XML/SAX.pm b/tests/lib/XML/SAX.pm new file mode 100644 index 0000000000..b63a46474d --- /dev/null +++ b/tests/lib/XML/SAX.pm @@ -0,0 +1,379 @@ +# $Id$ + +package XML::SAX; + +use strict; +use vars qw($VERSION @ISA @EXPORT_OK); + +$VERSION = '0.99'; + +use Exporter (); +@ISA = ('Exporter'); + +@EXPORT_OK = qw(Namespaces Validation); + +use File::Basename qw(dirname); +use File::Spec (); +use Symbol qw(gensym); +use XML::SAX::ParserFactory (); # loaded for simplicity + +use constant PARSER_DETAILS => "ParserDetails.ini"; + +use constant Namespaces => "http://xml.org/sax/features/namespaces"; +use constant Validation => "http://xml.org/sax/features/validation"; + +my $known_parsers = undef; + +# load_parsers takes the ParserDetails.ini file out of the same directory +# that XML::SAX is in, and looks at it. Format in POD below + +=begin EXAMPLE + +[XML::SAX::PurePerl] +http://xml.org/sax/features/namespaces = 1 +http://xml.org/sax/features/validation = 0 +# a comment + +# blank lines ignored + +[XML::SAX::AnotherParser] +http://xml.org/sax/features/namespaces = 0 +http://xml.org/sax/features/validation = 1 + +=end EXAMPLE + +=cut + +sub load_parsers { + my $class = shift; + my $dir = shift; + + # reset parsers + $known_parsers = []; + + # get directory from wherever XML::SAX is installed + if (!$dir) { + $dir = $INC{'XML/SAX.pm'}; + $dir = dirname($dir); + } + + my $fh = gensym(); + if (!open($fh, File::Spec->catfile($dir, "SAX", PARSER_DETAILS))) { + XML::SAX->do_warn("could not find " . PARSER_DETAILS . " in $dir/SAX\n"); + return $class; + } + + $known_parsers = $class->_parse_ini_file($fh); + + return $class; +} + +sub _parse_ini_file { + my $class = shift; + my ($fh) = @_; + + my @config; + + my $lineno = 0; + while (defined(my $line = <$fh>)) { + $lineno++; + my $original = $line; + # strip whitespace + $line =~ s/\s*$//m; + $line =~ s/^\s*//m; + # strip comments + $line =~ s/[#;].*$//m; + # ignore blanks + next if $line =~ /^$/m; + + # heading + if ($line =~ /^\[\s*(.*)\s*\]$/m) { + push @config, { Name => $1 }; + next; + } + + # instruction + elsif ($line =~ /^(.*?)\s*?=\s*(.*)$/) { + unless(@config) { + push @config, { Name => '' }; + } + $config[-1]{Features}{$1} = $2; + } + + # not whitespace, comment, or instruction + else { + die "Invalid line in ini: $lineno\n>>> $original\n"; + } + } + + return \@config; +} + +sub parsers { + my $class = shift; + if (!$known_parsers) { + $class->load_parsers(); + } + return $known_parsers; +} + +sub remove_parser { + my $class = shift; + my ($parser_module) = @_; + + if (!$known_parsers) { + $class->load_parsers(); + } + + @$known_parsers = grep { $_->{Name} ne $parser_module } @$known_parsers; + + return $class; +} + +sub add_parser { + my $class = shift; + my ($parser_module) = @_; + + if (!$known_parsers) { + $class->load_parsers(); + } + + # first load module, then query features, then push onto known_parsers, + + my $parser_file = $parser_module; + $parser_file =~ s/::/\//g; + $parser_file .= ".pm"; + + require $parser_file; + + my @features = $parser_module->supported_features(); + + my $new = { Name => $parser_module }; + foreach my $feature (@features) { + $new->{Features}{$feature} = 1; + } + + # If exists in list already, move to end. + my $done = 0; + my $pos = undef; + for (my $i = 0; $i < @$known_parsers; $i++) { + my $p = $known_parsers->[$i]; + if ($p->{Name} eq $parser_module) { + $pos = $i; + } + } + if (defined $pos) { + splice(@$known_parsers, $pos, 1); + push @$known_parsers, $new; + $done++; + } + + # Otherwise (not in list), add at end of list. + if (!$done) { + push @$known_parsers, $new; + } + + return $class; +} + +sub save_parsers { + my $class = shift; + + # get directory from wherever XML::SAX is installed + my $dir = $INC{'XML/SAX.pm'}; + $dir = dirname($dir); + + my $file = File::Spec->catfile($dir, "SAX", PARSER_DETAILS); + chmod 0644, $file; + unlink($file); + + my $fh = gensym(); + open($fh, ">$file") || + die "Cannot write to $file: $!"; + + foreach my $p (@$known_parsers) { + print $fh "[$p->{Name}]\n"; + foreach my $key (keys %{$p->{Features}}) { + print $fh "$key = $p->{Features}{$key}\n"; + } + print $fh "\n"; + } + + print $fh "\n"; + + close $fh; + + return $class; +} + +sub do_warn { + my $class = shift; + # Don't output warnings if running under Test::Harness + warn(@_) unless $ENV{HARNESS_ACTIVE}; +} + +1; +__END__ + +=head1 NAME + +XML::SAX - Simple API for XML + +=head1 SYNOPSIS + + use XML::SAX; + + # get a list of known parsers + my $parsers = XML::SAX->parsers(); + + # add/update a parser + XML::SAX->add_parser(q(XML::SAX::PurePerl)); + + # remove parser + XML::SAX->remove_parser(q(XML::SAX::Foodelberry)); + + # save parsers + XML::SAX->save_parsers(); + +=head1 DESCRIPTION + +XML::SAX is a SAX parser access API for Perl. It includes classes +and APIs required for implementing SAX drivers, along with a factory +class for returning any SAX parser installed on the user's system. + +=head1 USING A SAX2 PARSER + +The factory class is XML::SAX::ParserFactory. Please see the +documentation of that module for how to instantiate a SAX parser: +L. However if you don't want to load up +another manual page, here's a short synopsis: + + use XML::SAX::ParserFactory; + use XML::SAX::XYZHandler; + my $handler = XML::SAX::XYZHandler->new(); + my $p = XML::SAX::ParserFactory->parser(Handler => $handler); + $p->parse_uri("foo.xml"); + # or $p->parse_string("") or $p->parse_file($fh); + +This will automatically load a SAX2 parser (defaulting to +XML::SAX::PurePerl if no others are found) and return it to you. + +In order to learn how to use SAX to parse XML, you will need to read +L and for reference, L. + +=head1 WRITING A SAX2 PARSER + +The first thing to remember in writing a SAX2 parser is to subclass +XML::SAX::Base. This will make your life infinitely easier, by providing +a number of methods automagically for you. See L for more +details. + +When writing a SAX2 parser that is compatible with XML::SAX, you need +to inform XML::SAX of the presence of that driver when you install it. +In order to do that, XML::SAX contains methods for saving the fact that +the parser exists on your system to a "INI" file, which is then loaded +to determine which parsers are installed. + +The best way to do this is to follow these rules: + +=over 4 + +=item * Add XML::SAX as a prerequisite in Makefile.PL: + + WriteMakefile( + ... + PREREQ_PM => { 'XML::SAX' => 0 }, + ... + ); + +Alternatively you may wish to check for it in other ways that will +cause more than just a warning. + +=item * Add the following code snippet to your Makefile.PL: + + sub MY::install { + package MY; + my $script = shift->SUPER::install(@_); + if (ExtUtils::MakeMaker::prompt( + "Do you want to modify ParserDetails.ini?", 'Y') + =~ /^y/i) { + $script =~ s/install :: (.*)$/install :: $1 install_sax_driver/m; + $script .= <<"INSTALL"; + + install_sax_driver : + \t\@\$(PERL) -MXML::SAX -e "XML::SAX->add_parser(q(\$(NAME)))->save_parsers()" + + INSTALL + } + return $script; + } + +Note that you should check the output of this - \$(NAME) will use the name of +your distribution, which may not be exactly what you want. For example XML::LibXML +has a driver called XML::LibXML::SAX::Generator, which is used in place of +\$(NAME) in the above. + +=item * Add an XML::SAX test: + +A test file should be added to your t/ directory containing something like the +following: + + use Test; + BEGIN { plan tests => 3 } + use XML::SAX; + use XML::SAX::PurePerl::DebugHandler; + XML::SAX->add_parser(q(XML::SAX::MyDriver)); + local $XML::SAX::ParserPackage = 'XML::SAX::MyDriver'; + eval { + my $handler = XML::SAX::PurePerl::DebugHandler->new(); + ok($handler); + my $parser = XML::SAX::ParserFactory->parser(Handler => $handler); + ok($parser); + ok($parser->isa('XML::SAX::MyDriver'); + $parser->parse_string(""); + ok($handler->{seen}{start_element}); + }; + +=back + +=head1 EXPORTS + +By default, XML::SAX exports nothing into the caller's namespace. However you +can request the symbols C and C which are the +URIs for those features, allowing an easier way to request those features +via ParserFactory: + + use XML::SAX qw(Namespaces Validation); + my $factory = XML::SAX::ParserFactory->new(); + $factory->require_feature(Namespaces); + $factory->require_feature(Validation); + my $parser = $factory->parser(); + +=head1 AUTHOR + +Current maintainer: Grant McLean, grantm@cpan.org + +Originally written by: + +Matt Sergeant, matt@sergeant.org + +Kip Hampton, khampton@totalcinema.com + +Robin Berjon, robin@knowscape.com + +=head1 LICENSE + +This is free software, you may use it and distribute it under +the same terms as Perl itself. + +=head1 SEE ALSO + +L for writing SAX Filters and Parsers + +L for an XML parser written in 100% +pure perl. + +L for details on exception handling + +=cut + diff --git a/tests/lib/XML/SAX/Base.pm b/tests/lib/XML/SAX/Base.pm new file mode 100644 index 0000000000..fa70b263ab --- /dev/null +++ b/tests/lib/XML/SAX/Base.pm @@ -0,0 +1,3176 @@ +package XML::SAX::Base; +BEGIN { + $XML::SAX::Base::VERSION = '1.08'; +} + +# version 0.10 - Kip Hampton +# version 0.13 - Robin Berjon +# version 0.15 - Kip Hampton +# version 0.17 - Kip Hampton +# version 0.19 - Kip Hampton +# version 0.21 - Kip Hampton +# version 0.22 - Robin Berjon +# version 0.23 - Matt Sergeant +# version 0.24 - Robin Berjon +# version 0.25 - Kip Hampton +# version 1.00 - Kip Hampton +# version 1.01 - Kip Hampton +# version 1.02 - Robin Berjon +# version 1.03 - Matt Sergeant +# version 1.04 - Kip Hampton +# version 1.05 - Grant McLean +# version 1.06 - Grant McLean +# version 1.07 - Grant McLean +# version 1.08 - Grant McLean + +#-----------------------------------------------------# +# STOP!!!!! +# +# This file is generated by the 'BuildSAXBase.pl' file +# that ships with the XML::SAX::Base distribution. +# If you need to make changes, patch that file NOT +# XML/SAX/Base.pm Better yet, fork the git repository +# commit your changes and send a pull request: +# https://github.com/grantm/XML-SAX-Base +#-----------------------------------------------------# + +use strict; + +use XML::SAX::Exception qw(); + +sub end_prefix_mapping { + my $self = shift; + if (defined $self->{Methods}->{'end_prefix_mapping'}) { + $self->{Methods}->{'end_prefix_mapping'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('end_prefix_mapping') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_prefix_mapping'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_prefix_mapping') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_prefix_mapping'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->end_prefix_mapping(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_prefix_mapping'} = sub { $handler->end_prefix_mapping(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_prefix_mapping(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_prefix_mapping'} = sub { $handler->end_prefix_mapping(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_prefix_mapping'} = sub { }; + } + } + +} + +sub internal_entity_decl { + my $self = shift; + if (defined $self->{Methods}->{'internal_entity_decl'}) { + $self->{Methods}->{'internal_entity_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DeclHandler'} and $method = $callbacks->{'DeclHandler'}->can('internal_entity_decl') ) { + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'internal_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('internal_entity_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'internal_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DeclHandler'} + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DeclHandler'}->internal_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'internal_entity_decl'} = sub { $handler->internal_entity_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->internal_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'internal_entity_decl'} = sub { $handler->internal_entity_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'internal_entity_decl'} = sub { }; + } + } + +} + +sub characters { + my $self = shift; + if (defined $self->{Methods}->{'characters'}) { + $self->{Methods}->{'characters'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('characters') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'characters'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('characters') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'characters'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('characters') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'characters'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->characters(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'characters'} = sub { $handler->characters(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->characters(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'characters'} = sub { $handler->characters(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->characters(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'characters'} = sub { $handler->characters(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'characters'} = sub { }; + } + } + +} + +sub start_element { + my $self = shift; + if (defined $self->{Methods}->{'start_element'}) { + $self->{Methods}->{'start_element'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('start_element') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('start_element') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_element') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->start_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_element'} = sub { $handler->start_element(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->start_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_element'} = sub { $handler->start_element(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_element'} = sub { $handler->start_element(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_element'} = sub { }; + } + } + +} + +sub external_entity_decl { + my $self = shift; + if (defined $self->{Methods}->{'external_entity_decl'}) { + $self->{Methods}->{'external_entity_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DeclHandler'} and $method = $callbacks->{'DeclHandler'}->can('external_entity_decl') ) { + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'external_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('external_entity_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'external_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DeclHandler'} + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DeclHandler'}->external_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'external_entity_decl'} = sub { $handler->external_entity_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->external_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'external_entity_decl'} = sub { $handler->external_entity_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'external_entity_decl'} = sub { }; + } + } + +} + +sub xml_decl { + my $self = shift; + if (defined $self->{Methods}->{'xml_decl'}) { + $self->{Methods}->{'xml_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('xml_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'xml_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('xml_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'xml_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->xml_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'xml_decl'} = sub { $handler->xml_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->xml_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'xml_decl'} = sub { $handler->xml_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'xml_decl'} = sub { }; + } + } + +} + +sub entity_decl { + my $self = shift; + if (defined $self->{Methods}->{'entity_decl'}) { + $self->{Methods}->{'entity_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('entity_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('entity_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'entity_decl'} = sub { $handler->entity_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'entity_decl'} = sub { $handler->entity_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'entity_decl'} = sub { }; + } + } + +} + +sub end_dtd { + my $self = shift; + if (defined $self->{Methods}->{'end_dtd'}) { + $self->{Methods}->{'end_dtd'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('end_dtd') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_dtd'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_dtd') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_dtd'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->end_dtd(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_dtd'} = sub { $handler->end_dtd(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_dtd(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_dtd'} = sub { $handler->end_dtd(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_dtd'} = sub { }; + } + } + +} + +sub unparsed_entity_decl { + my $self = shift; + if (defined $self->{Methods}->{'unparsed_entity_decl'}) { + $self->{Methods}->{'unparsed_entity_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('unparsed_entity_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'unparsed_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('unparsed_entity_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'unparsed_entity_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->unparsed_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'unparsed_entity_decl'} = sub { $handler->unparsed_entity_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->unparsed_entity_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'unparsed_entity_decl'} = sub { $handler->unparsed_entity_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'unparsed_entity_decl'} = sub { }; + } + } + +} + +sub processing_instruction { + my $self = shift; + if (defined $self->{Methods}->{'processing_instruction'}) { + $self->{Methods}->{'processing_instruction'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('processing_instruction') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'processing_instruction'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('processing_instruction') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'processing_instruction'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('processing_instruction') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'processing_instruction'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->processing_instruction(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'processing_instruction'} = sub { $handler->processing_instruction(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->processing_instruction(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'processing_instruction'} = sub { $handler->processing_instruction(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->processing_instruction(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'processing_instruction'} = sub { $handler->processing_instruction(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'processing_instruction'} = sub { }; + } + } + +} + +sub attribute_decl { + my $self = shift; + if (defined $self->{Methods}->{'attribute_decl'}) { + $self->{Methods}->{'attribute_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DeclHandler'} and $method = $callbacks->{'DeclHandler'}->can('attribute_decl') ) { + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'attribute_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('attribute_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'attribute_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DeclHandler'} + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DeclHandler'}->attribute_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'attribute_decl'} = sub { $handler->attribute_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->attribute_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'attribute_decl'} = sub { $handler->attribute_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'attribute_decl'} = sub { }; + } + } + +} + +sub fatal_error { + my $self = shift; + if (defined $self->{Methods}->{'fatal_error'}) { + $self->{Methods}->{'fatal_error'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ErrorHandler'} and $method = $callbacks->{'ErrorHandler'}->can('fatal_error') ) { + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'fatal_error'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('fatal_error') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'fatal_error'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ErrorHandler'} + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ErrorHandler'}->fatal_error(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'fatal_error'} = sub { $handler->fatal_error(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->fatal_error(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'fatal_error'} = sub { $handler->fatal_error(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'fatal_error'} = sub { }; + } + } + +} + +sub end_cdata { + my $self = shift; + if (defined $self->{Methods}->{'end_cdata'}) { + $self->{Methods}->{'end_cdata'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('end_cdata') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('end_cdata') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_cdata') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->end_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_cdata'} = sub { $handler->end_cdata(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->end_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_cdata'} = sub { $handler->end_cdata(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_cdata'} = sub { $handler->end_cdata(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_cdata'} = sub { }; + } + } + +} + +sub start_entity { + my $self = shift; + if (defined $self->{Methods}->{'start_entity'}) { + $self->{Methods}->{'start_entity'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('start_entity') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_entity') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->start_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_entity'} = sub { $handler->start_entity(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_entity'} = sub { $handler->start_entity(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_entity'} = sub { }; + } + } + +} + +sub start_prefix_mapping { + my $self = shift; + if (defined $self->{Methods}->{'start_prefix_mapping'}) { + $self->{Methods}->{'start_prefix_mapping'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('start_prefix_mapping') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_prefix_mapping'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_prefix_mapping') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_prefix_mapping'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->start_prefix_mapping(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_prefix_mapping'} = sub { $handler->start_prefix_mapping(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_prefix_mapping(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_prefix_mapping'} = sub { $handler->start_prefix_mapping(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_prefix_mapping'} = sub { }; + } + } + +} + +sub error { + my $self = shift; + if (defined $self->{Methods}->{'error'}) { + $self->{Methods}->{'error'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ErrorHandler'} and $method = $callbacks->{'ErrorHandler'}->can('error') ) { + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'error'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('error') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'error'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ErrorHandler'} + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ErrorHandler'}->error(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'error'} = sub { $handler->error(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->error(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'error'} = sub { $handler->error(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'error'} = sub { }; + } + } + +} + +sub start_document { + my $self = shift; + if (defined $self->{Methods}->{'start_document'}) { + $self->{Methods}->{'start_document'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('start_document') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('start_document') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_document') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->start_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'start_document'} = sub { $handler->start_document(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->start_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_document'} = sub { $handler->start_document(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_document'} = sub { $handler->start_document(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_document'} = sub { }; + } + } + +} + +sub ignorable_whitespace { + my $self = shift; + if (defined $self->{Methods}->{'ignorable_whitespace'}) { + $self->{Methods}->{'ignorable_whitespace'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('ignorable_whitespace') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('ignorable_whitespace') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('ignorable_whitespace') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->ignorable_whitespace(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $handler->ignorable_whitespace(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->ignorable_whitespace(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $handler->ignorable_whitespace(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->ignorable_whitespace(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'ignorable_whitespace'} = sub { $handler->ignorable_whitespace(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'ignorable_whitespace'} = sub { }; + } + } + +} + +sub end_document { + my $self = shift; + if (defined $self->{Methods}->{'end_document'}) { + $self->{Methods}->{'end_document'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('end_document') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('end_document') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_document') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_document'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->end_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_document'} = sub { $handler->end_document(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->end_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_document'} = sub { $handler->end_document(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_document(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_document'} = sub { $handler->end_document(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_document'} = sub { }; + } + } + +} + +sub start_cdata { + my $self = shift; + if (defined $self->{Methods}->{'start_cdata'}) { + $self->{Methods}->{'start_cdata'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('start_cdata') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('start_cdata') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_cdata') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_cdata'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->start_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'start_cdata'} = sub { $handler->start_cdata(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->start_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_cdata'} = sub { $handler->start_cdata(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_cdata(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_cdata'} = sub { $handler->start_cdata(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_cdata'} = sub { }; + } + } + +} + +sub set_document_locator { + my $self = shift; + if (defined $self->{Methods}->{'set_document_locator'}) { + $self->{Methods}->{'set_document_locator'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('set_document_locator') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'set_document_locator'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('set_document_locator') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'set_document_locator'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('set_document_locator') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'set_document_locator'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->set_document_locator(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'set_document_locator'} = sub { $handler->set_document_locator(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->set_document_locator(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'set_document_locator'} = sub { $handler->set_document_locator(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->set_document_locator(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'set_document_locator'} = sub { $handler->set_document_locator(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'set_document_locator'} = sub { }; + } + } + +} + +sub attlist_decl { + my $self = shift; + if (defined $self->{Methods}->{'attlist_decl'}) { + $self->{Methods}->{'attlist_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('attlist_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'attlist_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('attlist_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'attlist_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->attlist_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'attlist_decl'} = sub { $handler->attlist_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->attlist_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'attlist_decl'} = sub { $handler->attlist_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'attlist_decl'} = sub { }; + } + } + +} + +sub start_dtd { + my $self = shift; + if (defined $self->{Methods}->{'start_dtd'}) { + $self->{Methods}->{'start_dtd'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('start_dtd') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_dtd'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('start_dtd') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_dtd'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->start_dtd(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'start_dtd'} = sub { $handler->start_dtd(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->start_dtd(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'start_dtd'} = sub { $handler->start_dtd(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'start_dtd'} = sub { }; + } + } + +} + +sub resolve_entity { + my $self = shift; + if (defined $self->{Methods}->{'resolve_entity'}) { + $self->{Methods}->{'resolve_entity'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'EntityResolver'} and $method = $callbacks->{'EntityResolver'}->can('resolve_entity') ) { + my $handler = $callbacks->{'EntityResolver'}; + $self->{Methods}->{'resolve_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('resolve_entity') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'resolve_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'EntityResolver'} + and $callbacks->{'EntityResolver'}->can('AUTOLOAD') + and $callbacks->{'EntityResolver'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'EntityResolver'}->resolve_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'EntityResolver'}; + $self->{Methods}->{'resolve_entity'} = sub { $handler->resolve_entity(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->resolve_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'resolve_entity'} = sub { $handler->resolve_entity(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'resolve_entity'} = sub { }; + } + } + +} + +sub entity_reference { + my $self = shift; + if (defined $self->{Methods}->{'entity_reference'}) { + $self->{Methods}->{'entity_reference'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('entity_reference') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'entity_reference'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('entity_reference') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'entity_reference'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->entity_reference(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'entity_reference'} = sub { $handler->entity_reference(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->entity_reference(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'entity_reference'} = sub { $handler->entity_reference(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'entity_reference'} = sub { }; + } + } + +} + +sub element_decl { + my $self = shift; + if (defined $self->{Methods}->{'element_decl'}) { + $self->{Methods}->{'element_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DeclHandler'} and $method = $callbacks->{'DeclHandler'}->can('element_decl') ) { + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'element_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('element_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'element_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DeclHandler'} + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') + and $callbacks->{'DeclHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DeclHandler'}->element_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DeclHandler'}; + $self->{Methods}->{'element_decl'} = sub { $handler->element_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->element_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'element_decl'} = sub { $handler->element_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'element_decl'} = sub { }; + } + } + +} + +sub notation_decl { + my $self = shift; + if (defined $self->{Methods}->{'notation_decl'}) { + $self->{Methods}->{'notation_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('notation_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'notation_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('notation_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'notation_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->notation_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'notation_decl'} = sub { $handler->notation_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->notation_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'notation_decl'} = sub { $handler->notation_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'notation_decl'} = sub { }; + } + } + +} + +sub skipped_entity { + my $self = shift; + if (defined $self->{Methods}->{'skipped_entity'}) { + $self->{Methods}->{'skipped_entity'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('skipped_entity') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'skipped_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('skipped_entity') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'skipped_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->skipped_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'skipped_entity'} = sub { $handler->skipped_entity(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->skipped_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'skipped_entity'} = sub { $handler->skipped_entity(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'skipped_entity'} = sub { }; + } + } + +} + +sub end_element { + my $self = shift; + if (defined $self->{Methods}->{'end_element'}) { + $self->{Methods}->{'end_element'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ContentHandler'} and $method = $callbacks->{'ContentHandler'}->can('end_element') ) { + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('end_element') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_element') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_element'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ContentHandler'} + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') + and $callbacks->{'ContentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ContentHandler'}->end_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ContentHandler'}; + $self->{Methods}->{'end_element'} = sub { $handler->end_element(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->end_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'end_element'} = sub { $handler->end_element(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_element(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_element'} = sub { $handler->end_element(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_element'} = sub { }; + } + } + +} + +sub doctype_decl { + my $self = shift; + if (defined $self->{Methods}->{'doctype_decl'}) { + $self->{Methods}->{'doctype_decl'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DTDHandler'} and $method = $callbacks->{'DTDHandler'}->can('doctype_decl') ) { + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'doctype_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('doctype_decl') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'doctype_decl'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DTDHandler'} + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') + and $callbacks->{'DTDHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DTDHandler'}->doctype_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DTDHandler'}; + $self->{Methods}->{'doctype_decl'} = sub { $handler->doctype_decl(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->doctype_decl(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'doctype_decl'} = sub { $handler->doctype_decl(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'doctype_decl'} = sub { }; + } + } + +} + +sub comment { + my $self = shift; + if (defined $self->{Methods}->{'comment'}) { + $self->{Methods}->{'comment'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'DocumentHandler'} and $method = $callbacks->{'DocumentHandler'}->can('comment') ) { + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'comment'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('comment') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'comment'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('comment') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'comment'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'DocumentHandler'} + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') + and $callbacks->{'DocumentHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'DocumentHandler'}->comment(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'DocumentHandler'}; + $self->{Methods}->{'comment'} = sub { $handler->comment(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->comment(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'comment'} = sub { $handler->comment(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->comment(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'comment'} = sub { $handler->comment(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'comment'} = sub { }; + } + } + +} + +sub end_entity { + my $self = shift; + if (defined $self->{Methods}->{'end_entity'}) { + $self->{Methods}->{'end_entity'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'LexicalHandler'} and $method = $callbacks->{'LexicalHandler'}->can('end_entity') ) { + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('end_entity') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_entity'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'LexicalHandler'} + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') + and $callbacks->{'LexicalHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'LexicalHandler'}->end_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'LexicalHandler'}; + $self->{Methods}->{'end_entity'} = sub { $handler->end_entity(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->end_entity(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'end_entity'} = sub { $handler->end_entity(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'end_entity'} = sub { }; + } + } + +} + +sub warning { + my $self = shift; + if (defined $self->{Methods}->{'warning'}) { + $self->{Methods}->{'warning'}->(@_); + } + else { + my $method; + my $callbacks; + if (exists $self->{ParseOptions}) { + $callbacks = $self->{ParseOptions}; + } + else { + $callbacks = $self; + } + if (0) { # dummy to make elsif's below compile + } + elsif (defined $callbacks->{'ErrorHandler'} and $method = $callbacks->{'ErrorHandler'}->can('warning') ) { + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'warning'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'Handler'} and $method = $callbacks->{'Handler'}->can('warning') ) { + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'warning'} = sub { $method->($handler, @_) }; + return $method->($handler, @_); + } + elsif (defined $callbacks->{'ErrorHandler'} + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') + and $callbacks->{'ErrorHandler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'ErrorHandler'}->warning(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'ErrorHandler'}; + $self->{Methods}->{'warning'} = sub { $handler->warning(@_) }; + } + return $res; + } + elsif (defined $callbacks->{'Handler'} + and $callbacks->{'Handler'}->can('AUTOLOAD') + and $callbacks->{'Handler'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my $res = eval { $callbacks->{'Handler'}->warning(@_) }; + if ($@) { + die $@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my $handler = $callbacks->{'Handler'}; + $self->{Methods}->{'warning'} = sub { $handler->warning(@_) }; + } + return $res; + } + else { + $self->{Methods}->{'warning'} = sub { }; + } + } + +} + +#-------------------------------------------------------------------# +# Class->new(%options) +#-------------------------------------------------------------------# +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $options = ($#_ == 0) ? shift : { @_ }; + + unless ( defined( $options->{Handler} ) or + defined( $options->{ContentHandler} ) or + defined( $options->{DTDHandler} ) or + defined( $options->{DocumentHandler} ) or + defined( $options->{LexicalHandler} ) or + defined( $options->{ErrorHandler} ) or + defined( $options->{DeclHandler} ) ) { + + $options->{Handler} = XML::SAX::Base::NoHandler->new; + } + + my $self = bless $options, $class; + # turn NS processing on by default + $self->set_feature('http://xml.org/sax/features/namespaces', 1); + return $self; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse(%options) +#-------------------------------------------------------------------# +sub parse { + my $self = shift; + my $parse_options = $self->get_options(@_); + local $self->{ParseOptions} = $parse_options; + if ($self->{Parent}) { # calling parse on a filter for some reason + return $self->{Parent}->parse($parse_options); + } + else { + my $method; + if (defined $parse_options->{Source}{CharacterStream} and $method = $self->can('_parse_characterstream')) { + warn("parse charstream???\n"); + return $method->($self, $parse_options->{Source}{CharacterStream}); + } + elsif (defined $parse_options->{Source}{ByteStream} and $method = $self->can('_parse_bytestream')) { + return $method->($self, $parse_options->{Source}{ByteStream}); + } + elsif (defined $parse_options->{Source}{String} and $method = $self->can('_parse_string')) { + return $method->($self, $parse_options->{Source}{String}); + } + elsif (defined $parse_options->{Source}{SystemId} and $method = $self->can('_parse_systemid')) { + return $method->($self, $parse_options->{Source}{SystemId}); + } + else { + die "No _parse_* routine defined on this driver (If it is a filter, remember to set the Parent property. If you call the parse() method, make sure to set a Source. You may want to call parse_uri, parse_string or parse_file instead.) [$self]"; + } + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_file(%options) +#-------------------------------------------------------------------# +sub parse_file { + my $self = shift; + my $file = shift; + return $self->parse_uri($file, @_) if ref(\$file) eq 'SCALAR'; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{ByteStream} = $file; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_uri(%options) +#-------------------------------------------------------------------# +sub parse_uri { + my $self = shift; + my $file = shift; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{SystemId} = $file; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_string(%options) +#-------------------------------------------------------------------# +sub parse_string { + my $self = shift; + my $string = shift; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{String} = $string; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_options +#-------------------------------------------------------------------# +sub get_options { + my $self = shift; + + if (@_ == 1) { + return { %$self, %{$_[0]} }; + } else { + return { %$self, @_ }; + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_features +#-------------------------------------------------------------------# +sub get_features { + return ( + 'http://xml.org/sax/features/external-general-entities' => undef, + 'http://xml.org/sax/features/external-parameter-entities' => undef, + 'http://xml.org/sax/features/is-standalone' => undef, + 'http://xml.org/sax/features/lexical-handler' => undef, + 'http://xml.org/sax/features/parameter-entities' => undef, + 'http://xml.org/sax/features/namespaces' => 1, + 'http://xml.org/sax/features/namespace-prefixes' => 0, + 'http://xml.org/sax/features/string-interning' => undef, + 'http://xml.org/sax/features/use-attributes2' => undef, + 'http://xml.org/sax/features/use-locator2' => undef, + 'http://xml.org/sax/features/validation' => undef, + + 'http://xml.org/sax/properties/dom-node' => undef, + 'http://xml.org/sax/properties/xml-string' => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_feature +#-------------------------------------------------------------------# +sub get_feature { + my $self = shift; + my $feat = shift; + + # check %FEATURES to see if it's there, and return it if so + # throw XML::SAX::Exception::NotRecognized if it's not there + # throw XML::SAX::Exception::NotSupported if it's there but we + # don't support it + + my %features = $self->get_features(); + if (exists $features{$feat}) { + my %supported = map { $_ => 1 } $self->supported_features(); + if ($supported{$feat}) { + return $self->{__PACKAGE__ . "::Features"}{$feat}; + } + throw XML::SAX::Exception::NotSupported( + Message => "The feature '$feat' is not supported by " . ref($self), + Exception => undef, + ); + } + throw XML::SAX::Exception::NotRecognized( + Message => "The feature '$feat' is not recognized by " . ref($self), + Exception => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# set_feature +#-------------------------------------------------------------------# +sub set_feature { + my $self = shift; + my $feat = shift; + my $value = shift; + # check %FEATURES to see if it's there, and set it if so + # throw XML::SAX::Exception::NotRecognized if it's not there + # throw XML::SAX::Exception::NotSupported if it's there but we + # don't support it + + my %features = $self->get_features(); + if (exists $features{$feat}) { + my %supported = map { $_ => 1 } $self->supported_features(); + if ($supported{$feat}) { + return $self->{__PACKAGE__ . "::Features"}{$feat} = $value; + } + throw XML::SAX::Exception::NotSupported( + Message => "The feature '$feat' is not supported by " . ref($self), + Exception => undef, + ); + } + throw XML::SAX::Exception::NotRecognized( + Message => "The feature '$feat' is not recognized by " . ref($self), + Exception => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_handler and friends +#-------------------------------------------------------------------# +sub get_handler { + my $self = shift; + my $handler_type = shift; + $handler_type ||= 'Handler'; + return defined( $self->{$handler_type} ) ? $self->{$handler_type} : undef; +} + +sub get_document_handler { + my $self = shift; + return $self->get_handler('DocumentHandler', @_); +} + +sub get_content_handler { + my $self = shift; + return $self->get_handler('ContentHandler', @_); +} + +sub get_dtd_handler { + my $self = shift; + return $self->get_handler('DTDHandler', @_); +} + +sub get_lexical_handler { + my $self = shift; + return $self->get_handler('LexicalHandler', @_); +} + +sub get_decl_handler { + my $self = shift; + return $self->get_handler('DeclHandler', @_); +} + +sub get_error_handler { + my $self = shift; + return $self->get_handler('ErrorHandler', @_); +} + +sub get_entity_resolver { + my $self = shift; + return $self->get_handler('EntityResolver', @_); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# set_handler and friends +#-------------------------------------------------------------------# +sub set_handler { + my $self = shift; + my ($new_handler, $handler_type) = reverse @_; + $handler_type ||= 'Handler'; + $self->{Methods} = {} if $self->{Methods}; + $self->{$handler_type} = $new_handler; + $self->{ParseOptions}->{$handler_type} = $new_handler; + return 1; +} + +sub set_document_handler { + my $self = shift; + return $self->set_handler('DocumentHandler', @_); +} + +sub set_content_handler { + my $self = shift; + return $self->set_handler('ContentHandler', @_); +} +sub set_dtd_handler { + my $self = shift; + return $self->set_handler('DTDHandler', @_); +} +sub set_lexical_handler { + my $self = shift; + return $self->set_handler('LexicalHandler', @_); +} +sub set_decl_handler { + my $self = shift; + return $self->set_handler('DeclHandler', @_); +} +sub set_error_handler { + my $self = shift; + return $self->set_handler('ErrorHandler', @_); +} +sub set_entity_resolver { + my $self = shift; + return $self->set_handler('EntityResolver', @_); +} + +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# supported_features +#-------------------------------------------------------------------# +sub supported_features { + my $self = shift; + # Only namespaces are required by all parsers + return ( + 'http://xml.org/sax/features/namespaces', + ); +} +#-------------------------------------------------------------------# + +sub no_op { + # this space intentionally blank +} + + +package XML::SAX::Base::NoHandler; +BEGIN { + $XML::SAX::Base::NoHandler::VERSION = '1.08'; +} + +# we need a fake handler that doesn't implement anything, this +# simplifies the code a lot (though given the recent changes, +# it may be better to do without) +sub new { + #warn "no handler called\n"; + return bless {}; +} + +1; + +__END__ + +=head1 NAME + +XML::SAX::Base - Base class SAX Drivers and Filters + +=head1 SYNOPSIS + + package MyFilter; + use XML::SAX::Base; + @ISA = ('XML::SAX::Base'); + +=head1 DESCRIPTION + +This module has a very simple task - to be a base class for PerlSAX +drivers and filters. It's default behaviour is to pass the input directly +to the output unchanged. It can be useful to use this module as a base class +so you don't have to, for example, implement the characters() callback. + +The main advantages that it provides are easy dispatching of events the right +way (ie it takes care for you of checking that the handler has implemented +that method, or has defined an AUTOLOAD), and the guarantee that filters +will pass along events that they aren't implementing to handlers downstream +that might nevertheless be interested in them. + +=head1 WRITING SAX DRIVERS AND FILTERS + +The Perl Sax API Reference is at L. + +Writing SAX Filters is tremendously easy: all you need to do is +inherit from this module, and define the events you want to handle. A +more detailed explanation can be found at +http://www.xml.com/pub/a/2001/10/10/sax-filters.html. + +Writing Drivers is equally simple. The one thing you need to pay +attention to is B to call events yourself (this applies to Filters +as well). For instance: + + package MyFilter; + use base qw(XML::SAX::Base); + + sub start_element { + my $self = shift; + my $data = shift; + # do something + $self->{Handler}->start_element($data); # BAD + } + +The above example works well as precisely that: an example. But it has +several faults: 1) it doesn't test to see whether the handler defines +start_element. Perhaps it doesn't want to see that event, in which +case you shouldn't throw it (otherwise it'll die). 2) it doesn't check +ContentHandler and then Handler (ie it doesn't look to see that the +user hasn't requested events on a specific handler, and if not on the +default one), 3) if it did check all that, not only would the code be +cumbersome (see this module's source to get an idea) but it would also +probably have to check for a DocumentHandler (in case this were SAX1) +and for AUTOLOADs potentially defined in all these packages. As you can +tell, that would be fairly painful. Instead of going through that, +simply remember to use code similar to the following instead: + + package MyFilter; + use base qw(XML::SAX::Base); + + sub start_element { + my $self = shift; + my $data = shift; + # do something to filter + $self->SUPER::start_element($data); # GOOD (and easy) ! + } + +This way, once you've done your job you hand the ball back to +XML::SAX::Base and it takes care of all those problems for you! + +Note that the above example doesn't apply to filters only, drivers +will benefit from the exact same feature. + +=head1 METHODS + +A number of methods are defined within this class for the purpose of +inheritance. Some probably don't need to be overridden (eg parse_file) +but some clearly should be (eg parse). Options for these methods are +described in the PerlSAX2 specification available from +http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/perl-xml/libxml-perl/doc/sax-2.0.html?rev=HEAD&content-type=text/html. + +=over 4 + +=item * parse + +The parse method is the main entry point to parsing documents. Internally +the parse method will detect what type of "thing" you are parsing, and +call the appropriate method in your implementation class. Here is the +mapping table of what is in the Source options (see the Perl SAX 2.0 +specification for the meaning of these values): + + Source Contains parse() calls + =============== ============= + CharacterStream (*) _parse_characterstream($stream, $options) + ByteStream _parse_bytestream($stream, $options) + String _parse_string($string, $options) + SystemId _parse_systemid($string, $options) + +However note that these methods may not be sensible if your driver class +is not for parsing XML. An example might be a DBI driver that generates +XML/SAX from a database table. If that is the case, you likely want to +write your own parse() method. + +Also note that the Source may contain both a PublicId entry, and an +Encoding entry. To get at these, examine $options->{Source} as passed +to your method. + +(*) A CharacterStream is a filehandle that does not need any encoding +translation done on it. This is implemented as a regular filehandle +and only works under Perl 5.7.2 or higher using PerlIO. To get a single +character, or number of characters from it, use the perl core read() +function. To get a single byte from it (or number of bytes), you can +use sysread(). The encoding of the stream should be in the Encoding +entry for the Source. + +=item * parse_file, parse_uri, parse_string + +These are all convenience variations on parse(), and in fact simply +set up the options before calling it. You probably don't need to +override these. + +=item * get_options + +This is a convenience method to get options in SAX2 style, or more +generically either as hashes or as hashrefs (it returns a hashref). +You will probably want to use this method in your own implementations +of parse() and of new(). + +=item * get_feature, set_feature + +These simply get and set features, and throw the +appropriate exceptions defined in the specification if need be. + +If your subclass defines features not defined in this one, +then you should override these methods in such a way that they check for +your features first, and then call the base class's methods +for features not defined by your class. An example would be: + + sub get_feature { + my $self = shift; + my $feat = shift; + if (exists $MY_FEATURES{$feat}) { + # handle the feature in various ways + } + else { + return $self->SUPER::get_feature($feat); + } + } + +Currently this part is unimplemented. + + +=item * set_handler + +This method takes a handler type (Handler, ContentHandler, etc.) and a +handler object as arguments, and changes the current handler for that +handler type, while taking care of resetting the internal state that +needs to be reset. This allows one to change a handler during parse +without running into problems (changing it on the parser object +directly will most likely cause trouble). + +=item * set_document_handler, set_content_handler, set_dtd_handler, set_lexical_handler, set_decl_handler, set_error_handler, set_entity_resolver + +These are just simple wrappers around the former method, and take a +handler object as their argument. Internally they simply call +set_handler with the correct arguments. + +=item * get_handler + +The inverse of set_handler, this method takes a an optional string containing a handler type (DTDHandler, +ContentHandler, etc. 'Handler' is used if no type is passed). It returns a reference to the object that implements +that that class, or undef if that handler type is not set for the current driver/filter. + +=item * get_document_handler, get_content_handler, get_dtd_handler, get_lexical_handler, get_decl_handler, +get_error_handler, get_entity_resolver + +These are just simple wrappers around the get_handler() method, and take no arguments. Internally +they simply call get_handler with the correct handler type name. + +=back + +It would be rather useless to describe all the methods that this +module implements here. They are all the methods supported in SAX1 and +SAX2. In case your memory is a little short, here is a list. The +apparent duplicates are there so that both versions of SAX can be +supported. + +=over 4 + +=item * start_document + +=item * end_document + +=item * start_element + +=item * start_document + +=item * end_document + +=item * start_element + +=item * end_element + +=item * characters + +=item * processing_instruction + +=item * ignorable_whitespace + +=item * set_document_locator + +=item * start_prefix_mapping + +=item * end_prefix_mapping + +=item * skipped_entity + +=item * start_cdata + +=item * end_cdata + +=item * comment + +=item * entity_reference + +=item * notation_decl + +=item * unparsed_entity_decl + +=item * element_decl + +=item * attlist_decl + +=item * doctype_decl + +=item * xml_decl + +=item * entity_decl + +=item * attribute_decl + +=item * internal_entity_decl + +=item * external_entity_decl + +=item * resolve_entity + +=item * start_dtd + +=item * end_dtd + +=item * start_entity + +=item * end_entity + +=item * warning + +=item * error + +=item * fatal_error + +=back + +=head1 TODO + + - more tests + - conform to the "SAX Filters" and "Java and DOM compatibility" + sections of the SAX2 document. + +=head1 AUTHOR + +Kip Hampton (khampton@totalcinema.com) did most of the work, after porting +it from XML::Filter::Base. + +Robin Berjon (robin@knowscape.com) pitched in with patches to make it +usable as a base for drivers as well as filters, along with other patches. + +Matt Sergeant (matt@sergeant.org) wrote the original XML::Filter::Base, +and patched a few things here and there, and imported it into +the XML::SAX distribution. + +=head1 SEE ALSO + +L + +=cut + diff --git a/tests/lib/XML/SAX/BuildSAXBase.pl b/tests/lib/XML/SAX/BuildSAXBase.pl new file mode 100644 index 0000000000..a3fa2a67e2 --- /dev/null +++ b/tests/lib/XML/SAX/BuildSAXBase.pl @@ -0,0 +1,847 @@ +#!/usr/bin/perl +# +# This file is used to generate lib/XML/SAX/Base.pm. There is a pre-generated +# Base.pm file included in the distribution so you don't need to run this +# script unless you are attempting to modify the code. +# +# The code in this file was adapted from the Makefile.PL when XML::SAX::Base +# was split back out into its own distribution. +# +# You can manually run this file: +# +# perl ./BuildSAXBase.pl +# +# or better yet it will be invoked by automatically Dist::Zilla when building +# a release from the git repository. +# +# dzil build +# + +package SAX::Base::Builder; + +use strict; +use warnings; + +use File::Spec; + +write_xml_sax_base() unless caller(); + +sub build_xml_sax_base { + my $code = <<'EOHEADER'; +package XML::SAX::Base; + +# version 0.10 - Kip Hampton +# version 0.13 - Robin Berjon +# version 0.15 - Kip Hampton +# version 0.17 - Kip Hampton +# version 0.19 - Kip Hampton +# version 0.21 - Kip Hampton +# version 0.22 - Robin Berjon +# version 0.23 - Matt Sergeant +# version 0.24 - Robin Berjon +# version 0.25 - Kip Hampton +# version 1.00 - Kip Hampton +# version 1.01 - Kip Hampton +# version 1.02 - Robin Berjon +# version 1.03 - Matt Sergeant +# version 1.04 - Kip Hampton +# version 1.05 - Grant McLean +# version 1.06 - Grant McLean +# version 1.07 - Grant McLean +# version 1.08 - Grant McLean + +#-----------------------------------------------------# +# STOP!!!!! +# +# This file is generated by the 'BuildSAXBase.pl' file +# that ships with the XML::SAX::Base distribution. +# If you need to make changes, patch that file NOT +# XML/SAX/Base.pm Better yet, fork the git repository +# commit your changes and send a pull request: +# https://github.com/grantm/XML-SAX-Base +#-----------------------------------------------------# + +use strict; + +use XML::SAX::Exception qw(); + +EOHEADER + + my %EVENT_SPEC = ( + start_document => [qw(ContentHandler DocumentHandler Handler)], + end_document => [qw(ContentHandler DocumentHandler Handler)], + start_element => [qw(ContentHandler DocumentHandler Handler)], + end_element => [qw(ContentHandler DocumentHandler Handler)], + characters => [qw(ContentHandler DocumentHandler Handler)], + processing_instruction => [qw(ContentHandler DocumentHandler Handler)], + ignorable_whitespace => [qw(ContentHandler DocumentHandler Handler)], + set_document_locator => [qw(ContentHandler DocumentHandler Handler)], + start_prefix_mapping => [qw(ContentHandler Handler)], + end_prefix_mapping => [qw(ContentHandler Handler)], + skipped_entity => [qw(ContentHandler Handler)], + start_cdata => [qw(DocumentHandler LexicalHandler Handler)], + end_cdata => [qw(DocumentHandler LexicalHandler Handler)], + comment => [qw(DocumentHandler LexicalHandler Handler)], + entity_reference => [qw(DocumentHandler Handler)], + notation_decl => [qw(DTDHandler Handler)], + unparsed_entity_decl => [qw(DTDHandler Handler)], + element_decl => [qw(DeclHandler Handler)], + attlist_decl => [qw(DTDHandler Handler)], + doctype_decl => [qw(DTDHandler Handler)], + xml_decl => [qw(DTDHandler Handler)], + entity_decl => [qw(DTDHandler Handler)], + attribute_decl => [qw(DeclHandler Handler)], + internal_entity_decl => [qw(DeclHandler Handler)], + external_entity_decl => [qw(DeclHandler Handler)], + resolve_entity => [qw(EntityResolver Handler)], + start_dtd => [qw(LexicalHandler Handler)], + end_dtd => [qw(LexicalHandler Handler)], + start_entity => [qw(LexicalHandler Handler)], + end_entity => [qw(LexicalHandler Handler)], + warning => [qw(ErrorHandler Handler)], + error => [qw(ErrorHandler Handler)], + fatal_error => [qw(ErrorHandler Handler)], + ); + + for my $ev (keys %EVENT_SPEC) { + $code .= <<" EOTOPCODE"; +sub $ev { + my \$self = shift; + if (defined \$self->{Methods}->{'$ev'}) { + \$self->{Methods}->{'$ev'}->(\@_); + } + else { + my \$method; + my \$callbacks; + if (exists \$self->{ParseOptions}) { + \$callbacks = \$self->{ParseOptions}; + } + else { + \$callbacks = \$self; + } + if (0) { # dummy to make elsif's below compile + } + EOTOPCODE + + my ($can_string, $aload_string); + for my $h (@{$EVENT_SPEC{$ev}}) { + $can_string .= <<" EOCANBLOCK"; + elsif (defined \$callbacks->{'$h'} and \$method = \$callbacks->{'$h'}->can('$ev') ) { + my \$handler = \$callbacks->{'$h'}; + \$self->{Methods}->{'$ev'} = sub { \$method->(\$handler, \@_) }; + return \$method->(\$handler, \@_); + } + EOCANBLOCK + $aload_string .= <<" EOALOADBLOCK"; + elsif (defined \$callbacks->{'$h'} + and \$callbacks->{'$h'}->can('AUTOLOAD') + and \$callbacks->{'$h'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '') + ) + { + my \$res = eval { \$callbacks->{'$h'}->$ev(\@_) }; + if (\$@) { + die \$@; + } + else { + # I think there's a buggette here... + # if the first call throws an exception, we don't set it up right. + # Not fatal, but we might want to address it. + my \$handler = \$callbacks->{'$h'}; + \$self->{Methods}->{'$ev'} = sub { \$handler->$ev(\@_) }; + } + return \$res; + } + EOALOADBLOCK + } + + $code .= $can_string . $aload_string; + + $code .= <<" EOFALLTHROUGH"; + else { + \$self->{Methods}->{'$ev'} = sub { }; + } + } + EOFALLTHROUGH + + $code .= "\n}\n\n"; + } + + $code .= <<'BODY'; +#-------------------------------------------------------------------# +# Class->new(%options) +#-------------------------------------------------------------------# +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $options = ($#_ == 0) ? shift : { @_ }; + + unless ( defined( $options->{Handler} ) or + defined( $options->{ContentHandler} ) or + defined( $options->{DTDHandler} ) or + defined( $options->{DocumentHandler} ) or + defined( $options->{LexicalHandler} ) or + defined( $options->{ErrorHandler} ) or + defined( $options->{DeclHandler} ) ) { + + $options->{Handler} = XML::SAX::Base::NoHandler->new; + } + + my $self = bless $options, $class; + # turn NS processing on by default + $self->set_feature('http://xml.org/sax/features/namespaces', 1); + return $self; +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse(%options) +#-------------------------------------------------------------------# +sub parse { + my $self = shift; + my $parse_options = $self->get_options(@_); + local $self->{ParseOptions} = $parse_options; + if ($self->{Parent}) { # calling parse on a filter for some reason + return $self->{Parent}->parse($parse_options); + } + else { + my $method; + if (defined $parse_options->{Source}{CharacterStream} and $method = $self->can('_parse_characterstream')) { + warn("parse charstream???\n"); + return $method->($self, $parse_options->{Source}{CharacterStream}); + } + elsif (defined $parse_options->{Source}{ByteStream} and $method = $self->can('_parse_bytestream')) { + return $method->($self, $parse_options->{Source}{ByteStream}); + } + elsif (defined $parse_options->{Source}{String} and $method = $self->can('_parse_string')) { + return $method->($self, $parse_options->{Source}{String}); + } + elsif (defined $parse_options->{Source}{SystemId} and $method = $self->can('_parse_systemid')) { + return $method->($self, $parse_options->{Source}{SystemId}); + } + else { + die "No _parse_* routine defined on this driver (If it is a filter, remember to set the Parent property. If you call the parse() method, make sure to set a Source. You may want to call parse_uri, parse_string or parse_file instead.) [$self]"; + } + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_file(%options) +#-------------------------------------------------------------------# +sub parse_file { + my $self = shift; + my $file = shift; + return $self->parse_uri($file, @_) if ref(\$file) eq 'SCALAR'; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{ByteStream} = $file; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_uri(%options) +#-------------------------------------------------------------------# +sub parse_uri { + my $self = shift; + my $file = shift; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{SystemId} = $file; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# $p->parse_string(%options) +#-------------------------------------------------------------------# +sub parse_string { + my $self = shift; + my $string = shift; + my $parse_options = $self->get_options(@_); + $parse_options->{Source}{String} = $string; + return $self->parse($parse_options); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_options +#-------------------------------------------------------------------# +sub get_options { + my $self = shift; + + if (@_ == 1) { + return { %$self, %{$_[0]} }; + } else { + return { %$self, @_ }; + } +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_features +#-------------------------------------------------------------------# +sub get_features { + return ( + 'http://xml.org/sax/features/external-general-entities' => undef, + 'http://xml.org/sax/features/external-parameter-entities' => undef, + 'http://xml.org/sax/features/is-standalone' => undef, + 'http://xml.org/sax/features/lexical-handler' => undef, + 'http://xml.org/sax/features/parameter-entities' => undef, + 'http://xml.org/sax/features/namespaces' => 1, + 'http://xml.org/sax/features/namespace-prefixes' => 0, + 'http://xml.org/sax/features/string-interning' => undef, + 'http://xml.org/sax/features/use-attributes2' => undef, + 'http://xml.org/sax/features/use-locator2' => undef, + 'http://xml.org/sax/features/validation' => undef, + + 'http://xml.org/sax/properties/dom-node' => undef, + 'http://xml.org/sax/properties/xml-string' => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_feature +#-------------------------------------------------------------------# +sub get_feature { + my $self = shift; + my $feat = shift; + + # check %FEATURES to see if it's there, and return it if so + # throw XML::SAX::Exception::NotRecognized if it's not there + # throw XML::SAX::Exception::NotSupported if it's there but we + # don't support it + + my %features = $self->get_features(); + if (exists $features{$feat}) { + my %supported = map { $_ => 1 } $self->supported_features(); + if ($supported{$feat}) { + return $self->{__PACKAGE__ . "::Features"}{$feat}; + } + throw XML::SAX::Exception::NotSupported( + Message => "The feature '$feat' is not supported by " . ref($self), + Exception => undef, + ); + } + throw XML::SAX::Exception::NotRecognized( + Message => "The feature '$feat' is not recognized by " . ref($self), + Exception => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# set_feature +#-------------------------------------------------------------------# +sub set_feature { + my $self = shift; + my $feat = shift; + my $value = shift; + # check %FEATURES to see if it's there, and set it if so + # throw XML::SAX::Exception::NotRecognized if it's not there + # throw XML::SAX::Exception::NotSupported if it's there but we + # don't support it + + my %features = $self->get_features(); + if (exists $features{$feat}) { + my %supported = map { $_ => 1 } $self->supported_features(); + if ($supported{$feat}) { + return $self->{__PACKAGE__ . "::Features"}{$feat} = $value; + } + throw XML::SAX::Exception::NotSupported( + Message => "The feature '$feat' is not supported by " . ref($self), + Exception => undef, + ); + } + throw XML::SAX::Exception::NotRecognized( + Message => "The feature '$feat' is not recognized by " . ref($self), + Exception => undef, + ); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# get_handler and friends +#-------------------------------------------------------------------# +sub get_handler { + my $self = shift; + my $handler_type = shift; + $handler_type ||= 'Handler'; + return defined( $self->{$handler_type} ) ? $self->{$handler_type} : undef; +} + +sub get_document_handler { + my $self = shift; + return $self->get_handler('DocumentHandler', @_); +} + +sub get_content_handler { + my $self = shift; + return $self->get_handler('ContentHandler', @_); +} + +sub get_dtd_handler { + my $self = shift; + return $self->get_handler('DTDHandler', @_); +} + +sub get_lexical_handler { + my $self = shift; + return $self->get_handler('LexicalHandler', @_); +} + +sub get_decl_handler { + my $self = shift; + return $self->get_handler('DeclHandler', @_); +} + +sub get_error_handler { + my $self = shift; + return $self->get_handler('ErrorHandler', @_); +} + +sub get_entity_resolver { + my $self = shift; + return $self->get_handler('EntityResolver', @_); +} +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# set_handler and friends +#-------------------------------------------------------------------# +sub set_handler { + my $self = shift; + my ($new_handler, $handler_type) = reverse @_; + $handler_type ||= 'Handler'; + $self->{Methods} = {} if $self->{Methods}; + $self->{$handler_type} = $new_handler; + $self->{ParseOptions}->{$handler_type} = $new_handler; + return 1; +} + +sub set_document_handler { + my $self = shift; + return $self->set_handler('DocumentHandler', @_); +} + +sub set_content_handler { + my $self = shift; + return $self->set_handler('ContentHandler', @_); +} +sub set_dtd_handler { + my $self = shift; + return $self->set_handler('DTDHandler', @_); +} +sub set_lexical_handler { + my $self = shift; + return $self->set_handler('LexicalHandler', @_); +} +sub set_decl_handler { + my $self = shift; + return $self->set_handler('DeclHandler', @_); +} +sub set_error_handler { + my $self = shift; + return $self->set_handler('ErrorHandler', @_); +} +sub set_entity_resolver { + my $self = shift; + return $self->set_handler('EntityResolver', @_); +} + +#-------------------------------------------------------------------# + +#-------------------------------------------------------------------# +# supported_features +#-------------------------------------------------------------------# +sub supported_features { + my $self = shift; + # Only namespaces are required by all parsers + return ( + 'http://xml.org/sax/features/namespaces', + ); +} +#-------------------------------------------------------------------# + +sub no_op { + # this space intentionally blank +} + + +package XML::SAX::Base::NoHandler; + +# we need a fake handler that doesn't implement anything, this +# simplifies the code a lot (though given the recent changes, +# it may be better to do without) +sub new { + #warn "no handler called\n"; + return bless {}; +} + +1; + +BODY + + $code .= "__END__\n"; + + $code .= <<'FOOTER'; + +=head1 NAME + +XML::SAX::Base - Base class SAX Drivers and Filters + +=head1 SYNOPSIS + + package MyFilter; + use XML::SAX::Base; + @ISA = ('XML::SAX::Base'); + +=head1 DESCRIPTION + +This module has a very simple task - to be a base class for PerlSAX +drivers and filters. It's default behaviour is to pass the input directly +to the output unchanged. It can be useful to use this module as a base class +so you don't have to, for example, implement the characters() callback. + +The main advantages that it provides are easy dispatching of events the right +way (ie it takes care for you of checking that the handler has implemented +that method, or has defined an AUTOLOAD), and the guarantee that filters +will pass along events that they aren't implementing to handlers downstream +that might nevertheless be interested in them. + +=head1 WRITING SAX DRIVERS AND FILTERS + +The Perl Sax API Reference is at L. + +Writing SAX Filters is tremendously easy: all you need to do is +inherit from this module, and define the events you want to handle. A +more detailed explanation can be found at +http://www.xml.com/pub/a/2001/10/10/sax-filters.html. + +Writing Drivers is equally simple. The one thing you need to pay +attention to is B to call events yourself (this applies to Filters +as well). For instance: + + package MyFilter; + use base qw(XML::SAX::Base); + + sub start_element { + my $self = shift; + my $data = shift; + # do something + $self->{Handler}->start_element($data); # BAD + } + +The above example works well as precisely that: an example. But it has +several faults: 1) it doesn't test to see whether the handler defines +start_element. Perhaps it doesn't want to see that event, in which +case you shouldn't throw it (otherwise it'll die). 2) it doesn't check +ContentHandler and then Handler (ie it doesn't look to see that the +user hasn't requested events on a specific handler, and if not on the +default one), 3) if it did check all that, not only would the code be +cumbersome (see this module's source to get an idea) but it would also +probably have to check for a DocumentHandler (in case this were SAX1) +and for AUTOLOADs potentially defined in all these packages. As you can +tell, that would be fairly painful. Instead of going through that, +simply remember to use code similar to the following instead: + + package MyFilter; + use base qw(XML::SAX::Base); + + sub start_element { + my $self = shift; + my $data = shift; + # do something to filter + $self->SUPER::start_element($data); # GOOD (and easy) ! + } + +This way, once you've done your job you hand the ball back to +XML::SAX::Base and it takes care of all those problems for you! + +Note that the above example doesn't apply to filters only, drivers +will benefit from the exact same feature. + +=head1 METHODS + +A number of methods are defined within this class for the purpose of +inheritance. Some probably don't need to be overridden (eg parse_file) +but some clearly should be (eg parse). Options for these methods are +described in the PerlSAX2 specification available from +http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/perl-xml/libxml-perl/doc/sax-2.0.html?rev=HEAD&content-type=text/html. + +=over 4 + +=item * parse + +The parse method is the main entry point to parsing documents. Internally +the parse method will detect what type of "thing" you are parsing, and +call the appropriate method in your implementation class. Here is the +mapping table of what is in the Source options (see the Perl SAX 2.0 +specification for the meaning of these values): + + Source Contains parse() calls + =============== ============= + CharacterStream (*) _parse_characterstream($stream, $options) + ByteStream _parse_bytestream($stream, $options) + String _parse_string($string, $options) + SystemId _parse_systemid($string, $options) + +However note that these methods may not be sensible if your driver class +is not for parsing XML. An example might be a DBI driver that generates +XML/SAX from a database table. If that is the case, you likely want to +write your own parse() method. + +Also note that the Source may contain both a PublicId entry, and an +Encoding entry. To get at these, examine $options->{Source} as passed +to your method. + +(*) A CharacterStream is a filehandle that does not need any encoding +translation done on it. This is implemented as a regular filehandle +and only works under Perl 5.7.2 or higher using PerlIO. To get a single +character, or number of characters from it, use the perl core read() +function. To get a single byte from it (or number of bytes), you can +use sysread(). The encoding of the stream should be in the Encoding +entry for the Source. + +=item * parse_file, parse_uri, parse_string + +These are all convenience variations on parse(), and in fact simply +set up the options before calling it. You probably don't need to +override these. + +=item * get_options + +This is a convenience method to get options in SAX2 style, or more +generically either as hashes or as hashrefs (it returns a hashref). +You will probably want to use this method in your own implementations +of parse() and of new(). + +=item * get_feature, set_feature + +These simply get and set features, and throw the +appropriate exceptions defined in the specification if need be. + +If your subclass defines features not defined in this one, +then you should override these methods in such a way that they check for +your features first, and then call the base class's methods +for features not defined by your class. An example would be: + + sub get_feature { + my $self = shift; + my $feat = shift; + if (exists $MY_FEATURES{$feat}) { + # handle the feature in various ways + } + else { + return $self->SUPER::get_feature($feat); + } + } + +Currently this part is unimplemented. + + +=item * set_handler + +This method takes a handler type (Handler, ContentHandler, etc.) and a +handler object as arguments, and changes the current handler for that +handler type, while taking care of resetting the internal state that +needs to be reset. This allows one to change a handler during parse +without running into problems (changing it on the parser object +directly will most likely cause trouble). + +=item * set_document_handler, set_content_handler, set_dtd_handler, set_lexical_handler, set_decl_handler, set_error_handler, set_entity_resolver + +These are just simple wrappers around the former method, and take a +handler object as their argument. Internally they simply call +set_handler with the correct arguments. + +=item * get_handler + +The inverse of set_handler, this method takes a an optional string containing a handler type (DTDHandler, +ContentHandler, etc. 'Handler' is used if no type is passed). It returns a reference to the object that implements +that that class, or undef if that handler type is not set for the current driver/filter. + +=item * get_document_handler, get_content_handler, get_dtd_handler, get_lexical_handler, get_decl_handler, +get_error_handler, get_entity_resolver + +These are just simple wrappers around the get_handler() method, and take no arguments. Internally +they simply call get_handler with the correct handler type name. + +=back + +It would be rather useless to describe all the methods that this +module implements here. They are all the methods supported in SAX1 and +SAX2. In case your memory is a little short, here is a list. The +apparent duplicates are there so that both versions of SAX can be +supported. + +=over 4 + +=item * start_document + +=item * end_document + +=item * start_element + +=item * start_document + +=item * end_document + +=item * start_element + +=item * end_element + +=item * characters + +=item * processing_instruction + +=item * ignorable_whitespace + +=item * set_document_locator + +=item * start_prefix_mapping + +=item * end_prefix_mapping + +=item * skipped_entity + +=item * start_cdata + +=item * end_cdata + +=item * comment + +=item * entity_reference + +=item * notation_decl + +=item * unparsed_entity_decl + +=item * element_decl + +=item * attlist_decl + +=item * doctype_decl + +=item * xml_decl + +=item * entity_decl + +=item * attribute_decl + +=item * internal_entity_decl + +=item * external_entity_decl + +=item * resolve_entity + +=item * start_dtd + +=item * end_dtd + +=item * start_entity + +=item * end_entity + +=item * warning + +=item * error + +=item * fatal_error + +=back + +=head1 TODO + + - more tests + - conform to the "SAX Filters" and "Java and DOM compatibility" + sections of the SAX2 document. + +=head1 AUTHOR + +Kip Hampton (khampton@totalcinema.com) did most of the work, after porting +it from XML::Filter::Base. + +Robin Berjon (robin@knowscape.com) pitched in with patches to make it +usable as a base for drivers as well as filters, along with other patches. + +Matt Sergeant (matt@sergeant.org) wrote the original XML::Filter::Base, +and patched a few things here and there, and imported it into +the XML::SAX distribution. + +=head1 SEE ALSO + +L + +=cut + +FOOTER + + + return $code; +} + + +sub write_xml_sax_base { + confirm_forced_update(); + + my $path = File::Spec->catfile("lib", "XML", "SAX", "Base.pm"); + save_original_xml_sax_base($path); + + my $code = build_xml_sax_base(); + $code = add_version_stanzas($code); + + open my $fh, ">", $path or die "Cannot write $path: $!"; + print $fh $code; + close $fh or die "Error writing $path: $!"; + print "Wrote $path\n"; +} + + +sub confirm_forced_update { + return if grep { $_ eq '--force' } @ARGV; + + print <<'EOF'; +*** WARNING *** + +The BuildSAXBase.pl script is used to generate the lib/XML/SAX/Base.pm file. +However a pre-generated version of Base.pm is included in the distribution +so you do not need to run this script unless you intend to modify the code. + +You must use the --force option to deliberately overwrite the distributed +version of lib/XML/SAX/Base.pm + +EOF + + exit; +} + + +sub save_original_xml_sax_base { + my($path) = @_; + + return unless -e $path; + (my $save_path = $path) =~ s{Base}{Base-orig}; + return if -e $save_path; + print "Saving $path to $save_path\n"; + rename($path, $save_path); +} + + +sub add_version_stanzas { + my($code) = @_; + + my $version = get_xml_sax_base_version(); + $code =~ s<^(package\s+(\w[:\w]+).*?\n)> + <${1}BEGIN {\n \$${2}::VERSION = '$version';\n}\n>mg; + return $code; +} + + +sub get_xml_sax_base_version { + open my $fh, '<', 'dist.ini' or die "open() { + m{^\s*version\s*=\s*(\S+)} && return $1; + } + die "Failed to find version in dist.ini"; +} + diff --git a/tests/lib/XML/SAX/DocumentLocator.pm b/tests/lib/XML/SAX/DocumentLocator.pm new file mode 100644 index 0000000000..dba56d5ae7 --- /dev/null +++ b/tests/lib/XML/SAX/DocumentLocator.pm @@ -0,0 +1,134 @@ +# $Id$ + +package XML::SAX::DocumentLocator; +use strict; + +sub new { + my $class = shift; + my %object; + tie %object, $class, @_; + + return bless \%object, $class; +} + +sub TIEHASH { + my $class = shift; + my ($pubmeth, $sysmeth, $linemeth, $colmeth, $encmeth, $xmlvmeth) = @_; + return bless { + pubmeth => $pubmeth, + sysmeth => $sysmeth, + linemeth => $linemeth, + colmeth => $colmeth, + encmeth => $encmeth, + xmlvmeth => $xmlvmeth, + }, $class; +} + +sub FETCH { + my ($self, $key) = @_; + my $method; + if ($key eq 'PublicId') { + $method = $self->{pubmeth}; + } + elsif ($key eq 'SystemId') { + $method = $self->{sysmeth}; + } + elsif ($key eq 'LineNumber') { + $method = $self->{linemeth}; + } + elsif ($key eq 'ColumnNumber') { + $method = $self->{colmeth}; + } + elsif ($key eq 'Encoding') { + $method = $self->{encmeth}; + } + elsif ($key eq 'XMLVersion') { + $method = $self->{xmlvmeth}; + } + if ($method) { + my $value = $method->($key); + return $value; + } + return undef; +} + +sub EXISTS { + my ($self, $key) = @_; + if ($key =~ /^(PublicId|SystemId|LineNumber|ColumnNumber|Encoding|XMLVersion)$/) { + return 1; + } + return 0; +} + +sub STORE { + my ($self, $key, $value) = @_; +} + +sub DELETE { + my ($self, $key) = @_; +} + +sub CLEAR { + my ($self) = @_; +} + +sub FIRSTKEY { + my ($self) = @_; + # assignment resets. + $self->{keys} = { + PublicId => 1, + SystemId => 1, + LineNumber => 1, + ColumnNumber => 1, + Encoding => 1, + XMLVersion => 1, + }; + return each %{$self->{keys}}; +} + +sub NEXTKEY { + my ($self, $lastkey) = @_; + return each %{$self->{keys}}; +} + +1; +__END__ + +=head1 NAME + +XML::SAX::DocumentLocator - Helper class for document locators + +=head1 SYNOPSIS + + my $locator = XML::SAX::DocumentLocator->new( + sub { $object->get_public_id }, + sub { $object->get_system_id }, + sub { $reader->current_line }, + sub { $reader->current_column }, + sub { $reader->get_encoding }, + sub { $reader->get_xml_version }, + ); + +=head1 DESCRIPTION + +This module gives you a tied hash reference that calls the +specified closures when asked for PublicId, SystemId, +LineNumber and ColumnNumber. + +It is useful for writing SAX Parsers so that you don't have +to constantly update the line numbers in a hash reference on +the object you pass to set_document_locator(). See the source +code for XML::SAX::PurePerl for a usage example. + +=head1 API + +There is only 1 method: C. Simply pass it a list of +closures that when called will return the PublicId, the +SystemId, the LineNumber, the ColumnNumber, the Encoding +and the XMLVersion respectively. + +The closures are passed a single parameter, the key being +requested. But you're free to ignore that. + +=cut + diff --git a/tests/lib/XML/SAX/Exception.pm b/tests/lib/XML/SAX/Exception.pm new file mode 100644 index 0000000000..e8bf0d1b6f --- /dev/null +++ b/tests/lib/XML/SAX/Exception.pm @@ -0,0 +1,129 @@ +package XML::SAX::Exception; +BEGIN { + $XML::SAX::Exception::VERSION = '1.08'; +} + +use strict; + +use overload '""' => "stringify", + 'fallback' => 1; + +use vars qw($StackTrace); + +use Carp; + +$StackTrace = $ENV{XML_DEBUG} || 0; + +# Other exception classes: + +@XML::SAX::Exception::NotRecognized::ISA = ('XML::SAX::Exception'); +@XML::SAX::Exception::NotSupported::ISA = ('XML::SAX::Exception'); +@XML::SAX::Exception::Parse::ISA = ('XML::SAX::Exception'); + + +sub throw { + my $class = shift; + if (ref($class)) { + die $class; + } + die $class->new(@_); +} + +sub new { + my $class = shift; + my %opts = @_; + confess "Invalid options: " . join(', ', keys %opts) unless exists $opts{Message}; + + bless { ($StackTrace ? (StackTrace => stacktrace()) : ()), %opts }, + $class; +} + +sub stringify { + my $self = shift; + local $^W; + my $error; + if (exists $self->{LineNumber}) { + $error = $self->{Message} . " [Ln: " . $self->{LineNumber} . + ", Col: " . $self->{ColumnNumber} . "]"; + } + else { + $error = $self->{Message}; + } + if ($StackTrace) { + $error .= stackstring($self->{StackTrace}); + } + $error .= "\n"; + return $error; +} + +sub stacktrace { + my $i = 2; + my @fulltrace; + while (my @trace = caller($i++)) { + my %hash; + @hash{qw(Package Filename Line)} = @trace[0..2]; + push @fulltrace, \%hash; + } + return \@fulltrace; +} + +sub stackstring { + my $stacktrace = shift; + my $string = "\nFrom:\n"; + foreach my $current (@$stacktrace) { + $string .= $current->{Filename} . " Line: " . $current->{Line} . "\n"; + } + return $string; +} + +1; + +__END__ + +=head1 NAME + +XML::SAX::Exception - Exception classes for XML::SAX + +=head1 SYNOPSIS + + throw XML::SAX::Exception::NotSupported( + Message => "The foo feature is not supported", + ); + +=head1 DESCRIPTION + +This module is the base class for all SAX Exceptions, those defined in +the spec as well as those that one may create for one's own SAX errors. + +There are three subclasses included, corresponding to those of the SAX +spec: + + XML::SAX::Exception::NotSupported + XML::SAX::Exception::NotRecognized + XML::SAX::Exception::Parse + +Use them wherever you want, and as much as possible when you encounter +such errors. SAX is meant to use exceptions as much as possible to +flag problems. + +=head1 CREATING NEW EXCEPTION CLASSES + +All you need to do to create a new exception class is: + + @XML::SAX::Exception::MyException::ISA = ('XML::SAX::Exception') + +The given package doesn't need to exist, it'll behave correctly this +way. If your exception refines an existing exception class, then you +may also inherit from that instead of from the base class. + +=head1 THROWING EXCEPTIONS + +This is as simple as exemplified in the SYNOPSIS. In fact, there's +nothing more to know. All you have to do is: + + throw XML::SAX::Exception::MyException( Message => 'Something went wrong' ); + +and voila, you've thrown an exception which can be caught in an eval block. + +=cut + diff --git a/tests/lib/XML/SAX/Intro.pod b/tests/lib/XML/SAX/Intro.pod new file mode 100644 index 0000000000..3e06160f06 --- /dev/null +++ b/tests/lib/XML/SAX/Intro.pod @@ -0,0 +1,407 @@ +=head1 NAME + +XML::SAX::Intro - An Introduction to SAX Parsing with Perl + +=head1 Introduction + +XML::SAX is a new way to work with XML Parsers in Perl. In this article +we'll discuss why you should be using SAX, why you should be using +XML::SAX, and we'll see some of the finer implementation details. The +text below assumes some familiarity with callback, or push based +parsing, but if you are unfamiliar with these techniques then a good +place to start is Kip Hampton's excellent series of articles on XML.com. + +=head1 Replacing XML::Parser + +The de-facto way of parsing XML under perl is to use Larry Wall and +Clark Cooper's XML::Parser. This module is a Perl and XS wrapper around +the expat XML parser library by James Clark. It has been a hugely +successful project, but suffers from a couple of rather major flaws. +Firstly it is a proprietary API, designed before the SAX API was +conceived, which means that it is not easily replaceable by other +streaming parsers. Secondly it's callbacks are subrefs. This doesn't +sound like much of an issue, but unfortunately leads to code like: + + sub handle_start { + my ($e, $el, %attrs) = @_; + if ($el eq 'foo') { + $e->{inside_foo}++; # BAD! $e is an XML::Parser::Expat object. + } + } + +As you can see, we're using the $e object to hold our state +information, which is a bad idea because we don't own that object - we +didn't create it. It's an internal object of XML::Parser, that happens +to be a hashref. We could all too easily overwrite XML::Parser internal +state variables by using this, or Clark could change it to an array ref +(not that he would, because it would break so much code, but he could). + +The only way currently with XML::Parser to safely maintain state is to +use a closure: + + my $state = MyState->new(); + $parser->setHandlers(Start => sub { handle_start($state, @_) }); + +This closure traps the $state variable, which now gets passed as the +first parameter to your callback. Unfortunately very few people use +this technique, as it is not documented in the XML::Parser POD files. + +Another reason you might not want to use XML::Parser is because you +need some feature that it doesn't provide (such as validation), or you +might need to use a library that doesn't use expat, due to it not being +installed on your system, or due to having a restrictive ISP. Using SAX +allows you to work around these restrictions. + +=head1 Introducing SAX + +SAX stands for the Simple API for XML. And simple it really is. +Constructing a SAX parser and passing events to handlers is done as +simply as: + + use XML::SAX; + use MySAXHandler; + + my $parser = XML::SAX::ParserFactory->parser( + Handler => MySAXHandler->new + ); + + $parser->parse_uri("foo.xml"); + +The important concept to grasp here is that SAX uses a factory class +called XML::SAX::ParserFactory to create a new parser instance. The +reason for this is so that you can support other underlying +parser implementations for different feature sets. This is one thing +that XML::Parser has always sorely lacked. + +In the code above we see the parse_uri method used, but we could +have equally well +called parse_file, parse_string, or parse(). Please see XML::SAX::Base +for what these methods take as parameters, but don't be fooled into +believing parse_file takes a filename. No, it takes a file handle, a +glob, or a subclass of IO::Handle. Beware. + +SAX works very similarly to XML::Parser's default callback method, +except it has one major difference: rather than setting individual +callbacks, you create a new class in which to recieve the callbacks. +Each callback is called as a method call on an instance of that handler +class. An example will best demonstrate this: + + package MySAXHandler; + use base qw(XML::SAX::Base); + + sub start_document { + my ($self, $doc) = @_; + # process document start event + } + + sub start_element { + my ($self, $el) = @_; + # process element start event + } + +Now, when we instantiate this as above, and parse some XML with this as +the handler, the methods start_document and start_element will be +called as method calls, so this would be the equivalent of directly +calling: + + $object->start_element($el); + +Notice how this is different to XML::Parser's calling style, which +calls: + + start_element($e, $name, %attribs); + +It's the difference between function calling and method calling which +allows you to subclass SAX handlers which contributes to SAX being a +powerful solution. + +As you can see, unlike XML::Parser, we have to define a new package in +which to do our processing (there are hacks you can do to make this +uneccessary, but I'll leave figuring those out to the experts). The +biggest benefit of this is that you maintain your own state variable +($self in the above example) thus freeing you of the concerns listed +above. It is also an improvement in maintainability - you can place the +code in a separate file if you wish to, and your callback methods are +always called the same thing, rather than having to choose a suitable +name for them as you had to with XML::Parser. This is an obvious win. + +SAX parsers are also very flexible in how you pass a handler to them. +You can use a constructor parameter as we saw above, or we can pass the +handler directly in the call to one of the parse methods: + + $parser->parse(Handler => $handler, + Source => { SystemId => "foo.xml" }); + # or... + $parser->parse_file($fh, Handler => $handler); + +This flexibility allows for one parser to be used in many different +scenarios throughout your script (though one shouldn't feel pressure to +use this method, as parser construction is generally not a time +consuming process). + +=head1 Callback Parameters + +The only other thing you need to know to understand basic SAX is the +structure of the parameters passed to each of the callbacks. In +XML::Parser, all parameters are passed as multiple options to the +callbacks, so for example the Start callback would be called as +my_start($e, $name, %attributes), and the PI callback would be called +as my_processing_instruction($e, $target, $data). In SAX, every +callback is passed a hash reference, containing entries that define our +"node". The key callbacks and the structures they receive are: + +=head2 start_element + +The start_element handler is called whenever a parser sees an opening +tag. It is passed an element structure consisting of: + +=over 4 + +=item LocalName + +The name of the element minus any namespace prefix it may +have come with in the document. + +=item NamespaceURI + +The URI of the namespace associated with this element, +or the empty string for none. + +=item Attributes + +A set of attributes as described below. + +=item Name + +The name of the element as it was seen in the document (i.e. +including any prefix associated with it) + +=item Prefix + +The prefix used to qualify this element's namespace, or the +empty string if none. + +=back + +The B are a hash reference, keyed by what we have called +"James Clark" notation. This means that the attribute name has been +expanded to include any associated namespace URI, and put together as +{ns}name, where "ns" is the expanded namespace URI of the attribute if +and only if the attribute had a prefix, and "name" is the LocalName of +the attribute. + +The value of each entry in the attributes hash is another hash +structure consisting of: + +=over 4 + +=item LocalName + +The name of the attribute minus any namespace prefix it may have +come with in the document. + +=item NamespaceURI + +The URI of the namespace associated with this attribute. If the +attribute had no prefix, then this consists of just the empty string. + +=item Name + +The attribute's name as it appeared in the document, including any +namespace prefix. + +=item Prefix + +The prefix used to qualify this attribute's namepace, or the +empty string if none. + +=item Value + +The value of the attribute. + +=back + +So a full example, as output by Data::Dumper might be: + + .... + +=head2 end_element + +The end_element handler is called either when a parser sees a closing +tag, or after start_element has been called for an empty element (do +note however that a parser may if it is so inclined call characters +with an empty string when it sees an empty element. There is no simple +way in SAX to determine if the parser in fact saw an empty element, a +start and end element with no content.. + +The end_element handler receives exactly the same structure as +start_element, minus the Attributes entry. One must note though that it +should not be a reference to the same data as start_element receives, +so you may change the values in start_element but this will not affect +the values later seen by end_element. + +=head2 characters + +The characters callback may be called in serveral circumstances. The +most obvious one is when seeing ordinary character data in the markup. +But it is also called for text in a CDATA section, and is also called +in other situations. A SAX parser has to make no guarantees whatsoever +about how many times it may call characters for a stretch of text in an +XML document - it may call once, or it may call once for every +character in the text. In order to work around this it is often +important for the SAX developer to use a bundling technique, where text +is gathered up and processed in one of the other callbacks. This is not +always necessary, but it is a worthwhile technique to learn, which we +will cover in XML::SAX::Advanced (when I get around to writing it). + +The characters handler is called with a very simple structure - a hash +reference consisting of just one entry: + +=over 4 + +=item Data + +The text data that was received. + +=back + +=head2 comment + +The comment callback is called for comment text. Unlike with +C, the comment callback *must* be invoked just once for an +entire comment string. It receives a single simple structure - a hash +reference containing just one entry: + +=over 4 + +=item Data + +The text of the comment. + +=back + +=head2 processing_instruction + +The processing instruction handler is called for all processing +instructions in the document. Note that these processing instructions +may appear before the document root element, or after it, or anywhere +where text and elements would normally appear within the document, +according to the XML specification. + +The handler is passed a structure containing just two entries: + +=over 4 + +=item Target + +The target of the processing instrcution + +=item Data + +The text data in the processing instruction. Can be an empty +string for a processing instruction that has no data element. +For example E?wiggle?E is a perfectly valid processing instruction. + +=back + +=head1 Tip of the iceberg + +What we have discussed above is really the tip of the SAX iceberg. And +so far it looks like there's not much of interest to SAX beyond what we +have seen with XML::Parser. But it does go much further than that, I +promise. + +People who hate Object Oriented code for the sake of it may be thinking +here that creating a new package just to parse something is a waste +when they've been parsing things just fine up to now using procedural +code. But there's reason to all this madness. And that reason is SAX +Filters. + +As you saw right at the very start, to let the parser know about our +class, we pass it an instance of our class as the Handler to the +parser. But now imagine what would happen if our class could also take +a Handler option, and simply do some processing and pass on our data +further down the line? That in a nutshell is how SAX filters work. It's +Unix pipes for the 21st century! + +There are two downsides to this. Number 1 - writing SAX filters can be +tricky. If you look into the future and read the advanced tutorial I'm +writing, you'll see that Handler can come in several shapes and sizes. +So making sure your filter does the right thing can be tricky. +Secondly, constructing complex filter chains can be difficult, and +simple thinking tells us that we only get one pass at our document, +when often we'll need more than that. + +Luckily though, those downsides have been fixed by the release of two +very cool modules. What's even better is that I didn't write either of +them! + +The first module is XML::SAX::Base. This is a VITAL SAX module that +acts as a base class for all SAX parsers and filters. It provides an +abstraction away from calling the handler methods, that makes sure your +filter or parser does the right thing, and it does it FAST. So, if you +ever need to write a SAX filter, which if you're processing XML -> XML, +or XML -> HTML, then you probably do, then you need to be writing it as +a subclass of XML::SAX::Base. Really - this is advice not to ignore +lightly. I will not go into the details of writing a SAX filter here. +Kip Hampton, the author of XML::SAX::Base has covered this nicely in +his article on XML.com here . + +To construct SAX pipelines, Barrie Slaymaker, a long time Perl hacker +whose modules you will probably have heard of or used, wrote a very +clever module called XML::SAX::Machines. This combines some really +clever SAX filter-type modules, with a construction toolkit for filters +that makes building pipelines easy. But before we see how it makes +things easy, first lets see how tricky it looks to build complex SAX +filter pipelines. + + use XML::SAX::ParserFactory; + use XML::Filter::Filter1; + use XML::Filter::Filter2; + use XML::SAX::Writer; + + my $output_string; + my $writer = XML::SAX::Writer->new(Output => \$output_string); + my $filter2 = XML::SAX::Filter2->new(Handler => $writer); + my $filter1 = XML::SAX::Filter1->new(Handler => $filter2); + my $parser = XML::SAX::ParserFactory->parser(Handler => $filter1); + + $parser->parse_uri("foo.xml"); + +This is a lot easier with XML::SAX::Machines: + + use XML::SAX::Machines qw(Pipeline); + + my $output_string; + my $parser = Pipeline( + XML::SAX::Filter1 => XML::SAX::Filter2 => \$output_string + ); + + $parser->parse_uri("foo.xml"); + +One of the main benefits of XML::SAX::Machines is that the pipelines +are constructed in natural order, rather than the reverse order we saw +with manual pipeline construction. XML::SAX::Machines takes care of all +the internals of pipe construction, providing you at the end with just +a parser you can use (and you can re-use the same parser as many times +as you need to). + +Just a final tip. If you ever get stuck and are confused about what is +being passed from one SAX filter or parser to the next, then +Devel::TraceSAX will come to your rescue. This perl debugger plugin +will allow you to dump the SAX stream of events as it goes by. Usage is +really very simple just call your perl script that uses SAX as follows: + + $ perl -d:TraceSAX + +And preferably pipe the output to a pager of some sort, such as more or +less. The output is extremely verbose, but should help clear some +issues up. + +=head1 AUTHOR + +Matt Sergeant, matt@sergeant.org + +$Id$ + +=cut diff --git a/tests/lib/XML/SAX/ParserFactory.pm b/tests/lib/XML/SAX/ParserFactory.pm new file mode 100644 index 0000000000..165c1303ee --- /dev/null +++ b/tests/lib/XML/SAX/ParserFactory.pm @@ -0,0 +1,230 @@ +# $Id$ + +package XML::SAX::ParserFactory; + +use strict; +use vars qw($VERSION); + +$VERSION = '1.01'; + +use Symbol qw(gensym); +use XML::SAX; +use XML::SAX::Exception; + +sub new { + my $class = shift; + my %params = @_; # TODO : Fix this in spec. + my $self = bless \%params, $class; + $self->{KnownParsers} = XML::SAX->parsers(); + return $self; +} + +sub parser { + my $self = shift; + my @parser_params = @_; + if (!ref($self)) { + $self = $self->new(); + } + + my $parser_class = $self->_parser_class(); + + my $version = ''; + if ($parser_class =~ s/\s*\(([\d\.]+)\)\s*$//) { + $version = " $1"; + } + + if (!$parser_class->can('new')) { + eval "require $parser_class $version;"; + die $@ if $@; + } + + return $parser_class->new(@parser_params); +} + +sub require_feature { + my $self = shift; + my ($feature) = @_; + $self->{RequiredFeatures}{$feature}++; + return $self; +} + +sub _parser_class { + my $self = shift; + + # First try ParserPackage + if ($XML::SAX::ParserPackage) { + return $XML::SAX::ParserPackage; + } + + # Now check if required/preferred is there + if ($self->{RequiredFeatures}) { + my %required = %{$self->{RequiredFeatures}}; + # note - we never go onto the next try (ParserDetails.ini), + # because if we can't provide the requested feature + # we need to throw an exception. + PARSER: + foreach my $parser (reverse @{$self->{KnownParsers}}) { + foreach my $feature (keys %required) { + if (!exists $parser->{Features}{$feature}) { + next PARSER; + } + } + # got here - all features must exist! + return $parser->{Name}; + } + # TODO : should this be NotSupported() ? + throw XML::SAX::Exception ( + Message => "Unable to provide required features", + ); + } + + # Next try SAX.ini + for my $dir (@INC) { + my $fh = gensym(); + if (open($fh, "$dir/SAX.ini")) { + my $param_list = XML::SAX->_parse_ini_file($fh); + my $params = $param_list->[0]->{Features}; + if ($params->{ParserPackage}) { + return $params->{ParserPackage}; + } + else { + # we have required features (or nothing?) + PARSER: + foreach my $parser (reverse @{$self->{KnownParsers}}) { + foreach my $feature (keys %$params) { + if (!exists $parser->{Features}{$feature}) { + next PARSER; + } + } + return $parser->{Name}; + } + XML::SAX->do_warn("Unable to provide SAX.ini required features. Using fallback\n"); + } + last; # stop after first INI found + } + } + + if (@{$self->{KnownParsers}}) { + return $self->{KnownParsers}[-1]{Name}; + } + else { + return "XML::SAX::PurePerl"; # backup plan! + } +} + +1; +__END__ + +=head1 NAME + +XML::SAX::ParserFactory - Obtain a SAX parser + +=head1 SYNOPSIS + + use XML::SAX::ParserFactory; + use XML::SAX::XYZHandler; + my $handler = XML::SAX::XYZHandler->new(); + my $p = XML::SAX::ParserFactory->parser(Handler => $handler); + $p->parse_uri("foo.xml"); + # or $p->parse_string("") or $p->parse_file($fh); + +=head1 DESCRIPTION + +XML::SAX::ParserFactory is a factory class for providing an application +with a Perl SAX2 XML parser. It is akin to DBI - a front end for other +parser classes. Each new SAX2 parser installed will register itself +with XML::SAX, and then it will become available to all applications +that use XML::SAX::ParserFactory to obtain a SAX parser. + +Unlike DBI however, XML/SAX parsers almost all work alike (especially +if they subclass XML::SAX::Base, as they should), so rather than +specifying the parser you want in the call to C, XML::SAX +has several ways to automatically choose which parser to use: + +=over 4 + +=item * $XML::SAX::ParserPackage + +If this package variable is set, then this package is Cd +and an instance of this package is returned by calling the C +class method in that package. If it cannot be loaded or there is +an error, an exception will be thrown. The variable can also contain +a version number: + + $XML::SAX::ParserPackage = "XML::SAX::Expat (0.72)"; + +And the number will be treated as a minimum version number. + +=item * Required features + +It is possible to require features from the parsers. For example, you +may wish for a parser that supports validation via a DTD. To do that, +use the following code: + + use XML::SAX::ParserFactory; + my $factory = XML::SAX::ParserFactory->new(); + $factory->require_feature('http://xml.org/sax/features/validation'); + my $parser = $factory->parser(...); + +Alternatively, specify the required features in the call to the +ParserFactory constructor: + + my $factory = XML::SAX::ParserFactory->new( + RequiredFeatures => { + 'http://xml.org/sax/features/validation' => 1, + } + ); + +If the features you have asked for are unavailable (for example the +user might not have a validating parser installed), then an +exception will be thrown. + +The list of known parsers is searched in reverse order, so it will +always return the last installed parser that supports all of your +requested features (Note: this is subject to change if someone +comes up with a better way of making this work). + +=item * SAX.ini + +ParserFactory will search @INC for a file called SAX.ini, which +is in a simple format: + + # a comment looks like this, + ; or like this, and are stripped anywhere in the file + key = value # SAX.in contains key/value pairs. + +All whitespace is non-significant. + +This file can contain either a line: + + ParserPackage = MyParserModule (1.02) + +Where MyParserModule is the module to load and use for the parser, +and the number in brackets is a minimum version to load. + +Or you can list required features: + + http://xml.org/sax/features/validation = 1 + +And each feature with a true value will be required. + +=item * Fallback + +If none of the above works, the last parser installed on the user's +system will be used. The XML::SAX package ships with a pure perl +XML parser, XML::SAX::PurePerl, so that there will always be a +fallback parser. + +=back + +=head1 AUTHOR + +Matt Sergeant, matt@sergeant.org + +=head1 LICENSE + +This is free software, you may use it and distribute it under the same +terms as Perl itself. + +=cut + diff --git a/tests/lib/XML/SAX/PurePerl.pm b/tests/lib/XML/SAX/PurePerl.pm new file mode 100644 index 0000000000..d5801f612e --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl.pm @@ -0,0 +1,751 @@ +# $Id$ + +package XML::SAX::PurePerl; + +use strict; +use vars qw/$VERSION/; + +$VERSION = '0.99'; + +use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar); +use XML::SAX::PurePerl::Reader; +use XML::SAX::PurePerl::EncodingDetect (); +use XML::SAX::Exception; +use XML::SAX::PurePerl::DocType (); +use XML::SAX::PurePerl::DTDDecls (); +use XML::SAX::PurePerl::XMLDecl (); +use XML::SAX::DocumentLocator (); +use XML::SAX::Base (); +use XML::SAX qw(Namespaces); +use XML::NamespaceSupport (); +use IO::File; + +if ($] < 5.006) { + require XML::SAX::PurePerl::NoUnicodeExt; +} +else { + require XML::SAX::PurePerl::UnicodeExt; +} + +use vars qw(@ISA); +@ISA = ('XML::SAX::Base'); + +my %int_ents = ( + amp => '&', + lt => '<', + gt => '>', + quot => '"', + apos => "'", + ); + +my $xmlns_ns = "http://www.w3.org/2000/xmlns/"; +my $xml_ns = "http://www.w3.org/XML/1998/namespace"; + +use Carp; +sub _parse_characterstream { + my $self = shift; + my ($fh) = @_; + confess("CharacterStream is not yet correctly implemented"); + my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); + return $self->_parse($reader); +} + +sub _parse_bytestream { + my $self = shift; + my ($fh) = @_; + my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); + return $self->_parse($reader); +} + +sub _parse_string { + my $self = shift; + my ($str) = @_; + my $reader = XML::SAX::PurePerl::Reader::String->new($str); + return $self->_parse($reader); +} + +sub _parse_systemid { + my $self = shift; + my ($uri) = @_; + my $reader = XML::SAX::PurePerl::Reader::URI->new($uri); + return $self->_parse($reader); +} + +sub _parse { + my ($self, $reader) = @_; + + $reader->public_id($self->{ParseOptions}{Source}{PublicId}); + $reader->system_id($self->{ParseOptions}{Source}{SystemId}); + + $self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1}); + + $self->set_document_locator( + XML::SAX::DocumentLocator->new( + sub { $reader->public_id }, + sub { $reader->system_id }, + sub { $reader->line }, + sub { $reader->column }, + sub { $reader->get_encoding }, + sub { $reader->get_xml_version }, + ), + ); + + $self->start_document({}); + + if (defined $self->{ParseOptions}{Source}{Encoding}) { + $reader->set_encoding($self->{ParseOptions}{Source}{Encoding}); + } + else { + $self->encoding_detect($reader); + } + + # parse a document + $self->document($reader); + + return $self->end_document({}); +} + +sub parser_error { + my $self = shift; + my ($error, $reader) = @_; + +# warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n"); + my $exception = XML::SAX::Exception::Parse->new( + Message => $error, + ColumnNumber => $reader->column, + LineNumber => $reader->line, + PublicId => $reader->public_id, + SystemId => $reader->system_id, + ); + + $self->fatal_error($exception); + $exception->throw; +} + +sub document { + my ($self, $reader) = @_; + + # document ::= prolog element Misc* + + $self->prolog($reader); + $self->element($reader) || + $self->parser_error("Document requires an element", $reader); + + while(length($reader->data)) { + $self->Misc($reader) || + $self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader); + } +} + +sub prolog { + my ($self, $reader) = @_; + + $self->XMLDecl($reader); + + # consume all misc bits + 1 while($self->Misc($reader)); + + if ($self->doctypedecl($reader)) { + while (length($reader->data)) { + $self->Misc($reader) || last; + } + } +} + +sub element { + my ($self, $reader) = @_; + + return 0 unless $reader->match('<'); + + my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader); + + my %attribs; + + while( my ($k, $v) = $self->Attribute($reader) ) { + $attribs{$k} = $v; + } + + my $have_namespaces = $self->get_feature(Namespaces); + + # Namespace processing + $self->{NSHelper}->push_context; + my @new_ns; +# my %attrs = @attribs; +# while (my ($k,$v) = each %attrs) { + if ($have_namespaces) { + while ( my ($k, $v) = each %attribs ) { + if ($k =~ m/^xmlns(:(.*))?$/) { + my $prefix = $2 || ''; + $self->{NSHelper}->declare_prefix($prefix, $v); + my $ns = + { + Prefix => $prefix, + NamespaceURI => $v, + }; + push @new_ns, $ns; + $self->SUPER::start_prefix_mapping($ns); + } + } + } + + # Create element object and fire event + my %attrib_hash; + while (my ($name, $value) = each %attribs ) { + # TODO normalise value here + my ($ns, $prefix, $lname); + if ($have_namespaces) { + ($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name); + } + $ns ||= ''; $prefix ||= ''; $lname ||= ''; + $attrib_hash{"{$ns}$lname"} = { + Name => $name, + LocalName => $lname, + Prefix => $prefix, + NamespaceURI => $ns, + Value => $value, + }; + } + + %attribs = (); # lose the memory since we recurse deep + + my ($ns, $prefix, $lname); + if ($self->get_feature(Namespaces)) { + ($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name); + } + else { + $lname = $name; + } + $ns ||= ''; $prefix ||= ''; $lname ||= ''; + + # Process remainder of start_element + $self->skip_whitespace($reader); + my $have_content; + my $data = $reader->data(2); + if ($data =~ /^\/>/) { + $reader->move_along(2); + } + else { + $data =~ /^>/ or $self->parser_error("No close element tag", $reader); + $reader->move_along(1); + $have_content++; + } + + my $el = + { + Name => $name, + LocalName => $lname, + Prefix => $prefix, + NamespaceURI => $ns, + Attributes => \%attrib_hash, + }; + $self->start_element($el); + + # warn("($name\n"); + + if ($have_content) { + $self->content($reader); + + my $data = $reader->data(2); + $data =~ /^<\// or $self->parser_error("No close tag marker", $reader); + $reader->move_along(2); + my $end_name = $self->Name($reader); + $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader); + $self->skip_whitespace($reader); + $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader); + } + + my %end_el = %$el; + delete $end_el{Attributes}; + $self->end_element(\%end_el); + + for my $ns (@new_ns) { + $self->end_prefix_mapping($ns); + } + $self->{NSHelper}->pop_context; + + return 1; +} + +sub content { + my ($self, $reader) = @_; + + while (1) { + $self->CharData($reader); + + my $data = $reader->data(2); + + if ($data =~ /^<\//) { + return 1; + } + elsif ($data =~ /^&/) { + $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader); + next; + } + elsif ($data =~ /^CDSect($reader) + or + $self->Comment($reader)) + and next; + } + elsif ($data =~ /^<\?/) { + $self->PI($reader) and next; + } + elsif ($data =~ /^element($reader) and next; + } + last; + } + + return 1; +} + +sub CDSect { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + return 0 unless $data =~ /^move_along(9); + + $self->start_cdata({}); + + $data = $reader->data; + while (1) { + $self->parser_error("EOF looking for CDATA section end", $reader) + unless length($data); + + if ($data =~ /^(.*?)\]\]>/s) { + my $chars = $1; + $reader->move_along(length($chars) + 3); + $self->characters({Data => $chars}); + last; + } + else { + $self->characters({Data => $data}); + $reader->move_along(length($data)); + $data = $reader->data; + } + } + $self->end_cdata({}); + return 1; +} + +sub CharData { + my ($self, $reader) = @_; + + my $data = $reader->data; + + while (1) { + return unless length($data); + + if ($data =~ /^([^<&]*)[<&]/s) { + my $chars = $1; + $self->parser_error("String ']]>' not allowed in character data", $reader) + if $chars =~ /\]\]>/; + $reader->move_along(length($chars)); + $self->characters({Data => $chars}) if length($chars); + last; + } + else { + $self->characters({Data => $data}); + $reader->move_along(length($data)); + $data = $reader->data; + } + } +} + +sub Misc { + my ($self, $reader) = @_; + if ($self->Comment($reader)) { + return 1; + } + elsif ($self->PI($reader)) { + return 1; + } + elsif ($self->skip_whitespace($reader)) { + return 1; + } + + return 0; +} + +sub Reference { + my ($self, $reader) = @_; + + return 0 unless $reader->match('&'); + + my $data = $reader->data; + + # Fetch more data if we have an incomplete numeric reference + if ($data =~ /^(#\d*|#x[0-9a-fA-F]*)$/) { + $data = $reader->data(length($data) + 6); + } + + if ($data =~ /^#x([0-9a-fA-F]+);/) { + my $ref = $1; + $reader->move_along(length($ref) + 3); + my $char = chr_ref(hex($ref)); + $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) + unless $char =~ /$SingleChar/o; + $self->characters({ Data => $char }); + return 1; + } + elsif ($data =~ /^#([0-9]+);/) { + my $ref = $1; + $reader->move_along(length($ref) + 2); + my $char = chr_ref($ref); + $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) + unless $char =~ /$SingleChar/o; + $self->characters({ Data => $char }); + return 1; + } + else { + # EntityRef + my $name = $self->Name($reader) + || $self->parser_error("Invalid name in entity", $reader); + $reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader); + + # warn("got entity: \&$name;\n"); + + # expand it + if ($self->_is_entity($name)) { + + if ($self->_is_external($name)) { + my $value = $self->_get_entity($name); + my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value); + $self->encoding_detect($ent_reader); + $self->extParsedEnt($ent_reader); + } + else { + my $value = $self->_stringify_entity($name); + my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value); + $self->content($ent_reader); + } + return 1; + } + elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) { + $self->characters({ Data => $int_ents{$name} }); + return 1; + } + else { + $self->parser_error("Undeclared entity", $reader); + } + } +} + +sub AttReference { + my ($self, $name, $reader) = @_; + if ($name =~ /^#x([0-9a-fA-F]+)$/) { + my $chr = chr_ref(hex($1)); + $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); + return $chr; + } + elsif ($name =~ /^#([0-9]+)$/) { + my $chr = chr_ref($1); + $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); + return $chr; + } + else { + if ($self->_is_entity($name)) { + if ($self->_is_external($name)) { + $self->parser_error("No external entity references allowed in attribute values", $reader); + } + else { + my $value = $self->_stringify_entity($name); + return $value; + } + } + elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) { + return $int_ents{$name}; + } + else { + $self->parser_error("Undeclared entity '$name'", $reader); + } + } +} + +sub extParsedEnt { + my ($self, $reader) = @_; + + $self->TextDecl($reader); + $self->content($reader); +} + +sub _is_external { + my ($self, $name) = @_; +# TODO: Fix this to use $reader to store the entities perhaps. + if ($self->{ParseOptions}{external_entities}{$name}) { + return 1; + } + return ; +} + +sub _is_entity { + my ($self, $name) = @_; +# TODO: ditto above + if (exists $self->{ParseOptions}{entities}{$name}) { + return 1; + } + return 0; +} + +sub _stringify_entity { + my ($self, $name) = @_; +# TODO: ditto above + if (exists $self->{ParseOptions}{expanded_entity}{$name}) { + return $self->{ParseOptions}{expanded_entity}{$name}; + } + # expand + my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name}); + my $ent = ''; + while(1) { + my $data = $reader->data; + $ent .= $data; + $reader->move_along(length($data)) or last; + } + return $self->{ParseOptions}{expanded_entity}{$name} = $ent; +} + +sub _get_entity { + my ($self, $name) = @_; +# TODO: ditto above + return $self->{ParseOptions}{entities}{$name}; +} + +sub skip_whitespace { + my ($self, $reader) = @_; + + my $data = $reader->data; + + my $found = 0; + while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) { + last unless length($1); + $found++; + $reader->move_along(length($1)); + $data = $reader->data; + } + + return $found; +} + +sub Attribute { + my ($self, $reader) = @_; + + $self->skip_whitespace($reader) || return; + + my $data = $reader->data(2); + return if $data =~ /^\/?>/; + + if (my $name = $self->Name($reader)) { + $self->skip_whitespace($reader); + $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader); + $self->skip_whitespace($reader); + my $value = $self->AttValue($reader); + + if (!$self->cdata_attrib($name)) { + $value =~ s/^\x20*//; # discard leading spaces + $value =~ s/\x20*$//; # discard trailing spaces + $value =~ s/ {1,}/ /g; # all >1 space to single space + } + + return $name, $value; + } + + return; +} + +sub cdata_attrib { + # TODO implement this! + return 1; +} + +sub AttValue { + my ($self, $reader) = @_; + + my $quote = $self->quote($reader); + + my $value = ''; + + while (1) { + my $data = $reader->data; + $self->parser_error("EOF found while looking for the end of attribute value", $reader) + unless length($data); + if ($data =~ /^([^$quote]*)$quote/) { + $reader->move_along(length($1) + 1); + $value .= $1; + last; + } + else { + $value .= $data; + $reader->move_along(length($data)); + } + } + + if ($value =~ /parser_error("< character not allowed in attribute values", $reader); + } + + $value =~ s/[\x09\x0A\x0D]/\x20/g; + $value =~ s/&(#(x[0-9a-fA-F]+)|#([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo; + + return $value; +} + +sub Comment { + my ($self, $reader) = @_; + + my $data = $reader->data(4); + if ($data =~ /^/s) { + $comment_str .= $1; + $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/; + $reader->move_along(length($1) + 3); + last; + } + else { + $comment_str .= $data; + $reader->move_along(length($data)); + } + } + + $self->comment({ Data => $comment_str }); + + return 1; + } + return 0; +} + +sub PI { + my ($self, $reader) = @_; + + my $data = $reader->data(2); + + if ($data =~ /^<\?/) { + $reader->move_along(2); + my ($target); + $target = $self->Name($reader) || + $self->parser_error("PI has no target", $reader); + + my $pi_data = ''; + if ($self->skip_whitespace($reader)) { + while (1) { + my $data = $reader->data; + $self->parser_error("End of data seen while looking for close PI marker", $reader) + unless length($data); + if ($data =~ /^(.*?)\?>/s) { + $pi_data .= $1; + $reader->move_along(length($1) + 2); + last; + } + else { + $pi_data .= $data; + $reader->move_along(length($data)); + } + } + } + else { + my $data = $reader->data(2); + $data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader); + $reader->move_along(2); + } + + $self->processing_instruction({ Target => $target, Data => $pi_data }); + + return 1; + } + return 0; +} + +sub Name { + my ($self, $reader) = @_; + + my $name = ''; + while(1) { + my $data = $reader->data; + return unless length($data); + $data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*\|]*)/ or return; + $name .= $1; + my $len = length($1); + $reader->move_along($len); + last if ($len != length($data)); + } + + return unless length($name); + + $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader); + + return $name; +} + +sub quote { + my ($self, $reader) = @_; + + my $data = $reader->data; + + $data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader); + $reader->move_along(1); + return $1; +} + +1; +__END__ + +=head1 NAME + +XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface + +=head1 SYNOPSIS + + use XML::Handler::Foo; + use XML::SAX::PurePerl; + my $handler = XML::Handler::Foo->new(); + my $parser = XML::SAX::PurePerl->new(Handler => $handler); + $parser->parse_uri("myfile.xml"); + +=head1 DESCRIPTION + +This module implements an XML parser in pure perl. It is written around the +upcoming perl 5.8's unicode support and support for multiple document +encodings (using the PerlIO layer), however it has been ported to work with +ASCII/UTF8 documents under lower perl versions. + +The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in +the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a +better location soon. + +Please refer to the SAX2 documentation for how to use this module - it is merely a +front end to SAX2, and implements nothing that is not in that spec (or at least tries +not to - please email me if you find errors in this implementation). + +=head1 BUGS + +XML::SAX::PurePerl is B. Very slow. I suggest you use something else +in fact. However it is great as a fallback parser for XML::SAX, where the +user might not be able to install an XS based parser or C library. + +Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations, +though the code is in place to start doing this. Also parsing parameter entity +references is causing me much confusion, since it's not exactly what I would call +trivial, or well documented in the XML grammar. XML documents with internal subsets +are likely to fail. + +I am however trying to work towards full conformance using the Oasis test suite. + +=head1 AUTHOR + +Matt Sergeant, matt@sergeant.org. Copyright 2001. + +Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com. + +=head1 LICENSE + +This is free software. You may use it or redistribute it under the same terms as +Perl 5.7.2 itself. + +=cut + diff --git a/tests/lib/XML/SAX/PurePerl/DTDDecls.pm b/tests/lib/XML/SAX/PurePerl/DTDDecls.pm new file mode 100644 index 0000000000..eaa782548c --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/DTDDecls.pm @@ -0,0 +1,603 @@ +# $Id$ + +package XML::SAX::PurePerl; + +use strict; +use XML::SAX::PurePerl::Productions qw($SingleChar); + +sub elementdecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + return 0 unless $data =~ /^move_along(9); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ELEMENT declaration", $reader); + + my $name = $self->Name($reader); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ELEMENT's name", $reader); + + $self->contentspec($reader, $name); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader); + + return 1; +} + +sub contentspec { + my ($self, $reader, $name) = @_; + + my $data = $reader->data(5); + + my $model; + if ($data =~ /^EMPTY/) { + $reader->move_along(5); + $model = 'EMPTY'; + } + elsif ($data =~ /^ANY/) { + $reader->move_along(3); + $model = 'ANY'; + } + else { + $model = $self->Mixed_or_children($reader); + } + + if ($model) { + # call SAX callback now. + $self->element_decl({Name => $name, Model => $model}); + return 1; + } + + $self->parser_error("contentspec not found in ELEMENT declaration", $reader); +} + +sub Mixed_or_children { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + $data =~ /^\(/ or return; # $self->parser_error("No opening bracket in Mixed or children", $reader); + + if ($data =~ /^\(\s*\#PCDATA/) { + $reader->match('('); + $self->skip_whitespace($reader); + $reader->move_along(7); + my $model = $self->Mixed($reader); + return $model; + } + + # not matched - must be Children + return $self->children($reader); +} + +# Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' ) +# | ( '(' S* PCDATA S* ')' ) +sub Mixed { + my ($self, $reader) = @_; + + # Mixed_or_children already matched '(' S* '#PCDATA' + + my $model = '(#PCDATA'; + + $self->skip_whitespace($reader); + + my %seen; + + while (1) { + last unless $reader->match('|'); + $self->skip_whitespace($reader); + + my $name = $self->Name($reader) || + $self->parser_error("No 'Name' after Mixed content '|'", $reader); + + if ($seen{$name}) { + $self->parser_error("Element '$name' has already appeared in this group", $reader); + } + $seen{$name}++; + + $model .= "|$name"; + + $self->skip_whitespace($reader); + } + + $reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader); + + $model .= ")"; + + if ($reader->match('*')) { + $model .= "*"; + } + + return $model; +} + +# [[47]] Children ::= ChoiceOrSeq Cardinality? +# [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality? +# ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')' +# [[49]] Choice ::= ( S* '|' S* Cp )+ +# [[50]] Seq ::= ( S* ',' S* Cp )+ +# // Children ::= (Choice | Seq) Cardinality? +# // Cp ::= ( QName | Choice | Seq) Cardinality? +# // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')' +# // Seq ::= '(' S* Cp ( S* ',' S* Cp )* S* ')' +# [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality ) +# | ( '(' S* PCDATA S* ')' ) +# Cardinality ::= '?' | '+' | '*' +# MixedCardinality ::= '*' +sub children { + my ($self, $reader) = @_; + + return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); +} + +sub ChoiceOrSeq { + my ($self, $reader) = @_; + + $reader->match('(') or $self->parser_error("choice/seq contains no opening bracket", $reader); + + my $model = '('; + + $self->skip_whitespace($reader); + + $model .= $self->Cp($reader); + + if (my $choice = $self->Choice($reader)) { + $model .= $choice; + } + else { + $model .= $self->Seq($reader); + } + + $self->skip_whitespace($reader); + + $reader->match(')') or $self->parser_error("choice/seq contains no closing bracket", $reader); + + $model .= ')'; + + return $model; +} + +sub Cardinality { + my ($self, $reader) = @_; + # cardinality is always optional + my $data = $reader->data; + if ($data =~ /^([\?\+\*])/) { + $reader->move_along(1); + return $1; + } + return ''; +} + +sub Cp { + my ($self, $reader) = @_; + + my $model; + my $name = eval + { + if (my $name = $self->Name($reader)) { + return $name . $self->Cardinality($reader); + } + }; + return $name if defined $name; + return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); +} + +sub Choice { + my ($self, $reader) = @_; + + my $model = ''; + $self->skip_whitespace($reader); + + while ($reader->match('|')) { + $self->skip_whitespace($reader); + $model .= '|'; + $model .= $self->Cp($reader); + $self->skip_whitespace($reader); + } + + return $model; +} + +sub Seq { + my ($self, $reader) = @_; + + my $model = ''; + $self->skip_whitespace($reader); + + while ($reader->match(',')) { + $self->skip_whitespace($reader); + my $cp = $self->Cp($reader); + if ($cp) { + $model .= ','; + $model .= $cp; + } + $self->skip_whitespace($reader); + } + + return $model; +} + +sub AttlistDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^move_along(9); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ATTLIST declaration", $reader); + my $name = $self->Name($reader); + + $self->AttDefList($reader, $name); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader); + + return 1; + } + + return 0; +} + +sub AttDefList { + my ($self, $reader, $name) = @_; + + 1 while $self->AttDef($reader, $name); +} + +sub AttDef { + my ($self, $reader, $el_name) = @_; + + $self->skip_whitespace($reader) || return 0; + my $att_name = $self->Name($reader) || return 0; + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after Name in attribute definition", $reader); + my $att_type = $self->AttType($reader); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after AttType in attribute definition", $reader); + my ($mode, $value) = $self->DefaultDecl($reader); + + # fire SAX event here! + $self->attribute_decl({ + eName => $el_name, + aName => $att_name, + Type => $att_type, + Mode => $mode, + Value => $value, + }); + return 1; +} + +sub AttType { + my ($self, $reader) = @_; + + return $self->StringType($reader) || + $self->TokenizedType($reader) || + $self->EnumeratedType($reader) || + $self->parser_error("Can't match AttType", $reader); +} + +sub StringType { + my ($self, $reader) = @_; + + my $data = $reader->data(5); + return unless $data =~ /^CDATA/; + $reader->move_along(5); + return 'CDATA'; +} + +sub TokenizedType { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + if ($data =~ /^(IDREFS?|ID|ENTITIES|ENTITY|NMTOKENS?)/) { + $reader->move_along(length($1)); + return $1; + } + return; +} + +sub EnumeratedType { + my ($self, $reader) = @_; + return $self->NotationType($reader) || $self->Enumeration($reader); +} + +sub NotationType { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + return unless $data =~ /^NOTATION/; + $reader->move_along(8); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after NOTATION", $reader); + $reader->match('(') or $self->parser_error("No opening bracket in notation section", $reader); + + $self->skip_whitespace($reader); + my $model = 'NOTATION ('; + my $name = $self->Name($reader) || + $self->parser_error("No name in notation section", $reader); + $model .= $name; + $self->skip_whitespace($reader); + $data = $reader->data; + while ($data =~ /^\|/) { + $reader->move_along(1); + $model .= '|'; + $self->skip_whitespace($reader); + my $name = $self->Name($reader) || + $self->parser_error("No name in notation section", $reader); + $model .= $name; + $self->skip_whitespace($reader); + $data = $reader->data; + } + $data =~ /^\)/ or $self->parser_error("No closing bracket in notation section", $reader); + $reader->move_along(1); + + $model .= ')'; + + return $model; +} + +sub Enumeration { + my ($self, $reader) = @_; + + return unless $reader->match('('); + + $self->skip_whitespace($reader); + my $model = '('; + my $nmtoken = $self->Nmtoken($reader) || + $self->parser_error("No Nmtoken in enumerated declaration", $reader); + $model .= $nmtoken; + $self->skip_whitespace($reader); + my $data = $reader->data; + while ($data =~ /^\|/) { + $model .= '|'; + $reader->move_along(1); + $self->skip_whitespace($reader); + my $nmtoken = $self->Nmtoken($reader) || + $self->parser_error("No Nmtoken in enumerated declaration", $reader); + $model .= $nmtoken; + $self->skip_whitespace($reader); + $data = $reader->data; + } + $data =~ /^\)/ or $self->parser_error("No closing bracket in enumerated declaration", $reader); + $reader->move_along(1); + + $model .= ')'; + + return $model; +} + +sub Nmtoken { + my ($self, $reader) = @_; + return $self->Name($reader); +} + +sub DefaultDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^(\#REQUIRED|\#IMPLIED)/) { + $reader->move_along(length($1)); + return $1; + } + my $model = ''; + if ($data =~ /^\#FIXED/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || $self->parser_error( + "no whitespace after FIXED specifier", $reader); + my $value = $self->AttValue($reader); + return "#FIXED", $value; + } + my $value = $self->AttValue($reader); + return undef, $value; +} + +sub EntityDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + return 0 unless $data =~ /^move_along(8); + + $self->skip_whitespace($reader) || $self->parser_error( + "No whitespace after ENTITY declaration", $reader); + + $self->PEDecl($reader) || $self->GEDecl($reader); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("No closing '>' in entity definition", $reader); + + return 1; +} + +sub GEDecl { + my ($self, $reader) = @_; + + my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader); + + # TODO: ExternalID calls lexhandler method. Wrong place for it. + my $value; + if ($value = $self->ExternalID($reader)) { + $value .= $self->NDataDecl($reader); + } + else { + $value = $self->EntityValue($reader); + } + + if ($self->{ParseOptions}{entities}{$name}) { + warn("entity $name already exists\n"); + } else { + $self->{ParseOptions}{entities}{$name} = 1; + $self->{ParseOptions}{expanded_entity}{$name} = $value; # ??? + } + # do callback? + return 1; +} + +sub PEDecl { + my ($self, $reader) = @_; + + return 0 unless $reader->match('%'); + + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader); + my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader); + my $value = $self->ExternalID($reader) || + $self->EntityValue($reader) || + $self->parser_error("PE is not a value or an external resource", $reader); + # do callback? + return 1; +} + +my $quotre = qr/[^%&\"]/; +my $aposre = qr/[^%&\']/; + +sub EntityValue { + my ($self, $reader) = @_; + + my $data = $reader->data; + my $quote = '"'; + my $re = $quotre; + if ($data !~ /^"/) { + $data =~ /^'/ or $self->parser_error("Not a quote character", $reader); + $quote = "'"; + $re = $aposre; + } + $reader->move_along(1); + + my $value = ''; + + while (1) { + my $data = $reader->data; + + $self->parser_error("EOF found while reading entity value", $reader) + unless length($data); + + if ($data =~ /^($re+)/) { + my $match = $1; + $value .= $match; + $reader->move_along(length($match)); + } + elsif ($reader->match('&')) { + # if it's a char ref, expand now: + if ($reader->match('#')) { + my $char; + my $ref = ''; + if ($reader->match('x')) { + my $data = $reader->data; + while (1) { + $self->parser_error("EOF looking for reference end", $reader) + unless length($data); + if ($data !~ /^([0-9a-fA-F]*)/) { + last; + } + $ref .= $1; + $reader->move_along(length($1)); + if (length($1) == length($data)) { + $data = $reader->data; + } + else { + last; + } + } + $char = chr_ref(hex($ref)); + $ref = "x$ref"; + } + else { + my $data = $reader->data; + while (1) { + $self->parser_error("EOF looking for reference end", $reader) + unless length($data); + if ($data !~ /^([0-9]*)/) { + last; + } + $ref .= $1; + $reader->move_along(length($1)); + if (length($1) == length($data)) { + $data = $reader->data; + } + else { + last; + } + } + $char = chr($ref); + } + $reader->match(';') || + $self->parser_error("No semi-colon found after character reference", $reader); + if ($char !~ $SingleChar) { # match a single character + $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader); + } + $value .= $char; + } + else { + # entity refs in entities get expanded later, so don't parse now. + $value .= '&'; + } + } + elsif ($reader->match('%')) { + $value .= $self->PEReference($reader); + } + elsif ($reader->match($quote)) { + # end of attrib + last; + } + else { + $self->parser_error("Invalid character in attribute value: " . substr($reader->data, 0, 1), $reader); + } + } + + return $value; +} + +sub NDataDecl { + my ($self, $reader) = @_; + $self->skip_whitespace($reader) || return ''; + my $data = $reader->data(5); + return '' unless $data =~ /^NDATA/; + $reader->move_along(5); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader); + my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader); + return " NDATA $name"; +} + +sub NotationDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(10); + return 0 unless $data =~ /^move_along(10); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after NOTATION declaration", $reader); + $data = $reader->data; + my $value = ''; + while(1) { + $self->parser_error("EOF found while looking for end of NotationDecl", $reader) + unless length($data); + + if ($data =~ /^([^>]*)>/) { + $value .= $1; + $reader->move_along(length($1) + 1); + $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" }); + last; + } + else { + $value .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } + return 1; +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/DebugHandler.pm b/tests/lib/XML/SAX/PurePerl/DebugHandler.pm new file mode 100644 index 0000000000..b80d060916 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/DebugHandler.pm @@ -0,0 +1,95 @@ +# $Id$ + +package XML::SAX::PurePerl::DebugHandler; + +use strict; + +sub new { + my $class = shift; + my %opts = @_; + return bless \%opts, $class; +} + +# DocumentHandler + +sub set_document_locator { + my $self = shift; + print "set_document_locator\n" if $ENV{DEBUG_XML}; + $self->{seen}{set_document_locator}++; +} + +sub start_document { + my $self = shift; + print "start_document\n" if $ENV{DEBUG_XML}; + $self->{seen}{start_document}++; +} + +sub end_document { + my $self = shift; + print "end_document\n" if $ENV{DEBUG_XML}; + $self->{seen}{end_document}++; +} + +sub start_element { + my $self = shift; + print "start_element\n" if $ENV{DEBUG_XML}; + $self->{seen}{start_element}++; +} + +sub end_element { + my $self = shift; + print "end_element\n" if $ENV{DEBUG_XML}; + $self->{seen}{end_element}++; +} + +sub characters { + my $self = shift; + print "characters\n" if $ENV{DEBUG_XML}; +# warn "Char: ", $_[0]->{Data}, "\n"; + $self->{seen}{characters}++; +} + +sub processing_instruction { + my $self = shift; + print "processing_instruction\n" if $ENV{DEBUG_XML}; + $self->{seen}{processing_instruction}++; +} + +sub ignorable_whitespace { + my $self = shift; + print "ignorable_whitespace\n" if $ENV{DEBUG_XML}; + $self->{seen}{ignorable_whitespace}++; +} + +# LexHandler + +sub comment { + my $self = shift; + print "comment\n" if $ENV{DEBUG_XML}; + $self->{seen}{comment}++; +} + +# DTDHandler + +sub notation_decl { + my $self = shift; + print "notation_decl\n" if $ENV{DEBUG_XML}; + $self->{seen}{notation_decl}++; +} + +sub unparsed_entity_decl { + my $self = shift; + print "unparsed_entity_decl\n" if $ENV{DEBUG_XML}; + $self->{seen}{entity_decl}++; +} + +# EntityResolver + +sub resolve_entity { + my $self = shift; + print "resolve_entity\n" if $ENV{DEBUG_XML}; + $self->{seen}{resolve_entity}++; + return ''; +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/DocType.pm b/tests/lib/XML/SAX/PurePerl/DocType.pm new file mode 100644 index 0000000000..de48572773 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/DocType.pm @@ -0,0 +1,180 @@ +# $Id$ + +package XML::SAX::PurePerl; + +use strict; +use XML::SAX::PurePerl::Productions qw($PubidChar); + +sub doctypedecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^move_along(9); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after doctype declaration", $reader); + + my $root_name = $self->Name($reader) || + $self->parser_error("Doctype declaration has no root element name", $reader); + + if ($self->skip_whitespace($reader)) { + # might be externalid... + my %dtd = $self->ExternalID($reader); + # TODO: Call SAX event + } + + $self->skip_whitespace($reader); + + $self->InternalSubset($reader); + + $reader->match('>') or $self->parser_error("Doctype not closed", $reader); + + return 1; + } + + return 0; +} + +sub ExternalID { + my ($self, $reader) = @_; + + my $data = $reader->data(6); + + if ($data =~ /^SYSTEM/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after SYSTEM identifier", $reader); + return (SYSTEM => $self->SystemLiteral($reader)); + } + elsif ($data =~ /^PUBLIC/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after PUBLIC identifier", $reader); + + my $quote = $self->quote($reader) || + $self->parser_error("Not a quote character in PUBLIC identifier", $reader); + + my $data = $reader->data; + my $pubid = ''; + while(1) { + $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader) + unless length($data); + + if ($data =~ /^([^$quote]*)$quote/) { + $pubid .= $1; + $reader->move_along(length($1) + 1); + last; + } + else { + $pubid .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } + + if ($pubid !~ /^($PubidChar)+$/) { + $self->parser_error("Invalid characters in PUBLIC identifier", $reader); + } + + $self->skip_whitespace($reader) || + $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader); + + return (PUBLIC => $pubid, + SYSTEM => $self->SystemLiteral($reader)); + } + else { + return; + } + + return 1; +} + +sub SystemLiteral { + my ($self, $reader) = @_; + + my $quote = $self->quote($reader); + + my $data = $reader->data; + my $systemid = ''; + while (1) { + $self->parser_error("EOF found while looking for end of Sytem Literal", $reader) + unless length($data); + if ($data =~ /^([^$quote]*)$quote/) { + $systemid .= $1; + $reader->move_along(length($1) + 1); + return $systemid; + } + else { + $systemid .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } +} + +sub InternalSubset { + my ($self, $reader) = @_; + + return 0 unless $reader->match('['); + + 1 while $self->IntSubsetDecl($reader); + + $reader->match(']') or $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader); + $self->skip_whitespace($reader); + return 1; +} + +sub IntSubsetDecl { + my ($self, $reader) = @_; + + return $self->DeclSep($reader) || $self->markupdecl($reader); +} + +sub DeclSep { + my ($self, $reader) = @_; + + if ($self->skip_whitespace($reader)) { + return 1; + } + + if ($self->PEReference($reader)) { + return 1; + } + +# if ($self->ParsedExtSubset($reader)) { +# return 1; +# } + + return 0; +} + +sub PEReference { + my ($self, $reader) = @_; + + return 0 unless $reader->match('%'); + + my $peref = $self->Name($reader) || + $self->parser_error("PEReference did not find a Name", $reader); + # TODO - load/parse the peref + + $reader->match(';') or $self->parser_error("Invalid token in PEReference", $reader); + return 1; +} + +sub markupdecl { + my ($self, $reader) = @_; + + if ($self->elementdecl($reader) || + $self->AttlistDecl($reader) || + $self->EntityDecl($reader) || + $self->NotationDecl($reader) || + $self->PI($reader) || + $self->Comment($reader)) + { + return 1; + } + + return 0; +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/EncodingDetect.pm b/tests/lib/XML/SAX/PurePerl/EncodingDetect.pm new file mode 100644 index 0000000000..7ab4548ab7 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/EncodingDetect.pm @@ -0,0 +1,105 @@ +# $Id$ + +package XML::SAX::PurePerl; # NB, not ::EncodingDetect! + +use strict; + +sub encoding_detect { + my ($parser, $reader) = @_; + + my $error = "Invalid byte sequence at start of file"; + + my $data = $reader->data; + if ($data =~ /^\x00\x00\xFE\xFF/) { + # BO-UCS4-be + $reader->move_along(4); + $reader->set_encoding('UCS-4BE'); + return; + } + elsif ($data =~ /^\x00\x00\xFF\xFE/) { + # BO-UCS-4-2143 + $reader->move_along(4); + $reader->set_encoding('UCS-4-2143'); + return; + } + elsif ($data =~ /^\x00\x00\x00\x3C/) { + $reader->set_encoding('UCS-4BE'); + return; + } + elsif ($data =~ /^\x00\x00\x3C\x00/) { + $reader->set_encoding('UCS-4-2143'); + return; + } + elsif ($data =~ /^\x00\x3C\x00\x00/) { + $reader->set_encoding('UCS-4-3412'); + return; + } + elsif ($data =~ /^\x00\x3C\x00\x3F/) { + $reader->set_encoding('UTF-16BE'); + return; + } + elsif ($data =~ /^\xFF\xFE\x00\x00/) { + # BO-UCS-4LE + $reader->move_along(4); + $reader->set_encoding('UCS-4LE'); + return; + } + elsif ($data =~ /^\xFF\xFE/) { + $reader->move_along(2); + $reader->set_encoding('UTF-16LE'); + return; + } + elsif ($data =~ /^\xFE\xFF\x00\x00/) { + $reader->move_along(4); + $reader->set_encoding('UCS-4-3412'); + return; + } + elsif ($data =~ /^\xFE\xFF/) { + $reader->move_along(2); + $reader->set_encoding('UTF-16BE'); + return; + } + elsif ($data =~ /^\xEF\xBB\xBF/) { # UTF-8 BOM + $reader->move_along(3); + $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^\x3C\x00\x00\x00/) { + $reader->set_encoding('UCS-4LE'); + return; + } + elsif ($data =~ /^\x3C\x00\x3F\x00/) { + $reader->set_encoding('UTF-16LE'); + return; + } + elsif ($data =~ /^\x3C\x3F\x78\x6D/) { + # $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^\x3C\x3F\x78/) { + # $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^\x3C\x3F/) { + # $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^\x3C/) { + # $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^[\x20\x09\x0A\x0D]+\x3C[^\x3F]/) { + # $reader->set_encoding('UTF-8'); + return; + } + elsif ($data =~ /^\x4C\x6F\xA7\x94/) { + $reader->set_encoding('EBCDIC'); + return; + } + + warn("Unable to recognise encoding of this document"); + return; +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/Exception.pm b/tests/lib/XML/SAX/PurePerl/Exception.pm new file mode 100644 index 0000000000..1ade99bb96 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Exception.pm @@ -0,0 +1,67 @@ +# $Id$ + +package XML::SAX::PurePerl::Exception; + +use strict; + +use overload '""' => "stringify"; + +use vars qw/$StackTrace/; + +$StackTrace = $ENV{XML_DEBUG} || 0; + +sub throw { + my $class = shift; + die $class->new(@_); +} + +sub new { + my $class = shift; + my %opts = @_; + die "Invalid options" unless exists $opts{Message}; + + if ($opts{reader}) { + return bless { Message => $opts{Message}, + Exception => undef, # not sure what this is for!!! + ColumnNumber => $opts{reader}->column, + LineNumber => $opts{reader}->line, + PublicId => $opts{reader}->public_id, + SystemId => $opts{reader}->system_id, + $StackTrace ? (StackTrace => stacktrace()) : (), + }, $class; + } + return bless { Message => $opts{Message}, + Exception => undef, # not sure what this is for!!! + }, $class; +} + +sub stringify { + my $self = shift; + local $^W; + return $self->{Message} . " [Ln: " . $self->{LineNumber} . + ", Col: " . $self->{ColumnNumber} . "]" . + ($StackTrace ? stackstring($self->{StackTrace}) : "") . "\n"; +} + +sub stacktrace { + my $i = 2; + my @fulltrace; + while (my @trace = caller($i++)) { + my %hash; + @hash{qw(Package Filename Line)} = @trace[0..2]; + push @fulltrace, \%hash; + } + return \@fulltrace; +} + +sub stackstring { + my $stacktrace = shift; + my $string = "\nFrom:\n"; + foreach my $current (@$stacktrace) { + $string .= $current->{Filename} . " Line: " . $current->{Line} . "\n"; + } + return $string; +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/NoUnicodeExt.pm b/tests/lib/XML/SAX/PurePerl/NoUnicodeExt.pm new file mode 100644 index 0000000000..70d20712aa --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/NoUnicodeExt.pm @@ -0,0 +1,28 @@ +# $Id$ + +package XML::SAX::PurePerl; +use strict; + +sub chr_ref { + my $n = shift; + if ($n < 0x80) { + return chr ($n); + } + elsif ($n < 0x800) { + return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80)); + } + elsif ($n < 0x10000) { + return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80), + (($n & 0x3f) | 0x80)); + } + elsif ($n < 0x110000) + { + return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80), + ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); + } + else { + return undef; + } +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/Productions.pm b/tests/lib/XML/SAX/PurePerl/Productions.pm new file mode 100644 index 0000000000..25d49fbfce --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Productions.pm @@ -0,0 +1,147 @@ +# $Id$ + +package XML::SAX::PurePerl::Productions; + +use Exporter; +@ISA = ('Exporter'); +@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Ideographic + $Extender $Digit $CombiningChar $EncNameStart $EncNameEnd $NameChar $CharMinusDash + $PubidChar $Any $SingleChar); + +### WARNING!!! All productions here must *only* match a *single* character!!! ### + +BEGIN { +$S = qr/[\x20\x09\x0D\x0A]/; + +$CharMinusDash = qr/[^-]/x; + +$Any = qr/ . /xms; + +$VersionNum = qr/ [a-zA-Z0-9_.:-]+ /x; + +$EncNameStart = qr/ [A-Za-z] /x; +$EncNameEnd = qr/ [A-Za-z0-9\._-] /x; + +$PubidChar = qr/ [\x20\x0D\x0Aa-zA-Z0-9'()\+,.\/:=\?;!*\#@\$_\%-] /x; + +if ($] < 5.006) { + eval <<' PERL'; + $Char = qr/^ [\x09\x0A\x0D\x20-\x7F]|([\xC0-\xFD][\x80-\xBF]+) $/x; + + $SingleChar = qr/^$Char$/; + + $BaseChar = qr/ [\x41-\x5A\x61-\x7A]|([\xC0-\xFD][\x80-\xBF]+) /x; + + $Extender = qr/ \xB7 /x; + + $Digit = qr/ [\x30-\x39] /x; + + # can't do this one without unicode + # $CombiningChar = qr/^$/msx; + + $NameChar = qr/^ (?: $BaseChar | $Digit | [._:-] | $Extender )+ $/x; + PERL + die $@ if $@; +} +else { + eval <<' PERL'; + + use utf8; # for 5.6 + + $Char = qr/^ [\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}] $/x; + + $SingleChar = qr/^$Char$/; + + $BaseChar = qr/ +[\x{0041}-\x{005A}\x{0061}-\x{007A}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}] | +[\x{00F8}-\x{00FF}\x{0100}-\x{0131}\x{0134}-\x{013E}\x{0141}-\x{0148}] | +[\x{014A}-\x{017E}\x{0180}-\x{01C3}\x{01CD}-\x{01F0}\x{01F4}-\x{01F5}] | +[\x{01FA}-\x{0217}\x{0250}-\x{02A8}\x{02BB}-\x{02C1}\x{0386}\x{0388}-\x{038A}] | +[\x{038C}\x{038E}-\x{03A1}\x{03A3}-\x{03CE}\x{03D0}-\x{03D6}\x{03DA}] | +[\x{03DC}\x{03DE}\x{03E0}\x{03E2}-\x{03F3}\x{0401}-\x{040C}\x{040E}-\x{044F}] | +[\x{0451}-\x{045C}\x{045E}-\x{0481}\x{0490}-\x{04C4}\x{04C7}-\x{04C8}] | +[\x{04CB}-\x{04CC}\x{04D0}-\x{04EB}\x{04EE}-\x{04F5}\x{04F8}-\x{04F9}] | +[\x{0531}-\x{0556}\x{0559}\x{0561}-\x{0586}\x{05D0}-\x{05EA}\x{05F0}-\x{05F2}] | +[\x{0621}-\x{063A}\x{0641}-\x{064A}\x{0671}-\x{06B7}\x{06BA}-\x{06BE}] | +[\x{06C0}-\x{06CE}\x{06D0}-\x{06D3}\x{06D5}\x{06E5}-\x{06E6}\x{0905}-\x{0939}] | +[\x{093D}\x{0958}-\x{0961}\x{0985}-\x{098C}\x{098F}-\x{0990}] | +[\x{0993}-\x{09A8}\x{09AA}-\x{09B0}\x{09B2}\x{09B6}-\x{09B9}\x{09DC}-\x{09DD}] | +[\x{09DF}-\x{09E1}\x{09F0}-\x{09F1}\x{0A05}-\x{0A0A}\x{0A0F}-\x{0A10}] | +[\x{0A13}-\x{0A28}\x{0A2A}-\x{0A30}\x{0A32}-\x{0A33}\x{0A35}-\x{0A36}] | +[\x{0A38}-\x{0A39}\x{0A59}-\x{0A5C}\x{0A5E}\x{0A72}-\x{0A74}\x{0A85}-\x{0A8B}] | +[\x{0A8D}\x{0A8F}-\x{0A91}\x{0A93}-\x{0AA8}\x{0AAA}-\x{0AB0}] | +[\x{0AB2}-\x{0AB3}\x{0AB5}-\x{0AB9}\x{0ABD}\x{0AE0}\x{0B05}-\x{0B0C}] | +[\x{0B0F}-\x{0B10}\x{0B13}-\x{0B28}\x{0B2A}-\x{0B30}\x{0B32}-\x{0B33}] | +[\x{0B36}-\x{0B39}\x{0B3D}\x{0B5C}-\x{0B5D}\x{0B5F}-\x{0B61}\x{0B85}-\x{0B8A}] | +[\x{0B8E}-\x{0B90}\x{0B92}-\x{0B95}\x{0B99}-\x{0B9A}\x{0B9C}] | +[\x{0B9E}-\x{0B9F}\x{0BA3}-\x{0BA4}\x{0BA8}-\x{0BAA}\x{0BAE}-\x{0BB5}] | +[\x{0BB7}-\x{0BB9}\x{0C05}-\x{0C0C}\x{0C0E}-\x{0C10}\x{0C12}-\x{0C28}] | +[\x{0C2A}-\x{0C33}\x{0C35}-\x{0C39}\x{0C60}-\x{0C61}\x{0C85}-\x{0C8C}] | +[\x{0C8E}-\x{0C90}\x{0C92}-\x{0CA8}\x{0CAA}-\x{0CB3}\x{0CB5}-\x{0CB9}\x{0CDE}] | +[\x{0CE0}-\x{0CE1}\x{0D05}-\x{0D0C}\x{0D0E}-\x{0D10}\x{0D12}-\x{0D28}] | +[\x{0D2A}-\x{0D39}\x{0D60}-\x{0D61}\x{0E01}-\x{0E2E}\x{0E30}\x{0E32}-\x{0E33}] | +[\x{0E40}-\x{0E45}\x{0E81}-\x{0E82}\x{0E84}\x{0E87}-\x{0E88}\x{0E8A}] | +[\x{0E8D}\x{0E94}-\x{0E97}\x{0E99}-\x{0E9F}\x{0EA1}-\x{0EA3}\x{0EA5}\x{0EA7}] | +[\x{0EAA}-\x{0EAB}\x{0EAD}-\x{0EAE}\x{0EB0}\x{0EB2}-\x{0EB3}\x{0EBD}] | +[\x{0EC0}-\x{0EC4}\x{0F40}-\x{0F47}\x{0F49}-\x{0F69}\x{10A0}-\x{10C5}] | +[\x{10D0}-\x{10F6}\x{1100}\x{1102}-\x{1103}\x{1105}-\x{1107}\x{1109}] | +[\x{110B}-\x{110C}\x{110E}-\x{1112}\x{113C}\x{113E}\x{1140}\x{114C}\x{114E}] | +[\x{1150}\x{1154}-\x{1155}\x{1159}\x{115F}-\x{1161}\x{1163}\x{1165}] | +[\x{1167}\x{1169}\x{116D}-\x{116E}\x{1172}-\x{1173}\x{1175}\x{119E}\x{11A8}] | +[\x{11AB}\x{11AE}-\x{11AF}\x{11B7}-\x{11B8}\x{11BA}\x{11BC}-\x{11C2}] | +[\x{11EB}\x{11F0}\x{11F9}\x{1E00}-\x{1E9B}\x{1EA0}-\x{1EF9}\x{1F00}-\x{1F15}] | +[\x{1F18}-\x{1F1D}\x{1F20}-\x{1F45}\x{1F48}-\x{1F4D}\x{1F50}-\x{1F57}] | +[\x{1F59}\x{1F5B}\x{1F5D}\x{1F5F}-\x{1F7D}\x{1F80}-\x{1FB4}\x{1FB6}-\x{1FBC}] | +[\x{1FBE}\x{1FC2}-\x{1FC4}\x{1FC6}-\x{1FCC}\x{1FD0}-\x{1FD3}] | +[\x{1FD6}-\x{1FDB}\x{1FE0}-\x{1FEC}\x{1FF2}-\x{1FF4}\x{1FF6}-\x{1FFC}] | +[\x{2126}\x{212A}-\x{212B}\x{212E}\x{2180}-\x{2182}\x{3041}-\x{3094}] | +[\x{30A1}-\x{30FA}\x{3105}-\x{312C}\x{AC00}-\x{D7A3}] + /x; + + $Extender = qr/ +[\x{00B7}\x{02D0}\x{02D1}\x{0387}\x{0640}\x{0E46}\x{0EC6}\x{3005}\x{3031}-\x{3035}\x{309D}-\x{309E}\x{30FC}-\x{30FE}] +/x; + + $Digit = qr/ +[\x{0030}-\x{0039}\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{0966}-\x{096F}] | +[\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}] | +[\x{0BE7}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}] | +[\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}] +/x; + + $CombiningChar = qr/ +[\x{0300}-\x{0345}\x{0360}-\x{0361}\x{0483}-\x{0486}\x{0591}-\x{05A1}] | +[\x{05A3}-\x{05B9}\x{05BB}-\x{05BD}\x{05BF}\x{05C1}-\x{05C2}\x{05C4}] | +[\x{064B}-\x{0652}\x{0670}\x{06D6}-\x{06DC}\x{06DD}-\x{06DF}\x{06E0}-\x{06E4}] | +[\x{06E7}-\x{06E8}\x{06EA}-\x{06ED}\x{0901}-\x{0903}\x{093C}] | +[\x{093E}-\x{094C}\x{094D}\x{0951}-\x{0954}\x{0962}-\x{0963}\x{0981}-\x{0983}] | +[\x{09BC}\x{09BE}\x{09BF}\x{09C0}-\x{09C4}\x{09C7}-\x{09C8}] | +[\x{09CB}-\x{09CD}\x{09D7}\x{09E2}-\x{09E3}\x{0A02}\x{0A3C}\x{0A3E}\x{0A3F}] | +[\x{0A40}-\x{0A42}\x{0A47}-\x{0A48}\x{0A4B}-\x{0A4D}\x{0A70}-\x{0A71}] | +[\x{0A81}-\x{0A83}\x{0ABC}\x{0ABE}-\x{0AC5}\x{0AC7}-\x{0AC9}\x{0ACB}-\x{0ACD}] | +[\x{0B01}-\x{0B03}\x{0B3C}\x{0B3E}-\x{0B43}\x{0B47}-\x{0B48}] | +[\x{0B4B}-\x{0B4D}\x{0B56}-\x{0B57}\x{0B82}-\x{0B83}\x{0BBE}-\x{0BC2}] | +[\x{0BC6}-\x{0BC8}\x{0BCA}-\x{0BCD}\x{0BD7}\x{0C01}-\x{0C03}\x{0C3E}-\x{0C44}] | +[\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}-\x{0C56}\x{0C82}-\x{0C83}] | +[\x{0CBE}-\x{0CC4}\x{0CC6}-\x{0CC8}\x{0CCA}-\x{0CCD}\x{0CD5}-\x{0CD6}] | +[\x{0D02}-\x{0D03}\x{0D3E}-\x{0D43}\x{0D46}-\x{0D48}\x{0D4A}-\x{0D4D}\x{0D57}] | +[\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}] | +[\x{0EBB}-\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}-\x{0F19}\x{0F35}\x{0F37}\x{0F39}] | +[\x{0F3E}\x{0F3F}\x{0F71}-\x{0F84}\x{0F86}-\x{0F8B}\x{0F90}-\x{0F95}] | +[\x{0F97}\x{0F99}-\x{0FAD}\x{0FB1}-\x{0FB7}\x{0FB9}\x{20D0}-\x{20DC}\x{20E1}] | +[\x{302A}-\x{302F}\x{3099}\x{309A}] +/x; + + $Ideographic = qr/ +[\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}] +/x; + + $NameChar = qr/^ (?: $BaseChar | $Ideographic | $Digit | [._:-] | $CombiningChar | $Extender )+ $/x; + PERL + + die $@ if $@; +} + +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/Reader.pm b/tests/lib/XML/SAX/PurePerl/Reader.pm new file mode 100644 index 0000000000..cbd72837b1 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader.pm @@ -0,0 +1,136 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader; + +use strict; +use XML::SAX::PurePerl::Reader::URI; +use Exporter (); + +use vars qw(@ISA @EXPORT_OK); +@ISA = qw(Exporter); +@EXPORT_OK = qw( + EOF + BUFFER + LINE + COLUMN + ENCODING + XML_VERSION +); + +use constant EOF => 0; +use constant BUFFER => 1; +use constant LINE => 2; +use constant COLUMN => 3; +use constant ENCODING => 4; +use constant SYSTEM_ID => 5; +use constant PUBLIC_ID => 6; +use constant XML_VERSION => 7; + +require XML::SAX::PurePerl::Reader::Stream; +require XML::SAX::PurePerl::Reader::String; + +if ($] >= 5.007002) { + require XML::SAX::PurePerl::Reader::UnicodeExt; +} +else { + require XML::SAX::PurePerl::Reader::NoUnicodeExt; +} + +sub new { + my $class = shift; + my $thing = shift; + + # try to figure if this $thing is a handle of some sort + if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) { + return XML::SAX::PurePerl::Reader::Stream->new($thing)->init; + } + my $ioref; + if (tied($thing)) { + my $class = ref($thing); + no strict 'refs'; + $ioref = $thing if defined &{"${class}::TIEHANDLE"}; + } + else { + eval { + $ioref = *{$thing}{IO}; + }; + undef $@; + } + if ($ioref) { + return XML::SAX::PurePerl::Reader::Stream->new($thing)->init; + } + + if ($thing =~ /new($thing)->init; + } + + # assume it is a uri + return XML::SAX::PurePerl::Reader::URI->new($thing)->init; +} + +sub init { + my $self = shift; + $self->[LINE] = 1; + $self->[COLUMN] = 1; + $self->read_more; + return $self; +} + +sub data { + my ($self, $min_length) = (@_, 1); + if (length($self->[BUFFER]) < $min_length) { + $self->read_more; + } + return $self->[BUFFER]; +} + +sub match { + my ($self, $char) = @_; + my $data = $self->data; + if (substr($data, 0, 1) eq $char) { + $self->move_along(1); + return 1; + } + return 0; +} + +sub public_id { + my $self = shift; + @_ and $self->[PUBLIC_ID] = shift; + $self->[PUBLIC_ID]; +} + +sub system_id { + my $self = shift; + @_ and $self->[SYSTEM_ID] = shift; + $self->[SYSTEM_ID]; +} + +sub line { + shift->[LINE]; +} + +sub column { + shift->[COLUMN]; +} + +sub get_encoding { + my $self = shift; + return $self->[ENCODING]; +} + +sub get_xml_version { + my $self = shift; + return $self->[XML_VERSION]; +} + +1; + +__END__ + +=head1 NAME + +XML::Parser::PurePerl::Reader - Abstract Reader factory class + +=cut diff --git a/tests/lib/XML/SAX/PurePerl/Reader/NoUnicodeExt.pm b/tests/lib/XML/SAX/PurePerl/Reader/NoUnicodeExt.pm new file mode 100644 index 0000000000..1d27416327 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader/NoUnicodeExt.pm @@ -0,0 +1,25 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader; +use strict; + +sub set_raw_stream { + # no-op +} + +sub switch_encoding_stream { + my ($fh, $encoding) = @_; + throw XML::SAX::Exception::Parse ( + Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding", + ) if $encoding !~ /(ASCII|UTF\-?8)/i; +} + +sub switch_encoding_string { + my (undef, $encoding) = @_; + throw XML::SAX::Exception::Parse ( + Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding", + ) if $encoding !~ /(ASCII|UTF\-?8)/i; +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/Reader/Stream.pm b/tests/lib/XML/SAX/PurePerl/Reader/Stream.pm new file mode 100644 index 0000000000..b2b04568c1 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader/Stream.pm @@ -0,0 +1,84 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader::Stream; + +use strict; +use vars qw(@ISA); + +use XML::SAX::PurePerl::Reader qw( + EOF + BUFFER + LINE + COLUMN + ENCODING + XML_VERSION +); +use XML::SAX::Exception; + +@ISA = ('XML::SAX::PurePerl::Reader'); + +# subclassed by adding 1 to last element +use constant FH => 8; +use constant BUFFER_SIZE => 4096; + +sub new { + my $class = shift; + my $ioref = shift; + XML::SAX::PurePerl::Reader::set_raw_stream($ioref); + my @parts; + @parts[FH, LINE, COLUMN, BUFFER, EOF, XML_VERSION] = + ($ioref, 1, 0, '', 0, '1.0'); + return bless \@parts, $class; +} + +sub read_more { + my $self = shift; + my $buf; + my $bytesread = read($self->[FH], $buf, BUFFER_SIZE); + if ($bytesread) { + $self->[BUFFER] .= $buf; + return 1; + } + elsif (defined($bytesread)) { + $self->[EOF]++; + return 0; + } + else { + throw XML::SAX::Exception::Parse( + Message => "Error reading from filehandle: $!", + ); + } +} + +sub move_along { + my $self = shift; + my $discarded = substr($self->[BUFFER], 0, $_[0], ''); + + # Wish I could skip this lot - tells us where we are in the file + my $lines = $discarded =~ tr/\n//; + $self->[LINE] += $lines; + if ($lines) { + $discarded =~ /\n([^\n]*)$/; + $self->[COLUMN] = length($1); + } + else { + $self->[COLUMN] += $_[0]; + } +} + +sub set_encoding { + my $self = shift; + my ($encoding) = @_; + # warn("set encoding to: $encoding\n"); + XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding); + XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding); + $self->[ENCODING] = $encoding; +} + +sub bytepos { + my $self = shift; + tell($self->[FH]); +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/Reader/String.pm b/tests/lib/XML/SAX/PurePerl/Reader/String.pm new file mode 100644 index 0000000000..bd8e6e78cf --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader/String.pm @@ -0,0 +1,78 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader::String; + +use strict; +use vars qw(@ISA); + +use XML::SAX::PurePerl::Reader qw( + LINE + COLUMN + BUFFER + ENCODING + EOF +); + +@ISA = ('XML::SAX::PurePerl::Reader'); + +use constant DISCARDED => 8; +use constant STRING => 9; +use constant USED => 10; +use constant CHUNK_SIZE => 2048; + +sub new { + my $class = shift; + my $string = shift; + my @parts; + @parts[BUFFER, EOF, LINE, COLUMN, DISCARDED, STRING, USED] = + ('', 0, 1, 0, 0, $string, 0); + return bless \@parts, $class; +} + +sub read_more () { + my $self = shift; + if ($self->[USED] >= length($self->[STRING])) { + $self->[EOF]++; + return 0; + } + my $bytes = CHUNK_SIZE; + if ($bytes > (length($self->[STRING]) - $self->[USED])) { + $bytes = (length($self->[STRING]) - $self->[USED]); + } + $self->[BUFFER] .= substr($self->[STRING], $self->[USED], $bytes); + $self->[USED] += $bytes; + return 1; + } + + +sub move_along { + my($self, $bytes) = @_; + my $discarded = substr($self->[BUFFER], 0, $bytes, ''); + $self->[DISCARDED] += length($discarded); + + # Wish I could skip this lot - tells us where we are in the file + my $lines = $discarded =~ tr/\n//; + $self->[LINE] += $lines; + if ($lines) { + $discarded =~ /\n([^\n]*)$/; + $self->[COLUMN] = length($1); + } + else { + $self->[COLUMN] += $_[0]; + } +} + +sub set_encoding { + my $self = shift; + my ($encoding) = @_; + + XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding, "utf-8"); + $self->[ENCODING] = $encoding; +} + +sub bytepos { + my $self = shift; + $self->[DISCARDED]; +} + +1; diff --git a/tests/lib/XML/SAX/PurePerl/Reader/URI.pm b/tests/lib/XML/SAX/PurePerl/Reader/URI.pm new file mode 100644 index 0000000000..88e449a87a --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader/URI.pm @@ -0,0 +1,57 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader::URI; + +use strict; + +use XML::SAX::PurePerl::Reader; +use File::Temp qw(tempfile); +use Symbol; + +## NOTE: This is *not* a subclass of Reader. It just returns Stream or String +## Reader objects depending on what it's capabilities are. + +sub new { + my $class = shift; + my $uri = shift; + # request the URI + if (-e $uri && -f _) { + my $fh = gensym; + open($fh, $uri) || die "Cannot open file $uri : $!"; + return XML::SAX::PurePerl::Reader::Stream->new($fh); + } + elsif ($uri =~ /^file:(.*)$/ && -e $1 && -f _) { + my $file = $1; + my $fh = gensym; + open($fh, $file) || die "Cannot open file $file : $!"; + return XML::SAX::PurePerl::Reader::Stream->new($fh); + } + else { + # request URI, return String reader + require LWP::UserAgent; + my $ua = LWP::UserAgent->new; + $ua->agent("Perl/XML/SAX/PurePerl/1.0 " . $ua->agent); + + my $req = HTTP::Request->new(GET => $uri); + + my $fh = tempfile(); + + my $callback = sub { + my ($data, $response, $protocol) = @_; + print $fh $data; + }; + + my $res = $ua->request($req, $callback, 4096); + + if ($res->is_success) { + seek($fh, 0, 0); + return XML::SAX::PurePerl::Reader::Stream->new($fh); + } + else { + die "LWP Request Failed"; + } + } +} + + +1; diff --git a/tests/lib/XML/SAX/PurePerl/Reader/UnicodeExt.pm b/tests/lib/XML/SAX/PurePerl/Reader/UnicodeExt.pm new file mode 100644 index 0000000000..38f90f7d3c --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/Reader/UnicodeExt.pm @@ -0,0 +1,23 @@ +# $Id$ + +package XML::SAX::PurePerl::Reader; +use strict; + +use Encode (); + +sub set_raw_stream { + my ($fh) = @_; + binmode($fh, ":bytes"); +} + +sub switch_encoding_stream { + my ($fh, $encoding) = @_; + binmode($fh, ":encoding($encoding)"); +} + +sub switch_encoding_string { + $_[0] = Encode::decode($_[1], $_[0]); +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/UnicodeExt.pm b/tests/lib/XML/SAX/PurePerl/UnicodeExt.pm new file mode 100644 index 0000000000..edbd6fc7c5 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/UnicodeExt.pm @@ -0,0 +1,22 @@ +# $Id$ + +package XML::SAX::PurePerl; +use strict; + +no warnings 'utf8'; + +sub chr_ref { + return chr(shift); +} + +if ($] >= 5.007002) { + require Encode; + + Encode::define_alias( "UTF-16" => "UCS-2" ); + Encode::define_alias( "UTF-16BE" => "UCS-2" ); + Encode::define_alias( "UTF-16LE" => "ucs-2le" ); + Encode::define_alias( "UTF16LE" => "ucs-2le" ); +} + +1; + diff --git a/tests/lib/XML/SAX/PurePerl/XMLDecl.pm b/tests/lib/XML/SAX/PurePerl/XMLDecl.pm new file mode 100644 index 0000000000..0fba016f73 --- /dev/null +++ b/tests/lib/XML/SAX/PurePerl/XMLDecl.pm @@ -0,0 +1,129 @@ +# $Id$ + +package XML::SAX::PurePerl; + +use strict; +use XML::SAX::PurePerl::Productions qw($S $VersionNum $EncNameStart $EncNameEnd); + +sub XMLDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(5); + # warn("Looking for xmldecl in: $data"); + if ($data =~ /^<\?xml$S/o) { + $reader->move_along(5); + $self->skip_whitespace($reader); + + # get version attribute + $self->VersionInfo($reader) || + $self->parser_error("XML Declaration lacks required version attribute, or version attribute does not match XML specification", $reader); + + if (!$self->skip_whitespace($reader)) { + my $data = $reader->data(2); + $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader); + $reader->move_along(2); + return; + } + + if ($self->EncodingDecl($reader)) { + if (!$self->skip_whitespace($reader)) { + my $data = $reader->data(2); + $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader); + $reader->move_along(2); + return; + } + } + + $self->SDDecl($reader); + + $self->skip_whitespace($reader); + + my $data = $reader->data(2); + $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader); + $reader->move_along(2); + } + else { + # warn("first 5 bytes: ", join(',', unpack("CCCCC", $data)), "\n"); + # no xml decl + if (!$reader->get_encoding) { + $reader->set_encoding("UTF-8"); + } + } +} + +sub VersionInfo { + my ($self, $reader) = @_; + + my $data = $reader->data(11); + + # warn("Looking for version in $data"); + + $data =~ /^(version$S*=$S*(["'])($VersionNum)\2)/o or return 0; + $reader->move_along(length($1)); + my $vernum = $3; + + if ($vernum ne "1.0") { + $self->parser_error("Only XML version 1.0 supported. Saw: '$vernum'", $reader); + } + + return 1; +} + +sub SDDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(15); + + $data =~ /^(standalone$S*=$S*(["'])(yes|no)\2)/o or return 0; + $reader->move_along(length($1)); + my $yesno = $3; + + if ($yesno eq 'yes') { + $self->{standalone} = 1; + } + else { + $self->{standalone} = 0; + } + + return 1; +} + +sub EncodingDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(12); + + $data =~ /^(encoding$S*=$S*(["'])($EncNameStart$EncNameEnd*)\2)/o or return 0; + $reader->move_along(length($1)); + my $encoding = $3; + + $reader->set_encoding($encoding); + + return 1; +} + +sub TextDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(6); + $data =~ /^<\?xml$S+/ or return; + $reader->move_along(5); + $self->skip_whitespace($reader); + + if ($self->VersionInfo($reader)) { + $self->skip_whitespace($reader) || + $self->parser_error("Lack of whitespace after version attribute in text declaration", $reader); + } + + $self->EncodingDecl($reader) || + $self->parser_error("Encoding declaration missing from external entity text declaration", $reader); + + $self->skip_whitespace($reader); + + $data = $reader->data(2); + $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader); + + return 1; +} + +1; diff --git a/tests/lib/XML/Simple.pm b/tests/lib/XML/Simple.pm new file mode 100644 index 0000000000..38c402d926 --- /dev/null +++ b/tests/lib/XML/Simple.pm @@ -0,0 +1,3284 @@ +# $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $ + +package XML::Simple; + +=head1 NAME + +XML::Simple - Easy API to maintain XML (esp config files) + +=head1 SYNOPSIS + + use XML::Simple; + + my $ref = XMLin([] [, ]); + + my $xml = XMLout($hashref [, ]); + +Or the object oriented way: + + require XML::Simple; + + my $xs = XML::Simple->new(options); + + my $ref = $xs->XMLin([] [, ]); + + my $xml = $xs->XMLout($hashref [, ]); + +(or see L<"SAX SUPPORT"> for 'the SAX way'). + +To catch common errors: + + use XML::Simple qw(:strict); + +(see L<"STRICT MODE"> for more details). + +=cut + +# See after __END__ for more POD documentation + + +# Load essentials here, other modules loaded on demand later + +use strict; +use Carp; +require Exporter; + + +############################################################################## +# Define some constants +# + +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); + +@ISA = qw(Exporter); +@EXPORT = qw(XMLin XMLout); +@EXPORT_OK = qw(xml_in xml_out); +$VERSION = '2.18'; +$PREFERRED_PARSER = undef; + +my $StrictMode = 0; + +my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr + searchpath forcearray cache suppressempty parseropts + grouptags nsexpand datahandler varattr variables + normalisespace normalizespace valueattr); + +my @KnownOptOut = qw(keyattr keeproot contentkey noattr + rootname xmldecl outputfile noescape suppressempty + grouptags nsexpand handler noindent attrindent nosort + valueattr numericescape); + +my @DefKeyAttr = qw(name key id); +my $DefRootName = qq(opt); +my $DefContentKey = qq(content); +my $DefXmlDecl = qq(); + +my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; +my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround + + +############################################################################## +# Globals for use by caching routines +# + +my %MemShareCache = (); +my %MemCopyCache = (); + + +############################################################################## +# Wrapper for Exporter - handles ':strict' +# + +sub import { + # Handle the :strict tag + + $StrictMode = 1 if grep(/^:strict$/, @_); + + # Pass everything else to Exporter.pm + + @_ = grep(!/^:strict$/, @_); + goto &Exporter::import; +} + + +############################################################################## +# Constructor for optional object interface. +# + +sub new { + my $class = shift; + + if(@_ % 2) { + croak "Default options must be name=>value pairs (odd number supplied)"; + } + + my %known_opt; + @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; + + my %raw_opt = @_; + my %def_opt; + while(my($key, $val) = each %raw_opt) { + my $lkey = lc($key); + $lkey =~ s/_//g; + croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); + $def_opt{$lkey} = $val; + } + my $self = { def_opt => \%def_opt }; + + return(bless($self, $class)); +} + + +############################################################################## +# Sub: _get_object() +# +# Helper routine called from XMLin() and XMLout() to create an object if none +# was provided. Note, this routine does mess with the caller's @_ array. +# + +sub _get_object { + my $self; + if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { + $self = shift; + } + else { + $self = XML::Simple->new(); + } + + return $self; +} + + +############################################################################## +# Sub/Method: XMLin() +# +# Exported routine for slurping XML into a hashref - see pod for info. +# +# May be called as object method or as a plain function. +# +# Expects one arg for the source XML, optionally followed by a number of +# name => value option pairs. +# + +sub XMLin { + my $self = &_get_object; # note, @_ is passed implicitly + + my $target = shift; + + + # Work out whether to parse a string, a file or a filehandle + + if(not defined $target) { + return $self->parse_file(undef, @_); + } + + elsif($target eq '-') { + local($/) = undef; + $target = ; + return $self->parse_string(\$target, @_); + } + + elsif(my $type = ref($target)) { + if($type eq 'SCALAR') { + return $self->parse_string($target, @_); + } + else { + return $self->parse_fh($target, @_); + } + } + + elsif($target =~ m{<.*?>}s) { + return $self->parse_string(\$target, @_); + } + + else { + return $self->parse_file($target, @_); + } +} + + +############################################################################## +# Sub/Method: parse_file() +# +# Same as XMLin, but only parses from a named file. +# + +sub parse_file { + my $self = &_get_object; # note, @_ is passed implicitly + + my $filename = shift; + + $self->handle_options('in', @_); + + $filename = $self->default_config_file if not defined $filename; + + $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); + + # Check cache for previous parse + + if($self->{opt}->{cache}) { + foreach my $scheme (@{$self->{opt}->{cache}}) { + my $method = 'cache_read_' . $scheme; + my $opt = $self->$method($filename); + return($opt) if($opt); + } + } + + my $ref = $self->build_simple_tree($filename, undef); + + if($self->{opt}->{cache}) { + my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; + $self->$method($ref, $filename); + } + + return $ref; +} + + +############################################################################## +# Sub/Method: parse_fh() +# +# Same as XMLin, but only parses from a filehandle. +# + +sub parse_fh { + my $self = &_get_object; # note, @_ is passed implicitly + + my $fh = shift; + croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . + " as a filehandle" unless ref $fh; + + $self->handle_options('in', @_); + + return $self->build_simple_tree(undef, $fh); +} + + +############################################################################## +# Sub/Method: parse_string() +# +# Same as XMLin, but only parses from a string or a reference to a string. +# + +sub parse_string { + my $self = &_get_object; # note, @_ is passed implicitly + + my $string = shift; + + $self->handle_options('in', @_); + + return $self->build_simple_tree(undef, ref $string ? $string : \$string); +} + + +############################################################################## +# Method: default_config_file() +# +# Returns the name of the XML file to parse if no filename (or XML string) +# was provided. +# + +sub default_config_file { + my $self = shift; + + require File::Basename; + + my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); + + # Add script directory to searchpath + + if($script_dir) { + unshift(@{$self->{opt}->{searchpath}}, $script_dir); + } + + return $basename . '.xml'; +} + + +############################################################################## +# Method: build_simple_tree() +# +# Builds a 'tree' data structure as provided by XML::Parser and then +# 'simplifies' it as specified by the various options in effect. +# + +sub build_simple_tree { + my $self = shift; + + my $tree = $self->build_tree(@_); + + return $self->{opt}->{keeproot} + ? $self->collapse({}, @$tree) + : $self->collapse(@{$tree->[1]}); +} + + +############################################################################## +# Method: build_tree() +# +# This routine will be called if there is no suitable pre-parsed tree in a +# cache. It parses the XML and returns an XML::Parser 'Tree' style data +# structure (summarised in the comments for the collapse() routine below). +# +# XML::Simple requires the services of another module that knows how to parse +# XML. If XML::SAX is installed, the default SAX parser will be used, +# otherwise XML::Parser will be used. +# +# This routine expects to be passed a filename as argument 1 or a 'string' as +# argument 2. The 'string' might be a string of XML (passed by reference to +# save memory) or it might be a reference to an IO::Handle. (This +# non-intuitive mess results in part from the way XML::Parser works but that's +# really no excuse). +# + +sub build_tree { + my $self = shift; + my $filename = shift; + my $string = shift; + + + my $preferred_parser = $PREFERRED_PARSER; + unless(defined($preferred_parser)) { + $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; + } + if($preferred_parser eq 'XML::Parser') { + return($self->build_tree_xml_parser($filename, $string)); + } + + eval { require XML::SAX; }; # We didn't need it until now + if($@) { # No XML::SAX - fall back to XML::Parser + if($preferred_parser) { # unless a SAX parser was expressly requested + croak "XMLin() could not load XML::SAX"; + } + return($self->build_tree_xml_parser($filename, $string)); + } + + $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); + + my $sp = XML::SAX::ParserFactory->parser(Handler => $self); + + $self->{nocollapse} = 1; + my($tree); + if($filename) { + $tree = $sp->parse_uri($filename); + } + else { + if(ref($string) && ref($string) ne 'SCALAR') { + $tree = $sp->parse_file($string); + } + else { + $tree = $sp->parse_string($$string); + } + } + + return($tree); +} + + +############################################################################## +# Method: build_tree_xml_parser() +# +# This routine will be called if XML::SAX is not installed, or if XML::Parser +# was specifically requested. It takes the same arguments as build_tree() and +# returns the same data structure (XML::Parser 'Tree' style). +# + +sub build_tree_xml_parser { + my $self = shift; + my $filename = shift; + my $string = shift; + + + eval { + local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() + require XML::Parser; # We didn't need it until now + }; + if($@) { + croak "XMLin() requires either XML::SAX or XML::Parser"; + } + + if($self->{opt}->{nsexpand}) { + carp "'nsexpand' option requires XML::SAX"; + } + + my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); + my($tree); + if($filename) { + # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl + local(*XML_FILE); + open(XML_FILE, '<', $filename) || croak qq($filename - $!); + $tree = $xp->parse(*XML_FILE); + close(XML_FILE); + } + else { + $tree = $xp->parse($$string); + } + + return($tree); +} + + +############################################################################## +# Method: cache_write_storable() +# +# Wrapper routine for invoking Storable::nstore() to cache a parsed data +# structure. +# + +sub cache_write_storable { + my($self, $data, $filename) = @_; + + my $cachefile = $self->storable_filename($filename); + + require Storable; # We didn't need it until now + + if ('VMS' eq $^O) { + Storable::nstore($data, $cachefile); + } + else { + # If the following line fails for you, your Storable.pm is old - upgrade + Storable::lock_nstore($data, $cachefile); + } + +} + + +############################################################################## +# Method: cache_read_storable() +# +# Wrapper routine for invoking Storable::retrieve() to read a cached parsed +# data structure. Only returns cached data if the cache file exists and is +# newer than the source XML file. +# + +sub cache_read_storable { + my($self, $filename) = @_; + + my $cachefile = $self->storable_filename($filename); + + return unless(-r $cachefile); + return unless((stat($cachefile))[9] > (stat($filename))[9]); + + require Storable; # We didn't need it until now + + if ('VMS' eq $^O) { + return(Storable::retrieve($cachefile)); + } + else { + return(Storable::lock_retrieve($cachefile)); + } + +} + + +############################################################################## +# Method: storable_filename() +# +# Translates the supplied source XML filename into a filename for the storable +# cached data. A '.stor' suffix is added after stripping an optional '.xml' +# suffix. +# + +sub storable_filename { + my($self, $cachefile) = @_; + + $cachefile =~ s{(\.xml)?$}{.stor}; + return $cachefile; +} + + +############################################################################## +# Method: cache_write_memshare() +# +# Takes the supplied data structure reference and stores it away in a global +# hash structure. +# + +sub cache_write_memshare { + my($self, $data, $filename) = @_; + + $MemShareCache{$filename} = [time(), $data]; +} + + +############################################################################## +# Method: cache_read_memshare() +# +# Takes a filename and looks in a global hash for a cached parsed version. +# + +sub cache_read_memshare { + my($self, $filename) = @_; + + return unless($MemShareCache{$filename}); + return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); + + return($MemShareCache{$filename}->[1]); + +} + + +############################################################################## +# Method: cache_write_memcopy() +# +# Takes the supplied data structure and stores a copy of it in a global hash +# structure. +# + +sub cache_write_memcopy { + my($self, $data, $filename) = @_; + + require Storable; # We didn't need it until now + + $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; +} + + +############################################################################## +# Method: cache_read_memcopy() +# +# Takes a filename and looks in a global hash for a cached parsed version. +# Returns a reference to a copy of that data structure. +# + +sub cache_read_memcopy { + my($self, $filename) = @_; + + return unless($MemCopyCache{$filename}); + return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); + + return(Storable::dclone($MemCopyCache{$filename}->[1])); + +} + + +############################################################################## +# Sub/Method: XMLout() +# +# Exported routine for 'unslurping' a data structure out to XML. +# +# Expects a reference to a data structure and an optional list of option +# name => value pairs. +# + +sub XMLout { + my $self = &_get_object; # note, @_ is passed implicitly + + croak "XMLout() requires at least one argument" unless(@_); + my $ref = shift; + + $self->handle_options('out', @_); + + + # If namespace expansion is set, XML::NamespaceSupport is required + + if($self->{opt}->{nsexpand}) { + require XML::NamespaceSupport; + $self->{nsup} = XML::NamespaceSupport->new(); + $self->{ns_prefix} = 'aaa'; + } + + + # Wrap top level arrayref in a hash + + if(UNIVERSAL::isa($ref, 'ARRAY')) { + $ref = { anon => $ref }; + } + + + # Extract rootname from top level hash if keeproot enabled + + if($self->{opt}->{keeproot}) { + my(@keys) = keys(%$ref); + if(@keys == 1) { + $ref = $ref->{$keys[0]}; + $self->{opt}->{rootname} = $keys[0]; + } + } + + # Ensure there are no top level attributes if we're not adding root elements + + elsif($self->{opt}->{rootname} eq '') { + if(UNIVERSAL::isa($ref, 'HASH')) { + my $refsave = $ref; + $ref = {}; + foreach (keys(%$refsave)) { + if(ref($refsave->{$_})) { + $ref->{$_} = $refsave->{$_}; + } + else { + $ref->{$_} = [ $refsave->{$_} ]; + } + } + } + } + + + # Encode the hashref and write to file if necessary + + $self->{_ancestors} = []; + my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); + delete $self->{_ancestors}; + + if($self->{opt}->{xmldecl}) { + $xml = $self->{opt}->{xmldecl} . "\n" . $xml; + } + + if($self->{opt}->{outputfile}) { + if(ref($self->{opt}->{outputfile})) { + my $fh = $self->{opt}->{outputfile}; + if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { + eval { require IO::Handle; }; + croak $@ if $@; + } + return($fh->print($xml)); + } + else { + local(*OUT); + open(OUT, '>', "$self->{opt}->{outputfile}") || + croak "open($self->{opt}->{outputfile}): $!"; + binmode(OUT, ':utf8') if($] >= 5.008); + print OUT $xml || croak "print: $!"; + close(OUT); + } + } + elsif($self->{opt}->{handler}) { + require XML::SAX; + my $sp = XML::SAX::ParserFactory->parser( + Handler => $self->{opt}->{handler} + ); + return($sp->parse_string($xml)); + } + else { + return($xml); + } +} + + +############################################################################## +# Method: handle_options() +# +# Helper routine for both XMLin() and XMLout(). Both routines handle their +# first argument and assume all other args are options handled by this routine. +# Saves a hash of options in $self->{opt}. +# +# If default options were passed to the constructor, they will be retrieved +# here and merged with options supplied to the method call. +# +# First argument should be the string 'in' or the string 'out'. +# +# Remaining arguments should be name=>value pairs. Sets up default values +# for options not supplied. Unrecognised options are a fatal error. +# + +sub handle_options { + my $self = shift; + my $dirn = shift; + + + # Determine valid options based on context + + my %known_opt; + if($dirn eq 'in') { + @known_opt{@KnownOptIn} = @KnownOptIn; + } + else { + @known_opt{@KnownOptOut} = @KnownOptOut; + } + + + # Store supplied options in hashref and weed out invalid ones + + if(@_ % 2) { + croak "Options must be name=>value pairs (odd number supplied)"; + } + my %raw_opt = @_; + my $opt = {}; + $self->{opt} = $opt; + + while(my($key, $val) = each %raw_opt) { + my $lkey = lc($key); + $lkey =~ s/_//g; + croak "Unrecognised option: $key" unless($known_opt{$lkey}); + $opt->{$lkey} = $val; + } + + + # Merge in options passed to constructor + + foreach (keys(%known_opt)) { + unless(exists($opt->{$_})) { + if(exists($self->{def_opt}->{$_})) { + $opt->{$_} = $self->{def_opt}->{$_}; + } + } + } + + + # Set sensible defaults if not supplied + + if(exists($opt->{rootname})) { + unless(defined($opt->{rootname})) { + $opt->{rootname} = ''; + } + } + else { + $opt->{rootname} = $DefRootName; + } + + if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { + $opt->{xmldecl} = $DefXmlDecl; + } + + if(exists($opt->{contentkey})) { + if($opt->{contentkey} =~ m{^-(.*)$}) { + $opt->{contentkey} = $1; + $opt->{collapseagain} = 1; + } + } + else { + $opt->{contentkey} = $DefContentKey; + } + + unless(exists($opt->{normalisespace})) { + $opt->{normalisespace} = $opt->{normalizespace}; + } + $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); + + # Cleanups for values assumed to be arrays later + + if($opt->{searchpath}) { + unless(ref($opt->{searchpath})) { + $opt->{searchpath} = [ $opt->{searchpath} ]; + } + } + else { + $opt->{searchpath} = [ ]; + } + + if($opt->{cache} and !ref($opt->{cache})) { + $opt->{cache} = [ $opt->{cache} ]; + } + if($opt->{cache}) { + $_ = lc($_) foreach (@{$opt->{cache}}); + foreach my $scheme (@{$opt->{cache}}) { + my $method = 'cache_read_' . $scheme; + croak "Unsupported caching scheme: $scheme" + unless($self->can($method)); + } + } + + if(exists($opt->{parseropts})) { + if($^W) { + carp "Warning: " . + "'ParserOpts' is deprecated, contact the author if you need it"; + } + } + else { + $opt->{parseropts} = [ ]; + } + + + # Special cleanup for {forcearray} which could be regex, arrayref or boolean + # or left to default to 0 + + if(exists($opt->{forcearray})) { + if(ref($opt->{forcearray}) eq 'Regexp') { + $opt->{forcearray} = [ $opt->{forcearray} ]; + } + + if(ref($opt->{forcearray}) eq 'ARRAY') { + my @force_list = @{$opt->{forcearray}}; + if(@force_list) { + $opt->{forcearray} = {}; + foreach my $tag (@force_list) { + if(ref($tag) eq 'Regexp') { + push @{$opt->{forcearray}->{_regex}}, $tag; + } + else { + $opt->{forcearray}->{$tag} = 1; + } + } + } + else { + $opt->{forcearray} = 0; + } + } + else { + $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); + } + } + else { + if($StrictMode and $dirn eq 'in') { + croak "No value specified for 'ForceArray' option in call to XML$dirn()"; + } + $opt->{forcearray} = 0; + } + + + # Special cleanup for {keyattr} which could be arrayref or hashref or left + # to default to arrayref + + if(exists($opt->{keyattr})) { + if(ref($opt->{keyattr})) { + if(ref($opt->{keyattr}) eq 'HASH') { + + # Make a copy so we can mess with it + + $opt->{keyattr} = { %{$opt->{keyattr}} }; + + + # Convert keyattr => { elem => '+attr' } + # to keyattr => { elem => [ 'attr', '+' ] } + + foreach my $el (keys(%{$opt->{keyattr}})) { + if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { + $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; + if($StrictMode and $dirn eq 'in') { + next if($opt->{forcearray} == 1); + next if(ref($opt->{forcearray}) eq 'HASH' + and $opt->{forcearray}->{$el}); + croak "<$el> set in KeyAttr but not in ForceArray"; + } + } + else { + delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) + } + } + } + else { + if(@{$opt->{keyattr}} == 0) { + delete($opt->{keyattr}); + } + } + } + else { + $opt->{keyattr} = [ $opt->{keyattr} ]; + } + } + else { + if($StrictMode) { + croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; + } + $opt->{keyattr} = [ @DefKeyAttr ]; + } + + + # Special cleanup for {valueattr} which could be arrayref or hashref + + if(exists($opt->{valueattr})) { + if(ref($opt->{valueattr}) eq 'ARRAY') { + $opt->{valueattrlist} = {}; + $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); + } + } + + # make sure there's nothing weird in {grouptags} + + if($opt->{grouptags}) { + croak "Illegal value for 'GroupTags' option - expected a hashref" + unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); + + while(my($key, $val) = each %{$opt->{grouptags}}) { + next if $key ne $val; + croak "Bad value in GroupTags: '$key' => '$val'"; + } + } + + + # Check the {variables} option is valid and initialise variables hash + + if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { + croak "Illegal value for 'Variables' option - expected a hashref"; + } + + if($opt->{variables}) { + $self->{_var_values} = { %{$opt->{variables}} }; + } + elsif($opt->{varattr}) { + $self->{_var_values} = {}; + } + +} + + +############################################################################## +# Method: find_xml_file() +# +# Helper routine for XMLin(). +# Takes a filename, and a list of directories, attempts to locate the file in +# the directories listed. +# Returns a full pathname on success; croaks on failure. +# + +sub find_xml_file { + my $self = shift; + my $file = shift; + my @search_path = @_; + + + require File::Basename; + require File::Spec; + + my($filename, $filedir) = File::Basename::fileparse($file); + + if($filename ne $file) { # Ignore searchpath if dir component + return($file) if(-e $file); + } + else { + my($path); + foreach $path (@search_path) { + my $fullpath = File::Spec->catfile($path, $file); + return($fullpath) if(-e $fullpath); + } + } + + # If user did not supply a search path, default to current directory + + if(!@search_path) { + return($file) if(-e $file); + croak "File does not exist: $file"; + } + + croak "Could not find $file in ", join(':', @search_path); +} + + +############################################################################## +# Method: collapse() +# +# Helper routine for XMLin(). This routine really comprises the 'smarts' (or +# value add) of this module. +# +# Takes the parse tree that XML::Parser produced from the supplied XML and +# recurses through it 'collapsing' unnecessary levels of indirection (nested +# arrays etc) to produce a data structure that is easier to work with. +# +# Elements in the original parser tree are represented as an element name +# followed by an arrayref. The first element of the array is a hashref +# containing the attributes. The rest of the array contains a list of any +# nested elements as name+arrayref pairs: +# +# , [ { }, , [ ... ], ... ] +# +# The special element name '0' (zero) flags text content. +# +# This routine cuts down the noise by discarding any text content consisting of +# only whitespace and then moves the nested elements into the attribute hash +# using the name of the nested element as the hash key and the collapsed +# version of the nested element as the value. Multiple nested elements with +# the same name will initially be represented as an arrayref, but this may be +# 'folded' into a hashref depending on the value of the keyattr option. +# + +sub collapse { + my $self = shift; + + + # Start with the hash of attributes + + my $attr = shift; + if($self->{opt}->{noattr}) { # Discard if 'noattr' set + $attr = {}; + } + elsif($self->{opt}->{normalisespace} == 2) { + while(my($key, $value) = each %$attr) { + $attr->{$key} = $self->normalise_space($value) + } + } + + + # Do variable substitutions + + if(my $var = $self->{_var_values}) { + while(my($key, $val) = each(%$attr)) { + $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; + $attr->{$key} = $val; + } + } + + + # Roll up 'value' attributes (but only if no nested elements) + + if(!@_ and keys %$attr == 1) { + my($k) = keys %$attr; + if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { + return $attr->{$k}; + } + } + + + # Add any nested elements + + my($key, $val); + while(@_) { + $key = shift; + $val = shift; + + if(ref($val)) { + $val = $self->collapse(@$val); + next if(!defined($val) and $self->{opt}->{suppressempty}); + } + elsif($key eq '0') { + next if($val =~ m{^\s*$}s); # Skip all whitespace content + + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 2); + + # do variable substitutions + + if(my $var = $self->{_var_values}) { + $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; + } + + + # look for variable definitions + + if(my $var = $self->{opt}->{varattr}) { + if(exists $attr->{$var}) { + $self->set_var($attr->{$var}, $val); + } + } + + + # Collapse text content in element with no attributes to a string + + if(!%$attr and !@_) { + return($self->{opt}->{forcecontent} ? + { $self->{opt}->{contentkey} => $val } : $val + ); + } + $key = $self->{opt}->{contentkey}; + } + + + # Combine duplicate attributes into arrayref if required + + if(exists($attr->{$key})) { + if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { + push(@{$attr->{$key}}, $val); + } + else { + $attr->{$key} = [ $attr->{$key}, $val ]; + } + } + elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { + $attr->{$key} = [ $val ]; + } + else { + if( $key ne $self->{opt}->{contentkey} + and ( + ($self->{opt}->{forcearray} == 1) + or ( + (ref($self->{opt}->{forcearray}) eq 'HASH') + and ( + $self->{opt}->{forcearray}->{$key} + or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) + ) + ) + ) + ) { + $attr->{$key} = [ $val ]; + } + else { + $attr->{$key} = $val; + } + } + + } + + + # Turn arrayrefs into hashrefs if key fields present + + if($self->{opt}->{keyattr}) { + while(($key,$val) = each %$attr) { + if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { + $attr->{$key} = $self->array_to_hash($key, $val); + } + } + } + + + # disintermediate grouped tags + + if($self->{opt}->{grouptags}) { + while(my($key, $val) = each(%$attr)) { + next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); + next unless(exists($self->{opt}->{grouptags}->{$key})); + + my($child_key, $child_val) = %$val; + + if($self->{opt}->{grouptags}->{$key} eq $child_key) { + $attr->{$key}= $child_val; + } + } + } + + + # Fold hashes containing a single anonymous array up into just the array + + my $count = scalar keys %$attr; + if($count == 1 + and exists $attr->{anon} + and UNIVERSAL::isa($attr->{anon}, 'ARRAY') + ) { + return($attr->{anon}); + } + + + # Do the right thing if hash is empty, otherwise just return it + + if(!%$attr and exists($self->{opt}->{suppressempty})) { + if(defined($self->{opt}->{suppressempty}) and + $self->{opt}->{suppressempty} eq '') { + return(''); + } + return(undef); + } + + + # Roll up named elements with named nested 'value' attributes + + if($self->{opt}->{valueattr}) { + while(my($key, $val) = each(%$attr)) { + next unless($self->{opt}->{valueattr}->{$key}); + next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); + my($k) = keys %$val; + next unless($k eq $self->{opt}->{valueattr}->{$key}); + $attr->{$key} = $val->{$k}; + } + } + + return($attr) + +} + + +############################################################################## +# Method: set_var() +# +# Called when a variable definition is encountered in the XML. (A variable +# definition looks like value where attrname +# matches the varattr setting). +# + +sub set_var { + my($self, $name, $value) = @_; + + $self->{_var_values}->{$name} = $value; +} + + +############################################################################## +# Method: get_var() +# +# Called during variable substitution to get the value for the named variable. +# + +sub get_var { + my($self, $name) = @_; + + my $value = $self->{_var_values}->{$name}; + return $value if(defined($value)); + + return '${' . $name . '}'; +} + + +############################################################################## +# Method: normalise_space() +# +# Strips leading and trailing whitespace and collapses sequences of whitespace +# characters to a single space. +# + +sub normalise_space { + my($self, $text) = @_; + + $text =~ s/^\s+//s; + $text =~ s/\s+$//s; + $text =~ s/\s\s+/ /sg; + + return $text; +} + + +############################################################################## +# Method: array_to_hash() +# +# Helper routine for collapse(). +# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a +# reference to the hash on success or the original array if folding is +# not possible. Behaviour is controlled by 'keyattr' option. +# + +sub array_to_hash { + my $self = shift; + my $name = shift; + my $arrayref = shift; + + my $hashref = $self->new_hashref; + + my($i, $key, $val, $flag); + + + # Handle keyattr => { .... } + + if(ref($self->{opt}->{keyattr}) eq 'HASH') { + return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); + ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; + for($i = 0; $i < @$arrayref; $i++) { + if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and + exists($arrayref->[$i]->{$key}) + ) { + $val = $arrayref->[$i]->{$key}; + if(ref($val)) { + $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); + return($arrayref); + } + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 1); + $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") + if(exists($hashref->{$val})); + $hashref->{$val} = { %{$arrayref->[$i]} }; + $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); + delete $hashref->{$val}->{$key} unless($flag eq '+'); + } + else { + $self->die_or_warn("<$name> element has no '$key' key attribute"); + return($arrayref); + } + } + } + + + # Or assume keyattr => [ .... ] + + else { + my $default_keys = + join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); + + ELEMENT: for($i = 0; $i < @$arrayref; $i++) { + return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); + + foreach $key (@{$self->{opt}->{keyattr}}) { + if(defined($arrayref->[$i]->{$key})) { + $val = $arrayref->[$i]->{$key}; + if(ref($val)) { + $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") + if not $default_keys; + return($arrayref); + } + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 1); + $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") + if(exists($hashref->{$val})); + $hashref->{$val} = { %{$arrayref->[$i]} }; + delete $hashref->{$val}->{$key}; + next ELEMENT; + } + } + + return($arrayref); # No keyfield matched + } + } + + # collapse any hashes which now only have a 'content' key + + if($self->{opt}->{collapseagain}) { + $hashref = $self->collapse_content($hashref); + } + + return($hashref); +} + + +############################################################################## +# Method: die_or_warn() +# +# Takes a diagnostic message and does one of three things: +# 1. dies if strict mode is enabled +# 2. warns if warnings are enabled but strict mode is not +# 3. ignores message and resturns silently if neither strict mode nor warnings +# are enabled +# + +sub die_or_warn { + my $self = shift; + my $msg = shift; + + croak $msg if($StrictMode); + carp "Warning: $msg" if($^W); +} + + +############################################################################## +# Method: new_hashref() +# +# This is a hook routine for overriding in a sub-class. Some people believe +# that using Tie::IxHash here will solve order-loss problems. +# + +sub new_hashref { + my $self = shift; + + return { @_ }; +} + + +############################################################################## +# Method: collapse_content() +# +# Helper routine for array_to_hash +# +# Arguments expected are: +# - an XML::Simple object +# - a hasref +# the hashref is a former array, turned into a hash by array_to_hash because +# of the presence of key attributes +# at this point collapse_content avoids over-complicated structures like +# dir => { libexecdir => { content => '$exec_prefix/libexec' }, +# localstatedir => { content => '$prefix' }, +# } +# into +# dir => { libexecdir => '$exec_prefix/libexec', +# localstatedir => '$prefix', +# } + +sub collapse_content { + my $self = shift; + my $hashref = shift; + + my $contentkey = $self->{opt}->{contentkey}; + + # first go through the values,checking that they are fit to collapse + foreach my $val (values %$hashref) { + return $hashref unless ( (ref($val) eq 'HASH') + and (keys %$val == 1) + and (exists $val->{$contentkey}) + ); + } + + # now collapse them + foreach my $key (keys %$hashref) { + $hashref->{$key}= $hashref->{$key}->{$contentkey}; + } + + return $hashref; +} + + +############################################################################## +# Method: value_to_xml() +# +# Helper routine for XMLout() - recurses through a data structure building up +# and returning an XML representation of that structure as a string. +# +# Arguments expected are: +# - the data structure to be encoded (usually a reference) +# - the XML tag name to use for this item +# - a string of spaces for use as the current indent level +# + +sub value_to_xml { + my $self = shift;; + + + # Grab the other arguments + + my($ref, $name, $indent) = @_; + + my $named = (defined($name) and $name ne '' ? 1 : 0); + + my $nl = "\n"; + + my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! + if($self->{opt}->{noindent}) { + $indent = ''; + $nl = ''; + } + + + # Convert to XML + + if(ref($ref)) { + croak "circular data structures not supported" + if(grep($_ == $ref, @{$self->{_ancestors}})); + push @{$self->{_ancestors}}, $ref; + } + else { + if($named) { + return(join('', + $indent, '<', $name, '>', + ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), + '", $nl + )); + } + else { + return("$ref$nl"); + } + } + + + # Unfold hash to array if possible + + if(UNIVERSAL::isa($ref, 'HASH') # It is a hash + and keys %$ref # and it's not empty + and $self->{opt}->{keyattr} # and folding is enabled + and !$is_root # and its not the root element + ) { + $ref = $self->hash_to_array($name, $ref); + } + + + my @result = (); + my($key, $value); + + + # Handle hashrefs + + if(UNIVERSAL::isa($ref, 'HASH')) { + + # Reintermediate grouped values if applicable + + if($self->{opt}->{grouptags}) { + $ref = $self->copy_hash($ref); + while(my($key, $val) = each %$ref) { + if($self->{opt}->{grouptags}->{$key}) { + $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; + } + } + } + + + # Scan for namespace declaration attributes + + my $nsdecls = ''; + my $default_ns_uri; + if($self->{nsup}) { + $ref = $self->copy_hash($ref); + $self->{nsup}->push_context(); + + # Look for default namespace declaration first + + if(exists($ref->{xmlns})) { + $self->{nsup}->declare_prefix('', $ref->{xmlns}); + $nsdecls .= qq( xmlns="$ref->{xmlns}"); + delete($ref->{xmlns}); + } + $default_ns_uri = $self->{nsup}->get_uri(''); + + + # Then check all the other keys + + foreach my $qname (keys(%$ref)) { + my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); + if($uri) { + if($uri eq $xmlns_ns) { + $self->{nsup}->declare_prefix($lname, $ref->{$qname}); + $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); + delete($ref->{$qname}); + } + } + } + + # Translate any remaining Clarkian names + + foreach my $qname (keys(%$ref)) { + my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); + if($uri) { + if($default_ns_uri and $uri eq $default_ns_uri) { + $ref->{$lname} = $ref->{$qname}; + delete($ref->{$qname}); + } + else { + my $prefix = $self->{nsup}->get_prefix($uri); + unless($prefix) { + # $self->{nsup}->declare_prefix(undef, $uri); + # $prefix = $self->{nsup}->get_prefix($uri); + $prefix = $self->{ns_prefix}++; + $self->{nsup}->declare_prefix($prefix, $uri); + $nsdecls .= qq( xmlns:$prefix="$uri"); + } + $ref->{"$prefix:$lname"} = $ref->{$qname}; + delete($ref->{$qname}); + } + } + } + } + + + my @nested = (); + my $text_content = undef; + if($named) { + push @result, $indent, '<', $name, $nsdecls; + } + + if(keys %$ref) { + my $first_arg = 1; + foreach my $key ($self->sorted_keys($name, $ref)) { + my $value = $ref->{$key}; + next if(substr($key, 0, 1) eq '-'); + if(!defined($value)) { + next if $self->{opt}->{suppressempty}; + unless(exists($self->{opt}->{suppressempty}) + and !defined($self->{opt}->{suppressempty}) + ) { + carp 'Use of uninitialized value' if($^W); + } + if($key eq $self->{opt}->{contentkey}) { + $text_content = ''; + } + else { + $value = exists($self->{opt}->{suppressempty}) ? {} : ''; + } + } + + if(!ref($value) + and $self->{opt}->{valueattr} + and $self->{opt}->{valueattr}->{$key} + ) { + $value = { $self->{opt}->{valueattr}->{$key} => $value }; + } + + if(ref($value) or $self->{opt}->{noattr}) { + push @nested, + $self->value_to_xml($value, $key, "$indent "); + } + else { + $value = $self->escape_value($value) unless($self->{opt}->{noescape}); + if($key eq $self->{opt}->{contentkey}) { + $text_content = $value; + } + else { + push @result, "\n$indent " . ' ' x length($name) + if($self->{opt}->{attrindent} and !$first_arg); + push @result, ' ', $key, '="', $value , '"'; + $first_arg = 0; + } + } + } + } + else { + $text_content = ''; + } + + if(@nested or defined($text_content)) { + if($named) { + push @result, ">"; + if(defined($text_content)) { + push @result, $text_content; + $nested[0] =~ s/^\s+// if(@nested); + } + else { + push @result, $nl; + } + if(@nested) { + push @result, @nested, $indent; + } + push @result, '", $nl; + } + else { + push @result, @nested; # Special case if no root elements + } + } + else { + push @result, " />", $nl; + } + $self->{nsup}->pop_context() if($self->{nsup}); + } + + + # Handle arrayrefs + + elsif(UNIVERSAL::isa($ref, 'ARRAY')) { + foreach $value (@$ref) { + next if !defined($value) and $self->{opt}->{suppressempty}; + if(!ref($value)) { + push @result, + $indent, '<', $name, '>', + ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), + '$nl"; + } + elsif(UNIVERSAL::isa($value, 'HASH')) { + push @result, $self->value_to_xml($value, $name, $indent); + } + else { + push @result, + $indent, '<', $name, ">$nl", + $self->value_to_xml($value, 'anon', "$indent "), + $indent, '$nl"; + } + } + } + + else { + croak "Can't encode a value of type: " . ref($ref); + } + + + pop @{$self->{_ancestors}} if(ref($ref)); + + return(join('', @result)); +} + + +############################################################################## +# Method: sorted_keys() +# +# Returns the keys of the referenced hash sorted into alphabetical order, but +# with the 'key' key (as in KeyAttr) first, if there is one. +# + +sub sorted_keys { + my($self, $name, $ref) = @_; + + return keys %$ref if $self->{opt}->{nosort}; + + my %hash = %$ref; + my $keyattr = $self->{opt}->{keyattr}; + + my @key; + + if(ref $keyattr eq 'HASH') { + if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { + push @key, $keyattr->{$name}->[0]; + delete $hash{$keyattr->{$name}->[0]}; + } + } + elsif(ref $keyattr eq 'ARRAY') { + foreach (@{$keyattr}) { + if(exists $hash{$_}) { + push @key, $_; + delete $hash{$_}; + last; + } + } + } + + return(@key, sort keys %hash); +} + +############################################################################## +# Method: escape_value() +# +# Helper routine for automatically escaping values for XMLout(). +# Expects a scalar data value. Returns escaped version. +# + +sub escape_value { + my($self, $data) = @_; + + return '' unless(defined($data)); + + $data =~ s/&/&/sg; + $data =~ s//>/sg; + $data =~ s/"/"/sg; + + my $level = $self->{opt}->{numericescape} or return $data; + + return $self->numeric_escape($data, $level); +} + +sub numeric_escape { + my($self, $data, $level) = @_; + + use utf8; # required for 5.6 + + if($self->{opt}->{numericescape} eq '2') { + $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; + } + else { + $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; + } + + return $data; +} + + +############################################################################## +# Method: hash_to_array() +# +# Helper routine for value_to_xml(). +# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a +# reference to the array on success or the original hash if unfolding is +# not possible. +# + +sub hash_to_array { + my $self = shift; + my $parent = shift; + my $hashref = shift; + + my $arrayref = []; + + my($key, $value); + + my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; + foreach $key (@keys) { + $value = $hashref->{$key}; + return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); + + if(ref($self->{opt}->{keyattr}) eq 'HASH') { + return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); + push @$arrayref, $self->copy_hash( + $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key + ); + } + else { + push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); + } + } + + return($arrayref); +} + + +############################################################################## +# Method: copy_hash() +# +# Helper routine for hash_to_array(). When unfolding a hash of hashes into +# an array of hashes, we need to copy the key from the outer hash into the +# inner hash. This routine makes a copy of the original hash so we don't +# destroy the original data structure. You might wish to override this +# method if you're using tied hashes and don't want them to get untied. +# + +sub copy_hash { + my($self, $orig, @extra) = @_; + + return { @extra, %$orig }; +} + +############################################################################## +# Methods required for building trees from SAX events +############################################################################## + +sub start_document { + my $self = shift; + + $self->handle_options('in') unless($self->{opt}); + + $self->{lists} = []; + $self->{curlist} = $self->{tree} = []; +} + + +sub start_element { + my $self = shift; + my $element = shift; + + my $name = $element->{Name}; + if($self->{opt}->{nsexpand}) { + $name = $element->{LocalName} || ''; + if($element->{NamespaceURI}) { + $name = '{' . $element->{NamespaceURI} . '}' . $name; + } + } + my $attributes = {}; + if($element->{Attributes}) { # Might be undef + foreach my $attr (values %{$element->{Attributes}}) { + if($self->{opt}->{nsexpand}) { + my $name = $attr->{LocalName} || ''; + if($attr->{NamespaceURI}) { + $name = '{' . $attr->{NamespaceURI} . '}' . $name + } + $name = 'xmlns' if($name eq $bad_def_ns_jcn); + $attributes->{$name} = $attr->{Value}; + } + else { + $attributes->{$attr->{Name}} = $attr->{Value}; + } + } + } + my $newlist = [ $attributes ]; + push @{ $self->{lists} }, $self->{curlist}; + push @{ $self->{curlist} }, $name => $newlist; + $self->{curlist} = $newlist; +} + + +sub characters { + my $self = shift; + my $chars = shift; + + my $text = $chars->{Data}; + my $clist = $self->{curlist}; + my $pos = $#$clist; + + if ($pos > 0 and $clist->[$pos - 1] eq '0') { + $clist->[$pos] .= $text; + } + else { + push @$clist, 0 => $text; + } +} + + +sub end_element { + my $self = shift; + + $self->{curlist} = pop @{ $self->{lists} }; +} + + +sub end_document { + my $self = shift; + + delete($self->{curlist}); + delete($self->{lists}); + + my $tree = $self->{tree}; + delete($self->{tree}); + + + # Return tree as-is to XMLin() + + return($tree) if($self->{nocollapse}); + + + # Or collapse it before returning it to SAX parser class + + if($self->{opt}->{keeproot}) { + $tree = $self->collapse({}, @$tree); + } + else { + $tree = $self->collapse(@{$tree->[1]}); + } + + if($self->{opt}->{datahandler}) { + return($self->{opt}->{datahandler}->($self, $tree)); + } + + return($tree); +} + +*xml_in = \&XMLin; +*xml_out = \&XMLout; + +1; + +__END__ + +=head1 QUICK START + +Say you have a script called B and a file of configuration options +called B containing this: + + + +
10.0.0.101
+
10.0.1.101
+
+ +
10.0.0.102
+
+ +
10.0.0.103
+
10.0.1.103
+
+
+ +The following lines of code in B: + + use XML::Simple; + + my $config = XMLin(); + +will 'slurp' the configuration options into the hashref $config (because no +arguments are passed to C the name and location of the XML file will +be inferred from name and location of the script). You can dump out the +contents of the hashref using Data::Dumper: + + use Data::Dumper; + + print Dumper($config); + +which will produce something like this (formatting has been adjusted for +brevity): + + { + 'logdir' => '/var/log/foo/', + 'debugfile' => '/tmp/foo.debug', + 'server' => { + 'sahara' => { + 'osversion' => '2.6', + 'osname' => 'solaris', + 'address' => [ '10.0.0.101', '10.0.1.101' ] + }, + 'gobi' => { + 'osversion' => '6.5', + 'osname' => 'irix', + 'address' => '10.0.0.102' + }, + 'kalahari' => { + 'osversion' => '2.0.34', + 'osname' => 'linux', + 'address' => [ '10.0.0.103', '10.0.1.103' ] + } + } + } + +Your script could then access the name of the log directory like this: + + print $config->{logdir}; + +similarly, the second address on the server 'kalahari' could be referenced as: + + print $config->{server}->{kalahari}->{address}->[1]; + +What could be simpler? (Rhetorical). + +For simple requirements, that's really all there is to it. If you want to +store your XML in a different directory or file, or pass it in as a string or +even pass it in via some derivative of an IO::Handle, you'll need to check out +L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that +neat little transformation that produced $config->{server}) you'll find options +for that as well. + +If you want to generate XML (for example to write a modified version of +$config back out as XML), check out C. + +If your needs are not so simple, this may not be the module for you. In that +case, you might want to read L<"WHERE TO FROM HERE?">. + +=head1 DESCRIPTION + +The XML::Simple module provides a simple API layer on top of an underlying XML +parsing module (either XML::Parser or one of the SAX2 parser modules). Two +functions are exported: C and C. Note: you can explicity +request the lower case versions of the function names: C and +C. + +The simplest approach is to call these two functions directly, but an +optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) +allows them to be called as methods of an B object. The object +interface can also be used at either end of a SAX pipeline. + +=head2 XMLin() + +Parses XML formatted data and returns a reference to a data structure which +contains the same information in a more readily accessible form. (Skip +down to L<"EXAMPLES"> below, for more sample code). + +C accepts an optional XML specifier followed by zero or more 'name => +value' option pairs. The XML specifier can be one of the following: + +=over 4 + +=item A filename + +If the filename contains no directory components C will look for the +file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the +current directory if the SearchPath option is not defined. eg: + + $ref = XMLin('/etc/params.xml'); + +Note, the filename '-' can be used to parse from STDIN. + +=item undef + +If there is no XML specifier, C will check the script directory and +each of the SearchPath directories for a file with the same name as the script +but with the extension '.xml'. Note: if you wish to specify options, you +must specify the value 'undef'. eg: + + $ref = XMLin(undef, ForceArray => 1); + +=item A string of XML + +A string containing XML (recognised by the presence of '<' and '>' characters) +will be parsed directly. eg: + + $ref = XMLin(''); + +=item An IO::Handle object + +An IO::Handle object will be read to EOF and its contents parsed. eg: + + $fh = IO::File->new('/etc/params.xml'); + $ref = XMLin($fh); + +=back + +=head2 XMLout() + +Takes a data structure (generally a hashref) and returns an XML encoding of +that structure. If the resulting XML is parsed using C, it should +return a data structure equivalent to the original (see caveats below). + +The C function can also be used to output the XML as SAX events +see the C option and L<"SAX SUPPORT"> for more details). + +When translating hashes to XML, hash keys which have a leading '-' will be +silently skipped. This is the approved method for marking elements of a +data structure which should be ignored by C. (Note: If these items +were not skipped the key names would be emitted as element or attribute names +with a leading '-' which would not be valid XML). + +=head2 Caveats + +Some care is required in creating data structures which will be passed to +C. Hash keys from the data structure will be encoded as either XML +element names or attribute names. Therefore, you should use hash key names +which conform to the relatively strict XML naming rules: + +Names in XML must begin with a letter. The remaining characters may be +letters, digits, hyphens (-), underscores (_) or full stops (.). It is also +allowable to include one colon (:) in an element name but this should only be +used when working with namespaces (B can only usefully work with +namespaces when teamed with a SAX Parser). + +You can use other punctuation characters in hash values (just not in hash +keys) however B does not support dumping binary data. + +If you break these rules, the current implementation of C will +simply emit non-compliant XML which will be rejected if you try to read it +back in. (A later version of B might take a more proactive +approach). + +Note also that although you can nest hashes and arrays to arbitrary levels, +circular data structures are not supported and will cause C to die. + +If you wish to 'round-trip' arbitrary data structures from Perl to XML and back +to Perl, then you should probably disable array folding (using the KeyAttr +option) both with C and with C. If you still don't get the +expected results, you may prefer to use L which is designed for +exactly that purpose. + +Refer to L<"WHERE TO FROM HERE?"> if C is too simple for your needs. + + +=head1 OPTIONS + +B supports a number of options (in fact as each release of +B adds more options, the module's claim to the name 'Simple' +becomes increasingly tenuous). If you find yourself repeatedly having to +specify the same options, you might like to investigate L<"OPTIONAL OO +INTERFACE"> below. + +If you can't be bothered reading the documentation, refer to +L<"STRICT MODE"> to automatically catch common mistakes. + +Because there are so many options, it's hard for new users to know which ones +are important, so here are the two you really need to know about: + +=over 4 + +=item * + +check out C because you'll almost certainly want to turn it on + +=item * + +make sure you know what the C option does and what its default value is +because it may surprise you otherwise (note in particular that 'KeyAttr' +affects both C and C) + +=back + +The option name headings below have a trailing 'comment' - a hash followed by +two pieces of metadata: + +=over 4 + +=item * + +Options are marked with 'I' if they are recognised by C and +'I' if they are recognised by C. + +=item * + +Each option is also flagged to indicate whether it is: + + 'important' - don't use the module until you understand this one + 'handy' - you can skip this on the first time through + 'advanced' - you can skip this on the second time through + 'SAX only' - don't worry about this unless you're using SAX (or + alternatively if you need this, you also need SAX) + 'seldom used' - you'll probably never use this unless you were the + person that requested the feature + +=back + +The options are listed alphabetically: + +Note: option names are no longer case sensitive so you can use the mixed case +versions shown here; all lower case as required by versions 2.03 and earlier; +or you can add underscores between the words (eg: key_attr). + + +=head2 AttrIndent => 1 I<# out - handy> + +When you are using C, enable this option to have attributes printed +one-per-line with sensible indentation rather than all on one line. + +=head2 Cache => [ cache schemes ] I<# in - advanced> + +Because loading the B module and parsing an XML file can consume a +significant number of CPU cycles, it is often desirable to cache the output of +C for later reuse. + +When parsing from a named file, B supports a number of caching +schemes. The 'Cache' option may be used to specify one or more schemes (using +an anonymous array). Each scheme will be tried in turn in the hope of finding +a cached pre-parsed representation of the XML file. If no cached copy is +found, the file will be parsed and the first cache scheme in the list will be +used to save a copy of the results. The following cache schemes have been +implemented: + +=over 4 + +=item storable + +Utilises B to read/write a cache file with the same name as the +XML file but with the extension .stor + +=item memshare + +When a file is first parsed, a copy of the resulting data structure is retained +in memory in the B module's namespace. Subsequent calls to parse +the same file will return a reference to this structure. This cached version +will persist only for the life of the Perl interpreter (which in the case of +mod_perl for example, may be some significant time). + +Because each caller receives a reference to the same data structure, a change +made by one caller will be visible to all. For this reason, the reference +returned should be treated as read-only. + +=item memcopy + +This scheme works identically to 'memshare' (above) except that each caller +receives a reference to a new data structure which is a copy of the cached +version. Copying the data structure will add a little processing overhead, +therefore this scheme should only be used where the caller intends to modify +the data structure (or wishes to protect itself from others who might). This +scheme uses B to perform the copy. + +=back + +Warning! The memory-based caching schemes compare the timestamp on the file to +the time when it was last parsed. If the file is stored on an NFS filesystem +(or other network share) and the clock on the file server is not exactly +synchronised with the clock where your script is run, updates to the source XML +file may appear to be ignored. + +=head2 ContentKey => 'keyname' I<# in+out - seldom used> + +When text content is parsed to a hash value, this option let's you specify a +name for the hash key to override the default 'content'. So for example: + + XMLin('Text', ContentKey => 'text') + +will parse to: + + { 'one' => 1, 'text' => 'Text' } + +instead of: + + { 'one' => 1, 'content' => 'Text' } + +C will also honour the value of this option when converting a hashref +to XML. + +You can also prefix your selected key name with a '-' character to have +C try a little harder to eliminate unnecessary 'content' keys after +array folding. For example: + + XMLin( + 'FirstSecond', + KeyAttr => {item => 'name'}, + ForceArray => [ 'item' ], + ContentKey => '-content' + ) + +will parse to: + + { + 'item' => { + 'one' => 'First' + 'two' => 'Second' + } + } + +rather than this (without the '-'): + + { + 'item' => { + 'one' => { 'content' => 'First' } + 'two' => { 'content' => 'Second' } + } + } + +=head2 DataHandler => code_ref I<# in - SAX only> + +When you use an B object as a SAX handler, it will return a +'simple tree' data structure in the same format as C would return. If +this option is set (to a subroutine reference), then when the tree is built the +subroutine will be called and passed two arguments: a reference to the +B object and a reference to the data tree. The return value from +the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for +more details). + +=head2 ForceArray => 1 I<# in - important> + +This option should be set to '1' to force nested elements to be represented +as arrays even when there is only one. Eg, with ForceArray enabled, this +XML: + + + value + + +would parse to this: + + { + 'name' => [ + 'value' + ] + } + +instead of this (the default): + + { + 'name' => 'value' + } + +This option is especially useful if the data structure is likely to be written +back out as XML and the default behaviour of rolling single nested elements up +into attributes is not desirable. + +If you are using the array folding feature, you should almost certainly enable +this option. If you do not, single nested elements will not be parsed to +arrays and therefore will not be candidates for folding to a hash. (Given that +the default value of 'KeyAttr' enables array folding, the default value of this +option should probably also have been enabled too - sorry). + +=head2 ForceArray => [ names ] I<# in - important> + +This alternative (and preferred) form of the 'ForceArray' option allows you to +specify a list of element names which should always be forced into an array +representation, rather than the 'all or nothing' approach above. + +It is also possible (since version 2.05) to include compiled regular +expressions in the list - any element names which match the pattern will be +forced to arrays. If the list contains only a single regex, then it is not +necessary to enclose it in an arrayref. Eg: + + ForceArray => qr/_list$/ + +=head2 ForceContent => 1 I<# in - seldom used> + +When C parses elements which have text content as well as attributes, +the text content must be represented as a hash value rather than a simple +scalar. This option allows you to force text content to always parse to +a hash value even when there are no attributes. So for example: + + XMLin('text1text2', ForceContent => 1) + +will parse to: + + { + 'x' => { 'content' => 'text1' }, + 'y' => { 'a' => 2, 'content' => 'text2' } + } + +instead of: + + { + 'x' => 'text1', + 'y' => { 'a' => 2, 'content' => 'text2' } + } + +=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> + +You can use this option to eliminate extra levels of indirection in your Perl +data structure. For example this XML: + + + + /usr/bin + /usr/local/bin + /usr/X11/bin + + + +Would normally be read into a structure like this: + + { + searchpath => { + dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] + } + } + +But when read in with the appropriate value for 'GroupTags': + + my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); + +It will return this simpler structure: + + { + searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] + } + +The grouping element (C<< >> in the example) must not contain any +attributes or elements other than the grouped element. + +You can specify multiple 'grouping element' to 'grouped element' mappings in +the same hashref. If this option is combined with C, the array +folding will occur first and then the grouped element names will be eliminated. + +C will also use the grouptag mappings to re-introduce the tags around +the grouped elements. Beware though that this will occur in all places that +the 'grouping tag' name occurs - you probably don't want to use the same name +for elements as well as attributes. + +=head2 Handler => object_ref I<# out - SAX only> + +Use the 'Handler' option to have C generate SAX events rather than +returning a string of XML. For more details see L<"SAX SUPPORT"> below. + +Note: the current implementation of this option generates a string of XML +and uses a SAX parser to translate it into SAX events. The normal encoding +rules apply here - your data must be UTF8 encoded unless you specify an +alternative encoding via the 'XMLDecl' option; and by the time the data reaches +the handler object, it will be in UTF8 form regardless of the encoding you +supply. A future implementation of this option may generate the events +directly. + +=head2 KeepRoot => 1 I<# in+out - handy> + +In its attempt to return a data structure free of superfluous detail and +unnecessary levels of indirection, C normally discards the root +element name. Setting the 'KeepRoot' option to '1' will cause the root element +name to be retained. So after executing this code: + + $config = XMLin('', KeepRoot => 1) + +You'll be able to reference the tempdir as +C<$config-E{config}-E{tempdir}> instead of the default +C<$config-E{tempdir}>. + +Similarly, setting the 'KeepRoot' option to '1' will tell C that the +data structure already contains a root element name and it is not necessary to +add another. + +=head2 KeyAttr => [ list ] I<# in+out - important> + +This option controls the 'array folding' feature which translates nested +elements from an array to a hash. It also controls the 'unfolding' of hashes +to arrays. + +For example, this XML: + + + + + + +would, by default, parse to this: + + { + 'user' => [ + { + 'login' => 'grep', + 'fullname' => 'Gary R Epstein' + }, + { + 'login' => 'stty', + 'fullname' => 'Simon T Tyson' + } + ] + } + +If the option 'KeyAttr => "login"' were used to specify that the 'login' +attribute is a key, the same XML would parse to: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein' + } + } + } + +The key attribute names should be supplied in an arrayref if there is more +than one. C will attempt to match attribute names in the order +supplied. C will use the first attribute name supplied when +'unfolding' a hash into an array. + +Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do +not want folding on input or unfolding on output you must setting this option +to an empty list to disable the feature. + +Note 2: If you wish to use this option, you should also enable the +C option. Without 'ForceArray', a single nested element will be +rolled up into a scalar rather than an array and therefore will not be folded +(since only arrays get folded). + +=head2 KeyAttr => { list } I<# in+out - important> + +This alternative (and preferred) method of specifiying the key attributes +allows more fine grained control over which elements are folded and on which +attributes. For example the option 'KeyAttr => { package => 'id' } will cause +any package elements to be folded on the 'id' attribute. No other elements +which have an 'id' attribute will be folded at all. + +Note: C will generate a warning (or a fatal error in L<"STRICT MODE">) +if this syntax is used and an element which does not have the specified key +attribute is encountered (eg: a 'package' element without an 'id' attribute, to +use the example above). Warnings will only be generated if B<-w> is in force. + +Two further variations are made possible by prefixing a '+' or a '-' character +to the attribute name: + +The option 'KeyAttr => { user => "+login" }' will cause this XML: + + + + + + +to parse to this data structure: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson', + 'login' => 'stty' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein', + 'login' => 'grep' + } + } + } + +The '+' indicates that the value of the key attribute should be copied rather +than moved to the folded hash key. + +A '-' prefix would produce this result: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson', + '-login' => 'stty' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein', + '-login' => 'grep' + } + } + } + +As described earlier, C will ignore hash keys starting with a '-'. + +=head2 NoAttr => 1 I<# in+out - handy> + +When used with C, the generated XML will contain no attributes. +All hash key/values will be represented as nested elements instead. + +When used with C, any attributes in the XML will be ignored. + +=head2 NoEscape => 1 I<# out - seldom used> + +By default, C will translate the characters 'E', 'E', '&' and +'"' to '<', '>', '&' and '"' respectively. Use this option to +suppress escaping (presumably because you've already escaped the data in some +more sophisticated manner). + +=head2 NoIndent => 1 I<# out - seldom used> + +Set this option to 1 to disable C's default 'pretty printing' mode. +With this option enabled, the XML output will all be on one line (unless there +are newlines in the data) - this may be easier for downstream processing. + +=head2 NoSort => 1 I<# out - seldom used> + +Newer versions of XML::Simple sort elements and attributes alphabetically (*), +by default. Enable this option to suppress the sorting - possibly for +backwards compatibility. + +* Actually, sorting is alphabetical but 'key' attribute or element names (as in +'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements +are sorted alphabetically by the value of the key field. + +=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> + +This option controls how whitespace in text content is handled. Recognised +values for the option are: + +=over 4 + +=item * + +0 = (default) whitespace is passed through unaltered (except of course for the +normalisation of whitespace in attribute values which is mandated by the XML +recommendation) + +=item * + +1 = whitespace is normalised in any value used as a hash key (normalising means +removing leading and trailing whitespace and collapsing sequences of whitespace +characters to a single space) + +=item * + +2 = whitespace is normalised in all text content + +=back + +Note: you can spell this option with a 'z' if that is more natural for you. + +=head2 NSExpand => 1 I<# in+out handy - SAX only> + +This option controls namespace expansion - the translation of element and +attribute names of the form 'prefix:name' to '{uri}name'. For example the +element name 'xsl:template' might be expanded to: +'{http://www.w3.org/1999/XSL/Transform}template'. + +By default, C will return element names and attribute names exactly as +they appear in the XML. Setting this option to 1 will cause all element and +attribute names to be expanded to include their namespace prefix. + +I. + +This option also controls whether C performs the reverse translation +from '{uri}name' back to 'prefix:name'. The default is no translation. If +your data contains expanded names, you should set this option to 1 otherwise +C will emit XML which is not well formed. + +I to translate URIs back to prefixes>. + +=head2 NumericEscape => 0 | 1 | 2 I<# out - handy> + +Use this option to have 'high' (non-ASCII) characters in your Perl data +structure converted to numeric entities (eg: €) in the XML output. Three +levels are possible: + +0 - default: no numeric escaping (OK if you're writing out UTF8) + +1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output + +2 - all characters above 0x7F are escaped (good for plain ASCII output) + +=head2 OutputFile => I<# out - handy> + +The default behaviour of C is to return the XML as a string. If you +wish to write the XML to a file, simply supply the filename using the +'OutputFile' option. + +This option also accepts an IO handle object - especially useful in Perl 5.8.0 +and later for output using an encoding other than UTF-8, eg: + + open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; + XMLout($ref, OutputFile => $fh); + +Note, XML::Simple does not require that the object you pass in to the +OutputFile option inherits from L - it simply assumes the object +supports a C method. + +=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> + +I. + +This option allows you to pass parameters to the constructor of the underlying +XML::Parser object (which of course assumes you're not using SAX). + +=head2 RootName => 'string' I<# out - handy> + +By default, when C generates XML, the root element will be named +'opt'. This option allows you to specify an alternative name. + +Specifying either undef or the empty string for the RootName option will +produce XML with no root elements. In most cases the resulting XML fragment +will not be 'well formed' and therefore could not be read back in by C. +Nevertheless, the option has been found to be useful in certain circumstances. + +=head2 SearchPath => [ list ] I<# in - handy> + +If you pass C a filename, but the filename include no directory +component, you can use this option to specify which directories should be +searched to locate the file. You might use this option to search first in the +user's home directory, then in a global directory such as /etc. + +If a filename is provided to C but SearchPath is not defined, the +file is assumed to be in the current directory. + +If the first parameter to C is undefined, the default SearchPath +will contain only the directory in which the script itself is located. +Otherwise the default SearchPath will be empty. + +=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> + +This option controls what C should do with empty elements (no +attributes and no content). The default behaviour is to represent them as +empty hashes. Setting this option to a true value (eg: 1) will cause empty +elements to be skipped altogether. Setting the option to 'undef' or the empty +string will cause empty elements to be represented as the undefined value or +the empty string respectively. The latter two alternatives are a little +easier to test for in your code than a hash with no keys. + +The option also controls what C does with undefined values. Setting +the option to undef causes undefined values to be output as empty elements +(rather than empty attributes), it also suppresses the generation of warnings +about undefined values. Setting the option to a true value (eg: 1) causes +undefined values to be skipped altogether on output. + +=head2 ValueAttr => [ names ] I<# in - handy> + +Use this option to deal elements which always have a single attribute and no +content. Eg: + + + + + + +Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: + + { + colour => 'red', + size => 'XXL' + } + +instead of this (the default): + + { + colour => { value => 'red' }, + size => { value => 'XXL' } + } + +Note: This form of the ValueAttr option is not compatible with C - +since the attribute name is discarded at parse time, the original XML cannot be +reconstructed. + +=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> + +This (preferred) form of the ValueAttr option requires you to specify both +the element and the attribute names. This is not only safer, it also allows +the original XML to be reconstructed by C. + +Note: You probably don't want to use this option and the NoAttr option at the +same time. + +=head2 Variables => { name => value } I<# in - handy> + +This option allows variables in the XML to be expanded when the file is read. +(there is no facility for putting the variable names back if you regenerate +XML using C). + +A 'variable' is any text of the form C<${name}> which occurs in an attribute +value or in the text content of an element. If 'name' matches a key in the +supplied hashref, C<${name}> will be replaced with the corresponding value from +the hashref. If no matching key is found, the variable will not be replaced. +Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are +allowed). + +=head2 VarAttr => 'attr_name' I<# in - handy> + +In addition to the variables defined using C, this option allows +variables to be defined in the XML. A variable definition consists of an +element with an attribute called 'attr_name' (the value of the C +option). The value of the attribute will be used as the variable name and the +text content of the element will be used as the value. A variable defined in +this way will override a variable defined using the C option. For +example: + + XMLin( ' + /usr/local/apache + ${prefix} + ${exec_prefix}/bin + ', + VarAttr => 'name', ContentKey => '-content' + ); + +produces the following data structure: + + { + dir => { + prefix => '/usr/local/apache', + exec_prefix => '/usr/local/apache', + bindir => '/usr/local/apache/bin', + } + } + +=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> + +If you want the output from C to start with the optional XML +declaration, simply set the option to '1'. The default XML declaration is: + + + +If you want some other string (for example to declare an encoding value), set +the value of this option to the complete string you require. + + +=head1 OPTIONAL OO INTERFACE + +The procedural interface is both simple and convenient however there are a +couple of reasons why you might prefer to use the object oriented (OO) +interface: + +=over 4 + +=item * + +to define a set of default values which should be used on all subsequent calls +to C or C + +=item * + +to override methods in B to provide customised behaviour + +=back + +The default values for the options described above are unlikely to suit +everyone. The OO interface allows you to effectively override B's +defaults with your preferred values. It works like this: + +First create an XML::Simple parser object with your preferred defaults: + + my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); + +then call C or C as a method of that object: + + my $ref = $xs->XMLin($xml); + my $xml = $xs->XMLout($ref); + +You can also specify options when you make the method calls and these values +will be merged with the values specified when the object was created. Values +specified in a method call take precedence. + +Note: when called as methods, the C and C routines may be +called as C or C. The method names are aliased so the +only difference is the aesthetics. + +=head2 Parsing Methods + +You can explicitly call one of the following methods rather than rely on the +C method automatically determining whether the target to be parsed is +a string, a file or a filehandle: + +=over 4 + +=item parse_string(text) + +Works exactly like the C method but assumes the first argument is +a string of XML (or a reference to a scalar containing a string of XML). + +=item parse_file(filename) + +Works exactly like the C method but assumes the first argument is +the name of a file containing XML. + +=item parse_fh(file_handle) + +Works exactly like the C method but assumes the first argument is +a filehandle which can be read to get XML. + +=back + +=head2 Hook Methods + +You can make your own class which inherits from XML::Simple and overrides +certain behaviours. The following methods may provide useful 'hooks' upon +which to hang your modified behaviour. You may find other undocumented methods +by examining the source, but those may be subject to change in future releases. + +=over 4 + +=item handle_options(direction, name => value ...) + +This method will be called when one of the parsing methods or the C +method is called. The initial argument will be a string (either 'in' or 'out') +and the remaining arguments will be name value pairs. + +=item default_config_file() + +Calculates and returns the name of the file which should be parsed if no +filename is passed to C (default: C<$0.xml>). + +=item build_simple_tree(filename, string) + +Called from C or any of the parsing methods. Takes either a file name +as the first argument or C followed by a 'string' as the second +argument. Returns a simple tree data structure. You could override this +method to apply your own transformations before the data structure is returned +to the caller. + +=item new_hashref() + +When the 'simple tree' data structure is being built, this method will be +called to create any required anonymous hashrefs. + +=item sorted_keys(name, hashref) + +Called when C is translating a hashref to XML. This routine returns +a list of hash keys in the order that the corresponding attributes/elements +should appear in the output. + +=item escape_value(string) + +Called from C, takes a string and returns a copy of the string with +XML character escaping rules applied. + +=item numeric_escape(string) + +Called from C, to handle non-ASCII characters (depending on the +value of the NumericEscape option). + +=item copy_hash(hashref, extra_key => value, ...) + +Called from C, when 'unfolding' a hash of hashes into an array of +hashes. You might wish to override this method if you're using tied hashes and +don't want them to get untied. + +=back + +=head2 Cache Methods + +XML::Simple implements three caching schemes ('storable', 'memshare' and +'memcopy'). You can implement a custom caching scheme by implementing +two methods - one for reading from the cache and one for writing to it. + +For example, you might implement a new 'dbm' scheme that stores cached data +structures using the L module. First, you would add a +C method which accepted a filename for use as a lookup key +and returned a data structure on success, or undef on failure. Then, you would +implement a C method which accepted a data structure and a +filename. + +You would use this caching scheme by specifying the option: + + Cache => [ 'dbm' ] + +=head1 STRICT MODE + +If you import the B routines like this: + + use XML::Simple qw(:strict); + +the following common mistakes will be detected and treated as fatal errors + +=over 4 + +=item * + +Failing to explicitly set the C option - if you can't be bothered +reading about this option, turn it off with: KeyAttr => [ ] + +=item * + +Failing to explicitly set the C option - if you can't be bothered +reading about this option, set it to the safest mode with: ForceArray => 1 + +=item * + +Setting ForceArray to an array, but failing to list all the elements from the +KeyAttr hash. + +=item * + +Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains +one or more EpartE elements without a 'partnum' attribute (or nested +element). Note: if strict mode is not set but -w is, this condition triggers a +warning. + +=item * + +Data error - as above, but non-unique values are present in the key attribute +(eg: more than one EpartE element with the same partnum). This will +also trigger a warning if strict mode is not enabled. + +=item * + +Data error - as above, but value of key attribute (eg: partnum) is not a +scalar string (due to nested elements etc). This will also trigger a warning +if strict mode is not enabled. + +=back + +=head1 SAX SUPPORT + +From version 1.08_01, B includes support for SAX (the Simple API +for XML) - specifically SAX2. + +In a typical SAX application, an XML parser (or SAX 'driver') module generates +SAX events (start of element, character data, end of element, etc) as it parses +an XML document and a 'handler' module processes the events to extract the +required data. This simple model allows for some interesting and powerful +possibilities: + +=over 4 + +=item * + +Applications written to the SAX API can extract data from huge XML documents +without the memory overheads of a DOM or tree API. + +=item * + +The SAX API allows for plug and play interchange of parser modules without +having to change your code to fit a new module's API. A number of SAX parsers +are available with capabilities ranging from extreme portability to blazing +performance. + +=item * + +A SAX 'filter' module can implement both a handler interface for receiving +data and a generator interface for passing modified data on to a downstream +handler. Filters can be chained together in 'pipelines'. + +=item * + +One filter module might split a data stream to direct data to two or more +downstream handlers. + +=item * + +Generating SAX events is not the exclusive preserve of XML parsing modules. +For example, a module might extract data from a relational database using DBI +and pass it on to a SAX pipeline for filtering and formatting. + +=back + +B can operate at either end of a SAX pipeline. For example, +you can take a data structure in the form of a hashref and pass it into a +SAX pipeline using the 'Handler' option on C: + + use XML::Simple; + use Some::SAX::Filter; + use XML::SAX::Writer; + + my $ref = { + .... # your data here + }; + + my $writer = XML::SAX::Writer->new(); + my $filter = Some::SAX::Filter->new(Handler => $writer); + my $simple = XML::Simple->new(Handler => $filter); + $simple->XMLout($ref); + +You can also put B at the opposite end of the pipeline to take +advantage of the simple 'tree' data structure once the relevant data has been +isolated through filtering: + + use XML::SAX; + use Some::SAX::Filter; + use XML::Simple; + + my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); + my $filter = Some::SAX::Filter->new(Handler => $simple); + my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); + + my $ref = $parser->parse_uri('some_huge_file.xml'); + + print $ref->{part}->{'555-1234'}; + +You can build a filter by using an XML::Simple object as a handler and setting +its DataHandler option to point to a routine which takes the resulting tree, +modifies it and sends it off as SAX events to a downstream handler: + + my $writer = XML::SAX::Writer->new(); + my $filter = XML::Simple->new( + DataHandler => sub { + my $simple = shift; + my $data = shift; + + # Modify $data here + + $simple->XMLout($data, Handler => $writer); + } + ); + my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); + + $parser->parse_uri($filename); + +I but it could also have been specified in the constructor>. + +=head1 ENVIRONMENT + +If you don't care which parser module B uses then skip this +section entirely (it looks more complicated than it really is). + +B will default to using a B parser if one is available or +B if SAX is not available. + +You can dictate which parser module is used by setting either the environment +variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable +$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules +are used: + +=over 4 + +=item * + +The package variable takes precedence over the environment variable if both are defined. To force B to ignore the environment settings and use +its default rules, you can set the package variable to an empty string. + +=item * + +If the 'preferred parser' is set to the string 'XML::Parser', then +L will be used (or C will die if L is not +installed). + +=item * + +If the 'preferred parser' is set to some other value, then it is assumed to be +the name of a SAX parser module and is passed to L +If L is not installed, or the requested parser module is not +installed, then C will die. + +=item * + +If the 'preferred parser' is not defined at all (the normal default +state), an attempt will be made to load L. If L is +installed, then a parser module will be selected according to +L's normal rules (which typically means the last SAX +parser installed). + +=item * + +if the 'preferred parser' is not defined and B is not +installed, then B will be used. C will die if +L is not installed. + +=back + +Note: The B distribution includes an XML parser written entirely in +Perl. It is very portable but it is not very fast. You should consider +installing L or L if they are available for your +platform. + +=head1 ERROR HANDLING + +The XML standard is very clear on the issue of non-compliant documents. An +error in parsing any single element (for example a missing end tag) must cause +the whole document to be rejected. B will die with an appropriate +message if it encounters a parsing error. + +If dying is not appropriate for your application, you should arrange to call +C in an eval block and look for errors in $@. eg: + + my $config = eval { XMLin() }; + PopUpMessage($@) if($@); + +Note, there is a common misconception that use of B will significantly +slow down a script. While that may be true when the code being eval'd is in a +string, it is not true of code like the sample above. + +=head1 EXAMPLES + +When C reads the following very simple piece of XML: + + + +it returns the following data structure: + + { + 'username' => 'testuser', + 'password' => 'frodo' + } + +The identical result could have been produced with this alternative XML: + + + +Or this (although see 'ForceArray' option for variations): + + + testuser + frodo + + +Repeated nested elements are represented as anonymous arrays: + + + + joe@smith.com + jsmith@yahoo.com + + + bob@smith.com + + + + { + 'person' => [ + { + 'email' => [ + 'joe@smith.com', + 'jsmith@yahoo.com' + ], + 'firstname' => 'Joe', + 'lastname' => 'Smith' + }, + { + 'email' => 'bob@smith.com', + 'firstname' => 'Bob', + 'lastname' => 'Smith' + } + ] + } + +Nested elements with a recognised key attribute are transformed (folded) from +an array into a hash keyed on the value of that attribute (see the C +option): + + + + + + + + { + 'person' => { + 'jbloggs' => { + 'firstname' => 'Joe', + 'lastname' => 'Bloggs' + }, + 'tsmith' => { + 'firstname' => 'Tom', + 'lastname' => 'Smith' + }, + 'jsmith' => { + 'firstname' => 'Joe', + 'lastname' => 'Smith' + } + } + } + + +The tag can be used to form anonymous arrays: + + + Col 1Col 2Col 3 + R1C1R1C2R1C3 + R2C1R2C2R2C3 + R3C1R3C2R3C3 + + + { + 'head' => [ + [ 'Col 1', 'Col 2', 'Col 3' ] + ], + 'data' => [ + [ 'R1C1', 'R1C2', 'R1C3' ], + [ 'R2C1', 'R2C2', 'R2C3' ], + [ 'R3C1', 'R3C2', 'R3C3' ] + ] + } + +Anonymous arrays can be nested to arbirtrary levels and as a special case, if +the surrounding tags for an XML document contain only an anonymous array the +arrayref will be returned directly rather than the usual hashref: + + + Col 1Col 2 + R1C1R1C2 + R2C1R2C2 + + + [ + [ 'Col 1', 'Col 2' ], + [ 'R1C1', 'R1C2' ], + [ 'R2C1', 'R2C2' ] + ] + +Elements which only contain text content will simply be represented as a +scalar. Where an element has both attributes and text content, the element +will be represented as a hashref with the text content in the 'content' key +(see the C option): + + + first + second + + + { + 'one' => 'first', + 'two' => { 'attr' => 'value', 'content' => 'second' } + } + +Mixed content (elements which contain both text content and nested elements) +will be not be represented in a useful way - element order and significant +whitespace will be lost. If you need to work with mixed content, then +XML::Simple is not the right tool for your job - check out the next section. + +=head1 WHERE TO FROM HERE? + +B is able to present a simple API because it makes some +assumptions on your behalf. These include: + +=over 4 + +=item * + +You're not interested in text content consisting only of whitespace + +=item * + +You don't mind that when things get slurped into a hash the order is lost + +=item * + +You don't want fine-grained control of the formatting of generated XML + +=item * + +You would never use a hash key that was not a legal XML element name + +=item * + +You don't need help converting between different encodings + +=back + +In a serious XML project, you'll probably outgrow these assumptions fairly +quickly. This section of the document used to offer some advice on chosing a +more powerful option. That advice has now grown into the 'Perl-XML FAQ' +document which you can find at: L + +The advice in the FAQ boils down to a quick explanation of tree versus +event based parsers and then recommends: + +For event based parsing, use SAX (do not set out to write any new code for +XML::Parser's handler API - it is obselete). + +For tree-based parsing, you could choose between the 'Perlish' approach of +L and more standards based DOM implementations - preferably one with +XPath support. + + +=head1 SEE ALSO + +B requires either L or L. + +To generate documents with namespaces, L is required. + +The optional caching functions require L. + +Answers to Frequently Asked Questions about XML::Simple are bundled with this +distribution as: L + +=head1 COPYRIGHT + +Copyright 1999-2004 Grant McLean Egrantm@cpan.orgE + +This library is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + + diff --git a/tests/lib/XML/Simple/FAQ.pod b/tests/lib/XML/Simple/FAQ.pod new file mode 100644 index 0000000000..0aa52e8c7b --- /dev/null +++ b/tests/lib/XML/Simple/FAQ.pod @@ -0,0 +1,646 @@ +package XML::Simple::FAQ; +1; + +__END__ + +=head1 Frequently Asked Questions about XML::Simple + + +=head1 Basics + + +=head2 What is XML::Simple designed to be used for? + +XML::Simple is a Perl module that was originally developed as a tool for +reading and writing configuration data in XML format. You can use it for +many other purposes that involve storing and retrieving structured data in +XML. + +You might also find XML::Simple a good starting point for playing with XML +from Perl. It doesn't have a steep learning curve and if you outgrow its +capabilities there are plenty of other Perl/XML modules to 'step up' to. + + +=head2 Why store configuration data in XML anyway? + +The many advantages of using XML format for configuration data include: + +=over 4 + +=item * + +Using existing XML parsing tools requires less development time, is easier +and more robust than developing your own config file parsing code + +=item * + +XML can represent relationships between pieces of data, such as nesting of +sections to arbitrary levels (not easily done with .INI files for example) + +=item * + +XML is basically just text, so you can easily edit a config file (easier than +editing a Win32 registry) + +=item * + +XML provides standard solutions for handling character sets and encoding +beyond basic ASCII (important for internationalization) + +=item * + +If it becomes necessary to change your configuration file format, there are +many tools available for performing transformations on XML files + +=item * + +XML is an open standard (the world does not need more proprietary binary +file formats) + +=item * + +Taking the extra step of developing a DTD allows the format of configuration +files to be validated before your program reads them (not directly supported +by XML::Simple) + +=item * + +Combining a DTD with a good XML editor can give you a GUI config editor for +minimal coding effort + +=back + + +=head2 What isn't XML::Simple good for? + +The main limitation of XML::Simple is that it does not work with 'mixed +content' (see the next question). If you consider your XML files contain +marked up text rather than structured data, you should probably use another +module. + +If you are working with very large XML files, XML::Simple's approach of +representing the whole file in memory as a 'tree' data structure may not be +suitable. + + +=head2 What is mixed content? + +Consider this example XML: + + + This is mixed content. + + +This is said to be mixed content, because the EparaE element contains +both character data (text content) and nested elements. + +Here's some more XML: + + + Joe + Bloggs + 25-April-1969 + + +This second example is not generally considered to be mixed content. The +Efirst_nameE, Elast_nameE and EdobE elements contain +only character data and the EpersonE element contains only nested +elements. (Note: Strictly speaking, the whitespace between the nested +elements is character data, but it is ignored by XML::Simple). + + +=head2 Why doesn't XML::Simple handle mixed content? + +Because if it did, it would no longer be simple :-) + +Seriously though, there are plenty of excellent modules that allow you to +work with mixed content in a variety of ways. Handling mixed content +correctly is not easy and by ignoring these issues, XML::Simple is able to +present an API without a steep learning curve. + + +=head2 Which Perl modules do handle mixed content? + +Every one of them except XML::Simple :-) + +If you're looking for a recommendation, I'd suggest you look at the Perl-XML +FAQ at: + + http://perl-xml.sourceforge.net/faq/ + + +=head1 Installation + + +=head2 How do I install XML::Simple? + +If you're running ActiveState Perl, you've probably already got XML::Simple +(although you may want to upgrade to version 1.09 or better for SAX support). + +If you do need to install XML::Simple, you'll need to install an XML parser +module first. Install either XML::Parser (which you may have already) or +XML::SAX. If you install both, XML::SAX will be used by default. + +Once you have a parser installed ... + +On Unix systems, try: + + perl -MCPAN -e 'install XML::Simple' + +If that doesn't work, download the latest distribution from +ftp://ftp.cpan.org/pub/CPAN/authors/id/G/GR/GRANTM , unpack it and run these +commands: + + perl Makefile.PL + make + make test + make install + +On Win32, if you have a recent build of ActiveState Perl (618 or better) try +this command: + + ppm install XML::Simple + +If that doesn't work, you really only need the Simple.pm file, so extract it +from the .tar.gz file (eg: using WinZIP) and save it in the \site\lib\XML +directory under your Perl installation (typically C:\Perl). + + +=head2 I'm trying to install XML::Simple and 'make test' fails + +Is the directory where you've unpacked XML::Simple mounted from a file server +using NFS, SMB or some other network file sharing? If so, that may cause +errors in the the following test scripts: + + 3_Storable.t + 4_MemShare.t + 5_MemCopy.t + +The test suite is designed to exercise the boundary conditions of all +XML::Simple's functionality and these three scripts exercise the caching +functions. If XML::Simple is asked to parse a file for which it has a cached +copy of a previous parse, then it compares the timestamp on the XML file with +the timestamp on the cached copy. If the cached copy is *newer* then it will +be used. If the cached copy is older or the same age then the file is +re-parsed. The test scripts will get confused by networked filesystems if +the workstation and server system clocks are not synchronised (to the +second). + +If you get an error in one of these three test scripts but you don't plan to +use the caching options (they're not enabled by default), then go right ahead +and run 'make install'. If you do plan to use caching, then try unpacking +the distribution on local disk and doing the build/test there. + +It's probably not a good idea to use the caching options with networked +filesystems in production. If the file server's clock is ahead of the local +clock, XML::Simple will re-parse files when it could have used the cached +copy. However if the local clock is ahead of the file server clock and a +file is changed immediately after it is cached, the old cached copy will be +used. + +Is one of the three test scripts (above) failing but you're not running on +a network filesystem? Are you running Win32? If so, you may be seeing a bug +in Win32 where writes to a file do not affect its modfication timestamp. + +If none of these scenarios match your situation, please confirm you're +running the latest version of XML::Simple and then email the output of +'make test' to me at grantm@cpan.org + +=head2 Why is XML::Simple so slow? + +If you find that XML::Simple is very slow reading XML, the most likely reason +is that you have XML::SAX installed but no additional SAX parser module. The +XML::SAX distribution includes an XML parser written entirely in Perl. This is +very portable but not very fast. For better performance install either +XML::SAX::Expat or XML::LibXML. + + +=head1 Usage + +=head2 How do I use XML::Simple? + +If you had an XML document called /etc/appconfig/foo.xml you could 'slurp' it +into a simple data structure (typically a hashref) with these lines of code: + + use XML::Simple; + + my $config = XMLin('/etc/appconfig/foo.xml'); + +The XMLin() function accepts options after the filename. + + +=head2 There are so many options, which ones do I really need to know about? + +Although you can get by without using any options, you shouldn't even +consider using XML::Simple in production until you know what these two +options do: + +=over 4 + +=item * + +forcearray + +=item * + +keyattr + +=back + +The reason you really need to read about them is because the default values +for these options will trip you up if you don't. Although everyone agrees +that these defaults are not ideal, there is not wide agreement on what they +should be changed to. The answer therefore is to read about them (see below) +and select values which are right for you. + + +=head2 What is the forcearray option all about? + +Consider this XML in a file called ./person.xml: + + + Joe + Bloggs + bungy jumping + sky diving + knitting + + +You could read it in with this line: + + my $person = XMLin('./person.xml'); + +Which would give you a data structure like this: + + $person = { + 'first_name' => 'Joe', + 'last_name' => 'Bloggs', + 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] + }; + +The Efirst_nameE and Elast_nameE elements are represented as +simple scalar values which you could refer to like this: + + print "$person->{first_name} $person->{last_name}\n"; + +The EhobbieE elements are represented as an array - since there is +more than one. You could refer to the first one like this: + + print $person->{hobbie}->[0], "\n"; + +Or the whole lot like this: + + print join(', ', @{$person->{hobbie}} ), "\n"; + +The catch is, that these last two lines of code will only work for people +who have more than one hobbie. If there is only one EhobbieE +element, it will be represented as a simple scalar (just like +Efirst_nameE and Elast_nameE). Which might lead you to write +code like this: + + if(ref($person->{hobbie})) { + print join(', ', @{$person->{hobbie}} ), "\n"; + } + else { + print $person->{hobbie}, "\n"; + } + +Don't do that. + +One alternative approach is to set the forcearray option to a true value: + + my $person = XMLin('./person.xml', forcearray => 1); + +Which will give you a data structure like this: + + $person = { + 'first_name' => [ 'Joe' ], + 'last_name' => [ 'Bloggs' ], + 'hobbie' => [ 'bungy jumping', 'sky diving', 'knitting' ] + }; + +Then you can use this line to refer to all the list of hobbies even if there +was only one: + + print join(', ', @{$person->{hobbie}} ), "\n"; + +The downside of this approach is that the Efirst_nameE and +Elast_nameE elements will also always be represented as arrays even +though there will never be more than one: + + print "$person->{first_name}->[0] $person->{last_name}->[0]\n"; + +This might be OK if you change the XML to use attributes for things that +will always be singular and nested elements for things that may be plural: + + + motorcycle maintenance + + +On the other hand, if you prefer not to use attributes, then you could +specify that any EhobbieE elements should always be represented as +arrays and all other nested elements should be simple scalar values unless +there is more than one: + + my $person = XMLin('./person.xml', forcearray => [ 'hobbie' ]); + +The forcearray option accepts a list of element names which should always +be forced to an array representation: + + forcearray => [ qw(hobbie qualification childs_name) ] + +See the XML::Simple manual page for more information. + + +=head2 What is the keyattr option all about? + +Consider this sample XML: + + + + + + + +You could slurp it in with this code: + + my $catalog = XMLin('./catalog.xml'); + +Which would return a data structure like this: + + $catalog = { + 'part' => [ + { + 'partnum' => '1842334', + 'desc' => 'High pressure flange', + 'price' => '24.50' + }, + { + 'partnum' => '9344675', + 'desc' => 'Threaded gasket', + 'price' => '9.25' + }, + { + 'partnum' => '5634896', + 'desc' => 'Low voltage washer', + 'price' => '12.00' + } + ] + }; + +Then you could access the description of the first part in the catalog +with this code: + + print $catalog->{part}->[0]->{desc}, "\n"; + +However, if you wanted to access the description of the part with the +part number of "9344675" then you'd have to code a loop like this: + + foreach my $part (@{$catalog->{part}}) { + if($part->{partnum} eq '9344675') { + print $part->{desc}, "\n"; + last; + } + } + +The knowledge that each EpartE element has a unique partnum attribute +allows you to eliminate this search. You can pass this knowledge on to +XML::Simple like this: + + my $catalog = XMLin($xml, keyattr => ['partnum']); + +Which will return a data structure like this: + + $catalog = { + 'part' => { + '5634896' => { 'desc' => 'Low voltage washer', 'price' => '12.00' }, + '1842334' => { 'desc' => 'High pressure flange', 'price' => '24.50' }, + '9344675' => { 'desc' => 'Threaded gasket', 'price' => '9.25' } + } + }; + +XML::Simple has been able to transform $catalog->{part} from an arrayref to +a hashref (keyed on partnum). This transformation is called 'array folding'. + +Through the use of array folding, you can now index directly to the +description of the part you want: + + print $catalog->{part}->{9344675}->{desc}, "\n"; + +The 'keyattr' option also enables array folding when the unique key is in a +nested element rather than an attribute. eg: + + + + 1842334 + High pressure flange + 24.50 + + + 9344675 + Threaded gasket + 9.25 + + + 5634896 + Low voltage washer + 12.00 + + + +See the XML::Simple manual page for more information. + + +=head2 So what's the catch with 'keyattr'? + +One thing to watch out for is that you might get array folding even if you +don't supply the keyattr option. The default value for this option is: + + [ 'name', 'key', 'id'] + +Which means if your XML elements have a 'name', 'key' or 'id' attribute (or +nested element) then they may get folded on those values. This means that +you can take advantage of array folding simply through careful choice of +attribute names. On the hand, if you really don't want array folding at all, +you'll need to set 'key attr to an empty list: + + my $ref = XMLin($xml, keyattr => []); + +A second 'gotcha' is that array folding only works on arrays. That might +seem obvious, but if there's only one record in your XML and you didn't set +the 'forcearray' option then it won't be represented as an array and +consequently won't get folded into a hash. The moral is that if you're +using array folding, you should always turn on the forcearray option. + +You probably want to be as specific as you can be too. For instance, the +safest way to parse the EcatalogE example above would be: + + my $catalog = XMLin($xml, keyattr => { part => 'partnum'}, + forcearray => ['part']); + +By using the hashref for keyattr, you can specify that only EpartE +elements should be folded on the 'partnum' attribute (and that the +EpartE elements should not be folded on any other attribute). + +By supplying a list of element names for forcearray, you're ensuring that +folding will work even if there's only one EpartE. You're also +ensuring that if the 'partnum' unique key is supplied in a nested element +then that element won't get forced to an array too. + + +=head2 How do I know what my data structure should look like? + +The rules are fairly straightforward: + +=over 4 + +=item * + +each element gets represented as a hash + +=item * + +unless it contains only text, in which case it'll be a simple scalar value + +=item * + +or unless there's more than one element with the same name, in which case +they'll be represented as an array + +=item * + +unless you've got array folding enabled, in which case they'll be folded into +a hash + +=item * + +empty elements (no text contents B no attributes) will either be +represented as an empty hash, an empty string or undef - depending on the value +of the 'suppressempty' option. + +=back + +If you're in any doubt, use Data::Dumper, eg: + + use XML::Simple; + use Data::Dumper; + + my $ref = XMLin($xml); + + print Dumper($ref); + + +=head2 I'm getting 'Use of uninitialized value' warnings + +You're probably trying to index into a non-existant hash key - try +Data::Dumper. + + +=head2 I'm getting a 'Not an ARRAY reference' error + +Something that you expect to be an array is not. The two most likely causes +are that you forgot to use 'forcearray' or that the array got folded into a +hash - try Data::Dumper. + + +=head2 I'm getting a 'No such array field' error + +Something that you expect to be a hash is actually an array. Perhaps array +folding failed because one element was missing the key attribute - try +Data::Dumper. + + +=head2 I'm getting an 'Out of memory' error + +Something in the data structure is not as you expect and Perl may be trying +unsuccessfully to autovivify things - try Data::Dumper. + +If you're already using Data::Dumper, try calling Dumper() immediately after +XMLin() - ie: before you attempt to access anything in the data structure. + + +=head2 My element order is getting jumbled up + +If you read an XML file with XMLin() and then write it back out with +XMLout(), the order of the elements will likely be different. (However, if +you read the file back in with XMLin() you'll get the same Perl data +structure). + +The reordering happens because XML::Simple uses hashrefs to store your data +and Perl hashes do not really have any order. + +It is possible that a future version of XML::Simple will use Tie::IxHash +to store the data in hashrefs which do retain the order. However this will +not fix all cases of element order being lost. + +If your application really is sensitive to element order, don't use +XML::Simple (and don't put order-sensitive values in attributes). + + +=head2 XML::Simple turns nested elements into attributes + +If you read an XML file with XMLin() and then write it back out with +XMLout(), some data which was originally stored in nested elements may end up +in attributes. (However, if you read the file back in with XMLin() you'll +get the same Perl data structure). + +There are a number of ways you might handle this: + +=over 4 + +=item * + +use the 'forcearray' option with XMLin() + +=item * + +use the 'noattr' option with XMLout() + +=item * + +live with it + +=item * + +don't use XML::Simple + +=back + + +=head2 Why does XMLout() insert EnameE elements (or attributes)? + +Try setting keyattr => []. + +When you call XMLin() to read XML, the 'keyattr' option controls whether arrays +get 'folded' into hashes. Similarly, when you call XMLout(), the 'keyattr' +option controls whether hashes get 'unfolded' into arrays. As described above, +'keyattr' is enabled by default. + +=head2 Why are empty elements represented as empty hashes? + +An element is always represented as a hash unless it contains only text, in +which case it is represented as a scalar string. + +If you would prefer empty elements to be represented as empty strings or the +undefined value, set the 'suppressempty' option to '' or undef respectively. + +=head2 Why is ParserOpts deprecated? + +The C option is a remnant of the time when XML::Simple only worked +with the XML::Parser API. Its value is completely ignored if you're using a +SAX parser, so writing code which relied on it would bar you from taking +advantage of SAX. + +Even if you are using XML::Parser, it is seldom necessary to pass options to +the parser object. A number of people have written to say they use this option +to set XML::Parser's C option. Don't do that, it's wrong, +Wrong, WRONG! Fix the XML document so that it's well-formed and you won't have +a problem. + +Having said all of that, as long as XML::Simple continues to support the +XML::Parser API, this option will not be removed. There are currently no plans +to remove support for the XML::Parser API. + +=cut + +