--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Context object for XPath
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'Context'
+ , 'nullContext'
+ ]
+
+import time
+
+class Context( object ) :
+
+ __slots__ = [ 'item' , 'position' , 'last' , 'documents' , 'variables' , 'functions' , 'currentDatetime' ]
+
+ def __init__( self ) :
+
+ self.item = None
+ self.position = None
+ self.last = None
+ self.documents = {}
+ self.variables = {}
+ self.functions = {}
+ self.resetDate()
+
+ def resetDate( self ) :
+
+ self.currentDatetime = time.time()
+
+ def resetFocus( self ) :
+
+ self.item = None
+ self.position = None
+ self.last = None
+
+ def getFocus( self ) :
+
+ return self.item , self.position , self.last
+
+ def restoreFocus( self , state ) :
+
+ self.item , self.position , self.last = state
+
+ def getVariable( self , name ) :
+
+ '''Return value of variable named 'name'.'''
+
+ return self.variables.get( name , () )
+
+ def getFunction( self , name ) :
+
+ '''Return function named 'name'.'''
+
+ return self.functions.get( name , None )
+
+ def registerDocument( self , uri , doc ) :
+
+ '''Store document 'doc' with URI reference 'uri'.'''
+
+ self.documents[ uri ] = doc
+
+ def __repr__( self ) :
+
+ return '<Context item=%s position=%s last=%s>' \
+ % ( self.item , self.position , self.last )
+
+nullContext = Context()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Exceptions hierarchy
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+class Error( Exception ) :
+
+ '''Generic error'''
+
+ pass
+
+class XPathError( Error ) :
+
+ '''Error defined by XPath/XQuery spec'''
+
+ def __init__( self , errorId , note ) :
+
+ self.errorId = errorId
+ self.note = note
+ msg = errorId
+ if note :
+ msg += ': %s' % note
+ Error.__init__( self , msg )
+
+class NodeError( Error ) :
+
+ '''Exception related to node construction.'''
+
+ pass
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# htmlparser.py - An error tolerant HTML parser.
+# Copyright (C) 2004,2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+#
+# This module can replace HTMLParser Python parser.
+#
+# It never throw an exception even for worst HTML document.
+# However, it is not able to parse SGML declaration statement
+# with complex syntax.
+#
+
+__all__ = [ 'HTMLParser' , 'HTMLParseError' ]
+
+import re
+from htmlentitydefs import entitydefs
+
+from misc import shortenText
+
+reEndOfData = re.compile(
+ '[&<]' # Either & or <
+)
+reCheckTag = re.compile(
+ '/?[a-z]' , re.I
+)
+reTagName = re.compile(
+ '^[a-z][-:a-z0-9]*' , re.I
+)
+reEndOfTag = re.compile( # <CHAR*> (<QUOTED> <CHAR*>)* '>'
+ '[^\'">]*' # ..
+ '(?:'
+ '(?:'
+ r"'[^']*'" # double-quote string
+ '|'
+ r'"[^"]*"' # single-quote string
+ ')'
+ '[^\'">]*' # ..
+ ')*'
+ '>'
+)
+reAttr = re.compile(
+ '([a-z_][a-z0-9._-]*(?::[a-z_][a-z0-9._-]*)?)' # name
+ '(?:'
+ r'\s*=\s*' # spaces then =
+ '('
+ '[^\'"\\s]+' # anything but spaces and quote characters
+ '|'
+ '"[^"]*"' # double-quote string
+ '|'
+ "'[^']*'" # single-quote string
+ ')'
+ ')?' , re.I
+)
+reEntity = re.compile(
+ '&'
+ '(?:'
+ r'#(\d+|x[\da-f]+)' # numeric entity (  or  )
+ '|'
+ '([a-z]+)' # named entity ( )
+ ')'
+ ';?' , re.I
+)
+
+# Not used. Only for compatibility with HTMLParser python module.
+class HTMLParseError : pass
+
+defaultEntities = {
+ 'amp' : '&' ,
+ 'lt' : '<' ,
+ 'gt' : '>' ,
+ 'quot' : '"' ,
+ 'apos' : "'"
+}
+
+def _decodeAttr( s ) :
+
+ '''
+ >>> _decodeAttr( 'bar's & baz Ā' )
+ u"bar's & baz \u0100"
+ '''
+
+ r = reEntity.search( s )
+ if r is None :
+ return s
+ else :
+ result = []
+ p = 0
+ while r is not None :
+ result.append( s[ p : r.start( 0 ) ] )
+ p = r.end( 0 )
+ if r.group( 1 ) is not None :
+ try :
+ result.append( unichr( int( r.group( 1 ) ) ) )
+ except OverflowError :
+ result.append( r.group( 0 ) )
+ else :
+ e = defaultEntities.get( r.group( 2 ).lower() )
+ if e is None :
+ result.append( r.group( 0 ) )
+ else :
+ result.append( e )
+ r = reEntity.search( s , p )
+ result.append( s[ p : ] )
+ return ''.join( result )
+
+def _parseAttr( s ) :
+
+ '''
+ >>> _parseAttr( 'foo bar=baz x="y" z=\'quux\'' )
+ [('foo', None), ('bar', 'baz'), ('x', 'y'), ('z', 'quux')]
+ '''
+
+ attrs = []
+ p = 0
+ while p < len( s ) :
+ r = reAttr.search( s , p )
+ if r is None :
+ break
+ k , v = r.groups()
+ if v :
+ if v[ 0 ] == "'" or v[ 0 ] == '"' :
+ v = v[ 1 : -1 ]
+ v = _decodeAttr( v )
+ attrs.append( ( k , v ) )
+ p = r.end( 0 )
+ return attrs
+
+class HTMLParser( object ) :
+
+ __slots__ = [ '__buffer' , '__pos' , '__cdataTags' , '__waitTag' ]
+
+ def __init__( self ) :
+
+ self.__buffer = ''
+ self.__pos = 0
+ #
+ # Tags with CDATA type
+ #
+ self.__cdataTags = set( ( 'script' , 'style' ) )
+ self.__waitTag = None
+
+ def feed( self , s ) :
+
+ self.__buffer = self.__buffer[ self.__pos : ] + s
+ self.__pos = 0
+ self.__process()
+
+ def close( self ) :
+
+ self.__process( finalize = True )
+
+ def handle_data( self , data ) : pass
+ def handle_starttag( self , name , attr ) : pass
+ def handle_endtag( self , name ) : pass
+ def handle_charref( self , name ) : pass
+ def handle_entityref( self , name ) : pass
+ def handle_comment( self , data ) : pass
+ def handle_decl( self , data ) : pass
+ def handle_pi( self , data ) : pass
+ def handle_startendtag( self , name , attr ) :
+
+ self.handle_starttag( name , attr )
+ self.handle_endtag( name )
+
+ def getpos( self ) :
+
+ return ( 0 , 0 )
+
+ def __process( self , finalize = False ) :
+
+ #
+ # 1-letter variable used here:
+ #
+ # b = buffer
+ # p = current position
+ # e,f = end markers
+ # r = regex result
+ # s = size
+ #
+ b , p = self.__buffer , self.__pos
+ if self.__waitTag :
+ e = b.find( '</' , p )
+ if e != -1 :
+ self.handle_data( b[ p : e ] )
+ p = e
+ self.__waitTag = None
+ else :
+ while p < len( b ) :
+ #print '%4d' % p , shortenText( b[ p : ] , 30 )
+ if b[ p ] == '<' :
+ if b.startswith( '<?' , p ) :
+ e = b.find( '?>' , p + 2 )
+ if e == -1 :
+ break
+ else :
+ pi = b[ p + 2 : e ]
+ p = e + 2
+ self.handle_pi( pi )
+ elif not b.startswith( '<!' , p ) :
+ r = reEndOfTag.match( b , p + 1 )
+ if r is None :
+ break
+ e = r.end( 0 )
+ tag = b[ p : e ]
+ rn = reCheckTag.match( tag , 1 )
+ if rn is None :
+ self.handle_data( b[ p ] )
+ p += 1
+ else :
+ self.__processTag( tag )
+ p = e
+ if self.__waitTag :
+ e = b.find( '</' , p )
+ if e != -1 :
+ self.handle_data( b[ p : e ] )
+ p = e
+ self.__waitTag = None
+ else :
+ break
+ elif b.startswith( '<![CDATA[' , p ) :
+ e = b.find( ']]>' , p + 9 )
+ if e == -1 :
+ break
+ else :
+ cdata = b[ p + 9 : e ]
+ p = e + 3
+ self.handle_data( cdata )
+ elif b.startswith( '<!--' , p ) :
+ e , s = b.find( '-->' , p + 4 ) , 3
+ if e == -1 :
+ e , s = b.find( '->' , p + 4 ) , 2
+ if e == -1 :
+ e , s = b.find( '>' , p + 4 ) , 1
+ if e == -1 : # Unterminated comment
+ break
+ else :
+ comment = b[ p + 4 : e ]
+ p = e + s
+ self.handle_comment( comment )
+ else : # b.startswith( '<!' )
+ # Discard it.
+ e = b.find( '>' , p + 2 ) # We only handle "simple" declaration.
+ if e == -1 :
+ break
+ else :
+ self.handle_decl( b[ p + 2 : e ] )
+ p = e + 1
+ elif b[ p ] == '&' :
+ r = reEntity.match( b , p )
+ if r is None :
+ if len( b ) - p > 3 :
+ self.handle_data( '&' )
+ p += 1
+ else :
+ break
+ else :
+ if r.group( 1 ) is not None :
+ ref = r.group( 1 )
+ if not finalize and not ref.endswith( ';' ) and r.end( 0 ) == len( b ) :
+ break
+ self.handle_charref( ref )
+ else :
+ ref = r.group( 2 )
+ if not finalize and not ref.endswith( ';' ) and r.end( 0 ) == len( b ) :
+ break
+ self.handle_entityref( ref )
+ p = r.end( 0 )
+ else :
+ r = reEndOfData.search( b , p )
+ if r is None :
+ if not finalize :
+ break # wait for end of data
+ data = b[ p : ]
+ p = len( b )
+ else :
+ e = r.start( 0 )
+ data = b[ p : e ]
+ p = e
+ self.handle_data( data )
+ if finalize :
+ if p < len( b ) :
+ self.handle_data( b[ p : ] )
+ p = len( b )
+ self.__buffer , self.__pos = b , p
+
+ def __processTag( self , tag ) :
+
+ if tag.startswith( '<!' ) :
+ self.handle_decl( tag[ 2 : -1 ] )
+ else :
+ tagContents = tag[ 1 : -1 ]
+ tagType = 0 # 0: start, 1: end, 2: empty
+ if tagContents.startswith( '/' ) :
+ tagType = 1
+ tagContents = tagContents[ 1 : ]
+ elif tagContents.endswith( '/' ) : # and ' ' not in tagContents :
+ tagType = 2
+ tagContents = tagContents[ : -1 ]
+ r = reTagName.match( tagContents )
+ if r is not None :
+ e = r.end( 0 )
+ name , attr = tagContents[ : e ] , tagContents[ e : ]
+ attr = _parseAttr( attr )
+ if tagType == 0 :
+ self.handle_starttag( name , attr )
+ name = name.lower()
+ if name in self.__cdataTags :
+ self.__waitTag = name # Start of CDATA element
+ elif tagType == 1 :
+ self.handle_endtag( name )
+ elif tagType == 2 :
+ self.handle_startendtag( name , attr )
+ else :
+ raise HTMLParser
+ else :
+ self.handle_data( tag )
+
+class HTMLParserDebug( HTMLParser ) :
+
+ def handle_data( self , data ) :
+
+ print 'data(%r)' % data
+
+ def handle_starttag( self , name , attr ) :
+
+ print 'starttag(%r,%r)' % ( name , attr )
+
+ def handle_endtag( self , name ) :
+
+ print 'endtag(%r)' % name
+
+ def handle_startendtag( self , name , attr ) :
+
+ print 'startendtag(%r,%r)...' % ( name , attr )
+ HTMLParser.handle_startendtag( self , name , attr )
+
+ def handle_charref( self , name ) :
+
+ print 'charref(%r)' % name
+
+ def handle_entityref( self , name ) :
+
+ print 'entityref(%r)' % name
+
+ def handle_comment( self , data ) :
+
+ print 'comment(%r)' % data
+
+ def handle_decl( self , data ) :
+
+ print 'decl(%r)' % data
+
+ def handle_pi( self , data ) :
+
+ print 'pi(%r)' % data
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# htmltree.py - An error tolerant HTML parser.
+# Copyright (C) 2004,2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'Parser' ,
+ 'parseHtml'
+ ]
+
+import re
+from htmlentitydefs import entitydefs
+
+from nodes import Document, Element, Attribute, Comment, Text, ProcessingInstruction
+from htmlparser import HTMLParser, HTMLParseError
+
+reSpace = re.compile( r'\s' )
+reNotSpace = re.compile( r'\S' )
+
+def normalizeSpace( s ) :
+
+ if reSpace.search( s ) is not None :
+ return ' '.join( s.split() )
+ else :
+ return s
+
+class Parser( HTMLParser ) :
+
+ def __init__( self ) :
+
+ HTMLParser.__init__( self )
+ self.__stack = [ ( None , None , [] ) ] # List of RXP style elements
+ self.__skipBlank = False
+ self.__caseSensitive = False
+ #
+ # Empty elements.
+ #
+ self.emptyElements = set( ( 'param' , 'option' , 'hr' , 'br' , 'input' , 'meta' , 'link' , 'area' , 'img' , 'meta' ) )
+ #
+ # Elements that shouldn't have ancestor with the same name.
+ #
+ self.redundantElements = set( ( 'script' , 'form' , 'p' , 'span' ) )
+ #
+ # Elements that shouldn't have parent with the same name.
+ #
+ self.directRedundantElements = set( ( 'table' , 'tr' , 'td' , 'li' , 'ul' , 'ol' ) )
+
+ def __addText( self , data ) :
+
+ #
+ # Close current elements which should not contains text.
+ #
+ s = self.__stack
+ while 1 :
+ top = s[ -1 ][ 0 ]
+ if top not in self.emptyElements :
+ break
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ #
+ # Add the Text node.
+ #
+ s[ -1 ][ 2 ].append( Text( data ) )
+
+ def __closeElementsAsNeeded( self , tag ) :
+
+ '''Close any elements such that opening a new element with name 'tag'
+ ensure that document is still valid accordingly to
+ directRedundantElements and redundantElements restrictions.'''
+
+ s = self.__stack
+ #
+ # Close current elements which should not have children.
+ #
+ while 1 :
+ top = s[ -1 ][ 0 ]
+ if top not in self.emptyElements :
+ break
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ if tag in self.directRedundantElements :
+ #
+ # Parent with same name ?
+ #
+ if s[ -1 ][ 0 ] == tag :
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ elif tag in self.redundantElements :
+ #
+ # Ancestor with same name ?
+ #
+ i = len( s )
+ while i > 0 :
+ i -= 1
+ if s[ i ][ 0 ] == tag :
+ for j in range( len( s ) - i ) :
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ break
+
+ def handle_starttag( self , tag , attrs ) :
+
+ if not self.__caseSensitive :
+ tag = tag.lower()
+
+ self.__closeElementsAsNeeded( tag )
+
+ if not self.__caseSensitive :
+ attrs = [
+ Attribute( name.lower() , normalizeSpace( ( value , name )[ value is None ] ) )
+ for name , value in attrs ]
+ else :
+ attrs = [
+ Attribute( name , normalizeSpace( ( value , name )[ value is None ] ) )
+ for name , value in attrs ]
+ self.__stack.append( ( tag , attrs , [] ) )
+
+ def handle_endtag( self , tag ) :
+
+ if not self.__caseSensitive :
+ tag = tag.lower()
+
+ #
+ # Check if tag close an already opened tag.
+ #
+ s = self.__stack
+ i = len( s )
+ while i > 0 :
+ i -= 1
+ if s[ i ][ 0 ] == tag :
+ for j in range( len( s ) - i ) :
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ break
+
+ def handle_data( self , data ) :
+
+ if not self.__skipBlank or reNotSpace.search( data ) is not None :
+ self.__addText( data )
+
+ def handle_comment( self , data ) :
+
+ #
+ # Script and style elements cannot contains comment (because
+ # they are declared as CDATA) so we treat comment as regular
+ # text. However, the parser should have handled that
+ # correctly, so we should not expect comment inside script and
+ # style element at this point.
+ #
+ s = self.__stack
+ if s[ -1 ][ 0 ] and s[ -1 ][ 0 ].lower() == 'script' :
+ s[ -1 ][ 2 ].append( Text( data ) )
+ else :
+ comment = data.replace( '--' , '- -' ).replace( '--' , '- -' )
+ if comment.endswith( '-' ) :
+ comment += ' '
+ s[ -1 ][ 2 ].append( Comment( comment ) )
+
+ def handle_charref( self , name ) :
+
+ try :
+ n = int( name )
+ except :
+ data = '?' # FIXME: Or ignore ?
+ else :
+ data = unichr( n )
+ self.__addText( data )
+
+ def handle_entityref( self , name ) :
+
+ character = entitydefs.get( name )
+ if character :
+ data = character.decode( 'iso-8859-1' )
+ else :
+ # FIXME: Or ignore ?
+ data = '&' + name + ';'
+ self.__addText( data )
+
+ def handle_pi( self , data ) :
+
+ data = data.split( None , 1 )
+ if len( data ) == 1 :
+ target , content = data[ 0 ] , ''
+ elif len( data ) == 2 :
+ target , content = data
+ else :
+ target , content = None , None
+ if target is not None and target.lower() != 'xml' :
+ pi = ProcessingInstruction( target , content )
+ self.__stack[ -1 ][ 2 ].append( pi )
+
+ def close( self ) :
+
+ try :
+ HTMLParser.close( self )
+ except HTMLParseError :
+ pass
+ s = self.__stack
+ while len( s ) > 1 :
+ name , attrs , children = s.pop()
+ s[ -1 ][ 2 ].append( Element( name , attrs , children ) )
+ return Document( s[ 0 ][ 2 ] )
+
+def parse( data ) :
+
+ hp = Parser()
+ hp.feed( data )
+ return hp.close()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+# TuxeeNet installer
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+#--[ Configuration ]----------------------------------------------------------
+
+from install_conf import *
+
+#--[ Global variables ]-------------------------------------------------------
+
+g_verbose = False
+g_pretend = False
+
+#-----------------------------------------------------------------------------
+
+import sys
+import os
+import errno
+import time
+import py_compile
+
+from getopt import GetoptError
+try :
+ from getopt import gnu_getopt as getopt
+except :
+ from getopt import getopt
+
+from distutils.sysconfig import get_python_lib
+
+def isoDate() :
+
+ return '%04d-%02d-%02dT%02d:%02d:%02dZ' % time.gmtime()[ : 6 ]
+
+def archVersion() :
+
+ try :
+ treeVersion = os.popen( 'tla tree-version' ).read().splitlines()[ 0 ]
+ patchLevel = os.popen( 'tla logs' ).read().splitlines()
+ if patchLevel :
+ patchLevel = patchLevel[ -1 ]
+ else :
+ patchLevel = 'empty'
+ result = treeVersion + '--' + patchLevel
+ except :
+ result = None
+ return result
+
+def touch( filename ) :
+
+ open( filename , 'a+' ).close()
+
+def createFile( filename , contents ) :
+
+ f = open( filename , 'w' )
+ f.write( contents )
+ f.close()
+
+def byteCompile( filename ) :
+
+ if g_verbose :
+ print 'Byte-compiling %s' % filename
+ py_compile.compile( filename )
+
+def installPath() :
+
+ '''Install Python path.'''
+
+ path = get_python_lib()
+ path += '/' + g_globalModule + '.pth'
+ target = g_targetPrefix
+ if g_verbose or g_pretend :
+ print 'Creating Python path from %s to %s.' % ( path , target )
+ if not g_pretend :
+ try :
+ createFile( path , target )
+ except IOError , e :
+ print '** Error creating Python path into %s' % path
+ print e
+
+def installModule() :
+
+ '''Install modules.'''
+
+ myPrefix = g_targetPrefix + '/' + g_globalModule
+ if g_subModule :
+ myPrefix += '/' + g_subModule
+
+ if g_verbose or g_pretend :
+ print 'Creating directory %s' % myPrefix
+ if not g_pretend :
+ try :
+ os.makedirs( myPrefix )
+ except OSError , e :
+ if e[ 0 ] == errno.EEXIST :
+ pass
+ else :
+ raise
+
+ if g_subModule :
+ base = g_targetPrefix + '/' + g_globalModule
+ for part in g_subModule.split( '/' ) :
+ base += '/' + part
+ if g_verbose or g_pretend :
+ print 'Creating __init__.py in %s' % base
+ if not g_pretend :
+ createFile( base + '/__init__.py' , '' )
+
+ if g_verbose or g_pretend :
+ print 'Creating module %s' % g_globalModule
+ if not g_pretend :
+ p = myPrefix + '/__init__.py'
+ try :
+ touch( p )
+ except IOError :
+ print '** Error touching %s' % p
+ else :
+ byteCompile( p )
+ modulesInstalled = []
+ for module in g_modules :
+ flags = ''
+ symlink = None
+ if isinstance( module , ( tuple , list ) ) :
+ if len( module ) >= 3 :
+ module , flags , symlink = module[ : 3 ]
+ elif len( module ) >= 2 :
+ module , flags = module[ : 2 ]
+ elif module :
+ module = module[ 0 ]
+ else :
+ continue
+ targetFilename = myPrefix + '/' + module
+ if g_verbose or g_pretend :
+ print 'Copying module %s -> %s' % ( module , myPrefix + '/' )
+ if not g_pretend :
+ mod = open( module ).read()
+ try :
+ createFile( targetFilename , mod )
+ except IOError , e :
+ print '** Error copying %s -> %s' % ( module , myPrefix + '/' )
+ print e
+ else :
+ modulesInstalled.append( module )
+ if 'x' in flags :
+ if g_verbose :
+ print 'Changing file mode of %s (+x)' % targetFilename
+ try :
+ os.chmod( targetFilename , 0755 )
+ except :
+ print '** Error changing mode of file %s' % targetFilename
+ byteCompile( targetFilename )
+ if symlink :
+ if g_verbose or g_pretend :
+ print 'Creating symbolic link %s -> %s' % ( symlink , targetFilename )
+ if not g_pretend :
+ try :
+ try :
+ os.unlink( symlink )
+ except :
+ pass
+ os.symlink( targetFilename , symlink )
+ except :
+ print '** Error creating symbolic link %s -> %s' % ( symlink , targetFilename )
+
+ if g_verbose or g_pretend :
+ print 'Storing Arch version'
+
+ if not g_pretend :
+ currentVersion = archVersion()
+ if currentVersion is None :
+ print ' Unable to compute Arch version'
+ else :
+ logFilename = myPrefix + '/install.log'
+ try :
+ f = open( logFilename , 'a+' )
+ f.write( isoDate() + ' Version %s installed.\n' % currentVersion )
+ f.write( isoDate() + ' including files: %r\n' % modulesInstalled )
+ f.close()
+ except ( IOError , OSError ) :
+ print '** Error updating log file %s' % logFilename
+
+def usage() :
+
+ print '''Usage: install [OPTIONS]
+
+ -h, --help Print this help.
+ -n, --pretend Display operations, but do nothing.
+ -v, --verbose Verbose output.
+ --pth Install Python path (.pth) only.
+
+Report bugs to <fj@tuxee.net>.'''
+
+def main() :
+
+ global g_verbose
+ global g_pretend
+
+ try :
+ options , paramaters = getopt( sys.argv[ 1 : ] ,
+ 'hnv' , ( 'help' , 'pth' , 'pretend' , 'verbose' ) )
+ except GetoptError , e :
+ print 'install:' , e
+ print 'Try `install --help\' for more information.'
+ sys.exit( 1 )
+
+ pathOnly = False
+
+ for option , argument in options :
+ if option in [ '-h' , '--help' ] :
+ usage()
+ sys.exit( 0 )
+ elif option in [ '-v' , '--verbose' ] :
+ g_verbose = True
+ elif option in [ '-n' , '--pretend' ] :
+ g_pretend = True
+ elif option in [ '--pth' ] :
+ pathOnly = True
+
+ installPath()
+ if not pathOnly :
+ installModule()
+
+if __name__ == '__main__' :
+ main()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+_binPrefix = '/usr/local/bin'
+
+#
+# Modules to install.
+#
+g_modules = [
+ 'context.py' ,
+ 'error.py' ,
+ 'htmlparser.py' ,
+ 'htmltree.py' ,
+ 'iterators.py' ,
+ 'misc.py' ,
+ 'nodes.py' ,
+ 'nodes_misc.py' ,
+ 'parser.py' ,
+ 'rxpcompat.py' ,
+ 'sequence.py' ,
+ 'sequence_misc.py' ,
+ 'tags.py' ,
+ 'xpath.py' ,
+ 'xpath_misc.py' ,
+ ( 'xpath_prompt.py' , 'x' , _binPrefix + '/tx-prompt' ) ,
+ 'xpathfn.py' ,
+ 'xpathparser.py'
+]
+
+#
+# Module name.
+#
+g_globalModule = 'tuxeenet'
+
+#
+# Sub-module name (or None)
+#
+g_subModule = 'tx'
+
+#
+# Target directory.
+#
+g_targetPrefix = '/opt/tuxeenet/python'
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Iterator for traversing xml tree in various way
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# [xpath20] is http://www.w3.org/TR/xpath20/
+
+# [xpath20] "The parent, ancestor, ancestor-or-self, preceding, and
+# preceding-sibling axes are reverse axes; all other axes are forward
+# axes. The ancestor, descendant, following, preceding and self axes
+# partition a document (ignoring attribute and namespace nodes): they
+# do not overlap and together they contain all the nodes in the
+# document."
+
+__all__ = [
+ # XPath 2.0 axis
+ 'iterChild' ,
+ 'iterDescendant' ,
+ 'iterAttribute' ,
+ 'iterSelf' ,
+ 'iterDescendantOrSelf' ,
+ 'iterFollowingSibling' ,
+ 'iterFollowing' ,
+ 'iterParent' ,
+ 'iterAncestor' ,
+ 'iterPrecedingSibling' ,
+ 'iterPreceding' ,
+ 'iterAncestorOrSelf' ,
+ # TX Extension
+ 'iterDescendantOrSelfFull' ,
+ 'iterChildReversed' ,
+ 'iterDescendantReversed' ,
+ 'iterDescendantOrSelfReversed'
+]
+
+from nodes import *
+
+#----------------------------------------------------------------------------
+#
+# Forward Axis
+#
+#----------------------------------------------------------------------------
+
+# [xpath20] "The child axis contains the children of the context node,
+# which are the nodes returned by the dm:children accessor in [XQuery
+# 1.0 and XPath 2.0 Data Model]. Note: Only document nodes and element
+# nodes have children. If the context node is any other kind of node,
+# or if the context node is an empty document or element node, then
+# the child axis is an empty sequence. The children of a document node
+# or element node may be element, processing instruction, comment, or
+# text nodes. Attribute, namespace, and document nodes can never
+# appear as children."
+
+def iterChild( node ) :
+
+ '''Children of the node 'node' in document order.'''
+
+ if isinstance( node , ( Document , Element ) ) :
+ for child in node.children :
+ yield child
+
+# [xpath20] "the descendant axis is defined as the transitive closure
+# of the child axis; it contains the descendants of the context node
+# (the children, the children of the children, and so on)"
+
+def iterDescendant( node ) :
+
+ '''Descendants of the node 'node' in document order.'''
+
+ if isinstance( node , ( Document , Element ) ) :
+ current = node
+ if current.children :
+ current = current.children[ 0 ]
+ while current is not node :
+ yield current
+ if isinstance( current , Element ) :
+ if current.children :
+ current = current.children[ 0 ]
+ continue
+ while current is not node and current.next is None :
+ current = current.parent
+ if current is not node :
+ current = current.next
+
+# [xpath20] "the attribute axis contains the attributes of the context
+# node, which are the nodes returned by the dm:attributes accessor in
+# [XQuery 1.0 and XPath 2.0 Data Model]; the axis will be empty unless
+# the context node is an element"
+
+def iterAttribute( node ) :
+
+ '''Attributes of the node 'node' in stable order.'''
+
+ if isinstance( node , Element ) :
+ for attribute in node.attributes :
+ yield attribute
+
+# [xpath20] "the self axis contains just the context node itself"
+
+def iterSelf( node ) :
+
+ ''''node' and descendants of 'node' in document order.'''
+
+ if isinstance( node , Node ) :
+ yield node
+
+# [xpath20] "the descendant-or-self axis contains the context node and
+# the descendants of the context node"
+
+def iterDescendantOrSelf( node ) :
+
+ ''''node' and descendants of 'node' in document order.'''
+
+ if isinstance( node , Node ) : # FIXME: Really allow Attribute ?
+ yield node
+ if isinstance( node , ( Document , Element ) ) :
+ current = node
+ if current.children :
+ current = current.children[ 0 ]
+ while current is not node :
+ yield current
+ if isinstance( current , Element ) :
+ if current.children :
+ current = current.children[ 0 ]
+ continue
+ while current is not node and current.next is None :
+ current = current.parent
+ if current is not node :
+ current = current.next
+
+def iterDescendantOrSelfFull( node ) :
+
+ ''''node' and descendants of 'node' in document order.'''
+
+ if isinstance( node , ( Document , Element , Text ) ) :
+ yield node
+ current = node
+ if current.children :
+ current = current.children[ 0 ]
+ while current is not node :
+ yield current
+ if isinstance( current , Element ) :
+ for attribute in current.attributes :
+ yield attribute
+ if current.children :
+ current = current.children[ 0 ]
+ continue
+ while current is not node and current.next is None :
+ current = current.parent
+ if current is not node :
+ current = current.next
+
+# [xpath20] "the following-sibling axis contains the context node's
+# following siblings, those children of the context node's parent that
+# occur after the context node in document order; if the context node
+# is an attribute or namespace node, the following-sibling axis is
+# empty"
+
+def iterFollowingSibling( node ) :
+
+ '''Following siblings of node 'node' in document order.'''
+
+ if isinstance( node , Node ) and not isinstance( node , Attribute ) :
+ parent = node.parent
+ if parent is not None :
+ found = False
+ for sibling in parent.children :
+ if found :
+ yield sibling
+ elif sibling is node :
+ found = True
+
+# [xpath20] "the following axis contains all nodes that are
+# descendants of the root of the tree in which the context node is
+# found, are not descendants of the context node, and occur after the
+# context node in document order"
+
+def iterFollowing( node ) :
+
+ '''Following nodes of node 'node' in document order.'''
+
+ if isinstance( node , Node ) and not isinstance( node , Attribute ) :
+ current = node
+ while current is not None :
+ for sibling in iterFollowingSibling( current ) :
+ for descendant in iterDescendantOrSelf( sibling ) :
+ yield descendant
+ current = current.parent
+
+#----------------------------------------------------------------------------
+#
+# Reverse Axis
+#
+#----------------------------------------------------------------------------
+
+def iterChildReversed( node ) :
+
+ '''Children of the node 'node' in reverse document order.'''
+
+ if isinstance( node , ( Document , Element ) ) :
+ for child in reversed( node.children ) :
+ yield child
+
+def iterDescendantReversed( node ) :
+
+ '''Descendants of the node 'node' in reverse document order.'''
+
+ if isinstance( node , ( Document , Element ) ) :
+ for node in iterChildReversed( node ) :
+ for descendant in iterDescendantReversed( node ) :
+ yield descendant
+ yield node
+
+def iterDescendantOrSelfReversed( node ) :
+
+ ''''node' and descendants of 'node' in reverse document order.'''
+
+ if isinstance( node , ( Document , Element ) ):
+ for descendant in iterDescendantReversed( node ) :
+ yield descendant
+ yield node
+
+# [xpath20] "the parent axis contains the sequence returned by the
+# dm:parent accessor in [XQuery 1.0 and XPath 2.0 Data Model], which
+# returns the parent of the context node, or an empty sequence if the
+# context node has no parent. Note: An attribute node may have an
+# element node as its parent, even though the attribute node is not a
+# child of the element node."
+
+def iterParent( node ) :
+
+ '''Parent of node 'node'.'''
+
+ if isinstance( node , Node ) :
+ parent = node.parent
+ if parent is not None :
+ yield parent
+
+# [xpath20] "the ancestor axis is defined as the transitive closure of
+# the parent axis; it contains the ancestors of the context node (the
+# parent, the parent of the parent, and so on) Note: The ancestor axis
+# includes the root node of the tree in which the context node is
+# found, unless the context node is the root node."
+
+def iterAncestor( node ) :
+
+ '''Ancestors of node 'node' up to root node in reverse document order.'''
+
+ if isinstance( node , Node ) :
+ while 1 :
+ node = node.parent
+ if node is None :
+ break
+ yield node
+
+# [xpath20] "the preceding-sibling axis contains the context node's
+# preceding siblings, those children of the context node's parent that
+# occur before the context node in document order; if the context node
+# is an attribute or namespace node, the preceding-sibling axis is
+# empty"
+
+def iterPrecedingSibling( node ) :
+
+ '''Preceding siblings of node 'node' in reverse document order.'''
+
+ if isinstance( node , Node ) and not isinstance( node , Attribute ) :
+ parent = node.parent
+ if parent is not None :
+ found = False
+ for sibling in reversed( parent.children ) :
+ if found :
+ yield sibling
+ elif sibling is node :
+ found = True
+
+# [xpath20] "the preceding axis contains all nodes that are
+# descendants of the root of the tree in which the context node is
+# found, are not ancestors of the context node, and occur before the
+# context node in document order"
+
+def iterPreceding( node ) :
+
+ '''Predecing nodes of node 'node' in reverse document order.'''
+
+ if isinstance( node , Node ) and not isinstance( node , Attribute ) :
+ while node is not None :
+ for sibling in iterPrecedingSibling( node ) :
+ for descendant in iterDescendantOrSelfReversed( sibling ) :
+ yield descendant
+ node = node.parent
+
+# [xpath20] "the ancestor-or-self axis contains the context node and
+# the ancestors of the context node; thus, the ancestor-or-self axis
+# will always include the root node"
+
+def iterAncestorOrSelf( node ) :
+
+ ''''node' and ancestors of node 'node' in reverse document order.'''
+
+ if isinstance( node , Node ) :
+ yield node
+ for ancestor in iterAncestor( node ) :
+ yield ancestor
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Misc - Various functions not directly related to TX
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ # Exception
+ 'NotReached' ,
+ # Functions
+ 'iterSingle' ,
+ 'identity' ,
+ 'constantly' ,
+ 'plural' ,
+ 'bench' ,
+ 'typeOf' ,
+ 'shortenText'
+ ]
+
+import time
+
+class NotReached( Exception ) :
+
+ '''Error when reaching a point in the program that should'nt in normal
+ condition.'''
+
+ pass
+
+#
+# From Appendix F of http://www.w3.org/TR/REC-xml/
+# "Autodetection of Character Encodings (Non-Normative)"
+#
+encodingsBom = [
+ # With a Byte Order Mark:
+ ( '\x00\x00\xfe\xff' , 'UTF-32/BE' ) , # codecs.BOM_UTF32_BE
+ ( '\xff\xfe\x00\x00' , 'UTF-32/LE' ) , # codecs.BOM_UTF32_LE
+ ( '\x00\x00\xff\xfe' , 'UTF-32/2143' ) ,
+ ( '\xfe\xff\x00\x00' , 'UTF-32/3412' ) ,
+ ( '\xfe\xff' , 'UTF-16/BE' ) , # codecs.BOM_UTF16_BE
+ ( '\xff\xfe' , 'UTF-16/LE' ) , # codecs.BOM_UTF16_LE
+ ( '\xef\xbb\xbf' , 'UTF-8' ) # codecs.BOM_UTF8
+]
+
+encodingsXml = [
+ # Without a Byte Order Mark:
+ ( '\x00\x00\x00\x3c' , '32BIT/BE' ) ,
+ ( '\x3c\x00\x00\x00' , '32BIT/LE' ) ,
+ ( '\x00\x00\x3c\x00' , '32BIT/2143' ) ,
+ ( '\x00\x3c\x00\x00' , '32BIT/3412' ) ,
+ ( '\x00\x3c\x00\x3f' , '16BIT/BE' ) ,
+ ( '\x3c\x00\x3f\x00' , '16BIT/LE' ) ,
+ ( '\x3c\x3f\x78\x6d' , '8BIT' ) ,
+ ( '\x4c\x6f\xa7\x94' , 'EBCDIC' ) ,
+ ( '' , 'UNKNOWN' )
+]
+
+def guessXmlCharacterEncoding( s ) :
+
+ '''Guess character encoding up to the first 4 characters of string 's'.
+ Return encoding name (or None if no guess can be made) and how many
+ characters to skip to get beginning of real data.'''
+
+ if isinstance( s , unicode ) :
+ return 'UNICODE'
+ else :
+ for encoding , name in encodingsBom :
+ if s[ : len( encoding ) ] == encoding :
+ return name , len( encoding )
+ for encoding , name in encodingsXml :
+ if s[ : len( encoding ) ] == encoding :
+ return name , 0
+ return None , 0
+
+def iterSingle( o ) :
+
+ '''Iterate over singleton 'o'.'''
+
+ yield o
+
+def identity( o ) :
+
+ '''Return its argument.'''
+
+ return o
+
+def constantly( value ) :
+
+ '''Construct a function returning the same value all the time,
+ discarding its argument.'''
+
+ return lambda o : value
+
+def plural( n , s = 's' , alt = '' ) :
+
+ '''
+ Return 's' if 'n' is equal to 1, otherwise return 'alt'.
+
+ '%d bottle%s' % ( n , plural( n ) )
+ '%d child%s' % ( n , plural( n , 'ren' ) )
+ '%d %s' % ( n , plural( n , 'children' , 'child' ) )
+ '''
+
+ if abs( n ) != 1 :
+ return s
+ else :
+ return alt
+
+def bench( name , f , n = 1 ) :
+
+ '''Evaluate 'n' times the function 'f', printing
+ timing details including 'name' in the output.
+
+ The result of the last evaluation of function 'f' is
+ returned.'''
+
+ if n == 1 :
+ t = time.time()
+ # Avoid for/in overhead for n = 1 for better timing
+ # results (if that really matter..)
+ r = f()
+ d = time.time() - t
+ else :
+ t = time.time()
+ for i in range( n ) :
+ r = f()
+ d = time.time() - t
+ print 'BENCH[%s] (x%d) %fs' % ( name , n , d / n )
+ return r
+
+def typeOf( o ) :
+
+ '''Return type name of object 'o'.'''
+
+ # FIXME: Find a better way to extract type name !
+
+ r = `o.__class__`
+ if r.startswith( '<' ) and r.endswith( '>' ) :
+ r = r[ 1 : -1 ]
+ r = r.split()
+ if len( r ) >= 2 :
+ r = r[ 1 ]
+ if r.startswith( '"' ) or r.startswith( "'" ) :
+ r = r[ 1 : -1 ]
+ elif len( r ) >= 1 :
+ r = r[ 0 ]
+ else :
+ r = '???'
+ return r
+
+def shortenText( string , size , suffix = '[..]' ) :
+
+ '''Shorten text to 'size' characters if 'string' is larger.
+ In such case, the suffix 'suffix' is pasted at the end of
+ string. Note that the result contains at least as much as
+ characters in suffix.
+
+ >>> shortenText( 'foobarbaz' , 12 )
+ 'foobarbaz'
+ >>> shortenText( 'foobarbaz' , 8 )
+ 'foob[..]'
+ >>> shortenText( 'foobarbaz' , 2 )
+ '[..]'
+ '''
+
+ if size is None :
+ result = string
+ elif len( string ) > size :
+ if len( string ) <= len( suffix ) :
+ result = suffix
+ else :
+ result = string[ : max( 0 , size - len( suffix ) ) ] + suffix
+ else :
+ result = string
+ return result
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# XML Nodes
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'Node'
+ , 'Document'
+ , 'Element'
+ , 'Attribute'
+ , 'ProcessingInstruction'
+ , 'Comment'
+ , 'Text'
+]
+
+import cStringIO as StringIO
+import codecs
+import time
+import re
+
+from error import Error, NodeError
+from misc import shortenText, typeOf
+
+# IMPORTANT: Additionnal import at end of module
+
+def someDuplicates( items , getValue = id ) :
+
+ '''Predicate to test if there is duplicate in 'items'.'''
+
+ values = set()
+ for item in items :
+ value = getValue( item )
+ if value in values :
+ return True
+ values.add( value )
+ return False
+
+def _combineAdjacentText( nodes ) :
+
+ '''Combine adjacent text nodes.'''
+
+ lastTextNode = None
+ result = []
+ for node in nodes :
+ if isinstance( node , Text ) :
+ if node.content != '' :
+ if lastTextNode is None :
+ result.append( node )
+ lastTextNode = node
+ else :
+ lastTextNode.content += node.content
+ else :
+ result.append( node )
+ lastTextNode = None
+ return tuple( result )
+
+def _chainNodes( nodes , parent ) :
+
+ '''Update parent, prev and next reference for nodes in 'nodes'. The
+ parent reference will point to 'parent'.'''
+
+ last = len( nodes ) - 1
+ for i , node in enumerate( nodes ) :
+ node._setParent( parent )
+ if i < last :
+ node._setNext( nodes[ i + 1 ] )
+ if i > 0 :
+ node._setPrev( nodes[ i - 1 ] )
+
+def _getEncoder( charset , errors = 'strict' ) :
+
+ '''Return a function translating text to charset 'charset' with error
+ handling set to 'errors'.'''
+
+ encoderFunction = codecs.getencoder( charset )
+ encoder = lambda s : encoderFunction( s , errors )[ 0 ]
+ return encoder
+
+def xmlQuoteText( s ) :
+
+ '''Return string 's' with & and < characters translated to XML entity
+ references.'''
+
+ return s \
+ .replace( '&' , '&' ) \
+ .replace( '<' , '<' )
+
+reCharacterToQuoteForText = re.compile( '[&<]' )
+def streamXmlQuoteText( s , prt ) :
+
+ '''Feed string 's' to 'prt' with & and < characters translated to XML
+ entity references.'''
+
+ p = 0
+ while 1 :
+ r = reCharacterToQuoteForText.search( s , p )
+ if r is None :
+ break
+ e = r.start( 0 )
+ prt( s[ p : e ] )
+ p = r.end( 0 )
+ if s[ e ] == '&' :
+ prt( '&' )
+ elif s[ e ] == '<' :
+ prt( '<' )
+ prt( s[ p : ] )
+
+def xmlQuoteAttribute( s ) :
+
+ '''Return string 's' with &, <, " and ' characters translated to XML
+ entity references.'''
+
+ return s \
+ .replace( '&' , '&' ) \
+ .replace( '<' , '<' ) \
+ .replace( '"' , '"' ) \
+ .replace( "'" , ''' )
+
+reCharacterToQuoteForText = re.compile( '''[&<"']''' )
+def streamXmlQuoteAttribute( s , prt ) :
+
+ '''Feed string 's' to 'prt' with &, <, " and ' characters translated
+ to XML entity references.'''
+
+ p = 0
+ while 1 :
+ r = reCharacterToQuoteForText.search( s , p )
+ if r is None :
+ break
+ e = r.start( 0 )
+ prt( s[ p : e ] )
+ p = r.end( 0 )
+ if s[ e ] == '&' :
+ prt( '&' )
+ elif s[ e ] == '<' :
+ prt( '<' )
+ elif s[ e ] == '"' :
+ prt( '"' )
+ elif s[ e ] == "'" :
+ prt( ''' )
+ prt( s[ p : ] )
+
+#----------------------------------------------------------------------------
+
+class DebugOutputContext( object ) :
+
+ '''Object to hold attributes when generating debug output of a XML
+ tree.'''
+
+ __slots__ = [
+ 'prt' , 'depth' , 'treeSpace' , 'prefix' ,
+ 'depthLimit' , 'nameLimit' , 'attributeLimit' ,
+ 'childrenLimit' , 'textLimit' , 'commentLimit' ]
+
+ def __init__( self ) :
+
+ self.prt = None
+ self.depth = 0
+ self.treeSpace = ' '
+ self.prefix = ''
+ self.depthLimit = None
+ self.nameLimit = None
+ self.attributeLimit = None
+ self.childrenLimit = None
+ self.textLimit = None
+ self.commentLimit = None
+
+ def checkAttributeCount( self , count ) :
+
+ '''Predicate to test if we can output 'count' attributes.'''
+
+ return self.attributeLimit is None or count <= self.attributeLimit
+
+ def checkChildrenCount( self , count ) :
+
+ '''Predicate to test if we can output 'count' children.'''
+
+ return self.childrenLimit is None or count <= self.childrenLimit
+
+ def checkDepth( self , level = 1 ) :
+
+ '''Predicate to test if at least 'level' level are still allowed
+ before reaching depth limit.'''
+
+ return self.depthLimit is None or self.depth + level <= self.depthLimit
+
+#----------------------------------------------------------------------------
+
+class Node( object ) :
+
+ __slots__ = [ 'root' , 'parent' , 'next' , 'prev' , 'position' , 'meta' ]
+
+ def __init__( self ) :
+
+ self.root = None # Root node (document)
+ self.parent = None # Parent node
+ self.next = None # Next sibling node
+ self.prev = None # Previous sibling node
+ self.position = None # Relative position in document tree
+ self.meta = None # Place holder for misc data
+
+ def clone( self ) :
+
+ '''Return a clone of the document (or subtree) 'self'.'''
+
+ raise NotImplementedError
+
+ def iterStringValue( self ) :
+
+ '''Iterator that list all the strings inside the node 'self'.'''
+
+ raise NotImplementedError
+
+ def dmAttributes( self ) :
+
+ raise NotImplementedError
+
+ def dmChildren( self ) :
+
+ raise NotImplementedError
+
+ def dmDocumentUri( self ) :
+
+ raise NotImplementedError
+
+ def dmNamespaceNodes( self ) :
+
+ raise NotImplementedError
+
+ def dmNodeKind( self ) :
+
+ raise NotImplementedError
+
+ def dmNodeName( self ) :
+
+ raise NotImplementedError
+
+ def dmParent( self ) :
+
+ raise NotImplementedError
+
+ def dmStringValue( self ) :
+
+ raise NotImplementedError
+
+ def _setParent( self , node ) :
+
+ if self.parent is not None and self.parent is not node :
+ raise NodeError( 'parent reference is already set' )
+ self.parent = node
+
+ def _setNext( self , node ) :
+
+ if self.next is not None and self.next is not node :
+ raise NodeError( 'next reference is already set' )
+ self.next = node
+
+ def _setPrev( self , node ) :
+
+ if self.prev is not None and self.prev is not node :
+ raise NodeError( 'prev reference is already set' )
+ self.prev = node
+
+ def __getitem__( self , path ) :
+
+ if not isinstance( path , basestring ) :
+ return super( Node , self ).__getitem__( path )
+ else :
+ import xpath
+ xpathObject = xpath.XPath( path )
+ return xpathObject.eval( self )
+
+ # Abusing operator overloading
+ def __div__( self , path ) :
+
+ return self.__getitem__( path )
+
+ def __idiv__( self , other ) :
+
+ raise NotImplementedError
+
+ def serialize( self , charset = 'utf8' , errors = 'strict' , file = None ) :
+
+ if file is None :
+ f = StringIO.StringIO()
+ else :
+ f = file
+ encoder = codecs.getencoder( charset )
+ prt = lambda s : f.write( encoder( s , errors )[ 0 ] )
+ self._serialize( prt )
+ if file is None :
+ f.seek( 0 )
+ return f.read()
+
+ def _serialize( self , prt ) :
+
+ raise NotImplementedError
+
+ def asRxp( self ) :
+
+ raise NotImplementedError
+
+ def asDebug( self , charset = 'utf8' , errors = 'strict' , file = None , prefix = '' ,
+ maxDepth = None , maxAttributes = None , maxChildren = None ) :
+
+ if file is None :
+ f = StringIO.StringIO()
+ else :
+ f = file
+ encoder = codecs.getencoder( charset )
+ prt = lambda s : f.write( encoder( s , errors )[ 0 ] )
+
+ context = DebugOutputContext()
+ context.prt = prt
+ context.prefix = prefix
+ context.depthLimit = maxDepth
+ context.attributeLimit = maxAttributes
+ context.childrenLimit = maxChildren
+ context.nameLimit = 24
+ context.commentLimit = 64
+ context.textLimit = 64
+
+ if context.checkDepth( 1 ) :
+ self._asDebug( context )
+
+ if file is None :
+ f.seek( 0 )
+ return f.read()
+
+ def _asDebug( self , context ) :
+
+ raise NotImplementedError
+
+ def __hash__( self ) :
+
+ return id( self )
+
+class Document( Node ) :
+
+ __slots__ = [ 'children' , 'documentUri' , 'numberOfNodes' , 'ids' , 'attributesByName' , 'elementsByName' ]
+
+ def __init__( self , children = () , finalize = True ) :
+
+ super( Document , self ).__init__()
+
+ for child in children :
+ if not isinstance( child , ( Element , ProcessingInstruction , Comment , Text ) ) :
+ raise NodeError( '%s not allowed as child of Document' % typeOf( child ) )
+
+ if someDuplicates( children , id ) :
+ raise NodeError( 'Duplicate node is not allowed in an element' )
+ children = _combineAdjacentText( children )
+
+ self.children = children
+ self.documentUri = None
+
+ self.numberOfNodes = None
+
+ self.ids = {}
+ self.attributesByName = {}
+ self.elementsByName = {}
+
+ _chainNodes( self.children , self )
+
+ if finalize :
+ self.finalize()
+
+ def clone( self ) :
+
+ return Document( map( lambda child : child.clone() , self.children ) )
+
+ def iterStringValue( self ) :
+
+ for node in iterDescendant( self ) :
+ if isinstance( node , Text ) :
+ if node.content :
+ yield node.content
+
+ def dmAttributes( self ) :
+
+ return ()
+
+ def dmChildren( self ) :
+
+ return self.children
+
+ def dmDocumentUri( self ) :
+
+ return self.uri or ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'document'
+
+ def dmNodeName( self ) :
+
+ return ()
+
+ def dmParent( self ) :
+
+ return ()
+
+ def dmStringValue( self ) :
+
+ return ''.join( self.iterStringValue() )
+
+ def finalize( self , start = 0 ) :
+
+ '''For every descendant of the document, update position, set root
+ node, keep reference to elements with an 'id' attribute, keep
+ references to all attributes and elements by name.'''
+
+ idNames = ( 'id' , )
+ self.ids = {} # for 'id' attribute
+ self.attributesByName = {}
+ self.elementsByName = {}
+ pos = start
+ t = time.time()
+ for node in iterDescendantOrSelfFull( self ) :
+ node.position = pos
+ node.root = self
+ pos += 1
+ if isinstance( node , Attribute ) :
+ if node.name in idNames :
+ if node.name in self.ids :
+ raise NodeError( 'Duplicate ID %r' % nade.name )
+ self.ids[ node.value ] = node.parent
+ self.attributesByName.setdefault( node.name , [] ).append( node )
+ elif isinstance( node , Element ) :
+ self.elementsByName.setdefault( node.name , [] ).append( node )
+ self.numberOfNodes = pos - start
+ return pos
+
+ def _check( self ) :
+
+ pass
+
+ def _serialize( self , prt ) :
+
+ for children in self.children :
+ children._serialize( prt )
+
+ def asRxp( self ) :
+
+ return ( None , None , [ item.asRxp() for item in self.children ] , None )
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'DOCUMENT[%s] with %d nodes'
+ % ( self.position , self.numberOfNodes or -1 ) )
+ context.prt( '\n' )
+ #
+ # Children
+ #
+ if self.children :
+ if not context.checkDepth( 2 ) or context.childrenLimit == 0 :
+ context.prt( prefix )
+ context.prt( context.treeSpace )
+ context.prt( '[.. %d children ..]' % len( self.children ) )
+ context.prt( '\n' )
+ elif context.checkChildrenCount( len( self.children ) ) :
+ context.depth += 1
+ for child in self.children :
+ child._asDebug( context )
+ context.depth -= 1
+ else :
+ context.depth += 1
+ for child in self.children[ : context.childrenLimit ] :
+ child._asDebug( context )
+ context.depth -= 1
+ context.prt( prefix )
+ context.prt( context.treeSpace )
+ context.prt( '[.. and %d more children ..]'
+ % ( len( self.children ) - context.childrenLimit ) )
+ context.prt( '\n' )
+
+ def __repr__( self ) :
+
+ return '<Document with %d children>' % len( self.children )
+
+class Element( Node ) :
+
+ __slots__ = [ 'name' , 'children' , 'attributes' ]
+
+ def __init__( self , name , attributes = () , children = () , finalize = False ) :
+
+ super( Element , self ).__init__()
+
+ for child in children :
+ if not isinstance( child , ( Element , ProcessingInstruction , Comment , Text ) ) :
+ raise NodeError( '%s not allowed as child of Element' % typeOf( child ) )
+
+ if someDuplicates( children , id ) :
+ raise NodeError( 'Duplicate node is not allowed in an element' )
+ children = _combineAdjacentText( children )
+
+ names = set()
+ for attribute in attributes :
+ if not isinstance( attribute , Attribute ) :
+ raise NodeError( '%s not allowed as attribute of Element' % typeOf( attribute ) )
+ if attribute.name in names :
+ raise NodeError( 'Duplicate attribute with name %r' % attribute.name )
+ names.add( attribute.name )
+
+ self.name = name
+ self.children = children
+ self.attributes = tuple( attributes )
+
+ _chainNodes( self.children , self )
+ _chainNodes( self.attributes , self )
+
+ if finalize :
+ self.finalize()
+
+ def clone( self ) :
+
+ return Element( self.name ,
+ map( lambda attr : attr.clone() , self.attributes ) ,
+ map( lambda child : child.clone() , self.children ) )
+
+ def getAttribute( self , name ) :
+
+ for attribute in self.attributes :
+ if attribute.name == name :
+ return attribute
+
+ def iterStringValue( self ) :
+
+ for node in iterDescendant( self ) :
+ if isinstance( node , Text ) :
+ if node.content :
+ yield node.content
+
+ def dmAttributes( self ) :
+
+ return self.attributes
+
+ def dmChildren( self ) :
+
+ return self.children
+
+ def dmDocumentUri( self ) :
+
+ return ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'element'
+
+ def dmNodeName( self ) :
+
+ return self.name
+
+ def dmParent( self ) :
+
+ return self.parent or ()
+
+ def dmStringValue( self ) :
+
+ return ''.join( self.iterStringValue() )
+
+ def finalize( self , start = 0 ) :
+
+ '''For every descendant of the element, update position and set root
+ node'''
+
+ pos = start
+ for node in iterDescendantOrSelfFull( self ) :
+ node.position = pos
+ node.root = self
+ pos += 1
+ return pos
+
+ def _serialize( self , prt ) :
+
+ prt( '<' )
+ prt( self.name )
+ for attribute in self.attributes :
+ prt( ' ' )
+ attribute._serialize( prt )
+ if not self.children :
+ prt( '/>' )
+ else :
+ prt( '>' )
+ for child in self.children :
+ child._serialize( prt )
+ prt( '</' )
+ prt( self.name )
+ prt( '>' )
+
+ def asRxp( self ) :
+
+ return ( self.name ,
+ dict( item.asRxp() for item in self.attributes ) or None ,
+ list( item.asRxp() for item in self.children ) or None ,
+ self.meta )
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'ELEMENT[%s] %s' % ( self.position , shortenText( self.name , context.nameLimit ) ) )
+ context.prt( '\n' )
+ #
+ # Attributes
+ #
+ if self.attributes :
+ if not context.checkDepth( 2 ) or context.attributeLimit == 0 :
+ context.prt( context.treeSpace )
+ context.prt( prefix )
+ context.prt( '[.. %d attributes ..]' % len( self.attributes ) )
+ context.prt( '\n' )
+ elif context.checkAttributeCount( len( self.attributes ) ) :
+ context.depth += 1
+ for attribute in self.attributes :
+ attribute._asDebug( context )
+ context.depth -= 1
+ else :
+ context.depth += 1
+ for attribute in self.attributes[ : context.attributeLimit ] :
+ attribute._asDebug( context )
+ context.depth -= 1
+ context.prt( prefix )
+ context.prt( context.treeSpace )
+ context.prt( '[.. and %d more attributes ..]'
+ % ( len( self.attributes ) - context.attributeLimit ) )
+ context.prt( '\n' )
+ #
+ # Children
+ #
+ if self.children :
+ if not context.checkDepth( 2 ) or context.childrenLimit == 0 :
+ context.prt( context.treeSpace )
+ context.prt( prefix )
+ context.prt( '[.. %d children ..]' % len( self.children ) )
+ context.prt( '\n' )
+ elif context.checkChildrenCount( len( self.children ) ) :
+ context.depth += 1
+ for child in self.children :
+ child._asDebug( context )
+ context.depth -= 1
+ else :
+ context.depth += 1
+ for child in self.children[ : context.childrenLimit ] :
+ child._asDebug( context )
+ context.depth -= 1
+ context.prt( prefix )
+ context.prt( context.treeSpace )
+ context.prt( '[.. and %d more children ..]'
+ % ( len( self.children ) - context.childrenLimit ) )
+ context.prt( '\n' )
+
+ def __repr__( self ) :
+
+ return '<Element %s with %d attributes and %d children>' \
+ % ( shortenText( self.name , 24 ) , len( self.attributes ) , len( self.children ) )
+
+class Attribute( Node ) :
+
+ __slots__ = [ 'name' , 'value' ]
+
+ def __init__( self , name , value ) :
+
+ Node.__init__( self )
+ if not isinstance( name , basestring ) :
+ raise NodeError( '%s not allowed as name to construct Attribute' % typeOf( name ) )
+ if not isinstance( value , basestring ) :
+ raise NodeError( '%s not allowed as value to construct Attribute' % typeOf( value ) )
+ self.name = name
+ self.value = value
+
+ def clone( self ) :
+
+ return Attribute( self.name , self.value )
+
+ def iterStringValue( self ) :
+
+ yield self.value
+
+ def dmAttributes( self ) :
+
+ return ()
+
+ def dmChildren( self ) :
+
+ return ()
+
+ def dmDocumentUri( self ) :
+
+ return ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'attribute'
+
+ def dmNodeName( self ) :
+
+ return self.name
+
+ def dmParent( self ) :
+
+ return self.parent or ()
+
+ def dmStringValue( self ) :
+
+ return self.value
+
+ def _serialize( self , prt ) :
+
+ prt( self.name )
+ prt( '="' )
+ streamXmlQuoteAttribute( self.value , prt )
+ prt( '"' )
+
+ def asRxp( self ) :
+
+ return ( self.name , self.value )
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'ATTRIBUTE[%s] %s = `%s`' \
+ % ( self.position ,
+ shortenText( self.name , context.nameLimit ) ,
+ shortenText( self.value , context.textLimit ) ) )
+ context.prt( '\n' )
+
+ def __repr__( self ) :
+
+ return '<Attribute %s>' % shortenText( self.name , 24 )
+
+class ProcessingInstruction( Node ) :
+
+ __slots__ = [ 'target' , 'content' ]
+
+ def __init__( self , target , content = '' ) :
+
+ super( ProcessingInstruction , self ).__init__()
+
+ if target.lower() == 'xml' :
+ raise NodeError( 'A processing instruction target cannot be named \'%s\'.' % ( target , ) )
+ if '?>' in content :
+ raise NodeError( "'?>' must not occur inside content of processing instruction." )
+ self.target = target
+ self.content = content
+
+ def iterStringValue( self ) :
+
+ yield self.value
+
+ def dmAttributes( self ) :
+
+ return ()
+
+ def dmChildren( self ) :
+
+ return ()
+
+ def dmDocumentUri( self ) :
+
+ return ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'processing-instruction'
+
+ def dmNodeName( self ) :
+
+ return self.target
+
+ def dmParent( self ) :
+
+ return self.parent or ()
+
+ def dmStringValue( self ) :
+
+ return self.content
+
+ def _serialize( self , prt ) :
+
+ assert self.target.lower() != 'xml' , '"xml" not allowed as PI target name'
+ assert '?>' not in self.content , '"?>" not allowed inside PI contents'
+
+ prt( '<?' )
+ prt( self.target )
+ if self.content :
+ prt( ' ' )
+ prt( self.content )
+ prt( '?>' )
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'PI[%s] %s = `%s`'
+ % ( self.position ,
+ shortenText( self.target , context.nameLimit ) ,
+ shortenText( self.content , context.commentLimit ) ) )
+ context.prt( '\n' )
+
+class Comment( Node ) :
+
+ __slots__ = [ 'content' ]
+
+ def __init__( self , content ) :
+
+ super( Comment , self ).__init__()
+
+ if not isinstance( content , basestring ) :
+ raise NodeError( '%s not allowed to construct Comment' % typeOf( content ) )
+ if '--' in content :
+ raise NodeError( "'--' at position %d is not allowed in comment" % content.index( '--' ) )
+ if content.endswith( '-' ) :
+ raise NodeError( "'-' must not occur at end of comment" )
+
+ self.content = content
+
+ def iterStringValue( self ) :
+
+ yield self.content
+
+ def dmAttributes( self ) :
+
+ return ()
+
+ def dmChildren( self ) :
+
+ return ()
+
+ def dmDocumentUri( self ) :
+
+ return ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'comment'
+
+ def dmNodeName( self ) :
+
+ return ()
+
+ def dmParent( self ) :
+
+ return self.parent or ()
+
+ def dmStringValue( self ) :
+
+ return self.content
+
+ def _serialize( self , prt ) :
+
+ assert '--' not in self.content \
+ and not self.content.endswith( '-' ) , \
+ 'invalid comment content'
+
+ prt( '<!--' )
+ prt( self.content )
+ prt( '-->' )
+
+ def asRxp( self ) :
+
+ # Not supported, use a raw element instead.
+ return ( None , None , '<!--' + self.content.replace( '--' , '- -' ).replace( '--' , '- -' ) + '-->' )
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'COMMENT[%s] %r' % ( self.position , shortenText( self.content , context.commentLimit ) ) )
+ context.prt( '\n' )
+
+ def __repr__( self ) :
+
+ return '<Comment with %d characters>' % len( self.content )
+
+class Text( Node ) :
+
+ __slots__ = [ 'content' ]
+
+ def __init__( self , content ) :
+
+ super( Text , self ).__init__()
+
+ if not isinstance( content , basestring ) :
+ raise NodeError( '%s not allowed to construct Text' % typeOf( content ) )
+
+ self.content = content
+
+ def clone( self ) :
+
+ return Text( self.content )
+
+ def setParent( self , parent ) :
+
+ if not self.content :
+ raise NodeError( 'Text node inside document must not be empty' )
+ return super( Text , self ).setParent( self , parent )
+
+ def iterStringValue( self ) :
+
+ yield self.content
+
+ def dmAttributes( self ) :
+
+ return ()
+
+ def dmChildren( self ) :
+
+ return ()
+
+ def dmDocumentUri( self ) :
+
+ return ()
+
+ def dmNamespaceNodes( self ) :
+
+ return ()
+
+ def dmNodeKind( self ) :
+
+ return 'text'
+
+ def dmNodeName( self ) :
+
+ return ()
+
+ def dmParent( self ) :
+
+ return self.parent or ()
+
+ def dmStringValue( self ) :
+
+ return self.content
+
+ def _serialize( self , prt ) :
+
+ streamXmlQuoteText( self.content , prt )
+
+ def asRxp( self ) :
+
+ return self.content
+
+ def _asDebug( self , context ) :
+
+ prefix = context.prefix + context.depth * context.treeSpace
+ assert context.checkDepth( 1 ) , 'reached depth limit'
+ context.prt( prefix )
+ context.prt( 'TEXT[%s] %r' % ( self.position , shortenText( self.content , context.textLimit ) ) )
+ context.prt( '\n' )
+
+ def __repr__( self ) :
+
+ return '<Text with %d characters>' % len( self.content )
+
+from iterators import iterDescendant, iterDescendantOrSelfFull
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Nodes misc
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+from misc import NotReached, typeOf
+from nodes import *
+
+def isText( item ) :
+
+ '''Test if 'item' is a Text node.'''
+
+ return isinstance( item , Text )
+
+def isComment( item ) :
+
+ '''Test if 'item' is a Comment node.'''
+
+ return isinstance( item , Comment )
+
+def makeElementTest( name ) :
+
+ '''Return a function taking a item as parameter and
+ testing if it's a Element with the name 'name'.'''
+
+ def fun( item ) :
+ return isinstance( item , Element ) and item.name == name
+ return fun
+
+def findPosition( siblings , node , test ) :
+
+ '''Return index of the node 'node' inside the list of 'siblings'
+ who match the 'test' predicate. Also return a boolean to specify
+ if there was more than 1 node matching the 'test' predicate.'''
+
+ pos = 0
+ found = 0
+ for item in siblings :
+ if test( item ) :
+ pos += 1
+ if found :
+ break
+ if item is node :
+ assert pos , 'node is not matched by predicate test'
+ found = pos
+ assert found , 'node not found in siblings list'
+ return found , ( found <= 1 and pos <= 1 )
+
+def nodeToXpath( node ) :
+
+ '''Return a XPath expression that point to the node 'node'.'''
+
+ if isinstance( node , Node ) :
+ root = node.root
+ steps = []
+ while True :
+ parent = node.parent
+ if not parent :
+ break
+ if isinstance( node , ( Text , Element , Comment ) ) :
+ if isinstance( node , Text ) :
+ test = isText
+ step = 'text()'
+ elif isinstance( node , Element ) :
+ test = makeElementTest( node.name )
+ step = node.name
+ elif isinstance( node , Comment ) :
+ test = isComment
+ step = 'comment()'
+ else :
+ raise NotReached
+ pos , alone = findPosition( parent.children , node , test )
+ if not alone :
+ step += '[%d]' % pos
+ steps.append( step )
+ elif isinstance( node , Attribute ) :
+ steps.append( '@%s' % node.name )
+ else :
+ assert False , 'Unexpected node type %r' % typeOf( node )
+ node = parent
+ result = '/'.join( reversed( steps ) )
+ if node == node.root :
+ result = '/' + result
+ return result
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Generic parser
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ #
+ # Exceptions
+ #
+ 'Reject' , 'NoMatch' , 'AmbiguousMatch' ,
+ #
+ # State
+ #
+ 'ParserState' ,
+ #
+ # Primitives
+ #
+ 'Regex' , 'Either' , 'Sequence' , 'Multi' , 'Wrapper' ,
+ #
+ # Complex
+ #
+ 'Integer' , # *DEPRECATED*
+ 'Space' , # *DEPRECATED*
+ 'Literal' ,
+ 'Empty' ,'ZeroOrMore' , 'OneOrMore' , 'Optional' ,
+ 'List'
+]
+
+#
+# Syntax inspired from pyparsing (http://pyparsing.sourceforge.net/)
+# by Paul McGuire
+#
+
+import sys
+
+_debug = False
+
+import re
+
+from error import Error
+from misc import identity, constantly
+
+def flattenSequence( sequence ) :
+
+ '''Flatten sequence 'sequence' as a list.'''
+
+ result = []
+ if isinstance( sequence , basestring ) :
+ result = [ sequence ]
+ else :
+ try :
+ for item in sequence :
+ result += flattenSequence( item )
+ except TypeError :
+ result = [ sequence ]
+ return result
+
+def _preprocess( expr ) :
+
+ '''Process subexpression. Currently used to automatically
+ treat string as Literal.'''
+
+ if expr is None :
+ pass
+ elif isinstance( expr , basestring ) :
+ expr = Literal( expr )
+ return expr
+
+class Reject( Error ) : pass
+
+class NoMatch( Error ) :
+
+ '''Exception raised when no matches can be found.'''
+
+ def __init__( self , state ) :
+
+ self.state = state
+
+ def __repr__( self ) :
+
+ return '<NoMatch at %d>' % ( self.state.pos + 1 )
+
+ def __str__( self ) :
+
+ return 'Error at position %d' % ( self.state.pos + 1 )
+
+class AmbiguousMatch( Error ) :
+
+ '''Exception raised when several matches are found.'''
+
+ def __init__( self , count ) :
+
+ self.count = count
+
+ def __repr__( self ) :
+
+ return '<AmbiguousMatch>' % ( self.count , )
+
+ def __str__( self ) :
+
+ return 'Ambiguous match (%d results)' % ( self.count , )
+
+class ParserState( object ) :
+
+ '''State of the parser (the text itself, and the current position.)'''
+
+ def __init__( self , data , pos = 0 ) :
+
+ self.data = data
+ self.pos = pos
+
+ def _advance( self , count ) :
+
+ self.pos += count
+
+ def clone( self , offset = 0 ) :
+
+ newInstance = self.__new__( self.__class__ )
+ newInstance.restore( self , offset )
+ return newInstance
+
+ def restore( self , other , offset = 0 ) :
+
+ self.data = other.data
+ self.pos = other.pos + offset
+ return self
+
+ def __repr__( self ) :
+
+ return '<ParserState %d bytes in buffer, position at %d>' \
+ % ( len( self.data ) , self.pos )
+
+#############################################################################
+#
+# Primitive
+#
+#############################################################################
+
+# 5 primitives: Regex, Either, Sequence, Multi, Wrapper
+
+class Base( object ) :
+
+ def __init__( self ) :
+
+ self.__callEnter = []
+ self.__callLeave = []
+ self.__ignore = False
+
+ def parse( self , s , start = 0 ) :
+
+ state = ParserState( s , start )
+ results = self._parse( state )
+ assert results , 'expected some results'
+ #
+ # Keep longest result
+ #
+ matches = [ result for result in results if result[ 1 ].pos == len( s ) ]
+ if not matches :
+ raise NoMatch( result[ 0 ].state )
+ elif len( matches ) > 1 :
+ raise AmbiguousMatch( len( matches ) )
+ else :
+ return matches[ 0 ][ 0 ]
+
+ def __add__( self , other ) :
+
+ return Sequence( self , other )
+
+ def __or__( self , other ) :
+
+ return Either( self , other )
+
+ def __debug( self , data , markers ) :
+
+ if _debug :
+ if isinstance( self , Wrapper ) and not self.__name :
+ pass
+ else :
+ n = len( markers ) - 1
+ if n is not None :
+ prefix = '#%d' % n
+ else :
+ prefix = '##'
+ s = self.asString( 3 )
+ print prefix , '%d: %s' % ( markers[ 0 ] , s )
+ print prefix , data
+ blankLine = ' ' * ( max( markers ) + 1 )
+ base = blankLine
+ lines = []
+ for i , m in enumerate( markers ) :
+ base = base[ : m ] + '^' + base[ m + 1 : ]
+ if i == 0 :
+ mm = '@'
+ elif 1 <= i <= 9 :
+ mm = chr( ord( '1' ) + i - 1 )
+ elif 10 <= i <= 35 :
+ mm = chr( ord( 'A' ) + i - 10 )
+ else :
+ mm = '#'
+ for j , line in enumerate( lines ) :
+ if line[ m ] == ' ' :
+ lines[ j ] = line[ : m ] + mm + line[ m + 1 : ]
+ break
+ else :
+ line = blankLine[ : m ] + mm + blankLine[ m + 1 : ]
+ lines.append( line )
+ lines.insert( 0 , base )
+ for line in lines :
+ print prefix , line
+ print
+
+ def _parse( self , state ) :
+
+ for fun in self.__callEnter :
+ fun()
+ results = self._process( state )
+ if not isinstance( results , list ) :
+ results = [ results ]
+ self.__debug( state.data , [ state.pos ] + [ s[ 1 ].pos for s in results ] )
+ newResults = []
+ for value , state in results :
+ discard = False
+ if True :
+ for i , fun in enumerate( self.__callLeave ) :
+ try :
+ v = fun( value )
+ except Reject :
+ discard = True
+ break
+ else :
+ if v is not None :
+ value = v
+ else :
+ value = ( self.__name or '#%X' % id( self ) , value )
+ if not discard :
+ newResults.append( ( value , state ) )
+ return newResults
+
+ def name( self , newName ) :
+
+ self.__name = newName
+ return self
+
+ def enter( self , fun ) :
+
+ self.__callEnter.insert( 0 , fun )
+ return self
+
+ def call( self , fun ) :
+
+ self.__callLeave.append( fun )
+ return self
+
+ def ignore( self ) :
+
+ self.__ignore = True
+ return self
+
+ def ignorable( self ) :
+
+ return self.__ignore
+
+ def asString( self , level = 1 , refs = None ) :
+
+ # 'refs' is used to prevent recursion
+ if refs is None :
+ refs = {}
+ x = id( self )
+ if x in refs :
+ s = '##'
+ else :
+ refs[ x ] = True
+ if level <= 0 :
+ s = '...'
+ else :
+ s = self._str( level - 1 , refs )
+ return s
+
+ def __str__( self ) :
+
+ return self.asString()
+
+ def _str( self , level , refs ) :
+
+ raise NotImplementedError
+
+ def _getName( self ) :
+
+ result = ''
+ if self.__name :
+ result = ':[%s]' % self.__name
+ return result
+
+class Regex( Base ) :
+
+ def __init__( self , data , flags = 0 ) :
+
+ Base.__init__( self )
+ self.__data = data
+ self.regexp = re.compile( data , flags )
+
+ def _process( self , state ) :
+
+ r = self.regexp.match( state.data , state.pos )
+ if r is None :
+ raise NoMatch( state )
+ if r.groups() :
+ s = r.group( 1 )
+ else :
+ s = r.group( 0 )
+ result = s , state.clone( r.end( 0 ) - state.pos )
+ return result
+
+ def _str( self , level , refs ) :
+
+ return '<Regex%s %r>' % ( self._getName() , self.__data )
+
+class Either( Base ) :
+
+ def __init__( self , *exprs ) :
+
+ Base.__init__( self )
+ self.__exprs = map( _preprocess , flattenSequence( exprs ) )
+
+ def _getExprs( self ) :
+
+ return self.__exprs
+
+ def _process( self , state ) :
+
+ branches = []
+ for expr in self.__exprs :
+ try :
+ branches += expr._parse( state )
+ except NoMatch :
+ pass
+ if not branches :
+ raise NoMatch( state )
+ return branches
+
+ def _str( self , level , refs ) :
+
+ return '<EitherFork%s %s>' \
+ % ( self._getName() , ', '.join( [ expr.asString( level , refs ) for expr in self.__exprs ] ) )
+
+class Sequence( Base ) :
+
+ def __init__( self , *exprs ) :
+
+ Base.__init__( self )
+ self.__exprs = map( _preprocess , flattenSequence( exprs ) )
+
+ def _getExprs( self ) : return self.__exprs
+
+ def _process( self , state ) :
+
+ branches = [ ( [] , state ) ]
+ for expr in self.__exprs :
+ newBranches = []
+ for value , state in branches :
+ try :
+ for newValue , newState in expr._parse( state ) :
+ if expr.ignorable() :
+ branch = value , newState
+ else :
+ branch = value + [ newValue ] , newState
+ newBranches.append( branch )
+ except NoMatch :
+ pass
+ branches = newBranches
+ if not branches :
+ raise NoMatch( state )
+ return branches
+
+ def _str( self , level , refs ) :
+
+ return '<Sequence%s %s>' \
+ % ( self._getName() , ', '.join( [ expr.asString( level , refs ) for expr in self.__exprs ] ) )
+
+class Multi( Base ) :
+
+ def __init__( self , min , max , expr ) :
+
+ Base.__init__( self )
+ self.__min = min
+ self.__max = max
+ self.__expr = _preprocess( expr )
+
+ def _process( self , state ) :
+
+ branches = [ ( [] , state ) ]
+ expr = self.__expr
+ for i in range( self.__min ) :
+ newBranches = []
+ for value , state in branches :
+ try :
+ for newValue , newState in expr._parse( state ) :
+ newBranches.append( ( value + [ newValue ] , newState ) )
+ except NoMatch :
+ pass
+ branches = newBranches
+ if not branches :
+ raise NoMatch( state )
+ i = self.__min
+ finalBranches = []
+ while self.__max is None or i < self.__max :
+ i += 1
+ newBranches = []
+ for value , state in branches :
+ try :
+ for newValue , newState in expr._parse( state ) :
+ newBranches.append( ( value + [ newValue ] , newState ) )
+ except NoMatch :
+ pass
+ finalBranches.append( ( value , state ) )
+ branches = newBranches
+ if not branches :
+ break
+ finalBranches += branches
+ return finalBranches
+
+ def _str( self , level , refs ) :
+
+ return '<Multi%s %s {%s,%s}>' \
+ % ( self._getName() , self.__expr.asString( level , refs ) , self.__min , self.__max or '' )
+
+class Wrapper( Base ) :
+
+ def __init__( self , expr = None ) :
+
+ Base.__init__( self )
+ self.__expr = _preprocess( expr )
+
+ def _process( self , branches ) :
+
+ assert self.__expr is not None , 'Wrapper should be set with an expression'
+ return self.__expr._parse( branches )
+
+ def set( self , expr ) :
+
+ assert self.__expr is None , 'Wrapper already contains an expression'
+ self.__expr = _preprocess( expr )
+ return self
+
+ __lshift__ = set
+
+ def _str( self , level , refs ) :
+
+ if self._getName() :
+ return '<Wrapper%s %s>' \
+ % ( self._getName() , self.__expr.asString( level , refs ) )
+ else :
+ return self.__expr.asString( level + 1 , refs )
+
+#--[ Misc ]------------------------------------------------------------------
+
+def Integer() :
+
+ return Regex( r'\d+' )
+
+def Space() :
+
+ return Regex( r'\s+' )
+
+def Literal( s ) :
+
+ return Regex( re.escape( s ) )
+
+def Empty() :
+
+ return Literal( '' )
+
+def ZeroOrMore( expr ) :
+
+ return Multi( 0 , None , expr )
+
+def OneOrMore( expr ) :
+
+ return Multi( 1 , None , expr )
+
+def Optional( expr ) :
+
+ return Multi( 0 , 1 , expr )
+
+def List( expr1 , expr2 ) :
+
+ def normalize( e ) :
+ return [ e[ 0 ] ] + sum( e[ 1 ] , [] )
+
+ return Wrapper( Sequence( expr1 , ZeroOrMore( Sequence( expr2 , expr1 ) ) ).call( normalize ) )
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# RXP Compat
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+from nodes import Document, Element, Attribute, Text
+
+def isNodeTuple( node ) :
+
+ return isinstance( node , tuple ) and 3 <= len( node ) <= 4
+
+def nodeIsRaw( node ) :
+
+ return isNodeTuple( node ) \
+ and node[ 0 ] is None \
+ and isinstance( node[ 2 ] , basestring )
+
+def nodeIsElement( node ) :
+
+ return isNodeTuple( node ) \
+ and node[ 0 ] is not None
+
+def nodeIsText( node ) :
+
+ return isinstance( node , basestring )
+
+def nodeIsDocument( node ) :
+
+ return isNodeTuple( node ) \
+ and node[ 0 ] is None \
+ and isinstance( node[ 2 ] , list )
+
+def fromRxp( tree ) :
+
+ if nodeIsRaw( tree ) :
+ raise 'Not supported' # FIXME: Support Text with CDATA/PCDATA ?
+ elif nodeIsText( tree ) :
+ return Text( tree )
+ elif nodeIsDocument( tree ) :
+ return Document( map( fromRxp , tree[ 2 ] or [] ) )
+ elif nodeIsElement( tree ) :
+ return Element( tree[ 0 ] ,
+ [ Attribute( k , v ) for k , v in ( tree[ 1 ] or {} ).items() ] ,
+ map( fromRxp , tree[ 2 ] or [] ) )
+ else :
+ raise 'Unexpected case'
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Sequence
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'Sequence' ,
+ 'SEQUENCE_EMPTY' ,
+ 'SEQUENCE_NODES' ,
+ 'SEQUENCE_ATOMICS' ,
+ 'SEQUENCE_MIXED'
+]
+
+from nodes import Node
+from types import GeneratorType
+
+# Sequence types
+SEQUENCE_EMPTY = 0
+SEQUENCE_ATOMICS = 1
+SEQUENCE_NODES = 2
+SEQUENCE_MIXED = 3
+
+def _checkSequenceType( sequence ) :
+
+ '''Check a sequence (tuple or Sequence) type and return either
+ SEQUENCE_EMPTY, SEQUENCE_ATOMICS, SEQUENCE_NODES or
+ SEQUENCE_MIXED.'''
+
+ if isinstance( sequence , Sequence ) :
+ type = sequence.type
+ else :
+ type = SEQUENCE_EMPTY
+ for item in sequence :
+ if isinstance( item , Node ) :
+ type |= SEQUENCE_NODES
+ else :
+ type |= SEQUENCE_ATOMICS
+ if type == SEQUENCE_MIXED :
+ break
+ return type
+
+class Sequence( tuple ) :
+
+ '''XPath/XQuery sequence type'''
+
+ def __new__( cls , *items , **kwargs ) :
+
+ unionType = kwargs.get( 'type' )
+ if unionType is not None :
+ normalizedItems = []
+ for item in items :
+ if isinstance( item , Sequence ) :
+ normalizedItems += item
+ elif isinstance( item , ( tuple , list , GeneratorType ) ) :
+ normalizedItems += tuple( item )
+ else :
+ normalizedItems.append( item )
+ else :
+ unionType = SEQUENCE_EMPTY
+ normalizedItems = []
+ for item in items :
+ if isinstance( item , Sequence ) :
+ unionType |= item.type
+ normalizedItems += item
+ elif isinstance( item , GeneratorType ) :
+ item = tuple( item )
+ unionType |= _checkSequenceType( item )
+ normalizedItems += item
+ elif isinstance( item , ( tuple , list ) ) :
+ item = Sequence( *item )
+ unionType |= _checkSequenceType( item )
+ normalizedItems += item
+ else :
+ if isinstance( item , Node ) :
+ unionType |= SEQUENCE_NODES
+ else :
+ unionType |= SEQUENCE_ATOMICS
+ normalizedItems.append( item )
+ if not normalizedItems :
+ unionType = SEQUENCE_EMPTY
+ instance = tuple.__new__( cls , normalizedItems )
+ instance.type = unionType
+ return instance
+
+ def __eq__( self , other ) :
+
+ return self.type == other.type and tuple.__eq__( self , other )
+
+ def __add__( self , other , type = None ) :
+
+ if type is None :
+ type = _checkSequenceType( other )
+ return Sequence( self , other , type = self.type | type )
+
+ def __getitem__( self , n ) :
+
+ if not isinstance( n , slice ) :
+ return tuple.__getitem__( self , n )
+ else :
+ return Sequence( tuple.__getitem__( self , n ) , type = self.type )
+
+ def __getslice__( self , a , b ) :
+
+ return Sequence( tuple.__getslice__( self , a , b ) , type = self.type )
+
+ def __str__( self ) :
+
+ s = 'Sequence('
+ if self :
+ s += ', '.join( map( repr , self ) )
+ s += ')'
+ return s
+
+ def __repr__( self ) :
+
+ return '<Sequence [%s] of %d items>' % ( self.type , len( self ) )
+
+_Empty = Sequence()
+_False = Sequence( False )
+_True = Sequence( True )
+_EmptyString = Sequence( '' ) # FIXME: or u'' ?
+_Boolean = ( _False , _True )
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Sequence misc
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'printSequence'
+ ]
+
+import sys
+
+from sequence import Sequence
+from nodes import Node
+from nodes_misc import nodeToXpath
+
+def printSequence( sequence , fullTree = False , displayLocation = False ) :
+
+ if not isinstance( sequence , Sequence ) :
+ print 'Not a sequence' , `sequence`
+ elif not sequence :
+ print 'EMPTY'
+ else :
+ for i , item in enumerate( sequence ) :
+ i += 1
+ if isinstance( item , Node ) :
+ print i , 'NODE' ,
+ if displayLocation :
+ print nodeToXpath( item )
+ else :
+ print '' , # Needed to insert a space before the asDebug() output !
+ if not fullTree :
+ item.asDebug( maxDepth = 2 , maxChildren = 0 , file = sys.stdout )
+ else :
+ item.asDebug( file = sys.stdout )
+ elif isinstance( item , basestring ) :
+ print i , 'STRING' , item
+ elif isinstance( item , bool ) :
+ print i , 'BOOLEAN' , item
+ elif isinstance( item , ( int , long ) ) :
+ print i , 'INTEGER' , item
+ elif isinstance( item , Decimal ) :
+ print i , 'DECIMAL' , item
+ elif isinstance( item , float ) :
+ print i , 'FLOAT|DOUBLE' , item
+ else :
+ print i , '*UNKNOWN*' , `item`
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# Tags
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [ 'Tags' , 'tags' ]
+
+from nodes import *
+from error import *
+
+def decodeName( name ) :
+
+ '''
+ Decode python variable name. If '_' is the first character, it's
+ discarded. Other '_' are converted to '-', except for 2 '__' that
+ are converted to ':'.
+
+ '_class' -> 'class'
+ 'xml__lang' -> 'xml:lang'
+ 'http_equiv' -> 'http-equiv'
+ '''
+
+ if name.startswith( '_' ) and len( name ) > 1 :
+ name = name[ 1 : ]
+ name = name.replace( '__' , ':' )
+ name = name.replace( '_' , '-' )
+ return name
+
+def kws( kwargs ) :
+
+ '''Copy 'kwargs' dictionnary, rewriting keys using decodeName().'''
+
+ return dict( [ ( decodeName( key ) , value ) for key , value in kwargs.items() if value is not None ] )
+
+def ensureNode( node ) :
+
+ if isinstance( node , Node ) :
+ return node
+ elif isinstance( node , basestring ) :
+ return Text( node )
+ else :
+ raise Error( 'Invalid expression to build tree' )
+
+class Tags :
+
+ def __getattr__( self , name ) :
+
+ '''Create functions on the fly as needed.'''
+
+ if name == '_doc_' :
+ def fun( *contents ) :
+ children = [ ensureNode( child ) for child in contents if child ]
+ return Document( children )
+ elif name == '_comment_' :
+ def fun( contents ) :
+ return Comment( contents )
+ elif name == '_pi_' :
+ def fun( target , contents ) :
+ return ProcessingInstruction( target , contents )
+ else :
+ def fun( *contents , **kwargs ) :
+
+ '''If first item of contents is a dictionnary, then its items
+ take precedence on kwargs items.'''
+
+ if contents and isinstance( contents[ 0 ] , dict ) :
+ k , contents = contents[ 0 ] , contents[ 1 : ]
+ kwargs.update( k )
+ attributes = [ Attribute( n , v ) for n , v in kws( kwargs ).items() ]
+ children = [ ensureNode( child ) for child in contents if child ]
+ return Element( decodeName( name ) , attributes , children )
+
+ self.__dict__[ name ] = fun
+
+ return fun
+
+tags = Tags()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+xpath_prompt.py
\ No newline at end of file
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# XPath main module
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [
+ 'compile' ,
+ 'XPath'
+ ]
+
+#
+# TODO:
+#
+# [ ] Use iterator instead of sequence in some place to optimize speed
+# and memory usage ?
+# [ ] Identify more place for optimization
+#
+
+g_debug = False
+g_dontOptimize = False
+
+from error import Error, XPathError
+from sequence import _Empty, _False, _True
+from context import Context, nullContext
+from nodes import Node, Document, Element, Attribute, Comment, Text
+from iterators import *
+from misc import NotReached, plural
+import xpathparser
+from xpath_misc import lispy
+
+from xpathfn import *
+
+# [TR/xquery-operators]
+
+from xpathfn import functions as xpathFunctions
+
+# [xpath20] 3.2.1.1 Axes
+
+axes = {
+ 'child' : iterChild
+ , 'descendant' : iterDescendant
+ , 'attribute' : iterAttribute
+ , 'self' : iterSelf
+ , 'descendant-or-self' : iterDescendantOrSelf
+ , 'following-sibling' : iterFollowingSibling
+ , 'following' : iterFollowing
+ , 'namespace' : None
+ , 'parent' : iterParent
+ , 'ancestor' : iterAncestor
+ , 'preceding-sibling' : iterPrecedingSibling
+ , 'preceding' : iterPreceding
+ , 'ancestor-or-self' : iterAncestorOrSelf
+}
+
+unaryOperators = {
+ 'neg' : opNeg
+}
+
+binaryOperators = {
+ 'eq' : opValueEqual
+ , 'ne' : opValueNotEqual
+ , 'lt' : opValueLessThan
+ , 'gt' : opValueGreaterThan
+ , '=' : opGeneralEqual
+ , '!=' : opGeneralNotEqual
+ , '<' : opGeneralLessThan
+ , '>' : opGeneralGreaterThan
+ , 'is' : opIsSameNode
+ , '<<' : opNodeBefore
+ , '>>' : opNodeAfter
+ , 'and' : opAnd
+ , '|' : opUnion
+ , 'union' : opUnion
+ , 'except' : opExcept
+ , 'intersect' : opIntersection
+ , '+' : opAdd
+ , '-' : opSubstract
+ , '*' : opMultiply
+ , 'div' : opDivide
+ , 'idiv' : opIntegerDivide
+ , 'range' : opTo
+}
+
+def functionArity( f ) :
+
+ n = f.func_code.co_argcount
+ return n - len( f.func_defaults or () ) , n
+
+def assertFunctionArity( fun , label , n , diff = 0 ) :
+
+ range = functionArity( fun )
+ error = None
+ if range[ 0 ] == range[ 1 ] :
+ expected = range[ 0 ] - diff
+ if n != expected :
+ if expected == 0 :
+ error = 'no' , 0
+ else :
+ error = 'exactly %d' % expected , expected
+ else :
+ if n + diff < range[ 0 ] :
+ error = 'at least %d' % ( range[ 0 ] - diff ) , range[ 0 ] - diff
+ elif n + diff > range[ 1 ] :
+ error = 'at most %d' % ( range[ 1 ] - diff ) , range[ 1 ] - diff
+ if error is not None :
+ raise XPathError( 'XPST0017' ,
+ '%s() takes %s argument%s (%d given)' \
+ % ( label ,
+ error[ 0 ] ,
+ plural( error[ 1 ] , 's' ) , n ) )
+
+#--[ Functions used during evaluation ]--------------------------------------
+# ...
+
+def contextItem( context ) :
+
+ if context.item is None :
+ raise XPathError( 'XPDY0002' , 'undefined context item' )
+ return Sequence( context.item )
+
+def unionOfSequences( sequences ) :
+
+ '''
+ Merge several sequence of nodes
+ '''
+
+ unionType = SEQUENCE_EMPTY
+ for sequence in sequences :
+ unionType |= sequence.type
+ if unionType == SEQUENCE_MIXED :
+ raise XPathError( 'XPTY0018' , 'expected either nodes or atomic items but not both' )
+
+ # FIXME: Don't work if nodes come from several documents
+ if unionType == SEQUENCE_EMPTY :
+ return Sequence()
+ elif len( sequences ) == 1 :
+ return sequences[ 0 ]
+ elif unionType == SEQUENCE_NODES :
+ nodes = {}
+ for sequence in sequences :
+ for node in sequence :
+ nodes[ node.position ] = node
+ nodes = nodes.items()
+ nodes.sort()
+ return Sequence( node[ 1 ] for node in nodes )
+ else :
+ return Sequence( sequences )
+
+#----------------------------------------------------------------------------
+
+def ensureCallable( e ) :
+
+ if not callable( e ) :
+ def fun( context ) :
+ return e
+ fun.__name__ = '_static_'
+ else :
+ fun = e
+ return fun
+
+#--[ Functions producing functions ]-----------------------------------------
+
+#
+# Return (node -> bool)
+#
+
+def is_node() :
+
+ def fun( node ) :
+ return isinstance( node , Node )
+ fun.__name__ = '_is_node_'
+
+ return fun
+
+def is_document() :
+
+ def fun( node ) :
+ return isinstance( node , Document )
+ fun.__name__ = '_is_document_'
+
+ return fun
+
+def is_element( name = None ) :
+
+ if name is None or name == '*' :
+ def fun( node ) :
+ return isinstance( node , Element )
+ else :
+ def fun( node ) :
+ return isinstance( node , Element ) and node.name == name
+ fun.__name__ = '_is_element(..)_'
+
+ return fun
+
+def is_attribute( name = None ) :
+
+ if name is None or name == '*' :
+ def fun( node ) :
+ return isinstance( node , Attribute )
+ else :
+ def fun( node ) :
+ return isinstance( node , Attribute ) and node.name == name
+ fun.__name__ = '_attribute(..)_'
+
+ return fun
+
+def is_comment() :
+
+ def fun( node ) :
+ return isinstance( node , Comment )
+ fun.__name__ = '_is_comment_'
+
+ return fun
+
+def is_text() :
+
+ def fun( node ) :
+ return isinstance( node , Text )
+ fun.__name__ = '_is_text_'
+
+ return fun
+
+def makeTest( t ) :
+
+ '''
+ Return (node -> bool)
+ '''
+
+ if t[ 0 ] == 'element' :
+ test = is_element( *t[ 1 : ] )
+ elif t[ 0 ] == 'attribute' :
+ test = is_attribute( *t[ 1 : ] )
+ elif t[ 0 ] == 'node' :
+ test = is_node()
+ elif t[ 0 ] == 'text' :
+ test = is_text()
+ elif t[ 0 ] == 'comment' :
+ test = is_comment()
+ else :
+ raise NotReached
+ return test
+
+#--[ Sequence filter ]-------------------------------------------------------
+
+def at_position( n ) :
+
+ '''
+ Return (context x sequence -> sequence)
+ '''
+
+ def fun( context , sequence ) :
+ if 1 <= n <= len( sequence ) :
+ return Sequence( sequence[ n - 1 ] )
+ else :
+ return Sequence()
+ fun.__name__ = '_at_position_'
+
+ return fun
+
+#----------------------------------------------------------------------------
+
+def doStep( producer , predicates = () ) :
+
+ '''
+ A step is for example: descendant-or-self::element(foo)[@bar>10][2][quux and ../baz]
+ where the 'descendant-or-self::element(foo)' part is performed by function 'producer',
+ and each predicates ([@bar>10],..) are performed by functions listed in 'predicates'.
+
+ producer: (context -> sequence)
+ predicates: list-of (context x sequence -> sequence)
+ Return (context x sequence -> sequence)
+ '''
+
+ def fun( context , sequence ) :
+
+ if sequence.type not in ( SEQUENCE_EMPTY , SEQUENCE_NODES ) :
+ raise XPathError( 'XPTY0020' , 'XPath step should start with a sequence of nodes' )
+
+ results = []
+ focus = context.getFocus()
+ for i , item in enumerate( sequence ) :
+ #
+ # Update context
+ #
+ context.last = len( sequence )
+ context.item = item
+ context.position = i + 1
+ #
+ # Evaluate the step
+ #
+ result = producer( context )
+ #
+ # Apply predicates
+ #
+ for predicate in predicates :
+ result = predicate( context , result )
+ #
+ # Check consistency
+ #
+ if sequence.type not in ( SEQUENCE_EMPTY , SEQUENCE_NODES ) :
+ raise XPathError( 'XPTY0019' , 'XPath step should return a sequence of nodes.' )
+ results.append( result )
+ context.restoreFocus( focus )
+
+ return unionOfSequences( results )
+
+ fun.__name__ = '_do_step_'
+
+ return fun
+
+def makePredicate( predicate ) :
+
+ '''
+ predicates: sequence | (context -> sequence)
+ Return list-of (context x sequence -> sequence)
+ '''
+
+ if isinstance( predicate , Sequence ) :
+ if len( predicate ) == 1 :
+ try :
+ index = predicate[ 0 ]
+ except :
+ raise Error( 'Expected an index as static predicate.' )
+ return at_position( int( predicate[ 0 ] ) )
+ else :
+ raise Error( 'Unexpected static sequence' )
+ else :
+ def fun( context , sequence ) :
+
+ focus = context.getFocus()
+ result = []
+ for i , item in enumerate( sequence ) :
+ i += 1
+ #
+ # Update context
+ #
+ context.item = item
+ context.position = i
+ context.last = len( sequence )
+ #
+ # Do the test
+ #
+ r = predicate( context )
+ keep = False
+ if len( r ) == 1 and isNumber( r[ 0 ] ) :
+ n = int( r[ 0 ] )
+ if n == i :
+ keep = True
+ elif sequenceToBoolean( r ) :
+ keep = True
+ if keep :
+ result.append( item )
+ context.restoreFocus( focus )
+ return Sequence( result )
+
+ fun.__name__ = '_predicate_'
+
+ return fun
+
+def makeVariableReference( name ) :
+
+ '''
+ Return (context -> sequence)
+ '''
+ def fun( context ) :
+ return Sequence( context.getVariable( name ) )
+ fun.__name__ = '_var_%s_' % name
+
+ return fun
+
+def makeAxis( it , test ) :
+
+ if it is None :
+ raise XError( 'XPST0010' )
+ def fun( context ) :
+ return Sequence( node for node in it( context.item ) if test( node ) )
+ fun.__name__ = '_iterate_'
+ return fun
+
+def makeFunctionCall( expr ) :
+
+ '''
+ Return (context -> sequence)
+ '''
+
+ functionName , args = expr[ 0 ] , expr[ 1 : ]
+ fargs = map( makeExpr , args )
+ fargs = map( ensureCallable , fargs )
+ if functionName in xpathFunctions :
+ #
+ # Known function.
+ #
+ fn = xpathFunctions[ functionName ]
+
+ assertFunctionArity( fn , functionName , len( fargs ) , 1 )
+
+ def fun( context ) :
+ args = map( lambda f : f( context ) , fargs )
+ return fn( context , *args )
+ fun.__name__ = '_fn_%s_' % functionName
+ else :
+ #
+ # Unknown function. Resolution at run-time
+ #
+ def fun( context ) :
+ fn = context.getFunction( functionName )
+ if fn is None :
+ raise XPathError( 'XPST0017' , 'Undefined function %r' % ( functionName , ) )
+ assertFunctionArity( fn , functionName , len( fargs ) , 1 )
+ args = map( lambda f : f( context ) , fargs )
+ return fn( context , *args )
+ fun.__name__ = '_fn[rt]_%s_' % functionName
+ return fun
+
+def makeFilter( exprlist , predicates ) :
+
+ if exprlist[ 0 ] == 'exprlist' :
+ process = makeExprList
+ else :
+ process = makeStep
+ exprlist = process( exprlist )
+ predicates = map( makeExprList , predicates )
+ predicates = map( makePredicate , predicates )
+ if not callable( exprlist ) :
+ r = exprlist
+ for predicate in predicates :
+ r = predicate( nullContext , r )
+ return r
+ else :
+ def fun( context ) :
+ sequence = exprlist( context )
+ for predicate in predicates :
+ sequence = predicate( context , sequence )
+ return sequence
+ fun.__name__ = '_predicate_'
+ return fun
+
+def makeStep( step ) :
+
+ first , rest = step[ 0 ] , step[ 1 : ]
+ if first == '.' :
+ return contextItem
+ elif first == '/' :
+ return fnRoot
+ elif first in axes :
+ return makeAxis( axes[ first ] , makeTest( rest[ 0 ] ) )
+ elif first == 'integer' :
+ return Sequence( int( rest[ 0 ] ) )
+ elif first == 'decimal' :
+ return Sequence( float( rest[ 0 ] ) )
+ elif first == 'double' :
+ return Sequence( float( rest[ 0 ] ) )
+ elif first == 'string' :
+ return Sequence( rest[ 0 ] )
+ elif first in ( 'filter' , 'predicates' ) :
+ return makeFilter( rest[ 0 ] , rest[ 1 : ] )
+ elif first == 'void' :
+ return lambda context : _Empty
+ elif first == 'call' :
+ return makeFunctionCall( rest )
+ elif first == 'varref' :
+ return makeVariableReference( rest[ 0 ] )
+ elif first == 'exprlist' :
+ return makeExprList( step )
+ else :
+ raise Error( 'Unexpected step %r' % ( step , ) )
+
+
+# bindings = {}
+# match( bindings , ('+', 1, ('+', 2, 3) ) , ('+', 'e1' , ('+', 'e2', 'e3')))
+# bindings => {1: 'e1', 2: 'e2', 3: 'e3'}
+def match( bindings , pattern , expression ) :
+
+ def match_( pattern , expression ) :
+ if len( pattern ) == len( expression ) :
+ for a , b in zip( pattern , expression ) :
+ if isinstance( a , ( int , long ) ) :
+ if a in bindings :
+ if bindings[ a ] != b :
+ return False
+ else :
+ bindings[ a ] = b
+ elif isinstance( a , tuple ) :
+ if not isinstance( b , tuple ) :
+ return False
+ if not match_( a , b ) :
+ return False
+ elif a == b :
+ pass
+ else :
+ return False
+ else :
+ return False
+ return True
+ return match_( pattern , expression )
+
+def replace( bindings , pattern ) :
+
+ if isinstance( pattern , int ) :
+ return bindings.get( pattern )
+ elif isinstance( pattern , tuple ) :
+ return tuple( replace( bindings , item ) for item in pattern )
+ else :
+ return pattern
+
+def rewrite( expression , sourcePattern , targetPattern , recurse = False ) :
+
+ bindings = {}
+ result = None
+ if match( bindings , sourcePattern , expression ) :
+ result = replace( bindings , targetPattern )
+ return result
+
+def Rule( a , b ) : return ( a , b )
+def From( *a ) : return a
+def To( *a ) : return a
+
+rules = [
+ # <STEP>/() -> ()
+ Rule( From( 1 , ( 'void' , ) ) ,
+ To( 'void' ) ) ,
+
+ # descendant-or-self::node()/child::<TEST> -> descendant::<TEST>
+ Rule( From( ( 'descendant-or-self' , ( 'node' , ) ) ,
+ ( 'child' , 1 ) ) ,
+ To( 'descendant' , 1 ) ) ,
+
+ # descendant-or-self::node()/child::<TEST>[<PRED>] -> descendant::<TEST>[<PRED>]
+ Rule( From( ( 'descendant-or-self' , ( 'node' , ) ) ,
+ ( 'predicates' , ( 'child' , 1 ) , 2 ) ) ,
+ To( 'predicates' , ( 'descendant' , 1 ) , 2 ) ) ,
+
+ # child::<TEST>/descendant::<TEST> -> descendant::<TEST>
+ Rule( From( ( 'child' , 1 ) ,
+ ( 'descendant' , 1 ) ) ,
+ To( 'descendant' , 1 ) ) ,
+
+ # child::<TEST>/descendant::<TEST>[<PRED>] -> descendant::<TEST>[<PRED>]
+ Rule( From( ( 'child' , 1 ) ,
+ ( 'predicates' , ( 'descendant' , 1 ) , 2 ) ) ,
+ To( 'predicates' , ( 'descendant' , 1 ) , 2 ) ) ,
+
+ # descendant-or-self::node()/attribute::<NAME> -> ext:descendant-attribute(<NAME>)
+ Rule( From( ( 'descendant-or-self' , ( 'node' , ) ) ,
+ ( 'attribute' , ( 'attribute' , 1 ) ) ) ,
+ To( 'call' , 'ext:descendant-attribute' , ( 'path' , ( 'string' , 1 ) ) ) ) ,
+
+ # descendant-or-self::node()/attribute::<NAME>[<PRED>] -> ext:descendant-attribute(<NAME>)[PRED]
+ Rule( From( ( 'descendant-or-self' , ( 'node' , ) ) ,
+ ( 'predicates' , ( 'attribute' , ( 'attribute' , 1 ) ) , 2 ) ) ,
+ To( 'predicates' , ( 'call' , 'ext:descendant-attribute' ,
+ ( 'path' , ( 'string' , 1 ) ) ) ,
+ 2 ) ) ,
+
+ # descendant-or-self::node()/child::<NAME>[attribute::<ATT>]
+ # -> ext:descendant-attribute(<ATT>)/parent::<NAME>
+ Rule( From( ( 'descendant-or-self' , ( 'node' , ) ) ,
+ ( 'predicates' , ( 'child' , 1 ) ,
+ ( 'exprlist' , ( 'path' , ( 'attribute' , ( 'attribute' , 2 ) ) ) ) ) ) ,
+ To( ( 'call' , 'ext:descendant-attribute' , ( 'path' , ( 'string' , 2 ) ) ) ,
+ ( 'parent' , 1 ) ) )
+]
+
+def optimizeSteps( steps ) :
+
+ if g_dontOptimize :
+ return steps
+ current = 0
+ if g_debug :
+ print 'OPTIMIZE:'
+ print lispy( ( 'path' , ) + steps )
+ while current < len( steps ) - 1 :
+ a = steps[ current ]
+ b = steps[ current + 1 ]
+ replace = None
+ for rule in rules :
+ r = rewrite( ( a , b ) , *rule )
+ if r is not None :
+ replace = r
+ break
+ if replace is not None :
+ if g_debug :
+ print 'OPT' , a
+ print ' +' , b
+ print ' ->' , replace
+ if isinstance( replace , tuple ) and replace and isinstance( replace[ 0 ] , tuple ) :
+ pass
+ else :
+ replace = ( replace , )
+ steps = steps[ : current ] + replace + steps[ current + 2 : ]
+ if current > 0 :
+ current -= 1 # look again at previous pair
+ else :
+ current += 1
+ if g_debug :
+ print 'RESULT:'
+ print lispy( ( 'path' , ) + steps )
+ return steps
+
+def makePath( stepExpressions ) :
+
+ '''
+ Return (context -> sequence)
+ '''
+
+ stepExpressions = optimizeSteps( stepExpressions )
+ steps = map( makeStep , stepExpressions )
+ first , rest = steps[ 0 ] , steps[ 1 : ]
+
+ if not callable( first ) :
+ if rest :
+ raise XPathError( 'XPST0003' , 'atomic values cannot be used to start a path' )
+ return first
+
+ first = ensureCallable( first )
+ rest = map( ensureCallable , rest )
+
+ rest = map( doStep , rest )
+
+ def fun( context ) :
+ sequence = first( context )
+ for step_ in rest :
+ sequence = step_( context , sequence )
+ return sequence
+ fun.__name__ = '_path_'
+
+ return fun
+
+def makeIf( predicate , consequent , alternative ) :
+
+ '''
+ Return sequence | (context -> sequence)
+ '''
+
+ predicate = makeExprList( predicate )
+ consequent = makeExpr( consequent )
+ alternative = makeExpr( alternative )
+
+ if not callable( predicate ) :
+ if sequenceToBoolean( predicate ) :
+ return consequent
+ else :
+ return alternative
+ else :
+ consequent = ensureCallable( consequent )
+ alternative = ensureCallable( alternative )
+
+ def fun( context ) :
+ if sequenceToBoolean( predicate( context ) ) :
+ return consequent( context )
+ else :
+ return alternative( context )
+ fun.__name__ = '_if_'
+ return fun
+
+def makeUnary( opName , arg ) :
+
+ '''
+ Return sequence | (context -> sequence)
+ '''
+
+ op = unaryOperators[ opName ]
+
+ arg = makeExpr( arg )
+ if not callable( arg ) :
+ return op( nullContext , arg )
+ else :
+ arg = ensureCallable( arg )
+ def fun( context ) :
+ return op( context , arg( context ) )
+ fun.__name__ = '_op1_%s_' % opName
+ return fun
+
+def makeBinary( opName , arg1 , arg2 ) :
+
+ '''
+ Return sequence | (context -> sequence)
+ '''
+
+ op = binaryOperators[ opName ]
+
+ arg1 = makeExpr( arg1 )
+ arg2 = makeExpr( arg2 )
+ if not callable( arg1 ) and not callable( arg2 ) :
+ return op( nullContext , arg1 , arg2 )
+ else :
+ arg1 = ensureCallable( arg1 )
+ arg2 = ensureCallable( arg2 )
+ if getattr( op , 'hold' , False ) :
+ def fun( context ) :
+ return op( context , arg1 , arg2 )
+ else :
+ def fun( context ) :
+ return op( context , arg1( context ) , arg2( context ) )
+ fun.__name__ = '_op2_%s_' % opName
+ return fun
+
+def makeFor( clauses , returnExpr ) :
+
+ '''
+ Return (context -> sequence)
+ '''
+
+ vars = []
+ for var , seqExpr in clauses :
+ seqExpr = makeExpr( seqExpr )
+ vars.append( ( var , seqExpr ) )
+ returnExpr = makeExpr( returnExpr )
+
+ def makeFun( name , seq , returnExpr ) :
+ seq = ensureCallable( seq )
+ def fun( context ) :
+ result = []
+ for item in seq( context ) :
+ context.variables[ name ] = item
+ result.append( returnExpr( context ) )
+ return Sequence( result )
+ return fun
+
+ fun = returnExpr
+ for name , seq in reversed( vars ) :
+ fun = makeFun( name , seq , fun )
+ return fun
+
+def makeQuantified( quantifier , clauses , test ) :
+
+ '''
+ Return (context -> sequence)
+ '''
+
+ vars = []
+ for var , seqExpr in clauses :
+ seqExpr = makeExpr( seqExpr )
+ vars.append( ( var , seqExpr ) )
+ testExpr = makeExpr( test )
+
+ def makeFun( name , seq , testExpr ) :
+ seq = ensureCallable( seq )
+ if quantifier == 'some' :
+ def fun( context ) :
+ result = []
+ for item in seq( context ) :
+ context.variables[ name ] = item
+ r = sequenceToBoolean( testExpr( context ) )
+ if r is True :
+ return _True
+ return _False
+ return fun
+ elif quantifier == 'every' :
+ def fun( context ) :
+ result = []
+ for item in seq( context ) :
+ context.variables[ name ] = item
+ r = sequenceToBoolean( testExpr( context ) )
+ if r is False :
+ return _False
+ return _True
+ return fun
+ else :
+ raise NotReached
+
+ fun = testExpr
+ for name , seq in reversed( vars ) :
+ fun = makeFun( name , seq , fun )
+ return fun
+
+def makeExpr( e ) :
+
+ '''
+ Return sequence | (context -> sequence)
+ '''
+
+ if e[ 0 ] == 'path' :
+ return makePath( e[ 1 : ] )
+ elif e[ 0 ] == 'if' :
+ return makeIf( e[ 1 ] , e[ 2 ] , e[ 3 ] )
+ elif e[ 0 ] == 'for' :
+ return makeFor( e[ 1 ] , e[ 2 ] )
+ elif e[ 0 ] in ( 'some' , 'every' ) :
+ return makeQuantified( e[ 0 ] , e[ 1 ] , e[ 2 ] )
+ elif e[ 0 ] in unaryOperators :
+ return makeUnary( e[ 0 ] , e[ 1 ] )
+ elif e[ 0 ] in binaryOperators :
+ return makeBinary( e[ 0 ] , e[ 1 ] , e[ 2 ] )
+ else :
+ raise Error( 'Unexpected expression %r' % ( e , ) )
+
+def makeExprList( e ) :
+
+ '''Return (context -> sequence) or a static sequence.
+
+ The returned function evaluate a static XPath expression
+ and return sequence as result.'''
+
+ assert e[ 0 ] == 'exprlist' , 'Unexpected expression %r' % ( e , )
+
+ exprs = []
+ dynamic = False
+ #
+ # Compile each expression
+ #
+ for expr in e[ 1 : ] :
+ r = makeExpr( expr )
+ if callable( r ) :
+ dynamic = True
+ exprs.append( r )
+ elif isinstance( r , tuple ) :
+ exprs += list( r )
+ else :
+ exprs.append( r )
+
+ if not dynamic :
+ #
+ # If fully static, then just return concatenation of all
+ # the sequences.
+ #
+ return Sequence( exprs )
+ else :
+ def fun( context ) :
+ result = []
+ for item in exprs :
+ if callable( item ) :
+ # Dynamic
+ result.append( item( context ) )
+ else :
+ # Constant
+ result.append( item )
+ return Sequence( result )
+ fun.__name__ = '_exprlist_'
+ return fun
+
+def compile( s ) :
+
+ '''Compile XPath expression 's' into a Python function.'''
+
+ e = xpathparser.parser( s )
+
+ return makeExprList( e )
+
+_cache = {}
+
+def flushCache() :
+
+ _cache.clear()
+
+class XPath( object ) :
+
+ __slots__ = [ 'path' , 'fun' ]
+
+ def __init__( self , path ) :
+
+ self.path = path
+ if path in _cache :
+ self.fun = _cache[ path ]
+ else :
+ try :
+ self.fun = compile( path )
+ except xpathparser.NoMatch :
+ raise XPathError( 'XPST0003' , 'Unable to parse %r' % path )
+ _cache[ path ] = self.fun
+ if len( _cache ) > 50 :
+ _cache.clear() # FIXME: OUCH!
+
+ def eval( self , doc = None , variables = {} , functions = {} ) :
+
+ f = self.fun
+ if callable( f ) :
+ context = Context()
+ if doc is not None :
+ context.item = doc
+ context.position = 1
+ context.last = 1
+ context.variables.update( variables )
+ context.functions.update( functions )
+ return f( context )
+ else :
+ return f
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# XPath misc
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+try :
+ import cStringIO as StringIO
+except :
+ import StringIO as StringIO
+
+def lispy( e ) :
+
+ assert isinstance( e , tuple ) and e , 'expected a non empty tuple'
+ s = StringIO.StringIO()
+ prt = s.write
+ parens = []
+ x = [ 1 ]
+ breakFlag = [ 0 ]
+ last = [ None ]
+ def forward( n ) :
+ x[ 0 ] += n
+ def push() :
+ parens.append( x[ 0 ] )
+ def couldBreak() :
+ breakFlag[ 0 ] += 1
+ last[ 0 ] = parens.pop()
+ def breakHere() :
+ if breakFlag[ 0 ] :
+ prt( '\n' )
+ prt( ' ' * ( last[ 0 ] - 1 ) )
+ breakFlag[ 0 ] = 0
+ x[ 0 ] = last[ 0 ]
+ else :
+ prt( ' ' )
+ forward( 1 )
+ def lispy_( e ) :
+ prt( '(' )
+ push()
+ forward( 1 )
+ if isinstance( e[ 0 ] , tuple ) :
+ lispy_( e[ 0 ] )
+ else :
+ t = e[ 0 ].replace( '\\' , '\\\\' ).replace( '\n' , '\\n' )
+ prt( t )
+ forward( len( t ) )
+ for item in e[ 1 : ] :
+ breakHere()
+ if isinstance( item , tuple ) :
+ lispy_( item )
+ else :
+ s = `item`
+ if s[ 0 ] == "'" : # force double-quote style
+ s = '"%s"' % ( s[ 1 : -1 ].replace( '"' , '\\"' ) )
+ push()
+ prt( s )
+ forward( len( s ) )
+ couldBreak()
+ prt( ')' )
+ forward( 1 )
+ couldBreak()
+ lispy_( e )
+ s.seek( 0 )
+ return s.read()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
+
+
--- /dev/null
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+# XPath prompt
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import re
+import sys
+import time
+import urllib
+import StringIO
+
+import htmltree
+from xpath import XPath
+import xpathparser
+from misc import guessXmlCharacterEncoding
+from sequence import Sequence
+from nodes import Node, Document
+from error import Error
+from parser import NoMatch
+from sequence_misc import printSequence
+from xpathfn import *
+
+from xpath_misc import lispy
+
+def printInlineSequence( sequence ) :
+
+ for item in sequence :
+ if isinstance( item , Node ) :
+ item.serialize( file = sys.stdout )
+ print
+ else :
+ print item
+
+rtXmlDeclVersionInfo = r'''(?:\s+version=(?P<version>(?:'[^']*'|"[^"]*")))'''
+rtXmlDeclEncodingDecl = r'''(?:\s+encoding=(?P<encoding>(?:'[^']*'|"[^"]*")))'''
+rtXmlDeclStandaloneDecl = r'''(?:\s+standalone=(?P<standalone>(?:'[^']*'|"[^"]*")))'''
+reXmlDecl = re.compile( r'<\?xml(?:%s|%s|%s)*\s*\??>' \
+ % ( rtXmlDeclVersionInfo ,
+ rtXmlDeclEncodingDecl ,
+ rtXmlDeclStandaloneDecl ) )
+def decodeDocument( txt ) :
+
+ if not isinstance( txt , unicode ) :
+ enc , skip = guessXmlCharacterEncoding( txt[ : 4 ] )
+ dec = 'utf_8'
+ if enc == 'UTF-8' :
+ dec = 'utf_8'
+ elif enc == '8BIT' :
+ r = reXmlDecl.search( txt )
+ if r is not None :
+ enc = r.groupdict()[ 'encoding' ]
+ if enc is not None :
+ enc = enc[ 1 : -1 ].lower()
+ if enc.startswith( 'iso-8859-' ) :
+ dec = enc
+ elif enc in ( 'utf8' , 'utf-8' ) :
+ dec = 'utf_8'
+ else :
+ dec = None
+ else :
+ dec = None
+ if dec is None :
+ dec = 'utf_8'
+ txt = txt[ skip : ]
+ try :
+ txt = txt.decode( dec )
+ except UnicodeDecodeError :
+ # fallback to ISO-8859-1
+ txt = txt.decode( 'iso-8859-1' )
+ return txt
+
+def readDoc( uri ) :
+
+ txt = urllib.urlopen( uri ).read()
+ txt = decodeDocument( txt )
+ return htmltree.parse( txt )
+
+def extSerialize( context ) :
+
+ return Sequence( context.item.serialize() )
+
+def fnDoc( context , uri , _cache = {} ) :
+
+ uri = uri[ 0 ]
+ if uri not in _cache :
+ try :
+ doc = readDoc( uri )
+ except :
+ raise
+ print 'Error reading URI %s' % uri
+ return ()
+ _cache[ uri ] = Sequence( doc )
+ return _cache[ uri ]
+
+def evaluate( expr , dot , env , functions ) :
+
+ #
+ # Compile the expression
+ #
+ t = time.time()
+ x = XPath( expr )
+ dp = time.time() - t
+
+ #
+ # Evaluate the compiled expression
+ #
+ t = time.time()
+ r = x.eval( dot , env , functions )
+ de = time.time() - t
+
+ return r , dp , de
+
+def printResult( sequence , mode , displayLocation ) :
+
+ if mode == 'inline' :
+ printInlineSequence( sequence )
+ elif mode == 'full' :
+ printSequence( sequence , True , displayLocation )
+ elif mode == 'short' :
+ printSequence( sequence , False , displayLocation )
+ else : # assuming default mode
+ if len( sequence ) == 0 :
+ pass
+ elif len( sequence ) == 1 :
+ print sequence[ 0 ]
+ else :
+ print sequence
+
+help = r'''
+<expression> Evaluate XPath expression.
+$var := <expression> Evaluate XPath expression and store result in 'var'.
+
+Commands:
+
+\. URI Load document from URI and set it as the current node.
+\? Print this help.
+\c Flush cache.
+\d Default mode.
+\e EXPRESSION Display XPath parser result
+\f Full mode.
+\i Inline mode.
+\l Toggle location display in full and short mode.
+\o Switch optimization on/off.
+\s Short mode.
+\v Print name of available variables.
+\x Switch timer on/off.
+'''.strip()
+
+def main() :
+
+ import readline
+ import re
+
+ reDef = re.compile( r'^\s*\$([a-z_][a-z_0-9-]*)\s*:=\s*(.*)$' , re.I )
+
+ doc = Document()
+
+ mode = 'default'
+
+ modes = {
+ 'd' : 'default' ,
+ 'f' : 'full' ,
+ 's' : 'short' ,
+ 'i' : 'inline'
+ }
+
+ env = {
+ 'current' : doc ,
+ 'version' : Sequence( 'TX' , '0.1' ) ,
+ }
+
+ functions = {
+ 'doc' : fnDoc ,
+ 'serialize' : extSerialize
+ }
+
+ print 'XPath TX 0.1 - (c)2005 Frederic Jolliton <frederic@jolliton.com>\n'
+ print 'Use \? for help.\n'
+
+ displayLocation = False
+ showTime = False
+ while 1 :
+ try :
+ line = raw_input( 'XPath2.0> ' )
+ except EOFError :
+ print
+ break
+ line = line.strip()
+ if line.startswith( '#' ) or not line :
+ pass
+ elif line.startswith( '\\' ) :
+ cmd = line[ 1 : ]
+ if cmd in modes :
+ mode = modes[ cmd ]
+ print '[%s]' % mode
+ elif cmd.startswith( '. ' ) :
+ uri = cmd[ 2 : ].strip()
+ try :
+ env[ 'current' ] = readDoc( uri )
+ except :
+ print 'Error reading URI %r' % uri
+ elif cmd.startswith( 'e ' ) :
+ try :
+ print lispy( xpathparser.parse( cmd[ 1 : ].strip() ) )
+ except NoMatch , e :
+ print e
+ elif cmd == 'c' :
+ import xpath
+ xpath.flushCache()
+ print 'Cache flushed'
+ elif cmd == 'o' :
+ import xpath
+ xpath.g_dontOptimize = not xpath.g_dontOptimize
+ print 'Optimization turned' , ('off','on')[ not xpath.g_dontOptimize ]
+ elif cmd == 'l' :
+ displayLocation = not displayLocation
+ print 'Location' , ('off','on')[ displayLocation ]
+ elif cmd == 'x' :
+ showTime = not showTime
+ print 'Timer' , ('off','on')[ showTime ]
+ elif cmd == 'v' :
+ print '$' + ', $'.join( sorted( env.keys() ) )
+ elif cmd == '?' :
+ print help
+ print
+ print 'Current mode is %r' % mode
+ print 'Location is' , ('off','on')[ displayLocation ]
+ print 'Timer is' , ('off','on')[ showTime ]
+ else :
+ print 'Unknown command %r' % cmd
+ else :
+ r = reDef.match( line )
+ if r is not None :
+ varName , line = r.groups()
+ else :
+ varName = None
+ try :
+ dot = env[ 'current' ]
+ if isinstance( dot , Sequence ) :
+ assert len( dot ) == 1 , 'expected a sequence of 1 item'
+ dot = dot[ 0 ]
+ try :
+ result , dp , de = evaluate( line , dot , env , functions )
+ except KeyboardInterrupt :
+ print '[Interrupted]'
+ else :
+ if varName is not None :
+ env[ varName ] = result
+ else :
+ try :
+ printResult( result , mode , displayLocation )
+ except KeyboardInterrupt : # FIXME: Don't work.
+ print '[Interrupted]'
+ if showTime :
+ print '-- %fs(parse) + %fs(eval) --' % ( dp , de )
+ except KeyboardInterrupt :
+ raise
+ except NoMatch , e :
+ print e
+ except XPathError , e :
+ print 'XPATH-ERROR [%s]' % ( e , )
+ except Error , e :
+ print 'GENERIC-ERROR [%s]' % ( e , )
+
+if __name__ == '__main__' :
+ main()
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# XPath/XQuery functions
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+from __future__ import division
+
+# __all__ = [
+# 'functions'
+# ]
+
+import operator
+
+from error import Error, XPathError
+from sequence import *
+from sequence import _Empty, _EmptyString, _False, _True, _Boolean
+from nodes import *
+from misc import typeOf, iterSingle, identity
+from iterators import iterDescendantOrSelfFull
+
+functions = {}
+
+def functionNames( qname ) :
+
+ '''
+ 'foo' -> ['foo']
+ 'bar:foo' -> ['bar:foo']
+ 'fn:foo' -> ['fn:foo', 'foo']
+ '''
+
+ names = [ qname ]
+ parts = qname.split( ':' )
+ if len( parts ) == 1 :
+ pass
+ elif len( parts ) == 2 :
+ if parts[ 0 ] == 'fn' :
+ names.append( parts[ 1 ] )
+ else :
+ raise Error( 'invalid function name %r' % qname )
+ return names
+
+def registerFast( qname , hold = False ) :
+
+ names = functionNames( qname )
+
+ def fun( f ) :
+ f.hold = hold
+ for name in names :
+ functions[ name ] = f
+ return f
+ return fun
+
+def register( qname ) :
+
+ names = functionNames( qname )
+
+ def fun( f ) :
+ #
+ # Wrapper ensure than return value is a Sequence
+ #
+ def wrapper( *args , **kwargs ) :
+ result = f( args , kwargs )
+ if not isinstance( result , Sequence ) :
+ result = Sequence( result )
+ return result
+ wrapper.hold = False
+ wrapper.__name__ = 'wrapper_%s' % ( f.__name__ or '<??>' )
+ wrapper.__dict__ = f.__dict__
+ wrapper.__doc__ = f.__doc__
+ for name in names :
+ functions[ name ] = wrapper
+ return wrapper
+ return fun
+
+#----------------------------------------------------------------------------
+
+def isBoolean( item ) :
+
+ return isinstance( item , bool )
+
+def isNumber( item ) :
+
+ return isinstance( item , ( int , float , long ) ) \
+ and not isinstance( item , bool )
+
+def isString( item ) :
+
+ return isinstance( item , basestring )
+
+def isAtomic( item ) :
+
+ return not isinstance( item , Node )
+
+def isAttribute( item ) :
+
+ return isinstance( item , Attribute )
+
+def isDocument( item ) :
+
+ return isinstance( item , Document )
+
+def isNode( item ) :
+
+ return isinstance( item , Node )
+
+def isItem( item ) :
+
+ return True
+
+#----------------------------------------------------------------------------
+
+def ZeroOrOne( t ) :
+
+ def fun( sequence ) :
+ if len( sequence ) == 0 :
+ return None
+ elif len( sequence ) == 1 :
+ item = sequence[ 0 ]
+ if t( item ) :
+ return item
+ raise XPathError( 'XPTY0004' , 'Expected a sequence of 0 or 1 item (%r)' % t )
+ return fun
+
+def One( t ) :
+
+ def fun( sequence ) :
+ if len( sequence ) == 1 :
+ item = sequence[ 0 ]
+ if t( item ) :
+ return item
+ raise XPathError( 'XPTY0004' , 'Expected a sequence of 1 item (%r)' % t )
+ return fun
+
+def zeroOrMoreNodes( sequence ) :
+
+ if sequence.type not in ( SEQUENCE_EMPTY , SEQUENCE_NODES ) :
+ raise XPathError( 'XPTY0004' , 'Expected a sequence of nodes' ) # really XPTY0004 here ?
+ return sequence
+
+zeroOrOneNode = ZeroOrOne( isNode )
+zeroOrOneItem = ZeroOrOne( isItem )
+zeroOrMoreItem = identity
+oneNumber = One( isNumber )
+oneAtomic = One( isAtomic )
+oneItem = One( isItem )
+oneNode = One( isNode )
+
+#----------------------------------------------------------------------------
+
+def atomize( item ) :
+
+ '''Convert node to string, and keep inchanged atomic value.'''
+
+ if isNode( item ) :
+ return item.dmStringValue()
+ else :
+ return item
+
+def sequenceToData( sequence ) :
+
+ return tuple( atomize( item ) for item in sequence )
+
+def asString( item ) :
+
+ '''Convert item (node or atomic value) to a string.'''
+
+ result = atomize( item )
+ if not isinstance( result , basestring ) :
+ result = str( result )
+ return result
+
+def asNumber( item ) :
+
+ '''
+ Convert item (node or atomic value) as a number.
+ Return None if no conversion can be done.
+ '''
+
+ if isNumber( item ) :
+ return item
+ else :
+ item = atomize( item )
+ try :
+ return int( item )
+ except :
+ try :
+ return float( item )
+ except :
+ pass
+
+def oneAtomizedItem( sequence ) :
+
+ return atomize( oneItem( sequence ) )
+
+#----------------------------------------------------------------------------
+
+def sequenceToBoolean( sequence ) :
+
+ if not sequence :
+ return False
+ else :
+ item = sequence[ 0 ]
+ if isNode( item ) :
+ return True
+ elif len( sequence ) == 1 :
+ if isBoolean( item ) :
+ return item
+ elif isString( item ) :
+ return bool( item )
+ elif isNumber( item ) :
+ return item != 0
+ raise XError( 'FORG0006' )
+
+#----------------------------------------------------------------------------
+
+def _compareString( s1 , s2 ) :
+
+ '''
+ Compare strings 's1' and 's2'. This function also accept iterators
+ returning string. This is usefull to compare two nodes without
+ first converting them to strings for performance reason.
+ '''
+
+ if isinstance( s1 , basestring ) :
+ if isinstance( s2 , basestring ) :
+ return cmp( s1 , s2 )
+ else :
+ s1 = iterSingle( s1 )
+ elif isinstance( s2 , basestring ) :
+ s2 = iterSingle( s2 )
+
+ result = 0
+ d1 = ''
+ d2 = ''
+ while 1 :
+ if not d1 :
+ try :
+ d1 = s1.next()
+ except StopIteration :
+ d1 = ''
+ if not d2 :
+ try :
+ d2 = s2.next()
+ except StopIteration :
+ d2 = ''
+ if not d1 :
+ if not d2 :
+ r = 0
+ else :
+ r = -1
+ break
+ elif not d2 :
+ r = 1
+ break
+ m = min( len( d1 ) , len( d2 ) )
+ r = cmp( d1[ : m ] , d2[ : m ] )
+ if r != 0 :
+ break
+ d1 = d1[ m : ]
+ d2 = d2[ m : ]
+ return r
+
+#--[ Extensions ]------------------------------------------------------------
+
+@registerFast( 'ext:descendant-attribute' )
+def extDescendantAttribute( context , arg ) :
+
+ '''
+ Find all attributes of context node and its descendant with name
+ 'arg'.
+ '''
+
+ name = arg[ 0 ]
+ item = context.item
+ if isDocument( item ) :
+ return Sequence( item.attributesByName.get( name , () ) )
+ else :
+ return Sequence( attribute
+ for attribute in iterDescendantOrSelfFull( item )
+ if isAttribute( attribute ) and attribute.name == name )
+
+#--[ XPath/XQuery functions ]------------------------------------------------
+
+@registerFast( 'fn:id' )
+def fnId( context , arg , node = None ) :
+
+ if node is None :
+ item = context.item
+ if item is None :
+ raise XPathError( 'FONC0001' )
+ else :
+ item = oneNode( node )
+ doc = item.root
+ if doc is None or not isDocument( doc ) :
+ raise XPathError( 'FODC0001' ) # unsure
+ ids = map( asString , zeroOrMoreItem( arg ) )
+ result = []
+ for id in ids :
+ n = doc.ids.get( id )
+ if n is not None :
+ result.append( n )
+ result.sort( lambda a , b : cmp( a.position , b.position ) )
+ return Sequence( result )
+
+@registerFast( 'fn:normalize-space' )
+def fnNormalizeSpace( context , arg = None ) :
+
+ if arg is None :
+ item = context.item
+ if item is None :
+ raise XPathError( 'FONC0001' )
+ item = asString( item )
+ else :
+ item = zeroOrOneItem( arg )
+ if item is None :
+ return _EmptyString
+ item = asString( item )
+ if not item :
+ return _EmptyString
+ else :
+ s = ' '.join( item.split() )
+ if s[ 0 ] != item[ 0 ] :
+ s = ' ' + s
+ if s[ -1 ] != item[ -1 ] :
+ s = s + ' '
+ return Sequence( s )
+
+@registerFast( 'fn:lower-case' )
+def fnLowerCase( context , arg ) :
+
+ arg = zeroOrOneItem( arg )
+ if arg is None :
+ return _EmptyString
+ else :
+ return Sequence( asString( arg ).lower() )
+
+@registerFast( 'fn:upper-case' )
+def fnUpperCase( context , arg ) :
+
+ arg = zeroOrOneItem( arg )
+ if arg is None :
+ return _EmptyString
+ else :
+ return Sequence( asString( arg ).upper() )
+
+@registerFast( 'fn:false' )
+def fnFalse( context ) :
+
+ return _False
+
+@registerFast( 'fn:true' )
+def fnTrue( context ) :
+
+ return _True
+
+@registerFast( 'fn:empty' )
+def fnEmpty( context , sequence ) :
+
+ return _Boolean[ len( sequence ) == 0 ]
+
+@registerFast( 'fn:exists' )
+def fnExists( context , sequence ) :
+
+ return _Boolean[ len( sequence ) > 0 ]
+
+@registerFast( 'fn:string' )
+def fnString( context , arg = None ) :
+
+ if arg is None :
+ item = context.item
+ if item is None :
+ raise XError( 'FONC0001' )
+ else :
+ item = zeroOrOneItem( arg )
+ if item is None :
+ return _EmptyString
+ return Sequence( asString( item ) )
+
+@registerFast( 'fn:distinct-values' )
+def fnDistinctValues( context , arg ) :
+
+ if not arg :
+ return arg
+ elif arg.type == SEQUENCE_ATOMICS :
+ result = {}
+ for item in arg :
+ result[ item ] = True
+ return Sequence( result.keys() )
+ else :
+ # FIXME: Naive implementation O(n^2)
+ result = []
+ for item in arg :
+ for existingItem in result :
+ if compareValue( existingItem , item ) == 0 :
+ break
+ else :
+ result.append( item )
+ return Sequence( result )
+
+@registerFast( 'fn:not' )
+def fnNot( context , arg ) :
+
+ return _Boolean[ not sequenceToBoolean( arg ) ]
+
+@registerFast( 'fn:count' )
+def fnCount( context , arg ) :
+
+ return Sequence( len( arg ) )
+
+@registerFast( 'fn:root' )
+def fnRoot( context , sequence = None ) :
+
+ if sequence is None :
+ item = context.item
+ if item is None :
+ raise XError( 'FONC0001' )
+ elif not isNode( item ) :
+ raise XError( 'XPTY0006' )
+ elif isDocument( item ) :
+ return Sequence( item )
+ else :
+ return Sequence( item.root )
+ else :
+ item = zeroOrOneNode( sequence )
+ if item is None :
+ return _Empty
+ elif isDocument( item ) :
+ return sequence
+ else :
+ return Sequence( item.root )
+
+@registerFast( 'fn:position' )
+def fnPosition( context ) :
+
+ return Sequence( context.position )
+
+@registerFast( 'fn:last' )
+def fnLast( context ) :
+
+ return Sequence( context.last )
+
+@registerFast( 'fn:boolean' )
+def fnBoolean( context , arg ) :
+
+ return Sequence( sequenceToBoolean( arg ) )
+
+@registerFast( 'fn:trace' )
+def fnTrace( context , value , label ) :
+
+ label = atomize( oneItem( label ) )
+ print '[TRACE] %s = %s' % ( label , value )
+ return value
+
+@registerFast( 'fn:node-name' )
+def fnNodeName( context , arg ) :
+
+ node = zeroOrOneNode( arg )
+ if node is None :
+ return _Empty
+ else :
+ return Sequence( node.dmNodeName() )
+
+@registerFast( 'fn:name' )
+def fnName( context , arg = None ) :
+
+ if arg is None :
+ item = context.item
+ if item is None :
+ raise XError( 'FONC0001' )
+ if not isNode( item ) :
+ raise XError( 'XPTY0006' )
+ else :
+ item = zeroOrOneNode( arg )
+ if item is None :
+ return _EmptyString
+ return Sequence( item.dmNodeName() )
+
+#----------------------------------------------------------------------------
+
+@registerFast( 'op:is-same-node' )
+def opIsSameNode( context , arg1 , arg2 ) :
+
+ arg1 = oneNode( arg1 )
+ arg2 = oneNode( arg2 )
+ return _Boolean[ arg1 is arg2 ]
+
+@registerFast( 'op:node-before' )
+def opNodeBefore( context , arg1 , arg2 ) :
+
+ arg1 = oneNode( arg1 )
+ arg2 = oneNode( arg2 )
+ return _Boolean[ arg1.position < arg2.position ]
+
+@registerFast( 'op:node-after' )
+def opNodeAfter( context , arg1 , arg2 ) :
+
+ arg1 = oneNode( arg1 )
+ arg2 = oneNode( arg2 )
+ return _Boolean[ arg1.position > arg2.position ]
+
+@registerFast( 'op:or' , hold = True )
+def opOr( context , arg1 , arg2 ) :
+
+ return _Boolean[ sequenceToBoolean( arg1( context ) ) \
+ or sequenceToBoolean( arg2( context ) ) ]
+
+@registerFast( 'op:and' , hold = True )
+def opAnd( context , arg1 , arg2 ) :
+
+ return _Boolean[ sequenceToBoolean( arg1( context ) ) \
+ and sequenceToBoolean( arg2( context ) ) ]
+
+@registerFast( 'op:to' )
+def opTo( context , arg1 , arg2 ) :
+
+ arg1 = oneNumber( arg1 )
+ arg2 = oneNumber( arg2 )
+ return Sequence( range( int( arg1 ) , int( arg2 ) + 1 ) )
+
+@registerFast( 'op:union' )
+def opUnion( context , arg1 , arg2 ) :
+
+ if not arg1 :
+ return arg2
+ if not arg2 :
+ return arg1
+ if arg1.type != SEQUENCE_NODES \
+ or arg2.type != SEQUENCE_NODES :
+ raise XPathError( 'XPTY0004' , 'union operator expect sequence of nodes only' )
+ arg1 = zeroOrMoreNodes( arg1 )
+ arg2 = zeroOrMoreNodes( arg2 )
+ result = list( set( arg1 ) | set( arg2 ) )
+ result.sort( lambda a , b : cmp( a.position , b.position ) )
+ return Sequence( result )
+
+@registerFast( 'op:except' )
+def opExcept( context , arg1 , arg2 ) :
+
+ if not arg1 :
+ return arg2
+ if not arg2 :
+ return arg1
+ if arg1.type != SEQUENCE_NODES \
+ or arg2.type != SEQUENCE_NODES :
+ raise XPathError( 'XPTY0004' , 'except operator expect sequence of nodes only' )
+ arg1 = zeroOrMoreNodes( arg1 )
+ arg2 = zeroOrMoreNodes( arg2 )
+ result = list( set( arg1 ) - set( arg2 ) )
+ result.sort( lambda a , b : cmp( a.position , b.position ) )
+ return Sequence( result )
+
+@registerFast( 'op:intersection' )
+def opIntersection( context , arg1 , arg2 ) :
+
+ if not arg1 :
+ return arg2
+ if not arg2 :
+ return arg1
+ if arg1.type != SEQUENCE_NODES \
+ or arg2.type != SEQUENCE_NODES :
+ raise XPathError( 'XPTY0004' , 'intersection operator expect sequence of nodes only' )
+ arg1 = zeroOrMoreNodes( arg1 )
+ arg2 = zeroOrMoreNodes( arg2 )
+ result = list( set( arg1 ) & set( arg2 ) )
+ result.sort( lambda a , b : cmp( a.position , b.position ) )
+ return Sequence( result )
+
+@registerFast( 'op:neg' )
+def opNeg( context , arg ) :
+
+ arg = oneAtomic( arg )
+ if isNumber( arg ) :
+ return Sequence( -arg )
+ else :
+ arg = asNumber( arg )
+ if arg is not None :
+ return Sequence( -arg )
+ else :
+ return _Empty
+
+@registerFast( 'op:add' )
+def opAdd( context , arg1 , arg2 ) :
+
+ arg1 = asNumber( oneItem( arg1 ) )
+ arg2 = asNumber( oneItem( arg2 ) )
+ if arg1 is None or arg2 is None :
+ return _Empty
+ else :
+ return Sequence( arg1 + arg2 )
+
+@registerFast( 'op:substract' )
+def opSubstract( context , arg1 , arg2 ) :
+
+ arg1 = asNumber( oneItem( arg1 ) )
+ arg2 = asNumber( oneItem( arg2 ) )
+ if arg1 is None or arg2 is None :
+ return _Empty
+ else :
+ return Sequence( arg1 - arg2 )
+
+@registerFast( 'op:multiply' )
+def opMultiply( context , arg1 , arg2 ) :
+
+ arg1 = asNumber( oneItem( arg1 ) )
+ arg2 = asNumber( oneItem( arg2 ) )
+ if arg1 is None or arg2 is None :
+ return _Empty
+ else :
+ return Sequence( arg1 * arg2 )
+
+@registerFast( 'op:divide' )
+def opDivide( context , arg1 , arg2 ) :
+
+ arg1 = asNumber( oneItem( arg1 ) )
+ arg2 = asNumber( oneItem( arg2 ) )
+ if arg1 is None or arg2 is None :
+ return _Empty
+ else :
+ return Sequence( arg1 / float( arg2 ) )
+
+@registerFast( 'op:integer-divide' )
+def opIntegerDivide( context , arg1 , arg2 ) :
+
+ arg1 = asNumber( oneItem( arg1 ) )
+ arg2 = asNumber( oneItem( arg2 ) )
+ if arg1 is None or arg2 is None :
+ return _Empty
+ else :
+ return Sequence( arg1 // arg2 )
+
+# Return -1, 0, 1 or None
+def compareValue( a , b ) :
+
+ if isNode( a ) :
+ if isNode( b ) :
+ return _compareString( a.iterStringValue() , b.iterStringValue() )
+ elif isString( b ) :
+ return _compareString( a.iterStringValue() , b )
+ elif isString( a ) :
+ if isNode( b ) :
+ return _compareString( a , b.iterStringValue() )
+ elif isString( b ) :
+ return cmp( a , b )
+ elif isNumber( a ) :
+ if isNumber( b ) :
+ return cmp( a , b )
+
+# eq, ne, lt, le, gt, ge
+def makeValueComparator( comparator ) :
+
+ def fun( context , arg1 , arg2 ) :
+ if not arg1 or not arg2 :
+ return _Empty
+ elif len( arg1 ) == 1 and len( arg2 ) == 1 :
+ item1 = arg1[ 0 ]
+ item2 = arg2[ 0 ]
+ r = compareValue( item1 , item2 )
+ if r is not None :
+ return _Boolean[ comparator( r , 0 ) ]
+ else :
+ raise XPathError( 'XPTY0004' , 'cannot compare %s and %s' % ( typeOf( item1 ) , typeOf( item2 ) ) )
+ else :
+ raise XPathError( 'XPTY0004' , 'Value comparison operators expect arguments of 0 or 1 items' )
+ return fun
+
+# !=, =, <, <=, >, >=
+def makeGeneralComparator( comparator ) :
+
+ def fun( context , arg1 , arg2 ) :
+ if arg1 and arg2 :
+ for item1 in arg1 :
+ for item2 in arg2 :
+ r = False
+ if isNumber( item1 ) :
+ if isNumber( item2 ) :
+ r = comparator( item1 , item2 )
+ else :
+ item2 = asNumber( item2 )
+ if item2 is None : # NaN
+ r = False
+ else :
+ r = comparator( item1 , item2 )
+ elif isNode( item1 ) :
+ if isNumber( item2 ) :
+ item1 = asNumber( item1 )
+ if item1 is None :
+ r = False
+ else :
+ r = comparator( item1 , item2 )
+ elif isNode( item2 ) :
+ r = comparator( _compareString( item1.iterStringValue() , item2.iterStringValue() ) , 0 )
+ elif isString( item2 ) :
+ r = comparator( _compareString( item1.iterStringValue() , item2 ) , 0 )
+ else :
+ raise XPathError( 'XPTY0004' , 'cannot compare %s and %s' % ( typeOf( item1 ) , typeOf( item2 ) ) )
+ elif isString( item1 ) :
+ if isString( item2 ) :
+ r = comparator( item1 , item2 )
+ elif isNode( item2 ) :
+ r = comparator( _compareString( item1 , item2.iterStringValue() ) , 0 )
+ else :
+ raise XPathError( 'XPTY0004' , 'cannot compare %s and %s' % ( typeOf( item1 ) , typeOf( item2 ) ) )
+ else :
+ raise XPathError( 'XPTY0004' , 'cannot compare %s and %s' % ( typeOf( item1 ) , typeOf( item2 ) ) )
+ if r :
+ return _True
+ return _False
+ return fun
+
+#
+# Not really XPath operators, but rather dispatcher to the right
+# operator.
+#
+opValueEqual = makeValueComparator( operator.eq )
+opValueNotEqual = makeValueComparator( operator.ne )
+opValueLessThan = makeValueComparator( operator.lt )
+opValueGreaterThan = makeValueComparator( operator.gt )
+
+opGeneralEqual = makeGeneralComparator( operator.eq )
+opGeneralNotEqual = makeGeneralComparator( operator.ne )
+opGeneralLessThan = makeGeneralComparator( operator.lt )
+opGeneralGreaterThan = makeGeneralComparator( operator.gt )
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End:
--- /dev/null
+# -*- coding:utf-8 -*-
+
+# XPath parser
+# Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+__all__ = [ 'parse' ]
+
+import re
+import time
+
+import parser
+from parser import *
+
+first = lambda l : l[ 0 ]
+second = lambda l : l[ 1 ]
+
+def debug( s ) :
+
+ print 'DEBUG' , s
+ return s
+
+def Debug( name ) :
+
+ def fun( s ) :
+ print 'DEBUG[%s] %s' % ( name , s )
+ return fun
+
+def join( lst ) :
+
+ return lst[ : 1 ] + lst[ 1 ]
+
+def constantly( item ) :
+
+ return lambda o : item
+
+def tag( name ) :
+
+ return lambda o : ( name , o )
+
+def headIs( seq , o ) :
+
+ return isinstance( seq , ( tuple , list ) ) and seq[ 0 ] == o
+
+def xpath() :
+
+ EOS = Regex( '$' )
+
+ textInsideComment = Regex( r'(?:[^:(]|:[^)]|\([^:])*' )
+ comment = Wrapper()
+ comment << Sequence( '(:' , List( textInsideComment , comment ) , ':)' )
+
+ space = List( Regex( '\s*' ) , comment ).ignore()
+
+ reservedFunctionName = [
+ 'attribute' ,
+ 'comment' ,
+ 'document-node' ,
+ 'element' ,
+ 'if' ,
+ 'item' ,
+ 'node' ,
+ 'processing-instruction' ,
+ 'schema-attribute' ,
+ 'schema-element' ,
+ 'text' ,
+ 'typeswitch' ,
+ 'void'
+ ]
+
+ def mark( s ) :
+ def fun() :
+ print s
+ return fun
+
+ def WithSpaceAround( expr ) :
+ return Wrapper( Sequence( space , expr , space ).call( first ) )
+
+ xpString = lambda o : ( 'STRING' , o[ 1 : -1 ].replace( '""' , '"' ) )
+
+ rtNcname = '(?:[a-z_][-a-z_0-9]*)'
+ rtQname = '(?:(?:%s:)?%s)' % ( rtNcname , rtNcname )
+
+ #
+ # [78] QName
+ #
+ qname = Regex( rtQname , re.I )
+
+ value = WithSpaceAround( qname )
+
+ expr = Wrapper()
+ exprSingle = Wrapper()
+
+ #
+ # [74] VarName
+ #
+ varName = qname
+
+ #
+ # [73] StringLiteral
+ #
+ def xpString( e ) :
+ if e[ 0 ] == '"' :
+ s = e[ 1 : -1 ].replace( '""' , '"' )
+ elif e[ 0 ] == "'" :
+ s = e[ 1 : -1 ].replace( "''" , "'" )
+ else :
+ raise NotReached
+ return ( 'string' , s )
+ stringLiteral = \
+ Regex( '"(?:""|[^"])*"|\'(?:\'\'|[^\'])*\'' ).call( xpString )
+
+ #
+ # [72] DoubleLiteral
+ #
+ doubleLiteral = \
+ Regex( r'(?:\.\d+|\d+(?:\.\d*)?)[eE][+-]?\d+' ).call( tag( 'double' ) )
+
+ #
+ # [71] DecimalLiteral
+ #
+ decimalLiteral = \
+ Regex( r'\.\d+|\d+\.\d*' ).call( tag( 'decimal' ) )
+
+ #
+ # [70] IntegerLiteral
+ #
+ integerLiteral = \
+ Regex( '[-+]?\\d+' ).call( tag( 'integer' ) )
+
+ #
+ # [68] ElementName
+ #
+ elementName = value
+
+ #
+ # [67] AttributeName
+ #
+ attributeName = value
+
+ #
+ # [64] ElementNameOrWildcard
+ #
+ elementNameOrWildcard = \
+ Either( elementName ,
+ WithSpaceAround( Literal( '*' ) ) )
+
+ #
+ # [63] ElementTest
+ #
+ def xpElementTest( e ) :
+ if e[ 2 ] :
+ return ( 'element' , e[ 2 ][ 0 ] )
+ else :
+ return ( 'element' , '*' )
+ elementTest = \
+ Sequence( 'element' ,
+ space ,
+ '(' ,
+ space ,
+ Optional( elementNameOrWildcard ) ,
+ space ,
+ ')' ).call( xpElementTest )
+
+ #
+ # [60] AttribNameOrWildcard
+ #
+ attribNameOrWildcard = \
+ Either( attributeName ,
+ WithSpaceAround( Literal( '*' ) ) )
+
+ #
+ # [59] AttributeTest
+ #
+ def xpAttributeTest( e ) :
+ if e[ 2 ] :
+ return ( 'attribute' , e[ 2 ][ 0 ] )
+ else :
+ return ( 'attribute' , )
+ attributeTest = \
+ Sequence( 'attribute' ,
+ space ,
+ '(' ,
+ space ,
+ Optional( attribNameOrWildcard ) ,
+ space ,
+ ')' ).call( xpAttributeTest )
+
+ #
+ # [57] CommentTest
+ #
+ commentTest = \
+ Sequence( 'comment' ,
+ space ,
+ '(' ,
+ space ,
+ ')' ).call( constantly( ( 'comment' , ) ) )
+
+ #
+ # [56] TextTest
+ #
+ textTest = \
+ Sequence( 'text' ,
+ space ,
+ '(' ,
+ space ,
+ ')' ).call( constantly( ( 'text' , ) ) )
+
+ #
+ # [54] AnyKindTest
+ #
+ anyKindTest = \
+ Sequence( 'node' ,
+ space ,
+ '(' ,
+ space ,
+ ')' ).call( constantly( ( 'node' , ) ) )
+
+ #
+ # [53] KindTest
+ #
+ kindTest = \
+ Either( elementTest ,
+ attributeTest ,
+ commentTest ,
+ textTest ,
+ anyKindTest )
+
+ #
+ # [47] FunctionCall
+ #
+ def xpFunctionCall( e ) :
+ if e[ 0 ] in reservedFunctionName :
+ raise Reject
+ elif e[ 2 ] :
+ return ( 'call' , e[ 0 ] ) + tuple( e[ 2 ][ 0 ] )
+ else :
+ return ( 'call' , e[ 0 ] )
+ functionCall = \
+ Sequence( qname ,
+ space ,
+ '(' ,
+ space ,
+ Optional( List( exprSingle , Literal( ',' ).ignore() ) ) ,
+ space ,
+ ')' ).call( xpFunctionCall )
+
+ #
+ # [46] ContextItemExpr
+ #
+ contextItemExpr = \
+ Literal( '.' )
+
+ #
+ # [45] ParenthesizedExpr
+ #
+ def xpParenthesizedExpr( e ) :
+ if not e :
+ return ( 'void' , )
+ else :
+ return e[ 0 ]
+ parenthesizedExpr = \
+ Sequence( '(' , Optional( expr ) , ')' ).call( second ).call( xpParenthesizedExpr )
+
+ #
+ # [44] VarRef
+ #
+ def xpVarRef( e ) :
+ return ( 'varref' , e[ 1 ] )
+ varRef = WithSpaceAround( Sequence( Literal( '$' ) , varName ) ).call( xpVarRef )
+
+ #
+ # [43] NumericLiteral
+ #
+ numericLiteral = \
+ Either( integerLiteral , decimalLiteral , doubleLiteral )
+
+ #
+ # [42] Literal
+ #
+ literal = \
+ Either( numericLiteral , stringLiteral )
+
+ #
+ # [41] PrimaryExpr
+ #
+ primaryExpr = \
+ Either( literal ,
+ varRef ,
+ parenthesizedExpr ,
+ contextItemExpr ,
+ functionCall )
+
+ #
+ # [40] Predicate
+ #
+ predicate = Sequence( '[' , space , expr , space , ']' ).call( second )
+
+ #
+ # [39] PredicateList
+ #
+ predicateList = \
+ ZeroOrMore( predicate )
+
+ #
+ # [38] FilterExpr
+ #
+ def xpFilterExpr( e ) :
+ e , p = e
+ if not p :
+ r = e
+ else :
+ if e[ 0 ] == '.' :
+ # Allowd by grammar, but don't seem allowed by other XPath implementation. Bah.
+ raise Reject
+ r = ( 'filter' , e ) + tuple( p )
+ return r
+ filterExpr = \
+ Sequence( primaryExpr ,
+ predicateList ).call( xpFilterExpr )
+
+ #
+ # [37] Wildcard
+ #
+ wildcard = WithSpaceAround( '*' )
+
+ #
+ # [36] NameTest
+ #
+ def xpNameTest( e ) :
+ if e == '*' :
+ return ( 'name' , '*' )
+ else :
+ return ( 'name' , e )
+ nameTest = \
+ Either( value ,
+ wildcard ).call( xpNameTest )
+
+ #
+ # [35] NodeTest
+ #
+ nodeTest = \
+ Either( kindTest ,
+ nameTest )
+
+ #
+ # [34] AbbrevReverseStep
+ #
+ abbrevReverseStep = \
+ Literal( '..' )
+
+ #
+ # [33] ReverseAxis
+ #
+ reverseAxis = \
+ Regex( '(' +
+ '|'.join( ( 'parent' ,
+ 'ancestor' ,
+ 'preceding-sibling' ,
+ 'preceding' ,
+ 'ancestor-or-self' ) ) +
+ ')::' )
+
+ #
+ # [32] ReverseStep
+ #
+ def xpReverseStepUnabbrev( e ) :
+ if headIs( e[ 1 ] , 'name' ) :
+ if e[ 0 ] == 'attribute' :
+ raise Reject
+ else :
+ default = 'element'
+ return ( e[ 0 ] , ( default , e[ 1 ][ 1 ] ) )
+ else :
+ return tuple( e )
+ reverseStep = \
+ Either( Sequence( reverseAxis , nodeTest ).call( xpReverseStepUnabbrev ) ,
+ abbrevReverseStep )
+
+ #
+ # [31] AbbrevForwardStep
+ #
+ def xpAbbrevForwardStep( e ) :
+ if e[ 0 ] :
+ if headIs( e[ 1 ] , 'name' ) :
+ return ( 'attribute' , ( 'attribute' , e[ 1 ][ 1 ] ) )
+ else :
+ return ( 'attribute' , e[ 1 ] )
+ else :
+ if headIs( e[ 1 ] , 'attribute' ) :
+ return ( 'attribute' , e[ 1 ] )
+ elif headIs( e[ 1 ] , 'name' ) :
+ return ( 'child' , ( 'element' , e[ 1 ][ 1 ] ) )
+ else :
+ return ( 'child' , e[ 1 ] )
+ abbrevForwardStep = \
+ Sequence( Optional( WithSpaceAround( '@' ) ) ,
+ nodeTest ).call( xpAbbrevForwardStep )
+
+ #
+ # [30] ForwardAxis
+ #
+ forwardAxis = \
+ Regex( '(' +
+ '|'.join( ( 'child' ,
+ 'descendant' ,
+ 'attribute' ,
+ 'self' ,
+ 'descendant-or-self' ,
+ 'following-sibling' ,
+ 'following' ) ) +
+ ')::' )
+
+ #
+ # [29] ForwardStep
+ #
+ def xpForwardStepUnabbrev( e ) :
+ if headIs( e[ 1 ] , 'name' ) :
+ if e[ 0 ] == 'attribute' :
+ default = 'attribute'
+ else :
+ default = 'element'
+ return ( e[ 0 ] , ( default , e[ 1 ][ 1 ] ) )
+ else :
+ return tuple( e )
+ forwardStep = \
+ Either( Sequence( forwardAxis , nodeTest ).call( xpForwardStepUnabbrev ) ,
+ abbrevForwardStep )
+
+ #
+ # [28] AxisStep
+ #
+ def xpAxisStep( e ) :
+ if not e[ 1 ] :
+ if e[ 0 ] == '..' :
+ return ( 'parent' , ( 'node' , ) )
+ else :
+ return e[ 0 ]
+ else :
+ if e[ 0 ] == '..' :
+ # See remark for '.'
+ raise Reject
+ return ( 'predicates' , e[ 0 ] ) + tuple( e[ 1 ] )
+ axisStep = \
+ Sequence( Either( forwardStep ,
+ reverseStep ) ,
+ predicateList ).call( xpAxisStep )
+
+ #
+ # [27] StepExpr
+ #
+ stepExpr = \
+ Either( axisStep ,
+ filterExpr )
+
+ #
+ # [26] RelativePathExpr
+ #
+ def xpRelativePathExpr( e ) :
+ r , e = [ e[ 0 ] ] , e[ 1 : ]
+ while e :
+ if e[ 0 ] == '//' :
+ r.append( ( 'descendant-or-self' , ( 'node' , ) ) )
+ r.append( e[ 1 ] )
+ e = e[ 2 : ]
+ return r
+ relativePathExpr = \
+ List( stepExpr ,
+ Regex( '//|/' ) ).call( xpRelativePathExpr )
+
+ #
+ # [25] PathExpr
+ #
+ # FIXME: Simplify (path E) into E ?
+ #
+ def xpPathExprRoot( e ) :
+ return ( 'path' , '/' )
+ def xpPathExpr( e ) :
+ return ( 'path' , ) + tuple( e )
+ def xpPathExprAbs( e ) :
+ head , rest = e
+ if head == '/' :
+ head = ( '/' , )
+ elif head == '//' :
+ head = ( '/' ,
+ ( 'descendant-or-self' , ( 'node' , ) ) )
+ else :
+ raise NotReached
+ return ( 'path' , ) + head + tuple( rest )
+ pathExpr = \
+ WithSpaceAround( Either( Literal( '/' ).call( xpPathExprRoot ) ,
+ Sequence( Regex( '//|/' ) ,
+ relativePathExpr ).call( xpPathExprAbs ) ,
+ Wrapper( relativePathExpr ).call( xpPathExpr ) ) )
+
+ #
+ # [24] NodeComp
+ #
+ nodeComp = \
+ Regex( '|'.join( ( 'is' , '<<' , '>>' ) ) )
+
+ #
+ # [23] ValueComp
+ #
+ valueComp = \
+ Regex( '|'.join( ( 'eq' , 'ne' , 'lt' , 'le' , 'gt' , 'ge' ) ) )
+
+ #
+ # [22] GeneralComp
+ #
+ generalComp = \
+ Regex( '|'.join( ( '=' , '!=', '<=' , '<' , '>=' , '>' ) ) )
+
+ #
+ # [21] ValueExpr
+ #
+ valueExpr = pathExpr
+
+ #
+ # [20] UnaryExpr
+ #
+ def xpUnaryExpr( e ) :
+ neg = ( e[ 0 ].count( '-' ) % 2 )
+ if neg :
+ return ( 'neg' , e[ 1 ] )
+ else :
+ return e[ 1 ]
+ unaryExpr = \
+ Sequence( Regex( '[-+]*' ) , valueExpr ).call( xpUnaryExpr )
+
+ #
+ # [15] IntersectExceptExpr
+ #
+ def xpIntersectExceptExpr( e ) :
+ result , e = e[ 0 ] , e[ 1 : ]
+ while e :
+ result , e = ( e[ 0 ] , result , e[ 1 ] ) , e[ 2 : ]
+ return result
+ intersectExceptExpr = \
+ List( unaryExpr ,
+ Regex( 'intersect|except' ) ).call( xpIntersectExceptExpr )
+
+ #
+ # [14] UnionExpr
+ #
+ def xpUnionExpr( e ) :
+ if len( e ) == 1 :
+ return e[ 0 ]
+ r = ( 'union' , e[ 0 ] , e[ 1 ] )
+ for ee in e[ 2 : ] :
+ r = ( 'union' , r , ee )
+ return r
+ unionExpr = \
+ List( intersectExceptExpr ,
+ Regex( 'union|[|]' ).ignore() ).call( xpUnionExpr )
+
+ #
+ # [13] MultiplicativeExpr
+ #
+ def xpMultiplicativeExpr( e ) :
+ r , rest = e[ 0 ] , e[ 1 : ]
+ if rest and r == ( 'path' , '/' ) :
+ raise Reject
+ while rest :
+ r , rest = ( rest[ 0 ] , r , rest[ 1 ] ) , rest[ 2 : ]
+ return r
+ multiplicativeExpr = \
+ List( unionExpr ,
+ Regex( '[*]|div|idiv|mod' ) ).call( xpMultiplicativeExpr )
+
+ #
+ # [12] AdditiveExpr
+ #
+ def xpAdditiveExpr( e ) :
+ r , rest = e[ 0 ] , e[ 1 : ]
+ while rest :
+ r , rest = ( rest[ 0 ] , r , rest[ 1 ] ) , rest[ 2 : ]
+ return r
+ additiveExpr = \
+ List( multiplicativeExpr ,
+ Regex( '[+-]' ) ).call( xpAdditiveExpr )
+
+ #
+ # [11] RangeExpr
+ #
+ def xpRangeExpr( e ) :
+ if not e[ 1 ] :
+ return e[ 0 ]
+ else :
+ return ( 'range' , e[ 0 ] , e[ 1 ][ 0 ][ 1 ] )
+ rangeExpr = \
+ Sequence( additiveExpr ,
+ Optional( Sequence( space , 'to' , space , additiveExpr ) ) ).call( xpRangeExpr )
+
+ #
+ # [10] ComparisonExpr
+ #
+ def xpComparisonExpr( e ) :
+ if not e[ 1 ] :
+ return e[ 0 ]
+ else :
+ return ( e[ 1 ][ 0 ][ 0 ] , e[ 0 ] , e[ 1 ][ 0 ][ 1 ] )
+ comparisonExpr = \
+ Sequence( rangeExpr ,
+ Optional( Sequence( Either( valueComp , generalComp , nodeComp ) ,
+ rangeExpr ) ) ).call( xpComparisonExpr )
+
+ #
+ # [9] AndExpr
+ #
+ def xpAndExpr( e ) :
+ return reduce( lambda a , b : ( 'and' , b , a ) , reversed( e ) )
+ andExpr = \
+ List( comparisonExpr ,
+ Literal( 'and' ).ignore() ).call( xpAndExpr )
+
+ #
+ # [8] OrExpr
+ #
+ def xpOrExpr( e ) :
+ return reduce( lambda a , b : ( 'or' , b , a ) , reversed( e ) )
+ orExpr = \
+ List( andExpr ,
+ Literal( 'or' ).ignore() ).call( xpOrExpr )
+
+ #
+ # [7] IfExpr
+ #
+ def xpIfExpr( e ) :
+ return ( 'if' , e[ 2 ] , e[ 5 ] , e[ 7 ] )
+ ifExpr = \
+ WithSpaceAround( Sequence( 'if' , space , '(' , expr , ')' , space ,
+ 'then' , exprSingle ,
+ 'else' , exprSingle ) ).call( xpIfExpr )
+
+ #
+ # [6] QuantifiedExpr
+ #
+ def xpQuantifiedExprClause( e ) :
+ return ( e[ 1 ] , e[ 3 ] )
+ def xpQuantifiedExpr( e ) :
+ return ( e[ 0 ] , tuple( e[ 1 ] ) , e[ 3 ] )
+ quantifiedExpr = \
+ Sequence( Either( 'some' , 'every' ) , space ,
+ List( Sequence( '$' , varName , space , 'in' , space , exprSingle ).call( xpQuantifiedExprClause ) ,
+ WithSpaceAround( ',' ).ignore() ) ,
+ 'satisfies' , exprSingle ).call( xpQuantifiedExpr )
+
+ #
+ # [5] SimpleForClause
+ #
+ def xpSimpleForClauseItem( e ) :
+ return ( e[ 1 ] , e[ 3 ] )
+ def xpSimpleForClause( e ) :
+ return tuple( e[ 1 ] )
+ simpleForClause = \
+ Sequence( 'for' , space ,
+ List( Sequence( '$' , varName , space , 'in' , space , exprSingle ).call( xpSimpleForClauseItem ) ,
+ WithSpaceAround( Literal( ',' ) ).ignore() ) ).call( xpSimpleForClause )
+
+
+ #
+ # [4] ForExpr
+ #
+ def xpForExpr( e ) :
+ return ( 'for' , e[ 0 ] , e[ 2 ] )
+ forExpr = \
+ Sequence( simpleForClause , space , 'return' , space , exprSingle ).call( xpForExpr )
+
+ #
+ # [3] ExprSingle
+ #
+ exprSingle << Either( forExpr , quantifiedExpr , ifExpr , orExpr )
+
+ #
+ # [2] Expr
+ #
+ def xpExpr( e ) :
+ return ( 'exprlist' , ) + tuple( e )
+ expr << List( exprSingle ,
+ Literal( ',' ).ignore() ).call( xpExpr )
+
+ #
+ # [1] XPath
+ #
+ XPath = Sequence( expr , EOS ).call( first )
+
+ #parser._debug = True
+
+ return XPath.parse
+
+parser = xpath()
+
+def parse( s ) :
+
+ t = time.time()
+ r = parser( s )
+ duration = time.time() - t
+ #print 'DEBUG: Parsed %r in %.2gs' % ( s , duration )
+ return r
+
+# Local Variables:
+# tab-width: 4
+# python-indent: 4
+# End: