4 # Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
def first( l ) :
    """Return the item at index 0 of `l`."""
    return l[ 0 ]


def second( l ) :
    """Return the item at index 1 of `l`."""
    return l[ 1 ]
39 print 'DEBUG[%s] %s' % ( name , s )
44 return lst[ : 1 ] + lst[ 1 ]
def constantly( item ) :
    """Return a one-argument function that always yields `item`.

    The argument given to the returned function is ignored.
    """
    def always( o ) :
        return item
    return always
52 return lambda o : ( name , o )
def headIs( seq , o ) :
    """Tell whether `seq` is a tuple or list whose first item equals `o`.

    Returns False when `seq` is any other type (a plain string, for
    instance) and also when it is an empty tuple or list, instead of
    raising IndexError on the `seq[ 0 ]` lookup.
    """
    if not isinstance( seq , ( tuple , list ) ) :
        return False
    # An empty sequence has no head to compare against.
    if not seq :
        return False
    return seq[ 0 ] == o
60 textInsideComment = Regex( r'(?:[^:(]|:[^)]|\([^:])*' )
62 comment << Sequence( '(:' , List( textInsideComment , comment ) , ':)' )
# Whitespace runs combined with `comment` through List; the parser is marked
# with ignore() (presumably so its match is left out of the result - confirm
# against the parser library).
# The pattern is a raw string: '\s' in a plain literal is an invalid escape
# sequence that newer Python versions warn about; the regex is unchanged.
space = List( Regex( r'\s*' ) , comment ).ignore()
66 reservedFunctionName = [
74 'processing-instruction' ,
def WithSpaceAround( expr ) :
    """Return a parser matching `expr` with `space` on both sides.

    The parser is Sequence( space , expr , space ) post-processed with
    `first`, and the whole thing is returned inside a Wrapper.
    """
    padded = Sequence( space , expr , space )
    return Wrapper( padded.call( first ) )
def xpString( o ) :
    """Turn a quoted string literal into a ( 'STRING' , text ) pair.

    `o` is the literal including its delimiters. The delimiters are
    stripped and the doubled-delimiter escape is undone: `""` becomes `"`
    in a double-quoted literal, `''` becomes `'` in a single-quoted one.
    A doubled quote of the *other* kind is ordinary text and is kept.
    """
    body = o[ 1 : -1 ]
    if o[ : 1 ] == "'" :
        # Single-quoted literal: only '' is an escape here.
        body = body.replace( "''" , "'" )
    else :
        body = body.replace( '""' , '"' )
    return ( 'STRING' , body )
# Regex source for a name without prefix: a letter or underscore followed by
# letters, digits, '-' or '_'. Only lowercase ASCII is listed in the pattern.
rtNcname = '(?:[a-z_][-a-z_0-9]*)'
# Regex source for a qualified name: an optional "prefix:" then a local name.
rtQname = '(?:(?:{0}:)?{0})'.format( rtNcname )
98 qname = Regex( rtQname , re.I )
100 value = WithSpaceAround( qname )
103 exprSingle = Wrapper()
115 s = e[ 1 : -1 ].replace( '""' , '"' )
117 s = e[ 1 : -1 ].replace( "''" , "'" )
120 return ( 'string' , s )
122 Regex( '"(?:""|[^"])*"|\'(?:\'\'|[^\'])*\'' ).call( xpString )
128 Regex( r'(?:\.\d+|\d+(?:\.\d*)?)[eE][+-]?\d+' ).call( tag( 'double' ) )
131 # [71] DecimalLiteral
134 Regex( r'\.\d+|\d+\.\d*' ).call( tag( 'decimal' ) )
137 # [70] IntegerLiteral
140 Regex( '[-+]?\\d+' ).call( tag( 'integer' ) )
150 attributeName = value
153 # [64] ElementNameOrWildcard
155 elementNameOrWildcard = \
156 Either( elementName ,
157 WithSpaceAround( Literal( '*' ) ) )
162 def xpElementTest( e ) :
164 return ( 'element' , e[ 2 ][ 0 ] )
166 return ( 'element' , '*' )
168 Sequence( 'element' ,
172 Optional( elementNameOrWildcard ) ,
174 ')' ).call( xpElementTest )
177 # [60] AttribNameOrWildcard
179 attribNameOrWildcard = \
180 Either( attributeName ,
181 WithSpaceAround( Literal( '*' ) ) )
186 def xpAttributeTest( e ) :
188 return ( 'attribute' , e[ 2 ][ 0 ] )
190 return ( 'attribute' , )
192 Sequence( 'attribute' ,
196 Optional( attribNameOrWildcard ) ,
198 ')' ).call( xpAttributeTest )
204 Sequence( 'comment' ,
208 ')' ).call( constantly( ( 'comment' , ) ) )
218 ')' ).call( constantly( ( 'text' , ) ) )
224 Sequence( 'document-node' ,
228 ')' ).call( constantly( ( 'document' , ) ) )
238 ')' ).call( constantly( ( 'node' , ) ) )
244 Either( documentTest ,
254 def xpFunctionCall( e ) :
255 if e[ 0 ] in reservedFunctionName :
258 return ( 'call' , e[ 0 ] ) + tuple( e[ 2 ][ 0 ] )
260 return ( 'call' , e[ 0 ] )
266 Optional( List( exprSingle , Literal( ',' ).ignore() ) ) ,
268 ')' ).call( xpFunctionCall )
271 # [46] ContextItemExpr
277 # [45] ParenthesizedExpr
279 def xpParenthesizedExpr( e ) :
284 parenthesizedExpr = \
285 Sequence( '(' , Optional( expr ) , ')' ).call( second ).call( xpParenthesizedExpr )
291 return ( 'varref' , e[ 1 ] )
292 varRef = WithSpaceAround( Sequence( Literal( '$' ) , varName ) ).call( xpVarRef )
295 # [43] NumericLiteral
298 Either( integerLiteral , decimalLiteral , doubleLiteral )
304 Either( numericLiteral , stringLiteral )
319 predicate = Sequence( '[' , space , expr , space , ']' ).call( second )
325 ZeroOrMore( predicate )
330 def xpFilterExpr( e ) :
# Allowed by the grammar, but apparently not accepted by other XPath implementations.
338 r = ( 'filter' , e ) + tuple( p )
341 Sequence( primaryExpr ,
342 predicateList ).call( xpFilterExpr )
347 wildcard = WithSpaceAround( '*' )
352 def xpNameTest( e ) :
354 return ( 'name' , '*' )
356 return ( 'name' , e )
359 wildcard ).call( xpNameTest )
369 # [34] AbbrevReverseStep
371 abbrevReverseStep = \
379 '|'.join( ( 'parent' ,
381 'preceding-sibling' ,
383 'ancestor-or-self' ) ) +
389 def xpReverseStepUnabbrev( e ) :
390 if headIs( e[ 1 ] , 'name' ) :
391 if e[ 0 ] == 'attribute' :
395 return ( e[ 0 ] , ( default , e[ 1 ][ 1 ] ) )
399 Either( Sequence( reverseAxis , nodeTest ).call( xpReverseStepUnabbrev ) ,
403 # [31] AbbrevForwardStep
405 def xpAbbrevForwardStep( e ) :
407 if headIs( e[ 1 ] , 'name' ) :
408 return ( 'attribute' , ( 'attribute' , e[ 1 ][ 1 ] ) )
410 return ( 'attribute' , e[ 1 ] )
412 if headIs( e[ 1 ] , 'attribute' ) :
413 return ( 'attribute' , e[ 1 ] )
414 elif headIs( e[ 1 ] , 'name' ) :
415 return ( 'child' , ( 'element' , e[ 1 ][ 1 ] ) )
417 return ( 'child' , e[ 1 ] )
418 abbrevForwardStep = \
419 Sequence( Optional( WithSpaceAround( '@' ) ) ,
420 nodeTest ).call( xpAbbrevForwardStep )
427 '|'.join( ( 'child' ,
431 'descendant-or-self' ,
432 'following-sibling' ,
439 def xpForwardStepUnabbrev( e ) :
440 if headIs( e[ 1 ] , 'name' ) :
441 if e[ 0 ] == 'attribute' :
442 default = 'attribute'
445 return ( e[ 0 ] , ( default , e[ 1 ][ 1 ] ) )
449 Either( Sequence( forwardAxis , nodeTest ).call( xpForwardStepUnabbrev ) ,
455 def xpAxisStep( e ) :
458 return ( 'parent' , ( 'node' , ) )
465 return ( 'predicates' , e[ 0 ] ) + tuple( e[ 1 ] )
467 Sequence( Either( forwardStep ,
469 predicateList ).call( xpAxisStep )
479 # [26] RelativePathExpr
481 def xpRelativePathExpr( e ) :
482 r , e = [ e[ 0 ] ] , e[ 1 : ]
485 r.append( ( 'descendant-or-self' , ( 'node' , ) ) )
491 Regex( '//|/' ) ).call( xpRelativePathExpr )
496 # FIXME: Simplify (path E) into E ?
def xpPathExprRoot( e ) :
    """Return the node for a bare '/' path; the matched value `e` is unused."""
    root = '/'
    return ( 'path' , root )
def xpPathExpr( e ) :
    """Return a tuple made of the 'path' tag followed by the items of `e`."""
    node = [ 'path' ]
    node.extend( e )
    return tuple( node )
502 def xpPathExprAbs( e ) :
508 ( 'descendant-or-self' , ( 'node' , ) ) )
511 return ( 'path' , ) + head + tuple( rest )
513 WithSpaceAround( Either( Literal( '/' ).call( xpPathExprRoot ) ,
514 Sequence( Regex( '//|/' ) ,
515 relativePathExpr ).call( xpPathExprAbs ) ,
516 Wrapper( relativePathExpr ).call( xpPathExpr ) ) )
522 Regex( '|'.join( ( 'is' , '<<' , '>>' ) ) )
528 Regex( '|'.join( ( 'eq' , 'ne' , 'lt' , 'le' , 'gt' , 'ge' ) ) )
534 Regex( '|'.join( ( '=' , '!=', '<=' , '<' , '>=' , '>' ) ) )
544 def xpUnaryExpr( e ) :
546 neg = ( e[ 0 ].count( '-' ) % 2 )
548 return ( 'minus' , e[ 1 ] )
550 return ( 'plus' , e[ 1 ] )
554 Sequence( WithSpaceAround( Regex( '[-+]*' ) ) , valueExpr ).call( xpUnaryExpr )
557 # [15] IntersectExceptExpr
559 def xpIntersectExceptExpr( e ) :
560 result , e = e[ 0 ] , e[ 1 : ]
562 result , e = ( e[ 0 ] , result , e[ 1 ] ) , e[ 2 : ]
564 intersectExceptExpr = \
566 Regex( 'intersect|except' ) ).call( xpIntersectExceptExpr )
571 def xpUnionExpr( e ) :
574 r = ( 'union' , e[ 0 ] , e[ 1 ] )
576 r = ( 'union' , r , ee )
579 List( intersectExceptExpr ,
580 Regex( 'union|[|]' ).ignore() ).call( xpUnionExpr )
583 # [13] MultiplicativeExpr
585 def xpMultiplicativeExpr( e ) :
586 r , rest = e[ 0 ] , e[ 1 : ]
587 if rest and r == ( 'path' , '/' ) :
590 r , rest = ( rest[ 0 ] , r , rest[ 1 ] ) , rest[ 2 : ]
592 multiplicativeExpr = \
594 Regex( '[*]|div|idiv|mod' ) ).call( xpMultiplicativeExpr )
599 def xpAdditiveExpr( e ) :
600 r , rest = e[ 0 ] , e[ 1 : ]
602 r , rest = ( rest[ 0 ] , r , rest[ 1 ] ) , rest[ 2 : ]
605 List( multiplicativeExpr ,
606 Regex( '[+-]' ) ).call( xpAdditiveExpr )
611 def xpRangeExpr( e ) :
615 return ( 'range' , e[ 0 ] , e[ 1 ][ 0 ][ 1 ] )
617 Sequence( additiveExpr ,
618 Optional( Sequence( space , 'to' , space , additiveExpr ) ) ).call( xpRangeExpr )
621 # [10] ComparisonExpr
623 def xpComparisonExpr( e ) :
627 return ( e[ 1 ][ 0 ][ 0 ] , e[ 0 ] , e[ 1 ][ 0 ][ 1 ] )
629 Sequence( rangeExpr ,
630 Optional( Sequence( Either( valueComp , generalComp , nodeComp ) ,
631 rangeExpr ) ) ).call( xpComparisonExpr )
637 return reduce( lambda a , b : ( 'and' , b , a ) , reversed( e ) )
639 List( comparisonExpr ,
640 Literal( 'and' ).ignore() ).call( xpAndExpr )
646 return reduce( lambda a , b : ( 'or' , b , a ) , reversed( e ) )
649 Literal( 'or' ).ignore() ).call( xpOrExpr )
655 return ( 'if' , e[ 2 ] , e[ 5 ] , e[ 7 ] )
657 WithSpaceAround( Sequence( 'if' , space , '(' , expr , ')' , space ,
658 'then' , exprSingle ,
659 'else' , exprSingle ) ).call( xpIfExpr )
def xpQuantifiedExprClause( e ) :
    """Keep items 1 and 3 of the matched clause `e` as a pair.

    Presumably these are the variable name and the expression after 'in';
    confirm against what the enclosing Sequence actually yields.
    """
    name , source = e[ 1 ] , e[ 3 ]
    return ( name , source )
def xpQuantifiedExpr( e ) :
    """Build a 3-tuple from the matched quantified expression `e`.

    The result holds item 0 unchanged, item 1 converted to a tuple, and
    item 3 unchanged; item 2 is dropped.
    """
    head = e[ 0 ]
    clauses = tuple( e[ 1 ] )
    body = e[ 3 ]
    return ( head , clauses , body )
669 Sequence( Either( 'some' , 'every' ) , space ,
670 List( Sequence( '$' , varName , space , 'in' , space , exprSingle ).call( xpQuantifiedExprClause ) ,
671 WithSpaceAround( ',' ).ignore() ) ,
672 'satisfies' , exprSingle ).call( xpQuantifiedExpr )
675 # [5] SimpleForClause
def xpSimpleForClauseItem( e ) :
    """Keep items 1 and 3 of the matched binding `e` as a pair.

    Presumably these are the variable name and the expression after 'in';
    confirm against what the enclosing Sequence actually yields.
    """
    name , source = e[ 1 ] , e[ 3 ]
    return ( name , source )
def xpSimpleForClause( e ) :
    """Return item 1 of the matched clause `e` converted to a tuple."""
    bindings = e[ 1 ]
    return tuple( bindings )
682 Sequence( 'for' , space ,
683 List( Sequence( '$' , varName , space , 'in' , space , exprSingle ).call( xpSimpleForClauseItem ) ,
684 WithSpaceAround( Literal( ',' ) ).ignore() ) ).call( xpSimpleForClause )
691 return ( 'for' , e[ 0 ] , e[ 2 ] )
693 Sequence( simpleForClause , space , 'return' , space , exprSingle ).call( xpForExpr )
698 exprSingle << Either( forExpr , quantifiedExpr , ifExpr , orExpr )
704 return ( 'exprlist' , ) + tuple( e )
705 expr << List( exprSingle ,
706 Literal( ',' ).ignore() ).call( xpExpr )
711 XPath = Sequence( expr , EOS ).call( first )
713 #parser._debug = True
721 duration = time.time() - t
722 #print 'DEBUG: Parsed %r in %.2gs' % ( s , duration )