5 # Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 from xpath import XPath
30 from sequence import Sequence
31 from nodes import Node, Document
32 from error import Error
33 from parser import NoMatch
34 from sequence_misc import printSequence
37 from xpath_misc import lispy
39 g_defaultUserAgent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
# Write the Node items of `sequence` to standard output by calling each
# node's serialize() with file = sys.stdout.
# NOTE(review): this listing omits some original lines (the embedded source
# numbering jumps from 45 to 52), so whatever is done with items that are not
# Node instances is not visible here -- confirm against the full file.
41 def printInlineSequence( sequence ) :
43 for item in sequence :
44 if isinstance( item , Node ) :
45 item.serialize( file = sys.stdout )
52 txt = urllib.urlopen( uri ).read()
53 return htmltree.parse( txt )
# Install a user agent string on urllib: when `ua` is a string (basestring),
# it is assigned to urllib.URLopener.version.
# NOTE(review): the lines that derive `ua` from `env` are not visible in this
# listing (embedded numbering jumps from 55 to 60); presumably it comes from
# the 'ua' entry of the environment -- confirm against the full file.
55 def resetUserAgent( env ) :
60 if isinstance( ua , basestring ) :
61 urllib.URLopener.version = ua
def extSerialize( context ) :
    """Return the serialized form of the context item, wrapped in a Sequence.

    The context item's own serialize() method is called without arguments
    and its result becomes the single argument given to Sequence.
    """
    serialized = context.item.serialize()
    return Sequence( serialized )
def fnParse( context , text ) :
    """Parse the first item of `text` with htmltree and return it in a Sequence.

    `context` is accepted for the function-call interface but is not used.
    Indexing `text` with 0 means an empty argument raises whatever error
    that indexing produces; no special handling is done here.
    """
    markup = text[ 0 ]
    tree = htmltree.parse( markup )
    return Sequence( tree )
# Document loader with a per-URI cache.
# `_cache` is a mutable default argument: unless a caller passes its own
# dict, the same dict object is reused on every call, which is what makes it
# work as a cache that persists between calls.
# NOTE(review): only part of the body is visible in this listing (embedded
# numbering 71, 74, 79, 81). How `doc` is obtained, under which condition the
# error message is printed, and what is returned cannot be stated from here.
71 def fnDoc( context , uri , _cache = {} ) :
# Only a URI that has no cache entry yet takes this branch.
74 if uri not in _cache :
79 print 'Error reading URI %s' % uri
# Remember the document, wrapped in a Sequence, under its URI.
81 _cache[ uri ] = Sequence( doc )
# Compile the expression `expr`, then evaluate the compiled object `x` with
# the context item `dot`, the variable environment `env` and the function
# table `functions`.
# NOTE(review): the compile step and the return statement are not visible in
# this listing. The caller in this file unpacks the result as
# ( result , dp , de ) and prints dp / de as parse and eval durations in
# seconds, so a 3-tuple of that shape is presumably returned -- confirm.
84 def evaluate( expr , dot , env , functions ) :
87 # Compile the expression
94 # Evaluate the compiled expression
97 r = x.eval( dot , env , functions )
# Print `sequence` according to the display `mode`:
#   'inline' -> printInlineSequence( sequence )
#   'full'   -> printSequence( sequence , True , displayLocation )
#   'short'  -> printSequence( sequence , False , displayLocation )
#   other    -> treated as the default mode, which branches on the length
#               of the sequence (0, 1, ...).
# NOTE(review): the bodies of the default-mode branches are not visible in
# this listing (embedded numbering skips 112 and everything after 113), so
# what is printed in default mode cannot be described from here.
102 def printResult( sequence , mode , displayLocation ) :
104 if mode == 'inline' :
105 printInlineSequence( sequence )
106 elif mode == 'full' :
107 printSequence( sequence , True , displayLocation )
108 elif mode == 'short' :
109 printSequence( sequence , False , displayLocation )
110 else : # assuming default mode
111 if len( sequence ) == 0 :
113 elif len( sequence ) == 1 :
119 <expression> Evaluate XPath expression.
120 $var := <expression> Evaluate XPath expression and store result in 'var'.
124 \. URI Load document from URI and set it as the current node.
128 \e EXPRESSION Display XPath parser result
131 \l Toggle location display in full and short mode.
132 \o Switch optimization on/off.
134 \v Print name of available variables.
135 \x Switch timer on/off.
143 reDef = re.compile( r'^\s*\$([a-z_][a-z_0-9-]*)\s*:=\s*(.*)$' , re.I )
158 'version' : Sequence( 'TX' , '0.1' ) ,
159 'ua' : Sequence( g_defaultUserAgent )
165 'serialize' : extSerialize
168 print 'XPath TX 0.1 - (c)2005 Frederic Jolliton <frederic@jolliton.com>\n'
169 print 'Use \? for help.\n'
171 displayLocation = False
175 line = raw_input( 'XPath2.0> ' )
179 except KeyboardInterrupt :
183 line = line.decode( sys.stdin.encoding )
185 print 'Error decoding input with encoding %r. Using raw input.' % ( sys.stdin.encoding , )
187 if line.startswith( '#' ) or not line :
189 elif line.startswith( '\\' ) :
190 resetUserAgent( env )
195 elif cmd.startswith( '. ' ) :
196 uri = cmd[ 2 : ].strip()
198 env[ 'current' ] = readDoc( uri )
200 print 'Error reading URI %r' % uri
201 elif cmd.startswith( 'e ' ) :
203 print lispy( xpathparser.parse( cmd[ 1 : ].strip() ) )
209 print 'Cache flushed'
212 xpath.g_dontOptimize = not xpath.g_dontOptimize
213 print 'Optimization turned' , ('off','on')[ not xpath.g_dontOptimize ]
215 displayLocation = not displayLocation
216 print 'Location' , ('off','on')[ displayLocation ]
218 showTime = not showTime
219 print 'Timer' , ('off','on')[ showTime ]
221 print '$' + ', $'.join( sorted( env.keys() ) )
225 print 'Current mode is %r' % mode
226 print 'Location is' , ('off','on')[ displayLocation ]
227 print 'Timer is' , ('off','on')[ showTime ]
229 print 'Unknown command %r' % cmd
231 r = reDef.match( line )
233 varName , line = r.groups()
236 resetUserAgent( env )
238 dot = env[ 'current' ]
239 if isinstance( dot , Sequence ) :
240 assert len( dot ) == 1 , 'expected a sequence of 1 item'
243 result , dp , de = evaluate( line , dot , env , functions )
244 except KeyboardInterrupt :
245 print '[Interrupted]'
247 if varName is not None :
248 env[ varName ] = result
251 printResult( result , mode , displayLocation )
252 except KeyboardInterrupt : # FIXME: Don't work.
253 print '[Interrupted]'
255 print '-- %fs(parse) + %fs(eval) --' % ( dp , de )
256 except KeyboardInterrupt :
260 except XPathError , e :
261 print 'XPATH-ERROR [%s]' % ( e , )
263 print 'GENERIC-ERROR [%s]' % ( e , )
266 if __name__ == '__main__' :