5 # Copyright (C) 2005 Frédéric Jolliton <frederic@jolliton.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 from xpath import XPath
30 from sequence import Sequence
31 from nodes import Node, Document
32 from error import Error
33 from parser import NoMatch
34 from sequence_misc import printSequence
37 from xpath_misc import lispy
39 g_defaultUserAgent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
# Walk the items of `sequence`; every item that is a Node instance is
# written to standard output through its own serialize() method.
# NOTE(review): what happens to items that are not Node instances is not
# shown in the lines visible here -- confirm before relying on this summary.
41 def printInlineSequence( sequence ) :
43 for item in sequence :
44 if isinstance( item , Node ) :
45 item.serialize( file = sys.stdout )
52 txt = urllib.urlopen( uri ).read()
53 return htmltree.parse( txt )
# Install the user agent on urllib.URLopener.version, but only when `ua`
# is a string (str or unicode, via basestring).
# NOTE(review): how `ua` is obtained from `env` is not shown here;
# presumably it comes from the 'ua' entry of the environment -- confirm.
55 def resetUserAgent( env ) :
60 if isinstance( ua , basestring ) :
# The assignment targets the class attribute, not an instance, so it is
# not limited to a single opener object.
61 urllib.URLopener.version = ua
def extSerialize( context ) :
    """Extension function: serialize the current context item.

    Calls serialize() on context.item and returns that result wrapped
    in a Sequence.
    """
    node = context.item
    serialized = node.serialize()
    return Sequence( serialized )
def extParse( context , text ) :
    """Extension function: parse the first item of `text`.

    Only text[ 0 ] is used; it is handed to htmltree.parse() and the
    parse result is returned wrapped in a Sequence. `context` is
    accepted but not used.
    """
    source = text[ 0 ]
    tree = htmltree.parse( source )
    return Sequence( tree )
# Look up `uri` in a cache of already-stored documents.
# `_cache` is a mutable default argument: the same dict object is reused
# by every call that does not pass its own, so entries persist between
# calls.
# NOTE(review): the code that loads the document and the value returned
# to the caller are not shown here -- confirm both.
71 def fnDoc( context , uri , _cache = {} ) :
# Only a URI that has no cache entry yet goes through this branch.
74 if uri not in _cache :
# Presumably reached when loading fails -- confirm; the guarding
# condition is not visible.
79 print 'Error reading URI %s' % uri
# The cache stores the document wrapped in a Sequence, not the bare value.
81 _cache[ uri ] = Sequence( doc )
def extSort( context , arg ) :
    """Extension function: return the items of `arg` in sorted order.

    The items are copied into a new list, sorted with the default
    ordering, and the list is passed to Sequence(). `arg` itself is
    left untouched and `context` is accepted but not used.
    """
    ordered = list( arg )
    ordered.sort()
    return Sequence( ordered )
# Evaluate the expression `expr` in two phases, as the inline comments
# mark: compile it, then run the compiled object.
# NOTE(review): the compile step, any timing code and the return
# statement are not shown here. The caller in this file unpacks three
# values (result, dp, de) and prints dp / de as parse and eval seconds --
# confirm the return shape against the full function.
88 def evaluate( expr , dot , env , functions ) :
91 # Compile the expression
98 # Evaluate the compiled expression
# `x` is presumably the compiled expression -- confirm; it is evaluated
# with `dot`, `env` and `functions` passed through unchanged.
101 r = x.eval( dot , env , functions )
# Print `sequence` according to the display `mode`:
#   'inline' -> printInlineSequence( sequence )
#   'full'   -> printSequence with True as second argument
#   'short'  -> printSequence with False as second argument
#   other    -> default mode, which branches on the sequence length
# `displayLocation` is only forwarded to printSequence ('full' / 'short').
# NOTE(review): the bodies of the default-mode branches are not shown
# here -- confirm what is printed for 0, 1 and more items.
106 def printResult( sequence , mode , displayLocation ) :
108 if mode == 'inline' :
109 printInlineSequence( sequence )
110 elif mode == 'full' :
111 printSequence( sequence , True , displayLocation )
112 elif mode == 'short' :
113 printSequence( sequence , False , displayLocation )
114 else : # assuming default mode
115 if len( sequence ) == 0 :
117 elif len( sequence ) == 1 :
123 <expression> Evaluate XPath expression.
124 $var := <expression> Evaluate XPath expression and store result in 'var'.
128 \. URI Load document from URI and set it as the current node.
132 \e EXPRESSION Display XPath parser result
135 \l Toggle location display in full and short mode.
136 \o Switch optimization on/off.
138 \v Print name of available variables.
139 \x Switch timer on/off.
147 reDef = re.compile( r'^\s*\$([a-z_][a-z_0-9-]*)\s*:=\s*(.*)$' , re.I )
162 'version' : Sequence( 'TX' , '0.1' ) ,
163 'ua' : Sequence( g_defaultUserAgent )
169 'serialize' : extSerialize ,
173 print 'XPath TX 0.1 - (c)2005 Frederic Jolliton <frederic@jolliton.com>\n'
174 print 'Use \? for help.\n'
176 displayLocation = False
180 line = raw_input( 'XPath2.0> ' )
184 except KeyboardInterrupt :
188 line = line.decode( sys.stdin.encoding )
190 print 'Error decoding input with encoding %r. Using raw input.' % ( sys.stdin.encoding , )
192 if line.startswith( '#' ) or not line :
194 elif line.startswith( '\\' ) :
195 resetUserAgent( env )
200 elif cmd.startswith( '. ' ) :
201 uri = cmd[ 2 : ].strip()
203 env[ 'current' ] = readDoc( uri )
205 print 'Error reading URI %r' % uri
206 elif cmd.startswith( 'e ' ) :
208 print lispy( xpathparser.parse( cmd[ 1 : ].strip() ) )
214 print 'Cache flushed'
217 xpath.g_dontOptimize = not xpath.g_dontOptimize
218 print 'Optimization turned' , ('off','on')[ not xpath.g_dontOptimize ]
220 displayLocation = not displayLocation
221 print 'Location' , ('off','on')[ displayLocation ]
223 showTime = not showTime
224 print 'Timer' , ('off','on')[ showTime ]
226 print '$' + ', $'.join( sorted( env.keys() ) )
230 print 'Current mode is %r' % mode
231 print 'Location is' , ('off','on')[ displayLocation ]
232 print 'Timer is' , ('off','on')[ showTime ]
234 print 'Unknown command %r' % cmd
236 r = reDef.match( line )
238 varName , line = r.groups()
241 resetUserAgent( env )
243 dot = env[ 'current' ]
244 if isinstance( dot , Sequence ) :
245 assert len( dot ) == 1 , 'expected a sequence of 1 item'
248 result , dp , de = evaluate( line , dot , env , functions )
249 except KeyboardInterrupt :
250 print '[Interrupted]'
253 if varName is not None :
254 env[ varName ] = result
257 printResult( result , mode , displayLocation )
258 except KeyboardInterrupt : # FIXME: Don't work.
259 print '[Interrupted]'
261 print '-- %fs(parse) + %fs(eval) --' % ( dp , de )
262 except KeyboardInterrupt :
266 except XPathError , e :
267 print 'XPATH-ERROR [%s]' % ( e , )
269 print 'GENERIC-ERROR [%s]' % ( e , )
272 if __name__ == '__main__' :