--- /dev/null
+# -*- coding: iso-8859-1 -*-
+
+#
+# A quickly made parser for the mail.filter configuration file.
+#
+# The parser could be interesting for some other uses though.
+#
+
+import re
+import types
+
+#-----------------------------------------------------------------------------
+
+class ParserError( Exception ) : pass  # Raised by Parser.next() when none of the requested tokens matches.
+
+class Parser :  # Regex-driven tokenizer over a text buffer; also tracks the current line/column.
+
+ def __init__( self , tokens , text ) :  # tokens: mapping of token name -> regex (pattern string or compiled); text: the input to scan.
+
+ self.tokenMatches = {}  # token name -> pattern object used by peek()
+ for k , v in tokens.items() :
+ if type( v ) in types.StringTypes :  # pattern given as a string: compile it (types.StringTypes is Python 2 only)
+ v = re.compile( v )
+ self.tokenMatches[ k ] = v
+ self.text = text
+ self.pos = 0  # offset in self.text where the next match is attempted
+ self.tokensToIgnore = ()  # token names that snext() consumes and skips; set through ignore()
+ self.x = 1  # current column, starts at 1
+ self.y = 1  # current line, starts at 1
+
+ def ignore( self , *tokens ) :  # Replace the set of token names that snext() silently skips.
+
+ self.tokensToIgnore = tokens
+
+ def peek( self , *set ) :  # Return a list of ( name , matchedText ) for each requested token matching at self.pos; consumes nothing. NOTE(review): `set` shadows the builtin.
+
+ tokens = []
+ if not set :  # no names given: try every known token
+ set = self.tokenMatches.keys()
+ for tokenName in set :
+ tokenMatch = self.tokenMatches.get( tokenName )
+ if not tokenMatch : continue  # unknown token names are skipped without error
+ m = tokenMatch.match( self.text , self.pos )
+ if m :
+ tk = m.group( 0 )
+ tokens.append( ( tokenName , tk ) )
+ tokens.sort( lambda a , b : cmp( len( a[ 1 ] ) , len( b[ 1 ] ) ) )  # ascending by matched length (Python 2 cmp-style sort): shortest match first
+ return tokens
+
+ def advance( self , n ) :  # Move forward by n characters (or by len( n ) when n is a string), updating self.x and self.y.
+
+ if type( n ) in types.StringTypes :
+ n = len( n )
+ p = self.text.rfind( '\n' , self.pos , self.pos + n )  # last newline inside the consumed span, or -1
+ if p == -1 :
+ self.x += n
+ else :
+ self.x = self.pos + n - p  # column is counted from the last consumed newline
+ self.y += self.text.count( '\n' , self.pos , self.pos + n )  # one more line per consumed newline
+ self.pos += n
+
+ def snext( self , *set ) :  # Consume and return the next ( name , text ) among `set`, skipping ignored tokens; returns a falsy value when nothing matches.
+
+ set += self.tokensToIgnore  # ignored tokens are always candidates so they can be consumed
+ r = None
+ while 1 :
+ r = self.peek( *set )
+ if not r :
+ break
+ r = r[ 0 ]  # NOTE(review): peek() sorts shortest first, so this picks the shortest match - confirm that longest match is not intended
+ self.advance( r[ 1 ] )
+ if r[ 0 ] not in self.tokensToIgnore :
+ break
+ return r
+
+ def next( self , *set ) :  # Like snext(), but raises ParserError when none of the requested tokens matches.
+
+ r = self.snext( *set )
+ if not r :
+ r = self.peek()  # try every token, only to report what was found instead
+ if r :
+ misc = 'found %r but ' % r[ 0 ][ 1 ]
+ else :
+ misc = ''
+ raise ParserError( '%s, %sexpected one of the following tokens: %r'
+ % ( self.getPos() , misc , list( set ) ) )
+ return r
+
+ def getPos( self ) :  # Current position formatted as 'at line Y, column X'.
+
+ return 'at line %d, column %d' % ( self.y , self.x )
+
+ def point( self ) :  # Return two '\n'-terminated lines, each prefixed with 'line N: ': the current source line, then a '^' under the current offset.
+
+ lineStart = self.text.rfind( '\n' , 0 , self.pos + 1 ) + 1  # offset just past the last newline at or before self.pos (0 when there is none)
+ lineEnd = self.text.find( '\n' , self.pos + 1 )
+ if lineEnd == -1 :  # no further newline: the line runs to the end of the text
+ lineEnd = len( self.text )
+ prefix = 'line %s: ' % self.y
+ r = ''
+ r += prefix + self.text[ lineStart : lineEnd ].replace( '\t' , ' ' ) + '\n'  # tabs are replaced, presumably so the '^' below lines up
+ r += prefix + ' ' * ( self.pos - lineStart ) + '^' + '\n'
+ return r
# -*- coding: iso-8859-1 -*-
+import basicparser
+import basicvalidator
+
import re
+import types
import sys
+import os
+import stat
+
+try :
+ import cPickle as pickle
+except :
+ import pickle
-__all__ = [ 'parse' , 'printTree' ]
+class Error( Exception ) : pass  # Raised by parse() on a syntax error; the message carries the offending line, a '^' marker and the parser message.
#
-# expr := <item> <item>* { <expr>* } | <item> <item>* ';'
-# item := [_a-zA-Z][0-9a-zA-Z]* | '...' | "..."
+# The configuration parser checks *syntax*.
+# The validator checks *structure*, and possibly some values.
#
+#--[ Parser ]-----------------------------------------------------------------
+
+def parseString( s ) :  # Strip the first and last character of a quoted token; return any other token unchanged. Backslash escapes are left as written.
+
+ if s.startswith( "'" ) :
+ return s[ 1 : -1 ]
+ elif s.startswith( '"' ) :  # NOTE(review): the 'string' pattern in parse() has no double-quoted alternative, so this branch looks unreachable from there - confirm
+ return s[ 1 : -1 ]
+ else :
+ return s
+
+def parseConf( p , meta = None ) :  # Parse the whole input of parser p into a tree; meta is stored unchanged in every node's position tuple.
+
+ def parseNode() :  # Parse one 'KEYWORD value* ;' or 'KEYWORD value* { node* }' and return ( kw , values , subNodes , ( line , column , meta ) ).
+
+ x , y = p.x , p.y  # position recorded before the keyword is read
+ #
+ # Node name: a single 'keyword' token (ParserError otherwise).
+ #
+ t = p.next( 'keyword' )
+ kw = t[ 1 ]
+ #
+ # Values: zero or more 'string' tokens, terminated by '{' or ';'.
+ #
+ values = []
+ while 1 :
+ t = p.next( 'string' , '{' , ';' )
+ if t[ 0 ] in [ '{' , ';' ] : break
+ values.append( parseString( t[ 1 ] ) )
+ #
+ # Contents: nested nodes up to the matching '}' (only after a '{').
+ #
+ subNodes = []
+ if t[ 0 ] == '{' :
+ subNodes = []
+ while not p.snext( '}' ) :  # anything that is not '}' must start a nested node
+ r = parseNode()
+ subNodes.append( r )
+ return ( kw , values , subNodes , ( y , x , meta ) )
+
+ nodes = []
+ #
+ # Parse the entire file: read nodes until the 'eot' token matches.
+ #
+ while not p.snext( 'eot' ) :
+ r = parseNode()
+ if not r : break  # parseNode() as written always returns a 4-tuple, so this guard does not fire
+ nodes.append( r )
+ return ('__root__',None,nodes,None)  # synthetic root node: no values and no position info
+
+def parse( doc , filename = None ) :  # Parse configuration text `doc` into a tree (see parseConf); raises Error on a syntax error. filename is only stored in the nodes.
+
+ tokenMatches = {  # token name -> regex source, compiled by basicparser.Parser
+ 'eot' : '$' ,
+ 'blank' : r'\s+' ,
+ 'keyword' : r'[_a-zA-Z][_a-zA-Z0-9]+' ,  # NOTE(review): the trailing '+' requires at least two characters, so one-letter keywords cannot match - confirm whether '*' was meant
+ 'string' : r'[_a-zA-Z][_a-zA-Z0-9]+|\'(?:[^\\\']|\\.)*\'' ,  # a bare word, or a single-quoted string that may contain backslash escapes
+ 'comment' : r'#[^\n]*(?:\n|$)' ,
+ '{' : '{' ,
+ '}' : '}' ,
+ ';' : ';'
+ }
+ p = basicparser.Parser( tokenMatches , doc )
+ p.ignore( 'blank' , 'comment' )  # whitespace and comments are skipped between tokens
+ try :
+ return parseConf( p , filename )
+ except basicparser.ParserError , e :  # Python 2 except syntax
+ msg = p.point()  # source line plus '^' marker at the failure offset
+ msg += str( e )
+ raise Error( msg )
+
+#--[ Validator ]--------------------------------------------------------------
+
#
-# >>> confparser.printTree( confparser.parse( 'foo; bar;\nc; sub { sub-foo; sub-bar; }' ) )
-# foo ; # 1:1
-# bar ; # 1:6
-# c ; # 2:1
-# sub { # 2:4
-# sub-foo ; # 2:10
-# sub-bar ; # 2:19
-# }
-# >>>
+# Check mail.filter configuration file structure.
#
-# FIXME: Build "preparsed" file ?
+from basicvalidator import *
+
+#-----------------------------------------------------------------------------
+
+class Keyword( Validator ) : pass  # Used by ruleValidator() for 'broken', 'infected' and 'spam'; adds nothing to Validator (presumably from basicvalidator - confirm).
+
+class Header( Validator ) :  # Validator for 'header HEADER-NAME MATCH-TYPE MATCH-ARGUMENT' rules.
+
+ allowedMatches = [ 'is' , 'contains' , 'match' ]  # accepted MATCH-TYPE values
+
+ def check( self , values ) :  # Report an error unless there are exactly 3 values and the second is an allowed match type.
+
+ if len( values ) != 3 :
+ error( 'header expect 3 arguments: HEADER-NAME MATCH-TYPE MATCH-ARGUMENT.' )  # error() is not defined in this file; presumably provided by basicvalidator - confirm
+ elif values[ 1 ] not in self.allowedMatches :
+ error( '%r is not an allowed match type. Allowed matches type are: %r'
+ % ( values[ 1 ] , self.allowedMatches ) )
+
+#-----------------------------------------------------------------------------
+
+class Logical( Validator , MixinNonEmpty ) :  # Used by ruleValidator() for 'or', 'and' and 'not'; children are validated as rules.
+
+ def descend( self , item ) :  # Count the child and return the validator for child keyword `item`.
+
+ self.children += 1  # counter is not initialised here; presumably set up and checked by MixinNonEmpty - confirm
+ return ruleValidator( item )
+
+class Reject( Validator , MixinNonEmpty ) :  # Validator for top-level 'reject CODE { .. }' nodes.
+
+ def descend( self , item ) :  # Count the child and return the validator for child keyword `item`.
+
+ self.children += 1  # counter is not initialised here; presumably set up and checked by MixinNonEmpty - confirm
+ return ruleValidator( item )
+
+ def check( self , values ) :  # Report an error unless exactly one value (the CODE) is given.
+
+ if len( values ) != 1 :
+ error( 'reject CODE { .. }' )
+
+class Folder( Validator , MixinNonEmpty ) :  # Validator for top-level 'folder FOLDER-NAME { .. }' nodes.
-# TODO: Rewrite all of this more clearly
+ def descend( self , item ) :  # Count the child and return the validator for child keyword `item`.
-def advancePosition( text , x , y ) :
+ self.children += 1  # counter is not initialised here; presumably set up and checked by MixinNonEmpty - confirm
+ return ruleValidator( item )
- p = text.rfind( '\n' )
- if p == -1 :
- x += len( text )
+ def check( self , values ) :  # Report an error unless exactly one value (the FOLDER-NAME) is given.
+
+ if len( values ) != 1 :
+ error( 'folder FOLDER-NAME { .. }' )
+
+#-----------------------------------------------------------------------------
+
+def ruleValidator( item ) :  # Map a rule keyword to a new validator instance: Keyword, Logical or Header; anything else is reported through error().
+
+ if item in [ 'broken' , 'infected' , 'spam' ] :
+ return Keyword( item )
+ elif item in [ 'or' , 'and' , 'not' ] :
+ return Logical( item )
+ elif item == 'header' :
+ return Header( item )
else :
- y += text.count( '\n' )
- x = len( text ) - ( p + 1 ) + 1
- return x , y
-
-unslashMap = {
- 'n' : '\n' ,
- 't' : '\t'
-}
-
-def unslash( s ) :
-
- pos = 0
- while 1 :
- p = s.find( '\\' , pos )
- if p == -1 or p == len( s ) - 1 :
- break
- c = s[ p + 1 ]
- c = unslashMap.get( c , c )
- s = s[ : p ] + c + s[ p + 2 : ]
- pos = p + 2
- return s
+ error( 'unexpected keyword %r.' % item )  # presumably error() raises; if it returns, this function yields None - confirm
+
+#-----------------------------------------------------------------------------
+
+class Root( Validator ) :  # Validator for the top level of the file: only 'reject' and 'folder' nodes are accepted.
+
+ def descend( self , item ) :  # Return the validator for top-level keyword `item`, or report an unexpected keyword.
+
+ if item == 'reject' :
+ return Reject( item )
+ elif item == 'folder' :
+ return Folder( item )
+ else :
+ error( 'unexpected keyword %r.' % item )
+
+ def values( self , values ) :  # NOTE(review): sibling validators define check( values ), not values() - confirm which name the validator framework calls
+
+ raise Exception( 'Internal error' )
+
+#--[ Read&Write configuration ]-----------------------------------------------
+
+def changedDate( filename ) :  # Return the st_ctime of `filename` from os.stat(), or None when it cannot be obtained.
+
+ try :
+ return os.stat( filename )[ stat.ST_CTIME ]  # NOTE(review): ST_CTIME is the metadata-change time on Unix, not the modification time (ST_MTIME) - confirm this is intended
+ except :  # bare except: every failure is reported as None
+ return
#
-# instruction-end: ;
-# begin-block: {
-# end-block: }
-# keyword: [a-z0-9\-_]+
-# string: \'(?:\\\'|[^\\\'])+\'
-# string: "(?:\\\"|[^\\"])+"
-# comment: #[^\n]*
+# Return None | ( tree , isValid )
#
-class Tokeniser :
+def readCachedConfiguration( filename ) :  # Return the object unpickled from filename + '.cache', or None when the cache is missing, not newer than the source, or unreadable.
+ cachedFilename = filename + '.cache'
+ #
+ # The cache is used only when it is strictly newer than the source file.
+ #
+ dateCached = changedDate( cachedFilename )
+ if not dateCached : return  # no cache file
+ dateSource = changedDate( filename )
+ if not dateSource : return  # no source file
+ if dateCached <= dateSource : return  # cache is not newer than the source
#
- # FIXME: Instead of truncating self.str, keep a position ?
#
- def __init__( self , str ) :
-
- self.str = str
- self.lineNumber = 1
- self.colNumber = 1
- self.reBlank = re.compile( r'^\s*' )
- self.reParser = re.compile( '^'
- '('
- ';' # end-of-statement
- '|'
- r'\{' # start-of-block
- '|'
- r'\}' # end-of-block
- '|'
- r'(?:[-_.a-z0-9]+|\*)' # identifier
- '|'
- r"'[^']*'" # quoted string
- '|'
- r"'''.+?'''" # quoted string
- '|'
- r'"(?:\\"|[^"])*"' # quoted string
- '|'
- r'#[^\n]*' # comment
- ')' ,
- re.I|re.S )
-
- def next( self , __empty = [ None , None , None ] ) :
-
- r = self.reBlank.search( self.str )
- if r != None :
- blank = r.group( 0 )
- self.colNumber , self.lineNumber = advancePosition( blank , self.colNumber , self.lineNumber )
- self.str = self.str[ r.end( 0 ) : ]
-
- if self.str == '' : return __empty
-
- # Match the next token
- r = self.reParser.search( self.str )
- if r == None : return [ False , self.lineNumber , self.colNumber ]
-
- # Remove parsed text from the buffer
- self.str = self.str[ r.end( 0 ) : ]
-
- token = r.group( 0 )
-
- # Keep current position
- tokenLine = self.lineNumber
- tokenColumn = self.colNumber
-
- # Advance position after token
- self.colNumber , self.lineNumber = advancePosition( token , self.colNumber , self.lineNumber )
-
- # Return the token and its position
- return token , tokenLine , tokenColumn
+ # Load the pickled cache; any failure is treated as "no cache".
+ try :
+ r = pickle.load( open( cachedFilename ) )  # NOTE(review): unpickling executes whatever the file encodes, so the cache file must be trusted; the file object is never closed explicitly
+ except :
+ return
+ return r  # writeCachedConfiguration() stores a ( tree , isValid ) tuple here
-#
-# Parse configuration
-#
-def parse( str , relax = False , warn = False , meta = None ) :
-
- stack = [ ( 'root' , [] , [] , ( None , None , meta ) ) ]
- cmd = None
- newElement = True
- tok = Tokeniser( str )
- lastLine , lastColumn = 0 , 0
- while 1 :
- item , line , column = tok.next()
- if item == None : break
- if item == False :
- raise Exception( 'Syntax error at line %s, column %s' % ( line , column ) )
- lastLine = line
- lastColumn = column
- if item.startswith( '#' ) : continue
- if relax :
- if column == 1 and len( stack ) > 1 and item != '}' :
- while len( stack ) > 1 :
- cmd = stack[ -1 ]
- stack = stack[ : -1 ]
- if cmd[ 0 ] != 'discard' :
- stack[ -1 ][ 2 ].append( cmd )
- newElement = True
- print '** Error recovered before line %s (missing `}\' ?)' % line
- if item == '}' :
- if not newElement and cmd != None :
- raise Exception( 'Missing semicolon before line %s, column %s' % ( line , column ) )
- cmd = stack[ -1 ]
- stack = stack[ : -1 ]
- if len( stack ) == 0 :
- raise Exception( 'Unexpected } at line %s, column %s' % ( line , column ) )
- if cmd[ 0 ] != 'discard' :
- stack[ -1 ][ 2 ].append( cmd )
- newElement = True
- elif newElement :
- if item in [ ';' , '{' , '}' ] :
- raise Exception( 'Unexpected token `%s\' at line %s, column %s' % ( item , line , column ) )
- elif item.find( '\n' ) != -1 :
- raise Exception( 'Unexpected newline character at line %s, column %s' % ( line , column + item.find( '\n' ) ) )
- cmd = ( item , [] , [] , ( line , column , meta ) )
- newElement = False
- elif item == ';' :
- stack[ -1 ][ 2 ].append( cmd )
- cmd = None
- newElement = True
- elif item == '{' :
- stack.append( cmd )
- newElement = True
+def writeCachedConfiguration( filename , tree , isValid ) :  # Best-effort: pickle ( tree , isValid ) into filename + '.cache'; every error is ignored.
+
+ try :
+ pickle.dump( ( tree , isValid ) , open( filename + '.cache' , 'w' ) )  # NOTE(review): the file object is never closed explicitly, so flushing relies on garbage collection
+ except :  # bare except: a failed cache write is deliberately silent
+ pass
+
+def readConfiguration( filename ) :  # Load the configuration from the cache or by parsing `filename`, validate it and refresh the cache. NOTE(review): no return statement is visible, so callers get None - confirm.
+
+ try :
+ #
+ # 1. Read from cache file (None when there is no usable cache)
+ #
+ r = readCachedConfiguration( filename )
+ cached = False  # NOTE(review): `cached` is assigned but never read in this function
+ if r :
+ conf , isValid = r
+ cached = True
else :
- if item.startswith( "'''" ) :
- item = item[ 3 : -3 ]
- elif item.startswith( '"' ) :
- item = unslash( item[ 1 : -1 ] )
- elif item.startswith( "'" ) :
- item = item[ 1 : -1 ]
- if item.find( '\n' ) != -1 :
- print '** Warning: string with newline character(s)'
- cmd[ 1 ].append( item )
- if len( stack ) != 1 or not newElement :
- raise Exception( 'Unexpected end of file (last token was at line %s, column %s)' % ( lastLine , lastColumn ) )
- return stack[ -1 ]
+ isValid = False
+ #
+ # 2. Parse the file
+ #
+ conf = open( filename ).read()
+ conf = parse( conf , filename )
+ if not isValid :
+ #
+ # 3. Validate it (the result of checkConf() is not used here)
+ #
+ basicvalidator.checkConf( conf , Root )
+ #
+ # 4. Keep cached result
+ #
+ writeCachedConfiguration( filename , conf , isValid )  # NOTE(review): isValid is never set to a true value in this function, so a false flag is what gets cached - confirm
+ except Exception , e :  # Python 2 except syntax; every failure is re-raised as a plain Exception naming the file
+ raise Exception( 'While reading file %s:\n%s' % ( filename , str( e ) ) )
+
+#--[ Dump configuration tree ]------------------------------------------------
-#
-# Helper function to dump configuration
-#
def printTreeInner( t , prt = sys.stdout.write , prefix = '' ) :
prt( prefix )
prt( t[ 0 ] )
for kw in t[ 1 ] :
prt( ' ' + kw )
- if len( t[ 2 ] ) > 0 :
+ if t[ 2 ] :  # node has sub-nodes: open a block; otherwise end the statement with ';'
prt( ' {' )
else :
prt( ' ;' )
- if True :
+ if t[ 3 ] :  # position info ( line , column , meta ); parseConf() sets it to None on the root node
prt( ' # ' )
if t[ 3 ][ 2 ] :
- prt( '%r:' % t[ 3 ][ 2 ] )
+ prt( '%s:' % t[ 3 ][ 2 ] )  # meta, i.e. the file name passed to parse(), when there is one
prt( '%s:%s' % ( t[ 3 ][ 0 ] , t[ 3 ][ 1 ] ) )
prt( '\n' )
- if len( t[ 2 ] ) > 0 :
+ if t[ 2 ] :  # print the sub-nodes with a longer prefix, then close the block
for sub in t[ 2 ] :
printTreeInner( sub , prt , prefix + ' ' )
prt( prefix )
prt( '}\n' )
-#
-# Dump configuration
-#
def printTree( t ) :
- if len( t[ 2 ] ) > 0 :
- for sub in t[ 2 ] :
- printTreeInner( sub )
+ for sub in t[ 2 ] or [] :  # print each child of node t; `or []` also accepts a node whose sub-node list is None
+ printTreeInner( sub )
+
+def main() :  # Manual check: parse the file 'fred.mf' from the current directory and print the resulting tree.
+
+ doc = open( 'fred.mf' ).read()
+ printTree( parse( doc , 'fred.mf' ) )
+
+if __name__ == '__main__' :  # run main() only when executed as a script
+ main()
+++ /dev/null
-from validator import *
-
-#
-# Configuration parser check *syntax*.
-# Validator check *structure*, and eventually some values.
-#
-
-#-----------------------------------------------------------------------------
-
-class Keyword( Validator ) : pass
-
-class Header( Validator ) :
-
- allowedMatches = [ 'is' , 'contains' , 'match' ]
-
- def check( self , values ) :
-
- if len( values ) != 3 :
- error( 'header HEADER-NAME MATCH-TYPE MATCH-ARGUMENT ;' )
- elif values[ 1 ] not in self.allowedMatches :
- error( 'Allowed matches type in header rule are: %r' % self.allowedMatches )
-
-#-----------------------------------------------------------------------------
-
-class Logical( Validator , MixinNonEmpty ) :
-
- def descend( self , item ) :
-
- self.children += 1
- return ruleValidator( item )
-
-class Reject( Validator , MixinNonEmpty ) :
-
- def descend( self , item ) :
-
- self.children += 1
- return ruleValidator( item )
-
- def check( self , values ) :
-
- if len( values ) != 1 :
- error( 'reject CODE { .. }' )
-
-class Folder( Validator , MixinNonEmpty ) :
-
- def descend( self , item ) :
-
- self.children += 1
- return ruleValidator( item )
-
- def check( self , values ) :
-
- if len( values ) != 1 :
- error( 'folder FOLDER-NAME { .. }' )
-
-#-----------------------------------------------------------------------------
-
-def ruleValidator( item ) :
-
- if item in [ 'broken' , 'infected' , 'spam' ] :
- return Keyword( item )
- elif item in [ 'or' , 'and' , 'not' ] :
- return Logical( item )
- elif item == 'header' :
- return Header( item )
- else :
- error( 'Invalid keyword `%r\'.' % item )
-
-#-----------------------------------------------------------------------------
-
-class Root( Validator ) :
-
- def descend( self , item ) :
-
- if item == 'reject' :
- return Reject( item )
- elif item == 'folder' :
- return Folder( item )
- else :
- error( 'Invalid keyword `%r\'.' % item )
-
- def values( self , values ) :
-
- raise Exception( 'Internal error' )
-
-#-----------------------------------------------------------------------------
-
-def checkConf( confNode ) :
-
- checkConf.lastNode = confNode
-
- def _checkConf( confNode , syntaxNode ) :
-
- checkConf.lastNode = confNode
- name , values , contents , meta = confNode
- r = syntaxNode.descend( name )
- r.check( values )
- for item in contents :
- _checkConf( item , r )
- r.valid()
-
- name , values , contents , info = confNode
- root = Root( '__root__' )
- try :
- for item in contents :
- _checkConf( item , root )
- except ValidatorError , e :
- return e , checkConf.lastNode
-
-def checkFile( filename ) :
-
- import confparser
-
- doc = open( filename ).read()
- doc = confparser.parse( doc )
- r = checkConf( doc )
- print '%s:' % filename ,
- if r :
- exception , node = r
- meta = node[ 3 ]
- msg = 'at line %s, column %s' % ( meta[ 0 ] , meta[ 1 ] )
- if meta[ 2 ] :
- msg = 'in file %s, ' % meta[ 2 ] + msg
- print '%s: %s' % ( msg , exception )
- else :
- print 'ok'