# -*- coding: iso-8859-1 -*-
+import basicparser
+import basicvalidator
+
import re
+import types
import sys
+import os
+import stat
-__all__ = [ 'parse' , 'printTree' ]
+try :
+ import cPickle as pickle
+except :
+ import pickle
-# FIXME: Build "preparsed" file ?
+class Error( Exception ) :
+    """Raised by parse() when the document contains a syntax error."""
-# TODO: Rewrite all of this more clearly
+#
+# The configuration parser checks *syntax*.
+# The validator checks *structure*, and possibly some values.
+#
-def advancePosition( text , x , y ) :
+#--[ Parser ]-----------------------------------------------------------------
- p = text.rfind( '\n' )
- if p == -1 :
- x += len( text )
+def parseString( s ) :
+ #
+ # Return the text carried by a `string' token.
+ # A token starting with a single or a double quote loses its first and
+ # last characters (the quotes); any other token is returned unchanged.
+ # Backslash sequences inside the quotes are kept as they are.
+ # NOTE(review): the `string' pattern in parse() only matches single-quoted
+ # text, so the double-quote branch looks unreachable from there -- confirm.
+ #
+
+ if s.startswith( "'" ) :
+ return s[ 1 : -1 ]
+ elif s.startswith( '"' ) :
+ return s[ 1 : -1 ]
else :
- y += text.count( '\n' )
- x = len( text ) - ( p + 1 ) + 1
- return x , y
-
-unslashMap = {
- 'n' : '\n' ,
- 't' : '\t'
-}
-
-def unslash( s ) :
-
- pos = 0
- while 1 :
- p = s.find( '\\' , pos )
- if p == -1 or p == len( s ) - 1 :
- break
- c = s[ p + 1 ]
- c = unslashMap.get( c , c )
- s = s[ : p ] + c + s[ p + 2 : ]
- pos = p + 2
- return s
+ return s
-#
-# instruction-end: ;
-# begin-block: {
-# end-block: }
-# keyword: [a-z0-9\-_]+
-# string: \'(?:\\\'|[^\\\'])+\'
-# string: "(?:\\\"|[^\\"])+"
-# comment: #[^\n]*
-#
-class Tokeniser :
+def parseConf( p , meta = None ) :
+ #
+ # Build the configuration tree from the tokens delivered by parser `p'.
+ # Every node is a tuple ( keyword , values , subNodes , ( y , x , meta ) )
+ # and the result is the root node ('__root__',None,nodes,None).
+ # `meta' is stored untouched in every node (parse() passes the file name).
+ #
+
+ def parseNode() :
+ #
+ # Parse one `keyword value.. ;' or `keyword value.. { nodes }' item.
+ # The position is read from the parser before the keyword is consumed.
+ # NOTE(review): p.x / p.y look like column / line -- confirm in basicparser.
+ #
+ x , y = p.x , p.y
+ #
+ # Node name
+ #
+ t = p.next( 'keyword' )
+ kw = t[ 1 ]
+ #
+ # Values
+ #
+ values = []
+ while 1 :
+ # t[ 0 ] is compared with token names and t[ 1 ] is handed to
+ # parseString, so t is presumably ( type , text ) -- confirm.
+ t = p.next( 'string' , '{' , ';' )
+ if t[ 0 ] in [ '{' , ';' ] : break
+ values.append( parseString( t[ 1 ] ) )
+ #
+ # Contents
+ #
+ subNodes = []
+ if t[ 0 ] == '{' :
+ # (subNodes is already an empty list here; this assignment is redundant)
+ subNodes = []
+ while not p.snext( '}' ) :
+ r = parseNode()
+ subNodes.append( r )
+ return ( kw , values , subNodes , ( y , x , meta ) )
+
+ nodes = []
#
- # FIXME: Instead of truncating self.str, keep a position ?
+ # Parse the entire file
#
- def __init__( self , str ) :
-
- self.str = str
- self.lineNumber = 1
- self.colNumber = 1
- self.reBlank = re.compile( r'^\s*' )
- self.reParser = re.compile( '^'
- '('
- ';' # end-of-statement
- '|'
- r'\{' # start-of-block
- '|'
- r'\}' # end-of-block
- '|'
- r'(?:[-_.a-z0-9]+|\*)' # identifier
- '|'
- r"'[^']*'" # quoted string
- '|'
- r"'''.+?'''" # quoted string
- '|'
- r'"(?:\\"|[^"])*"' # quoted string
- '|'
- r'#[^\n]*' # comment
- ')' ,
- re.I|re.S )
-
- def next( self , __empty = [ None , None , None ] ) :
-
- r = self.reBlank.search( self.str )
- if r != None :
- blank = r.group( 0 )
- self.colNumber , self.lineNumber = advancePosition( blank , self.colNumber , self.lineNumber )
- self.str = self.str[ r.end( 0 ) : ]
-
- if self.str == '' : return __empty
-
- # Match the next token
- r = self.reParser.search( self.str )
- if r == None : return [ False , self.lineNumber , self.colNumber ]
-
- # Remove parsed text from the buffer
- self.str = self.str[ r.end( 0 ) : ]
-
- token = r.group( 0 )
-
- # Keep current position
- tokenLine = self.lineNumber
- tokenColumn = self.colNumber
-
- # Advance position after token
- self.colNumber , self.lineNumber = advancePosition( token , self.colNumber , self.lineNumber )
-
- # Return the token and its position
- return token , tokenLine , tokenColumn
+ while not p.snext( 'eot' ) :
+ r = parseNode()
+ # parseNode() always returns a non-empty tuple, so this test never breaks.
+ if not r : break
+ nodes.append( r )
+ return ('__root__',None,nodes,None)
+
+def parse( doc , filename = None ) :
+    """Parse the configuration text `doc' and return its tree.
+
+    The text is tokenised by basicparser.Parser with the token table below
+    (blanks and comments are ignored) and handed to parseConf, which receives
+    `filename' as the `meta' value recorded in every node.
+
+    Raises Error when basicparser reports a ParserError; the message is the
+    parser's point() output followed by the text of the original error.
+    """
+    grammar = {
+        'eot' : '$' ,
+        'blank' : r'\s+' ,
+        'keyword' : r'[_a-zA-Z][_a-zA-Z0-9]*' ,
+        'string' : r'[_a-zA-Z][_a-zA-Z0-9]*|\'(?:[^\\\']|\\.)*\'' ,
+        'comment' : r'#[^\n]*(?:\n|$)' ,
+        '{' : '{' ,
+        '}' : '}' ,
+        ';' : ';'
+    }
+    parser = basicparser.Parser( grammar , doc )
+    parser.ignore( 'blank' , 'comment' )
+    try :
+        tree = parseConf( parser , filename )
+    except basicparser.ParserError , e :
+        # Prefix the parser's own message with where the parser stopped.
+        location = parser.point()
+        raise Error( location + str( e ) )
+    return tree
+
+#--[ Validator ]--------------------------------------------------------------
#
-# Parse configuration
+# Check mail.filter configuration file structure.
#
-def parse( str , relax = False , warn = False , meta = None ) :
-
- stack = [ ( 'root' , [] , [] , ( None , None , meta ) ) ]
- cmd = None
- newElement = True
- tok = Tokeniser( str )
- lastLine , lastColumn = 0 , 0
- while 1 :
- item , line , column = tok.next()
- if item == None : break
- if item == False :
- raise Exception( 'Syntax error at line %s, column %s' % ( line , column ) )
- lastLine = line
- lastColumn = column
- if item.startswith( '#' ) : continue
- if relax :
- if column == 1 and len( stack ) > 1 and item != '}' :
- while len( stack ) > 1 :
- cmd = stack[ -1 ]
- stack = stack[ : -1 ]
- if cmd[ 0 ] != 'discard' :
- stack[ -1 ][ 2 ].append( cmd )
- newElement = True
- print '** Error recovered before line %s (missing `}\' ?)' % line
- if item == '}' :
- if not newElement and cmd != None :
- raise Exception( 'Missing semicolon before line %s, column %s' % ( line , column ) )
- cmd = stack[ -1 ]
- stack = stack[ : -1 ]
- if len( stack ) == 0 :
- raise Exception( 'Unexpected } at line %s, column %s' % ( line , column ) )
- if cmd[ 0 ] != 'discard' :
- stack[ -1 ][ 2 ].append( cmd )
- newElement = True
- elif newElement :
- if item in [ ';' , '{' , '}' ] :
- raise Exception( 'Unexpected token `%s\' at line %s, column %s' % ( item , line , column ) )
- elif item.find( '\n' ) != -1 :
- raise Exception( 'Unexpected newline character at line %s, column %s' % ( line , column + item.find( '\n' ) ) )
- cmd = ( item , [] , [] , ( line , column , meta ) )
- newElement = False
- elif item == ';' :
- stack[ -1 ][ 2 ].append( cmd )
- cmd = None
- newElement = True
- elif item == '{' :
- stack.append( cmd )
- newElement = True
+
+from basicvalidator import *
+
+#-----------------------------------------------------------------------------
+
+class Keyword( Validator ) :
+    """Validator returned by ruleValidator for `broken', `infected', `spam'
+    and `all'; it adds nothing to the base Validator."""
+
+class Header( Validator ) :
+    """Validator for a `header' rule: HEADER-NAME MATCH-TYPE MATCH-ARGUMENT."""
+
+    # Match types accepted as the second value of a `header' rule.
+    allowedMatches = [ 'is' , 'contains' , 'match' ]
+
+    def check( self , values ) :
+        """Call error() unless `values' has three items and a known match type."""
+        if len( values ) != 3 :
+            error( 'header expect 3 arguments: HEADER-NAME MATCH-TYPE MATCH-ARGUMENT.' )
+            return
+        matchType = values[ 1 ]
+        if matchType not in self.allowedMatches :
+            error( '%r is not an allowed match type. Allowed matches type are: %r'
+                   % ( matchType , self.allowedMatches ) )
+
+#-----------------------------------------------------------------------------
+
+class Logical( Validator , MixinNonEmpty ) :
+    """Validator returned by ruleValidator for the `or', `and' and `not' rules."""
+
+    def descend( self , item ) :
+        """Count one more child, then return the validator for rule `item'."""
+        # The counter is bumped first so it is updated even if the lookup fails.
+        self.children += 1
+        childValidator = ruleValidator( item )
+        return childValidator
+
+class Reject( Validator , MixinNonEmpty ) :
+    """Validator for a top-level `reject CODE { .. }' node."""
+
+    def descend( self , item ) :
+        """Count one more child, then return the validator for rule `item'."""
+        # The counter is bumped first so it is updated even if the lookup fails.
+        self.children += 1
+        childValidator = ruleValidator( item )
+        return childValidator
+
+    def check( self , values ) :
+        """Call error() unless exactly one value (the code) was given."""
+        if len( values ) == 1 :
+            return
+        error( 'reject CODE { .. }' )
+
+class Folder( Validator , MixinNonEmpty ) :
+    """Validator for a top-level `folder FOLDER-NAME { .. }' node."""
+
+    def descend( self , item ) :
+        """Count one more child, then return the validator for rule `item'."""
+        # The counter is bumped first so it is updated even if the lookup fails.
+        self.children += 1
+        childValidator = ruleValidator( item )
+        return childValidator
+
+    def check( self , values ) :
+        """Call error() unless exactly one value (the folder name) was given."""
+        if len( values ) == 1 :
+            return
+        error( 'folder FOLDER-NAME { .. }' )
+
+#-----------------------------------------------------------------------------
+
+def ruleValidator( item ) :
+    """Return the validator instance for the rule keyword `item'.
+
+    `broken', `infected', `spam' and `all' give a Keyword; `or', `and' and
+    `not' give a Logical; `header' gives a Header.  Any other keyword is
+    reported through error().
+    """
+    ruleTable = (
+        ( ( 'broken' , 'infected' , 'spam' , 'all' ) , Keyword ) ,
+        ( ( 'or' , 'and' , 'not' ) , Logical ) ,
+        ( ( 'header' , ) , Header ) ,
+    )
+    for keywords , validatorClass in ruleTable :
+        if item in keywords :
+            return validatorClass( item )
+    error( 'unexpected keyword %r.' % item )
+
+#-----------------------------------------------------------------------------
+
+class Root( Validator ) :
+ #
+ # Validator for the root of the configuration tree.
+ # Only `reject' and `folder' nodes are accepted directly under the root.
+ #
+
+ def descend( self , item ) :
+ # Return the validator for the top-level keyword `item', or report it
+ # through error() when it is neither `reject' nor `folder'.
+
+ if item == 'reject' :
+ return Reject( item )
+ elif item == 'folder' :
+ return Folder( item )
else :
- if item.startswith( "'''" ) :
- item = item[ 3 : -3 ]
- elif item.startswith( '"' ) :
- item = unslash( item[ 1 : -1 ] )
- elif item.startswith( "'" ) :
- item = item[ 1 : -1 ]
- if item.find( '\n' ) != -1 :
- print '** Warning: string with newline character(s)'
- cmd[ 1 ].append( item )
- if len( stack ) != 1 or not newElement :
- raise Exception( 'Unexpected end of file (last token was at line %s, column %s)' % ( lastLine , lastColumn ) )
- return stack[ -1 ]
+ error( 'unexpected keyword %r.' % item )
+
+ def values( self , values ) :
+ # Always raises: presumably the root is never expected to receive
+ # values (parseConf builds the root node with None as values) -- confirm.
+
+ raise Exception( 'Internal error' )
+
+#--[ Read&Write configuration ]-----------------------------------------------
+
+def changedDate( filename ) :
+    """Return the ST_CTIME field of os.stat( filename ), or None if stat fails.
+
+    NOTE(review): per the os.stat documentation ST_CTIME is the time of the
+    last metadata change on Unix but the creation time on Windows -- confirm
+    that this is the timestamp wanted for the cache freshness test.
+    """
+    try :
+        return os.stat( filename )[ stat.ST_CTIME ]
+    except Exception :
+        # Best effort: a file that cannot be examined simply has no date.
+        # `except Exception' (rather than a bare except) lets
+        # KeyboardInterrupt and SystemExit propagate.
+        return None
#
-# Helper function to dump configuration
+# Return None | ( tree , isValid )
#
+def readCachedConfiguration( filename ) :
+    """Return the ( tree , isValid ) pair cached for `filename', or None.
+
+    The cache lives in `filename' + '.cache'.  None is returned when either
+    file has no usable date, when the cache is not strictly newer than the
+    source, when the cache cannot be loaded, or when its content is not a
+    2-item tuple.
+
+    NOTE: unpickling can execute arbitrary code, so the cache file must be
+    as trusted as the configuration file itself.
+    """
+    cachedFilename = filename + '.cache'
+    #
+    # The cache is usable only if it is strictly newer than the source.
+    #
+    dateCached = changedDate( cachedFilename )
+    if not dateCached : return None
+    dateSource = changedDate( filename )
+    if not dateSource : return None
+    if dateCached <= dateSource : return None
+    #
+    # Load the cache; any failure just means "no cache".
+    # The file is opened in the same (default) mode the writer uses.
+    #
+    try :
+        f = open( cachedFilename )
+        try :
+            r = pickle.load( f )
+        finally :
+            f.close()
+    except Exception :
+        return None
+    #
+    # Callers unpack the result as ( tree , isValid ): reject anything else.
+    #
+    if not isinstance( r , tuple ) or len( r ) != 2 : return None
+    return r
+
+def writeCachedConfiguration( filename , tree , isValid ) :
+    """Pickle ( tree , isValid ) into `filename' + '.cache', best effort.
+
+    Failures are ignored on purpose: the cache is only an optimisation and
+    an unreadable or partial cache is discarded by the reader.
+    """
+    try :
+        f = open( filename + '.cache' , 'w' )
+        try :
+            pickle.dump( ( tree , isValid ) , f )
+        finally :
+            # Close explicitly so the data is flushed before anyone reads it.
+            f.close()
+    except Exception :
+        pass
+
+def readConfiguration( filename ) :
+    """Return the configuration tree of `filename', using the cache if fresh.
+
+    The tree comes from the cache when readCachedConfiguration returns one,
+    otherwise the file is read and parsed.  A tree not yet marked valid is
+    checked with basicvalidator.checkConf against Root and then cached.
+
+    Raises Exception( 'While reading file ...' ) wrapping any error raised
+    while reading, parsing or validating.
+    """
+    try :
+        #
+        # 1. Read from cache file
+        #
+        r = readCachedConfiguration( filename )
+        if r :
+            conf , isValid = r
+        else :
+            isValid = False
+            #
+            # 2. Parse the file
+            #
+            f = open( filename )
+            try :
+                conf = f.read()
+            finally :
+                f.close()
+            conf = parse( conf , filename )
+        if not isValid :
+            #
+            # 3. Validate it
+            #
+            basicvalidator.checkConf( conf , Root )
+            # Record the outcome, otherwise the cache always stores False and
+            # validation is repeated on every read.
+            # NOTE(review): assumes checkConf raises on an invalid tree -- confirm.
+            isValid = True
+            #
+            # 4. Keep cached result
+            #
+            writeCachedConfiguration( filename , conf , isValid )
+    except Exception , e :
+        raise Exception( 'While reading file %s:\n%s' % ( filename , str( e ) ) )
+    return conf
+
+#--[ Dump configuration tree ]------------------------------------------------
+
def printTreeInner( t , prt = sys.stdout.write , prefix = '' ) :
+ #
+ # Write node `t' and, recursively, its sub-nodes through `prt'.
+ # t is ( keyword , values , subNodes , position ) as built by parseConf.
+ # Each line starts with `prefix'; the keyword and values are followed by
+ # ` {' when the node has sub-nodes and ` ;' otherwise, then by a
+ # `# [meta:]y:x' comment when a position is present.
+ #
prt( prefix )
prt( t[ 0 ] )
for kw in t[ 1 ] :
prt( ' ' + kw )
- prt( ' (%s,%s,%r)' % ( t[ 3 ][ 0 ] , t[ 3 ][ 1 ] , t[ 3 ][ 2 ] ) )
- if len( t[ 2 ] ) > 0 :
- prt( ' {\n' )
+ if t[ 2 ] :
+ prt( ' {' )
+ else :
+ prt( ' ;' )
+ if t[ 3 ] :
+ prt( ' # ' )
+ if t[ 3 ][ 2 ] :
+ prt( '%s:' % t[ 3 ][ 2 ] )
+ prt( '%s:%s' % ( t[ 3 ][ 0 ] , t[ 3 ][ 1 ] ) )
+ prt( '\n' )
+
+ # Sub-nodes are written with a longer prefix, then the block is closed.
+ if t[ 2 ] :
for sub in t[ 2 ] :
printTreeInner( sub , prt , prefix + ' ' )
prt( prefix )
prt( '}\n' )
- else :
- prt( ' ;\n' )
-#
-# Dump configuration
-#
def printTree( t ) :
+ # Print every sub-node of the root node `t' with printTreeInner; the root
+ # itself is not printed.  A false t[ 2 ] (None or empty) prints nothing.
- if len( t[ 2 ] ) > 0 :
- for sub in t[ 2 ] :
- printTreeInner( sub )
+ for sub in t[ 2 ] or [] :
+ printTreeInner( sub )
+
+def main( filename = 'fred.mf' ) :
+    """Parse `filename' (default 'fred.mf') and print the resulting tree."""
+    f = open( filename )
+    try :
+        doc = f.read()
+    finally :
+        # Close the file explicitly instead of relying on garbage collection.
+        f.close()
+    printTree( parse( doc , filename ) )
+
+if __name__ == '__main__' :
+    main()