From baddd69ca12ba03c1f74c410cf09054335951231 Mon Sep 17 00:00:00 2001 From: Frederic Jolliton Date: Thu, 28 Oct 2004 06:55:48 +0000 Subject: [PATCH 1/1] Initital import of mail-filter This is the inital import of mail-filter project. At this point the filter should not be used in production ! git-archimport-id: frederic@jolliton.com--2004-private/mail-filter--main--0.1--base-0 --- confparser.py | 200 +++++++++++++++++ mail.filter | 710 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 910 insertions(+) create mode 100644 confparser.py create mode 100755 mail.filter diff --git a/confparser.py b/confparser.py new file mode 100644 index 0000000..123b402 --- /dev/null +++ b/confparser.py @@ -0,0 +1,200 @@ +# -*- coding: iso-8859-1 -*- + +import re +import sys + +__all__ = [ 'parse' , 'printTree' ] + +# FIXME: Build "preparsed" file ? + +# TODO: Rewrite all of this more clearly + +def advancePosition( text , x , y ) : + + p = text.rfind( '\n' ) + if p == -1 : + x += len( text ) + else : + y += text.count( '\n' ) + x = len( text ) - ( p + 1 ) + 1 + return x , y + +unslashMap = { + 'n' : '\n' , + 't' : '\t' +} + +def unslash( s ) : + + pos = 0 + while 1 : + p = s.find( '\\' , pos ) + if p == -1 or p == len( s ) - 1 : + break + c = s[ p + 1 ] + c = unslashMap.get( c , c ) + s = s[ : p ] + c + s[ p + 2 : ] + pos = p + 2 + return s + +# +# instruction-end: ; +# begin-block: { +# end-block: } +# keyword: [a-z0-9\-_]+ +# string: \'(?:\\\'|[^\\\'])+\' +# string: "(?:\\\"|[^\\"])+" +# comment: #[^\n]* +# +class Tokeniser : + + # + # FIXME: Instead of truncating self.str, keep a position ? 
+ # + def __init__( self , str ) : + + self.str = str + self.lineNumber = 1 + self.colNumber = 1 + self.reBlank = re.compile( r'^\s*' ) + self.reParser = re.compile( '^' + '(' + ';' # end-of-statement + '|' + r'\{' # start-of-block + '|' + r'\}' # end-of-block + '|' + r'(?:[-_.a-z0-9]+|\*)' # identifier + '|' + r"'[^']*'" # quoted string + '|' + r"'''.+?'''" # quoted string + '|' + r'"(?:\\"|[^"])*"' # quoted string + '|' + r'#[^\n]*' # comment + ')' , + re.I|re.S ) + + def next( self , __empty = [ None , None , None ] ) : + + r = self.reBlank.search( self.str ) + if r != None : + blank = r.group( 0 ) + self.colNumber , self.lineNumber = advancePosition( blank , self.colNumber , self.lineNumber ) + self.str = self.str[ r.end( 0 ) : ] + + if self.str == '' : return __empty + + # Match the next token + r = self.reParser.search( self.str ) + if r == None : return [ False , self.lineNumber , self.colNumber ] + + # Remove parsed text from the buffer + self.str = self.str[ r.end( 0 ) : ] + + token = r.group( 0 ) + + # Keep current position + tokenLine = self.lineNumber + tokenColumn = self.colNumber + + # Advance position after token + self.colNumber , self.lineNumber = advancePosition( token , self.colNumber , self.lineNumber ) + + # Return the token and its position + return token , tokenLine , tokenColumn + +# +# Parse configuration +# +def parse( str , relax = False , warn = False , meta = None ) : + + stack = [ ( 'root' , [] , [] , ( None , None , meta ) ) ] + cmd = None + newElement = True + tok = Tokeniser( str ) + lastLine , lastColumn = 0 , 0 + while 1 : + item , line , column = tok.next() + if item == None : break + if item == False : + raise Exception( 'Syntax error at line %s, column %s' % ( line , column ) ) + lastLine = line + lastColumn = column + if item.startswith( '#' ) : continue + if relax : + if column == 1 and len( stack ) > 1 and item != '}' : + while len( stack ) > 1 : + cmd = stack[ -1 ] + stack = stack[ : -1 ] + if cmd[ 0 ] != 'discard' : + 
stack[ -1 ][ 2 ].append( cmd ) + newElement = True + print '** Error recovered before line %s (missing `}\' ?)' % line + if item == '}' : + if not newElement and cmd != None : + raise Exception( 'Missing semicolon before line %s, column %s' % ( line , column ) ) + cmd = stack[ -1 ] + stack = stack[ : -1 ] + if len( stack ) == 0 : + raise Exception( 'Unexpected } at line %s, column %s' % ( line , column ) ) + if cmd[ 0 ] != 'discard' : + stack[ -1 ][ 2 ].append( cmd ) + newElement = True + elif newElement : + if item in [ ';' , '{' , '}' ] : + raise Exception( 'Unexpected token `%s\' at line %s, column %s' % ( item , line , column ) ) + elif item.find( '\n' ) != -1 : + raise Exception( 'Unexpected newline character at line %s, column %s' % ( line , column + item.find( '\n' ) ) ) + cmd = ( item , [] , [] , ( line , column , meta ) ) + newElement = False + elif item == ';' : + stack[ -1 ][ 2 ].append( cmd ) + cmd = None + newElement = True + elif item == '{' : + stack.append( cmd ) + newElement = True + else : + if item.startswith( "'''" ) : + item = item[ 3 : -3 ] + elif item.startswith( '"' ) : + item = unslash( item[ 1 : -1 ] ) + elif item.startswith( "'" ) : + item = item[ 1 : -1 ] + if item.find( '\n' ) != -1 : + print '** Warning: string with newline character(s)' + cmd[ 1 ].append( item ) + if len( stack ) != 1 or not newElement : + raise Exception( 'Unexpected end of file (last token was at line %s, column %s)' % ( lastLine , lastColumn ) ) + return stack[ -1 ] + +# +# Helper function to dump configuration +# +def printTreeInner( t , prt = sys.stdout.write , prefix = '' ) : + + prt( prefix ) + prt( t[ 0 ] ) + for kw in t[ 1 ] : + prt( ' ' + kw ) + prt( ' (%s,%s,%r)' % ( t[ 3 ][ 0 ] , t[ 3 ][ 1 ] , t[ 3 ][ 2 ] ) ) + if len( t[ 2 ] ) > 0 : + prt( ' {\n' ) + for sub in t[ 2 ] : + printTreeInner( sub , prt , prefix + ' ' ) + prt( prefix ) + prt( '}\n' ) + else : + prt( ' ;\n' ) + +# +# Dump configuration +# +def printTree( t ) : + + if len( t[ 2 ] ) > 0 : + for 
sub in t[ 2 ] : + printTreeInner( sub ) diff --git a/mail.filter b/mail.filter new file mode 100755 index 0000000..003b164 --- /dev/null +++ b/mail.filter @@ -0,0 +1,710 @@ +#!/usr/bin/python +# -*- coding: iso-8859-1 -*- + +# +# Noname - Mail filter to replace procmail. +# Copyright (C) 2004 Frédéric Jolliton +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +# +# TODO: +# +# No todo. +# + +import sys +import getopt +import os +from os import EX_OK, EX_NOUSER, EX_TEMPFAIL, EX_DATAERR +import time +import email +import re + +# +# Path to subprocess module. Ideally not needed if subprocess +# (formerly popen5) is installed into /site-packages/ directory. +# +#sys.path.insert( 0 , '/usr/local/lib/python/' ) + +# +# subprocess (formerly popen5) - See PEP 324 +# http://www.lysator.liu.se/~astrand/popen5/ +# +# >>> cat = subprocess.Popen( 'cat' , stdin = subprocess.PIPE , stdout = subprocess.PIPE ) +# >>> cat.communicate( 'bla' ) +# ('bla', None) +# >>> cat.returncode +# 0 +# +try : + import subprocess +except ImportError : + try : + import popen5 as subprocess + except ImportError : + print 'Please install subprocess module.' + print 'See http://www.lysator.liu.se/~astrand/popen5/.' 

#--[ Configuration variables ]------------------------------------------------

#
# For which users receiving a mail should we send a UDP packet.
#
g_userNotificationFilter = [ 'fred' ]

#
# For which IP address should we send the notification.
# (Can include broadcast address.)
#
g_notificationAddresses = [ '192.168.1.255' ]

#
# On which port should we send the notification.
#
g_notificationPort = 23978

#
# Max mail size to be processed by this script.
#
# Larger mails are just not filtered.
#
g_maxMailSize = 2 * 1024 * 1024

#
# Where to save a copy of the mail in case of error.
#
g_directoryBackup = '/var/mail.filter/recovery/'

#
# Where to find rules about each user.
#
# Filename for user 'joe' will be named 'joe.mf' in that
# directory. If the file doesn't exist, no filtering is
# done (not even spam/virus filtering.)
#
g_directoryRules = '/var/mail.filter/rules/'

#
# Path to spamprobe binary.
#
g_pathSpamProbe = '/usr/bin/spamprobe'

#
# Path to ClamAV binary.
#
# Could point either to clamdscan or clamscan.
#
# The first one is *HIGHLY* recommended since
# it will use the ClamAV daemon.
#
g_pathClamdscan = '/usr/bin/clamdscan'

#
# Path to Cyrus's deliver binary.
#
g_pathCyrusDeliver = '/usr/cyrus/bin/deliver'

#
# Filename where to put log.
#
g_pathLog = '/var/log/mail.filter.log'

#--[ Global variables ]-------------------------------------------------------

#
# Should the log be also printed on stdout ?
#
g_copyLogToStdout = False

#
# Don't actually feed the mail to Cyrus.
#
g_testMode = False

#
# The user name of the recipient.
#
g_user = None

#
# The current mail as string (as read from stdin.)
#
g_mailText = None

#
# The current mail as email.Message.Message object.
#
g_mail = None

#-----------------------------------------------------------------------------

#
# Utility function to return traceback as string from most recent
# exception.
+# +def getTraceBack() : + + import traceback, sys + return ''.join( traceback.format_exception( *sys.exc_info() ) ) + +# +# Return (rc, output) +# +def pipe( cmd , input ) : + + p = subprocess.Popen( cmd , stdin = subprocess.PIPE , stdout = subprocess.PIPE , stderr = subprocess.PIPE ) + # much faster than passing 'input' to communicate directly.. + try : + p.stdin.write( input ) + except IOError : + pass + r = p.communicate() + return p.returncode , r[ 0 ] , r[ 1 ] + +# +# Return an ISO-8661 date representation in the UTC +# timezone. +# +def timestamp( t = None ) : + + if t == None : + t = time.gmtime() + return '%04d-%02d-%02dT%02d:%02d:%02dZ' % t[ : 6 ] + +# +# Log message 'msg'. +# +def logMessage( msg ) : + + logMessage.__dict__.setdefault( 'logFile' , None ) + + if not logMessage.logFile and not g_testMode : + # + # If log file is not yet open, try to open it. + # + try : + logMessage.logFile = open( g_pathLog , 'a+' ) + except : + return + + msg = msg.splitlines() + prefix = timestamp() + ' [%s] ' % os.getpid() + + # + # Output to log file. + # + if logMessage.logFile : + + for line in msg : + line = prefix + line + try : + logMessage.logFile.write( line + '\n' ) + logMessage.logFile.flush() + except : + pass + + # + # Output to standard output. + # + if g_copyLogToStdout : + + for line in msg : + line = prefix + line + sys.stdout.write( line + '\n' ) + sys.stdout.flush() + +# +# Make a backup of the mail (in case it's impossible +# to store the mail to Cyrus.) +# +def backup( filenamePrefix = None ) : + + if g_testMode : + logMessage( 'TEST MODE: Backup of the mail.' ) + return + + try : + # Ensure directory exist + import os + os.makedirs( g_directoryBackup ) + except : + pass + basename = '' + if filenamePrefix : + basename += filenamePrefix + '-' + basename += '%.3f' % time.time() + fn = g_directoryBackup + '/' + basename + try : + f = open( fn , 'a+' ) + f.write( g_mailText ) + f.close() + except : + logMessage( 'Error saving backup copy.' 
) + else : + logMessage( 'Message appended to backup directory as `%s\'.' % basename ) + +# +# Experimental ! +# +# Packet payload contains: +# +# + char( 0 ) + + char( 0 ) +# +def notifyDeliver( user , folder ) : + + if user not in g_userNotificationFilter : + return + try : + import socket + s = socket.socket( socket.AF_INET , socket.SOCK_DGRAM ) + msg = user + chr( 0 ) + folder + chr( 0 ) + for address in g_notificationAddresses : + s.sendto( msg , ( address , g_notificationPort ) ) + except : + pass + +# +# Deliver a mail to Cyrus for user 'username' in +# folder 'folderName' (or default folder if not +# specified.) +# +def deliverTo( username , folderName = None ) : + + if not folderName : + pseudoFolderName = 'INBOX' + folderName = 'user.' + username + else : + pseudoFolderName = 'INBOX.' + folderName + folderName = 'user.' + username + '.' + folderName + + if g_testMode : + logMessage( 'TEST MODE: Delivering mail in `%s\'.' % ( folderName , ) ) + return EX_OK + + # + # Build the command line for running deliver. + # + cmd = [ g_pathCyrusDeliver ] + cmd += [ '-a' , username ] + cmd += [ '-m' , folderName ] + + try : + rc , stdout , stderr = pipe( cmd , g_mailText ) + except OSError , e : + logMessage( 'Error running `%s\': %s.' % ( cmd[ 0 ] , e[ 1 ] ) ) + return EX_TEMPFAIL + + if rc == EX_OK : + logMessage( 'Message delivered in folder `%s\'.' % folderName ) + notifyDeliver( username , pseudoFolderName ) + else : + errorMessage = stdout.rstrip() + # + # Extract raw error message + # + # Example of output: + # + # +user.fred: Message contains invalid header + # + m = errorMessage.split( ': ' , 1 ) + if len( m ) == 2 : + m = m[ 1 ] + else : + m = None + rcMsg = '%d' % rc + if m == 'Message contains invalid header' : + rc = EX_DATAERR + rcMsg += '->%d' % rc + elif m == 'Mailbox does not exist' : + rc = EX_NOUSER + rcMsg += '->%d' % rc + logMessage( 'Refused by Cyrus: [%s] `%s\'.' 
% ( rcMsg , errorMessage ) ) + return rc + +#----------------------------------------------------------------------------- + +# +# Return virus list from the output of ClamAV. +# +def extractVirusList( clamdOutput ) : + + res = [] + for line in clamdOutput.splitlines() : + r = extractVirusList.reClamdVirus.search( line.rstrip() ) + if r == None : continue + res.append( r.group( 1 ) ) + return res + +extractVirusList.reClamdVirus = re.compile( r'^[^:]+: (\S+) FOUND$' ) + +# +# Check for virus. +# +# Return True if mail is clean. +# +def antivirusScan() : + + cmd = [ g_pathClamdscan , '-' ] + rc , stdout , stderr = pipe( cmd , g_mailText ) + output = stderr or '' + #logMessage( 'clamdscan returned %s' % rc ) + if rc == 2 : + raise 'Unable to scan for viruses (%s)' % cmd + ok = not rc + if not ok : + msg = 'Virus found.' + viruses = extractVirusList( output ) + if viruses : + msg += ' [%s]' % ' '.join( viruses ) + logMessage( msg ) + return ok + +# +# Check for spam. +# +# Return True if mail is correct. +# +def spamScan() : + + if not g_user : return True + + cmd = [ g_pathSpamProbe ] + cmd += [ '-d' , '/var/spamprobe/db/%s/' % g_user ] + cmd += [ 'receive' ] + rc , stdout , stderr = pipe( cmd , g_mailText ) + r = ( stdout or '' ).split() + return r[ 0 ] != 'SPAM' + +#----------------------------------------------------------------------------- + +def readUserRules( user ) : + + import confparser + try : + f = open( g_directoryRules + '/' + user + '.mf' ) + except IOError : + return + return confparser.parse( f.read() ) + +def ruleMatch( header , matchType , text ) : + + if matchType == 'match' : + try : + return re.search( text , header , re.I ) != None + except : + logMessage( 'Error with regex `%s\' from %s\'s user configuration.' 
% ( text , g_user ) ) + return False + elif matchType == 'is' : + return header.strip().lower() == text.strip().lower() + elif matchType == 'contains' : + return header.lower().find( text.strip().lower() ) != -1 + else : + logMessage( 'Unknown match type `%s\' from %s\'s user configuration.' % ( matchType , g_user ) ) + return False + +def testRule( rule ) : + + cmd = rule[ 0 ] + if cmd == 'and' : + for subrule in rule[ 2 ] : + if testRule( subrule ) == False : + break + else : + return True + elif cmd == 'or' : + for subrule in rule[ 2 ] : + if testRule( subrule ) == True : + return True + elif cmd == 'not' : + for subrule in rule[ 2 ] : + if testRule( subrule ) == True : + break + else : + return True + elif cmd == 'header' : + if g_mail : + args = rule[ 1 ] + headerName , matchType , text = args + header = g_mail[ headerName ] or '' + header = header.replace( '\n' , ' ' ) + if ruleMatch( header , matchType , text ) : + return True + elif cmd == 'broken' : + if not g_mail : + return True + elif cmd == 'infected' : + if g_mail : + return not antivirusScan() + elif cmd == 'spam' : + if g_mail : + return not spamScan() + else : + logMessage( 'Unknown rule name `%s\'.' % ( cmd , ) ) + return False + +#----------------------------------------------------------------------------- + +def errorNameToErrorCode( code ) : + + code = code.lower() + if code == 'nouser' : + return EX_NOUSER + elif code == 'tempfail' : + return EX_TEMPFAIL + elif code == 'dataerr' : + return EX_DATAERR + else : + try : + return int( code ) + except : + return 0 + +# +# Find the destination folder for user 'user' according to rules defined for +# him/her against the current mail. +# +# Return either: +# +# ( False , mailBoxName | None ) +# ( True , customErrorCode ) +# +def getDestFolder( user ) : + + conf = readUserRules( user ) + + if not conf : + + logMessage( 'No rules defined for user `%s\'.' 
% user ) + return ( False , None ) + + if not conf[ 2 ] : + + logMessage( 'Empty rules set or syntax error encountered for user `%s\'.' % user ) + return ( False , None ) + + for item in conf[ 2 ] : + + action , args , subs = item[ : 3 ] + + if action not in [ 'folder' , 'reject' ] : continue + + for rule in subs : + # + # First rule that match is used. + # + if testRule( rule ) : + break + else : + continue + + if action == 'folder' : + return ( False , args[ 0 ] ) + + if action == 'reject' : + try : + return ( True , errorNameToErrorCode( args[ 0 ] ) ) + except : + logMessage( 'Invalid reject code %r.' % args[ 0 ] ) + + return ( False , None ) + +#----------------------------------------------------------------------------- + +# +# Dispatch the mail to the correct folder (deduced from rules.) +# +# Return either: +# +# ( False , errorCode ) +# ( True , customErrorCode ) +# +def dispatch() : + + isCustomErrorCode , value = getDestFolder( g_user ) + + # + # + # + if isCustomErrorCode : + return ( True , value ) + + # + # We got a folder name (or None for default folder.) + # + mbox = value + pseudoName = 'INBOX' + if mbox : pseudoName += '.' + mbox + + # + # Try to deliver in the named folder + # + rc = deliverTo( g_user , mbox ) + # + # If we get an error code, then we deliver the mail to default folder, + # except if the error was "data error" or if we already tried to deliver + # it to default folder. + # + if rc not in [ EX_OK , EX_DATAERR ] and mbox : + logMessage( 'Error delivering to folder %s of user `%s\'.' % ( pseudoName , g_user ) ) + logMessage( 'Mail will go into default folder.' ) + rc = deliverTo( g_user ) + # + # Check again. + # + # Here we also handle the case of EX_DATAERR not handled above. + # + if rc != EX_OK : + logMessage( 'Error delivering to default folder of user `%s\'.' % ( g_user , ) ) + # + # All errors code different from "data error" are translated to + # "no user" error code. 
+ # + if rc != EX_DATAERR : + rc = EX_NOUSER + + return ( False , rc ) + +#----------------------------------------------------------------------------- + +def usage() : + + print '''Usage: mail.filter [OPTIONS] [username] + + -h, --help Print this help. + -v, --verbose Verbose mode, output log to stdout. + -t, --test Test mode. Don't feed the mail to Cyrus, and don't + write anything into log file. + -l, --log=FILENAME Set log filename (default: %s). + -r, --rules=DIRECTORY Directory where are located users rules (default %s). + +Report bugs to .''' \ + % ( g_pathLog , g_directoryRules ) + +def main() : + + global g_user, g_mail, g_mailText, g_copyLogToStdout, g_pathLog, g_directoryRules , g_testMode + + options , parameters = \ + getopt.getopt( sys.argv[ 1 : ] , + 'hvtl:r:' , + ( 'help' , 'verbose' , 'test' , 'log=' , 'rules=' ) ) + + for option , argument in options : + if option in [ '-h' , '--help' ] : + usage() + sys.exit( 0 ) + elif option in [ '-v' , '--verbose' ] : + g_copyLogToStdout = True + elif option in [ '-t' , '--test' ] : + g_testMode = True + elif option in [ '-l' , '--log' ] : + g_pathLog = argument + elif option in [ '-r' , '--rules' ] : + g_directoryRules = argument + + # + # At most one parameter expected. + # + if len( parameters ) > 1 : + # + # We just log a error message. We continue to proceed + # to not lost the mail ! + # + logMessage( 'Warning: More than one parameter on command line.' ) + + if parameters : + g_user = parameters[ 0 ] + + logMessage( 'Running mail.filter for user `%s\'.' % g_user ) + + # + # FIXME: Should we be reading the mail by block, so that + # we can at least read and backup a part of the standard input + # in case an error occur ? (broken pipe for example) + # + try : + g_mailText = sys.stdin.read() + except : + logMessage( getTraceBack() ) + sys.exit( 1 ) + + # + # Handling big mail. + # + if len( g_mailText ) > g_maxMailSize : + logMessage( 'Message too big (%s bytes). Not filtering it.' 
% len( g_mailText ) ) + ok = False + if g_user : + ok = deliverTo( g_user ) + logMessage( 'Unable to deliver it to user `%s\'.' % g_user ) + if not ok : + backup( g_user ) + sys.exit( 0 ) + + # + # Parsing the mail. + # + try : + g_mail = email.message_from_string( g_mailText , strict = False ) + except : + logMessage( getTraceBack() ) + + # + # No user specified. + # + if not g_user : + logMessage( 'No user specified.' ) + backup( g_user ) + sys.exit( 0 ) + + # + # Return code default to "temporary failure". + # + isCustomErrorCode , rc = ( False , EX_TEMPFAIL ) + + # + # Dispatch the mail + # + try : + isCustomErrorCode , rc = dispatch() + except : + logMessage( getTraceBack() ) + + if not isCustomErrorCode : + + if rc not in [ EX_OK , EX_DATAERR ] : + logMessage( 'Rescue mode - Trying to deliver to default folder of user %s.' % g_user ) + rc = deliverTo( g_user ) + + if rc != EX_OK : + backup( g_user ) + if rc != EX_DATAERR : + rc = EX_NOUSER + logMessage( 'Exit code is %d.' % rc ) + # + # FIXME: !!!!! + # + rc = EX_OK + + else : + + logMessage( 'Custom exit code is %d.' % rc ) + + sys.exit( rc ) + +if __name__ == '__main__' : + main() -- 2.11.0