2 # -*- coding: iso-8859-1 -*-
5 # MailFilter - Mail filter to replace procmail.
6 # Copyright (C) 2004 Frédéric Jolliton <frederic@jolliton.com>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 # Policy when errors are encountered:
26 # We back up the mail in a special directory. It is then
27 # at the admin's discretion to feed it to this program again
28 # (or possibly to script that.)
34 # [ ] Define precisely which return code to use for each possible case.
37 myName = 'mail.filter'
51 from os import EX_USAGE, EX_OK, EX_NOUSER, EX_TEMPFAIL, EX_DATAERR
53 # EX_USAGE 64 command line usage error
54 # EX_DATAERR 65 data format error
55 # EX_NOINPUT 66 cannot open input
56 # EX_NOUSER 67 addressee unknown
57 # EX_NOHOST 68 host name unknown
58 # EX_UNAVAILABLE 69 service unavailable
59 # EX_SOFTWARE 70 internal software error
60 # EX_OSERR 71 system error (e.g., can't fork)
61 # EX_OSFILE 72 critical OS file missing
62 # EX_CANTCREAT 73 can't create (user) output file
63 # EX_IOERR 74 input/output error
64 # EX_TEMPFAIL 75 temp failure; user is invited to retry
65 # EX_PROTOCOL 76 remote error in protocol
66 # EX_NOPERM 77 permission denied
67 # EX_CONFIG 78 configuration error
70 # Path to subprocess module. Ideally not needed if subprocess
71 # (formerly popen5) is installed into /site-packages/ directory.
73 #sys.path.insert( 0 , '/usr/local/lib/python/' )
76 # subprocess (formerly popen5) - See PEP 324
77 # http://www.lysator.liu.se/~astrand/popen5/
79 # >>> cat = subprocess.Popen( 'cat' , stdin = subprocess.PIPE , stdout = subprocess.PIPE )
80 # >>> cat.communicate( 'bla' )
89 import popen5 as subprocess
91 print 'Please install subprocess module.'
92 print 'See http://www.lysator.liu.se/~astrand/popen5/.'
95 #--[ Configuration variables ]------------------------------------------------
98 # Filename where to put log.
100 g_pathLog = '/var/log/mail.filter.log'
103 # For which users receiving a mail should we send a UDP packet.
105 g_userNotificationFilter = [ 'fred' ]
108 # To which IP addresses should we send the notification.
109 # (Can include broadcast address.)
111 g_notificationAddresses = [ '192.168.1.255' ]
114 # On which port should we send the notification.
116 g_notificationPort = 23978
119 # Max mail size to be processed by this script.
121 # Larger mails are simply not filtered.
123 g_maxMailSize = 2 * 1024 * 1024
126 # Where to save copy of mail in case of error.
128 g_directoryBackup = '/var/mail.filter/recovery/'
131 # If set, then no backups are produced in case of error.
133 g_backupDisabled = False
136 # Where to find rules about each user.
138 # Filename for user 'joe' will be named 'joe.mf' in that
139 # directory. If the file doesn't exist, no filtering is
140 # done (not even spam/virus filtering.)
142 g_directoryRules = '/var/mail.filter/rules/'
144 #--[ External commands ]------------------------------------------------------
147 # Path to Cyrus's deliver binary.
149 g_pathCyrusDeliver = '/usr/lib/cyrus/deliver'
152 # Path to spamprobe binary.
154 g_pathSpamProbe = '/usr/bin/spamprobe'
156 g_pathSpamProbeDb = '/var/spamprobe/db'
159 # Path to ClamAV binary.
161 # Could point either to 'clamdscan' or 'clamscan'.
163 # The first one is *HIGHLY* recommended since
164 # it will use the ClamAV daemon.
166 g_pathClamdscan = '/usr/bin/clamdscan'
168 #--[ Global variables ]-------------------------------------------------------
171 # Should the log be also printed on stdout ?
173 g_copyLogToStdout = False
176 # Don't actually feed the mail to Cyrus.
181 # The user name of the recipient.
186 # The current mail as string (as read from stdin.)
191 # The current mail as email.Message.Message object.
195 #-----------------------------------------------------------------------------
198 # check if predicate is True for all items in list 'lst'.
200 def all( lst , predicate ) :
203 if not predicate( item ) :
208 # check if predicate is True for at least one item in list 'lst'.
210 def some( lst , predicate ) :
213 if predicate( item ) :
218 # Remove leading and trailing blank, and replace any
219 # blank character sequence by one space character.
def normalizeBlank(s):
    """Collapse the whitespace of `s`.

    Leading and trailing blanks are removed, and every run of blank
    characters inside the string is replaced by a single space.
    """
    # split() with no argument both trims the ends and breaks on any
    # run of whitespace, so re-joining with one space normalizes it.
    words = s.split()
    return ' '.join(words)
225 #-----------------------------------------------------------------------------
228 # Utility function to return traceback as string from most recent
233 import traceback, sys
234 return ''.join( traceback.format_exception( *sys.exc_info() ) )
237 # Return (returnCode, stdout, stderr)
239 def pipe( cmd , input ) :
241 p = subprocess.Popen( cmd ,
242 stdin = subprocess.PIPE ,
243 stdout = subprocess.PIPE ,
244 stderr = subprocess.PIPE )
246 # much faster than passing 'input' to communicate directly..
247 p.stdin.write( input )
251 return p.returncode , r[ 0 ] , r[ 1 ]
254 # Return an ISO 8601 date representation for the UTC
257 # timestamp( 0 ) => 1970-01-01T00:00:00Z
262 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % t[ : 6 ]
267 def logMessage( msg ) :
269 if not logMessage.logFile and not g_testMode :
271 # If log file is not yet open, try to open it.
274 logMessage.logFile = open( g_pathLog , 'a+' )
276 if not g_copyLogToStdout :
279 msg = msg.splitlines()
280 prefix = timestamp() + ' [%s] ' % os.getpid()
283 # Output to log file.
285 if logMessage.logFile :
290 logMessage.logFile.write( line + '\n' )
291 logMessage.logFile.flush()
296 # Output to standard output.
298 if g_copyLogToStdout :
302 sys.stdout.write( line + '\n' )
305 logMessage.logFile = None
308 # Make a backup of the mail (in case it's impossible
309 # to store the mail to Cyrus.)
311 def backup( filenamePrefix = None ) :
314 logMessage( 'TEST MODE: Backup of the mail requested.' )
317 if g_backupDisabled :
318 logMessage( 'Backup requested, but disabled.' )
322 # Ensure the directory exists
324 os.makedirs( g_directoryBackup )
330 basename += filenamePrefix + '-'
332 # Append current unix time as suffix
334 basename += '%.3f' % time.time()
336 fn = g_directoryBackup + '/' + basename
338 f = open( fn , 'a+' )
339 f.write( g_mailText )
342 logMessage( 'PANIC: Unable to write backup to %s.' % fn )
344 logMessage( 'Message appended to backup directory as `%s\'.' % basename )
346 #-----------------------------------------------------------------------------
class NullAction(Action):
    """Action subclass that carries no parameters.

    Only its printable form is defined here.
    """

    def __repr__(self):
        return '<NullAction>'
356 class FileToFolderAction( Action ) :
358 def __init__( self , folder ) :
362 def __repr__( self ) :
364 return '<FileToFolderAction %r>' % ( self.folder , )
class CustomErrorCodeAction(Action):
    """Action carrying a custom exit status.

    `code` is the numeric exit code held by the action; it is read back
    through the `code` attribute.
    """

    def __init__(self, code):
        # NOTE(review): the constructor body is not visible in this view of
        # the file; storing the argument is reconstructed from the reads of
        # `.code` in __repr__ and by the caller -- confirm against the full
        # source.
        self.code = code

    def __repr__(self):
        # Name this class in the representation. It previously printed
        # '<NullAction ...>', which mislabelled the object in logs and
        # debugging output.
        return '<CustomErrorCodeAction %r>' % (self.code,)
376 #-----------------------------------------------------------------------------
381 # Packet payload contains:
383 # <username> + char( 0 ) + <foldername> + char( 0 )
385 def notifyDeliver( user , folder ) :
387 if user not in g_userNotificationFilter :
391 s = socket.socket( socket.AF_INET , socket.SOCK_DGRAM )
392 msg = user + chr( 0 ) + folder + chr( 0 )
393 for address in g_notificationAddresses :
394 s.sendto( msg , ( address , g_notificationPort ) )
399 # Deliver a mail to Cyrus for user 'username' in
400 # folder 'folderName' (or default folder if not
403 def deliverTo( username , folderName = None ) :
406 pseudoFolderName = 'INBOX'
407 folderName = 'user.' + username
409 pseudoFolderName = 'INBOX.' + folderName
410 folderName = 'user.' + username + '.' + folderName
413 # Build the command line for running deliver.
415 cmd = [ g_pathCyrusDeliver ]
416 cmd += [ '-a' , username ]
417 cmd += [ '-m' , folderName ]
420 logMessage( 'TEST MODE: Delivering mail in `%s\' requested.' % ( folderName , ) )
421 logMessage( 'TEST MODE: Command: %r.' % cmd )
425 rc , stdout , stderr = pipe( cmd , g_mailText )
427 logMessage( 'Error running `%s\': %s.' % ( cmd[ 0 ] , e[ 1 ] ) )
431 logMessage( 'Message delivered in folder `%s\'.' % folderName )
432 notifyDeliver( username , pseudoFolderName )
434 errorMessage = stdout.rstrip()
436 # Extract raw error message
440 # +user.fred: Message contains invalid header
442 m = errorMessage.split( ': ' , 1 )
448 if m == 'Message contains invalid header' :
451 elif m == 'Mailbox does not exist' :
455 # FIXME: DATAERR ok here ?
457 logMessage( 'Refused by Cyrus: [%s] `%s\'.' % ( rcMsg , errorMessage ) )
460 #--[ Antivirus ]--------------------------------------------------------------
463 # Return virus list from the output of ClamAV.
465 def extractVirusList( clamdOutput ) :
468 for line in clamdOutput.splitlines() :
469 r = extractVirusList.reClamdVirus.search( line.rstrip() )
470 if r == None : continue
471 res.append( r.group( 1 ) )
474 extractVirusList.reClamdVirus = re.compile( r'^[^:]+: (\S+) FOUND$' )
479 # Return True if mail is clean.
481 def antivirusScan() :
483 cmd = [ g_pathClamdscan , '-' ]
486 logMessage( 'TEST MODE: Virus scan requested.' )
487 logMessage( 'TEST MODE: Command: %r.' % cmd )
490 rc , stdout , stderr = pipe( cmd , g_mailText )
491 output = stderr or ''
492 #logMessage( 'clamdscan returned %s' % rc )
494 raise Exception( 'Unable to scan for viruses (%s)' % cmd )
498 viruses = extractVirusList( output )
500 msg += ' [%s]' % ' '.join( viruses )
504 #--[ Antispam ]---------------------------------------------------------------
509 # Return True if mail is correct.
513 if not g_user : return True
515 cmd = [ g_pathSpamProbe ]
516 cmd += [ '-d' , g_pathSpamProbeDb + '/' + g_user + '/' ]
520 logMessage( 'TEST MODE: Spam scan requested.' )
521 logMessage( 'TEST MODE: Command: %r.' % cmd )
524 rc , stdout , stderr = pipe( cmd , g_mailText )
525 r = ( stdout or '' ).split()
526 return r[ 0 ] != 'SPAM'
528 #-----------------------------------------------------------------------------
530 def errorNameToErrorCode( code ) :
533 if code == 'nouser' :
535 elif code == 'tempfail' :
537 elif code == 'dataerr' :
545 #-----------------------------------------------------------------------------
548 # FIXME: I think it could be better to cache the parsed
549 # configuration, and also to cache the result of the validator
550 # so that we don't run the test each time this script is run !
552 def readUserRules( user ) :
554 filename = g_directoryRules + '/' + user + '.mf'
557 # Read the configuration.
560 return confparser.readConfiguration( filename )
563 except Exception , e :
564 logMessage( str( e ) )
565 logMessage( 'Error in file %r. See option -c to check this file.' % ( filename , ) )
567 #-----------------------------------------------------------------------------
570 # Test a match rule against a particular header.
573 # matchType : string in [ 'match' , 'is' , 'contains' ]
576 def ruleMatch( header , matchType , text ) :
578 if matchType == 'match' :
580 return re.search( text , header , re.I ) != None
582 logMessage( 'Error with regex `%s\' from %s\'s user configuration.' % ( text , g_user ) )
584 elif matchType == 'is' :
585 return header.strip().lower() == text.strip().lower()
586 elif matchType == 'contains' :
587 return header.lower().find( text.strip().lower() ) != -1
589 logMessage( 'Unknown match type `%s\' from %s\'s user configuration.' % ( matchType , g_user ) )
593 # '=?iso-8859-1?q?Fr=E9d=E9ric_Jolliton?= <frederic@jolliton.com>'
594 # => u'Fr\xe9d\xe9ric Jolliton <frederic@jolliton.com>'
596 def decodeHeader( s ) :
599 return ' '.join( [ p[ 0 ].decode( p[ 1 ] or 'ascii' ) for p in email.Header.decode_header( s ) ] )
601 logMessage( 'Error decoding %r' % s )
609 for headerName , headerContents in g_mail.items() :
611 for text , charset in email.Header.decode_header( headerContents ) :
612 text.decode( charset or 'ascii' )
613 except LookupError , e :
615 # FIXME: We got an unknown charset.. In doubt, we
616 # consider the header as valid.
619 except UnicodeDecodeError , e :
620 brokenHeaders.append( headerName )
623 if len( brokenHeaders ) > maxHeaderToLog :
624 brokenHeaders = brokenHeaders[ : maxHeaderToLog ] + [ '...' ]
625 logMessage( 'The following headers are broken: [%s]' % ' '.join( brokenHeaders ) )
630 # Test rule 'rule' against the mail.
632 def testRule( rule ) :
637 return all( rule[ 2 ] , testRule )
640 return some( rule[ 2 ] , testRule )
643 return not some( rule[ 2 ] , testRule )
652 if len( args ) == 2 :
656 headerName , matchType = args
657 if matchType not in [ 'present' ] :
658 logMessage( 'Unknown match type %r' % matchType )
660 return ( g_mail.get( headerName ) != None )
662 headerName , matchType , text = args
663 text = normalizeBlank( text )
664 if headerName.find( '.' ) != -1 :
665 headerName , partName = headerName.split( '.' , 1 )
666 partName = partName.lower()
669 headers = g_mail.get_all( headerName ) or []
670 headers = map( decodeHeader , headers )
672 if partName != 'raw' :
674 # Support for .name, .address and .domain part.
678 # with 'From: Frederic Jolliton <frederic@jolliton.com>, a@b.c (Foo)'
680 # 'From.name' -> [ 'Frederic Jolliton' , 'Foo' ]
681 # 'From.address' -> [ 'frederic@jolliton.com' , 'a@b.c' ]
682 # 'From.user' -> [ 'frederic' , 'a' ]
683 # 'From.domain' -> [ 'jolliton.com' , 'b.c' ]
685 if partName not in [ 'name' , 'address' , 'user' , 'domain' ] :
686 logMessage( 'Unknown header part %r' % partName )
688 adrs = email.Utils.getaddresses( headers )
689 if partName == 'name' :
690 headers = [ adr[ 0 ] for adr in adrs ]
691 elif partName == 'address' :
692 headers = [ adr[ 1 ] for adr in adrs ]
693 elif partName == 'user' :
694 headers = [ adr[ 1 ].split( '@' )[ 0 ] for adr in adrs ]
695 elif partName == 'domain' :
696 headers = [ adr[ 1 ].split( '@' )[ -1 ] for adr in adrs ]
698 return some( headers , lambda header : ruleMatch( header , matchType , text ) )
704 return g_mail == None or isMailBroken()
709 if cmd == 'infected' :
710 return g_mail != None and not antivirusScan()
716 return g_mail != None and not spamScan()
727 logMessage( 'Unknown rule name `%s\'.' % ( cmd , ) )
730 #-----------------------------------------------------------------------------
733 # Find the destination folder for user 'user' according to rules defined for
734 # him/her against the current mail.
738 def checkUserRules( user ) :
740 action = FileToFolderAction( None )
742 conf = readUserRules( user )
745 # conf == None => No rule file.
746 # not conf[ 2 ] => File exists, but contains no rules.
748 if conf != None and conf[ 2 ] :
749 for item in conf[ 2 ] :
750 actionName , args , subs = item[ : 3 ]
752 if some( subs , testRule ) :
754 logMessage( 'MATCH: %s %s { ... }' % ( actionName , ' '.join( args ) ) )
756 if actionName == 'folder' :
757 action = FileToFolderAction( args[ 0 ] )
758 elif actionName == 'reject' :
759 action = CustomErrorCodeAction( errorNameToErrorCode( args[ 0 ] ) )
761 logMessage( 'Unknown action `%s\'.' % actionName )
766 #-----------------------------------------------------------------------------
769 # Read mail from standard input.
776 # FIXME: Should we be reading the mail by block, so that
777 # we can at least read and back up a part of the standard input
778 # in case an error occurs ? (broken pipe for example)
780 # If an error occurs, and since we can't back up the mail,
781 # we ask sendmail to retry later.
784 g_mailText = sys.stdin.read()
786 logMessage( getTraceBack() )
787 sys.exit( EX_TEMPFAIL )
790 # Check if the mail is bigger than a predefined amount.
792 def checkForLargeMail() :
794 if len( g_mailText ) > g_maxMailSize :
795 logMessage( 'Message too big (%s bytes). Not filtering it.' % len( g_mailText ) )
798 rc = deliverTo( g_user )
800 logMessage( 'Unable to deliver it to user `%s\'.' % g_user )
806 # Check if user was specified on the command line.
814 logMessage( 'No user specified.' )
819 # Parse the mail using email python standard module.
829 g_mail = email.message_from_string( g_mailText , strict = False )
831 logMessage( getTraceBack() )
834 # Dispatch the mail to the correct folder (deduced from rules.)
836 # Return an error code.
840 action = checkUserRules( g_user )
845 # If custom error code is returned, stop processing
846 # here (mail is not saved.)
848 if isinstance( action , CustomErrorCodeAction ) :
849 logMessage( 'Custom exit code is %d.' % action.code )
853 # File the mail into the specified folder.
855 if isinstance( action , FileToFolderAction ) :
857 # We got a folder name (or None for default folder.)
859 folder = action.folder
861 if folder : pseudoName += '.' + folder
864 # Try to deliver in the named folder
866 rc = deliverTo( g_user , folder )
868 # If we get an error code, then we deliver the mail to default folder,
869 # except if the error was "data error" or if we already tried to deliver
870 # it to default folder.
872 if rc not in [ EX_OK , EX_DATAERR ] and folder != None :
873 logMessage( 'Error delivering to folder %s of user `%s\'.' % ( pseudoName , g_user ) )
874 logMessage( 'Mail will go into default folder.' )
875 rc = deliverTo( g_user )
879 # Here we also handle the case of EX_DATAERR not handled above.
882 logMessage( 'Error delivering to default folder of user `%s\'.' % ( g_user , ) )
884 # Since it's still not ok, backup the mail.
888 # All error codes other than "data error" are translated to
889 # the "no user" error code.
893 if rc != EX_DATAERR :
903 raise Exception( 'Unknown action type' )
915 return dispatchMail()
919 logMessage( getTraceBack() )
922 #-----------------------------------------------------------------------------
924 def checkConfiguration( filename ) :
927 confparser.readConfiguration( filename )
928 except Exception , e :
931 print filename , 'ok'
933 #-----------------------------------------------------------------------------
939 Copyright (C) 2004, Frederic Jolliton <frederic@jolliton.com>
940 This is free software; see the source for copying conditions. There is NO
941 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.''' \
942 % ( myName , myVersion )
946 print '''Usage: mail.filter [OPTIONS] username < EMAIL
948 -h, --help Print this help.
949 -v, --verbose Verbose mode, output log to stdout.
950 -t, --test Test mode. Don't feed the mail to Cyrus, don't
951 do backup, and don't write anything into log file.
952 -l, --log=FILENAME Set log filename.
953 -r, --rules=DIRECTORY Directory where are located users rules.
954 -c, --check-config=FILENAME
955 Check syntax and structure of configuration file
957 --disable-backup Disable backup.
958 --version Output version information and exit.
961 print 'Current paths are:\n'
962 print ' spamprobe : %s' % g_pathSpamProbe
963 print ' clamd : %s' % g_pathClamdscan
964 print ' deliver : %s' % g_pathCyrusDeliver
965 print ' log : %s' % g_pathLog
967 print 'Current directories are:\n'
968 print ' spamprobedb: %s' % g_pathSpamProbeDb
969 print ' rules : %s' % g_directoryRules
971 Latest version is available from:
973 arch://arch.intra.tuxee.net/2004/mail-filter
975 Report bugs to <fj@tuxee.net>.'''
979 global g_user, g_mail, g_mailText, g_copyLogToStdout, g_pathLog, g_directoryRules , g_testMode, g_backupDisabled
981 #--[ Command line ]-------------------------------------------------------
985 _getopt = getopt.gnu_getopt
987 _getopt = getopt.getopt
990 options , parameters = \
991 _getopt( sys.argv[ 1 : ] ,
993 ( 'help' , 'verbose' , 'test' , 'log=' , 'rules=' ,
994 'check-config=' , 'disable-backup' , 'version' ) )
995 except getopt.GetoptError , e :
996 myName = sys.argv[ 0 ].split( '/' )[ -1 ]
997 print '%s: %s' % ( myName , e[ 0 ] )
998 print 'Try `%s --help\' for more information.' % myName
1001 for option , argument in options :
1002 if option in [ '-h' , '--help' ] :
1005 elif option in [ '-v' , '--verbose' ] :
1006 g_copyLogToStdout = True
1007 elif option in [ '-t' , '--test' ] :
1009 elif option in [ '-l' , '--log' ] :
1010 g_pathLog = argument
1011 elif option in [ '-r' , '--rules' ] :
1012 g_directoryRules = argument
1013 elif option in [ '-c' , '--check-config' ] :
1014 checkConfiguration( argument )
1016 elif option in [ '--disable-backup' ] :
1017 g_backupDisabled = True
1018 elif option in [ '--version' ] :
1023 # At most one parameter expected.
1025 if len( parameters ) > 1 :
1027 # We just log an error message. We continue processing
1028 # so as not to lose the mail !
1030 logMessage( 'Warning: Expected only one user name.' )
1033 g_user = parameters[ 0 ]
1035 #--[ Core ]---------------------------------------------------------------
1037 logMessage( 'Running mail.filter for user `%s\'.' % g_user )
1041 if __name__ == '__main__' :