2 # -*- coding: iso-8859-1 -*-
5 # MailFilter - Mail filter to replace procmail.
6 # Copyright (C) 2004 Frédéric Jolliton <frederic@jolliton.com>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 # Policy when errors are encountered:
26 # We back up the mail in a special directory. It is
27 # at the admin's discretion to feed it to this program
28 # again (or maybe to script that.)
34 # [ ] Define precisely which return code to use for each possible case.
46 from os import EX_USAGE, EX_OK, EX_NOUSER, EX_TEMPFAIL, EX_DATAERR
48 # EX_USAGE 64 command line usage error
49 # EX_DATAERR 65 data format error
50 # EX_NOINPUT 66 cannot open input
51 # EX_NOUSER 67 addressee unknown
52 # EX_NOHOST 68 host name unknown
53 # EX_UNAVAILABLE 69 service unavailable
54 # EX_SOFTWARE 70 internal software error
55 # EX_OSERR 71 system error (e.g., can't fork)
56 # EX_OSFILE 72 critical OS file missing
57 # EX_CANTCREAT 73 can't create (user) output file
58 # EX_IOERR 74 input/output error
59 # EX_TEMPFAIL 75 temp failure; user is invited to retry
60 # EX_PROTOCOL 76 remote error in protocol
61 # EX_NOPERM 77 permission denied
62 # EX_CONFIG 78 configuration error
65 # Path to subprocess module. Ideally not needed if subprocess
66 # (formerly popen5) is installed into /site-packages/ directory.
68 #sys.path.insert( 0 , '/usr/local/lib/python/' )
71 # subprocess (formerly popen5) - See PEP 324
72 # http://www.lysator.liu.se/~astrand/popen5/
74 # >>> cat = subprocess.Popen( 'cat' , stdin = subprocess.PIPE , stdout = subprocess.PIPE )
75 # >>> cat.communicate( 'bla' )
84 import popen5 as subprocess
86 print 'Please install subprocess module.'
87 print 'See http://www.lysator.liu.se/~astrand/popen5/.'
89 #--[ Configuration variables ]------------------------------------------------
92 # Filename where to put log.
94 g_pathLog = '/var/log/mail.filter.log'
97 # For which users receiving a mail should we send a UDP packet.
99 g_userNotificationFilter = [ 'fred' ]
102 # For which IP address should we send the notification.
103 # (Can include broadcast address.)
105 g_notificationAddresses = [ '192.168.1.255' ]
108 # On which port should we send the notification.
110 g_notificationPort = 23978
113 # Max mail size to be processed by this script.
115 # Larger mails are simply not filtered.
117 g_maxMailSize = 2 * 1024 * 1024
120 # Where to save copy of mail in case of error.
122 g_directoryBackup = '/var/mail.filter/recovery/'
125 # Where to find rules about each user.
127 # Filename for user 'joe' will be named 'joe.mf' in that
128 # directory. If the file doesn't exist, no filtering is
129 # done (not even spam/virus filtering.)
131 g_directoryRules = '/var/mail.filter/rules/'
133 #--[ External commands ]------------------------------------------------------
136 # Path to Cyrus's deliver binary.
138 g_pathCyrusDeliver = '/usr/cyrus/bin/deliver'
141 # Path to spamprobe binary.
143 g_pathSpamProbe = '/usr/bin/spamprobe'
145 g_pathSpamProbeDb = '/var/spamprobe/db'
148 # Path to ClamAV binary.
150 # Could point either to 'clamdscan' or 'clamscan'.
152 # The first one is *HIGHLY* recommended since
153 # it will use the ClamAV daemon.
155 g_pathClamdscan = '/usr/bin/clamdscan'
157 #--[ Global variables ]-------------------------------------------------------
160 # Should the log be also printed on stdout ?
162 g_copyLogToStdout = False
165 # Don't actually feed the mail to Cyrus.
170 # The user name of the recipient.
175 # The current mail as string (as read from stdin.)
180 # The current mail as email.Message.Message object.
184 #-----------------------------------------------------------------------------
187 # check if predicate is True for all items in list 'lst'.
def all( lst , predicate ) :
    """Check whether 'predicate' is true for all items of 'lst'.

    Returns True when predicate( item ) is true for every item, and
    False as soon as one item fails. An empty 'lst' yields True.

    NOTE: the ( lst , predicate ) argument order is the one used by
    testRule() in this file; the name is the same as the builtin all()
    of newer Python versions, which takes a single iterable instead.
    """
    for item in lst :
        if not predicate( item ) :
            # First failing item settles the answer.
            return False
    return True
197 # check if predicate is True for at least one item in list 'lst'.
def some( lst , predicate ) :
    """Check whether 'predicate' is true for at least one item of 'lst'.

    Returns True as soon as predicate( item ) is true for an item, and
    False when no item matches. An empty 'lst' yields False.
    """
    for item in lst :
        if predicate( item ) :
            # First matching item settles the answer.
            return True
    return False
207 # Remove leading and trailing blank, and replace any
208 # blank character sequence by one space character.
def normalizeBlank( s ) :
    """Return 's' with blanks normalized.

    Leading and trailing whitespace is removed and every run of
    whitespace characters inside the string is replaced by a single
    space character.
    """
    # split() without argument splits on any whitespace run and drops
    # empty leading/trailing fields, so joining with ' ' is enough.
    return ' '.join( s.split() )
214 #-----------------------------------------------------------------------------
217 # Utility function to return traceback as string from most recent
222 import traceback, sys
223 return ''.join( traceback.format_exception( *sys.exc_info() ) )
226 # Return (returnCode, stdout, stderr)
228 def pipe( cmd , input ) :
230 p = subprocess.Popen( cmd ,
231 stdin = subprocess.PIPE ,
232 stdout = subprocess.PIPE ,
233 stderr = subprocess.PIPE )
235 # much faster than passing 'input' to communicate directly..
236 p.stdin.write( input )
240 return p.returncode , r[ 0 ] , r[ 1 ]
243 # Return an ISO-8601 date representation for the UTC
246 # timestamp( 0 ) => 1970-01-01T00:00:00Z
251 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % t[ : 6 ]
256 def logMessage( msg ) :
258 if not logMessage.logFile and not g_testMode :
260 # If log file is not yet open, try to open it.
263 logMessage.logFile = open( g_pathLog , 'a+' )
267 msg = msg.splitlines()
268 prefix = timestamp() + ' [%s] ' % os.getpid()
271 # Output to log file.
273 if logMessage.logFile :
278 logMessage.logFile.write( line + '\n' )
279 logMessage.logFile.flush()
284 # Output to standard output.
286 if g_copyLogToStdout :
290 sys.stdout.write( line + '\n' )
293 logMessage.logFile = None
296 # Make a backup of the mail (in case it's impossible
297 # to store the mail to Cyrus.)
299 def backup( filenamePrefix = None ) :
302 logMessage( 'TEST MODE: Backup of the mail requested.' )
306 # Ensure the directory exists
308 os.makedirs( g_directoryBackup )
314 basename += filenamePrefix + '-'
316 # Append current unix time as suffix
318 basename += '%.3f' % time.time()
320 fn = g_directoryBackup + '/' + basename
322 f = open( fn , 'a+' )
323 f.write( g_mailText )
326 logMessage( 'Error saving backup copy.' )
328 logMessage( 'Message appended to backup directory as `%s\'.' % basename )
330 #-----------------------------------------------------------------------------
class NullAction( Action ) :
    """Action subclass that carries no data.

    Only the debugging representation is defined here.
    """

    def __repr__( self ) :
        return '<NullAction>'
class FileToFolderAction( Action ) :
    """Action requesting that the mail be filed into a folder.

    'folder' is the folder name; checkUserRules() also builds this
    action with None, which the dispatch code treats as the default
    folder.
    """

    def __init__( self , folder ) :
        # Kept as a plain attribute: it is read back as 'action.folder'
        # by the dispatch code and by __repr__ below.
        self.folder = folder

    def __repr__( self ) :
        return '<FileToFolderAction %r>' % ( self.folder , )
class CustomErrorCodeAction( Action ) :
    """Action requesting that processing stop with a given exit code.

    'code' is the exit code to report; checkUserRules() obtains it from
    errorNameToErrorCode() for 'reject' rules.
    """

    def __init__( self , code ) :
        # Read back by __repr__ below and logged by the dispatch code.
        self.code = code

    def __repr__( self ) :
        # The representation used to say '<NullAction ...>', which made
        # this action indistinguishable from NullAction in logs.
        return '<CustomErrorCodeAction %r>' % ( self.code , )
360 #-----------------------------------------------------------------------------
365 # Packet payload contains:
367 # <username> + char( 0 ) + <foldername> + char( 0 )
369 def notifyDeliver( user , folder ) :
371 if user not in g_userNotificationFilter :
375 s = socket.socket( socket.AF_INET , socket.SOCK_DGRAM )
376 msg = user + chr( 0 ) + folder + chr( 0 )
377 for address in g_notificationAddresses :
378 s.sendto( msg , ( address , g_notificationPort ) )
383 # Deliver a mail to Cyrus for user 'username' in
384 # folder 'folderName' (or default folder if not
387 def deliverTo( username , folderName = None ) :
390 pseudoFolderName = 'INBOX'
391 folderName = 'user.' + username
393 pseudoFolderName = 'INBOX.' + folderName
394 folderName = 'user.' + username + '.' + folderName
397 # Build the command line for running deliver.
399 cmd = [ g_pathCyrusDeliver ]
400 cmd += [ '-a' , username ]
401 cmd += [ '-m' , folderName ]
404 logMessage( 'TEST MODE: Delivering mail in `%s\' requested.' % ( folderName , ) )
405 logMessage( 'TEST MODE: Command: %r.' % cmd )
409 rc , stdout , stderr = pipe( cmd , g_mailText )
411 logMessage( 'Error running `%s\': %s.' % ( cmd[ 0 ] , e[ 1 ] ) )
415 logMessage( 'Message delivered in folder `%s\'.' % folderName )
416 notifyDeliver( username , pseudoFolderName )
418 errorMessage = stdout.rstrip()
420 # Extract raw error message
424 # +user.fred: Message contains invalid header
426 m = errorMessage.split( ': ' , 1 )
432 if m == 'Message contains invalid header' :
435 elif m == 'Mailbox does not exist' :
439 # FIXME: DATAERR ok here ?
441 logMessage( 'Refused by Cyrus: [%s] `%s\'.' % ( rcMsg , errorMessage ) )
444 #--[ Antivirus ]--------------------------------------------------------------
447 # Return virus list from the output of ClamAV.
449 def extractVirusList( clamdOutput ) :
452 for line in clamdOutput.splitlines() :
453 r = extractVirusList.reClamdVirus.search( line.rstrip() )
454 if r == None : continue
455 res.append( r.group( 1 ) )
458 extractVirusList.reClamdVirus = re.compile( r'^[^:]+: (\S+) FOUND$' )
463 # Return True if mail is clean.
465 def antivirusScan() :
467 cmd = [ g_pathClamdscan , '-' ]
470 logMessage( 'TEST MODE: Virus scan requested.' )
471 logMessage( 'TEST MODE: Command: %r.' % cmd )
474 rc , stdout , stderr = pipe( cmd , g_mailText )
475 output = stderr or ''
476 #logMessage( 'clamdscan returned %s' % rc )
478 raise 'Unable to scan for viruses (%s)' % cmd
482 viruses = extractVirusList( output )
484 msg += ' [%s]' % ' '.join( viruses )
488 #--[ Antispam ]---------------------------------------------------------------
493 # Return True if mail is correct.
497 if not g_user : return True
499 cmd = [ g_pathSpamProbe ]
500 cmd += [ '-d' , g_pathSpamProbeDb + '/' + g_user + '/' ]
504 logMessage( 'TEST MODE: Spam scan requested.' )
505 logMessage( 'TEST MODE: Command: %r.' % cmd )
508 rc , stdout , stderr = pipe( cmd , g_mailText )
509 r = ( stdout or '' ).split()
510 return r[ 0 ] != 'SPAM'
512 #-----------------------------------------------------------------------------
514 def errorNameToErrorCode( code ) :
517 if code == 'nouser' :
519 elif code == 'tempfail' :
521 elif code == 'dataerr' :
529 #-----------------------------------------------------------------------------
532 # FIXME: I think it could be better to cache the parsed
533 # configuration, and also to cache the result of the validator
534 # so that we don't run the test each time this script is run !
536 def readUserRules( user ) :
540 # Read the configuration file.
543 f = open( g_directoryRules + '/' + user + '.mf' )
549 # Parse the configuration.
551 conf = confparser.parse( conf )
554 # Validate the configuration.
556 r = mfvalidator.checkConf( conf )
561 # Output an error message in case an error is encountered.
565 msg = 'at line %s, column %s' % ( meta[ 0 ] , meta[ 1 ] )
566 logMessage( 'Error in configuration file %s: %s' % ( msg , exception ) )
568 #-----------------------------------------------------------------------------
571 # Test a match rule against a particular header.
573 def ruleMatch( header , matchType , text ) :
575 if matchType == 'match' :
577 return re.search( text , header , re.I ) != None
579 logMessage( 'Error with regex `%s\' from %s\'s user configuration.' % ( text , g_user ) )
581 elif matchType == 'is' :
582 return header.strip().lower() == text.strip().lower()
583 elif matchType == 'contains' :
584 return header.lower().find( text.strip().lower() ) != -1
586 logMessage( 'Unknown match type `%s\' from %s\'s user configuration.' % ( matchType , g_user ) )
590 # Test rule 'rule' against the mail.
592 def testRule( rule ) :
597 return all( rule[ 2 ] , testRule )
600 return some( rule[ 2 ] , testRule )
603 return not some( rule[ 2 ] , testRule )
612 headerName , matchType , text = args
613 headers = map( normalizeBlank , g_mail.get_all( headerName ) or [] )
614 return some( headers , lambda header : ruleMatch( header , matchType , text ) )
620 return g_mail == None
625 if cmd == 'infected' :
626 return g_mail != None and not antivirusScan()
632 return g_mail != None and not spamScan()
637 logMessage( 'Unknown rule name `%s\'.' % ( cmd , ) )
640 #-----------------------------------------------------------------------------
643 # Find the destination folder for user 'user' according to rules defined for
644 # him/her against the current mail.
648 def checkUserRules( user ) :
650 action = FileToFolderAction( None )
652 conf = readUserRules( user )
656 logMessage( 'No rules defined for user `%s\'.' % user )
660 logMessage( 'Empty rules set or syntax error encountered for user `%s\'.' % user )
664 for item in conf[ 2 ] :
665 actionName , args , subs = item[ : 3 ]
667 if some( subs , testRule ) :
668 if actionName == 'folder' :
669 action = FileToFolderAction( args[ 0 ] )
670 elif actionName == 'reject' :
671 action = CustomErrorCodeAction( errorNameToErrorCode( args[ 0 ] ) )
673 logMessage( 'Unknown action `%s\'.' % actionName )
678 #-----------------------------------------------------------------------------
681 # Read mail from standard input.
688 # FIXME: Should we read the mail block by block, so that
689 # we can at least read and back up part of the standard input
690 # in case an error occurs? (broken pipe for example)
692 # If an error occurs, and since we can't back up the mail,
693 # we ask sendmail to retry later.
696 g_mailText = sys.stdin.read()
698 logMessage( getTraceBack() )
699 sys.exit( EX_TEMPFAIL )
702 # Check if the mail is bigger than a predefined amount.
704 def checkForLargeMail() :
706 if len( g_mailText ) > g_maxMailSize :
707 logMessage( 'Message too big (%s bytes). Not filtering it.' % len( g_mailText ) )
710 rc = deliverTo( g_user )
712 logMessage( 'Unable to deliver it to user `%s\'.' % g_user )
718 # Check if a user was specified on the command line.
726 logMessage( 'No user specified.' )
731 # Parse the mail using email python standard module.
741 g_mail = email.message_from_string( g_mailText , strict = False )
743 logMessage( getTraceBack() )
746 # Dispatch the mail to the correct folder (deduced from rules.)
748 # Return an error code.
752 action = checkUserRules( g_user )
755 # If custom error code is returned, stop processing
756 # here (mail is not saved.)
758 if isinstance( action , CustomErrorCodeAction ) :
759 logMessage( 'Custom exit code is %d.' % r.code )
763 # File the mail into the specified folder.
765 if isinstance( action , FileToFolderAction ) :
767 # We got a folder name (or None for default folder.)
769 folder = action.folder
771 if folder : pseudoName += '.' + folder
774 # Try to deliver in the named folder
776 rc = deliverTo( g_user , folder )
778 # If we get an error code, then we deliver the mail to default folder,
779 # except if the error was "data error" or if we already tried to deliver
780 # it to default folder.
782 if rc not in [ EX_OK , EX_DATAERR ] and folder != None :
783 logMessage( 'Error delivering to folder %s of user `%s\'.' % ( pseudoName , g_user ) )
784 logMessage( 'Mail will go into default folder.' )
785 rc = deliverTo( g_user )
789 # Here we also handle the case of EX_DATAERR not handled above.
792 logMessage( 'Error delivering to default folder of user `%s\'.' % ( g_user , ) )
794 # Since it's still not ok, backup the mail.
798 # All error codes other than "data error" are translated to
799 # the "no user" error code.
803 if rc != EX_DATAERR :
813 raise Exception( 'Unknown action type' )
825 return dispatchMail()
827 logMessage( getTraceBack() )
830 #-----------------------------------------------------------------------------
def checkConfiguration( filename ) :
    """Check the configuration file 'filename'.

    Delegates to mfvalidator.checkFile(); whatever that call returns is
    discarded, so this function always returns None.
    """
    mfvalidator.checkFile( filename )
836 #-----------------------------------------------------------------------------
840 print '''Usage: mail.filter [OPTIONS] username < EMAIL
842 -h, --help Print this help.
843 -v, --verbose Verbose mode, output log to stdout.
844 -t, --test Test mode. Don't feed the mail to Cyrus, don't
845 do backup, and don't write anything into log file.
846 -l, --log=FILENAME Set log filename.
847 -r, --rules=DIRECTORY Directory where are located users rules.
848 -c, --check-config=FILENAME
849 Check syntax and structure of configuration file
853 print 'Current paths are:\n'
854 print ' spamprobe : %s' % g_pathSpamProbe
855 print ' clamd : %s' % g_pathClamdscan
856 print ' deliver : %s' % g_pathCyrusDeliver
857 print ' log : %s' % g_pathLog
859 print 'Current directories are:\n'
860 print ' spamprobedb: %s' % g_pathSpamProbeDb
861 print ' rules : %s' % g_directoryRules
863 Latest version is available from:
865 arch://arch.intra.tuxee.net/2004/mail-filter
867 Report bugs to <fj@tuxee.net>.'''
871 global g_user, g_mail, g_mailText, g_copyLogToStdout, g_pathLog, g_directoryRules , g_testMode
873 #--[ Command line ]-------------------------------------------------------
877 _getopt = getopt.gnu_getopt
879 _getopt = getopt.getopt
882 options , parameters = \
883 _getopt( sys.argv[ 1 : ] ,
885 ( 'help' , 'verbose' , 'test' , 'log=' , 'rules=' , 'check-config=' ) )
886 except getopt.GetoptError , e :
887 myName = sys.argv[ 0 ].split( '/' )[ -1 ]
888 print '%s: %s' % ( myName , e[ 0 ] )
889 print 'Try `%s --help\' for more information.' % myName
892 for option , argument in options :
893 if option in [ '-h' , '--help' ] :
896 elif option in [ '-v' , '--verbose' ] :
897 g_copyLogToStdout = True
898 elif option in [ '-t' , '--test' ] :
900 elif option in [ '-l' , '--log' ] :
902 elif option in [ '-r' , '--rules' ] :
903 g_directoryRules = argument
904 elif option in [ '-c' , '--check-config' ] :
905 checkConfiguration( argument )
909 # At most one parameter expected.
911 if len( parameters ) > 1 :
913 # We just log an error message. We keep going
914 # so as not to lose the mail!
916 logMessage( 'Warning: Expected only one user name.' )
919 g_user = parameters[ 0 ]
921 #--[ Core ]---------------------------------------------------------------
923 logMessage( 'Running mail.filter for user `%s\'.' % g_user )
927 if __name__ == '__main__' :