The unified diff between revisions [584728dc..] and [600e1471..] is displayed below. It can also be downloaded as a raw diff.

#
#
# add_file "rblsummary.py"
#  content [694292ba787f609475800c1551cb94be95957058]
#
# patch "twistedrbl.py"
#  from [0728368049f1d95643adeff4b30222de02b376f3]
#    to [60c9ddd5584ef80d61332138e79d1f43fee73fd0]
#
#   set "rblsummary.py"
#  attr "mtn:execute"
# value "true"
#
============================================================
--- rblsummary.py	694292ba787f609475800c1551cb94be95957058
+++ rblsummary.py	694292ba787f609475800c1551cb94be95957058
@@ -0,0 +1,77 @@
+#!/usr/bin/env python2.4
+
+import time
+import csv
+import sys
+from twistedrbl import rbls_to_check
+
+def summarise(lines):
+    class RBLStats:
+        def __init__(self, domain):
+            self.domain = domain
+            self.reset_counters()
+        def reset_counters(self):
+            self.kas_y, self.kas_n = 0, 0
+            self.y_agree, self.n_agree, self.y_disagree, self.n_disagree = 0, 0, 0, 0
+        def update(self, kas_is_spam, our_is_spam):
+            if kas_is_spam:
+                self.kas_y += 1
+                if our_is_spam:
+                    self.y_agree += 1
+                else:
+                    self.n_disagree += 1
+            else:
+                self.kas_n += 1
+                if our_is_spam:
+                    self.y_disagree += 1
+                else:
+                    self.n_agree += 1
+
+    writer = csv.writer(sys.stdout)
+    def write_header(rbls):
+        row = ["Starting Time", "KAS Total", "Kas Y", "Kas N"]
+        for rbl in rbls:
+            row.append("%s Y agree" % rbl.domain)
+            row.append("%s Y disagree" % rbl.domain)
+            row.append("%s N agree" % rbl.domain)
+            row.append("%s N disagree" % rbl.domain)
+        writer.writerow(row)
+        sys.stdout.flush()
+
+    def write_update(rbls, starting_at):
+        row = [time.ctime(starting_at)]
+        kas_y = rbls[0].kas_y
+        kas_n = rbls[0].kas_n
+        kas_total = kas_y + kas_n
+        row += [kas_total, kas_y, kas_n]
+        for rbl in rbls:
+            row.append(rbl.y_agree)
+            row.append(rbl.y_disagree)
+            row.append(rbl.n_agree)
+            row.append(rbl.n_disagree)
+            rbl.reset_counters()
+        writer.writerow(row)
+        sys.stdout.flush()
+
+    last_summary = None
+    rblstats = {}
+    for blacklist in rbls_to_check:
+        rblstats[blacklist] = RBLStats(blacklist)
+    for line in lines:
+        timestamp, blacklist, ip_address, in_kas, in_rbl = line.strip().split(' ')
+        timestamp = int(timestamp)
+        if last_summary == None:
+            write_header(rblstats.values())
+            last_summary = timestamp
+        elif timestamp - last_summary > 10 * 60:
+            write_update(rblstats.values(), last_summary)
+            last_summary = timestamp
+        if not rblstats.has_key(blacklist):
+            rblstats[blacklist] = RBLStats(blacklist)
+        rblstats[blacklist].update(in_kas=='in_kas=True', in_rbl=='in_rbl=True')
+    # left-over data..
+    write_update(rblstats.values(), last_summary)
+
+if __name__ == '__main__':
+    # NOTE: stdin must be pre-sorted (presumably by timestamp); summarise() does not sort.
+    summarise(sys.stdin)
============================================================
--- twistedrbl.py	0728368049f1d95643adeff4b30222de02b376f3
+++ twistedrbl.py	60c9ddd5584ef80d61332138e79d1f43fee73fd0
@@ -6,6 +6,7 @@ import datetime, time
 import fcntl, os, sys, re
 import datetime, time

+rbls_to_check = ['bl.spamcop.net', 'sbl-xbl.spamhaus.org', 'dnsbl.sorbs.net']
 spam_info_re = re.compile(r'^(?P<date>[^ ]+ [^ ]+ [^ ]+) (?P<scannerhost>[^ ]+) filter-module\[(?P<pid>\d+)\]: (?P<queueid>\w+): spam_status=(?P<spamstatus>[^ ]+) \(from=(?P<from>[^ ]*) to=(?P<to>[^ ]*) uwa_client_ip= ?(?P<ip>[\d\.]+) \((?P<class>[^\)]+)\) message_id= ?(?P<message_id>.*)$')

 def set_nonblocking(fd):
@@ -40,7 +41,7 @@ class Echo(basic.LineReceiver):
         if match:
             match = match.groupdict()
             ip_address = match['ip']
-            for rbl in ['bl.spamcop.net', 'sbl-xbl.spamhaus.org', 'dnsbl.sorbs.net']:
+            for rbl in rbls_to_check:
                 self.track += 1
                 ipr = '.'.join(reversed(ip_address.split('.'))) + '.' + rbl
                 log_fd.write('%s (%d) issued\n' % (ipr, self.track))