Below is the file 'rblsummary.py' from this revision. You can also download the file.

#!/usr/bin/env python2.4

import time
import csv
import sys
from twistedrbl import rbls_to_check

def summarise(lines, blacklists=None, out=None, interval=10 * 60):
    """Summarise how often each RBL agrees with the KAS verdict, as CSV.

    Each input line must consist of exactly six single-space-separated
    fields::

        <timestamp> <blacklist> <ip> <from_domain> in_kas=<bool> in_rbl=<bool>

    where <timestamp> is an integer (it is passed to time.ctime, so it is
    treated as seconds since the epoch).  A verdict counts as "spam" only
    when the field is literally ``in_kas=True`` / ``in_rbl=True``.  Lines
    that do not match this layout are skipped.

    A header row is written when the first valid line is seen.  After that
    a data row is written, and all counters reset, every time a line arrives
    whose timestamp is more than `interval` past the start of the current
    window; a final row is written for whatever is left at end of input.
    The input is expected to be sorted by timestamp.

    Arguments:
        lines      -- iterable of log lines (e.g. an open file).
        blacklists -- iterable of blacklist names that always get columns;
                      defaults to the module-level rbls_to_check.
        out        -- file-like object to write CSV to; defaults to
                      sys.stdout.  It is flushed after every row.
        interval   -- window length, in the same units as the timestamps
                      (default: 600).

    Blacklists that appear in the input but were not pre-registered get
    columns appended after the known ones.  If one first appears after the
    header has been written, later rows are wider than the header.

    Nothing at all is written if the input contains no valid line.
    """
    class RBLStats:
        """Agreement counters for one blacklist within one window."""

        def __init__(self, domain):
            self.domain = domain
            self.reset_counters()

        def reset_counters(self):
            self.kas_y, self.kas_n = 0, 0
            self.y_agree, self.n_agree = 0, 0
            self.y_disagree, self.n_disagree = 0, 0

        def update(self, kas_is_spam, our_is_spam):
            # "y"/"n" is the RBL's answer; agree/disagree is relative to
            # the KAS verdict for the same message.
            if kas_is_spam:
                self.kas_y += 1
                if our_is_spam:
                    self.y_agree += 1
                else:
                    self.n_disagree += 1
            else:
                self.kas_n += 1
                if our_is_spam:
                    self.y_disagree += 1
                else:
                    self.n_agree += 1

    if blacklists is None:
        blacklists = rbls_to_check
    if out is None:
        out = sys.stdout
    writer = csv.writer(out)

    def write_header(rbls):
        row = ["Starting Time", "KAS Total", "Kas Y", "Kas N"]
        for rbl in rbls:
            row.append("%s Y agree" % rbl.domain)
            row.append("%s Y disagree" % rbl.domain)
            row.append("%s N agree" % rbl.domain)
            row.append("%s N disagree" % rbl.domain)
        writer.writerow(row)
        out.flush()

    def write_update(rbls, starting_at):
        # Only called once at least one valid line has been seen, so
        # `rbls` is never empty here.
        # NOTE(review): the KAS totals are taken from the first blacklist
        # only, which presumably relies on every message being logged once
        # per blacklist -- confirm against the log producer.
        kas_y = rbls[0].kas_y
        kas_n = rbls[0].kas_n
        row = [time.ctime(starting_at), kas_y + kas_n, kas_y, kas_n]
        for rbl in rbls:
            row.append(rbl.y_agree)
            row.append(rbl.y_disagree)
            row.append(rbl.n_agree)
            row.append(rbl.n_disagree)
            rbl.reset_counters()
        writer.writerow(row)
        out.flush()

    # `by_name` gives fast lookup; `ordered` keeps first-registration order
    # so that the CSV columns stay in the same position for the whole run
    # (plain dict ordering is not guaranteed to be stable as keys are added).
    by_name = {}
    ordered = []

    def stats_for(name):
        if name not in by_name:
            by_name[name] = RBLStats(name)
            ordered.append(by_name[name])
        return by_name[name]

    for name in blacklists:
        stats_for(name)

    last_summary = None
    for line in lines:
        try:
            (timestamp, blacklist, _ip_address, _from_domain,
             in_kas, in_rbl) = line.strip().split(' ')
            timestamp = int(timestamp)
        except ValueError:
            # Wrong number of fields or a non-numeric timestamp.
            continue
        if last_summary is None:
            # Register before writing the header so that a blacklist first
            # seen on the very first line still gets its header columns.
            stats_for(blacklist)
            write_header(ordered)
            last_summary = timestamp
        elif timestamp - last_summary > interval:
            # This line belongs to the next window: flush the old one first.
            write_update(ordered, last_summary)
            last_summary = timestamp
        stats_for(blacklist).update(in_kas == 'in_kas=True',
                                    in_rbl == 'in_rbl=True')

    # Left-over data from the final (partial) window.  Skipped when no valid
    # line was seen: there is no header and no meaningful start time.
    if last_summary is not None:
        write_update(ordered, last_summary)

if __name__ == '__main__':
    # The log on stdin must already be sorted (pre-sort the file before
    # piping it in): summarise() compares each timestamp with the start of
    # the current window as it reads.
    log_stream = sys.stdin
    summarise(log_stream)