Below is the file 'ttparse.py' from this revision. You can also download the file.
#!/usr/bin/python
"""CGI script that turns a UWA timetable page into an iCalendar download.

The script forwards every ``ttparse_*`` form field to a page under
http://www.timetable.uwa.edu.au/Curr/, parses the returned HTML line by line
and prints the result as a ``text/calendar`` attachment.

The code runs unchanged on Python 2.6+ and Python 3.
"""

from sys import stdin
from datetime import date
import re
import calendar
from random import random
from datetime import datetime, timedelta
from calendar import weekday
import hashlib
import sys
import time

try:  # Python 2
    from urllib2 import urlopen
    from urllib import urlencode
except ImportError:  # Python 3
    from urllib.request import urlopen
    from urllib.parse import urlencode

try:
    import cgi
    import cgitb
except ImportError:  # both modules were removed from the stdlib in Python 3.13
    cgi = None
    cgitb = None

if cgitb is not None:
    cgitb.enable()

# Calendar year that every generated date is placed in.
THISYEAR = 2007
# Added to every week number before it is converted to a date.
# NOTE(review): presumably aligns the timetable's week numbering with the
# calendar weeks of THISYEAR -- confirm whenever THISYEAR is changed.
WEEKMUNGE = -1
# Weekday (Monday == 0) of 1 January of THISYEAR.
yearstartoffset = weekday(THISYEAR, 1, 1)
# Week information for each semester, filled in by main() through getsem().
sem1 = None
sem2 = None
# Running event counter that is mixed into every generated UID.
counter = 1

_SEM_RE = re.compile(
    r".* weeks (?P<a1>\d+) to (?P<a2>\d+) and (?P<b1>\d+) to (?P<b2>\d+).*")
_EVENT_RE = re.compile(
    r".*<b>(?P<unitcode>\S+) .. (?P<type>.*)</b>"
    r".*(?P<weeks>(Sem|Wk).*)</font>.*\[(?P<place>.*)\].*")
_ROWTIME_RE = re.compile(r".*<b>(?P<time>.*) (?P<ampm>.M)</.*")


def getcgilines():
    """Fetch the timetable page requested by the CGI form.

    Every form field whose name starts with ``ttparse_`` is forwarded (with
    the prefix removed) as POST data.  The ``ttparse_url`` field names the
    page below ``/Curr/`` on the timetable server.

    Returns the response body as a list of text lines.
    Raises ValueError when ``ttparse_url`` is missing and RuntimeError when
    the ``cgi`` module is unavailable.
    """
    if cgi is None:
        raise RuntimeError("the cgi module is not available in this Python")

    form = cgi.FieldStorage()
    reqfields = []
    for key in form.keys():
        if not key.startswith("ttparse_"):
            continue
        field = form[key]
        # A repeated field comes back as a list of values.
        items = field if isinstance(field, list) else [field]
        for item in items:
            reqfields.append((key[8:], item.value))

    filename = form.getfirst("ttparse_url")
    if filename is None:
        raise ValueError("missing required form field 'ttparse_url'")
    # NOTE(review): strip() only trims both ends of the value, so characters
    # such as "/" or ".." in the middle still reach the remote server.
    filename = filename.strip("@;\\/%$&? ")

    urldata = urlencode(reqfields)
    if not isinstance(urldata, bytes):
        # Python 3's urlopen() requires the POST body as bytes.
        urldata = urldata.encode("ascii")

    response = urlopen(
        "http://www.timetable.uwa.edu.au/Curr/%s" % filename, urldata)
    try:
        lines = response.readlines()
    finally:
        response.close()

    if bytes is not str:
        # Python 3 hands back bytes while the parsers work on text.  latin-1
        # maps every byte, so decoding cannot fail.
        # NOTE(review): the page's real encoding is not known here.
        lines = [raw.decode("latin-1") for raw in lines]
    return lines


def main():
    """Run the CGI request: fetch, parse and print the calendar."""
    global sem1, sem2
    lines = getcgilines()
    sem1 = getsem(lines, 1)
    sem2 = getsem(lines, 2)
    events = parsetable(lines, 1) + parsetable(lines, 2)
    body = makevcal(events)

    # The headers are written only once the calendar exists, so a failure
    # above is not delivered to the browser as a broken .ics attachment.
    out = sys.stdout
    out.write("Content-type: text/calendar\n")
    out.write('Content-Disposition: attachment; filename="timetable.ics"\n')
    out.write("\n")
    out.write(body)
    out.write("\n")


def _ical_text(value):
    """Escape *value* for use as an iCalendar TEXT property value."""
    text = "%s" % (value,)
    text = text.replace("\\", "\\\\")
    text = text.replace(";", "\\;").replace(",", "\\,")
    return text.replace("\n", "\\n")


def makevcal(tt):
    """Render the event dicts in *tt* as one VCALENDAR string.

    Each event needs the keys "location", "code" and "type" plus everything
    vcaldates() reads.  The module-level ``counter`` is advanced once per
    event.  The returned text uses "\\n" line ends and has no trailing
    newline.
    """
    global counter
    parts = [
        "BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//Matt Johnston//Timetable Scraper 1.0//EN",
        "BEGIN:VTIMEZONE\nTZID:Australia/Perth\nLAST-MODIFIED:20040229T101040Z\nBEGIN:STANDARD\nDTSTART:19321213T204552\nTZOFFSETTO:+0800\nTZOFFSETFROM:+0000\nTZNAME:WST\nEND:STANDARD\nEND:VTIMEZONE",
    ]
    nowstamp = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
    for e in tt:
        # The UID only has to be unique, so hash the event together with the
        # clock, the running counter and a random number.
        seed = "%s %r %d %10.10f" % (
            str(e), time.time(), counter, random() * 10000000)
        counter = counter + 1
        if not isinstance(seed, bytes):
            seed = seed.encode("utf-8")
        uid = "%s-ttparse@matt.ucc.asn.au" % hashlib.sha1(seed).hexdigest()
        location = _ical_text(e["location"])
        parts.extend([
            "BEGIN:VEVENT",
            "LOCATION:%s" % location,
            "DTSTAMP:%s" % nowstamp,
            "UID:%s" % uid,
            "SEQUENCE:0",
            "SUMMARY:%s %s" % (_ical_text(e["code"]), _ical_text(e["type"])),
            vcaldates(e),
            "DESCRIPTION:%s" % location,
            "END:VEVENT",
        ])
    parts.append("END:VCALENDAR")
    return "\n".join(parts)


def dateforweek(week, day, hour=0, min=0):
    """Return the local timestamp "YYYYMMDDTHHMMSS" for a timetable slot.

    *week* is a timetable week number (WEEKMUNGE is added to it), *day* is
    the weekday with Monday == 0, *hour* and *min* give the time of day.
    """
    offset = timedelta(days=day - yearstartoffset, weeks=week + WEEKMUNGE)
    moment = datetime(THISYEAR, 1, 1, hour, min) + offset
    return moment.strftime("%Y%m%dT%H%M%S")


def vcaldates(e):
    """Return the DTSTART/DTEND/RRULE (and EXDATE) lines for event *e*.

    The event starts on the hour ``e["time"]`` and ends 45 minutes past the
    hour ``e["time"] + e["duration"]``.  It repeats weekly from the first to
    the last week in ``e["weeks"]``; weeks listed in ``exlist`` are excluded.
    """
    weeks = e["weeks"]
    first = weeks["start"]
    startdate = dateforweek(first, e["day"], e["time"])
    enddate = dateforweek(first, e["day"], e["time"] + e["duration"], 45)
    lines = [
        "DTSTART;TZID=Australia/Perth:%s" % startdate,
        "DTEND;TZID=Australia/Perth:%s" % enddate,
        "RRULE:FREQ=WEEKLY;COUNT=%d" % (weeks["end"] - first + 1),
    ]
    excluded = sorted(weeks["exlist"])
    if excluded:
        # An EXDATE property without a value is not valid iCalendar, so the
        # line is only written when there is something to exclude.
        lines.append("EXDATE;TZID=Australia/Perth:%s" % ",".join(
            dateforweek(w, e["day"], e["time"]) for w in excluded))
    return "\n".join(lines)


def getsem(lines, num):
    """Return the week information for semester *num*, or None.

    Looks for the first line containing "<STRONG>Semester <num>" and reads
    the two "weeks A to B and C to D" ranges from it.  None is returned when
    there is no such line or it does not have that shape.
    """
    marker = "<STRONG>Semester %d" % num
    for line in lines:
        if marker in line:
            break
    else:
        return None
    m = _SEM_RE.match(line)
    if m is None:
        return None
    return getdates([
        (int(m.group("a1")), int(m.group("a2"))),
        (int(m.group("b1")), int(m.group("b2"))),
    ])


# pass it a list of start/end week pairs, and get returned a dict
def getdates(datelist):
    """Summarise inclusive (start, end) week pairs.

    Returns a dict with "start" (smallest start week), "end" (largest end
    week) and "exlist" (set of weeks between them that no pair covers).
    """
    included = set()
    start = 100000
    end = 0
    for first, last in datelist:
        start = min(start, first)
        end = max(end, last)
        included.update(range(first, last + 1))
    return {
        "start": start,
        "end": end,
        "exlist": set(range(start, end + 1)) - included,
    }


def event_cmp(x, y):
    """Three-way comparison of two events by day, then by time."""
    a = (x["day"], x["time"])
    b = (y["day"], y["time"])
    return (a > b) - (a < b)


def aggregate_continued(eventlist):
    """Fold "(cont)" entries into the event that precedes them.

    *eventlist* is sorted in place by (day, time).  An entry whose "cont"
    flag is set and whose unit code equals that of the previously kept event
    is dropped and adds one to that event's "duration".
    """
    if len(eventlist) < 2:
        return eventlist
    # sort sequentially
    eventlist.sort(key=lambda ev: (ev["day"], ev["time"]))
    merged = [eventlist[0]]
    for ev in eventlist[1:]:
        if ev["cont"] and ev["code"] == merged[-1]["code"]:
            merged[-1]["duration"] += 1
        else:
            merged.append(ev)
    return merged


def parsetable(lines, sem):
    """Extract the events of semester *sem* from the page *lines*.

    The table starts at the line containing
    "<p><font size=2>Semester <sem></font>" and ends before the next line
    containing "</table>".  Inside it a line starting with "<tr>" opens a
    row and each line starting with "<td " is a cell: the first cell of a
    row holds the time, the following cells hold the days (Monday == 0).
    """
    marker = "<p><font size=2>Semester %d</font>" % sem

    # get just this semester's timetable
    table = []
    intable = False
    for line in lines:
        if intable:
            if "</table>" in line:
                break
            table.append(line)
        elif marker in line:
            intable = True
            table.append(line)

    timelist = {}
    eventlist = []
    rownum = 0
    colnum = 0
    for line in table:
        if line.startswith("<tr>"):
            rownum += 1
            colnum = 0
            continue
        if not line.startswith("<td "):
            continue
        colnum += 1
        if colnum == 1:
            timelist[rownum] = rowtime(line)
            continue
        event = getevent(line)
        if event:
            event["row"] = rownum
            event["day"] = colnum - 2
            eventlist.append(event)

    for event in eventlist:
        event["time"] = timelist[event["row"]]
    return aggregate_continued(eventlist)


def getevent(l):
    """Parse one table cell line into an event dict, or return None.

    The dict holds "code", "type", "location", "weeks" (see parseweeks),
    "cont" (True when the line contains "(cont)") and "duration" (0).
    """
    m = _EVENT_RE.match(l)
    if not m:
        return None
    return {
        "code": m.group("unitcode"),
        "type": m.group("type"),
        "location": m.group("place"),
        "weeks": parseweeks(m.group("weeks")),
        "cont": "(cont)" in l,
        "duration": 0,
    }


def parseweeks(s):
    """Turn a week specification into a getdates() dict.

    "Sem1" and "Sem2" yield the module-level semester information; anything
    else is read as comma-separated week numbers or "first-last" ranges,
    optionally prefixed by "Wk"/"Wks".

    Raises ValueError when a semester is referenced whose week range was not
    found on the page.
    """
    s = s.strip()
    for name, info in (("Sem1", sem1), ("Sem2", sem2)):
        if s == name:
            if info is None:
                raise ValueError(
                    "event refers to %s but its week range is unknown" % name)
            return info

    weeks = []
    for chunk in s.strip("Wks ").split(","):
        bounds = chunk.split("-")
        first = int(bounds[0])
        last = int(bounds[1]) if len(bounds) > 1 else first
        weeks.append((first, last))
    return getdates(weeks)


def rowtime(l):
    """Return the 24-hour clock hour named in the first cell of a row.

    The cell is expected to contain "<b>H AM</" or "<b>H PM</".
    Raises ValueError when the line holds no such label.
    """
    m = _ROWTIME_RE.match(l)
    if m is None:
        raise ValueError("no time label found in timetable row: %r" % (l,))
    hour = int(m.group("time"))
    ampm = m.group("ampm")
    if ampm == "PM":
        if hour != 12:
            hour += 12
    elif ampm == "AM" and hour == 12:
        # 12 AM is midnight.
        hour = 0
    return hour


if __name__ == '__main__':
    main()