Below is the file 'ttparse.py' from this revision. You can also download the file.

#!/usr/bin/python
from sys import stdin
from string import find, split, strip, join
from datetime import date
import re
import calendar
from sets import Set
from random import random
from datetime import datetime, timedelta
from calendar import weekday
import sha
import cgi
import cgitb
from urllib2 import urlopen
from urllib import urlencode

# Install the cgitb handler so uncaught exceptions are reported in the CGI
# response instead of failing silently.
cgitb.enable()

# Calendar year that timetable week numbers are converted against
# (used by dateforweek).
THISYEAR=2007
# Adjustment added to every week number in dateforweek before it is turned
# into a date.
WEEKMUNGE=-1

# Weekday of 1 January THISYEAR as returned by calendar.weekday (0 = Monday);
# dateforweek subtracts it to step back to the start of that week.
yearstartoffset = weekday( THISYEAR, 1, 1 )

# Week-range dicts for semester 1 and 2; assigned in main() from getsem()
# and read by parseweeks() for the "Sem1"/"Sem2" shorthands.
sem1 = None
sem2 = None

# Incremented once per event in makevcal and mixed into each event's UID hash.
counter = 1

def getcgilines( ):
	"""Fetch the timetable page requested through the CGI form.

	Every form field whose name starts with 'ttparse_' is forwarded, with
	that prefix removed, as POST data to the timetable server.  The page to
	request is named by the 'ttparse_url' field (the first value is used if
	the field was supplied more than once).

	Returns the response body as a list of lines.
	Raises KeyError if the request carries no 'ttparse_url' field.
	"""
	prefix = 'ttparse_'
	form = cgi.FieldStorage()

	# Collect (name-without-prefix, value) pairs; a repeated field shows up
	# as a list of entries and each of them is forwarded.
	reqfields = []
	for name in form.keys():
		if not name.startswith(prefix):
			continue
		field = form[name]
		if isinstance(field, list):
			for item in field:
				reqfields.append((name[len(prefix):], item.value))
		else:
			reqfields.append((name[len(prefix):], field.value))

	target = form['ttparse_url']
	if isinstance(target, list):
		target = target[0]
	# NOTE(review): this only removes the listed characters from the two ends
	# of the value; characters in the middle (e.g. "/") are passed through.
	page = target.value.strip("@;\\/%$&? ")

	response = urlopen("http://www.timetable.uwa.edu.au/Curr/%s" % page,
				urlencode(reqfields))
	# Close the connection even if reading the body fails.
	try:
		return response.readlines()
	finally:
		response.close()

def main( ):
	global sem1, sem2

	print "Content-type: text/calendar"
	print 'Content-Disposition: attachment; filename="timetable.ics"'
	print

	lines = getcgilines( )

	sem1 = getsem( lines, 1 )
	sem2 = getsem( lines, 2 )

	t1 = parsetable( lines, 1 )
	t2 = parsetable( lines, 2 )

	tt = []
	if len(t1) > 0:
		tt = tt + t1
	if len(t2) > 0:
		tt = tt + t2

	print makevcal( tt )

def makevcal( tt ):
	"""Render a list of event dicts as the text of a VCALENDAR.

	tt -- iterable of event dicts; each must provide "location", "code" and
	      "type", plus whatever vcaldates() reads from it.

	Returns the calendar as one string with lines separated by a newline and
	no trailing newline.  The module-level counter is advanced once per
	event so that every generated UID differs.
	"""
	global counter

	parts = [
		"BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//Matt Johnston//Timetable Scraper 1.0//EN",
		"BEGIN:VTIMEZONE\nTZID:Australia/Perth\nLAST-MODIFIED:20040229T101040Z\nBEGIN:STANDARD\nDTSTART:19321213T204552\nTZOFFSETTO:+0800\nTZOFFSETFROM:+0000\nTZNAME:WST\nEND:STANDARD\nEND:VTIMEZONE",
	]

	for e in tt:
		now = datetime.utcnow()
		# The UID is a SHA-1 over the event, the current time, a running
		# counter and a random number, so repeated runs never collide.
		seed = "%s %s %d %10.10f" % ( str(e), now.isoformat(), counter,
						random()*10000000 )
		digest = sha.new(seed).hexdigest()
		counter = counter + 1

		parts.append( "BEGIN:VEVENT" )
		parts.append( "LOCATION:%s" % e["location"] )
		parts.append( "DTSTAMP:%s" % now.strftime("%Y%m%dT%H%M%SZ") )
		parts.append( "UID:%s-ttparse@matt.ucc.asn.au" % digest )
		parts.append( "SEQUENCE:0" )
		parts.append( "SUMMARY:%s %s" % ( e["code"], e["type"] ) )
		parts.append( vcaldates( e ) )
		# No empty line after DESCRIPTION: an empty line is not a valid
		# content line inside a VEVENT.
		parts.append( "DESCRIPTION:%s" % e["location"] )
		parts.append( "END:VEVENT" )

	parts.append( "END:VCALENDAR" )

	return "\n".join( parts )

def dateforweek( week, day, hour = 0, min = 0):
	"""Return the local timestamp for a timetable week/day as YYYYMMDDTHHMMSS.

	week -- timetable week number; WEEKMUNGE is added to it before use.
	day  -- day offset from the start of the week; yearstartoffset (the
	        weekday of 1 January THISYEAR) is subtracted so that offsets are
	        counted from the beginning of the week containing 1 January.
	hour, min -- time of day placed on the resulting date.
	"""
	shift = timedelta( weeks = week + WEEKMUNGE,
				days = day - yearstartoffset )
	newyear = datetime( THISYEAR, 1, 1, hour, min )
	stamp = newyear + shift
	return stamp.strftime("%Y%m%dT%H%M%S")

def vcaldates( e ):
	"""Build the date-related iCalendar properties for one event.

	e -- event dict with "day", "time", "duration" and a "weeks" dict
	     holding "start", "end" and "exlist" (weeks in between on which the
	     event does not run).

	Returns newline-separated DTSTART, DTEND and RRULE lines, followed by an
	EXDATE line when at least one week is excluded.
	"""
	weeks = e["weeks"]

	startdate = dateforweek( weeks["start"], e["day"], e["time"] )
	# The event ends 45 minutes into its last hour slot.
	enddate = dateforweek( weeks["start"], e["day"],
				e["time"] + e["duration"], 45 )

	props = [
		"DTSTART;TZID=Australia/Perth:%s" % startdate,
		"DTEND;TZID=Australia/Perth:%s" % enddate,
		# One occurrence per week from the first to the last week inclusive.
		"RRULE:FREQ=WEEKLY;COUNT=%d" % ( weeks["end"] - weeks["start"] + 1 ),
	]

	# EXDATE needs at least one value, so leave the property out entirely
	# when nothing is excluded; sort so the output order is deterministic.
	excluded = sorted( weeks["exlist"] )
	if excluded:
		stamps = [ dateforweek( w, e["day"], e["time"] ) for w in excluded ]
		props.append( "EXDATE;TZID=Australia/Perth:%s" % ",".join( stamps ) )

	return "\n".join( props )

#\nDTSTART;TZID=Australia/Perth:%s

def getsem( lines, num ):
	"""Extract the teaching weeks of semester `num` from the page.

	Uses the first line containing "<STRONG>Semester <num>" and reads the two
	week ranges from its "... weeks A to B and C to D ..." text.

	Returns the dict produced by getdates() for those two ranges.
	Raises IndexError if no such line exists, and AttributeError if the line
	does not have the expected wording.
	"""
	marker = "<STRONG>Semester %d" % num
	semdates = [ line for line in lines if marker in line ][0]

	m = re.match(r".* weeks (?P<a1>\d*) to (?P<a2>\d*) and (?P<b1>\d*) to (?P<b2>\d*).*", semdates)

	first = ( int(m.group("a1")), int(m.group("a2")) )
	second = ( int(m.group("b1")), int(m.group("b2")) )

	return getdates([first, second])

def getdates( datelist ):
	"""Summarise a list of (start_week, end_week) pairs.

	datelist -- iterable of inclusive (start, end) week-number pairs.

	Returns a dict with
	  "start"  -- the smallest start week,
	  "end"    -- the largest end week,
	  "exlist" -- set of weeks between start and end (inclusive) that are
	              not covered by any of the pairs.
	For an empty datelist, start is 100000, end is 0 and exlist is empty.
	"""
	included = set()
	start = 100000
	end = 0

	for first, last in datelist:
		start = min(start, first)
		end = max(end, last)
		included.update(range(first, last + 1))

	# Weeks inside the overall span that none of the ranges covers.
	excluded = set(range(start, end + 1)) - included

	return {"start": start, "end": end, "exlist": excluded}

def event_cmp( x, y ):
	"""Order two events by day, then by time.

	Returns -1, 0 or 1 in the manner of a comparison function.  Written
	without the cmp builtin so that it does not depend on that name being
	available.
	"""
	a = ( x["day"], x["time"] )
	b = ( y["day"], y["time"] )
	return (a > b) - (a < b)

def aggregate_continued( eventlist ):
	"""Fold "(cont)" entries into the event they continue.

	eventlist -- list of event dicts with "day", "time", "code", "cont" and
	             "duration".  The list is sorted in place by (day, time).

	An event flagged "cont" whose code equals that of the preceding kept
	event is dropped and the kept event's "duration" is increased by one.

	Returns the same list object when it has fewer than two events,
	otherwise a new list of the remaining events.
	"""
	if len(eventlist) < 2:
		return eventlist

	# Stable sort by day, then time, so a continuation directly follows
	# the slot it continues.
	eventlist.sort( key = lambda ev: ( ev["day"], ev["time"] ) )

	merged = [ eventlist[0] ]
	for ev in eventlist[1:]:
		previous = merged[-1]
		if ev["cont"] and ev["code"] == previous["code"]:
			previous["duration"] += 1
		else:
			merged.append(ev)

	return merged

def parsetable( lines, sem ):
	"""Parse the timetable grid of semester `sem` into a list of events.

	lines -- the page as a list of lines.
	sem   -- semester number used to locate the table heading.

	The table runs from the line containing the semester heading up to (not
	including) the next line containing "</table>".  Within it, each line
	starting with "<tr>" opens a new row and each line starting with "<td "
	is a cell: the first cell of a row gives the row's time, the others are
	passed to getevent().

	Returns the events after aggregate_continued() has merged continuations.
	"""
	heading = "<p><font size=2>Semester %d</font>" % sem

	# Isolate this semester's table.
	table = []
	inside = False
	for line in lines:
		if not inside:
			if heading in line:
				inside = True
				table.append(line)
			continue
		if "</table>" in line:
			break
		table.append(line)

	rowtimes = {}
	events = []
	row = 0
	col = 0

	for line in table:
		if line.startswith("<tr>"):
			row += 1
			col = 0
		elif line.startswith("<td "):
			col += 1
			if col == 1:
				# First cell of the row carries the time label.
				rowtimes[row] = rowtime( line )
			else:
				found = getevent( line )
				if found:
					found["row"] = row
					# Column 2 is the first day column, i.e. day 0.
					found["day"] = col - 2
					events.append(found)

	# Times are attached afterwards, once every row's label is known.
	for found in events:
		found["time"] = rowtimes[found["row"]]

	return aggregate_continued( events )


def getevent( l ):
	"""Parse one table cell line into an event dict.

	Returns None when the line does not look like an event cell.  Otherwise
	returns a dict with "code", "type", "location", "weeks" (the result of
	parseweeks() on the cell's "Sem..."/"Wk..." text), "cont" (True when the
	line contains "(cont)") and "duration" (initially 0).
	"""
	m = re.match(r".*<b>(?P<unitcode>\S+) .. (?P<type>.*)</b>.*(?P<weeks>(Sem|Wk).*)</font>.*\[(?P<place>.*)\].*", l)
	if m is None:
		return None

	return {
		"code": m.group("unitcode"),
		"type": m.group("type"),
		"location": m.group("place"),
		"weeks": parseweeks( m.group("weeks") ),
		"cont": "(cont)" in l,
		"duration": 0,
	}


def parseweeks( s ):
	"""Turn a cell's week specification into a week-range dict.

	"Sem1" and "Sem2" (after trimming whitespace) return the module-level
	sem1 / sem2 values.  Anything else has the characters "W", "k", "s" and
	space stripped from both ends and is read as a comma-separated list of
	single weeks ("7") or inclusive ranges ("2-5"), which is passed to
	getdates().
	"""
	s = s.strip()

	if s == "Sem1":
		return sem1
	if s == "Sem2":
		return sem2

	weeks = []
	for item in s.strip("Wks ").split(","):
		bounds = item.split("-")
		first = int(bounds[0])
		if len(bounds) == 1:
			# A single week is a range that starts and ends on itself.
			last = int(bounds[0])
		else:
			last = int(bounds[1])
		weeks.append((first, last))

	return getdates(weeks)

def rowtime( l ):
	"""Read the hour (0-23) from a row's time cell, e.g. "<b>2 PM</b>".

	PM hours other than 12 are shifted by twelve, and 12 AM maps to 0.
	Raises AttributeError if the line does not contain a time label.
	"""
	m = re.match(".*<b>(?P<time>.*) (?P<ampm>.M)</.*", l)
	hour = int(m.group("time"))
	ampm = m.group("ampm")

	if ampm == "PM" and hour != 12:
		hour = hour + 12
	elif ampm == "AM" and hour == 12:
		# 12 AM is midnight, not noon.
		hour = 0

	return hour



# Produce the calendar only when run as a script (e.g. by the web server),
# not when the module is imported.
if __name__ == '__main__':
	main()