The unified diff between revisions [8f58bb18..] and [22f61b7a..] is displayed below. It can also be downloaded as a raw diff.
#
#
# patch "potd/potd.py"
# from [624277921b79af3ff71f6713d69d7454ea3e30e1]
# to [c866ec6319f546df90f14ed9584bf3c01fe4c8a8]
#
============================================================
--- potd/potd.py 624277921b79af3ff71f6713d69d7454ea3e30e1
+++ potd/potd.py c866ec6319f546df90f14ed9584bf3c01fe4c8a8
@@ -1,16 +1,17 @@
-#!/usr/bin/env python2.4
+#!/usr/bin/env python
import urlparse
import urllib2
import pickle
import urllib
+import heapq
import time
import sha
import sys
import re
import os
-picture_uri = 'http://en.wikipedia.org/wiki/Wikipedia:Featured_pictures'
+picture_uri = 'http://en.wikipedia.org/w/index.php?title=Category:Wikipedia_featured_pictures'
cache_path = './cache/'
image_path = './images/'
tmp_path = './tmp/'
@@ -40,11 +41,24 @@ def pictures_of_the_day():
@cache_result()
def pictures_of_the_day():
+ heap = [picture_uri]
rv = []
- for line in retrieve_uri(picture_uri).readlines():
- m = re.match(r'^.*\"(/wiki/Image\:[^\"]+)\"', line)
- if m:
- rv.append("http://en.wikipedia.org" + m.groups()[0])
+ while heap:
+ uri = heapq.heappop(heap)
+ next_uri = None
+ for line in retrieve_uri(uri).readlines():
+ m = re.match(r'^.*href="([^"]+;from=[^"]+)', line)
+ if m:
+ candidate = "http://en.wikipedia.org" + m.groups()[0].replace('&amp;', '&')
+ if uri == picture_uri or candidate.find(uri) == -1:
+ next_uri = candidate
+ continue
+ m = re.match(r'^.*\"(/wiki/Image\:[^\"]+)\"', line)
+ if m:
+ rv.append("http://en.wikipedia.org" + m.groups()[0])
+ continue
+ if next_uri is not None:
+ heapq.heappush(heap, next_uri)
return rv
@cache_result()