The unified diff between revisions [092dff1e..] and [0eaf66b5..] is displayed below. It can also be downloaded as a raw diff.

This diff has been restricted to the following file: 'generators.py'

#
#
# patch "generators.py"
#  from [1f4da17464aa9497077d16c27ccb84e60453057d]
#    to [550b478b5c306ae773cd42e8e5aff775cb0b8aed]
#
============================================================
--- generators.py	1f4da17464aa9497077d16c27ccb84e60453057d
+++ generators.py	550b478b5c306ae773cd42e8e5aff775cb0b8aed
@@ -4,6 +4,7 @@ import libxml2
 import sha
 import socket
 import libxml2
+import urlparse

 s_splitter = re.compile(r'[\.\!]+')
 w_splitter = re.compile(r'[\,\;\"\t\-\(\)\* ]+')
@@ -48,9 +49,10 @@ class Callback:
 	return rv

 class Callback:
-	def __init__(self, queue, article_ids):
+	def __init__(self, queue, article_ids, new_article_ids):
 		self.queue = queue
 		self.article_ids = article_ids
+		self.new_article_ids = new_article_ids
 	def startDocument(self):
 		self.gather_chars = False
 		self.in_item = False
@@ -61,7 +63,8 @@ class Callback:
 		elif self.in_guid: self.guid += data
 	def endDocument(self): pass
 	def startElement(self, tag, attrs):
-		if tag == "item": self.in_item = True
+		if tag == "item":
+			self.in_item = True
 		if tag == "guid":
 			self.guid = ""
 			self.in_guid = True
@@ -70,7 +73,8 @@ class Callback:
 	def endElement(self, tag):
 		if tag == "item":
 			self.in_item = False
-			self.article_ids.add(self.current_guid)
+			if self.current_guid and self.current_guid not in self.article_ids:
+				self.new_article_ids.add(self.current_guid)
 		if tag == "guid":
 			self.in_guid = False
 			self.current_guid = sha.new(self.guid).hexdigest()
@@ -82,9 +86,9 @@ class Callback:
 			self.chars = ""
 			self.gather_chars = False

-def RSSGenerator(data_generator, article_ids=[]):
+def RSSGenerator(data_generator, article_ids, new_article_ids):
 	queue = []
-	callback = Callback(queue, article_ids)
+	callback = Callback(queue, article_ids, new_article_ids)
 	ctxt = libxml2.createPushParser(callback, "", 0, "")
 	for data in data_generator:
 		ctxt.parseChunk(data, len(data), 0)
@@ -95,8 +99,9 @@ def RSSGenerator(data_generator, article
 	queue.reverse()
 	while len(queue):
 		yield queue.pop()
-
-def HTTPGenerator(host, uri):
+
+def HTTPGenerator(uri):
+	host = urlparse.urlparse(uri)[1]
 	fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 	fd.connect((host, 80))
 	fd.send("GET %s HTTP/1.0\r\nHost: %s\r\nUser-agent: http://grahame.angrygoats.net/lj-haiku/; grahame@angrygoats.net\r\nX-Goat: yes\r\n\r\n" % (uri, host))