The unified diff between revisions [092dff1e..] and [0eaf66b5..] is displayed below. It can also be downloaded as a raw diff.
This diff has been restricted to the following file: 'generators.py'
#
#
# patch "generators.py"
# from [1f4da17464aa9497077d16c27ccb84e60453057d]
# to [550b478b5c306ae773cd42e8e5aff775cb0b8aed]
#
============================================================
--- generators.py 1f4da17464aa9497077d16c27ccb84e60453057d
+++ generators.py 550b478b5c306ae773cd42e8e5aff775cb0b8aed
@@ -4,6 +4,7 @@ import libxml2
import sha
import socket
import libxml2
+import urlparse
s_splitter = re.compile(r'[\.\!]+')
w_splitter = re.compile(r'[\,\;\"\t\-\(\)\* ]+')
@@ -48,9 +49,10 @@ class Callback:
return rv
class Callback:
- def __init__(self, queue, article_ids):
+ def __init__(self, queue, article_ids, new_article_ids):
self.queue = queue
self.article_ids = article_ids
+ self.new_article_ids = new_article_ids
def startDocument(self):
self.gather_chars = False
self.in_item = False
@@ -61,7 +63,8 @@ class Callback:
elif self.in_guid: self.guid += data
def endDocument(self): pass
def startElement(self, tag, attrs):
- if tag == "item": self.in_item = True
+ if tag == "item":
+ self.in_item = True
if tag == "guid":
self.guid = ""
self.in_guid = True
@@ -70,7 +73,8 @@ class Callback:
def endElement(self, tag):
if tag == "item":
self.in_item = False
- self.article_ids.add(self.current_guid)
+ if self.current_guid and self.current_guid not in self.article_ids:
+ self.new_article_ids.add(self.current_guid)
if tag == "guid":
self.in_guid = False
self.current_guid = sha.new(self.guid).hexdigest()
@@ -82,9 +86,9 @@ class Callback:
self.chars = ""
self.gather_chars = False
-def RSSGenerator(data_generator, article_ids=[]):
+def RSSGenerator(data_generator, article_ids, new_article_ids):
queue = []
- callback = Callback(queue, article_ids)
+ callback = Callback(queue, article_ids, new_article_ids)
ctxt = libxml2.createPushParser(callback, "", 0, "")
for data in data_generator:
ctxt.parseChunk(data, len(data), 0)
@@ -95,8 +99,9 @@ def RSSGenerator(data_generator, article
queue.reverse()
while len(queue):
yield queue.pop()
-
-def HTTPGenerator(host, uri):
+
+def HTTPGenerator(uri):
+ host = urlparse.urlparse(uri)[1]
fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
fd.connect((host, 80))
fd.send("GET %s HTTP/1.0\r\nHost: %s\r\nUser-agent: http://grahame.angrygoats.net/lj-haiku/; grahame@angrygoats.net\r\nX-Goat: yes\r\n\r\n" % (uri, host))