diff options
Diffstat (limited to 'handlers.py')
| -rw-r--r-- | handlers.py | 118 |
1 files changed, 54 insertions, 64 deletions
diff --git a/handlers.py b/handlers.py index 8139c44..bfca09b 100644 --- a/handlers.py +++ b/handlers.py @@ -41,74 +41,64 @@ class Website(Site): def process(self): r = requests.get(self.address) if r: - i = 0 + session = DBSession() for m in self.regexes["list"].finditer(r.text): - #if i > 10: - #break - print(m["title"]) - id = str(ObjectId.from_datetime(datetime.strptime(m["updatedAt"], self.date_format))) - if id not in database["entries"]: - entry = { - "title" : html.unescape(m["title"]), - "voting" : { - "sum" : round(float(m["niveau"]), 0) if "niveau" in m.groups() else 0, - "count" : 0, - "votes" : None - }, - "tags" : [self.tag], - "files" : [], - "dependencies" : [], - "deleted" : False, - "description" : "", - "pic" : None, - "author" : { - "username" : html.unescape(m["author"]) - }, - "slug" : "", - "updatedAt" : datetime.strptime(m["updatedAt"], self.date_format).isoformat(), - "__v" : 1, - "comments" : None, - "id" : id, - "__intern" : { - "entryURL" : self.prefix + m["entryURL"] - } - } + id = ObjectId.from_datetime(datetime.strptime(m["updatedAt"], self.date_format)) + try: + entry = session.query(Upload).filter_by(id=id).one() + try: + entry.author = session.query(User).filter_by(name=m["author"]).one() + except db.orm.exc.NoResultFound: + pass - downloadURL = self.prefix + m["downloadURL"] + continue + except db.orm.exc.NoResultFound: + pass + + entry = Upload( + id=id, + title=html.unescape(m["title"]), + tags=[self.tag], + slug="".join(i for i in html.unescape(m["title"]).lower() if i in string.ascii_letters), + updated_at=datetime.strptime(m["updatedAt"], self.date_format), + _v=1 + ) + + try: + entry.author = session.query(User).filter_by(name=m["author"]).one() + except db.orm.exc.NoResultFound: + pass + + downloadURL = self.prefix + m["downloadURL"] + try: r = requests.get(downloadURL, stream=True, allow_redirects=True) - if not r: - continue - - locale.setlocale(locale.LC_ALL, "C") - entry["files"] = [{ - "metadata" : { - "hashes" : { - "sha1" : calculateHashForResource(r).hexdigest() - }, - "downloadURL" : downloadURL - }, - "aliases" : None, - "deleted" : False, - "_id" : entry["id"], - "filename" : m["downloadURL"].split("/")[-1], - "content-type" : r.headers.get("Content-Type", "application/octet-stream"), - "length" : int(r.headers["Content-Length"]), - "chunkSize" : 4096, # what is this for - "uploadDate" : datetime.strptime(r.headers["Date"], "%a, %d %b %Y %H:%M:%S GMT").isoformat(), - } - ] - - locale.setlocale(locale.LC_ALL, "") + except requests.exceptions.ConnectionError: + continue - entry["createdAt"] = entry["updatedAt"] - entry["slug"] = "".join(filter(lambda x: x in string.ascii_letters, entry["title"].lower())) - - r = requests.get(entry["__intern"]["entryURL"]) - d = self.regexes["desc"].match(r.text) - if d and "description" in d.groups(): - entry["description"] = html.unescape(d["description"]) - yield entry - i += 1 + if not r: + continue + + locale.setlocale(locale.LC_ALL, "C") + session.add(File( + hash=calculateHashForResource(r).hexdigest(), + id=entry.id, + name=downloadURL.split("/")[-1], + content_type=r.headers.get("Content-Type", "application/octet-stream"), + length=int(r.headers["Content-Length"]), + date=datetime.strptime(r.headers["Date"], "%a, %d %b %Y %H:%M:%S GMT"), + download_url=downloadURL, + upload=entry + )) + + locale.setlocale(locale.LC_ALL, "") + + r = requests.get(self.prefix + m["entryURL"]) + d = self.regexes["desc"].match(r.text) + if d and "description" in d.groups(): + entry.description = html.unescape(d["description"]) + + session.add(entry) + session.commit() class CCAN(Website): regexes = { |
