TorrentIndexer/indexer.py

379 lines
14 KiB
Python

#!/usr/bin/python3
#/* vim:set ts=2 set noexpandtab */
from flask import Flask, render_template, url_for, request, send_file, redirect
from flask_babel import Babel, gettext as _, lazy_gettext
from werkzeug import secure_filename
from hurry.filesize import size
from hashlib import sha1
import threading
import binascii
import bencoder
import requests
import hashlib
import sqlite3
import base64
import urllib
import json
import uuid
import time
# The Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Flask-Babel extension bound to the application.
babel = Babel(app)
# Locales offered to best_match() in get_locale().
LANGUAGES = ['en', 'de']
# Parsed content of settings.json; stays None until init() has run.
settings = None
class Categories():
    """Category tree taken from ``settings["categories"]`` with translated labels.

    Each category is a dict with the keys ``id``, ``label`` and
    ``subcategories``; each subcategory is a dict with ``id`` and ``label``.
    """

    def __init__(self):
        """Load the categories from the global settings and translate all labels.

        The list from the settings is used in place, so the labels stored in
        ``settings`` are overwritten with their translated text.
        """
        self.categories = settings["categories"]
        for category in self.categories:
            category["label"] = str(lazy_gettext(category["label"]))
            for subcategory in category["subcategories"]:
                subcategory["label"] = str(lazy_gettext(subcategory["label"]))

    def find(self, category, subcategory=None):
        """Look up the labels for a category id and, optionally, a subcategory id.

        The subcategory is only searched inside the category that matched
        ``category``.

        :param category: id of the category
        :param subcategory: id of the subcategory, or None to skip that lookup
        :return: tuple ``(category_label, subcategory_label)``; a label that
                 could not be resolved is the empty string
        """
        cat_name = ""
        sub_name = ""
        for cat in self.categories:
            if cat["id"] != category:
                continue
            cat_name = cat["label"]
            # "is not None" so that a subcategory id of 0 is still looked up
            if subcategory is not None:
                for sub in cat["subcategories"]:
                    if sub["id"] == subcategory:
                        sub_name = sub["label"]
        return (cat_name, sub_name)
class ScrapeState():
    """Tracker statistics per torrent, collected from ``settings["scrape_url"]``.

    ``stats`` maps the lower-case hexadecimal info hash to a dict that may
    contain the keys ``base``, ``unsure_downloaded``, ``seeds``, ``peers`` and
    ``complete``.
    """

    # Class-level fallback kept for backward compatibility; __init__ gives
    # every instance its own dict so that instances never share state.
    stats = {}

    def __init__(self):
        self.stats = {}

    def update(self):
        """Refresh the statistics from all three sources, one after another."""
        self._statedump()
        self._tpbs()
        self._fullscrape()

    def _entry(self, key):
        """Return the stats dict for ``key``, creating an empty one if needed."""
        return self.stats.setdefault(key, {})

    def _store_counters(self, key, counters):
        """Copy the scrape counters of one torrent into ``stats[key]``.

        :param key: lower-case hex info hash
        :param counters: mapping with the keys ``b"complete"``,
                         ``b"incomplete"`` and ``b"downloaded"``
        :raises KeyError: if one of the counters is missing; ``stats`` is then
                          left unchanged
        """
        # Read every counter before touching self.stats, so a missing one
        # cannot leave a half-written entry behind.
        values = {
            "seeds": counters[b"complete"],
            "peers": counters[b"incomplete"],
            "complete": counters[b"downloaded"],
        }
        self._entry(key).update(values)

    def _apply_statedump(self, text):
        """Merge the ``hash:base:unsure_downloaded`` lines of ``text`` into ``stats``.

        Lines with fewer than three ``:``-separated fields (for example the
        single empty line of an empty response) are skipped.
        """
        for line in text.strip().split("\n"):
            fields = line.split(":")
            if len(fields) < 3:
                continue
            self._entry(fields[0].lower()).update({"base": fields[1], "unsure_downloaded": fields[2]})

    def _statedump(self):
        """Fetch ``/stats?mode=statedump`` and merge the result into ``stats``."""
        statedump_url = settings["scrape_url"] + "/stats"
        req = requests.get(statedump_url, params={"mode": "statedump"})
        self._apply_statedump(req.text)

    def _tpbs(self):
        """Fetch ``/stats?mode=tpbs&format=ben`` and store the counters of every listed torrent."""
        tpbs_url = settings["scrape_url"] + "/stats"
        req = requests.get(tpbs_url, params={"mode": "tpbs", "format": "ben"})
        decoded = bencoder.decode(req.content)
        for raw_hash, counters in decoded[b"files"].items():
            key = binascii.b2a_hex(raw_hash).decode("utf-8").lower()
            # A hash that the statedump did not list gets a fresh entry here
            self._store_counters(key, counters)

    def _fullscrape(self):
        """Scrape the tracker once for every file id stored in the torrents table."""
        connection = sqlite3.connect("torrentdb.sqlite")
        try:
            c = connection.cursor()
            c.execute("SELECT fileid FROM torrents")
            all_hashes = c.fetchall()
        finally:
            connection.close()
        url = settings["scrape_url"]
        for (info_hash,) in all_hashes:
            # The tracker is queried with the binary form of the hex file id
            url_param = binascii.a2b_hex(info_hash.encode())
            req = requests.get(url + "/scrape", params={"info_hash": url_param})
            info = bencoder.decode(req.content)[b"files"]
            try:
                # popitem() raises KeyError when the tracker returned no entry
                _, counters = info.popitem()
                self._store_counters(info_hash.lower(), counters)
            except KeyError:
                print("No stats found for {}".format(info_hash))
@app.route("/")
def index():
    """Render search.html for the site root, passing the category list."""
    all_categories = categories.categories
    return render_template("search.html", categories=all_categories)
@app.route("/categories")
def categorys():
    """Render categories.html, passing the category list."""
    all_categories = categories.categories
    return render_template("categories.html", categories=all_categories)
@app.route("/create", methods=['GET','POST'])
def create():
    """Show the upload form and process submitted torrents.

    A POST hands the request to createNewTorrent() and renders create.html
    with either a success message or the collected errors (status 409).
    Every other method renders the empty form.
    """
    tracker = settings["valid_tracker"]
    if request.method == "POST":
        newTorrent = createNewTorrent(request)
        if newTorrent.errors:
            return render_template("create.html", categories=categories.categories, errors=newTorrent.errors, tracker=tracker), 409
        message = _("Successfully created torrent <a href=\"/search?h={}\">{}</a>").format(newTorrent.fileid, newTorrent.fileid[:-20])
        return render_template("create.html", categories=categories.categories, messages=[message], tracker=tracker)
    # Not only GET ends up here: Flask also routes HEAD requests to views that
    # accept GET, so this branch must not be guarded by `method == "GET"`,
    # otherwise HEAD would fall through and return None.
    return render_template("create.html", categories=categories.categories, errors=None, tracker=tracker)
@app.route("/download/<filename>")
def download(filename):
    """Send the stored torrent file whose file id is ``filename``.

    The download is offered under the name stored in the torrents table with
    ".torrent" appended. A file id that is not in the table is answered with
    404.
    """
    # Local import: abort is not part of the module-level flask import
    from flask import abort

    connection = sqlite3.connect("torrentdb.sqlite")
    try:
        c = connection.cursor()
        c.execute("SELECT name FROM torrents WHERE fileid = :fileid", { 'fileid' : filename})
        row = c.fetchone()
    finally:
        connection.close()
    if row is None:
        # Unknown id: without this check row[0] would raise TypeError (HTTP 500)
        abort(404)
    return send_file("torrentFiles/" + filename, as_attachment=True, attachment_filename=row[0] + ".torrent", conditional=True)
@app.route("/search", methods=['GET'])
def search():
    """Render result.html with the torrents matching the query string.

    Recognised fields (each may be repeated; every value is split on spaces
    and each term becomes one ``LIKE`` condition, all joined with ``AND``):

    * ``q`` - substring of the torrent name
    * ``c`` - category
    * ``s`` - subcategory
    * ``h`` - prefix of the file id

    Unknown fields are ignored. A request without any recognised field lists
    every torrent.
    """
    # field -> (column, text put before the term, text put after the term)
    like_rules = {
        "q": ("torrents.name", "%", "%"),
        "c": ("torrents.category", "", ""),
        "s": ("torrents.subcategory", "", ""),
        "h": ("torrents.fileid", "", "%"),
    }
    conditions = []
    search_params = []
    for field in request.args.keys():
        # Dictionary lookup compares by value; testing strings with `is`
        # depends on interpreter-specific object identity.
        rule = like_rules.get(field)
        if rule is None:
            # Skipping unknown fields keeps the WHERE clause well-formed
            continue
        column, prefix, suffix = rule
        for query in request.args.getlist(field):
            for term in query.split(" "):
                conditions.append(column + " LIKE (?)")
                search_params.append(prefix + term + suffix)

    where = " AND ".join(conditions)
    print(where)
    sql = "SELECT torrents.fileid, torrents.name, metadata.torrentsize FROM torrents LEFT JOIN metadata on metadata.fileid = torrents.fileid"
    if where:
        # Without conditions the WHERE keyword must be left out entirely
        sql += " WHERE " + where

    connection = sqlite3.connect("torrentdb.sqlite")
    try:
        rows = connection.cursor().execute(sql, search_params).fetchall()
    finally:
        connection.close()
    # (fileid, name, human-readable size)
    results = [(row[0], row[1], size(float(row[2]))) for row in rows]
    return render_template("result.html", results=results, categories=categories.categories, stats=scrapeState.stats)
@app.route("/details", methods=['GET'])
def details():
    """Render details.html for the torrent whose file id is given in ``h``."""
    torrent = TorrentFile(fileid=request.args["h"])
    # Fill the object with the row stored for this file id
    torrent.fromDb()
    return render_template("details.html", categories=categories.categories, torrent=torrent)
def init():
    """Prepare the application before it starts serving.

    Loads settings.json into the global ``settings``, creates the database
    tables, builds the global ``categories`` and ``scrapeState`` objects and
    starts the background scraping thread.
    """
    global settings, categories, scrapeState
    with open("settings.json") as settingsJson:
        settings = json.load(settingsJson)
    initDb()
    categories = Categories()
    scrapeState = ScrapeState()
    # scraper() loops forever; as a daemon thread it cannot keep the process
    # alive once the main thread has finished.
    scrape = threading.Thread(target=scraper, daemon=True)
    scrape.start()
def scraper():
    """Thread target: refresh the global scrapeState every 60 seconds, forever.

    A failing update is reported and retried in the next round instead of
    ending the thread.
    """
    while True:
        print("Start scraping")
        try:
            scrapeState.update()
        except Exception as e:
            # This loop is the top level of the thread: an escaping exception
            # would silently stop all further scraping.
            print("Scraping failed: {}".format(e))
        else:
            print("Scraping done")
        time.sleep(60)
def initDb():
    """Create the ``torrents`` and ``metadata`` tables in torrentdb.sqlite if they are missing."""
    connection = sqlite3.connect("torrentdb.sqlite")
    try:
        c = connection.cursor()
        c.execute('CREATE TABLE IF NOT EXISTS torrents (fileid TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, category TEXT NOT NULL, subcategory TEXT NOT NULL, description TEXT NOT NULL, audioquality_description TEXT NOT NULL, videoquality_description TEXT NOT NULL);')
        c.execute('CREATE TABLE IF NOT EXISTS metadata (fileid TEXT PRIMARY KEY NOT NULL, created_by TEXT, creation_date TEXT, announce_url TEXT NOT NULL, source TEXT, torrentsize TEXT NOT NULL, name TEXT NOT NULL, private TEXT NOT NULL)')
        connection.commit()
    finally:
        # Close the connection even when one of the statements fails
        connection.close()
def _announceUrls(bcoded):
    """Return all announce URLs of a decoded torrent as text.

    ``announce-list`` takes precedence over ``announce``. Its entries are
    tiers, i.e. lists of URLs; a flat list of URLs is accepted as well.
    Entries that are not byte strings are dropped.
    """
    if b'announce-list' in bcoded:
        urls = []
        for tier in bcoded[b'announce-list']:
            if isinstance(tier, (list, tuple)):
                urls.extend(tier)
            else:
                urls.append(tier)
    elif b'announce' in bcoded:
        urls = [bcoded[b'announce']]
    else:
        urls = []
    return [url.decode("utf-8", "ignore") for url in urls if isinstance(url, bytes)]


def createNewTorrent(reuqest):
    """Store an uploaded torrent and return the TorrentFile describing it.

    The upload is read from ``files["torrentFile"]`` and the descriptive
    fields from ``form``. A torrent that announces to none of
    ``settings["valid_tracker"]`` is rejected before anything is written.
    Otherwise the file is saved as ``torrentFiles/<info hash>`` and rows are
    inserted into the torrents and metadata tables.

    :param reuqest: the request carrying the upload (parameter name kept,
                    including its spelling, for backward compatibility)
    :return: a TorrentFile; its ``errors`` list is non-empty when the torrent
             was rejected or could not be stored
    """
    # Use the object that was passed in rather than the module-level proxy
    request = reuqest
    content = request.files["torrentFile"].stream.read()
    bcoded = bencoder.decode(content)
    info_hash = sha1(bencoder.encode(bcoded[b'info'])).hexdigest()

    #TODO: Validate the input serverside before writing it to the database
    name = request.form["name"]
    category = request.form["category"]
    subcategory = request.form["subcategory"]
    description = request.form["description"]
    audioquality_description = request.form["audioquality_description"]
    videoquality_description = request.form["videoquality_description"]

    is_ours = any(url in settings["valid_tracker"] for url in _announceUrls(bcoded))
    if not is_ours:
        newTFile = TorrentFile(info_hash, name, category, subcategory, description, audioquality_description, videoquality_description)
        newTFile.errors = [_("Rejecting torrent <a href=\"/search?h={}\">{}</a>, as it does not use our tracker").format(info_hash, info_hash[:-20])]
        return newTFile

    with open("torrentFiles/" + info_hash, "wb") as torrent_file:
        torrent_file.write(content)

    print("=== CREATE NEW TORRENT FILE ===")
    print( "Name: " + name )
    print( "Torrent file: " + info_hash )
    print( "Category: " + category )
    print( "Subcategory: " + subcategory )
    print( "Description: " + description )

    # Created only now, so that its Metadata can read the file written above
    newTFile = TorrentFile(info_hash, name, category, subcategory, description, audioquality_description, videoquality_description)
    connection = None
    try:
        connection = sqlite3.connect("torrentdb.sqlite")
        newTFile.writeToDb(connection.cursor())
        newTFile.metadata.writeToDb(connection.cursor())
        connection.commit()
    except sqlite3.IntegrityError as e:
        # The file id is the primary key, so this is a duplicate upload
        print(e)
        newTFile.errors = [_("Torrent <a href=\"/search?h={}\">{}</a> already exists").format(info_hash, info_hash[:-20])]
    except Exception as e:
        # Report to the user, but keep the cause visible in the server output
        print(e)
        newTFile.errors = [_("Unknown error in creation")]
    finally:
        if connection is not None:
            connection.close()
    return newTFile
class Metadata():
    """Values read from the stored torrent file ``torrentFiles/<fileid>``."""

    def __init__(self, fileid):
        """Read and decode the torrent file belonging to ``fileid``.

        If the file does not exist, the constructor returns early and none of
        the attributes below are set.

        :param fileid: name of the file inside ``torrentFiles/``
        """
        try:
            with open("torrentFiles/" + fileid, "rb") as f:
                torrent = f.read()
        except FileNotFoundError:
            return
        self.fileid = fileid
        self.bcoded = bencoder.decode(torrent)
        info = self.bcoded.get(b'info', dict())
        self.created_by = self.bcoded.get(b'created by', b"")
        self.creation_date = self.bcoded.get(b'creation date', 0)
        # The announce URL is a top-level key of the torrent, not part of "info"
        self.announce_url = self.bcoded.get(b'announce', b"")
        self.source = info.get(b'source', b"")
        # Number of 20-byte entries in "pieces" times the piece length; a
        # missing piece length counts as 0 instead of raising TypeError.
        self.torrentsize = (len(info.get(b'pieces', b"")) / 20) * info.get(b'piece length', 0)
        self.torrentsize_human = size(self.torrentsize)
        self.name = info.get(b'name', b"")
        self.private = info.get(b'private', b"")

    def writeToDb(self, cursor):
        """Insert this metadata as one row into the ``metadata`` table.

        ``created_by``, ``announce_url``, ``source`` and ``name`` are stored
        base64-encoded; an empty value of the first three is stored as the
        empty string.

        :param cursor: sqlite3 cursor used for the INSERT; committing is left
                       to the caller
        """
        b64created_by = base64.b64encode(self.created_by) if self.created_by else ""
        # b64encode() needs bytes, so the URL is passed on undecoded
        b64announce_url = base64.b64encode(self.announce_url) if self.announce_url else ""
        b64source = base64.b64encode(self.source) if self.source else ""
        b64name = base64.b64encode(self.name)
        cursor.execute("INSERT INTO metadata(fileid, created_by, creation_date, announce_url, source, torrentsize, name, private) VALUES(:fileid, :created_by, :creation_date, :announce_url, :source, :torrentsize, :name, :private)", { 'fileid' : self.fileid, 'created_by' : b64created_by, 'creation_date' : self.creation_date, 'announce_url' : b64announce_url, 'source' : b64source , 'torrentsize' : self.torrentsize, 'name' : b64name, 'private' : self.private})
class TorrentFile():
    """One torrent: the row of the ``torrents`` table plus its file metadata."""

    # Class-level defaults kept for backward compatibility. __init__ assigns
    # every instance its own values, in particular its own errors list.
    errors = []
    fileid = None
    name = None
    category = None
    subcategory = None
    description = None
    audioquality_description = None
    videoquality_description = None
    # Stays None when the object is created without a file id
    metadata = None

    def __init__(self, fileid=None, name=None, category=None, subcategory=None, description=None, audioquality_description=None, videoquality_description=None):
        """Store the given fields; load the Metadata when a file id is given."""
        self.errors = []
        self.fileid = fileid
        self.name = name
        self.category = category
        self.subcategory = subcategory
        self.description = description
        self.audioquality_description = audioquality_description
        self.videoquality_description = videoquality_description
        if self.fileid:
            self.metadata = Metadata(fileid)

    def writeToDb(self, cursor):
        """Insert this torrent as one row into the ``torrents`` table.

        The three description fields are stored base64-encoded, all other
        fields as they are.

        :param cursor: sqlite3 cursor used for the INSERT; committing is left
                       to the caller
        """
        b64description = base64.b64encode(self.description.encode())
        b64audioquality_description = base64.b64encode(self.audioquality_description.encode())
        b64videoquality_description = base64.b64encode(self.videoquality_description.encode())
        cursor.execute("INSERT INTO torrents(fileid, name, category, subcategory, description, audioquality_description, videoquality_description) VALUES(:fileid, :name, :category, :subcategory, :description, :audioquality_description, :videoquality_description)", { 'fileid' : self.fileid, 'name' : self.name, 'category' : self.category, 'subcategory' : self.subcategory, 'description' : b64description , 'audioquality_description' : b64audioquality_description, 'videoquality_description' : b64videoquality_description})

    def fromDb(self):
        """Load the fields of this torrent from the database, using ``self.fileid``.

        :raises LookupError: if no row matches ``self.fileid``
        """
        def dict_factory(cursor, row):
            # Rows become dicts keyed by column name
            return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}

        con = sqlite3.connect("torrentdb.sqlite")
        try:
            con.row_factory = dict_factory
            c = con.cursor()
            c.execute("SELECT torrents.*, metadata.* FROM torrents LEFT JOIN metadata on metadata.fileid = torrents.fileid WHERE torrents.fileid LIKE :fileid", { "fileid" : self.fileid })
            res = c.fetchone()
        finally:
            con.close()
        if res is None:
            raise LookupError("No torrent found for file id {}".format(self.fileid))

        # NOTE(review): both tables have the columns "fileid" and "name", and
        # the dict keeps the value that comes last in the row, i.e. the one
        # from metadata. The base64 decoding of "name" below fits the encoded
        # metadata.name, not the plain torrents.name - confirm that this is
        # intended.
        self.fileid = res["fileid"]
        self.name = (base64.b64decode(res["name"])).decode()
        self.category = res["category"]
        self.subcategory = res["subcategory"]
        self.category_string, self.subcategory_string = categories.find(int(self.category), int(self.subcategory))
        self.description = (base64.b64decode(res["description"])).decode()
        self.audioquality_description = (base64.b64decode(res["audioquality_description"])).decode()
        self.videoquality_description = (base64.b64decode(res["videoquality_description"])).decode()
        self.metadata = Metadata(self.fileid)
@babel.localeselector
def get_locale():
    """Return the entry of LANGUAGES that best matches the languages accepted by the request."""
    accepted = request.accept_languages
    return accepted.best_match(LANGUAGES)
if __name__ == "__main__":
    init()
    # Make json and sorted available as globals in the Jinja templates
    app.jinja_env.globals.update(json=json, sorted=sorted)
    app.run(host="127.0.0.1", debug=False)
# vim: set ts=2 sts=2 sw=2 noexpandtab: