TorrentIndexer/indexer.py

#!/usr/bin/python3
# vim: set ts=2 noexpandtab:
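"""Flask front end for a small torrent indexer.

Provides routes to search (/search), browse categories (/categories),
upload (/create), download (/download/<filename>) and inspect (/details)
torrents stored in torrentdb.sqlite, and runs a background thread that
periodically scrapes the configured tracker for seed/peer statistics.
"""
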
from flask import Flask, render_template, url_for, request, send_file, redirect
from flask_babel import Babel, gettext as _, lazy_gettext
from werkzeug.utils import secure_filename
from hurry.filesize import size
from hashlib import sha1
import threading
import binascii
import bencoder
import requests
import hashlib
import sqlite3
import base64
import urllib
import json
import uuid
import time

app = Flask(__name__)
babel = Babel(app)
LANGUAGES = ['en', 'de']
settings = None


class Categories():
	def __init__(self):
		self.categories = settings["categories"]
		for c in self.categories:
			c["label"] = str(lazy_gettext(c["label"]))
			for s in c["subcategories"]:
				s["label"] = str(lazy_gettext(s["label"]))

	def find(self, category, subcategory=None):
		cat_name = ""
		sub_name = ""
		for cat in self.categories:
			if cat["id"] == category:
				cat_name = cat["label"]
				if subcategory is not None:
					for sub in cat["subcategories"]:
						if sub["id"] == subcategory:
							sub_name = sub["label"]
		return (cat_name, sub_name)


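# ScrapeState collects seed/peer/download counts from the tracker configured as
# settings["scrape_url"]: a statedump and a "tpbs" full-stats request against
# /stats, plus a per-info-hash /scrape request for every torrent in the
# database. Results are cached in self.stats, keyed by lower-case hex info hash.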
class ScrapeState():
	def __init__(self):
		self.stats = {}

	def update(self):
		self._statedump()
		self._tpbs()
		self._fullscrape()

	def _statedump(self):
		url = settings["scrape_url"]
		statedump_url = url + "/stats"
		params = {"mode": "statedump"}
		req = requests.get(statedump_url, params=params)
		dump = req.text.strip()
		dump = dump.split("\n")
		for entry in dump:
			entry = entry.split(":")
			key = entry[0].lower()
			self.stats.setdefault(key, {}).update({"base": entry[1], "unsure_downloaded": entry[2]})

	def _tpbs(self):
		url = settings["scrape_url"]
		tpbs_url = url + "/stats"
		params = {"mode": "tpbs", "format": "ben"}
		req = requests.get(tpbs_url, params=params)
		decoded = bencoder.decode(req.content)
		for torrent in decoded[b"files"]:
			info_hash = binascii.b2a_hex(torrent)
			stats = decoded[b"files"][torrent]
			key = info_hash.decode("utf-8").lower()
			# setdefault avoids an AttributeError for hashes that were not in the statedump
			self.stats.setdefault(key, {}).update({"seeds": stats[b"complete"], "peers": stats[b"incomplete"], "complete": stats[b"downloaded"]})

	def _fullscrape(self):
		connection = sqlite3.connect("torrentdb.sqlite")
		c = connection.cursor()
		c.execute("SELECT fileid FROM torrents")
		all_hashes = c.fetchall()
		connection.close()
		for info_hash in all_hashes:
			info_hash = info_hash[0]
			url_param = binascii.a2b_hex(info_hash.encode())
			url = settings["scrape_url"]
			req = requests.get(url + "/scrape", params={"info_hash": url_param})
			decoded = bencoder.decode(req.content)
			info = decoded[b"files"]
			try:
				ugly_hash, stats = info.popitem()
				key = info_hash.lower()
				self.stats.setdefault(key, {}).update({"seeds": stats[b"complete"], "peers": stats[b"incomplete"], "complete": stats[b"downloaded"]})
			except KeyError:
				print("No stats found for {}".format(info_hash))


@app.route("/")
def index():
	return render_template("search.html", categories=categories.categories)


@app.route("/categories")
def categorys():
	return render_template("categories.html", categories=categories.categories)


@app.route("/create", methods=['GET', 'POST'])
def create():
	if request.method == "GET":
		return render_template("create.html", categories=categories.categories, errors=None, tracker=settings["valid_tracker"])
	elif request.method == "POST":
		newTorrent = createNewTorrent(request)
		if len(newTorrent.errors) == 0:
			message = _("Successfully created torrent <a href=\"/search?h={}\">{}</a>").format(newTorrent.fileid, newTorrent.fileid[:-20])
			return render_template("create.html", categories=categories.categories, messages=[message], tracker=settings["valid_tracker"])
		else:
			return render_template("create.html", categories=categories.categories, errors=newTorrent.errors, tracker=settings["valid_tracker"]), 409


@app.route("/download/<filename>")
def download(filename):
	connection = sqlite3.connect("torrentdb.sqlite")
	c = connection.cursor()
	c.execute("SELECT name FROM torrents WHERE fileid = :fileid", {'fileid': filename})
	name = c.fetchone()[0]
	connection.close()
	return send_file("torrentFiles/" + filename, as_attachment=True, attachment_filename=name + ".torrent", conditional=True)


@app.route("/search", methods=['GET'])
def search():
	connection = sqlite3.connect("torrentdb.sqlite")
	c = connection.cursor()
	search_params = []
	search = ""
	fields = list(request.args.keys())
	# build the WHERE clause from the supported query parameters:
	# q = name, c = category, s = subcategory, h = info hash prefix
	for field in fields:
		query_list = request.args.getlist(field)
		for query in query_list:
			if len(search) > 0:
				search += " AND "
			if field == "q":
				names = query.split(" ")
				search_params += list(map(lambda x: "%" + x + "%", names))
				search += " AND ".join(["torrents.name LIKE (?)"] * len(query.split(" ")))
			elif field == "c":
				search_params += query.split(" ")
				search += " AND ".join(["torrents.category LIKE (?)"] * len(query.split(" ")))
			elif field == "s":
				search_params += query.split(" ")
				search += " AND ".join(["torrents.subcategory LIKE (?)"] * len(query.split(" ")))
			elif field == "h":
				hashes = query.split(" ")
				search_params += list(map(lambda x: x + "%", hashes))
				search += " AND ".join(["torrents.fileid LIKE (?)"] * len(query.split(" ")))
	print(search)
	results = list()
	for row in c.execute("SELECT torrents.fileid, torrents.name, metadata.torrentsize FROM torrents LEFT JOIN metadata on metadata.fileid = torrents.fileid WHERE " + search, search_params):
		r = row[0:2] + (size(float(row[2])),) + row[3:]
		results.append(r)
	connection.close()
	return render_template("result.html", results=results, categories=categories.categories, stats=scrapeState.stats)


@app.route("/details", methods=['GET'])
def details():
	info_hash = request.args["h"]
	tf = TorrentFile(fileid=info_hash)
	tf.fromDb()
	return render_template("details.html", categories=categories.categories, torrent=tf)


def init():
	global settings
	with open("settings.json") as settingsJson:
		settings = json.load(settingsJson)
	initDb()
	global categories
	categories = Categories()
	global scrapeState
	scrapeState = ScrapeState()
	# daemon thread so the scraper loop does not keep the process alive on shutdown
	scrape = threading.Thread(target=scraper, daemon=True)
	scrape.start()


def scraper():
	while True:
		print("Start scraping")
		scrapeState.update()
		print("Scraping done")
		time.sleep(60)


def initDb():
	connection = sqlite3.connect("torrentdb.sqlite")
	c = connection.cursor()
	c.execute('CREATE TABLE IF NOT EXISTS torrents (fileid TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, category TEXT NOT NULL, subcategory TEXT NOT NULL, description TEXT NOT NULL, audioquality_description TEXT NOT NULL, videoquality_description TEXT NOT NULL);')
	c.execute('CREATE TABLE IF NOT EXISTS metadata (fileid TEXT PRIMARY KEY NOT NULL, created_by TEXT, creation_date TEXT, announce_url TEXT NOT NULL, source TEXT, torrentsize TEXT NOT NULL, name TEXT NOT NULL, private TEXT NOT NULL)')
	connection.commit()
	connection.close()


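# Handles an upload from /create: computes the info hash, rejects torrents that
# do not announce to one of settings["valid_tracker"], stores the .torrent file
# under torrentFiles/<info_hash> and inserts a row each into torrents and metadata.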
def createNewTorrent(request):
	uploadfile = request.files["torrentFile"]
	filename = secure_filename(uploadfile.filename)
	content = request.files["torrentFile"].stream.read()
	bcoded = bencoder.decode(content)
	info_hash = sha1(bencoder.encode(bcoded[b'info'])).hexdigest()
	#TODO: Validate the input serverside before writing it to the database
	name = request.form["name"]
	category = request.form["category"]
	subcategory = request.form["subcategory"]
	description = request.form["description"]
	audioquality_description = request.form["audioquality_description"]
	videoquality_description = request.form["videoquality_description"]
	newTFile = TorrentFile(info_hash, name, category, subcategory, description, audioquality_description, videoquality_description)
	try:
		# announce-list is a list of URL tiers (BEP 12); flatten it to single URLs
		announce = [url for tier in bcoded[b'announce-list'] for url in tier]
	except KeyError:
		try:
			announce = (bcoded[b'announce'], )
		except KeyError:
			announce = []
	is_ours = False
	for a in announce:
		a = a.decode("utf-8", "ignore")
		if a in settings["valid_tracker"]:
			is_ours = True
			break
	if not is_ours:
		newTFile.errors = ["Rejecting torrent <a href=\"/search?h={}\">{}</a>, as it does not use our tracker".format(info_hash, info_hash[:-20])]
		return newTFile
	with open("torrentFiles/" + info_hash, "wb") as torrent_file:
		torrent_file.write(content)
	# payload size in MiB, derived from the piece list (informational only)
	torrent_size_mib = ((len(bcoded[b'info'][b'pieces']) / 20) * bcoded[b'info'][b'piece length']) / 1024 / 1024
	print("=== CREATE NEW TORRENT FILE ===")
	print("Name: " + request.form["name"])
	print("Torrent file: " + info_hash)
	print("Category: " + request.form["category"])
	print("Subcategory: " + request.form["subcategory"])
	print("Description: " + request.form["description"])
	# construct the TorrentFile again now that the .torrent file exists on disk,
	# so its Metadata can actually be read for the metadata table
	newTFile = TorrentFile(info_hash, name, category, subcategory, description, audioquality_description, videoquality_description)
	try:
		connection = sqlite3.connect("torrentdb.sqlite")
		newTFile.writeToDb(connection.cursor())
		newTFile.metadata.writeToDb(connection.cursor())
		connection.commit()
		connection.close()
	except sqlite3.IntegrityError as e:
		print(e)
		newTFile.errors = [_("Torrent <a href=\"/search?h={}\">{}</a> already exists").format(info_hash, info_hash[:-20])]
	except Exception as e:
		print(e)
		newTFile.errors = [_("Unknown error in creation")]
	return newTFile


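# Metadata parses a stored .torrent file and exposes the fields persisted in the
# metadata table (creator, creation date, announce URL, source, size, name, private flag).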
class Metadata():
	def __init__(self, fileid):
		try:
			with open("torrentFiles/" + fileid, "rb") as f:
				torrent = f.read()
		except FileNotFoundError:
			return
		self.fileid = fileid
		self.bcoded = bencoder.decode(torrent)
		self.created_by = self.bcoded.get(b'created by', b"")
		self.creation_date = self.bcoded.get(b'creation date', 0)
		# the announce URL is a top-level key of the torrent, not part of 'info'
		self.announce_url = self.bcoded.get(b'announce', b"")
		self.source = self.bcoded.get(b'info', dict()).get(b'source', b"")
		self.torrentsize = ((len(self.bcoded.get(b'info', dict()).get(b'pieces', b"")) / 20) * self.bcoded.get(b'info', dict()).get(b'piece length', 0))
		self.torrentsize_human = size(self.torrentsize)
		self.name = self.bcoded.get(b'info', dict()).get(b'name', b"")
		self.private = self.bcoded.get(b'info', dict()).get(b'private', b"")

	def writeToDb(self, cursor):
		c = cursor
		b64created_by = base64.b64encode(self.created_by) if self.created_by else ""
		b64announce_url = base64.b64encode(self.announce_url) if self.announce_url else ""
		b64source = base64.b64encode(self.source) if self.source else ""
		b64name = base64.b64encode(self.name)
		c.execute("INSERT INTO metadata(fileid, created_by, creation_date, announce_url, source, torrentsize, name, private) VALUES(:fileid, :created_by, :creation_date, :announce_url, :source, :torrentsize, :name, :private)", {'fileid': self.fileid, 'created_by': b64created_by, 'creation_date': self.creation_date, 'announce_url': b64announce_url, 'source': b64source, 'torrentsize': self.torrentsize, 'name': b64name, 'private': self.private})


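# TorrentFile represents one row of the torrents table (plus its Metadata) and
# provides helpers to write a new entry and to load an existing one by info hash.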
class TorrentFile():
	def __init__(self, fileid=None, name=None, category=None, subcategory=None, description=None, audioquality_description=None, videoquality_description=None):
		self.errors = []
		self.fileid = fileid
		self.name = name
		self.category = category
		self.subcategory = subcategory
		self.description = description
		self.audioquality_description = audioquality_description
		self.videoquality_description = videoquality_description
		if self.fileid:
			self.metadata = Metadata(fileid)

	def writeToDb(self, cursor):
		c = cursor
		b64description = base64.b64encode(self.description.encode())
		b64audioquality_description = base64.b64encode(self.audioquality_description.encode())
		b64videoquality_description = base64.b64encode(self.videoquality_description.encode())
		c.execute("INSERT INTO torrents(fileid, name, category, subcategory, description, audioquality_description, videoquality_description) VALUES(:fileid, :name, :category, :subcategory, :description, :audioquality_description, :videoquality_description)", {'fileid': self.fileid, 'name': self.name, 'category': self.category, 'subcategory': self.subcategory, 'description': b64description, 'audioquality_description': b64audioquality_description, 'videoquality_description': b64videoquality_description})

	def fromDb(self):
		def dict_factory(cursor, row):
			d = {}
			for idx, col in enumerate(cursor.description):
				d[col[0]] = row[idx]
			return d
		con = sqlite3.connect("torrentdb.sqlite")
		con.row_factory = dict_factory
		c = con.cursor()
		res = c.execute("SELECT torrents.*, metadata.* FROM torrents LEFT JOIN metadata on metadata.fileid = torrents.fileid WHERE torrents.fileid LIKE :fileid", {"fileid": self.fileid})
		res = res.fetchone()
		self.fileid = res["fileid"]
		self.name = (base64.b64decode(res["name"])).decode()
		self.category = res["category"]
		self.subcategory = res["subcategory"]
		self.category_string, self.subcategory_string = categories.find(int(self.category), int(self.subcategory))
		self.description = (base64.b64decode(res["description"])).decode()
		self.audioquality_description = (base64.b64decode(res["audioquality_description"])).decode()
		self.videoquality_description = (base64.b64decode(res["videoquality_description"])).decode()
		self.metadata = Metadata(self.fileid)


@babel.localeselector
def get_locale():
	return request.accept_languages.best_match(LANGUAGES)


if __name__ == "__main__":
	init()
	app.jinja_env.globals.update(json=json)
	app.jinja_env.globals.update(sorted=sorted)
	app.run(debug=False, host="127.0.0.1")

# vim: set ts=2 sts=2 sw=2 noexpandtab: