From 8c5d73930280fcaf6aa1c3a7d50fa756b8b2b728 Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 6 Jun 2019 08:51:07 -0700 Subject: [PATCH] Use zodb instead of flat files --- Dockerfile | 27 +++++-- requirements.txt | 32 +++++--- setup.py | 2 +- wastebin/cli.py | 12 ++- wastebin/daemon.py | 197 +++++++++++++++++++++++++++------------------ 5 files changed, 171 insertions(+), 99 deletions(-) diff --git a/Dockerfile b/Dockerfile index 21c5e31..9dea130 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,26 @@ FROM ubuntu:bionic -RUN apt-get update && \ - apt-get install -y python3-pip +RUN sed -i -E 's/(archive|security).ubuntu.com/192.168.1.142/' /etc/apt/sources.list && \ + sed -i -E 's/^deb-src/# deb-src/' /etc/apt/sources.list && \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" \ + wget gpg git build-essential && \ + wget -qO- http://artifact.scc.net.davepedu.com/repo/apt/extpython/dists/bionic/install | bash /dev/stdin && \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" \ + extpython-python3.7 && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/{apt,dpkg,cache,log}/ -ADD . /tmp/code/ +ADD . 
/tmp/code -RUN pip3 install -U pip && \ - cd /tmp/code && \ - python3 setup.py install && \ +RUN cd /tmp/code && \ + /opt/extpython/3.7/bin/pip3 install -r requirements.txt && \ + /opt/extpython/3.7/bin/python3 setup.py install && \ useradd --uid 1000 app -VOLUME /data/ USER app -ENTRYPOINT ["wastebind", "-d", "/data/"] +ENTRYPOINT ["/opt/extpython/3.7/bin/wastebind"] diff --git a/requirements.txt b/requirements.txt index 0fcad47..8cdaa36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,30 @@ appdirs==1.4.3 backports.functools-lru-cache==1.5 -certifi==2018.11.29 +BTrees==4.5.1 +certifi==2019.3.9 +cffi==1.12.3 chardet==3.0.4 -cheroot==6.5.4 -CherryPy==18.1.0 +cheroot==6.5.5 +CherryPy==18.1.1 idna==2.8 jaraco.functools==2.0 -more-itertools==5.0.0 -portend==2.3 -pytz==2018.9 -requests==2.21.0 +more-itertools==7.0.0 +perfmetrics==2.0 +persistent==4.5.0 +portend==2.4 +pycparser==2.19 +PyMySQL==0.9.3 +pytz==2019.1 +RelStorage==2.1.1 +requests==2.22.0 six==1.12.0 -tempora==1.14 -urllib3==1.24.1 +tempora==1.14.1 +transaction==2.4.0 +urllib3==1.25.3 zc.lockfile==1.4 +ZConfig==3.4.0 +zdaemon==4.3 +ZEO==5.2.1 +ZODB==5.5.1 +zodbpickle==1.0.3 +zope.interface==4.6.0 diff --git a/setup.py b/setup.py index b830b11..f718fec 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup import os -__version__ = "0.0.0" +__version__ = "0.0.1" with open(os.path.join(os.path.dirname(__file__), "requirements.txt")) as f: __requirements__ = [line.strip() for line in f.readlines()] diff --git a/wastebin/cli.py b/wastebin/cli.py index 5c6253c..68ab477 100644 --- a/wastebin/cli.py +++ b/wastebin/cli.py @@ -41,7 +41,9 @@ def main(): # parser.add_argument("-p", "--password", help="password") spr_action = parser.add_subparsers(dest="action", help="action to take") - spr_action.add_parser("list", help="show list of pastes") + + spr_list = spr_action.add_parser("list", help="show list of pastes") + spr_list.add_argument("name", nargs="?", help="prefix to 
match") spr_new = spr_action.add_parser("new", help="create a paste") spr_new.add_argument("name", nargs="?", default="", help="name of paste to create") @@ -89,7 +91,13 @@ def main(): r.delete(host + args.name).raise_for_status() elif args.action == "list": - print(r.get(host + "search").text, end="") + print(r.get(host + "search", + params={"prefix": args.name} if args.name else None).text, + end="") + + else: + parser.error('must specify an action') + if __name__ == "__main__": main() diff --git a/wastebin/daemon.py b/wastebin/daemon.py index 262e4b1..6877ce1 100644 --- a/wastebin/daemon.py +++ b/wastebin/daemon.py @@ -1,9 +1,84 @@ import os import cherrypy import logging -import hashlib import re -from threading import Thread +from urllib.parse import urlparse +import ZODB +from relstorage.storage import RelStorage +from relstorage.options import Options +from relstorage.adapters.mysql import MySQLAdapter +import persistent +import persistent.list +import ZODB.FileStorage +import persistent.mapping +import BTrees.OOBTree + + +def pmap(): + return persistent.mapping.PersistentMapping() + + +class Database(object): + def __init__(self, storage): + self.db = ZODB.DB(storage) + self.init_db() + + @staticmethod + def from_uri(uri): + """ + Return a database backed by the storage specified by the passed uri. URIs containing a scheme (scheme://) will + be checked against installed adapters. Schemeless URIs are assumed to be a file path for flat file storage. 
+ """ + parsed = urlparse(uri) + storage = None + + if parsed.scheme: + mysql = MySQLAdapter(host=parsed.hostname, port=parsed.port, + user=parsed.username, passwd=parsed.password, + db=parsed.path[1:], options=Options(keep_history=False)) + storage = RelStorage(adapter=mysql) + else: + storage = ZODB.FileStorage.FileStorage(uri) + + if storage is None: + raise Exception(f"Unsupported uri {uri}") + + return Database(storage) + + def init_db(self): + with self.db.transaction() as c: + if "pastes" not in c.root(): + c.root.pastes = BTrees.OOBTree.BTree() + + def loadpaste(self, name): + with self.db.transaction() as c: + return c.root.pastes[name].value + + def writepaste(self, name, contents): + with self.db.transaction() as c: + try: + paste = c.root.pastes[name] + paste.value = contents + except KeyError: + paste = Paste(contents) + c.root.pastes[name] = paste + + def delpaste(self, name): + with self.db.transaction() as c: + del c.root.pastes[name] + + def iterpastes(self, prefix=None): + with self.db.transaction() as c: + for name, value in c.root.pastes.items(): + if prefix and not name.startswith(prefix): + continue + yield (name, value, ) + + +class Paste(persistent.Persistent): + def __init__(self, value): + self.value = value + PAGE = """ @@ -22,119 +97,83 @@ PAGE = """ """ -RE_NAME = re.compile(r'^[a-z0-9_\-/]+$') - - -def sha256(data): - h = hashlib.sha256() - h.update(data.encode("utf-8")) - return h.hexdigest() +RE_NAME_RAW = r'^[a-z0-9_\-/]+$' +RE_NAME = re.compile(RE_NAME_RAW) class WasteWeb(object): - def __init__(self, datadir): - self.datadir = datadir - self.namecache = set() - t = Thread(target=self.prep_cache) - t.daemon = True - t.start() - - def prep_cache(self): - print("Populating index cache....") - for dirpath, dirnames, filenames in os.walk(self.datadir): - for fname in filenames: - with open(os.path.join(dirpath, fname)) as f: - self.namecache.update([f.readline().strip()]) - print("Indexed {} items".format(len(self.namecache))) + def 
__init__(self, db): + self.db = db @cherrypy.expose def index(self, load=None): data = "" if load: - assert RE_NAME.match(load) - data = self.loadpaste(load) + try: + data = self.db.loadpaste(load) + except KeyError: + raise cherrypy.HTTPError(404) yield PAGE.format(data=data.replace("<", "&lt;"), load=load or "") @cherrypy.expose def make(self, name, contents): - pname = name or sha256(contents) - assert RE_NAME.match(pname) - self.writepaste(pname, contents) - raise cherrypy.HTTPRedirect("/" + pname) + if not RE_NAME.match(name): + raise cherrypy.HTTPError(400, f"paste name must match {RE_NAME_RAW}") + self.db.writepaste(name, contents) + raise cherrypy.HTTPRedirect("/" + name) @cherrypy.expose def default(self, *args): - if cherrypy.request.method == "DELETE": - self.delpaste(args[0]) - return "OK" - else: - cherrypy.response.headers['Content-Type'] = 'text/plain' - return self.loadpaste(args[0]).encode("utf-8") + try: + if cherrypy.request.method == "DELETE": + self.db.delpaste(args[0]) + return "OK" + else: + cherrypy.response.headers['Content-Type'] = 'text/plain' + return self.db.loadpaste(args[0]).encode("utf-8") + except KeyError: + raise cherrypy.HTTPError(404) @cherrypy.expose - def search(self): - for entry in self.namecache: - yield entry + "\n" + def search(self, prefix=""): + cherrypy.response.headers['Content-Type'] = 'text/plain' - def loadpaste(self, name): - path = self.pastepath(sha256(name)) - with open(path) as f: - f.readline() # the name - return f.read() - - def writepaste(self, name, contents): - hname = sha256(name) - path = self.pastepath(hname) - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, "w") as f: - f.write(name) - f.write("\n") - f.write(contents) - self.namecache.update({name}) - - def delpaste(self, name): - self.namecache.remove(name) - path = self.pastepath(sha256(name)) - os.unlink(path) - pdir = os.path.dirname(path) - try: - os.rmdir(os.path.normpath(pdir)) - os.rmdir(os.path.normpath(os.path.join(pdir, 
"../"))) - except: - pass - - def pastepath(self, hashedname): - return os.path.join(self.datadir, hashedname[0], hashedname[1], hashedname + ".txt") + def _work(): + for name, _ in self.db.iterpastes(prefix): + yield name + "\n" + return _work() def main(): import argparse import signal - parser = argparse.ArgumentParser(description="") + parser = argparse.ArgumentParser(description="basic pastebin", + epilog="supprted databases are file paths and mysql://") - parser.add_argument('-p', '--port', default=8080, type=int, help="http port") - parser.add_argument('-d', '--data', default="./", help="data dir") + parser.add_argument('-p', '--port', default=int(os.environ.get("PASTE_PORT", 8080)), type=int, help="http port") + parser.add_argument('-d', '--database', default=os.environ.get("PASTE_DB", None), help="database uri") parser.add_argument('--debug', action="store_true", help="enable development options") args = parser.parse_args() + if not args.database: + parser.error("the following arguments are required: -d/--database") + logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING, format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s") - web = WasteWeb(args.data) + web = WasteWeb(Database.from_uri(args.database)) cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False}}) cherrypy.config.update({ - 'tools.sessions.on': False, - 'request.show_tracebacks': True, - 'server.socket_port': args.port, - 'server.thread_pool': 5, - 'server.socket_host': '0.0.0.0', - 'server.show_tracebacks': args.debug, - 'log.screen': False, - 'engine.autoreload.on': args.debug + "tools.sessions.on": False, + "server.socket_host": "0.0.0.0", + "server.socket_port": args.port, + "server.thread_pool": 5, + "engine.autoreload.on": args.debug, + "log.screen": True }) def signal_handler(signum, stack):