From 326051097c98d09f8f81d37e09a46ba2449284c4 Mon Sep 17 00:00:00 2001
From: dave
Date: Thu, 30 May 2019 21:30:46 -0700
Subject: [PATCH] generic tarball provider

---
 README.md                  |  18 +++-
 repobot/server.py          |   4 +-
 repobot/tarprovider.py     | 209 +++++++++++++++++++++++++++++++++++++
 templates/tar/package.html |  16 +++
 templates/tar/repo.html    |  16 +++
 templates/tar/root.html    |  16 +++
 6 files changed, 276 insertions(+), 3 deletions(-)
 create mode 100644 repobot/tarprovider.py
 create mode 100644 templates/tar/package.html
 create mode 100644 templates/tar/repo.html
 create mode 100644 templates/tar/root.html

diff --git a/README.md b/README.md
index adfa747..2ea2249 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,21 @@ wget -qO- http://host/repo/apt/reponame/dists/trusty/install | bash -x /dev/stdin
 
 Todo
 ----
+* CLI tool (for adding packages only)
+* 'Simple' CLI tool (shell script fetchable from the server for adding packages)
+* RPM support
 * Auth
-* Delete packages
-* Support using existing GPG keys
+* Support using existing GPG keys for apt
 * Nicer UI
+* JSON API
+* deb: need to be able to slice packages in repos by: component (arbitrary names), index (binary-amd64, binary-i386, source)
+* can already slice packages by: repo, dist
+* Move copysha256 somewhere generic
+* Have the server dictate the S3 root path to the provider plugins
+* Assert that submitted package names and file names are sane
+* Assert that submitted files smell like the type of file that is intended
+* Global & per-provider options:
+  * option to block overwriting
+* Standardize what is returned from provider's web_addpkg
+* Standardize some fields of provider's schema (name, version)
+* Delete repos if empty (with option to disable per provider)
diff --git a/repobot/server.py b/repobot/server.py
index 3980364..6763f72 100644
--- a/repobot/server.py
+++ b/repobot/server.py
@@ -6,6 +6,7 @@ import sqlalchemy
 from botocore.client import Config as BotoConfig
 from repobot.aptprovider import AptProvider
 from repobot.pypiprovider import PypiProvider
+from repobot.tarprovider import TarProvider
 from repobot.tables import SAEnginePlugin, SATool
 from urllib.parse import urlparse
 
@@ -78,7 +79,8 @@ def main():
 
     # set up providers
     providers = {"apt": AptProvider(dbcon, s3, bucket),
-                 "pypi": PypiProvider(dbcon, s3, bucket)}
+                 "pypi": PypiProvider(dbcon, s3, bucket),
+                 "tar": TarProvider(dbcon, s3, bucket)}
 
     # set up main web screen
     web = AppWeb(providers)
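
The server treats every provider uniformly: it constructs the provider with the shared database connection, a boto3 S3 client, and the bucket name, and later hands uploads to the provider's web_addpkg(). A minimal sketch of that contract, distilled from the diff above (the NullProvider name and its do-nothing behavior are illustrative only, not code from the repo):

    import json


    class NullProvider(object):
        """Hypothetical stub following the same contract as TarProvider below."""

        def __init__(self, dbcon, s3client, bucket):
            # every provider is constructed with the shared db connection,
            # s3 client, and bucket name
            self.db = dbcon
            self.s3 = s3client
            self.bucket = bucket
            # a real provider also mounts its own web tree here, e.g.
            # cherrypy.tree.mount(SomeWeb(self), "/repo/null", {...})

        def web_addpkg(self, reponame, name, version, fobj):
            # fobj is the uploaded file object (exposes .filename and .file)
            return json.dumps({"ok": True}, indent=4)
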
diff --git a/repobot/tarprovider.py b/repobot/tarprovider.py
new file mode 100644
index 0000000..0278b81
--- /dev/null
+++ b/repobot/tarprovider.py
@@ -0,0 +1,209 @@
+import cherrypy
+import hashlib
+import json
+import os
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+from sqlalchemy import Column, ForeignKey, UniqueConstraint
+from sqlalchemy.orm import relationship
+from sqlalchemy.types import String, Integer
+from tempfile import TemporaryDirectory
+from repobot.tables import Base, db
+
+
+APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+
+
+class TarRepo(Base):
+    __tablename__ = 'tarrepo'
+    id = Column(Integer, primary_key=True)
+    name = Column(String(length=32), unique=True, nullable=False)
+
+
+class TarPackage(Base):
+    __tablename__ = 'tarpkg'
+    id = Column(Integer, primary_key=True)
+
+    repo_id = Column(Integer, ForeignKey("tarrepo.id"), nullable=False)
+    repo = relationship("TarRepo")
+
+    name = Column(String(length=128), nullable=False)    # 'cpython'
+    version = Column(String(length=64), nullable=False)  # '3.7.3'
+    fname = Column(String(length=256), nullable=False)   # 'cpython-3.7.3.tar.gz'
+
+    size = Column(Integer, nullable=False)
+    sha256 = Column(String(length=64))
+
+    __table_args__ = (UniqueConstraint('fname', 'repo_id', name='tar_unique_repopkg'), )
+
+    @property
+    def blobpath(self):
+        """
+        Get the s3 path of this package within the repo:
+        repos/<repo name>/tarballs/<first letter of file name>/<package name>/<file name>
+        """
+        return os.path.join("repos", self.repo.name, "tarballs", self.fname[0].lower(), self.name, self.fname)
+
+
+def get_repo(_db, repo_name, create_ok=True):  # TODO make this generic
+    """
+    Fetch a repo from the database by name
+    """
+    repo = _db.query(TarRepo).filter(TarRepo.name == repo_name).first()
+    if not repo and create_ok:
+        repo = TarRepo(name=repo_name)
+        _db.add(repo)
+        _db.commit()
+    return repo
+
+
+def copysha256(fin, fout):
+    """
+    Copy a file and calculate its sha256 while doing so
+    """
+    h = hashlib.sha256()
+
+    while True:
+        data = fin.read(4096)
+        if not data:
+            break
+        h.update(data)
+        fout.write(data)
+
+    return h.hexdigest()
+
+
+class TarProvider(object):
+    def __init__(self, dbcon, s3client, bucket):
+        self.db = dbcon
+        self.s3 = s3client
+        self.bucket = bucket
+        """base path within the s3 bucket"""
+        self.basepath = "data/provider/tar"
+
+        cherrypy.tree.mount(TarWeb(self), "/repo/tar", {'/': {'tools.trailing_slash.on': False,
+                                                              'tools.db.on': True}})
+
+    def web_addpkg(self, reponame, name, version, fobj):
+        repo = get_repo(db(), reponame)
+
+        # write the uploaded tarball to temp storage
+        with TemporaryDirectory() as tdir:
+            tmppkgpath = os.path.join(tdir, fobj.filename)  # TODO verify filename doesn't have any nonsense like ../../passwd
+            with open(tmppkgpath, "wb") as fdest:
+                shasum = copysha256(fobj.file, fdest)
+
+            # TODO assert that the uploaded file smells like a tarball
+            # TODO assert the version string matches allowed chars
+            # TODO assert the name string matches allowed chars
+            # TODO support non-gzip
+            fname = f"{name}-{version}.tar.gz"
+
+            # add to db
+            tar = TarPackage(repo=repo,
+                             name=name,
+                             version=version,
+                             fname=fname,
+                             size=os.path.getsize(tmppkgpath),
+                             sha256=shasum)
+
+            # s3 path - repos/<reponame>/tarballs/f/foo/foo-1234.tar.gz
+            dpath = os.path.join(self.basepath, tar.blobpath)
+
+            files = self.s3.list_objects(Bucket=self.bucket, Prefix=dpath).get("Contents")
+            if files:
+                print(f"will overwrite: {files}")
+
+            db().add(tar)
+            db().commit()
+
+            try:
+                with open(tmppkgpath, "rb") as f:
+                    response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
+                    assert(response["ResponseMetadata"]["HTTPStatusCode"] == 200), f"Upload failed: {response}"
+            except Exception:
+                db().delete(tar)
+                db().commit()
+                raise
+
+        return json.dumps({"ok": True}, indent=4)  # TODO do something with this
+
+
+@cherrypy.popargs("reponame", "pkgname", "filename")
+class TarWeb(object):
+    def __init__(self, base):
+        self.base = base
+
+        template_dir = "templates" if os.path.exists("templates") else os.path.join(APPROOT, "templates")
+        self.tpl = Environment(loader=FileSystemLoader(template_dir),
+                               autoescape=select_autoescape(['html', 'xml']))
+
+    @cherrypy.expose
+    def index(self, reponame=None, pkgname=None, filename=None):
+        if filename:
+            return self.handle_download(reponame, pkgname, filename)
+        else:
+            return self.handle_navigation(reponame, pkgname, filename)
+
+    def handle_navigation(self, reponame=None, pkgname=None, filename=None):
+        if reponame:
+            repo = get_repo(db(), reponame, create_ok=False)
+            if pkgname:
+                return self.tpl.get_template("tar/package.html") \
+                    .render(repo=repo,
+                            pkgs=db().query(TarPackage)
+                                     .filter(TarPackage.repo == repo,
+                                             TarPackage.name == pkgname)
+                                     .order_by(TarPackage.version).all())
+
+            return self.tpl.get_template("tar/repo.html") \
+                .render(repo=repo,
+                        pkgs=self._get_dists(repo))
+
+        return self.tpl.get_template("tar/root.html") \
+            .render(repos=db().query(TarRepo).order_by(TarRepo.name).all())
+
+    def _get_dists(self, repo):
+        lastpkg = None
+        for pkg in db().query(TarPackage).filter(TarPackage.repo == repo).order_by(TarPackage.fname).all():
+            if lastpkg and pkg.name == lastpkg:
+                continue
+            yield pkg
+            lastpkg = pkg.name
+
+    def handle_download(self, reponame, pkgname, filename):
+        repo = get_repo(db(), reponame, create_ok=False)
+        pkg = db().query(TarPackage).filter(TarPackage.repo == repo, TarPackage.fname == filename).first()
+        if not pkg:
+            raise cherrypy.HTTPError(404)
+
+        dpath = os.path.join(self.base.basepath, pkg.blobpath)
+        print("dpath=", dpath)
+        print("blobpath=", pkg.blobpath)
+        print("basepath=", self.base.basepath)
+
+        if str(cherrypy.request.method) == "DELETE":
+            db().delete(pkg)
+            files = self.base.s3.list_objects(Bucket=self.base.bucket, Prefix=dpath).get("Contents")
+            if files:
+                self.base.s3.delete_object(Bucket=self.base.bucket, Key=dpath)
+            db().commit()
+            return "OK"  # TODO delete the repo if we've emptied it(?)
+
+        elif str(cherrypy.request.method) == "GET":
+            response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)
+
+            cherrypy.response.headers["Content-Type"] = "application/octet-stream"
+            cherrypy.response.headers["Content-Length"] = response["ContentLength"]
+
+            def stream():
+                while True:
+                    data = response["Body"].read(65535)
+                    if not data:
+                        return
+                    yield data
+
+            return stream()
+        else:
+            raise cherrypy.HTTPError(405)
+
+    index._cp_config = {'response.stream': True}
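
For reference, the S3 key layout produced by blobpath plus the provider's basepath can be seen by constructing a package in memory; no database is needed, and the 'myrepo'/'cpython' values below are made-up examples:

    from repobot.tarprovider import TarPackage, TarRepo

    pkg = TarPackage(repo=TarRepo(name="myrepo"),
                     name="cpython",
                     version="3.7.3",
                     fname="cpython-3.7.3.tar.gz")

    print(pkg.blobpath)
    # repos/myrepo/tarballs/c/cpython/cpython-3.7.3.tar.gz
    # TarProvider prepends its basepath, giving the final S3 key:
    # data/provider/tar/repos/myrepo/tarballs/c/cpython/cpython-3.7.3.tar.gz
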
diff --git a/templates/tar/package.html b/templates/tar/package.html
new file mode 100644
index 0000000..cbf99ca
--- /dev/null
+++ b/templates/tar/package.html
@@ -0,0 +1,16 @@
+<html>
+    <head>
+        <title>Tarball index</title>
+    </head>
+    <body>
+        {%- for pkg in pkgs %}
+        {{ pkg.fname }}
+        {%- endfor %}
+    </body>
+</html>
diff --git a/templates/tar/repo.html b/templates/tar/repo.html
new file mode 100644
index 0000000..51cff2c
--- /dev/null
+++ b/templates/tar/repo.html
@@ -0,0 +1,16 @@
+<html>
+    <head>
+        <title>Tarball index</title>
+    </head>
+    <body>
+        {%- for pkg in pkgs %}
+        {{ pkg.name }}
+        {%- endfor %}
+    </body>
+</html>
diff --git a/templates/tar/root.html b/templates/tar/root.html
new file mode 100644
index 0000000..50ac5ed
--- /dev/null
+++ b/templates/tar/root.html
@@ -0,0 +1,16 @@
+<html>
+    <head>
+        <title>Tarball index</title>
+    </head>
+    <body>
+        {%- for repo in repos %}
+        {{ repo.name }}
+        {%- endfor %}
+    </body>
+</html>
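
With TarWeb mounted at /repo/tar, each stored file is addressable as /repo/tar/<reponame>/<pkgname>/<filename>; a GET streams the tarball back and a DELETE removes both the database row and the S3 blob. A rough client-side sketch using python-requests, where the host/port and the repo, package, and file names are placeholders:

    import requests

    base = "http://localhost:8080/repo/tar"  # assumed host and port

    # download a stored tarball
    url = f"{base}/myrepo/cpython/cpython-3.7.3.tar.gz"
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open("cpython-3.7.3.tar.gz", "wb") as f:
            for chunk in resp.iter_content(65535):
                f.write(chunk)

    # remove it again (also deletes the blob from s3)
    requests.delete(url).raise_for_status()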