generic tarball provider

This commit is contained in:
dave 2019-05-30 21:30:46 -07:00
parent 69a184cb75
commit 326051097c
6 changed files with 276 additions and 3 deletions

View File

@ -82,7 +82,21 @@ wget -qO- http://host/repo/apt/reponame/dists/trusty/install | bash -x /dev/stdi
* CLI tool (for adding packages only)
* 'Simple' cli tool (shell script fetchable from the server for adding packages)
* Rpm Support
* Auth
* Delete packages
* Support using existing GPG keys
* Support using existing GPG keys for apt
* Nicer UI
* Json API
* deb needs to be able to slice packages in repos by: component (arbitrary names), index (binary-amd64, binary-i386, source)
* can already slice packages by: repo, dist
* Move copysha256 somewhere generic
* Have the server dictate the S3 root path to the provider plugins
* Assert that submitted package names and file names are sane
* Assert that submitted files smell like the type of file that is intended
* Global & per-provider options:
* option to block overwriting
* Standardize what is returned from provider's web_addpkg
* Standardize some fields of provider's schema (name, version)
* Delete repos if empty (with option to disable per provider)

View File

@ -6,6 +6,7 @@ import sqlalchemy
from botocore.client import Config as BotoConfig
from repobot.aptprovider import AptProvider
from repobot.pypiprovider import PypiProvider
from repobot.tarprovider import TarProvider
from repobot.tables import SAEnginePlugin, SATool
from urllib.parse import urlparse
@ -78,7 +79,8 @@ def main():
# set up providers
providers = {"apt": AptProvider(dbcon, s3, bucket),
"pypi": PypiProvider(dbcon, s3, bucket)}
"pypi": PypiProvider(dbcon, s3, bucket),
"tar": TarProvider(dbcon, s3, bucket)}
# set up main web screen
web = AppWeb(providers)

repobot/ Normal file
View File

@ -0,0 +1,209 @@
import cherrypy
import hashlib
import json
import os
from jinja2 import Environment, FileSystemLoader, select_autoescape
from sqlalchemy import Column, ForeignKey, UniqueConstraint
from sqlalchemy.orm import relationship
from sqlalchemy.types import String, Integer
from tempfile import TemporaryDirectory
from repobot.tables import Base, db
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
class TarRepo(Base):
    """
    A named repository that tarball packages are grouped under
    """
    __tablename__ = 'tarrepo'
    id = Column(Integer, primary_key=True)
    # repo names must be unique (enforced by the unique constraint on this column)
    name = Column(String(length=32), unique=True, nullable=False)
class TarPackage(Base):
    """
    A single uploaded tarball that belongs to one TarRepo
    """
    __tablename__ = 'tarpkg'
    id = Column(Integer, primary_key=True)
    # owning repo; references the primary key of the 'tarrepo' table
    repo_id = Column(Integer, ForeignKey("tarrepo.id"), nullable=False)
    repo = relationship("TarRepo")

    name = Column(String(length=128), nullable=False)  # 'cpython'
    version = Column(String(length=64), nullable=False)  # '3.7.3'
    fname = Column(String(length=256), nullable=False)  # cpython-3.7.3.tar.gz
    size = Column(Integer, nullable=False)
    sha256 = Column(String(length=64))

    # a given file name may only exist once within a repo
    __table_args__ = (UniqueConstraint('fname', 'repo_id', name='tar_unique_repopkg'), )

    @property
    def blobpath(self):
        """
        Get the s3 path of this tarball, relative to the provider's base path

        Layout: repos/<reponame>/tarballs/<first letter of fname, lowercased>/<name>/<fname>
        """
        # exposed as a property because callers read it as an attribute (tar.blobpath)
        return os.path.join("repos", self.repo.name, "tarballs", self.fname[0].lower(), self.name, self.fname)
def get_repo(_db, repo_name, create_ok=True):  #TODO make this generic
    """
    Fetch a repo from the database by name

    :param _db: database session object exposing query()
    :param repo_name: name of the repo to look up
    :param create_ok: when true, a missing repo is instantiated instead of returning None
    :return: the matching TarRepo; a new TarRepo if none exists and create_ok is true; otherwise None
    """
    repo = _db.query(TarRepo).filter(TarRepo.name == repo_name).first()
    if not repo and create_ok:
        # this function only builds the object; nothing here adds it to the session
        repo = TarRepo(name=repo_name)
    return repo
def copysha256(fin, fout):
    """
    Copy a file and calculate sha256 while doing so

    :param fin: binary file-like object to read from (read until it returns no data)
    :param fout: binary file-like object that receives every chunk read from fin
    :return: hex digest of the sha256 of all bytes copied
    """
    h = hashlib.sha256()
    while True:
        # read in bounded chunks so large uploads are never held in memory at once
        data = fin.read(65536)
        if not data:
            break
        h.update(data)
        fout.write(data)
    return h.hexdigest()
# Provider for plain tarballs: holds the db handle, s3 client and bucket name, and mounts
# the TarWeb application under /repo/tar.
# NOTE(review): indentation and several lines of this class are missing in this view.
class TarProvider(object):
# Stores the given handles and mounts the web UI as a side effect of construction.
def __init__(self, dbcon, s3client, bucket):
self.db = dbcon
self.s3 = s3client
self.bucket = bucket
"""base path within the s3 bucket"""
self.basepath = "data/provider/tar"
# mount TarWeb at /repo/tar with the trailing_slash tool disabled and the db tool enabled
cherrypy.tree.mount(TarWeb(self), "/repo/tar", {'/': {'tools.trailing_slash.on': False,
'tools.db.on': True}})
# Handle an uploaded package: spool it to a temp dir while hashing it, then upload it to s3.
# fobj must expose .filename and .file (both are read below).
def web_addpkg(self, reponame, name, version, fobj):
# get_repo is called with its default create_ok=True, so a missing repo is created
repo = get_repo(db(), reponame)
# write the uploaded tarball to temp storage
with TemporaryDirectory() as tdir:
tmppkgpath = os.path.join(tdir, fobj.filename) #TODO verify filename doesnt have any nonsense like ../../passwd
with open(tmppkgpath, "wb") as fdest:
shasum = copysha256(fobj.file, fdest)
#TODO assert that the uploaded file smells like a tarball
#TODO assert the version string matches allowed chars
#TODO assert the name string matches allowed chars
#TODO support non-gzip
# the stored file name is derived from name and version, not from the uploaded file name
fname = f"{name}-{version}.tar.gz"
# add to db
tar = TarPackage(repo=repo,
# NOTE(review): the TarPackage(...) call above is cut off in this view; its remaining
# keyword arguments and closing parenthesis are not visible
# s3 path - repos/<reponame>/tarballs/f/foo/foo-1234.tar.gz
dpath = os.path.join(self.basepath, tar.blobpath)
# an existing object under this key is only reported, not rejected
files = self.s3.list_objects(Bucket=self.bucket, Prefix=dpath).get("Contents")
if files:
print(f"will overwrite: {files}")
with open(tmppkgpath, "rb") as f:
response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
assert(response["ResponseMetadata"]["HTTPStatusCode"] == 200), f"Upload failed: {response}"
# NOTE(review): no matching `try:` or handler body is visible for this except clause
except Exception:
return json.dumps({"ok": True}, indent=4) #TODO do something with this
# Web front-end for the tar provider: browse repos/packages and download or delete tarballs.
# popargs names the leading URL path segments reponame, pkgname and filename.
# NOTE(review): indentation and several expressions of this class are missing in this view.
@cherrypy.popargs("reponame", "pkgname", "filename")
class TarWeb(object):
# base is the owning TarProvider; its basepath, s3 and bucket attributes are read below
def __init__(self, base):
self.base = base
# prefer ./templates relative to the working directory, else the copy under APPROOT
template_dir = "templates" if os.path.exists("templates") else os.path.join(APPROOT, "templates")
self.tpl = Environment(loader=FileSystemLoader(template_dir),
autoescape=select_autoescape(['html', 'xml']))
# Entry point: a filename in the URL means a file operation, anything shorter is navigation.
def index(self, reponame=None, pkgname=None, filename=None):
if filename:
return self.handle_download(reponame, pkgname, filename)
return self.handle_navigation(reponame, pkgname, filename)
# Picks the package, repo or root template depending on which URL parts were given.
def handle_navigation(self, reponame=None, pkgname=None, filename=None):
if reponame:
# create_ok=False: browsing must not create repos
repo = get_repo(db(), reponame, create_ok=False)
if pkgname:
# NOTE(review): the render(...) calls that follow each get_template() are cut off in this
# view, and the second filter operand below has lost its left-hand side
return self.tpl.get_template("tar/package.html") \
pkgs=db().query(TarPackage).filter(TarPackage.repo == repo, == pkgname).
return self.tpl.get_template("tar/repo.html") \
return self.tpl.get_template("tar/root.html") \
# Generator over the repo's packages ordered by fname.
# NOTE(review): the compared/assigned attribute is missing below, so the exact filtering
# rule cannot be determined from this view
def _get_dists(self, repo):
lastpkg = None
for pkg in db().query(TarPackage).filter(TarPackage.repo == repo).order_by(TarPackage.fname).all():
if lastpkg and == lastpkg:
yield pkg
lastpkg =
# Serve (GET) or remove (DELETE) the s3 object backing one package file.
# Raises HTTPError 404 if no package row matches and 405 for any other HTTP method.
# distname is accepted but not referenced in the visible body.
def handle_download(self, reponame, distname, filename):
repo = get_repo(db(), reponame, create_ok=False)
pkg = db().query(TarPackage).filter(TarPackage.repo == repo, TarPackage.fname == filename).first()
if not pkg:
raise cherrypy.HTTPError(404)
dpath = os.path.join(self.base.basepath, pkg.blobpath)
# debug output of the computed paths
print("dpath=", dpath)
print("blobpath=", pkg.blobpath)
print("basepath=", self.base.basepath)
if str(cherrypy.request.method) == "DELETE":
# only issue the delete when the listing shows something under this key
files = self.base.s3.list_objects(Bucket=self.base.bucket, Prefix=dpath).get("Contents")
if files:
self.base.s3.delete_object(Bucket=self.base.bucket, Key=dpath)
# NOTE(review): no removal of the TarPackage row is visible here — confirm
return "OK" #TODO delete the repo if we've emptied it(?)
elif str(cherrypy.request.method) == "GET":
response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)
cherrypy.response.headers["Content-Type"] = "application/octet-stream"
cherrypy.response.headers["Content-Length"] = response["ContentLength"]
# relay the s3 body in chunks of up to 65535 bytes
def stream():
while True:
data = response["Body"].read(65535)
if not data:
# NOTE(review): the exit statement for the empty-read case appears to be missing in this view
yield data
return stream()
raise cherrypy.HTTPError(405)
# NOTE(review): the config key is empty in this view; presumably the streaming option — confirm
index._cp_config = {'': True}

View File

@ -0,0 +1,16 @@
<!DOCTYPE html>
<title>Tarball index</title>
<style type="text/css">
a {
display: block;
{%- for pkg in pkgs %}
<a href="/repo/tar/{{ pkg.repo.name }}/{{ pkg.name }}/{{ pkg.fname }}#sha256={{ pkg.sha256 }}">{{ pkg.fname }}</a>
{%- endfor %}

templates/tar/repo.html Normal file
View File

@ -0,0 +1,16 @@
<!DOCTYPE html>
<title>Tarball index</title>
<style type="text/css">
a {
display: block;
{%- for pkg in pkgs %}
<a href="/repo/tar/{{ pkg.repo.name }}/{{ pkg.name }}/">{{ pkg.name }}</a>
{%- endfor %}

templates/tar/root.html Normal file
View File

@ -0,0 +1,16 @@
<!DOCTYPE html>
<title>Tarball index</title>
<style type="text/css">
a {
display: block;
{%- for repo in repos %}
<a href="/repo/tar/{{ repo.name }}/">{{ repo.name }}</a>
{%- endfor %}