Browse Source

generic tarball provider

master
dave 1 year ago
parent
commit
326051097c
6 changed files with 276 additions and 3 deletions
  1. +16
    -2
      README.md
  2. +3
    -1
      repobot/server.py
  3. +209
    -0
      repobot/tarprovider.py
  4. +16
    -0
      templates/tar/package.html
  5. +16
    -0
      templates/tar/repo.html
  6. +16
    -0
      templates/tar/root.html

+ 16
- 2
README.md View File

@@ -82,7 +82,21 @@ wget -qO- http://host/repo/apt/reponame/dists/trusty/install | bash -x /dev/stdi
Todo
----

* CLI tool (for adding packages only)
* 'Simple' cli tool (shell script fetchable from the server for adding packages)
* RPM support
* Auth
* Delete packages
* Support using existing GPG keys
* Support using existing GPG keys for apt
* Nicer UI
* JSON API
* deb needs to be able to slice packages in repos by: component (arbitrary names), index (binary-amd64, binary-i386, source)
* can already slice packages by: repo, dist
* Move copysha256 somewhere generic
* Have the server dictate the S3 root path to the provider plugins
* Assert that submitted package names and file names are sane
* Assert that submitted files smell like the type of file that is intended
* Global & per-provider options:
* option to block overwriting
* Standardize what is returned from provider's web_addpkg
* Standardize some fields of provider's schema (name, version)
* Delete repos if empty (with option to disable per provider)

+ 3
- 1
repobot/server.py View File

@@ -6,6 +6,7 @@ import sqlalchemy
from botocore.client import Config as BotoConfig
from repobot.aptprovider import AptProvider
from repobot.pypiprovider import PypiProvider
from repobot.tarprovider import TarProvider
from repobot.tables import SAEnginePlugin, SATool
from urllib.parse import urlparse

@@ -78,7 +79,8 @@ def main():

# set up providers
providers = {"apt": AptProvider(dbcon, s3, bucket),
"pypi": PypiProvider(dbcon, s3, bucket)}
"pypi": PypiProvider(dbcon, s3, bucket),
"tar": TarProvider(dbcon, s3, bucket)}

# set up main web screen
web = AppWeb(providers)


+ 209
- 0
repobot/tarprovider.py View File

@@ -0,0 +1,209 @@
import cherrypy
import hashlib
import json
import os
from jinja2 import Environment, FileSystemLoader, select_autoescape
from sqlalchemy import Column, ForeignKey, UniqueConstraint
from sqlalchemy.orm import relationship
from sqlalchemy.types import String, Integer
from tempfile import TemporaryDirectory
from repobot.tables import Base, db


# Absolute path of the directory one level above the one containing this file
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))


class TarRepo(Base):
    """
    A named repository of tarballs; TarPackage rows point at it via tarrepo.id
    """
    __tablename__ = 'tarrepo'
    id = Column(Integer, primary_key=True)
    # repo name, unique across all tar repos
    name = Column(String(length=32), unique=True, nullable=False)


class TarPackage(Base):
    """
    One uploaded tarball (a single version of a named package) belonging to a TarRepo
    """
    __tablename__ = 'tarpkg'
    id = Column(Integer, primary_key=True)

    repo_id = Column(Integer, ForeignKey("tarrepo.id"), nullable=False)
    repo = relationship("TarRepo")

    name = Column(String(length=128), nullable=False)  # 'cpython'
    version = Column(String(length=64), nullable=False)  # '3.7.3'

    fname = Column(String(length=256), nullable=False)  # cpython-3.7.3.tar.gz

    size = Column(Integer, nullable=False)
    sha256 = Column(String(length=64))

    # a file name may appear only once per repo
    __table_args__ = (UniqueConstraint('fname', 'repo_id', name='tar_unique_repopkg'), )

    @property
    def blobpath(self):
        """
        Get the s3 path within
        repos/<reponame>/tarballs/<f>/<foo>/<foo-1.2.3.tar.gz>

        <f> is the lowercased first character of the file name.
        """
        # Joined with an explicit "/" instead of os.path.join: this is an object key, not a
        # filesystem path. os.path.join would use the host OS separator and would throw away
        # every earlier segment if a later one (e.g. the package name) started with "/".
        return "/".join(("repos", self.repo.name, "tarballs", self.fname[0].lower(), self.name, self.fname))


def get_repo(_db, repo_name, create_ok=True):  #TODO make this generic
    """
    Look up a tar repo by name using the given database session.

    When no such repo exists and create_ok is true, a new one is added and committed and
    that new repo is returned; with create_ok false the lookup result (None) is returned.
    """
    existing = _db.query(TarRepo).filter(TarRepo.name == repo_name).first()
    if existing or not create_ok:
        return existing

    created = TarRepo(name=repo_name)
    _db.add(created)
    _db.commit()
    return created


def copysha256(fin, fout):
    """
    Stream everything readable from fin into fout, 4096 bytes at a time, and return the
    hex sha256 digest of the data that was copied
    """
    digest = hashlib.sha256()

    chunk = fin.read(4096)
    while chunk:
        digest.update(chunk)
        fout.write(chunk)
        chunk = fin.read(4096)

    return digest.hexdigest()


class TarProvider(object):
    """
    Provider for generic tarballs: package metadata goes into the database, the file itself
    into the S3 bucket. Creating an instance mounts the TarWeb UI at /repo/tar.
    """

    def __init__(self, dbcon, s3client, bucket):
        self.db = dbcon
        self.s3 = s3client
        self.bucket = bucket
        # base path within the s3 bucket
        self.basepath = "data/provider/tar"

        cherrypy.tree.mount(TarWeb(self), "/repo/tar", {'/': {'tools.trailing_slash.on': False,
                                                              'tools.db.on': True}})

    def web_addpkg(self, reponame, name, version, fobj):
        """
        Store an uploaded tarball as <name>-<version>.tar.gz in the given repo.

        The repo is created if it does not exist yet. The upload is spooled to a temporary
        file (hashing it on the way), a TarPackage row is committed, and the file is then put
        into S3. If the S3 upload fails the row is deleted again and the error is re-raised.

        fobj is the uploaded file part; only its `.file` stream is read.
        Returns a JSON string. Raises RuntimeError if S3 does not answer the put with HTTP 200.
        """
        repo = get_repo(db(), reponame)

        # write tarball to temp storage
        with TemporaryDirectory() as tdir:
            # Use a fixed name inside the temp dir. The client-supplied fobj.filename is
            # untrusted (it could be absolute or contain ../) and the stored file name is
            # derived from name/version below anyway.
            tmppkgpath = os.path.join(tdir, "upload")
            with open(tmppkgpath, "wb") as fdest:
                shasum = copysha256(fobj.file, fdest)

            #TODO assert that the uploaded file smells like a tarball
            #TODO assert the version string matches allowed chars
            #TODO assert the name string matches allowed chars
            #TODO support non-gzip
            fname = f"{name}-{version}.tar.gz"

            # add to db
            tar = TarPackage(repo=repo,
                             name=name,
                             version=version,
                             fname=fname,
                             size=os.path.getsize(tmppkgpath),
                             sha256=shasum)

            # s3 path - repos/<reponame>/tarballs/f/foo/foo-1234.tar.gz
            dpath = os.path.join(self.basepath, tar.blobpath)

            files = self.s3.list_objects(Bucket=self.bucket, Prefix=dpath).get("Contents")
            if files:
                print(f"will overwrite: {files}")

            db().add(tar)
            db().commit()

            try:
                with open(tmppkgpath, "rb") as f:
                    response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
                # explicit check rather than assert, which is stripped when python runs with -O
                if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
                    raise RuntimeError(f"Upload failed: {response}")
            except Exception:
                # don't leave a db row behind for a file that never made it into s3
                db().delete(tar)
                db().commit()
                raise

        return json.dumps({"ok": True}, indent=4)  #TODO do something with this


@cherrypy.popargs("reponame", "pkgname", "filename")
class TarWeb(object):
    """
    Web handler for the tar provider. URL path segments are mapped, in order, to
    reponame / pkgname / filename; a request with a filename is a file operation
    (GET download or DELETE), anything shorter renders an html index page.
    """

    def __init__(self, base):
        # the TarProvider this UI belongs to; supplies s3 client, bucket and basepath
        self.base = base

        # prefer ./templates in the working directory, fall back to the one under APPROOT
        template_dir = "templates" if os.path.exists("templates") else os.path.join(APPROOT, "templates")
        self.tpl = Environment(loader=FileSystemLoader(template_dir),
                               autoescape=select_autoescape(['html', 'xml']))

    @cherrypy.expose
    def index(self, reponame=None, pkgname=None, filename=None):
        """
        Entry point for every request: dispatch to download/delete or to navigation
        """
        if filename:
            return self.handle_download(reponame, pkgname, filename)
        return self.handle_navigation(reponame, pkgname, filename)

    def handle_navigation(self, reponame=None, pkgname=None, filename=None):
        """
        Render the index page for the deepest level given: all repos, the packages of one
        repo, or the files of one package. Raises a 404 if the named repo does not exist.
        """
        if not reponame:
            return self.tpl.get_template("tar/root.html") \
                .render(repos=db().query(TarRepo).order_by(TarRepo.name).all())

        repo = get_repo(db(), reponame, create_ok=False)
        if not repo:
            # same answer handle_download gives for an unknown repo, instead of an empty page
            raise cherrypy.HTTPError(404)

        if pkgname:
            # NOTE: ordering is whatever the database does for the version column itself;
            # no version-aware sorting is applied here
            pkgs = db().query(TarPackage) \
                .filter(TarPackage.repo == repo, TarPackage.name == pkgname) \
                .order_by(TarPackage.version).all()
            return self.tpl.get_template("tar/package.html").render(repo=repo, pkgs=pkgs)

        return self.tpl.get_template("tar/repo.html").render(repo=repo, pkgs=self._get_dists(repo))

    def _get_dists(self, repo):
        """
        Yield one TarPackage per distinct package name in the repo
        """
        lastpkg = None
        # Sort by name first: the loop only skips *adjacent* duplicates, and ordering by fname
        # alone can interleave names (foo-a1, foo-bar-1, foo-c1) and list a name twice.
        query = db().query(TarPackage).filter(TarPackage.repo == repo) \
            .order_by(TarPackage.name, TarPackage.fname)
        for pkg in query.all():
            if lastpkg and pkg.name == lastpkg:
                continue
            yield pkg
            lastpkg = pkg.name

    def handle_download(self, reponame, distname, filename):
        """
        GET streams the file out of s3; DELETE removes the db row and the s3 object.
        The file is looked up by repo and file name only. Raises a 404 if it is not in the
        database and a 405 for any other request method.
        """
        repo = get_repo(db(), reponame, create_ok=False)
        if not repo:
            raise cherrypy.HTTPError(404)

        pkg = db().query(TarPackage).filter(TarPackage.repo == repo, TarPackage.fname == filename).first()
        if not pkg:
            raise cherrypy.HTTPError(404)

        dpath = os.path.join(self.base.basepath, pkg.blobpath)
        method = str(cherrypy.request.method)

        if method == "DELETE":
            db().delete(pkg)
            files = self.base.s3.list_objects(Bucket=self.base.bucket, Prefix=dpath).get("Contents")
            if files:
                self.base.s3.delete_object(Bucket=self.base.bucket, Key=dpath)
            # commit only after the s3 side went through
            db().commit()
            return "OK"  #TODO delete the repo if we've emptied it(?)

        elif method == "GET":
            response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)

            cherrypy.response.headers["Content-Type"] = "application/octet-stream"
            cherrypy.response.headers["Content-Length"] = response["ContentLength"]

            def stream():
                # hand the s3 body to the client piecewise
                while True:
                    data = response["Body"].read(65535)
                    if not data:
                        return
                    yield data

            return stream()
        else:
            raise cherrypy.HTTPError(405)

    # turn on cherrypy's response.stream setting for index
    index._cp_config = {'response.stream': True}

+ 16
- 0
templates/tar/package.html View File

@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>
    <head>
        <title>Tarball index</title>
        <style type="text/css">
            a {
                display: block;
            }
        </style>
    </head>
    <body>
        {#- One link per file in pkgs; the sha256 is carried in the URL fragment.
            Path components are urlencoded so characters such as spaces, '#' or '?'
            in a name cannot break the link. #}
        {%- for pkg in pkgs %}
        <a href="/repo/tar/{{ repo.name|urlencode }}/{{ pkg.name|urlencode }}/{{ pkg.fname|urlencode }}#sha256={{ pkg.sha256 }}">{{ pkg.fname }}</a>
        {%- endfor %}
    </body>
</html>

+ 16
- 0
templates/tar/repo.html View File

@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>
    <head>
        <title>Tarball index</title>
        <style type="text/css">
            a {
                display: block;
            }
        </style>
    </head>
    <body>
        {#- One link per entry in pkgs, pointing at that package's page.
            Path components are urlencoded so characters such as spaces, '#' or '?'
            in a name cannot break the link. #}
        {%- for pkg in pkgs %}
        <a href="/repo/tar/{{ repo.name|urlencode }}/{{ pkg.name|urlencode }}/">{{ pkg.name }}</a>
        {%- endfor %}
    </body>
</html>

+ 16
- 0
templates/tar/root.html View File

@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>
    <head>
        <title>Tarball index</title>
        <style type="text/css">
            a {
                display: block;
            }
        </style>
    </head>
    <body>
        {#- One link per repo in repos. The name is urlencoded so characters such as
            spaces, '#' or '?' cannot break the link. #}
        {%- for repo in repos %}
        <a href="/repo/tar/{{ repo.name|urlencode }}/">{{ repo.name }}</a>
        {%- endfor %}
    </body>
</html>

Loading…
Cancel
Save