Browse Source

Apt without aptly mvp

master
dave 1 year ago
parent
commit
caebad0a16
7 changed files with 592 additions and 38 deletions
  1. +1
    -0
      repobot/__init__.py
  2. +397
    -0
      repobot/aptprovider.py
  3. +5
    -0
      repobot/pypiprovider.py
  4. +91
    -0
      repobot/server.py
  5. +68
    -0
      repobot/tables.py
  6. +29
    -33
      requirements.txt
  7. +1
    -5
      setup.py

+ 1
- 0
repobot/__init__.py View File

@@ -0,0 +1 @@
__version__ = "0.2.0"

+ 397
- 0
repobot/aptprovider.py View File

@@ -0,0 +1,397 @@
import cherrypy
from pydpkg import Dpkg
from repobot.tables import Base, db, get_engine
import sqlalchemy
from sqlalchemy import Column, ForeignKey
from sqlalchemy.types import String, Integer, Text, BOOLEAN
from sqlalchemy.dialects.mysql import LONGTEXT
from sqlalchemy.orm import relationship
from sqlalchemy import UniqueConstraint
from tempfile import TemporaryDirectory
from threading import Thread
import hashlib
import os
from time import sleep
import gnupg
import traceback
import json


class AptRepo(Base):
    """A named apt repository and the GPG signing key material attached to it."""
    __tablename__ = 'aptrepo'
    id = Column(Integer, primary_key=True)
    # repository name as it appears in URLs, e.g. /repo/apt/<name>/
    name = Column(String(length=32), unique=True, nullable=False)
    # ascii-armored secret key (exported under a passphrase) used to sign dists
    gpgkey = Column(Text(), nullable=True)
    # fingerprint of the signing key
    gpgkeyprint = Column(Text(), nullable=True)
    # ascii-armored public key, served to clients via the /pubkey endpoint
    gpgpubkey = Column(Text(), nullable=True)


class AptDist(Base):
    """One dist (suite/codename) within an AptRepo, plus its cached index documents."""
    __tablename__ = 'aptdist'
    id = Column(Integer, primary_key=True)
    repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
    repo = relationship("AptRepo")

    # set when packages change; the background signer regenerates the caches below
    dirty = Column(BOOLEAN(), nullable=False, default=False)

    # dist name, e.g. "bionic"
    name = Column(String(length=32), nullable=False)

    # rendered "Packages" index
    # NOTE(review): _sign_packages assigns utf-8 *bytes* to these text columns --
    # confirm the mysql driver coerces as intended
    packages_cache = Column(LONGTEXT(), nullable=True)
    # rendered "Release" file
    release_cache = Column(Text(), nullable=True)
    # detached GPG signature over release_cache (served as "Release.gpg")
    sig_cache = Column(Text(), nullable=True)

    # a dist name may appear at most once per repo
    __table_args__ = (UniqueConstraint('repo_id', 'name', name='apt_unique_repodist'), )


class AptPackage(Base):
    """A single .deb belonging to a repo+dist; stores hashes and control fields."""
    __tablename__ = 'aptpkg'
    id = Column(Integer, primary_key=True)

    repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
    repo = relationship("AptRepo")

    dist_id = Column(Integer, ForeignKey("aptdist.id"), nullable=False)
    dist = relationship("AptDist")

    # index (always 'binary-amd64' for now)

    name = Column(String(length=128), nullable=False)  # 'python3-pip'
    version = Column(String(length=128), nullable=False)  # '4.20.1'
    arch = Column(String(length=16), nullable=False)  # 'amd64'

    # uploaded file name, e.g. 'python3-pip_4.20.1_amd64.deb'
    fname = Column(String(length=256), nullable=False)

    # package size in bytes
    size = Column(Integer, nullable=False)

    # hex digests computed while the upload was streamed to disk (see copyhash)
    md5 = Column(String(length=32))
    sha1 = Column(String(length=40))
    sha256 = Column(String(length=64))
    sha512 = Column(String(length=128))

    # JSON-encoded dict of the .deb's control fields
    fields = Column(Text())

    # NOTE(review): constraint name duplicates AptDist's 'apt_unique_repodist';
    # MySQL scopes index names per-table so it works, but a distinct name would be clearer
    __table_args__ = (UniqueConstraint('name', 'version', 'repo_id', 'dist_id', name='apt_unique_repodist'), )

    @property
    def blobpath(self):
        """Object-store key for this package, bucketed by first letter of the name."""
        return "{}/{}/packages/{}/{}_{}.deb".format(self.repo.name, self.dist.name,
                                                    self.name[0], self.name, self.sha256[0:8])


def get_repo(_db, repo_name, create_ok=True):
    """Look up the AptRepo row named *repo_name*.

    When no row exists and *create_ok* is true, a new one is inserted and
    committed. Returns None for a missing repo when *create_ok* is false.
    """
    found = _db.query(AptRepo).filter(AptRepo.name == repo_name).first()
    if found or not create_ok:
        return found
    found = AptRepo(name=repo_name)
    _db.add(found)
    _db.commit()
    return found


def get_dist(_db, repo, dist_name, create_ok=True):
    """Look up the AptDist named *dist_name* within *repo*.

    Missing rows are created and committed when *create_ok* is true;
    otherwise None is returned.
    """
    query = _db.query(AptDist).filter(AptDist.name == dist_name,
                                      AptDist.repo_id == repo.id)
    dist = query.first()
    if dist is None and create_ok:
        dist = AptDist(name=dist_name, repo_id=repo.id)
        _db.add(dist)
        _db.commit()
    return dist


# hashlib algorithm name -> field label used in apt Packages/Release files
algos = {"md5": "MD5Sum",
         "sha1": "SHA1",
         "sha256": "SHA256",
         "sha512": "SHA512"}


def copyhash(fin, fout):
    """Copy file object *fin* to *fout* in chunks, hashing the data in transit.

    :param fin: readable binary file object
    :param fout: writable binary file object
    :return: dict mapping each algorithm in ``algos`` to its hex digest
    """
    hashes = {algo: getattr(hashlib, algo)() for algo in algos}

    while True:
        data = fin.read(4096)
        if not data:
            break
        for h in hashes.values():
            h.update(data)
        fout.write(data)

    return {k: v.hexdigest() for k, v in hashes.items()}


def hashmany(data):
    """Hash a bytes object with every algorithm in ``algos``.

    (The previous docstring claimed this copies a file; it only hashes.)

    :param data: bytes to hash
    :return: dict mapping each algorithm in ``algos`` to its hex digest
    """
    hashes = {algo: getattr(hashlib, algo)() for algo in algos}
    for h in hashes.values():
        h.update(data)
    return {k: v.hexdigest() for k, v in hashes.items()}


class AptProvider(object):
    """Apt repository provider: .deb blobs live in S3, metadata in the database.

    A daemon thread periodically re-renders and GPG-signs the index files
    (Packages / Release / Release.gpg) of any dist flagged dirty.
    """
    def __init__(self, dbcon, s3client, bucket="aptprovider"):
        """
        :param dbcon: SQLAlchemy engine (stored; request sessions come from the db tool)
        :param s3client: boto3 s3 client used for blob storage
        :param bucket: bucket to store packages under; created if missing
        """
        self.db = dbcon
        self.s3 = s3client
        self.bucket = bucket
        self.basepath = "data/provider/apt"
        """
        bucket path (after basedir)
        repos/{reponame}/packages/f/foo.deb
        """
        # mount the web UI; tools.db gives handlers a per-request session via db()
        cherrypy.tree.mount(AptWeb(self), "/repo/apt", {'/': {'tools.trailing_slash.on': False,
                                                              'tools.db.on': True}})

        # ensure bucket exists
        if bucket not in [b['Name'] for b in self.s3.list_buckets()['Buckets']]:
            print("Creating bucket")
            self.s3.create_bucket(Bucket=bucket)

        # background signer; daemon so it never blocks interpreter shutdown
        self.updater = Thread(target=self.sign_packages, daemon=True)
        self.updater.start()

    def sign_packages(self):
        """Thread body: poll every 2s and re-sign any dirty dists.

        NOTE(review): the bare except prints the traceback and then *breaks*,
        so the signer thread dies permanently after the first error.
        """
        while True:
            sleep(2)
            try:
                self._sign_packages()
            except:
                traceback.print_exc()
                # sleep(10)
                break

    def _sign_packages(self):
        """Regenerate and sign the index documents for every dist marked dirty.

        Runs outside any cherrypy request, so it creates its own scoped session.
        NOTE(review): nothing ever resets AptDist.dirty to False, so dirty
        dists appear to be re-signed on every pass -- confirm intent.
        """
        print("signing packages")
        session = sqlalchemy.orm.scoped_session(sqlalchemy.orm.sessionmaker(autoflush=True, autocommit=False))
        session.configure(bind=get_engine())

        dirtydists = session.query(AptDist).filter(AptDist.dirty == True).all()

        for dist in dirtydists:
            print("Signing dist {}/{}".format(dist.repo.name, dist.name))

            # build the "Packages" index: control fields, digests, then location/size
            str_packages = ""

            for package in session.query(AptPackage) \
                    .filter(AptPackage.repo == dist.repo,
                            AptPackage.dist == dist) \
                    .order_by(AptPackage.id).all():
                fields = json.loads(package.fields)
                for k, v in fields.items():
                    str_packages += "{}: {}\n".format(k, v)
                for algo, algoname in algos.items():
                    str_packages += "{}: {}\n".format(algoname, getattr(package, algo))

                str_packages += "Filename: packages/{}/{}\n".format(package.fname[0], package.fname)
                str_packages += "Size: {}\n".format(package.size)

                str_packages += "\n"

            # NOTE(review): bytes assigned to a LONGTEXT column -- confirm coercion
            dist.packages_cache = str_packages.encode("utf-8")

            release_hashes = hashmany(dist.packages_cache)
            print(release_hashes)

            # NOTE(review): Date is hardcoded instead of the current timestamp
            str_release = """Origin: . {dist}
Label: . {dist}
Suite: {dist}
Codename: {dist}
Date: Fri, 2 Nov 2018 04:58:59 UTC
Architectures: amd64
Components: main
Description: Generated by yolo
""".format(dist=dist.name)
            for algo, algoname in algos.items():
                str_release += "{}:\n {} {} {}/{}/{}\n".format(algoname,
                                                               release_hashes[algo],
                                                               len(dist.packages_cache),
                                                               "main",  # TODO component
                                                               "binary-amd64",  # TODO whatever this was
                                                               "Packages")

            dist.release_cache = str_release.encode("utf-8")

            keyemail = 'debian_signing@localhost'

            # sign inside a throwaway gpg home, importing the repo's stored key
            # (or generating + persisting one on first use)
            with TemporaryDirectory() as tdir:
                gpg = gnupg.GPG(gnupghome=tdir)

                def getkey():
                    # find the secret key whose uid carries our signing email
                    keys = [i for i in gpg.list_keys(secret=True) if any([keyemail in k for k in i["uids"]])]
                    if keys:
                        return keys[0]

                fingerprint = None

                if not dist.repo.gpgkey:
                    print("Generating key for", dist.repo.name)
                    key = gpg.gen_key(gpg.gen_key_input(name_email=keyemail,
                                                        expire_date='2029-04-28',
                                                        key_type='RSA',
                                                        key_length=4096,
                                                        key_usage='encrypt,sign,auth',
                                                        passphrase="secret"))
                    fingerprint = key.fingerprint
                    dist.repo.gpgkey = gpg.export_keys(fingerprint, secret=True, passphrase="secret")
                    dist.repo.gpgkeyprint = fingerprint
                    dist.repo.gpgpubkey = gpg.export_keys(fingerprint)

                else:
                    import_result = gpg.import_keys(dist.repo.gpgkey)
                    fingerprint = import_result.results[0]['fingerprint']  # errors here suggests some gpg import issue
                    assert(fingerprint == getkey()['fingerprint'])

                # detached signature becomes the body of Release.gpg
                dist.sig_cache = gpg.sign(dist.release_cache, keyid=fingerprint, passphrase='secret',
                                          detach=True, clearsign=False).data

        session.commit()

    def web_addpkg(self, reponame, name, version, fobj, dist):
        """Accept an uploaded .deb: hash it, store the blob in S3, record it in the db.

        Generator: yields human-readable progress lines back to the uploader.
        NOTE(review): *name*/*version* arguments are never checked against the
        package's own control fields (see TODO at the bottom).
        """
        repo = get_repo(db(), reponame)
        dist = get_dist(db(), repo, dist)
        print("Dist:", dist)

        # - read f (write to temp storage if needed) and generate the hashes
        # - load with Dpkg to get name version and whatnot
        with TemporaryDirectory() as tdir:
            tmppkgpath = os.path.join(tdir, "temp.deb")
            with open(tmppkgpath, "wb") as fdest:
                fhashes = copyhash(fobj.file, fdest)

            p = Dpkg(tmppkgpath)
            pkgname = "{}_{}_{}.deb".format(p.message['Package'], p.message['Version'], p.message['Architecture'])
            yield "package name: {}\n".format(pkgname)
            yield "package size: {}\n".format(os.path.getsize(tmppkgpath))
            yield "package message:\n-----------------\n{}\n-----------------\n".format(p.message)
            yield "package hashes: {}".format(fhashes)

            # repos/<reponame>/packages/f/foo.deb
            dpath = os.path.join(self.basepath, "repos", repo.name, "packages", pkgname[0], pkgname)

            with open(tmppkgpath, "rb") as f:
                response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
                if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
                    print(response)
                    raise Exception("failed to store package")

            fields = {key: p.message[key] for key in p.message.keys()}

            pkg = AptPackage(repo=repo, dist=dist,
                             name=p.message['Package'],
                             version=p.message['Version'],
                             arch=p.message['Architecture'],
                             fname=pkgname,
                             size=os.path.getsize(tmppkgpath),
                             **fhashes,
                             fields=json.dumps(fields))

            # flag the dist so the background signer rebuilds its indexes
            dist.dirty = True
            db().add(pkg)
            db().commit()

        #TODO
        # - verify dpkg name & version match params
        # - copy to persistent storage
        # - add db record keyed under repo name and dist (and index but only 'binary-amd64' for now)
        # - mark dist dirty


@cherrypy.popargs("reponame")
class AptWeb(object):
    """Root of the apt web interface at /repo/apt[/<reponame>].

    Child nodes serve dist index files (``dists``) and package downloads
    (``packages``).
    """
    def __init__(self, base):
        """
        :param base: owning AptProvider instance
        """
        self.base = base
        self.dists = AptDists(base)
        self.packages = AptFiles(base)

    @cherrypy.expose
    def index(self, reponame=None):
        """Placeholder info page for one repo, or for all repos."""
        if reponame:
            #TODO
            yield "about apt repo '{}'".format(reponame)
        else:
            #TODO
            yield "about all apt repos"

    @cherrypy.expose
    def pubkey(self, reponame=None):
        """Serve the repo's ascii-armored GPG public key (for ``apt-key add``).

        Fix: an unknown repo previously returned None from get_repo and
        AttributeError'd (HTTP 500); respond 404 instead.
        """
        repo = get_repo(db(), reponame, create_ok=False)
        if repo is None:
            raise cherrypy.HTTPError(404)
        return repo.gpgpubkey


@cherrypy.expose
class AptDists(object):
    """Serves dist index files: Packages, Release and Release.gpg."""
    _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}

    def __init__(self, base):
        """
        :param base: owning AptProvider instance
        """
        self.base = base

    def _lookup_dist(self, reponame, distname):
        # Fix: an unknown repo previously crashed inside get_dist (repo.id on
        # None) with HTTP 500; resolve repo and dist defensively and 404 instead.
        repo = get_repo(db(), reponame, create_ok=False)
        dist = get_dist(db(), repo, distname, create_ok=False) if repo else None
        if not dist:
            raise cherrypy.HTTPError(404)
        return dist

    def __call__(self, *segments, reponame=None):
        """Route by path shape:

        ``<dist>/<component>/<index>/Packages`` -> packages index
        ``<dist>/Release``                      -> release metadata
        ``<dist>/Release.gpg``                  -> detached signature
        """
        if len(segments) == 4 and segments[3] == "Packages":
            distname, componentname, indexname, pkgs = segments
            dist = self._lookup_dist(reponame, distname)
            return dist.packages_cache

        elif len(segments) == 2:
            distname, target = segments
            # Fix: this branch previously dereferenced a possibly-None dist
            dist = self._lookup_dist(reponame, distname)

            if target == "Release":
                return dist.release_cache

            elif target == "Release.gpg":
                return dist.sig_cache

        raise cherrypy.HTTPError(404)


@cherrypy.expose
class AptFiles(object):
    """Streams package blobs: /repo/apt/<reponame>/packages/<letter>/<fname>."""
    _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}

    def __init__(self, base):
        """
        :param base: owning AptProvider instance
        """
        self.base = base

    def __call__(self, *segments, reponame=None):
        """Look the package up by file name and stream it out of the blob store."""
        # Fix: a malformed path previously raised ValueError on unpack (HTTP 500)
        if len(segments) != 2:
            raise cherrypy.HTTPError(404)
        firstletter, pkgname = segments

        # Fix: unknown repo now 404s instead of querying against repo=None
        repo = get_repo(db(), reponame, create_ok=False)
        if repo is None:
            raise cherrypy.HTTPError(404)

        package = db().query(AptPackage).filter(AptPackage.repo == repo, AptPackage.fname == pkgname).first()

        if not package:
            raise cherrypy.HTTPError(404)

        # repos/<reponame>/packages/f/foo.deb
        dpath = os.path.join(self.base.basepath, "repos", repo.name, "packages", package.fname[0], package.fname)

        response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)

        cherrypy.response.headers["Content-Type"] = "application/x-debian-package"
        cherrypy.response.headers["Content-Length"] = response["ContentLength"]

        def stream():
            # relay the S3 body; NOTE(review): read() with no size reads the
            # whole object in one go -- consider read(65536) for large debs
            while True:
                data = response["Body"].read()
                if not data:
                    return
                yield data

        return stream()

    __call__._cp_config = {'response.stream': True}

+ 5
- 0
repobot/pypiprovider.py View File

@@ -0,0 +1,5 @@

class PypiProvider(object):
    """Stub pypi provider -- currently only records its storage handles."""

    def __init__(self, dbcon, s3client):
        """
        :param dbcon: database engine/connection handle
        :param s3client: boto3 s3 client for blob storage
        """
        self.s3 = s3client
        self.db = dbcon

+ 91
- 0
repobot/server.py View File

@@ -0,0 +1,91 @@
import cherrypy
import logging
from repobot.tables import get_engine, SAEnginePlugin, SATool

from repobot.aptprovider import AptProvider
from repobot.pypiprovider import PypiProvider

import boto3
from botocore.client import Config as BotoConfig


class AppWeb(object):
    """Top-level web UI: index/repo link pages plus the package upload endpoint."""

    def __init__(self, providers):
        """
        :param providers: mapping of provider name -> provider instance
        """
        self.providers = providers

    @cherrypy.expose
    def index(self):
        """Landing page: a single link to the repo listing."""
        yield '<a href="/repo">repos</a>'

    @cherrypy.expose
    def repo(self):
        """Emit one link per registered provider."""
        yield from ('<a href="/repo/{provider}">{provider}</a><br />'.format(provider=key)
                    for key in self.providers)

    @cherrypy.expose
    def addpkg(self, provider, reponame, name, version, f, **params):
        """Delegate a package upload to the named provider, streaming its output."""
        # TODO regex validate args
        handler = self.providers[provider]
        yield from handler.web_addpkg(reponame, name, version, f, **params)


def main():
    """Entry point: wire up the database, blob store, providers and web server.

    Fixes: argparse description was copy-pasted from an unrelated project
    ("irc web client server"); an unused validate_password function was removed.
    """
    import argparse
    import signal

    parser = argparse.ArgumentParser(description="software package repository server")
    parser.add_argument('-p', '--port', default=8080, type=int, help="tcp port to listen on")
    parser.add_argument('-s', '--database', help="mysql connection string")
    parser.add_argument('--debug', action="store_true", help="enable development options")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # NOTE(review): args.database is parsed but ignored -- get_engine() decides
    # the connection string itself; wire this through when it gains a uri param
    dbcon = get_engine()

    SAEnginePlugin(cherrypy.engine, dbcon).subscribe()
    cherrypy.tools.db = SATool()

    # TODO(review): blank endpoint/credentials are placeholders; supply real
    # values (e.g. from the environment) before deploying
    s3 = boto3.client('s3', config=BotoConfig(signature_version='s3v4'), region_name='us-east-1',
                      endpoint_url='',
                      aws_access_key_id='',
                      aws_secret_access_key='')

    providers = {"apt": AptProvider(dbcon, s3),
                 "pypi": PypiProvider(dbcon, s3)}

    web = AppWeb(providers)

    cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
                                         'tools.db.on': True}})

    cherrypy.config.update({
        'tools.sessions.on': False,
        'request.show_tracebacks': True,
        'server.socket_port': args.port,
        'server.thread_pool': 5,
        'server.socket_host': '0.0.0.0',
        'server.show_tracebacks': True,
        'log.screen': False,
        'engine.autoreload.on': args.debug,
    })

    def signal_handler(signum, stack):
        # shut the engine down cleanly on ctrl-c / docker stop
        logging.warning('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        cherrypy.engine.exit()


if __name__ == '__main__':
    main()

+ 68
- 0
repobot/tables.py View File

@@ -0,0 +1,68 @@
import sqlalchemy
import cherrypy
from cherrypy.process import plugins
from sqlalchemy.ext.declarative import declarative_base


Base = declarative_base()


def db():
    """Return the SQLAlchemy session bound to the current cherrypy request (set by SATool)."""
    return cherrypy.request.db


def get_engine(echo=False, uri=None):
    """Create the application's SQLAlchemy engine.

    Fix: the connection string (with root:root credentials) was hardcoded.
    It can now be supplied explicitly or via the ``REPOBOT_DB`` environment
    variable; the old hardcoded value remains the default, so existing callers
    are unaffected.

    :param echo: when True, log all emitted SQL
    :param uri: optional SQLAlchemy connection string override
    """
    import os
    if uri is None:
        uri = os.environ.get("REPOBOT_DB", 'mysql+pymysql://root:root@localhost/repobot')
    return sqlalchemy.create_engine(uri, echo=echo, encoding="utf8")


class SAEnginePlugin(plugins.SimplePlugin):
    """cherrypy bus plugin that owns the SQLAlchemy engine.

    Creates tables when the engine starts, and binds request-scoped sessions
    to the engine whenever SATool publishes a "bind" message.
    """
    def __init__(self, bus, dbcon):
        """
        :param bus: cherrypy engine/bus to attach to
        :param dbcon: SQLAlchemy engine to hand out on "bind"
        """
        plugins.SimplePlugin.__init__(self, bus)
        self.sa_engine = dbcon
        self.bus.subscribe("bind", self.bind)

    def start(self):
        # create any missing tables for all declarative models at startup
        Base.metadata.create_all(self.sa_engine)

    def bind(self, session):
        # SATool publishes its scoped session here to bind it to the engine
        session.configure(bind=self.sa_engine)


class SATool(cherrypy.Tool):
    """cherrypy tool attaching a per-request SQLAlchemy session as request.db."""

    def __init__(self):
        """
        The SA tool is responsible for associating a SA session
        to the SA engine and attaching it to the current request.
        Since we are running in a multithreaded application,
        we use the scoped_session that will create a session
        on a per thread basis so that you don't worry about
        concurrency on the session object itself.

        This tools binds a session to the engine each time
        a requests starts and commits/rollbacks whenever
        the request terminates.
        """
        cherrypy.Tool.__init__(self, 'before_request_body',
                               self.bind_session,
                               priority=100)

        # one session per thread; commit_transaction() calls .remove() to reset it
        self.session = sqlalchemy.orm.scoped_session(
            sqlalchemy.orm.sessionmaker(autoflush=True, autocommit=False))

    def _setup(self):
        # also hook the commit/cleanup callback onto the end of every request
        cherrypy.Tool._setup(self)
        cherrypy.request.hooks.attach('on_end_resource', self.commit_transaction, priority=80)

    def bind_session(self):
        # request start: bind the scoped session to the engine and expose it
        cherrypy.engine.publish('bind', self.session)
        cherrypy.request.db = self.session

    def commit_transaction(self):
        # request end: commit (or roll back on error) and discard the session
        cherrypy.request.db = None
        try:
            self.session.commit() #TODO commit is issued even on endpoints with no queries
        except:
            # NOTE(review): bare except also catches BaseException, but it
            # re-raises, so nothing is silently swallowed
            self.session.rollback()
            raise
        finally:
            self.session.remove()

+ 29
- 33
requirements.txt View File

@@ -1,34 +1,30 @@
arpy==1.1.1
asn1crypto==0.24.0
backports.functools-lru-cache==1.5
BTrees==4.5.1
certifi==2018.10.15
chardet==3.0.4
cheroot==6.5.2
CherryPy==18.0.1
coloredlogs==10.0
deb-pkg-tools==4.5
executor==21.2
fasteners==0.14.1
humanfriendly==4.16.1
idna==2.7
jaraco.functools==1.20
Jinja2==2.10
MarkupSafe==1.0
monotonic==1.5
more-itertools==4.3.0
persistent==4.4.2
portend==2.3
property-manager==2.3.1
python-debian==0.1.33
python-memcached==1.59
pytz==2018.5
requests==2.20.0
six==1.11.0
tempora==1.13
transaction==2.2.1
urllib3==1.24
verboselogs==1.7
zc.lockfile==1.3.0
ZConfig==3.3.0
ZODB==5.5.0
zodbpickle==1.0.2
zope.interface==4.5.0
boto3==1.9.138
botocore==1.12.138
cffi==1.12.3
cheroot==6.5.4
CherryPy==18.1.1
cryptography==2.6.1
docutils==0.14
jaraco.functools==2.0
jmespath==0.9.4
more-itertools==7.0.0
PGPy==0.4.1
portend==2.4
pyasn1==0.4.5
pycparser==2.19
pydpkg==1.3.1
PyMySQL==0.9.3
python-dateutil==2.8.0
python-gnupg==0.4.4
pytz==2019.1
-e git+ssh://git@git.davepedu.com:223/dave/docker-artifact.git@48de79c18776e77bbd9b956afc27a872efeb0e9f#egg=repobot
s3transfer==0.2.0
singledispatch==3.4.0.3
six==1.12.0
SQLAlchemy==1.3.3
tempora==1.14.1
urllib3==1.24.2
zc.lockfile==1.4

+ 1
- 5
setup.py View File

@@ -17,11 +17,7 @@ setup(name='repobot',
]
},
include_package_data=True,
package_data={'repobot': ['../templates/pypi/*.html']},
# package_data={'repobot': ['../templates/pypi/*.html']},
install_requires=[
'ZODB==5.5.0',
'CherryPy==18.0.1',
'Jinja2==2.10',
'requests==2.20.0'
],
zip_safe=False)

Loading…
Cancel
Save