From caebad0a16776368a43c41bbb62f2fcd9905241b Mon Sep 17 00:00:00 2001
From: dave
Date: Mon, 29 Apr 2019 22:23:25 -0700
Subject: [PATCH] Apt without aptly mvp

---
 repobot/__init__.py     |   1 +
 repobot/aptprovider.py  | 476 ++++++++++++++++++++++++++++++++++++++++
 repobot/pypiprovider.py |   8 +
 repobot/server.py       | 103 +++++++++
 repobot/tables.py       |  81 +++++++
 requirements.txt        |  62 +++----
 setup.py                |   6 +-
 7 files changed, 699 insertions(+), 38 deletions(-)
 create mode 100644 repobot/__init__.py
 create mode 100644 repobot/aptprovider.py
 create mode 100644 repobot/pypiprovider.py
 create mode 100644 repobot/server.py
 create mode 100644 repobot/tables.py

diff --git a/repobot/__init__.py b/repobot/__init__.py
new file mode 100644
index 0000000..d3ec452
--- /dev/null
+++ b/repobot/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.2.0"
diff --git a/repobot/aptprovider.py b/repobot/aptprovider.py
new file mode 100644
index 0000000..0aff3c4
--- /dev/null
+++ b/repobot/aptprovider.py
@@ -0,0 +1,476 @@
+import cherrypy
+from pydpkg import Dpkg
+from repobot.tables import Base, db, get_engine
+import sqlalchemy
+from sqlalchemy import Column, ForeignKey
+from sqlalchemy.types import String, Integer, Text, BOOLEAN
+from sqlalchemy.dialects.mysql import LONGTEXT
+from sqlalchemy.orm import relationship
+from sqlalchemy import UniqueConstraint
+from tempfile import TemporaryDirectory
+from threading import Thread
+import hashlib
+import os
+from time import sleep
+import gnupg
+import traceback
+import json
+
+
+class AptRepo(Base):
+    """
+    A named apt repository and the gpg key material used to sign its dists
+    """
+    __tablename__ = 'aptrepo'
+    id = Column(Integer, primary_key=True)
+    name = Column(String(length=32), unique=True, nullable=False)
+    # exported secret key, its fingerprint, and the exported public key. All three are filled in
+    # the first time one of the repo's dists is signed
+    gpgkey = Column(Text(), nullable=True)
+    gpgkeyprint = Column(Text(), nullable=True)
+    gpgpubkey = Column(Text(), nullable=True)
+
+
+class AptDist(Base):
+    """
+    A distribution within a repo, along with the rendered index files served for it
+    """
+    __tablename__ = 'aptdist'
+    id = Column(Integer, primary_key=True)
+    repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
+    repo = relationship("AptRepo")
+
+    # set when a package is added, cleared when the signer thread regenerates the caches below
+    dirty = Column(BOOLEAN(), nullable=False, default=False)
+
+    name = Column(String(length=32), nullable=False)
+
+    # contents served as Packages, Release and Release.gpg respectively
+    packages_cache = Column(LONGTEXT(), nullable=True)
+    release_cache = Column(Text(), nullable=True)
+    sig_cache = Column(Text(), nullable=True)
+
+    __table_args__ = (UniqueConstraint('repo_id', 'name', name='apt_unique_repodist'), )
+
+
+class AptPackage(Base):
+    """
+    A single uploaded .deb, recorded under one repo and one dist
+    """
+    __tablename__ = 'aptpkg'
+    id = Column(Integer, primary_key=True)
+
+    repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
+    repo = relationship("AptRepo")
+
+    dist_id = Column(Integer, ForeignKey("aptdist.id"), nullable=False)
+    dist = relationship("AptDist")
+
+    # index (always 'binary-amd64' for now)
+
+    name = Column(String(length=128), nullable=False)  # 'python3-pip'
+    version = Column(String(length=128), nullable=False)  # '4.20.1'
+    arch = Column(String(length=16), nullable=False)  # 'amd64'
+
+    fname = Column(String(length=256), nullable=False)
+
+    size = Column(Integer, nullable=False)
+
+    md5 = Column(String(length=32))
+    sha1 = Column(String(length=40))
+    sha256 = Column(String(length=64))
+    sha512 = Column(String(length=128))
+
+    # json-encoded dict of the package's control fields
+    fields = Column(Text())
+
+    __table_args__ = (UniqueConstraint('name', 'version', 'repo_id', 'dist_id', name='apt_unique_repodist'), )
+
+    @property
+    def blobpath(self):
+        """
+        Storage path derived from the repo, dist, package name and sha256 prefix
+        """
+        return "{}/{}/packages/{}/{}_{}.deb".format(self.repo.name, self.dist.name,
+                                                    self.name[0], self.name, self.sha256[0:8])
+
+
+def get_repo(_db, repo_name, create_ok=True):
+    """
+    Fetch a repo by name. If it is missing it is created and committed, unless create_ok is
+    False, in which case None is returned.
+    """
+    repo = _db.query(AptRepo).filter(AptRepo.name == repo_name).first()
+    if not repo and create_ok:
+        repo = AptRepo(name=repo_name)
+        _db.add(repo)
+        _db.commit()
+    return repo
+
+
+def get_dist(_db, repo, dist_name, create_ok=True):
+    """
+    Fetch a dist of the given repo by name. If it is missing it is created and committed, unless
+    create_ok is False, in which case None is returned.
+    """
+    dist = _db.query(AptDist).filter(AptDist.name == dist_name, AptDist.repo_id == repo.id).first()
+    if not dist and create_ok:
+        dist = AptDist(name=dist_name, repo_id=repo.id)
+        _db.add(dist)
+        _db.commit()
+    return dist
+
+
+def get_dist_or_404(reponame, distname):
+    """
+    Fetch an existing dist using the request's db session, raising a 404 if either the repo or
+    the dist is unknown. The repo is checked first because get_dist needs a real repo's id.
+    """
+    repo = get_repo(db(), reponame, create_ok=False)
+    if not repo:
+        raise cherrypy.HTTPError(404)
+    dist = get_dist(db(), repo, distname, create_ok=False)
+    if not dist:
+        raise cherrypy.HTTPError(404)
+    return dist
+
+
+algos = {"md5": "MD5Sum",
+         "sha1": "SHA1",
+         "sha256": "SHA256",
+         "sha512": "SHA512"}
+
+
+def copyhash(fin, fout):
+    """
+    Copy a file and calculate hashes while doing so
+    """
+    hashes = {algo: getattr(hashlib, algo)() for algo in algos}
+
+    while True:
+        data = fin.read(4096)
+        if not data:
+            break
+        for h in hashes.values():
+            h.update(data)
+        fout.write(data)
+
+    return {k: v.hexdigest() for k, v in hashes.items()}
+
+
+def hashmany(data):
+    """
+    Calculate the hash of a bytes object with every algorithm in algos
+    """
+    return {algo: getattr(hashlib, algo)(data).hexdigest() for algo in algos}
+
+
+class AptProvider(object):
+    """
+    Apt repo provider. Uploaded packages are stored in s3 and recorded in the database, and a
+    background thread regenerates and signs the index files of dists that have changed.
+    """
+    def __init__(self, dbcon, s3client, bucket="aptprovider"):
+        """
+        Mount the apt web ui, make sure the bucket exists, and start the signer thread
+        :param dbcon: sqlalchemy engine
+        :param s3client: boto3 s3 client
+        :param bucket: name of the bucket packages are stored in
+        """
+        self.db = dbcon
+        self.s3 = s3client
+        self.bucket = bucket
+        self.basepath = "data/provider/apt"
+        # bucket path (after basedir)
+        # repos/{reponame}/packages/f/foo.deb
+        cherrypy.tree.mount(AptWeb(self), "/repo/apt", {'/': {'tools.trailing_slash.on': False,
+                                                              'tools.db.on': True}})
+
+        # ensure bucket exists
+        if bucket not in [b['Name'] for b in self.s3.list_buckets()['Buckets']]:
+            print("Creating bucket")
+            self.s3.create_bucket(Bucket=bucket)
+
+        self.updater = Thread(target=self.sign_packages, daemon=True)
+        self.updater.start()
+
+    def sign_packages(self):
+        """
+        Signer thread main loop: look for dirty dists every 2 seconds
+        """
+        while True:
+            sleep(2)
+            try:
+                self._sign_packages()
+            except Exception:
+                # one failed pass (database hiccup, gpg error) must not end signing until the next
+                # restart. Dists that were not signed are still dirty, so back off and try again
+                traceback.print_exc()
+                sleep(10)
+
+    def _sign_packages(self):
+        """
+        Regenerate and sign the index files of every dirty dist
+        """
+        # this runs outside of a request so there is no cherrypy.request.db. Use a private session
+        # on the shared engine, and always close it so each pass gives its connection back
+        session = sqlalchemy.orm.sessionmaker(bind=self.db, autoflush=True, autocommit=False)()
+        try:
+            for dist in session.query(AptDist).filter(AptDist.dirty == True).all():  # noqa: E712
+                # clear the flag *before* rendering: an upload that lands while this dist is being
+                # signed sets it again and is picked up by the next pass instead of being lost
+                dist.dirty = False
+                session.commit()
+                try:
+                    self._sign_dist(session, dist)
+                    session.commit()
+                except Exception:
+                    session.rollback()
+                    dist.dirty = True
+                    session.commit()
+                    raise
+        finally:
+            session.close()
+
+    def _sign_dist(self, session, dist):
+        """
+        Regenerate the Packages and Release contents of one dist and sign the Release
+        """
+        print("Signing dist {}/{}".format(dist.repo.name, dist.name))
+
+        stanzas = []
+        for package in session.query(AptPackage) \
+                .filter(AptPackage.repo == dist.repo,
+                        AptPackage.dist == dist) \
+                .order_by(AptPackage.id).all():
+            lines = ["{}: {}\n".format(k, v) for k, v in json.loads(package.fields).items()]
+            lines += ["{}: {}\n".format(algoname, getattr(package, algo)) for algo, algoname in algos.items()]
+            lines.append("Filename: packages/{}/{}\n".format(package.fname[0], package.fname))
+            lines.append("Size: {}\n".format(package.size))
+            # a blank line ends each package's stanza
+            lines.append("\n")
+            stanzas.append("".join(lines))
+
+        dist.packages_cache = "".join(stanzas).encode("utf-8")
+
+        release_hashes = hashmany(dist.packages_cache)
+        print(release_hashes)
+
+        # NOTE the Date below is a fixed placeholder, not the time of signing
+        str_release = """Origin: . {dist}
+Label: . {dist}
+Suite: {dist}
+Codename: {dist}
+Date: Fri, 2 Nov 2018 04:58:59 UTC
+Architectures: amd64
+Components: main
+Description: Generated by yolo
+""".format(dist=dist.name)
+        for algo, algoname in algos.items():
+            str_release += "{}:\n {} {} {}/{}/{}\n".format(algoname,
+                                                           release_hashes[algo],
+                                                           len(dist.packages_cache),
+                                                           "main",  #TODO component
+                                                           "binary-amd64",  #TODO whatever this was
+                                                           "Packages")
+
+        dist.release_cache = str_release.encode("utf-8")
+
+        keyemail = 'debian_signing@localhost'
+
+        # NOTE the key passphrase is a hardcoded placeholder
+        with TemporaryDirectory() as tdir:
+            gpg = gnupg.GPG(gnupghome=tdir)
+
+            if not dist.repo.gpgkey:
+                print("Generating key for", dist.repo.name)
+                key = gpg.gen_key(gpg.gen_key_input(name_email=keyemail,
+                                                    expire_date='2029-04-28',
+                                                    key_type='RSA',
+                                                    key_length=4096,
+                                                    key_usage='encrypt,sign,auth',
+                                                    passphrase="secret"))
+                fingerprint = key.fingerprint
+                dist.repo.gpgkey = gpg.export_keys(fingerprint, secret=True, passphrase="secret")
+                dist.repo.gpgkeyprint = fingerprint
+                dist.repo.gpgpubkey = gpg.export_keys(fingerprint)
+
+            else:
+                import_result = gpg.import_keys(dist.repo.gpgkey)
+                fingerprint = import_result.results[0]['fingerprint']  # errors here suggests some gpg import issue
+                # the imported secret key must be the repo's signing key. This is a real check rather
+                # than an assert so it still runs under python -O
+                keys = [i for i in gpg.list_keys(secret=True) if any(keyemail in k for k in i["uids"])]
+                if not keys or keys[0]['fingerprint'] != fingerprint:
+                    raise RuntimeError("gpg key import failed for repo {}".format(dist.repo.name))
+
+            dist.sig_cache = gpg.sign(dist.release_cache, keyid=fingerprint, passphrase='secret',
+                                      detach=True, clearsign=False).data
+
+    def web_addpkg(self, reponame, name, version, fobj, dist):
+        """
+        Store an uploaded .deb in s3, record it in the database and mark its dist dirty.
+        Yields text describing the package as the http response body.
+        :param reponame: repo to add to, created if missing
+        :param name: unused, see TODO below
+        :param version: unused, see TODO below
+        :param fobj: uploaded file, read through its .file attribute
+        :param dist: name of the dist to add to, created if missing
+        """
+        session = db()
+        repo = get_repo(session, reponame)
+        dist = get_dist(session, repo, dist)
+        print("Dist:", dist)
+
+        # - read f (write to temp storage if needed) and generate the hashes
+        # - load with Dpkg to get name version and whatnot
+        with TemporaryDirectory() as tdir:
+            tmppkgpath = os.path.join(tdir, "temp.deb")
+            with open(tmppkgpath, "wb") as fdest:
+                fhashes = copyhash(fobj.file, fdest)
+            pkgsize = os.path.getsize(tmppkgpath)
+
+            p = Dpkg(tmppkgpath)
+            pkgname = "{}_{}_{}.deb".format(p.message['Package'], p.message['Version'], p.message['Architecture'])
+            yield "package name: {}\n".format(pkgname)
+            yield "package size: {}\n".format(pkgsize)
+            yield "package message:\n-----------------\n{}\n-----------------\n".format(p.message)
+            yield "package hashes: {}".format(fhashes)
+
+            # repos/{reponame}/packages/f/foo.deb
+            dpath = os.path.join(self.basepath, "repos", repo.name, "packages", pkgname[0], pkgname)
+
+            with open(tmppkgpath, "rb") as f:
+                response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
+                if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+                    print(response)
+                    raise Exception("failed to store package")
+
+            fields = {key: p.message[key] for key in p.message.keys()}
+
+            pkg = AptPackage(repo=repo, dist=dist,
+                             name=p.message['Package'],
+                             version=p.message['Version'],
+                             arch=p.message['Architecture'],
+                             fname=pkgname,
+                             size=pkgsize,
+                             **fhashes,
+                             fields=json.dumps(fields))
+
+            dist.dirty = True
+            session.add(pkg)
+            session.commit()
+
+            #TODO
+            # - verify dpkg name & version match params
+            # - copy to persistent storage
+            # - add db record keyed under repo name and dist (and index but only 'binary-amd64' for now)
+            # - mark dist dirty
+
+
+@cherrypy.popargs("reponame")
+class AptWeb(object):
+    """
+    Web root of the apt provider; the first path segment is taken as the repo name
+    """
+    def __init__(self, base):
+        self.base = base
+        self.dists = AptDists(base)
+        self.packages = AptFiles(base)
+
+    @cherrypy.expose
+    def index(self, reponame=None):
+        if reponame:
+            #TODO
+            yield "about apt repo '{}'".format(reponame)
+        else:
+            #TODO
+            yield "about all apt repos"
+
+    @cherrypy.expose
+    def pubkey(self, reponame=None):
+        """
+        Serve the repo's public key. 404 if the repo is unknown or has not been signed yet.
+        """
+        repo = get_repo(db(), reponame, create_ok=False)
+        if not repo or not repo.gpgpubkey:
+            raise cherrypy.HTTPError(404)
+        yield repo.gpgpubkey
+
+
+@cherrypy.expose
+class AptDists(object):
+    """
+    Serves the index files of a dist: {dist}/Release, {dist}/Release.gpg and
+    {dist}/{component}/{index}/Packages
+    """
+    _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}
+
+    def __init__(self, base):
+        self.base = base
+
+    def __call__(self, *segments, reponame=None):
+        """
+        Serve the cached file named by the path segments. 404 for an unknown repo, dist or
+        file, and for a dist that has no cached contents yet.
+        """
+        if len(segments) == 4 and segments[3] == "Packages":
+            # the component and index segments are accepted but not used to pick the contents
+            body = get_dist_or_404(reponame, segments[0]).packages_cache
+        elif len(segments) == 2 and segments[1] == "Release":
+            body = get_dist_or_404(reponame, segments[0]).release_cache
+        elif len(segments) == 2 and segments[1] == "Release.gpg":
+            body = get_dist_or_404(reponame, segments[0]).sig_cache
+        else:
+            raise cherrypy.HTTPError(404)
+
+        if body is None:
+            raise cherrypy.HTTPError(404)
+        yield body
+
+
+@cherrypy.expose
+class AptFiles(object):
+    """
+    Serves package files out of s3 at {first letter}/{file name}
+    """
+    _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}
+
+    def __init__(self, base):
+        self.base = base
+
+    def __call__(self, *segments, reponame=None):
+        """
+        Stream a package file to the client. 404 for an unknown repo or file, and for any path
+        that is not exactly two segments long.
+        """
+        if len(segments) != 2:
+            raise cherrypy.HTTPError(404)
+        firstletter, pkgname = segments
+
+        repo = get_repo(db(), reponame, create_ok=False)
+        if not repo:
+            raise cherrypy.HTTPError(404)
+        package = db().query(AptPackage).filter(AptPackage.repo == repo, AptPackage.fname == pkgname).first()
+
+        if not package:
+            raise cherrypy.HTTPError(404)
+
+        dpath = os.path.join(self.base.basepath, "repos", repo.name, "packages", package.fname[0], package.fname)
+
+        response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)
+
+        cherrypy.response.headers["Content-Type"] = "application/x-debian-package"
+        cherrypy.response.headers["Content-Length"] = response["ContentLength"]
+
+        def stream():
+            # read in bounded chunks so a large package is never held in memory as a whole
+            while True:
+                data = response["Body"].read(65536)
+                if not data:
+                    return
+                yield data
+
+        return stream()
+
+    __call__._cp_config = {'response.stream': True}
diff --git a/repobot/pypiprovider.py b/repobot/pypiprovider.py
new file mode 100644
index 0000000..dfd08ba
--- /dev/null
+++ b/repobot/pypiprovider.py
@@ -0,0 +1,8 @@
+
+class PypiProvider(object):
+    """
+    Placeholder for the pypi provider; for now it only keeps the handles it is given
+    """
+    def __init__(self, dbcon, s3client):
+        self.db = dbcon
+        self.s3 = s3client
diff --git a/repobot/server.py b/repobot/server.py
new file mode 100644
index 0000000..f9ee94a
--- /dev/null
+++ b/repobot/server.py
@@ -0,0 +1,103 @@
+import cherrypy
+import logging
+from repobot.tables import get_engine, SAEnginePlugin, SATool
+
+from repobot.aptprovider import AptProvider
+from repobot.pypiprovider import PypiProvider
+
+import boto3
+from botocore.client import Config as BotoConfig
+
+
+class AppWeb(object):
+    """
+    Web root. Lists the providers and accepts package uploads on their behalf.
+    """
+    def __init__(self, providers):
+        self.providers = providers
+
+    @cherrypy.expose
+    def index(self):
+        yield 'repos'
+
+    @cherrypy.expose
+    def repo(self):
+        for provider in self.providers.keys():
+            yield '{provider}\n'.format(provider=provider)
+
+    @cherrypy.expose
+    def addpkg(self, provider, reponame, name, version, f, **params):
+        """
+        Hand an uploaded package to the named provider and relay its output as the response
+        """
+        # TODO regex validate args
+        yield from self.providers[provider].web_addpkg(reponame, name, version, f, **params)
+
+
+def main():
+    """
+    Parse the command line, wire up the database, s3 and the providers, and run the web server
+    until interrupted
+    """
+    import argparse
+    import signal
+
+    parser = argparse.ArgumentParser(description="repobot package repository server")
+    parser.add_argument('-p', '--port', default=8080, type=int, help="tcp port to listen on")
+    parser.add_argument('-s', '--database', help="mysql connection string")
+    parser.add_argument('--debug', action="store_true", help="enable development options")
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
+                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
+
+    # pass --database through to the engine. When it is not given, get_engine falls back to its
+    # built-in connection string
+    dbcon = get_engine(url=args.database)
+
+    SAEnginePlugin(cherrypy.engine, dbcon).subscribe()
+    cherrypy.tools.db = SATool()
+
+    s3 = boto3.client('s3', config=BotoConfig(signature_version='s3v4'), region_name='us-east-1',
+                      endpoint_url='',
+                      aws_access_key_id='',
+                      aws_secret_access_key='')
+
+    providers = {"apt": AptProvider(dbcon, s3),
+                 "pypi": PypiProvider(dbcon, s3)}
+
+    web = AppWeb(providers)
+
+    def validate_password(realm, username, password):
+        return True
+
+    cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
+                                         'tools.db.on': True}})
+
+    cherrypy.config.update({
+        'tools.sessions.on': False,
+        'request.show_tracebacks': True,
+        'server.socket_port': args.port,
+        'server.thread_pool': 5,
+        'server.socket_host': '0.0.0.0',
+        'server.show_tracebacks': True,
+        'log.screen': False,
+        'engine.autoreload.on': args.debug,
+    })
+
+    def signal_handler(signum, stack):
+        logging.warning('Got sig {}, exiting...'.format(signum))
+        cherrypy.engine.exit()
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        cherrypy.engine.start()
+        cherrypy.engine.block()
+    finally:
+        cherrypy.engine.exit()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/repobot/tables.py b/repobot/tables.py
new file mode 100644
index 0000000..9346a40
--- /dev/null
+++ b/repobot/tables.py
@@ -0,0 +1,81 @@
+import sqlalchemy
+import cherrypy
+from cherrypy.process import plugins
+from sqlalchemy.ext.declarative import declarative_base
+
+
+Base = declarative_base()
+
+
+def db():
+    """
+    Return the database session attached to the current request by SATool
+    """
+    return cherrypy.request.db
+
+
+def get_engine(echo=False, url=None):
+    """
+    Create a sqlalchemy engine
+    :param echo: passed through to create_engine
+    :param url: connection string, defaulting to the built-in localhost one when not given
+    """
+    if not url:
+        url = 'mysql+pymysql://root:root@localhost/repobot'
+    return sqlalchemy.create_engine(url, echo=echo, encoding="utf8")
+
+
+class SAEnginePlugin(plugins.SimplePlugin):
+    """
+    Bus plugin that creates the tables on start and binds sessions published on "bind"
+    """
+    def __init__(self, bus, dbcon):
+        plugins.SimplePlugin.__init__(self, bus)
+        self.sa_engine = dbcon
+        self.bus.subscribe("bind", self.bind)
+
+    def start(self):
+        Base.metadata.create_all(self.sa_engine)
+
+    def bind(self, session):
+        session.configure(bind=self.sa_engine)
+
+
+class SATool(cherrypy.Tool):
+    def __init__(self):
+        """
+        The SA tool is responsible for associating a SA session
+        to the SA engine and attaching it to the current request.
+        Since we are running in a multithreaded application,
+        we use the scoped_session that will create a session
+        on a per thread basis so that you don't worry about
+        concurrency on the session object itself.
+
+        This tool binds a session to the engine each time
+        a request starts and commits/rolls back whenever
+        the request terminates.
+        """
+        cherrypy.Tool.__init__(self, 'before_request_body',
+                               self.bind_session,
+                               priority=100)
+
+        self.session = sqlalchemy.orm.scoped_session(
+            sqlalchemy.orm.sessionmaker(autoflush=True, autocommit=False))
+
+    def _setup(self):
+        cherrypy.Tool._setup(self)
+        cherrypy.request.hooks.attach('on_end_resource', self.commit_transaction, priority=80)
+
+    def bind_session(self):
+        cherrypy.engine.publish('bind', self.session)
+        cherrypy.request.db = self.session
+
+    def commit_transaction(self):
+        cherrypy.request.db = None
+        try:
+            self.session.commit()  #TODO commit is issued even on endpoints with no queries
+        except:
+            self.session.rollback()
+            raise
+        finally:
+            self.session.remove()
diff --git a/requirements.txt b/requirements.txt
index 00dd9fa..9d32b86 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,34 +1,30 @@
+arpy==1.1.1
+asn1crypto==0.24.0
 backports.functools-lru-cache==1.5
-BTrees==4.5.1
-certifi==2018.10.15
-chardet==3.0.4
-cheroot==6.5.2
-CherryPy==18.0.1
-coloredlogs==10.0
-deb-pkg-tools==4.5
-executor==21.2
-fasteners==0.14.1
-humanfriendly==4.16.1
-idna==2.7
-jaraco.functools==1.20
-Jinja2==2.10
-MarkupSafe==1.0
-monotonic==1.5
-more-itertools==4.3.0
-persistent==4.4.2
-portend==2.3
-property-manager==2.3.1
-python-debian==0.1.33
-python-memcached==1.59
-pytz==2018.5
-requests==2.20.0
-six==1.11.0
-tempora==1.13
-transaction==2.2.1
-urllib3==1.24
-verboselogs==1.7
-zc.lockfile==1.3.0
-ZConfig==3.3.0
-ZODB==5.5.0
-zodbpickle==1.0.2
-zope.interface==4.5.0
+boto3==1.9.138
+botocore==1.12.138
+cffi==1.12.3
+cheroot==6.5.4
+CherryPy==18.1.1
+cryptography==2.6.1
+docutils==0.14
+jaraco.functools==2.0
+jmespath==0.9.4
+more-itertools==7.0.0
+PGPy==0.4.1
+portend==2.4
+pyasn1==0.4.5
+pycparser==2.19
+pydpkg==1.3.1
+PyMySQL==0.9.3
+python-dateutil==2.8.0
+python-gnupg==0.4.4
+pytz==2019.1
+-e git+ssh://git@git.davepedu.com:223/dave/docker-artifact.git@48de79c18776e77bbd9b956afc27a872efeb0e9f#egg=repobot
+s3transfer==0.2.0
+singledispatch==3.4.0.3
+six==1.12.0
+SQLAlchemy==1.3.3
+tempora==1.14.1
+urllib3==1.24.2
+zc.lockfile==1.4
diff --git a/setup.py b/setup.py
index ac5a8d6..8f550b9 100644
--- a/setup.py
+++ b/setup.py
@@ -17,11 +17,7 @@ setup(name='repobot',
           ]
       },
       include_package_data=True,
-      package_data={'repobot': ['../templates/pypi/*.html']},
+      # package_data={'repobot': ['../templates/pypi/*.html']},
       install_requires=[
-          'ZODB==5.5.0',
-          'CherryPy==18.0.1',
-          'Jinja2==2.10',
-          'requests==2.20.0'
       ],
       zip_safe=False)