dave 2 years ago
parent
commit
48de79c187
  1. 1
      repobot/__init__.py
  2. 10
      repobot/common.py
  3. 226
      repobot/provider.py
  4. 76
      repobot/replication.py
  5. 74
      repobot/repos.py
  6. 120
      repobot/server.py

1
repobot/__init__.py

@ -1 +0,0 @@
__version__ = "0.0.1"

10
repobot/common.py

@ -1,10 +0,0 @@
import persistent.list
import persistent.mapping
def plist():
return persistent.list.PersistentList()
def pmap():
return persistent.mapping.PersistentMapping()

226
repobot/provider.py

@ -1,226 +0,0 @@
import os
import shutil
from repobot.common import plist, pmap
from jinja2 import Environment, FileSystemLoader, select_autoescape
import cherrypy
class DuplicateException(Exception):
pass
class PkgProvider(object):
def __init__(self, db, repo, datadir):
"""
Base package provider class
"""
self.db = db
self.repo = repo
self.dir = datadir
def render(self):
"""
Respond to requests to browse the repo
"""
raise NotImplementedError()
def add_package(self, pkobj, fname, fobj, params):
"""
Add a package to the repo
"""
raise NotImplementedError()
def get_path(self, pkgobj, fname):
"""
Get the path to a package file in the repo
"""
raise NotImplementedError()
class PyPiProvider(PkgProvider):
def add_package(self, pkgobj, fname, fobj, params):
if "files" not in pkgobj.data:
pkgobj.data["files"] = plist()
if fname in pkgobj.data["files"]:
raise DuplicateException("File {} already in package {}-{}".format(fname, pkgobj.name, pkgobj.version))
pkgdir = os.path.join(self.dir, pkgobj.name)
os.makedirs(pkgdir, exist_ok=True)
# TODO handle duplicate files better
pkgfilepath = os.path.join(pkgdir, fname)
with open(pkgfilepath, "wb") as fdest:
shutil.copyfileobj(fobj, fdest)
pkgobj.data["files"].append(fname)
def browse(self, args):
tpl = Environment(loader=FileSystemLoader("templates"), autoescape=select_autoescape(['html', 'xml']))
if len(args) == 0: # repo root
return tpl.get_template("pypi/root.html"). \
render(reponame=self.repo.name,
packages=self.repo.packages.keys())
elif len(args) == 1: # single module dir
files = []
if args[0] not in self.repo.packages:
raise cherrypy.HTTPError(404, 'Invalid package')
for _, version in self.repo.packages[args[0]].items():
files += version.data["files"]
return tpl.get_template("pypi/project.html"). \
render(reponame=self.repo.name,
modulename=args[0],
files=files)
elif len(args) == 2: # fetch file
fpath = os.path.join(self.dir, args[0], args[1])
return cherrypy.lib.static.serve_file(os.path.abspath(fpath), "application/octet-stream")
def get_path(self, pkgobj, fname):
assert fname in pkgobj.data["files"]
return os.path.join(self.dir, pkgobj.name, fname)
from subprocess import check_call, check_output, Popen, PIPE
from tempfile import NamedTemporaryFile, TemporaryDirectory
import json
class AptlyConfig(object):
"""
Context manager providing an aptly config file
"""
def __init__(self, rootdir):
self.conf = {"rootDir": rootdir} # , "gpgDisableSign": True, "gpgDisableVerify": True}
self.file = None
def __enter__(self):
self.file = NamedTemporaryFile()
with open(self.file.name, "w") as f:
f.write(json.dumps(self.conf))
return self.file.name
def __exit__(self, *args):
self.file.close()
class AptProvider(PkgProvider):
def add_package(self, pkgobj, fname, fobj, params):
# first package added sets the Distribution of the repo
# subsequent package add MUST specify the same dist
if "dist" not in self.repo.data:
self.repo.data["dist"] = params["dist"]
assert self.repo.data["dist"] == params["dist"]
# Generate a GPG key to sign packages in this repo
# TODO support passing keypath=... param to import existing keys and maybe other key generation options
if not os.path.exists(self._gpg_dir):
self._generate_gpg_key()
if "files" not in pkgobj.data:
pkgobj.data["files"] = plist()
if fname in pkgobj.data["files"]:
raise DuplicateException("File {} already in package {}-{}".format(fname, pkgobj.name, pkgobj.version))
with AptlyConfig(self.dir) as conf:
if not os.path.exists(os.path.join(self.dir, "db")):
os.makedirs(self.dir, exist_ok=True)
check_call(["aptly", "-config", conf, "repo", "create",
"-distribution", self.repo.data["dist"], "main"]) # TODO dist param
# put the file somewhere for now
with TemporaryDirectory() as tdir:
tmppkgpath = os.path.join(tdir, fname)
with open(tmppkgpath, "wb") as fdest:
shutil.copyfileobj(fobj, fdest)
check_call(["aptly", "-config", conf, "repo", "add", "main", tmppkgpath])
if not os.path.exists(os.path.join(self.dir, "public")):
check_call(["aptly", "-config", conf, "publish", "repo", "main"],
env=self._env)
else:
check_call(["aptly", "-config", conf, "publish", "update",
"-force-overwrite", self.repo.data["dist"]],
env=self._env)
# Make the public key available for clients
self._export_pubkey()
pkgobj.data["files"].append(fname)
# TODO validate deb file name version against user passed version
def browse(self, args):
if not args:
with open(self.pubkeypath) as f:
pubkey_body = f.read()
return "<plaintext>{}\n\nSigning key:\n\n\n{}".format(self._get_gpg_info(), pubkey_body)
fpath = os.path.abspath(os.path.join(self.dir, "public", *args))
if not os.path.exists(fpath):
raise cherrypy.HTTPError(404)
return cherrypy.lib.static.serve_file(fpath)
def _get_gpg_info(self):
return check_output(["gpg", "--list-keys"], env=self._env).decode("UTF-8")
def _generate_gpg_key(self):
"""
Generate a GPG key for signing packages in this repo. Because only gpg2 supports unattended generation of
passwordless keys we generate the key with gpg2 then export/import it into gpg1.
"""
# Generate the key
os.makedirs(self._gpg_dir)
proc = Popen(["gpg", "--batch", "--gen-key"], stdin=PIPE, env=self._env)
proc.stdin.write("""%no-protection
Key-Type: rsa
Key-Length: 1024
Subkey-Type: default
Subkey-Length: 1024
Name-Real: Apt Master
Name-Comment: Apt signing key
Name-Email: aptmaster@localhost
Expire-Date: 0
%commit""".encode("ascii"))
proc.stdin.close()
proc.wait()
assert proc.returncode == 0
# Export the private key
keydata = check_output(["gpg", "--export-secret-key", "--armor", "aptmaster@localhost"], env=self._env)
shutil.rmtree(self._gpg_dir)
os.makedirs(self._gpg_dir)
# Import the private key
proc = Popen(["gpg1", "--import"], stdin=PIPE, env=self._env)
proc.stdin.write(keydata)
proc.stdin.close()
proc.wait()
assert proc.returncode == 0
def _export_pubkey(self):
if not os.path.exists(self.pubkeypath):
keydata = check_output(["gpg", "--export", "--armor", "aptmaster@localhost"], env=self._env)
with open(self.pubkeypath, "wb") as f:
f.write(keydata)
@property
def pubkeypath(self):
return os.path.join(self.dir, "public", "repo.key")
@property
def _env(self):
"""
Return env vars to be used for subprocesses of this module
"""
print(os.environ["PATH"])
return {"GNUPGHOME": self._gpg_dir,
"PATH": os.environ["PATH"]}
@property
def _gpg_dir(self):
return os.path.normpath(os.path.join(self.dir, "gpg"))
def get_path(self, pkgobj, fname):
assert fname in pkgobj.data["files"]
return os.path.join(self.dir, "public", "pool", "main", pkgobj.name[0], pkgobj.name, fname)
providers = {"pypi": PyPiProvider,
"apt": AptProvider}

76
repobot/replication.py

@ -1,76 +0,0 @@
from urllib.parse import urlparse, urlunsplit, urlencode
from time import sleep
from threading import Thread
from repobot.provider import providers
import logging
import os
from requests import post
log = logging.getLogger("replication")
class RepoReplicator(object):
def __init__(self, db, data_root, neighbors):
"""
:param neighbors: list of replication neighbor uris like 'http://1.2.3.4:8080'
"""
self.db = db
self.data_root = data_root
self.neighbors = [urlparse(i) for i in neighbors]
self.worker = None
def start(self):
if not self.neighbors:
return
self.worker = ReplicationWorker(self)
self.worker.start()
class ReplicationWorker(Thread):
def __init__(self, master):
super().__init__()
self.daemon = True
self.master = master
def run(self):
while True:
with self.master.db.db.transaction() as c:
# for item in c.root.sendqueue:
log.info("items in queue: %s", len(c.root.sendqueue))
if len(c.root.sendqueue) > 0:
item = c.root.sendqueue[0]
if self.replicate(item):
c.root.sendqueue.pop(0)
log.info("Replication successful")
sleep(5)
def replicate(self, item):
item_type, item = item
if item_type == "package":
return self.replicate_package(item)
def replicate_package(self, item):
repo, pkg, fname, params = item
datadir = os.path.join(self.master.data_root, repo.provider, repo.name)
provider = providers[repo.provider](self.master.db, repo, datadir)
fpath = provider.get_path(pkg, fname)
for neighbor in self.master.neighbors:
q_params = {"provider": repo.provider,
"reponame": repo.name,
"name": pkg.name,
"version": pkg.version}
q_params.update(**params)
url = urlunsplit(["http", neighbor.netloc, "/addpkg", urlencode(q_params), None])
with open(fpath, 'rb') as fitem:
try:
r = post(url, files={'f': (fname, fitem)}, timeout=(10, 30))
if r.status_code not in (200, 409):
r.raise_for_status()
except Exception as e:
log.warning("Failed replication of %s to %s: %s", pkg, neighbor.geturl(), str(e))
return False
log.info("Replicated %s to %s", pkg, neighbor.geturl())
return True

74
repobot/repos.py

@ -1,74 +0,0 @@
import ZODB
import ZODB.FileStorage
import persistent
import BTrees.OOBTree
from repobot.provider import providers
import os
from threading import Lock
from collections import defaultdict
from repobot.common import plist, pmap
class Repo(persistent.Persistent):
def __init__(self, name, provider):
self.name = name
self.provider = provider
self.packages = pmap()
self.data = pmap()
def get_package(self, name, version):
if name not in self.packages:
self.packages[name] = pmap()
if version not in self.packages[name]:
self.packages[name][version] = RepoPackage(name, version)
return self.packages[name][version]
class RepoPackage(persistent.Persistent):
def __init__(self, name, version):
self.name = name
self.version = version
self.data = pmap()
def __str__(self):
return "<RepoPackage {}@{}>".format(self.name, self.version)
class RepoDb(object):
def __init__(self, db_path, data_root):
self.storage = ZODB.FileStorage.FileStorage(db_path)
self.db = ZODB.DB(self.storage)
self.data_root = data_root
self.repolocks = defaultdict(lambda: Lock())
with self.db.transaction() as c:
if "repos" not in c.root():
c.root.repos = BTrees.OOBTree.BTree()
if "sendqueue" not in c.root():
c.root.sendqueue = plist()
def add_package(self, provider, reponame, pkgname, pkgversion, fname, fobj, params):
with self.repolocks[(provider, reponame)]:
with self.db.transaction() as c:
repo = self._get_repo(c, provider, reponame)
datadir = os.path.join(self.data_root, provider, reponame)
provider = providers[repo.provider](self.db, repo, datadir)
# Add the package
pkg = repo.get_package(pkgname, pkgversion)
provider.add_package(pkg, fname, fobj, params)
# Pack successfully added, queue the file for replication
c.root.sendqueue.append(("package", (repo, pkg, fname, params, )))
def _get_repo(self, c, provider, name):
if provider not in c.root.repos:
c.root.repos[provider] = pmap()
if name not in c.root.repos[provider]:
c.root.repos[provider][name] = Repo(name, provider)
return c.root.repos[provider][name]
def browse_repo(self, provider, reponame, args):
with self.db.transaction() as c:
repo = c.root.repos[provider][reponame]
datadir = os.path.join(self.data_root, provider, reponame)
provider = providers[repo.provider](self.db, repo, datadir)
return provider.browse(args)

120
repobot/server.py

@ -1,120 +0,0 @@
import cherrypy
import logging
from repobot.repos import RepoDb
from repobot.provider import DuplicateException
from repobot.replication import RepoReplicator
class AppWeb(object):
def __init__(self, db):
self.db = db
@cherrypy.expose
def addpkg(self, provider, reponame, name, version, f, **params):
try:
self.db.add_package(provider, reponame, name, version, f.filename, f.file, params)
except DuplicateException:
raise cherrypy.HTTPError(409, 'Package already exists')
@cherrypy.expose
def repo(self, provider, repo, *args):
return self.db.browse_repo(provider, repo, args)
@cherrypy.expose
def index(self):
yield "<pre>"
with self.db.db.transaction() as c:
for provider, repos in c.root.repos.items():
for reponame, repo in repos.items():
print(repo)
for pkgname, versions in repo.packages.items():
for version, pkg in versions.items():
for fname in pkg.data["files"]:
yield "{}/{}/{}/{}/{}\n".format(provider, reponame, pkgname, version, fname)
class FlatDispatch(cherrypy.dispatch.Dispatcher):
def __init__(self, method):
"""
Route all sub urls of this one to the single passed method
"""
super().__init__(self)
self.method = method
def find_handler(self, path):
# Hack, it does not respect settings of parent nodes
cherrypy.serving.request.config = cherrypy.config
return self.method, [i for i in filter(lambda o: len(o) > 0, path.split("/")[2:])]
def main():
import argparse
import signal
parser = argparse.ArgumentParser(description="Repobot daemon")
parser.add_argument('-p', '--port', default=8080, type=int, help="tcp port to listen on")
parser.add_argument('-s', '--database', default="./repos.db", help="path to persistent database")
parser.add_argument('-d', '--data-root', default="./data/", help="data storage dir")
parser.add_argument('-n', '--neighbors', nargs="+", default=[], help="Replication neighbor uris")
parser.add_argument('--debug', action="store_true", help="enable development options")
args = parser.parse_args()
logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
db = RepoDb(args.database, args.data_root)
repl = RepoReplicator(db, args.data_root, args.neighbors)
repl.start()
web = AppWeb(db)
def validate_password(realm, username, password):
s = library.session()
if s.query(User).filter(User.name == username, User.password == pwhash(password)).first():
return True
return False
cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
# 'error_page.403': web.error,
# 'error_page.404': web.error
},
'/repo': {'request.dispatch': FlatDispatch(web.repo)},
#'/static': {"tools.staticdir.on": True,
# "tools.staticdir.dir": os.path.join(APPROOT, "styles/dist")
# if not args.debug else os.path.abspath("styles/dist")},
'/login': {'tools.auth_basic.on': True,
'tools.auth_basic.realm': 'webapp',
'tools.auth_basic.checkpassword': validate_password}})
cherrypy.config.update({
'tools.sessions.on': True,
'tools.sessions.locking': 'explicit',
'tools.sessions.timeout': 525600,
'request.show_tracebacks': True,
'server.socket_port': args.port,
'server.thread_pool': 25,
'server.socket_host': '0.0.0.0',
'server.show_tracebacks': True,
'log.screen': False,
'engine.autoreload.on': args.debug
})
def signal_handler(signum, stack):
logging.critical('Got sig {}, exiting...'.format(signum))
cherrypy.engine.exit()
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
cherrypy.engine.start()
cherrypy.engine.block()
finally:
logging.info("API has shut down")
cherrypy.engine.exit()
if __name__ == '__main__':
main()
Loading…
Cancel
Save