Software repository API
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

aptprovider.py 16 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. import cherrypy
  2. import gnupg
  3. import hashlib
  4. import json
  5. import os
  6. import queue
  7. import sqlalchemy
  8. import traceback
  9. from datetime import datetime
  10. from pydpkg import Dpkg
  11. from sqlalchemy import Column, ForeignKey, UniqueConstraint
  12. from sqlalchemy.dialects.mysql import LONGTEXT
  13. from sqlalchemy.orm import relationship
  14. from sqlalchemy.types import String, Integer, Text, BOOLEAN
  15. from tempfile import TemporaryDirectory
  16. from threading import Thread
  17. from repobot.tables import Base, db
  18. class AptRepo(Base):
  19. __tablename__ = 'aptrepo'
  20. id = Column(Integer, primary_key=True)
  21. name = Column(String(length=32), unique=True, nullable=False)
  22. gpgkey = Column(Text(), nullable=True)
  23. gpgkeyprint = Column(Text(), nullable=True)
  24. gpgpubkey = Column(Text(), nullable=True)
  25. dists = relationship("AptDist")
  26. class AptDist(Base):
  27. __tablename__ = 'aptdist'
  28. id = Column(Integer, primary_key=True)
  29. repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
  30. repo = relationship("AptRepo")
  31. dirty = Column(BOOLEAN(), nullable=False, default=False)
  32. name = Column(String(length=32), nullable=False)
  33. packages_cache = Column(LONGTEXT(), nullable=True)
  34. release_cache = Column(Text(), nullable=True)
  35. sig_cache = Column(Text(), nullable=True)
  36. __table_args__ = (UniqueConstraint('repo_id', 'name', name='apt_unique_repodist'), )
  37. class AptPackage(Base):
  38. __tablename__ = 'aptpkg'
  39. id = Column(Integer, primary_key=True)
  40. repo_id = Column(Integer, ForeignKey("aptrepo.id"), nullable=False)
  41. repo = relationship("AptRepo")
  42. dist_id = Column(Integer, ForeignKey("aptdist.id"), nullable=False)
  43. dist = relationship("AptDist")
  44. # index (always 'binary-amd64' for now)
  45. name = Column(String(length=128), nullable=False) # 'python3-pip'
  46. version = Column(String(length=128), nullable=False) # '4.20.1'
  47. arch = Column(String(length=16), nullable=False) # 'amd64'
  48. fname = Column(String(length=256), nullable=False)
  49. size = Column(Integer, nullable=False)
  50. md5 = Column(String(length=32))
  51. sha1 = Column(String(length=40))
  52. sha256 = Column(String(length=64))
  53. sha512 = Column(String(length=128))
  54. fields = Column(Text())
  55. __table_args__ = (UniqueConstraint('name', 'version', 'arch', 'repo_id', 'dist_id', name='apt_unique_repodist'), )
  56. @property
  57. def blobpath(self):
  58. return os.path.join("repos", self.repo.name, "packages", self.dist.name, self.name[0], self.fname)
  59. def get_repo(_db, repo_name, create_ok=True):
  60. """
  61. Fetch a repo from the database by name
  62. """
  63. repo = _db.query(AptRepo).filter(AptRepo.name == repo_name).first()
  64. if not repo and create_ok:
  65. repo = AptRepo(name=repo_name)
  66. _db.add(repo)
  67. _db.commit()
  68. return repo
  69. def get_dist(_db, repo, dist_name, create_ok=True):
  70. """
  71. Fetch a repo's dist from the database by name
  72. """
  73. dist = _db.query(AptDist).filter(AptDist.name == dist_name, AptDist.repo_id == repo.id).first()
  74. if not dist and create_ok:
  75. dist = AptDist(name=dist_name, repo_id=repo.id)
  76. _db.add(dist)
  77. _db.commit()
  78. return dist
  79. algos = {"md5": "MD5Sum",
  80. "sha1": "SHA1",
  81. "sha256": "SHA256",
  82. "sha512": "SHA512"}
  83. def copyhash(fin, fout):
  84. """
  85. Copy a file and calculate hashes while doing so
  86. """
  87. hashes = {}
  88. for algo in algos.keys():
  89. hashes[algo] = getattr(hashlib, algo)()
  90. while True:
  91. data = fin.read(4096)
  92. if not data:
  93. break
  94. for h in hashes.values():
  95. h.update(data)
  96. fout.write(data)
  97. return {k: v.hexdigest() for k, v in hashes.items()}
  98. def hashmany(data):
  99. """
  100. Hash the input data using several algos
  101. """
  102. hashes = {}
  103. for algo in algos.keys():
  104. hashes[algo] = getattr(hashlib, algo)()
  105. for h in hashes.values():
  106. h.update(data)
  107. return {k: v.hexdigest() for k, v in hashes.items()}
  108. class AptProvider(object):
  109. def __init__(self, dbcon, s3client, bucket):
  110. self.db = dbcon
  111. self.s3 = s3client
  112. self.bucket = bucket
  113. """base path within the s3 bucket"""
  114. self.basepath = "data/provider/apt"
  115. """queue entries are tuples containing the database id of the dist to regenerate indexes and signatures for"""
  116. self.queue = queue.Queue()
  117. cherrypy.tree.mount(AptWeb(self), "/repo/apt", {'/': {'tools.trailing_slash.on': False,
  118. 'tools.db.on': True}})
  119. self.updater = Thread(target=self.sign_packages, daemon=True)
  120. self.updater.start()
  121. def sign_packages(self):
  122. Session = sqlalchemy.orm.sessionmaker(autoflush=True, autocommit=False)
  123. Session.configure(bind=self.db)
  124. while True:
  125. try:
  126. work = self.queue.get(block=True, timeout=5)
  127. except queue.Empty:
  128. continue
  129. session = Session()
  130. try:
  131. self._sign_packages(session, work)
  132. except:
  133. traceback.print_exc()
  134. finally:
  135. session.close()
  136. def _sign_packages(self, session, work):
  137. dist_id = work[0]
  138. dist = session.query(AptDist).filter(AptDist.id == dist_id).first()
  139. print("Generating metadata for repo:{} dist:{}".format(dist.repo.name, dist.name))
  140. str_packages = ""
  141. for package in session.query(AptPackage) \
  142. .filter(AptPackage.repo == dist.repo,
  143. AptPackage.dist == dist) \
  144. .order_by(AptPackage.id).all():
  145. fields = json.loads(package.fields)
  146. for k, v in fields.items():
  147. str_packages += "{}: {}\n".format(k, v)
  148. for algo, algoname in algos.items():
  149. str_packages += "{}: {}\n".format(algoname, getattr(package, algo))
  150. str_packages += "Filename: packages/{}/{}/{}\n".format(dist.name, package.fname[0], package.fname)
  151. str_packages += "Size: {}\n".format(package.size)
  152. str_packages += "\n"
  153. dist.packages_cache = str_packages.encode("utf-8")
  154. release_hashes = hashmany(dist.packages_cache)
  155. str_release = """Origin: . {dist}
  156. Label: . {dist}
  157. Suite: {dist}
  158. Codename: {dist}
  159. Date: {time}
  160. Architectures: amd64
  161. Components: main
  162. Description: Generated by Repobot
  163. """.format(dist=dist.name, time=datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UTC"))
  164. for algo, algoname in algos.items():
  165. str_release += "{}:\n {} {} {}/{}/{}\n".format(algoname,
  166. release_hashes[algo],
  167. len(dist.packages_cache),
  168. "main", #TODO component
  169. "binary-amd64", #TODO whatever this was
  170. "Packages")
  171. dist.release_cache = str_release.encode("utf-8")
  172. keyemail = 'debian_signing@localhost'
  173. with TemporaryDirectory() as tdir:
  174. gpg = gnupg.GPG(gnupghome=tdir)
  175. def getkey():
  176. keys = [i for i in gpg.list_keys(secret=True) if any([keyemail in k for k in i["uids"]])]
  177. if keys:
  178. return keys[0]
  179. fingerprint = None
  180. if not dist.repo.gpgkey:
  181. print("Generating key for", dist.repo.name)
  182. key = gpg.gen_key(gpg.gen_key_input(name_email=keyemail,
  183. expire_date='2029-04-28',
  184. key_type='RSA',
  185. key_length=4096,
  186. key_usage='encrypt,sign,auth',
  187. passphrase="secret"))
  188. fingerprint = key.fingerprint
  189. dist.repo.gpgkey = gpg.export_keys(fingerprint, secret=True, passphrase="secret")
  190. dist.repo.gpgkeyprint = fingerprint
  191. dist.repo.gpgpubkey = gpg.export_keys(fingerprint)
  192. else:
  193. import_result = gpg.import_keys(dist.repo.gpgkey)
  194. fingerprint = import_result.results[0]['fingerprint'] # errors here suggests some gpg import issue
  195. assert(fingerprint == getkey()['fingerprint'])
  196. dist.sig_cache = gpg.sign(dist.release_cache, keyid=fingerprint, passphrase='secret',
  197. detach=True, clearsign=False).data
  198. dist.dirty = False
  199. session.commit()
  200. print("Metadata generation complete")
  201. def web_addpkg(self, reponame, name, version, fobj, dist):
  202. repo = get_repo(db(), reponame)
  203. dist = get_dist(db(), repo, dist)
  204. print("Dist:", dist)
  205. # - read f (write to temp storage if needed) and generate the hashes
  206. # - load with Dpkg to get name version and whatnot
  207. with TemporaryDirectory() as tdir:
  208. tmppkgpath = os.path.join(tdir, "temp.deb")
  209. with open(tmppkgpath, "wb") as fdest:
  210. fhashes = copyhash(fobj.file, fdest)
  211. fsize = os.path.getsize(tmppkgpath)
  212. p = Dpkg(tmppkgpath)
  213. pkgname = "{}_{}_{}.deb".format(p.message['Package'], p.message['Version'], p.message['Architecture'])
  214. #TODO keys can be duplicated in email.message.Message, does this cause any problems?
  215. fields = {key: p.message[key] for key in p.message.keys()}
  216. # repos/<reponame>/packages/f/foo.deb
  217. dpath = os.path.join(self.basepath, "repos", repo.name, "packages", dist.name, pkgname[0], pkgname)
  218. files = self.s3.list_objects(Bucket=self.bucket, Prefix=dpath).get("Contents")
  219. if files:
  220. print(f"will overwrite: {files}")
  221. pkg = AptPackage(repo=repo, dist=dist,
  222. name=p.message['Package'],
  223. version=p.message['Version'],
  224. arch=p.message['Architecture'],
  225. fname=pkgname,
  226. size=fsize,
  227. **fhashes,
  228. fields=json.dumps(fields))
  229. db().add(pkg)
  230. db().commit()
  231. try:
  232. with open(tmppkgpath, "rb") as f:
  233. response = self.s3.put_object(Body=f, Bucket=self.bucket, Key=dpath)
  234. assert(response["ResponseMetadata"]["HTTPStatusCode"] == 200), f"Upload failed: {response}"
  235. except Exception:
  236. db().delete(pkg)
  237. db().commit()
  238. raise
  239. dist.dirty = True
  240. db().commit()
  241. self.regen_dist(dist.id)
  242. yield "package name: {}\n".format(pkgname)
  243. yield "package size: {}\n".format(fsize)
  244. yield "package message:\n-----------------\n{}\n-----------------\n".format(p.message)
  245. yield "package hashes: {}\n".format(fhashes)
  246. def regen_dist(self, dist_id):
  247. self.queue.put((dist_id, ))
  248. #TODO
  249. # - verify dpkg name & version match params
  250. # - copy to persistent storage
  251. # - add db record keyed under repo name and dist (and index but only 'binary-amd64' for now)
  252. # - mark dist dirty
  253. @cherrypy.popargs("reponame")
  254. class AptWeb(object):
  255. def __init__(self, base):
  256. self.base = base
  257. self.dists = AptDists(base)
  258. self.packages = AptFiles(base)
  259. @cherrypy.expose
  260. def index(self, reponame=None, regen=False):
  261. if reponame:
  262. repo = get_repo(db(), reponame, create_ok=False)
  263. yield "<a href='/repo/apt/{reponame}/pubkey'>pubkey</a> " \
  264. "<a href='/repo/apt/{reponame}?regen=1'>regen</a><hr/>".format(reponame=repo.name)
  265. for dist in db().query(AptDist).filter(AptDist.repo == repo).order_by(AptDist.name).all():
  266. yield "<a href='/repo/apt/{reponame}/dists/{name}'>{name}</a>: <a href='/repo/apt/{reponame}/dists/{name}/main/indexname/Packages'>Packages</a> <a href='/repo/apt/{reponame}/dists/{name}/Release'>Release</a> <a href='/repo/apt/{reponame}/dists/{name}/Release.gpg'>Release.gpg</a> <a href='/repo/apt/{reponame}/dists/{name}/install'>install</a><br />".format(reponame=repo.name, name=dist.name)
  267. if regen:
  268. self.base.regen_dist(dist.id)
  269. # yield "about apt repo '{}'".format(reponame)
  270. else:
  271. for repo in db().query(AptRepo).order_by(AptRepo.name).all():
  272. yield "<a href='/repo/apt/{name}'>{name}</a><br/>".format(name=repo.name)
  273. @cherrypy.expose
  274. def pubkey(self, reponame=None):
  275. cherrypy.response.headers['Content-Type'] = 'text/plain'
  276. return get_repo(db(), reponame, create_ok=False).gpgpubkey
  277. @cherrypy.expose
  278. class AptDists(object):
  279. _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}
  280. def __init__(self, base):
  281. self.base = base
  282. def __call__(self, *segments, reponame=None):
  283. repo = get_repo(db(), reponame, create_ok=False)
  284. if len(segments) == 4 and segments[3] == "Packages":
  285. distname, componentname, indexname, pkgs = segments
  286. dist = get_dist(db(), repo, distname, create_ok=False)
  287. if not repo or not dist:
  288. raise cherrypy.HTTPError(404)
  289. cherrypy.response.headers['Content-Type'] = 'text/plain'
  290. return dist.packages_cache
  291. elif len(segments) == 2:
  292. distname, target = segments
  293. dist = get_dist(db(), repo, distname, create_ok=False)
  294. cherrypy.response.headers['Content-Type'] = 'text/plain'
  295. if target == "Release":
  296. return dist.release_cache
  297. elif target == "Release.gpg":
  298. return dist.sig_cache
  299. elif target == "install":
  300. return """#!/bin/sh -ex
  301. wget -qO- {scheme}://{host}/repo/apt/{reponame}/pubkey | apt-key add -
  302. echo 'deb {scheme}://{host}/repo/apt/{reponame}/ {dist} main' | tee /etc/apt/sources.list.d/{reponame}-{dist}.list
  303. apt-get update
  304. """.format(scheme=cherrypy.request.scheme, host=cherrypy.request.headers['Host'], reponame=repo.name, dist=dist.name)
  305. else:
  306. raise cherrypy.HTTPError(404)
  307. elif len(segments) == 1:
  308. distname = segments[0]
  309. dist = get_dist(db(), repo, distname, create_ok=False)
  310. body = ""
  311. for package in db().query(AptPackage).filter(AptPackage.repo == repo,
  312. AptPackage.dist == dist).order_by(AptPackage.fname).all():
  313. body += "<a href='/repo/apt/{reponame}/packages/{dist.name}/{fname[0]}/{fname}'>{fname}</a><br />" \
  314. .format(reponame=repo.name, dist=dist, fname=package.fname)
  315. return body
  316. raise cherrypy.HTTPError(404)
  317. @cherrypy.expose
  318. class AptFiles(object):
  319. _cp_config = {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}
  320. def __init__(self, base):
  321. self.base = base
  322. def __call__(self, *segments, reponame=None):
  323. distname, firstletter, pkgname = segments
  324. repo = get_repo(db(), reponame, create_ok=False)
  325. dist = get_dist(db(), repo, distname, create_ok=False)
  326. package = db().query(AptPackage).filter(AptPackage.repo == repo,
  327. AptPackage.dist == dist,
  328. AptPackage.fname == pkgname).first()
  329. if not package:
  330. raise cherrypy.HTTPError(404)
  331. dpath = os.path.join(self.base.basepath, package.blobpath)
  332. response = self.base.s3.get_object(Bucket=self.base.bucket, Key=dpath)
  333. print("reading ", dpath)
  334. cherrypy.response.headers["Content-Type"] = "application/x-debian-package"
  335. cherrypy.response.headers["Content-Length"] = response["ContentLength"]
  336. def stream():
  337. while True:
  338. data = response["Body"].read(65535)
  339. if not data:
  340. return
  341. yield data
  342. return stream()
  343. __call__._cp_config = {'response.stream': True}