import dist metadata from source repo

This commit is contained in:
dave 2022-11-30 23:47:48 -08:00
parent b57676d972
commit ea5a42daf6
1 changed files with 22 additions and 5 deletions

View File

@ -9,6 +9,10 @@ from email import message_from_string
from dataclasses import dataclass from dataclasses import dataclass
def message_to_string(message):
    """Serialize an email-style message without its trailing newlines.

    ``Message.as_string()`` ends a header-only message with a blank
    separator line; that terminator is removed so the stored text ends on
    the last header line.

    Only trailing newline characters are stripped. A fixed two-character
    slice would cut real content whenever the message carries a payload
    that does not end in two newlines.

    :param message: object exposing ``as_string()`` (e.g. the result of
        ``email.message_from_string``).
    :return: the serialized message with trailing ``"\\n"`` removed.
    """
    return message.as_string().rstrip("\n")
def dict_factory(c, row): def dict_factory(c, row):
d = {} d = {}
for idx, col in enumerate(c.description): for idx, col in enumerate(c.description):
@ -42,6 +46,10 @@ def get_db(db_file):
'name' TEXT, 'name' TEXT,
'version' TEXT, 'version' TEXT,
UNIQUE(dist, component, arch, name, version) UNIQUE(dist, component, arch, name, version)
"""CREATE TABLE IF NOT EXISTS 'dist_meta' (
'dist' TEXT,
'metadata' TEXT,
UNIQUE(dist)
)""", )""",
] ]
@ -105,7 +113,11 @@ class Repoline:
dist_path = "{}dists/{}/".format(self.base_url, self.dist) dist_path = "{}dists/{}/".format(self.base_url, self.dist)
r = requests.get("{}Release".format(dist_path)) r = requests.get("{}Release".format(dist_path))
r.raise_for_status() r.raise_for_status()
# release = r.text release = message_from_string(r.text)
ignore_keys = ('MD5Sum', 'SHA1', 'SHA256', 'Acquire-By-Hash', )
for key in ignore_keys:
del release[key]
# parse out each component's Packages/.gz/.xz file # parse out each component's Packages/.gz/.xz file
# files = parse_dist_release(release) # files = parse_dist_release(release)
@ -133,7 +145,7 @@ class Repoline:
for p in fetch_packages_file("{}{}".format(dist_path, component_prefix)).split("\n\n")[0:-1] for p in fetch_packages_file("{}{}".format(dist_path, component_prefix)).split("\n\n")[0:-1]
] ]
return packages return release, packages
@staticmethod @staticmethod
def parse(line) -> "Repoline": def parse(line) -> "Repoline":
@ -183,18 +195,19 @@ class Repo(object):
return dist return dist
def import_source_metadata(self, line): def import_source_metadata(self, line):
packages = line.get_packages() release, packages = line.get_packages()
dist = self.get_dist(line.dist) dist = self.get_dist(line.dist)
dirty = False dirty = False
with closing(self.db.cursor()) as c: with closing(self.db.cursor()) as c:
dist.update_metadata(c, release)
for component_name in line.components: for component_name in line.components:
component = dist.get_component(component_name) component = dist.get_component(component_name)
arch = component.get_arch(line.arch) arch = component.get_arch(line.arch)
for package in packages[component_name]: for package in packages[component_name]:
dirty = arch.add_package(c, package) or dirty dirty = arch.add_package(c, package) or dirty
if dirty: # if dirty:
c.execute("COMMIT") c.execute("COMMIT")
def import_source_packages(self, line): def import_source_packages(self, line):
#TODO parallelize downloads #TODO parallelize downloads
@ -252,6 +265,10 @@ class Dist(object):
self.components[name] = component self.components[name] = component
return component return component
def update_metadata(self, c, release):
    """Write this dist's release metadata into the ``dist_meta`` table.

    The row is written with ``REPLACE INTO``, binding ``self.name`` as the
    ``dist`` column and the serialized ``release`` as ``metadata``.
    Nothing is committed here; the statement is only executed on ``c``.

    :param c: database cursor used to execute the statement.
    :param release: message object passed to ``message_to_string``.
    """
    params = (self.name, message_to_string(release))
    c.execute("REPLACE INTO dist_meta (dist, metadata) VALUES (?, ?);", params)
class Component(object): class Component(object):
def __init__(self, dist, name): def __init__(self, dist, name):