python3 and travis support

- python3 compatibility
- drop py2.6 support
- use email.message rather than rfc822.message
- add some initial debug logging
- pylint and pep8 fixes
- add object properties for file hashes
- add a simple cli demo script
- add travis for continuous build
This commit is contained in:
Nathan J. Mehl 2017-06-04 14:33:06 -07:00
parent fe2cf7df3d
commit c56c520950
7 changed files with 319 additions and 100 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
target/
*.egg-info
.cache

14
.travis.yml Normal file
View File

@ -0,0 +1,14 @@
language: python
python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
before_install:
- "pip install -U pip"
install:
- "pip install -e .[test]"
script:
- "py.test tests/"
- "pylint pydpkg/"
- "pep8 pydpkg/"

View File

@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/TheClimateCorporation/python-dpkg.svg?branch=master)](https://travis-ci.org/TheClimateCorporation/python-dpkg)
python-dpkg python-dpkg
=========== ===========
@ -14,8 +16,8 @@ This is primarily intended for use on platforms that do not normally
ship [python-apt](http://apt.alioth.debian.org/python-apt-doc/) due to ship [python-apt](http://apt.alioth.debian.org/python-apt-doc/) due to
licensing restrictions or the lack of a native libapt.so (e.g. macOS) licensing restrictions or the lack of a native libapt.so (e.g. macOS)
Currently only tested on Python 2.6 and 2.7. Should run on any python2 Currently only tested on CPython 2.7 and 3.5, but at least in theory should run
distribution that can install the [arpy](https://pypi.python.org/pypi/arpy/) on any python distribution that can install the [arpy](https://pypi.python.org/pypi/arpy/)
library. library.
Installing Installing
@ -26,9 +28,9 @@ the [pip](https://packaging.python.org/installing/) tool:
$ pip install pydpkg $ pip install pydpkg
Collecting pydpkg Collecting pydpkg
Downloading pydpkg-1.0-py2-none-any.whl Downloading pydpkg-1.1-py2-none-any.whl
Installing collected packages: pydpkg Installing collected packages: pydpkg
Successfully installed pydpkg-1.0 Successfully installed pydpkg-1.1
Usage Usage
===== =====
@ -53,6 +55,28 @@ Read and extract headers
Description: testdeb Description: testdeb
a bogus debian package for testing dpkg builds a bogus debian package for testing dpkg builds
Interact directly with the package control message
--------------------------------------------------
>>> dp.message
<email.message.Message instance at 0x10895c6c8>
>>> dp.message.get_content_type()
'text/plain'
Get package file fingerprints
-----------------------------
>>> dp.fileinfo
{'sha256': '547500652257bac6f6bc83f0667d0d66c8abd1382c776c4de84b89d0f550ab7f', 'sha1': 'a5d28ae2f23e726a797349d7dd5f21baf8aa02b4', 'filesize': 910, 'md5': '149e61536a9fe36374732ec95cf7945d'}
>>> dp.md5
'149e61536a9fe36374732ec95cf7945d'
>>> dp.sha1
'a5d28ae2f23e726a797349d7dd5f21baf8aa02b4'
>>> dp.sha256
'547500652257bac6f6bc83f0667d0d66c8abd1382c776c4de84b89d0f550ab7f'
>>> dp.filesize
910
Get an arbitrary control header, case-independent Get an arbitrary control header, case-independent
------------------------------------------------- -------------------------------------------------
@ -86,3 +110,24 @@ Use as a cmp function to sort a list of version strings
>>> from pydpkg import Dpkg >>> from pydpkg import Dpkg
>>> sorted(['0:1.0-test1', '1:0.0-test0', '0:1.0-test2'] , cmp=Dpkg.compare_versions) >>> sorted(['0:1.0-test1', '1:0.0-test0', '0:1.0-test2'] , cmp=Dpkg.compare_versions)
['0:1.0-test1', '0:1.0-test2', '1:0.0-test0'] ['0:1.0-test1', '0:1.0-test2', '1:0.0-test0']
Use the `dpkg-inspect.py` script to inspect packages
----------------------------------------------------
$ dpkg-inspect.py ~/testdeb*deb
Filename: /Home/n/testdeb_1:0.0.0-test_all.deb
Size: 910
MD5: 149e61536a9fe36374732ec95cf7945d
SHA1: a5d28ae2f23e726a797349d7dd5f21baf8aa02b4
SHA256: 547500652257bac6f6bc83f0667d0d66c8abd1382c776c4de84b89d0f550ab7f
Headers:
Package: testdeb
Version: 1:0.0.0-test
Section: base
Priority: extra
Architecture: all
Installed-Size: 0
Maintainer: Nathan Mehl <n@climate.com>
Description: testdeb
a bogus debian package for testing dpkg builds

View File

@ -1,35 +1,55 @@
""" pydpkg: tools for inspecting dpkg archive files in python
without any dependency on libapt
"""
from __future__ import absolute_import
# stdlib imports # stdlib imports
import io
import logging
import os import os
import tarfile import tarfile
from StringIO import StringIO
from rfc822 import Message
from gzip import GzipFile from gzip import GzipFile
from hashlib import md5, sha1, sha256
from email import message_from_string as Message
# pypi imports # pypi imports
import six
from arpy import Archive from arpy import Archive
REQUIRED_HEADERS = ('package', 'version', 'architecture') REQUIRED_HEADERS = ('package', 'version', 'architecture')
logging.basicConfig()
class DpkgError(Exception): class DpkgError(Exception):
"""Base error class for pydpkg"""
pass pass
class DpkgVersionError(Exception): class DpkgVersionError(Exception):
"""Corrupt or unparseable version string"""
pass pass
class DpkgMissingControlFile(DpkgError): class DpkgMissingControlFile(DpkgError):
"""No control file found in control.tar.gz"""
pass pass
class DpkgMissingControlGzipFile(DpkgError): class DpkgMissingControlGzipFile(DpkgError):
"""No control.tar.gz file found in dpkg file"""
pass pass
class DpkgMissingRequiredHeaderError(DpkgError): class DpkgMissingRequiredHeaderError(DpkgError):
"""Corrupt package missing a required header"""
pass pass
@ -37,83 +57,160 @@ class Dpkg(object):
"""Class allowing import and manipulation of a debian package file.""" """Class allowing import and manipulation of a debian package file."""
def __init__(self, filename=None): def __init__(self, filename=None, ignore_missing=False, logger=None):
self.headers = {} self.filename = os.path.expanduser(filename)
if not isinstance(filename, basestring): self.ignore_missing = ignore_missing
if not isinstance(self.filename, six.string_types):
raise DpkgError('filename argument must be a string') raise DpkgError('filename argument must be a string')
if not os.path.isfile(filename): if not os.path.isfile(self.filename):
raise DpkgError('filename "%s" does not exist', filename) raise DpkgError('filename "%s" does not exist', filename)
self.control_str, self._control_headers = self._process_dpkg_file( self._log = logger or logging.getLogger(__name__)
filename) self._fileinfo = None
for k in self._control_headers.keys(): self._control_str = None
self.headers[k] = self._control_headers[k] self._headers = None
self._message = None
def __repr__(self): def __repr__(self):
return self.control_str return repr(self.control_str)
def __str__(self):
    """Render the package control message as a text string."""
    control_text = self.control_str
    return six.text_type(control_text)
@property
def message(self):
    """Return the package control structure as an
    email.message.Message object.

    The archive is parsed on first access via ``_process_dpkg_file`` and
    the result is cached on the instance.
    """
    # Compare against None rather than relying on truthiness: a Message
    # defines __len__ (its header count), so a header-less message is
    # falsy and would otherwise be re-parsed on every access.
    if self._message is None:
        self._message = self._process_dpkg_file(self.filename)
    return self._message
@property
def control_str(self):
    """Return the control message serialised to a string.

    The string is produced from ``self.message`` on first use and then
    remembered in ``self._control_str``.
    """
    cached = self._control_str
    if cached:
        return cached
    self._control_str = self.message.as_string()
    return self._control_str
@property
def headers(self):
    """Return the control message headers as a plain dict.

    Built from ``self.message.items()`` on first use and remembered in
    ``self._headers``; keys keep the spelling used in the control file.
    """
    cached = self._headers
    if cached:
        return cached
    self._headers = {
        name: value for name, value in self.message.items()}
    return self._headers
@property
def fileinfo(self):
    """Return a dictionary containing md5/sha1/sha256 checksums
    and the size in bytes of our target file.

    The file is read once, feeding all three digests from the same
    chunks; the resulting dict (keys ``md5``, ``sha1``, ``sha256``,
    ``filesize``) is cached on the instance.
    """
    if self._fileinfo is None:
        h_md5 = md5()
        h_sha1 = sha1()
        h_sha256 = sha256()
        # 64 KiB per read: large enough to avoid thousands of tiny
        # reads, small enough to keep memory use flat.
        chunk_size = 64 * 1024
        with open(self.filename, 'rb') as dpkg_file:
            for chunk in iter(lambda: dpkg_file.read(chunk_size), b''):
                h_md5.update(chunk)
                h_sha1.update(chunk)
                h_sha256.update(chunk)
        self._fileinfo = {
            'md5': h_md5.hexdigest(),
            'sha1': h_sha1.hexdigest(),
            'sha256': h_sha256.hexdigest(),
            'filesize': os.path.getsize(self.filename)
        }
    return self._fileinfo
@property
def md5(self):
    """Return the md5 entry of ``fileinfo`` for our target file."""
    info = self.fileinfo
    return info['md5']
@property
def sha1(self):
    """Return the sha1 entry of ``fileinfo`` for our target file."""
    info = self.fileinfo
    return info['sha1']
@property
def sha256(self):
    """Return the sha256 entry of ``fileinfo`` for our target file."""
    info = self.fileinfo
    return info['sha256']
@property
def filesize(self):
    """Return the filesize entry of ``fileinfo`` for our target file."""
    info = self.fileinfo
    return info['filesize']
def get_header(self, header):
    """Case-independent query for a control message header value.

    :param header: header name in any capitalisation, e.g. ``'version'``
    :returns: the header value, or ``''`` when the header is absent
    """
    # Ask the Message itself: its lookups ignore case, whereas the
    # ``headers`` dict keeps the control file's own capitalisation
    # ("Package"), so a lower-cased dict lookup never matched.
    return self.message.get(header, '')
def compare_version_with(self, version_str): def compare_version_with(self, version_str):
return Dpkg.compare_versions( """Compare my version to an arbitrary version"""
self.get_header('version'), return Dpkg.compare_versions(self.get_header('version'), version_str)
version_str)
def _force_encoding(self, obj, encoding='utf-8'): @staticmethod
if isinstance(obj, basestring): def _force_encoding(obj, encoding='utf-8'):
if not isinstance(obj, unicode): """Enforce uniform text encoding"""
obj = unicode(obj, encoding) if isinstance(obj, six.string_types):
if not isinstance(obj, six.text_type):
obj = six.text_type(obj, encoding)
return obj return obj
def _process_dpkg_file(self, filename): def _process_dpkg_file(self, filename):
dpkg = Archive(filename) dpkg_archive = Archive(filename)
dpkg.read_all_headers() dpkg_archive.read_all_headers()
try:
if 'control.tar.gz' not in dpkg.archived_files: control_tgz = dpkg_archive.archived_files[b'control.tar.gz']
except KeyError:
raise DpkgMissingControlGzipFile( raise DpkgMissingControlGzipFile(
'Corrupt dpkg file: no control.tar.gz file in ar archive.') 'Corrupt dpkg file: no control.tar.gz file in ar archive.')
self._log.debug('found controlgz: %s', control_tgz)
control_tgz = dpkg.archived_files['control.tar.gz'] # have to pass through BytesIO because gzipfile doesn't support seek
# have to do an intermediate step because gzipfile doesn't support seek
# from end; luckily control tars are tiny # from end; luckily control tars are tiny
control_tar_intermediate = GzipFile(fileobj=control_tgz, mode='rb') with GzipFile(fileobj=control_tgz) as gzf:
tar_data = control_tar_intermediate.read() self._log.debug('opened gzip file: %s', gzf)
sio = StringIO(tar_data) with tarfile.open(fileobj=io.BytesIO(gzf.read())) as control_tar:
control_tar = tarfile.open(fileobj=sio) self._log.debug('opened tar file: %s', control_tar)
# pathname in the tar could be ./control, or just control
# (there would never be two control files...right?)
tar_members = [
os.path.basename(x.name) for x in control_tar.getmembers()]
self._log.debug('got tar members: %s', tar_members)
if 'control' not in tar_members:
raise DpkgMissingControlFile(
'Corrupt dpkg file: no control file in control.tar.gz')
control_idx = tar_members.index('control')
self._log.debug('got control index: %s', control_idx)
# at last!
control_file = control_tar.extractfile(
control_tar.getmembers()[control_idx])
self._log.debug('got control file: %s', control_file)
message_body = control_file.read()
# py27 lacks email.message_from_bytes, so...
if isinstance(message_body, bytes):
message_body = message_body.decode('utf-8')
message = Message(message_body)
self._log.debug('got control message: %s', message)
# pathname in the tar could be ./control, or just control for req in REQUIRED_HEADERS:
# (there would never be two control files...right?) if req not in list(map(str.lower, message.keys())):
tar_members = [os.path.basename(x.name) import pdb
for x in control_tar.getmembers()] pdb.set_trace()
if 'control' not in tar_members: if self.ignore_missing:
raise DpkgMissingControlFile( self._log.debug(
'Corrupt dpkg file: no control file in control.tar.gz.') 'Header "%s" not found in control message', req)
control_idx = tar_members.index('control') continue
# at last!
control_file = control_tar.extractfile(
control_tar.getmembers()[control_idx])
# beware: dpkg will happily let people drop random encodings into the
# control file
control_str = self._force_encoding(control_file.read())
# now build the dict
control_file.seek(0)
control_headers = Message(control_file)
for header in REQUIRED_HEADERS:
if header not in control_headers:
raise DpkgMissingRequiredHeaderError( raise DpkgMissingRequiredHeaderError(
'Corrupt control section; header: "%s" not found' % header) 'Corrupt control section; header: "%s" not found' % req)
self._log.debug('all required headers found')
for header in control_headers: for header in message.keys():
control_headers[header] = self._force_encoding( self._log.debug('coercing header to utf8: %s', header)
control_headers[header]) message.replace_header(
header, self._force_encoding(message[header]))
self._log.debug('all required headers coerced')
return control_str, control_headers return message
@staticmethod @staticmethod
def get_epoch(version_str): def get_epoch(version_str):
@ -152,6 +249,10 @@ class Dpkg(object):
@staticmethod @staticmethod
def split_full_version(version_str): def split_full_version(version_str):
"""Split a full version string into epoch, upstream version and
debian revision.
:param: version_str
:returns: tuple """
epoch, full_ver = Dpkg.get_epoch(version_str) epoch, full_ver = Dpkg.get_epoch(version_str)
upstream_rev, debian_rev = Dpkg.get_upstream(full_ver) upstream_rev, debian_rev = Dpkg.get_upstream(full_ver)
return epoch, upstream_rev, debian_rev return epoch, upstream_rev, debian_rev
@ -160,14 +261,12 @@ class Dpkg(object):
def get_alphas(revision_str): def get_alphas(revision_str):
"""Return a tuple of the first non-digit characters of a revision (which """Return a tuple of the first non-digit characters of a revision (which
may be empty) and the remaining characters.""" may be empty) and the remaining characters."""
# get the index of the first digit # get the index of the first digit
for i, char in enumerate(revision_str): for i, char in enumerate(revision_str):
if char.isdigit(): if char.isdigit():
if i == 0: if i == 0:
return '', revision_str return '', revision_str
else: return revision_str[0:i], revision_str[i:]
return revision_str[0:i], revision_str[i:]
# string is entirely alphas # string is entirely alphas
return revision_str, '' return revision_str, ''
@ -175,17 +274,15 @@ class Dpkg(object):
def get_digits(revision_str): def get_digits(revision_str):
"""Return a tuple of the first integer characters of a revision (which """Return a tuple of the first integer characters of a revision (which
may be empty) and the remains.""" may be empty) and the remains."""
# If the string is empty, return (0,'')
if not revision_str: if not revision_str:
return 0, '' return 0, ''
# get the index of the first non-digit # get the index of the first non-digit
for i, char in enumerate(revision_str): for i, char in enumerate(revision_str):
if not char.isdigit(): if not char.isdigit():
if i == 0: if i == 0:
return 0, revision_str return 0, revision_str
else: return int(revision_str[0:i]), revision_str[i:]
return int(revision_str[0:i]), revision_str[i:]
# string is entirely digits # string is entirely digits
return int(revision_str), '' return int(revision_str), ''
@ -199,12 +296,13 @@ class Dpkg(object):
""" """
result = [] result = []
while revision_str: while revision_str:
r1, remains = Dpkg.get_alphas(revision_str) rev_1, remains = Dpkg.get_alphas(revision_str)
r2, remains = Dpkg.get_digits(remains) rev_2, remains = Dpkg.get_digits(remains)
result.extend([r1, r2]) result.extend([rev_1, rev_2])
revision_str = remains revision_str = remains
return result return result
# pylint: disable=invalid-name,too-many-return-statements
@staticmethod @staticmethod
def dstringcmp(a, b): def dstringcmp(a, b):
"""debian package version string section lexical sort algorithm """debian package version string section lexical sort algorithm
@ -241,32 +339,30 @@ class Dpkg(object):
# ...except for goddamn tildes # ...except for goddamn tildes
if char == '~': if char == '~':
return -1 return -1
else: return 1
return 1
# if we get here, a is shorter than b but otherwise equal, hence lesser # if we get here, a is shorter than b but otherwise equal, hence lesser
# ...except for goddamn tildes # ...except for goddamn tildes
if b[len(a)] == '~': if b[len(a)] == '~':
return 1 return 1
else: return -1
return -1
@staticmethod @staticmethod
def compare_revision_strings(rev1, rev2): def compare_revision_strings(rev1, rev2):
"""Compare two debian revision strings as described at
https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version
"""
if rev1 == rev2: if rev1 == rev2:
return 0 return 0
# listify pads results so that we will always be comparing ints to ints # listify pads results so that we will always be comparing ints to ints
# and strings to strings (at least until we fall off the end of a list) # and strings to strings (at least until we fall off the end of a list)
list1 = Dpkg.listify(rev1) list1 = Dpkg.listify(rev1)
list2 = Dpkg.listify(rev2) list2 = Dpkg.listify(rev2)
if list1 == list2: if list1 == list2:
return 0 return 0
try: try:
for i, item in enumerate(list1): for i, item in enumerate(list1):
# just in case # just in case
if type(item) != type(list2[i]): if not isinstance(item, list2[i].__class__):
raise DpkgVersionError( raise DpkgVersionError(
'Cannot compare %s to %s, something has gone horribly ' 'Cannot compare %s to %s, something has gone horribly '
'awry.' % (item, list2[i])) 'awry.' % (item, list2[i]))
@ -274,7 +370,7 @@ class Dpkg(object):
if item == list2[i]: if item == list2[i]:
continue continue
# numeric comparison # numeric comparison
if type(item) == int: if isinstance(item, int):
if item > list2[i]: if item > list2[i]:
return 1 return 1
if item < list2[i]: if item < list2[i]:
@ -290,6 +386,8 @@ class Dpkg(object):
@staticmethod @staticmethod
def compare_versions(ver1, ver2): def compare_versions(ver1, ver2):
"""Function to compare two Debian package version strings,
suitable for passing to list.sort() and friends."""
if ver1 == ver2: if ver1 == ver2:
return 0 return 0

45
scripts/dpkg-inspect.py Executable file
View File

@ -0,0 +1,45 @@
#!/usr/bin/env python
from __future__ import print_function
import glob
import logging
import os
import sys
from pydpkg import Dpkg
logging.basicConfig()
log = logging.getLogger('dpkg_extract')
log.setLevel(logging.INFO)
PRETTY = """Filename: {0}
Size: {1}
MD5: {2}
SHA1: {3}
SHA256: {4}
Headers:
{5}"""
def indent(input_str, prefix):
    """Return input_str with prefix put in front of every line.

    Lines are split on a literal newline, so a trailing newline yields a
    final line consisting of the prefix alone.
    """
    prefixed = []
    for line in input_str.split('\n'):
        prefixed.append('%s%s' % (prefix, line))
    return '\n'.join(prefixed)
# Script body: print size, checksums and control headers for every deb
# file named (or globbed) on the command line.
filenames = sys.argv[1:]
# Slicing never raises, so an empty argument list must be tested for
# explicitly; otherwise the script silently does nothing.
if not filenames:
    log.fatal('You must list at least one deb file as an argument')
    sys.exit(1)

for files in filenames:
    for fn in glob.glob(files):
        if not os.path.isfile(fn):
            log.warning('%s is not a file, skipping', fn)
            # actually skip it: Dpkg() raises on anything but a file
            continue
        log.debug('checking %s', fn)
        dp = Dpkg(fn)
        print(PRETTY.format(
            fn, dp.filesize, dp.md5, dp.sha1, dp.sha256,
            indent(str(dp), ' ')
        ))

View File

@ -1,20 +1,32 @@
from distutils.core import setup from distutils.core import setup
setup( setup(
name = 'pydpkg', name='pydpkg',
packages = ['pydpkg'], # this must be the same as the name above packages=['pydpkg'], # this must be the same as the name above
version = '1.0', version='1.1',
description = 'A python library for parsing debian package control headers and comparing version strings', description='A python library for parsing debian package control headers and comparing version strings',
author = 'Nathan J. Mehl', author='Nathan J. Mehl',
author_email = 'n@climate.com', author_email='n@climate.com',
url = 'https://github.com/theclimatecorporation/python-dpkg', url='https://github.com/theclimatecorporation/python-dpkg',
download_url = 'https://github.com/theclimatecorporation/python-dpkg/tarball/1.0', download_url='https://github.com/theclimatecorporation/python-dpkg/tarball/1.1',
keywords = ['apt', 'debian', 'dpkg', 'packaging'], keywords=['apt', 'debian', 'dpkg', 'packaging'],
classifiers=[ install_requires=[
"Development Status :: 5 - Production/Stable", 'arpy==1.1.1',
"License :: OSI Approved :: Apache Software License", 'six==1.10.0'
"Programming Language :: Python :: 2.6", ],
"Programming Language :: Python :: 2.7", extras_require={
"Programming Language :: Python :: Implementation :: CPython", 'test': ['pep8==1.7.0', 'pytest==3.1.1', 'pylint==1.7.1']
"Topic :: System :: Archiving :: Packaging", },
] scripts=[
'scripts/dpkg-inspect.py'
],
classifiers=[
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: System :: Archiving :: Packaging",
]
) )

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import unittest import unittest
from functools import cmp_to_key
from pydpkg import Dpkg, DpkgVersionError from pydpkg import Dpkg, DpkgVersionError
@ -65,8 +66,9 @@ class DpkgTest(unittest.TestCase):
# taken from # taken from
# http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version # http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version
self.assertEqual( self.assertEqual(
sorted(['a', '', '~', '~~a', '~~'], cmp=Dpkg.dstringcmp), sorted(['a', '', '~', '~~a', '~~'],
['~~', '~~a', '~', '', 'a']) key=cmp_to_key(Dpkg.dstringcmp)),
['~~', '~~a', '~', '', 'a'])
def test_compare_revision_strings(self): def test_compare_revision_strings(self):
# note that these are testing a single revision string, not the full # note that these are testing a single revision string, not the full