From f342b2932ede5cd2ea3dba8179f638a57fcbb0a4 Mon Sep 17 00:00:00 2001
From: dave
Date: Tue, 2 May 2017 20:06:28 -0700
Subject: [PATCH] Initial commit

---
Hand-edited after export: findbig/cli.py differs from the original commit, so
the blob id on its "index" line no longer matches the file content.

 .gitignore          |   5 ++
 findbig/__init__.py |   1 +
 findbig/cli.py      | 112 ++++++++++++++++++++++++++++++++++++++++++++
 setup.py            |  17 +++++++
 4 files changed, 135 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 findbig/__init__.py
 create mode 100644 findbig/cli.py
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bb2b4a2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/build/
+/dist/
+/*.egg-info
+/testenv
+__pycache__
diff --git a/findbig/__init__.py b/findbig/__init__.py
new file mode 100644
index 0000000..f102a9c
--- /dev/null
+++ b/findbig/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"
diff --git a/findbig/cli.py b/findbig/cli.py
new file mode 100644
index 0000000..74d92cd
--- /dev/null
+++ b/findbig/cli.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""Command-line tool that lists the files below a directory ordered by size."""
+from os import walk, environ
+from os.path import join, getsize, normpath, islink
+import sys
+from argparse import ArgumentParser
+
+
+# Bytes per unit. These are decimal (SI) multiples, not powers of 1024.
+unit_sizes = {
+    "b": 1,
+    "k": 10 ** 3,
+    "m": 10 ** 6,
+    "g": 10 ** 9,
+    "t": 10 ** 12
+}
+
+# This used to be MB, GB, etc, but now it seems pointless
+unit_names = {
+    "b": "B",
+    "k": "K",
+    "m": "M",
+    "g": "G",
+    "t": "T"
+}
+
+
+def find_files(base_dir, verbose=False):
+    """
+    Walk base_dir recursively and yield a (path, size in bytes) tuple for every file that is not a symlink.
+
+    Files whose size cannot be read (e.g. removed while scanning) are skipped. If verbose is true, each
+    skipped path is reported on stderr.
+    """
+    for current_dir, _subdirs, files in walk(base_dir):
+        for file_name in files:
+            file_path = normpath(join(current_dir, file_name))
+            if islink(file_path):
+                continue
+            try:
+                yield (file_path, getsize(file_path))
+            except OSError as e:
+                # File disappeared, or it cannot be stat'ed for another reason (permissions, I/O error).
+                # FileNotFoundError is a subclass of OSError, so the vanished-file case is still covered.
+                if verbose:
+                    sys.stderr.write("Skipping {}: {}\n".format(repr(file_path), e))
+
+
+def size_to_units(size, unit, round_places=0):
+    """
+    Convert size (in bytes) to the given unit, which must be a key of unit_sizes.
+
+    The result is rounded to round_places decimals. With round_places == 0 an int is returned, otherwise
+    the value returned by round().
+    """
+    size = round(size / unit_sizes[unit], round_places)
+    if round_places == 0:
+        return int(size)
+    return size
+
+
+def main():
+    """
+    Parse the command line, scan base_dir and print the files found, smallest first.
+
+    Returns the process exit code: 0 on success, 1 if a file name could not be printed.
+    """
+    # NOTE: this is only seen by code that reads LANG from here on; the encoding of the already-open
+    # sys.stdout was chosen at interpreter startup and does not change.
+    environ["LANG"] = "en_US.UTF-8"
+    clean = True
+    parser = ArgumentParser(description="Find large files in a directory")
+    parser.add_argument("base_dir", help="Root dir to begin search")
+
+    output_opts = parser.add_argument_group("output options")
+    output_opts.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
+    output_opts.add_argument("-f", "--format", default="{fsize}{unit}\t{fpath}", help="Output format")
+    output_opts.add_argument("-l", "--limit", type=int, help="Limit result count")
+    output_opts.add_argument("-u", "--unit", choices=unit_sizes.keys(), default="b",
+                             help="Convert sizes to unit")
+    output_opts.add_argument("-r", "--round", type=int, default=0, help="Number of places to round to")
+
+    args = parser.parse_args()
+
+    if args.limit is not None and args.limit < 0:
+        # files[-limit:] with a negative limit would drop the smallest entries instead of capping the output
+        parser.error("--limit must not be negative")
+
+    files = sorted(find_files(args.base_dir, verbose=args.verbose), key=lambda item: item[1])
+
+    # No limit, or a limit of 0, prints every file
+    if args.limit:
+        files = files[-args.limit:]
+
+    for file_path, file_size in files:
+        try:
+            print(args.format.format(fsize=size_to_units(file_size, args.unit, args.round),
+                                     unit=unit_names[args.unit], fpath=file_path))
+        except UnicodeEncodeError:
+            sys.stderr.write("Invalid UTF-8 file name: {}\n".format(repr(file_path)))
+            sys.stderr.flush()
+            clean = False
+
+    if not clean:
+        sys.stderr.write("Warning: errors were encountered while scanning files\n")
+        sys.stderr.flush()
+
+    return 0 if clean else 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..fe9cf3d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+from setuptools import setup
+from findbig import __version__
+
+
+setup(name='findbig',
+      version=__version__,
+      description='Command-line tool for finding the biggest files in a directory tree',
+      url='http://gitlab.davepedu.com/dave/findbog',
+      author='dpedu',
+      author_email='dave@davepedu.com',
+      packages=['findbig'],
+      entry_points={
+          "console_scripts": [
+              "findbig = findbig.cli:main"
+          ]
+      })