python-findbig/findbig/cli.py

113 lines
3.5 KiB
Python

#!/usr/bin/env python3
from os import walk, environ
from os.path import join, getsize, normpath, islink
import sys
from argparse import ArgumentParser
# Unit letters accepted on the command line, smallest to largest.
_unit_letters = "bkmgt"

# Bytes per unit: each step up the ladder is another factor of 1000.
unit_sizes = {
    letter: 1000 ** power for power, letter in enumerate(_unit_letters)
}

# Suffix printed after a converted size. These used to be the long forms
# (MB, GB, ...); now each is simply the upper-cased unit letter.
unit_names = {letter: letter.upper() for letter in _unit_letters}
def find_files(base_dir, min_size=0, verbose=False):
    """Yield ``(path, size)`` for every non-symlink file below *base_dir*.

    The tree is traversed with ``os.walk``. Each file name it reports is
    joined to its directory and normalised; symbolic links are skipped.

    Args:
        base_dir: Directory at which the recursive search starts.
        min_size: Smallest size in bytes to report; smaller files are
            skipped.
        verbose: Accepted for interface compatibility; currently unused.

    Yields:
        Tuples of ``(file_path, file_size)`` with the size in bytes.
    """
    for current_dir, _subdirs, files in walk(base_dir):
        for file_name in files:
            file_path = normpath(join(current_dir, file_name))
            if islink(file_path):
                continue
            try:
                file_size = getsize(file_path)
            except OSError:
                # The file vanished, or it cannot be stat'ed (permission
                # denied, path component replaced, ...). One unreadable
                # entry must not abort the whole scan, so skip it.
                continue
            if file_size < min_size:
                continue
            yield (file_path, file_size)
def size_to_units(size, unit, round_places=0):
    """Express *size* (bytes) in *unit*, rounded to *round_places* places.

    *unit* is a key of ``unit_sizes``. With ``round_places == 0`` the result
    is returned as an ``int``; otherwise the rounded value is returned as is.
    """
    divisor = unit_sizes[unit]
    scaled = round(size / divisor, round_places)
    return int(scaled) if round_places == 0 else scaled
def main():
    """Command-line entry point: scan a directory tree and print large files.

    Parses the command line, collects ``(path, size)`` pairs from
    ``find_files`` and prints each one through the ``--format`` template
    (fields ``fsize``, ``unit`` and ``fpath``).

    Without ``--live`` the results are sorted by ascending size and, when a
    non-zero ``--limit`` is given, reduced to the largest N entries. With
    ``--live`` every file is printed as soon as it is found, unsorted, and
    ``--limit`` is not applied.

    Returns:
        0 if every result was printed, 1 if at least one file name could not
        be encoded for output. Invalid arguments end the program through
        ``parser.error``.
    """
    # NOTE(review): presumably meant to force UTF-8 output; changing LANG at
    # this point does not re-open the interpreter's own streams. Confirm.
    environ["LANG"] = "en_US.UTF-8"

    parser = ArgumentParser(description="Find large files in a directory")
    parser.add_argument("base_dir", help="Root dir to begin search")
    output_opts = parser.add_argument_group("output options")
    output_opts.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
    output_opts.add_argument("-f", "--format", default="{fsize}{unit}\t{fpath}", help="Output format")
    output_opts.add_argument("-l", "--limit", type=int, help="Limit result count")
    output_opts.add_argument("-u", "--unit", choices=unit_sizes.keys(), default="b",
                             help="Convert sizes to unit")
    output_opts.add_argument("-r", "--round", type=int, default=0, help="Number of places to round to")
    output_opts.add_argument("-m", "--min-size", default="0", help="Ignore files smaller than threshold")
    output_opts.add_argument("-i", "--live", action="store_true", help="Print offending files as they are found")
    args = parser.parse_args()

    # A negative limit would turn the "keep the last N" slice below into
    # "drop the first N", which is never what the user asked for.
    if args.limit is not None and args.limit < 0:
        parser.error("limit must not be negative: {}".format(args.limit))

    min_size = 0
    if args.min_size:
        try:
            # Plain byte count, e.g. "4096".
            min_size = int(args.min_size)
        except ValueError:
            try:
                # Number followed by a one-letter unit suffix, e.g. "10M".
                unit = args.min_size[-1]
                min_size = int(args.min_size[0:-1]) * unit_sizes[unit.lower()]
            except (KeyError, ValueError):
                parser.error("unparseable minimum size: {}".format(args.min_size))

    def print_file(file_info):
        """Print one ``(path, size)`` result; return False if it failed."""
        try:
            print(args.format.format(fsize=size_to_units(file_info[1], args.unit, args.round),
                                     unit=unit_names[args.unit],
                                     fpath=file_info[0]))
        except UnicodeEncodeError:
            # The name cannot be encoded for stdout; report it and carry on
            # so one bad name does not abort the listing.
            sys.stderr.write("Invalid UTF-8 file name: {}\n".format(repr(file_info[0])))
            sys.stderr.flush()
            return False
        return True

    results = find_files(args.base_dir, min_size=min_size, verbose=args.verbose)
    if not args.live:
        # Smallest first, so the biggest files end up at the bottom of the
        # terminal; the limit keeps the tail, i.e. the largest entries.
        results = sorted(results, key=lambda x: x[1])
        if args.limit:
            results = results[-(args.limit):]

    clean = True
    for item in results:
        if not print_file(item):
            clean = False

    if not clean:
        sys.stderr.write("Warning: errors were encountered while scanning files\n")
        sys.stderr.flush()
    return 0 if clean else 1
# When executed as a script, run the CLI and hand main()'s return value to
# the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())