99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
#!/usr/bin/env python3
|
|
from os import walk, environ
|
|
from os.path import join, getsize, normpath, islink
|
|
import sys
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
# Number of bytes in one of each unit, keyed by the single-letter unit code.
# Every step is a power of 1000 (decimal units), starting at 1 for plain bytes.
unit_sizes = {code: 1000 ** power for power, code in enumerate("bkmgt")}
|
|
|
|
# Suffix printed after a converted size, keyed by the same unit codes as
# unit_sizes. These were once the longer "MB", "GB", ... spellings; the
# single-letter form is what is used now.
unit_names = dict(b="B", k="K", m="M", g="G", t="T")
|
|
|
|
|
|
def find_files(base_dir, min_size=0, verbose=False):
    """Yield ``(path, size)`` for each non-symlink file found under *base_dir*.

    The tree is traversed with ``os.walk``. Symbolic links are skipped, as are
    files smaller than *min_size*. A file whose size cannot be read is skipped
    rather than aborting the whole scan.

    Args:
        base_dir: Directory at which the walk starts.
        min_size: Smallest size, in bytes, a file must have to be yielded.
        verbose: When true, report every file that is skipped because its
            size could not be read on standard error.

    Yields:
        Tuples of the normalized file path and the file size in bytes.
    """
    for current_dir, _subdirs, files in walk(base_dir):
        for file_name in files:
            file_path = normpath(join(current_dir, file_name))
            if islink(file_path):
                continue
            try:
                file_size = getsize(file_path)
            except OSError as err:
                # FileNotFoundError (file vanished mid-scan) is an OSError
                # subclass; catching the base class also covers
                # PermissionError and similar, so one unreadable file does
                # not kill the scan.
                if verbose:
                    sys.stderr.write("Skipping file: {}\n".format(err))
                continue
            if file_size >= min_size:
                yield (file_path, file_size)
|
|
|
|
|
|
def size_to_units(size, unit, round_places=0):
    """Convert a byte count into *unit*, rounded to *round_places* places.

    Args:
        size: Size in bytes.
        unit: Key of ``unit_sizes`` selecting the target unit.
        round_places: Digit count handed to ``round``. A negative value
            rounds to tens, hundreds, and so on.

    Returns:
        An ``int`` when *round_places* is zero or negative (the rounded value
        has no fractional part), otherwise a ``float``.

    Raises:
        KeyError: If *unit* is not a key of ``unit_sizes``.
    """
    converted = round(size / unit_sizes[unit], round_places)
    # round() on a float returns a float even when no decimals are kept
    # (e.g. 1200.0); hand back an int so the output does not show ".0".
    if round_places <= 0:
        return int(converted)
    return converted
|
|
|
|
|
|
def _parse_size(text):
    """Return the byte count described by *text*, e.g. ``"500"`` or ``"10k"``.

    Raises ValueError when *text* is neither a plain integer nor an integer
    followed by one of the ``unit_sizes`` suffixes (case-insensitive).
    """
    try:
        return int(text)
    except ValueError:
        pass
    number, suffix = text[:-1], text[-1:].lower()
    if suffix not in unit_sizes:
        raise ValueError("unknown size suffix: {!r}".format(suffix))
    return int(number) * unit_sizes[suffix]


def main():
    """Command-line entry point: print files under a directory, smallest first.

    Parses the command line, collects files with ``find_files``, sorts them by
    size in ascending order, optionally keeps only the largest ``--limit``
    entries, and prints one line per file using the ``--format`` template
    (fields ``fsize``, ``unit`` and ``fpath``).

    Returns:
        0 when every line was printed, 1 when at least one file name could
        not be encoded for output. Invalid arguments terminate the process
        through ``ArgumentParser.error``.
    """
    # NOTE(review): this is assigned after interpreter start-up, so it
    # presumably only matters to code that reads LANG later - confirm it is
    # still needed.
    environ["LANG"] = "en_US.UTF-8"
    clean = True
    parser = ArgumentParser(description="Find large files in a directory")
    parser.add_argument("base_dir", help="Root dir to begin search")

    output_opts = parser.add_argument_group("output options")
    output_opts.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
    output_opts.add_argument("-f", "--format", default="{fsize}{unit}\t{fpath}", help="Output format")
    output_opts.add_argument("-l", "--limit", type=int, help="Limit result count")
    output_opts.add_argument("-u", "--unit", choices=list(unit_sizes), default="b",
                             help="Convert sizes to unit")
    output_opts.add_argument("-r", "--round", type=int, default=0, help="Number of places to round to")
    output_opts.add_argument("-m", "--min-size", default="0", help="Ignore files smaller than threshold")

    args = parser.parse_args()

    # A negative limit would make the slice below drop the smallest entries
    # instead of limiting the result count, so refuse it up front.
    if args.limit is not None and args.limit < 0:
        parser.error("limit must not be negative: {}".format(args.limit))

    min_size = 0
    if args.min_size:
        try:
            min_size = _parse_size(args.min_size)
        except ValueError:
            parser.error("unparseable minimum size: {}".format(args.min_size))

    files = sorted(
        find_files(args.base_dir, min_size=min_size, verbose=args.verbose),
        key=lambda entry: entry[1],
    )

    # The list is ascending, so the largest files are at the end. A limit of
    # zero (or none given) means "no limit".
    if args.limit:
        files = files[-args.limit:]

    unit_name = unit_names[args.unit]
    for file_path, file_size in files:
        try:
            line = args.format.format(
                fsize=size_to_units(file_size, args.unit, args.round),
                unit=unit_name,
                fpath=file_path,
            )
        except (LookupError, ValueError) as err:
            # Unknown field name, positional field, or bad format spec in the
            # user-supplied template: report it as a usage error.
            parser.error("invalid output format {!r}: {}".format(args.format, err))
        try:
            print(line)
        except UnicodeEncodeError:
            sys.stderr.write("Invalid UTF-8 file name: {}\n".format(repr(file_path)))
            sys.stderr.flush()
            clean = False

    if not clean:
        sys.stderr.write("Warning: errors were encountered while scanning files\n")
        sys.stderr.flush()

    return 0 if clean else 1
|
|
|
|
# Run the command-line interface only when executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|