basic webapp
This commit is contained in:
parent
2970c139b9
commit
9aedf2f53d
|
@ -0,0 +1,142 @@
|
|||
import os
|
||||
import sys
|
||||
# import locale
|
||||
# locale.setlocale(locale.LC_ALL, 'en_US')
|
||||
import logging
|
||||
import cherrypy
|
||||
from threading import Thread
|
||||
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
||||
from dirview.dirtools import gen_db, gen_index, NodeType, NodeGroup
|
||||
|
||||
|
||||
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
|
||||
|
||||
|
||||
class DbUpdater(Thread):
    """
    Background daemon thread that scans `root_path` and builds the node database.

    The web layer polls `self.root` / `self.index`; both stay None until the
    first scan completes.
    """

    def __init__(self, root_path, cache_dir):
        """
        :param root_path: directory to scan
        :param cache_dir: directory for cached scan data (stored for future use;
                          caching is not implemented yet)
        """
        super().__init__()
        self.daemon = True  # don't block interpreter shutdown
        self.root_path = root_path
        self.cache_dir = cache_dir  # fix: was accepted but silently discarded
        self.root = None   # root Node, set once gen_db finishes
        self.index = None  # id -> Node mapping, set after the root is built

    def run(self):
        logging.info("Updating database...")
        self.root = gen_db(self.root_path)
        logging.info("Generating index...")
        self.index = gen_index(self.root)
        logging.info("Warming caches...")
        self.root.total_size  # calculating these require recursing all nodes
        self.root.total_children
        logging.info("Database update complete!")
||||
|
||||
class AppWeb(object):
    """
    Web frontend: renders the node database built by DbUpdater as HTML pages.
    """

    def __init__(self, database, template_dir):
        """
        :param database: DbUpdater instance (read for .root and .index)
        :param template_dir: directory containing jinja2 templates
        """
        self.db = database
        self.tpl = Environment(loader=FileSystemLoader(template_dir),
                               autoescape=select_autoescape(['html', 'xml']))
        # helpers exposed to templates as filters
        self.tpl.filters.update(id=id,
                                repr=repr,
                                len=len,
                                pathjoin=lambda x: os.path.join(*x),
                                commafy=lambda x: format(x, ',d'))

    def render(self, template, **kwargs):
        """
        Render a template, always passing the NodeType/NodeGroup enums so
        templates can do membership tests on node types.
        """
        return self.tpl.get_template(template). \
            render(**kwargs,
                   NodeType=NodeType,
                   NodeGroup=NodeGroup)

    @cherrypy.expose
    def index(self, n=None):
        """
        Main page: show the node with id `n`, or the root when n is omitted.
        Returns a placeholder message while the initial scan is running.
        """
        from time import time
        start = time()
        if self.db.root is None:
            return "I'm still scanning your files, check back soon."

        if n is None:
            node = self.db.root
        else:
            try:
                node = self.db.index[int(n)]
            # ValueError covers a non-numeric ?n= — report 404, not a 500
            except (KeyError, ValueError):
                raise cherrypy.HTTPError(404)

        page = self.render("page.html", node=node)
        dur = time() - start
        return page + f"\n<!-- render time: {round(dur, 4)} -->"
|
||||
|
||||
|
||||
def main():
    """
    Entry point: parse CLI options, start the background scanner thread, and
    serve the web UI. Blocks until the cherrypy engine exits (SIGINT/SIGTERM).
    """
    import argparse
    import signal

    parser = argparse.ArgumentParser(description="NAS storage visualizer")
    parser.add_argument('-d', '--dir', required=True, help="directory to scan")
    parser.add_argument('--cache', help="cache dir")
    parser.add_argument('-p', '--port', default=8080, type=int, help="http port to listen on")
    parser.add_argument('--debug', action="store_true", help="enable development options")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # in --debug mode load assets from the working directory so edits show up
    # without reinstalling the package
    tpl_dir = os.path.join(APPROOT, "templates") if not args.debug else "templates"
    # cherrypy's staticdir tool requires an absolute path
    static_dir = os.path.join(APPROOT, "static") if not args.debug else os.path.abspath("static")

    db = DbUpdater(args.dir, args.cache)
    db.start()

    web = AppWeb(db, tpl_dir)

    cherrypy.tree.mount(web, '/',
                        {'/': {},
                         '/static': {"tools.staticdir.on": True,
                                     "tools.staticdir.dir": static_dir},
                         # '/login': {'tools.auth_basic.on': True,
                         #            'tools.auth_basic.realm': 'webapp',
                         #            'tools.auth_basic.checkpassword': validate_password}
                         })

    cherrypy.config.update({
        'tools.sessions.on': False,
        'request.show_tracebacks': True,  # ??
        'server.show_tracebacks': True,  # ??
        'server.socket_port': args.port,
        'server.socket_host': '0.0.0.0',
        'server.thread_pool': 5,
        'engine.autoreload.on': args.debug,  # restart when source files change
        'log.screen': False,  # ??
    })

    def signal_handler(signum, stack):
        # translate SIGINT/SIGTERM into a clean cherrypy shutdown
        logging.critical('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # This is also the maximum nested directory depth supported
    sys.setrecursionlimit(1000)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        logging.info("API has shut down")
        cherrypy.engine.exit()
|
||||
|
||||
|
||||
# allow running the webapp directly as a script
if __name__ == '__main__':
    main()
|
|
@ -13,18 +13,23 @@ from time import time
|
|||
import json
|
||||
import resource
|
||||
import typing
|
||||
import logging
|
||||
# import ipdb
|
||||
|
||||
|
||||
class NodeType(Enum):
    """
    Kinds of filesystem entries tracked in the database. Serialized by .value,
    so member order must stay stable.
    """
    DIR = auto()
    FILE = auto()
    ROOT = auto()  # behaves like a dir but has special handling in some places
    # TODO use these
    LINK = auto()
    SPECIAL = auto()
|
||||
|
||||
|
||||
class NodeGroup(object):
    """Convenience groupings of NodeType members for membership tests."""
    # node types that may contain children
    DIRLIKE = {NodeType.ROOT, NodeType.DIR}
    # leaf node types
    FILELIKE = {NodeType.SPECIAL, NodeType.LINK, NodeType.FILE}
|
||||
|
||||
|
||||
# this costs about 380 bytes per file/directory
|
||||
@dataclass
|
||||
class Node:
|
||||
|
@ -32,16 +37,26 @@ class Node:
|
|||
typ: int
|
||||
children: list
|
||||
size: int
|
||||
parent_id: int
|
||||
parent: "Node"
|
||||
|
||||
total_size_cache: int = None
|
||||
|
||||
@property
|
||||
def total_size(self) -> int:
|
||||
if self.typ in {NodeType.DIR, NodeType.ROOT}:
|
||||
sz = 0
|
||||
for node in self.children:
|
||||
sz += node.total_size()
|
||||
return sz
|
||||
else:
|
||||
return self.size
|
||||
if self.total_size_cache is None:
|
||||
if self.typ in {NodeType.DIR, NodeType.ROOT}:
|
||||
self.total_size_cache = sum([node.total_size for node in self.children])
|
||||
else:
|
||||
self.total_size_cache = self.size
|
||||
return self.total_size_cache
|
||||
|
||||
total_children_cache: int = None
|
||||
|
||||
@property
|
||||
def total_children(self) -> int:
|
||||
if self.total_children_cache is None:
|
||||
self.total_children_cache = sum([c.total_children for c in self.children]) + len(self.children)
|
||||
return self.total_children_cache
|
||||
|
||||
def serialize(self) -> tuple:
|
||||
"""
|
||||
|
@ -54,7 +69,7 @@ class Node:
|
|||
typ=self.typ.value,
|
||||
children=[id(n) for n in self.children],
|
||||
size=self.size,
|
||||
parent_id=self.parent_id,
|
||||
parent=id(self.parent),
|
||||
id=id(self))
|
||||
|
||||
def iter(self, include_self=True) -> typing.Generator["Node", None, None]:
|
||||
|
@ -66,6 +81,19 @@ class Node:
|
|||
for child in self.children:
|
||||
yield from child.iter()
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
parts = [self.name]
|
||||
while True:
|
||||
if self.parent is None:
|
||||
break
|
||||
parts.insert(0, self.parent.name)
|
||||
self = self.parent
|
||||
return parts
|
||||
|
||||
    def __hash__(self):
        # identity hash: every node is a distinct object, and id() is also
        # what the serialized database uses as the node key
        return id(self)
|
||||
|
||||
# def __str__(self): # TODO
|
||||
# pass
|
||||
|
||||
|
@ -82,7 +110,7 @@ def get_type(dirpath):
|
|||
# TODO other types
|
||||
|
||||
|
||||
def gen_db_recurse(dirpath, parent_id=None, is_root=False):
|
||||
def gen_db_recurse(dirpath, parent=None, is_root=False):
|
||||
"""
|
||||
returns a node representing the file/directory at dirpath
|
||||
:param dirpath: absolute path to the item
|
||||
|
@ -90,11 +118,11 @@ def gen_db_recurse(dirpath, parent_id=None, is_root=False):
|
|||
|
||||
children = []
|
||||
|
||||
node = Node(name=os.path.basename(dirpath),
|
||||
node = Node(name=dirpath if is_root else os.path.basename(dirpath),
|
||||
typ=NodeType.ROOT if is_root else get_type(dirpath),
|
||||
children=children,
|
||||
size=0,
|
||||
parent_id=parent_id)
|
||||
parent=parent)
|
||||
|
||||
if node.typ in {NodeType.FILE}: # todo account for link and dir sizes somewhere
|
||||
node.size = os.path.getsize(dirpath)
|
||||
|
@ -104,9 +132,9 @@ def gen_db_recurse(dirpath, parent_id=None, is_root=False):
|
|||
try:
|
||||
flist = os.listdir(dirpath)
|
||||
except PermissionError as e:
|
||||
print(f"Could not access {dirpath}: {e}")
|
||||
logging.info(f"Could not access {dirpath}: {e}")
|
||||
for i in flist: # TODO we could probably parallelize the recursion down different trees?
|
||||
children.append(gen_db_recurse(os.path.join(dirpath, i), parent_id=id(node)))
|
||||
children.append(gen_db_recurse(os.path.join(dirpath, i), parent=node))
|
||||
|
||||
return node
|
||||
|
||||
|
@ -134,13 +162,13 @@ def serialize_db(db):
|
|||
|
||||
This would be serialized as:
|
||||
|
||||
{"name": "root_dir", "typ": 3, "children": [1, 2], "size": 0, "parent_id": null, "id": 0}
|
||||
{"name": "hello.txt", "typ": 2, "children": [], "size": 92863, "parent_id": 0, "id": 1}
|
||||
{"name": "foo", "typ": 1, "children": [3], "size": 0, "parent_id": 0, "id": 2}
|
||||
{"name": "bar.txt", "typ": 2, "children": [], "size": 19459, "parent_id": 2, "id": 3}
|
||||
{"name": "root_dir", "typ": 3, "children": [1, 2], "size": 0, "parent": null, "id": 0}
|
||||
{"name": "hello.txt", "typ": 2, "children": [], "size": 92863, "parent": 0, "id": 1}
|
||||
{"name": "foo", "typ": 1, "children": [3], "size": 0, "parent": 0, "id": 2}
|
||||
{"name": "bar.txt", "typ": 2, "children": [], "size": 19459, "parent": 2, "id": 3}
|
||||
|
||||
Note that:
|
||||
- parent_id is null on the root node
|
||||
- parent is null on the root node
|
||||
- child/parent relationships are by node id
|
||||
- it is possible to append entries to the dump at a later time
|
||||
- removing files directly from the serialized dump is technically possible
|
||||
|
@ -201,8 +229,8 @@ def test_gen_write_db(path):
|
|||
|
||||
# nodecache = {}
|
||||
|
||||
with open("testdb.jsonl", "w") as f:
|
||||
write_db(db, f)
|
||||
# with open("testdb.jsonl", "w") as f:
|
||||
# write_db(db, f)
|
||||
|
||||
# for node in recurse_nodes(db):
|
||||
# print(node.name)
|
||||
|
@ -216,7 +244,7 @@ def load_db(fpath):
|
|||
1) parse all node objects and save them in a cache keyed by the embedded IDs
|
||||
2) for each node in the cache:
|
||||
3) re-establish child pointers
|
||||
4) re-establish parent pointers TODO if we change parents to be pointers too
|
||||
4) re-establish parent pointers
|
||||
|
||||
On my i7-7920HQ CPU @ 3.10GHz, loading a 276M dump with 2.2M lines takes 22s
|
||||
"""
|
||||
|
@ -231,17 +259,17 @@ def load_db(fpath):
|
|||
typ=NodeType(info["typ"]),
|
||||
children=info["children"], # keep as IDs for now
|
||||
size=info["size"],
|
||||
parent_id=info["parent_id"])
|
||||
parent=nodecache[info["parent"]])
|
||||
|
||||
nodecache[info["id"]] = node
|
||||
|
||||
if node.parent_id is None:
|
||||
if node.parent is None:
|
||||
root = node
|
||||
|
||||
for oldid, node in nodecache.items():
|
||||
node.children = [nodecache[child_old_id] for child_old_id in node.children]
|
||||
if node.parent_id is not None:
|
||||
node.parent_id = id(nodecache[node.parent_id]) # this may break on symlinks or other loops?
|
||||
# for oldid, node in nodecache.items():
|
||||
# node.children = [nodecache[child_old_id] for child_old_id in node.children]
|
||||
# if node.parent is not None:
|
||||
# node.parent = nodecache[node.parent] # this may break on symlinks or other loops?
|
||||
|
||||
return root
|
||||
|
||||
|
@ -264,8 +292,8 @@ def test_load_db(fpath):
|
|||
|
||||
|
||||
def main(path):
    """Ad-hoc test driver: regenerate and write the database for `path`."""
    # test_load_db(path)
    test_gen_write_db(path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -278,6 +306,13 @@ TODO:
|
|||
i.e. when dirs cant be scanned due to permission denied we'll see a difference between actual disk usage and our
|
||||
calculation. the difference can be treated as its own "unknown" cell
|
||||
- add some sort of option to prevent scans from crossing mountpoints
|
||||
- multiple roots
|
||||
- list mode:
|
||||
- hide dot files
|
||||
- list subdirs first
|
||||
- link to dir/file by permanent URL
|
||||
- we use id()s now
|
||||
- switch to path, finding a node by following the path through the database should be inexpensive
|
||||
|
||||
App planning:
|
||||
- single page webui
|
|
@ -0,0 +1,13 @@
|
|||
backports.functools-lru-cache==1.5
|
||||
cheroot==6.5.5
|
||||
CherryPy==18.1.1
|
||||
-e git+ssh://git@git.davepedu.com:223/dave/dirview.git@2970c139b9004b0d1231f0a33ab418a7b363fbbf#egg=dirview
|
||||
jaraco.functools==2.0
|
||||
Jinja2==2.10.1
|
||||
MarkupSafe==1.1.1
|
||||
more-itertools==7.0.0
|
||||
portend==2.4
|
||||
pytz==2019.1
|
||||
six==1.12.0
|
||||
tempora==1.14.1
|
||||
zc.lockfile==1.4
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
"""Packaging script for the dirview storage visualizer."""
from setuptools import setup


__version__ = "0.0.1"


setup(
    name='dirview',
    version=__version__,
    description='Storage visualizer',
    url='http://git.davepedu.com/dave/dirview',
    author='dpedu',
    author_email='dave@davepedu.com',
    packages=['dirview'],
    entry_points={
        "console_scripts": [
            "dirviewd = dirview:main"
        ]
    },
    zip_safe=False,
)
|
|
@ -0,0 +1,60 @@
|
|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>NAS Viewer</title>
    <style>
        * {
            box-sizing: border-box;
        }
        div.children > div {
            padding: 0px 15px;
        }
        div.dirs {
            float: left;
            width: 50%;
        }
        div.files {
            float: right;
            width: 50%;
        }
    </style>
</head>
<body>
    <div class="viewer">
        <h1>{{ node.path|pathjoin }}</h1>
        <div>
            <ul>
                <li><strong>Controls:</strong> {% if node.parent %}<a href="/?n={{ node.parent|id }}">up</a>{% else %}up{% endif %}</li>
                <!-- fix: this li was unclosed -->
                <li><strong>Sort by:</strong> name children size up down</li>
            </ul>

            <ul>
                <li>Type: {{ node.typ }}</li>
                <li>Size: {{ node.size|commafy }} B</li>
                <li>Total Size: {{ node.total_size|commafy }} B</li>
                <li>Recursive Children: {{ node.total_children|commafy }}</li>
                <li>Children: {{ node.children|len }}</li>
            </ul>
        </div>
        {% if node.typ in (NodeType.ROOT, NodeType.DIR) %}
        <div class="children">
            <div class="dirs">
                <h2>Subdirs:</h2>
                {% for child in node.children|sort(attribute='total_children', reverse=True) %}{% if child.typ in NodeGroup.DIRLIKE %}
                <hr />
                <a href="/?n={{ child|id }}">{{ child.name }}</a>: {{ child.total_size|commafy }}B - {{ child.total_children|commafy }} children
                {% endif %}{% endfor %}
            </div>
            <div class="files">
                <h2>Files:</h2>
                {% for child in node.children|sort(attribute='name') %}{% if child.typ in NodeGroup.FILELIKE %}
                <hr />
                <a href="/?n={{ child|id }}">{{ child.name }}</a>: {{ child.total_size|commafy }}B
                {% endif %}{% endfor %}
            </div>
        </div>
        {% endif %}
    </div>
</body>
</html>
|
Loading…
Reference in New Issue