basic webapp

This commit is contained in:
dave 2019-05-24 19:41:45 -07:00
parent 2970c139b9
commit 9aedf2f53d
5 changed files with 300 additions and 32 deletions

142
dirview/__init__.py Normal file
View File

@ -0,0 +1,142 @@
import os
import sys
# import locale
# locale.setlocale(locale.LC_ALL, 'en_US')
import logging
import cherrypy
from threading import Thread
from jinja2 import Environment, FileSystemLoader, select_autoescape
from dirview.dirtools import gen_db, gen_index, NodeType, NodeGroup
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
class DbUpdater(Thread):
    """
    Background thread that scans a directory tree and builds the node database.

    ``root`` and ``index`` are both ``None`` until a scan has completely
    finished; they are only published once the index exists and the size /
    child-count caches have been warmed. Readers that see a non-None ``root``
    can therefore rely on ``index`` being populated as well.

    :param root_path: directory to scan
    :param cache_dir: cache directory given on the command line; stored on the
                      instance but not used by the scan itself
    """

    def __init__(self, root_path, cache_dir):
        super().__init__()
        # daemon thread: do not keep the process alive while a scan is running
        self.daemon = True
        self.root_path = root_path
        self.cache_dir = cache_dir
        self.root = None
        self.index = None

    def run(self):
        """
        Scan ``root_path``, build the index, warm the caches, then publish the results.
        """
        logging.info("Updating database...")
        root = gen_db(self.root_path)

        logging.info("Generating index...")
        index = gen_index(root)

        logging.info("Warming caches...")
        root.total_size  # calculating these requires recursing all nodes
        root.total_children

        # Publish index before root: readers test `root is None` to decide whether
        # the database is ready, so root must be the last attribute to become visible.
        self.index = index
        self.root = root
        logging.info("Database update complete!")
class AppWeb(object):
    """
    CherryPy application object that renders nodes of the scanned tree as HTML.

    :param database: object exposing ``root`` and ``index`` attributes, both of
                     which are None until the scan has produced them
    :param template_dir: directory the Jinja2 templates are loaded from
    """

    def __init__(self, database, template_dir):
        self.db = database
        self.tpl = Environment(loader=FileSystemLoader(template_dir),
                               autoescape=select_autoescape(['html', 'xml']))
        # helper filters made available to every template
        self.tpl.filters.update(id=id,
                                repr=repr,
                                len=len,
                                pathjoin=lambda x: os.path.join(*x),
                                commafy=lambda x: format(x, ',d'))

    def render(self, template, **kwargs):
        """
        Render a template

        :param template: template file name, relative to the template directory
        :param kwargs: variables passed to the template; NodeType and NodeGroup
                       are always added
        :return: the rendered page as a string
        """
        return self.tpl.get_template(template). \
            render(**kwargs,
                   NodeType=NodeType,
                   NodeGroup=NodeGroup)

    @cherrypy.expose
    def index(self, n=None):
        """
        Render the page for one node.

        :param n: key of the node in the database index (taken from the query
                  string); when omitted the root node is shown
        :return: rendered HTML, or a plain notice while the scan is still running
        :raises cherrypy.HTTPError: 404 when ``n`` is not an integer or is not
                                    present in the index
        """
        from time import time
        start = time()

        root = self.db.root
        index = self.db.index
        # Both are needed to serve a page; either may still be None mid-scan.
        if root is None or index is None:
            return "I'm still scanning your files, check back soon."

        if n is None:
            node = root
        else:
            # n comes straight from the query string: it can be non-numeric, or a
            # list when the parameter is repeated. Treat both as "no such node".
            try:
                node_id = int(n)
            except (TypeError, ValueError):
                raise cherrypy.HTTPError(404)
            try:
                node = index[node_id]
            except KeyError:
                raise cherrypy.HTTPError(404)

        page = self.render("page.html", node=node)
        dur = time() - start
        return page + f"\n<!-- render time: {round(dur, 4)} -->"
# yield str(self.db.root)
# yield "Ready<br />"
# from time import time
# start = time()
# num_nodes = len([i for i in self.db.root.iter()])
# dur = time() - start
# yield f"num nodes: {num_nodes} in {round(dur, 3)}"
def main():
    """
    Command line entry point: parse arguments, start the background scan and
    serve the web UI with CherryPy until the engine stops.
    """
    import argparse
    import signal

    cli = argparse.ArgumentParser(description="NAS storage visualizer")
    cli.add_argument('-d', '--dir', required=True, help="directory to scan")
    cli.add_argument('--cache', help="cache dir")
    cli.add_argument('-p', '--port', default=8080, type=int, help="http port to listen on")
    cli.add_argument('--debug', action="store_true", help="enable development options")
    opts = cli.parse_args()

    # INFO-level messages are only shown in debug mode
    if opts.debug:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # in debug mode templates are looked up relative to the working directory
    if opts.debug:
        tpl_dir = "templates"
    else:
        tpl_dir = os.path.join(APPROOT, "templates")

    # kick off the scan in the background before the web server comes up
    updater = DbUpdater(opts.dir, opts.cache)
    updater.start()

    app = AppWeb(updater, tpl_dir)

    static_conf = {
        "tools.staticdir.on": True,
        "tools.staticdir.dir": os.path.join(APPROOT, "static"),  # TODO non --debug path
    }
    mount_conf = {
        '/': {},
        '/static': static_conf,
        # '/login': {'tools.auth_basic.on': True,
        #            'tools.auth_basic.realm': 'webapp',
        #            'tools.auth_basic.checkpassword': validate_password}
    }
    cherrypy.tree.mount(app, '/', mount_conf)

    server_conf = {
        'tools.sessions.on': False,
        'request.show_tracebacks': True,  # ??
        'server.show_tracebacks': True,  # ??
        'server.socket_port': opts.port,
        'server.socket_host': '0.0.0.0',
        'server.thread_pool': 5,
        'engine.autoreload.on': opts.debug,
        'log.screen': False,  # ??
    }
    cherrypy.config.update(server_conf)

    def on_signal(signum, stack):
        # log the signal number, then ask the CherryPy engine to exit
        logging.critical('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, on_signal)

    # This is also the maximum nested directory depth supported
    sys.setrecursionlimit(1000)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        logging.info("API has shut down")
        cherrypy.engine.exit()
# Run the web application only when this module is executed as a script.
if __name__ == '__main__':
    main()

View File

@ -13,18 +13,23 @@ from time import time
import json
import resource
import typing
import logging
# import ipdb
class NodeType(Enum):
    """
    Kind of filesystem entry a node represents.

    Values are assigned by auto() in declaration order, so the order of the
    members below must not change.
    """
    DIR = auto()
    FILE = auto()
    ROOT = auto()  # behaves like a dir but has special handling in some places
    LINK = auto()
    SPECIAL = auto()
class NodeGroup:
    """
    Named sets of NodeType members, for membership tests such as
    ``node.typ in NodeGroup.DIRLIKE``.
    """
    # directory-like node types
    DIRLIKE = {NodeType.ROOT, NodeType.DIR}
    # file-like node types
    FILELIKE = {NodeType.SPECIAL, NodeType.LINK, NodeType.FILE}
# this costs about 380 bytes per file/directory
@dataclass
class Node:
@ -32,16 +37,26 @@ class Node:
typ: int
children: list
size: int
parent_id: int
parent: "Node"
total_size_cache: int = None
@property
def total_size(self) -> int:
    """
    Size of this node including everything below it.

    For DIR and ROOT nodes this is the sum of the children's total_size; for
    any other type it is the node's own size. The value is computed on first
    access and stored in total_size_cache; later accesses return the stored value.
    """
    if self.total_size_cache is None:
        if self.typ in {NodeType.DIR, NodeType.ROOT}:
            # total_size is a property, so it is read as an attribute, not called
            self.total_size_cache = sum(node.total_size for node in self.children)
        else:
            self.total_size_cache = self.size
    return self.total_size_cache
total_children_cache: int = None
@property
def total_children(self) -> int:
    """
    Number of nodes below this one, counted recursively.

    Computed on first access as the number of direct children plus each
    child's own total_children, then kept in total_children_cache.
    """
    count = self.total_children_cache
    if count is None:
        count = len(self.children)
        for child in self.children:
            count += child.total_children
        self.total_children_cache = count
    return count
def serialize(self) -> tuple:
"""
@ -54,7 +69,7 @@ class Node:
typ=self.typ.value,
children=[id(n) for n in self.children],
size=self.size,
parent_id=self.parent_id,
parent=id(self.parent),
id=id(self))
def iter(self, include_self=True) -> typing.Generator["Node", None, None]:
@ -66,6 +81,19 @@ class Node:
for child in self.children:
yield from child.iter()
@property
def path(self):
    """
    Names of all nodes from the topmost ancestor down to this node.

    :return: list of name strings, outermost ancestor first and this node's
             own name last
    """
    parts = []
    node = self  # walk upwards with a cursor instead of rebinding self
    while node is not None:
        parts.append(node.name)
        node = node.parent
    # names were collected leaf-first; callers expect them root-first
    parts.reverse()
    return parts
def __hash__(self):
    """
    Hash a node by object identity, so two distinct node objects hash differently.
    """
    # NOTE(review): Node is a @dataclass; if its generated __eq__ is in effect, nodes
    # that compare equal by field value can still hash differently - confirm intended.
    return id(self)
# def __str__(self): # TODO
# pass
@ -82,7 +110,7 @@ def get_type(dirpath):
# TODO other types
def gen_db_recurse(dirpath, parent_id=None, is_root=False):
def gen_db_recurse(dirpath, parent=None, is_root=False):
"""
returns a node representing the file/directory at dirpath
:param dirpath: absolute path to the item
@ -90,11 +118,11 @@ def gen_db_recurse(dirpath, parent_id=None, is_root=False):
children = []
node = Node(name=os.path.basename(dirpath),
node = Node(name=dirpath if is_root else os.path.basename(dirpath),
typ=NodeType.ROOT if is_root else get_type(dirpath),
children=children,
size=0,
parent_id=parent_id)
parent=parent)
if node.typ in {NodeType.FILE}: # todo account for link and dir sizes somewhere
node.size = os.path.getsize(dirpath)
@ -104,9 +132,9 @@ def gen_db_recurse(dirpath, parent_id=None, is_root=False):
try:
flist = os.listdir(dirpath)
except PermissionError as e:
print(f"Could not access {dirpath}: {e}")
logging.info(f"Could not access {dirpath}: {e}")
for i in flist: # TODO we could probably parallelize the recursion down different trees?
children.append(gen_db_recurse(os.path.join(dirpath, i), parent_id=id(node)))
children.append(gen_db_recurse(os.path.join(dirpath, i), parent=node))
return node
@ -134,13 +162,13 @@ def serialize_db(db):
This would be serialized as:
{"name": "root_dir", "typ": 3, "children": [1, 2], "size": 0, "parent_id": null, "id": 0}
{"name": "hello.txt", "typ": 2, "children": [], "size": 92863, "parent_id": 0, "id": 1}
{"name": "foo", "typ": 1, "children": [3], "size": 0, "parent_id": 0, "id": 2}
{"name": "bar.txt", "typ": 2, "children": [], "size": 19459, "parent_id": 2, "id": 3}
{"name": "root_dir", "typ": 3, "children": [1, 2], "size": 0, "parent": null, "id": 0}
{"name": "hello.txt", "typ": 2, "children": [], "size": 92863, "parent": 0, "id": 1}
{"name": "foo", "typ": 1, "children": [3], "size": 0, "parent": 0, "id": 2}
{"name": "bar.txt", "typ": 2, "children": [], "size": 19459, "parent": 2, "id": 3}
Note that:
- parent_id is null on the root node
- parent is null on the root node
- child/parent relationships are by node id
- it is possible to append entries to the dump at a later time
- removing files directly from the serialized dump is technically possible
@ -201,8 +229,8 @@ def test_gen_write_db(path):
# nodecache = {}
with open("testdb.jsonl", "w") as f:
write_db(db, f)
# with open("testdb.jsonl", "w") as f:
# write_db(db, f)
# for node in recurse_nodes(db):
# print(node.name)
@ -216,7 +244,7 @@ def load_db(fpath):
1) parse all node objects and save them in a cache keyed by the embedded IDs
2) for each node in the cache:
3) re-establish child pointers
4) re-establish parent pointers TODO if we change parents to be pointers too
4) re-establish parent pointers
On my i7-7920HQ CPU @ 3.10GHz, loading a 276M dump with 2.2M lines takes 22s
"""
@ -231,17 +259,17 @@ def load_db(fpath):
typ=NodeType(info["typ"]),
children=info["children"], # keep as IDs for now
size=info["size"],
parent_id=info["parent_id"])
parent=nodecache[info["parent"]])
nodecache[info["id"]] = node
if node.parent_id is None:
if node.parent is None:
root = node
for oldid, node in nodecache.items():
node.children = [nodecache[child_old_id] for child_old_id in node.children]
if node.parent_id is not None:
node.parent_id = id(nodecache[node.parent_id]) # this may break on symlinks or other loops?
# for oldid, node in nodecache.items():
# node.children = [nodecache[child_old_id] for child_old_id in node.children]
# if node.parent is not None:
# node.parent = nodecache[node.parent] # this may break on symlinks or other loops?
return root
@ -264,8 +292,8 @@ def test_load_db(fpath):
def main(path):
# test_gen_write_db(path)
test_load_db(path)
test_gen_write_db(path)
# test_load_db(path)
if __name__ == '__main__':
@ -278,6 +306,13 @@ TODO:
i.e. when dirs can't be scanned due to permission denied we'll see a difference between actual disk usage and our
calculation. the difference can be treated as its own "unknown" cell
- add some sort of option to prevent scans from crossing mountpoints
- multiple roots
- list mode:
- hide dot files
- list subdirs first
- link to dir/file by permanent URL
- we use id()s now
- switch to path, finding a node by following the path through the database should be inexpensive
App planning:
- single page webui

13
requirements.txt Normal file
View File

@ -0,0 +1,13 @@
backports.functools-lru-cache==1.5
cheroot==6.5.5
CherryPy==18.1.1
-e git+ssh://git@git.davepedu.com:223/dave/dirview.git@2970c139b9004b0d1231f0a33ab418a7b363fbbf#egg=dirview
jaraco.functools==2.0
Jinja2==2.10.1
MarkupSafe==1.1.1
more-itertools==7.0.0
portend==2.4
pytz==2019.1
six==1.12.0
tempora==1.14.1
zc.lockfile==1.4

18
setup.py Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python3
from setuptools import setup
__version__ = "0.0.1"
# Package metadata; installs the `dirviewd` console command, which runs dirview.main().
setup(
    name="dirview",
    version=__version__,
    description="Storage visualizer",
    url="http://git.davepedu.com/dave/dirview",
    author="dpedu",
    author_email="dave@davepedu.com",
    packages=["dirview"],
    entry_points={
        "console_scripts": [
            "dirviewd = dirview:main",
        ],
    },
    zip_safe=False,
)

60
templates/page.html Normal file
View File

@ -0,0 +1,60 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>NAS Viewer</title>
<style>
* {
box-sizing: border-box;
}
div.children > div {
padding: 0px 15px;
}
div.dirs {
float: left;
width: 50%;
}
div.files {
float: right;
width: 50%;
}
</style>
</head>
<body>
<div class="viewer">
<h1>{{ node.path|pathjoin }}</h1>
<div>
<ul>
<li><strong>Controls:</strong> {% if node.parent %}<a href="/?n={{ node.parent|id }}">up</a>{% else %}up{% endif %}</li>
<li><strong>Sort by:</strong> name children size up down</li>
</ul>
<ul>
<li>Type: {{ node.typ }}</li>
<li>Size: {{ node.size|commafy }} B</li>
<li>Total Size: {{ node.total_size|commafy }} B</li>
<li>Recursive Children: {{ node.total_children|commafy }}</li>
<li>Children: {{ node.children|len }}</li>
</ul>
</div>
{% if node.typ in (NodeType.ROOT, NodeType.DIR) %}
<div class="children">
<div class="dirs">
<h2>Subdirs:</h2>
{% for child in node.children|sort(attribute='total_children', reverse=True) %}{% if child.typ in NodeGroup.DIRLIKE %}
<hr />
<a href="/?n={{ child|id }}">{{ child.name }}</a>: {{ child.total_size|commafy }}B - {{ child.total_children|commafy }} children
{% endif %}{% endfor %}
</div>
<div class="files">
<h2>Files:</h2>
{% for child in node.children|sort(attribute='name') %}{% if child.typ in NodeGroup.FILELIKE %}
<hr />
<a href="/?n={{ child|id }}">{{ child.name }}</a>: {{ child.total_size|commafy }}B
{% endif %}{% endfor %}
</div>
</div>
{% endif %}
</div>
</body>
</html>