renames and moves

dave 2019-05-24 19:46:06 -07:00
parent 9aedf2f53d
commit 62f3729a83
2 changed files with 42 additions and 41 deletions

View File

@@ -6,7 +6,7 @@ import logging
 import cherrypy
 from threading import Thread
 from jinja2 import Environment, FileSystemLoader, select_autoescape
-from dirview.dirtools import gen_db, gen_index, NodeType, NodeGroup
+from dirview.dirtools import gen_db, gen_node_index, NodeType, NodeGroup
 APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
@@ -24,7 +24,7 @@ class DbUpdater(Thread):
         logging.info("Updating database...")
         self.root = gen_db(self.root_path)
         logging.info("Generating index...")
-        self.index = gen_index(self.root)
+        self.index = gen_node_index(self.root)
         logging.info("Warming caches...")
         self.root.total_size  # calculating these requires recursing over all nodes
         self.root.total_children
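
The warm-up step works because total_size and total_children are computed lazily and cached on first access, so touching them once at the root recurses the whole tree and every later request gets the memoized value. A minimal sketch of that pattern, with the memoization details assumed rather than taken from the repo:

class Node:
    def __init__(self, name, size=0, children=None):
        self.name = name
        self.size = size
        self.children = children or []
        self._total_size = None  # memo slot, filled on first access

    @property
    def total_size(self):
        # Recurse once, then serve the cached value on every later access.
        if self._total_size is None:
            self._total_size = self.size + sum(c.total_size for c in self.children)
        return self._total_size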

View File

@@ -94,7 +94,7 @@ class Node:
     def __hash__(self):
         return id(self)
-    # def __str__(self): # TODO
+    # def __str__(self): # TODO, because the default str() shows all the children recursively
     # pass
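
One way to eventually satisfy that TODO without the recursive blow-up is to summarize the children instead of embedding them; a hypothetical sketch, not the repo's implementation:

    def __str__(self):
        # Summarize rather than recurse, so printing a node stays cheap.
        return f"<Node {self.name!r}: {len(self.children)} children, {self.size} bytes>"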
@@ -177,7 +177,7 @@ def serialize_db(db):
         yield node.serialize()
-def gen_index(db):
+def gen_node_index(db):
     index = {}
     for node in db.iter():
         index[id(node)] = node
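
The renamed gen_node_index walks the tree once and maps each node's id() back to the node itself, so an id handed out externally (for example, embedded in a URL) can be resolved to the live object in O(1). Assuming a loaded tree, usage would look like:

index = gen_node_index(root)
node = index[requested_id]  # requested_id is hypothetical, e.g. parsed from a request path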
@@ -189,6 +189,43 @@ def write_db(db, fobj):
         fobj.write(json.dumps(ob) + "\n")
+def load_db(fpath):
+    """
+    Loading the db:
+    1) parse all node objects and save them in a cache keyed by the embedded IDs
+    2) for each node in the cache, re-establish the child/parent pointers
+    Note that the cache is discarded and does NOT become the node id cache, because it is keyed by the serialized IDs.
+    On my i7-7920HQ CPU @ 3.10GHz, loading a 276M dump with 2.2M lines takes 22s.
+    """
+    nodecache = {}  # mapping of serialized_id -> object
+    root = None
+    with open(fpath) as f:
+        for line in f:  # stream the file; readlines() would hold the whole dump in memory
+            info = json.loads(line)
+            node = Node(name=info["name"],
+                        typ=NodeType(info["typ"]),
+                        children=info["children"],  # keep as IDs for now
+                        size=info["size"],
+                        parent=info["parent"])  # also kept as an ID; resolving via nodecache here would assume the dump is written parent-first
+            nodecache[info["id"]] = node
+    for oldid, node in nodecache.items():
+        node.children = [nodecache[child_old_id] for child_old_id in node.children]
+        if node.parent is not None:
+            node.parent = nodecache[node.parent]
+        else:
+            root = node  # only the root was serialized without a parent
+    return root
 def test_gen_write_db(path):
     path = os.path.normpath(os.path.abspath(path))
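
load_db is the inverse of write_db: one JSON object per line, with the child/parent pointers flattened to IDs. Judging only from the fields the loader reads, each record would look roughly like this (values illustrative, not from a real dump):

{"id": 139788, "name": "src", "typ": 2, "size": 4096, "parent": 139700, "children": [139801, 139802]}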
@@ -238,42 +275,6 @@ def test_gen_write_db(path):
     # pass
-def load_db(fpath):
-    """
-    Loading the db
-    1) parse all node objects and save them in a cache keyed by the embedded IDs
-    2) for each node in the cache:
-    3) re-establish child pointers
-    4) re-establish parent pointers
-    On my i7-7920HQ CPU @ 3.10GHz, loading a 276M dump with 2.2M lines takes 22s
-    """
-    nodecache = {}  # mapping of serialized_id->object
-    root = None
-    with open(fpath) as f:
-        for line in f.readlines():
-            info = json.loads(line)
-            node = Node(name=info["name"],
-                        typ=NodeType(info["typ"]),
-                        children=info["children"],  # keep as IDs for now
-                        size=info["size"],
-                        parent=nodecache[info["parent"]])
-            nodecache[info["id"]] = node
-            if node.parent is None:
-                root = node
-    # for oldid, node in nodecache.items():
-    #     node.children = [nodecache[child_old_id] for child_old_id in node.children]
-    #     if node.parent is not None:
-    #         node.parent = nodecache[node.parent]  # this may break on symlinks or other loops?
-    return root
 def test_load_db(fpath):
     print("ready")
     start = time()
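
The version removed above resolved parent=nodecache[info["parent"]] at parse time, which only works when every parent line precedes its children in the dump; the two-pass replacement drops that ordering assumption. A contrived two-line dump (IDs illustrative) shows the failure mode:

# Single-pass loader, child serialized before its parent:
#   {"id": 2, "name": "child", "parent": 1, ...}   -> nodecache[1] raises KeyError, id 1 not parsed yet
#   {"id": 1, "name": "root", "parent": null, ...}
# Parsing everything first and resolving pointers afterwards makes the order irrelevant.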
@@ -287,7 +288,7 @@ def test_load_db(fpath):
     print(f"counted {count} nodes in {round(time()-start, 2)}s")
     start = time()
-    index = gen_index(db)
+    index = gen_node_index(db)
     print(f"generated index with {len(index)} nodes in {round(time()-start, 2)}s")