|
|
|
@ -1,10 +1,11 @@
|
|
|
|
|
import os |
|
|
|
|
import re |
|
|
|
|
import logging |
|
|
|
|
from contextlib import closing |
|
|
|
|
import mimetypes |
|
|
|
|
from time import time |
|
|
|
|
from threading import Thread |
|
|
|
|
from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES |
|
|
|
|
from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES, MUSIC_EXTENSIONS, IMAGE_EXTENSIONS, IMAGE_TYPES |
|
|
|
|
from mutagen.id3 import ID3 |
|
|
|
|
from mutagen import MutagenError |
|
|
|
|
from mutagen.id3._util import ID3NoHeaderError |
|
|
|
@ -25,151 +26,236 @@ class PysonicFilesystemScanner(object):
|
|
|
|
|
self.scanner.start() |
|
|
|
|
|
|
|
|
|
def rescan(self):
    """
    Perform a full scan of the media library's files.

    Every library returned by ``self.library.db.get_libraries()`` is passed to
    ``scan_root`` (by its ``id`` and ``path``), one after another. The total
    elapsed time is logged once all libraries have been processed.
    """
    start = time()
    logging.warning("Beginning library rescan")
    for parent in self.library.db.get_libraries():
        logging.info("Scanning %s", parent["path"])
        self.scan_root(parent["id"], parent["path"])
    logging.warning("Rescan complete in %ss", round(time() - start, 3))
|
|
|
|
|
|
|
|
|
def scan_root(self, pid, root):
    """
    Scan a single root of the library.

    Walks the directory tree below ``root`` and calls ``scan_dir`` for every
    directory visited, then runs ``scan_metadata`` with ``freshonly=True``.

    :param pid: parent ID, passed through to scan_dir and scan_metadata
    :param root: absolute path to scan
    """
    logging.warning("Beginning file scan for library %s", pid)

    # Number of leading components that belong to the root itself
    prefix_len = len(self.split_path(root))
    for current, subdirs, filenames in os.walk(root):
        # Components of the visited directory below the root
        relative = self.split_path(current)[prefix_len:]
        self.scan_dir(pid, root, relative, subdirs, filenames)

    logging.warning("Beginning metadata scan for library %s", pid)
    self.scan_metadata(pid, root, freshonly=True)

    logging.warning("Finished scan for library %s", pid)
|
|
|
|
|
|
|
|
|
def scan_dir(self, pid, root, path, dirs, files):
    """
    Scan a single directory in the library.

    The first path component is used as the artist directory and, when the
    path is deeper than one level, the last component as the album name.
    Missing artist, album, song and cover rows are inserted. The transaction
    is committed only if at least one new song row was inserted.

    :param pid: parent id, stored as the artist's libraryid
    :param root: library root path
    :param path: scan location path, as a list of subdirs within the root
    :param dirs: dirs in the current path (not used here)
    :param files: files in the current path
    """
    # If there are no files then just bail
    if not files:
        return
    # If it is the library root just bail
    if not path:
        return

    # Guess an artist from the dir
    artist = path[0]

    # Guess an album from the dir, if possible
    album = path[-1] if len(path) > 1 else None

    # Build the suffix tuples once instead of a fresh list per file
    music_suffixes = tuple(".{}".format(ext) for ext in MUSIC_EXTENSIONS)
    image_suffixes = tuple(".{}".format(ext) for ext in IMAGE_EXTENSIONS)

    with closing(self.library.db.db.cursor()) as cursor:
        # Create artist entry
        cursor.execute("SELECT * FROM artists WHERE dir = ?", (artist, ))
        row = cursor.fetchone()
        if row:
            artist_id = row['id']
        else:
            cursor.execute("INSERT INTO artists (libraryid, dir, name) VALUES (?, ?, ?)",
                           (pid, artist, artist))
            artist_id = cursor.lastrowid

        # Create album entry
        album_id = None
        libpath = os.path.join(*path)
        if album:
            cursor.execute("SELECT * FROM albums WHERE artistid = ? AND dir = ?", (artist_id, libpath, ))
            row = cursor.fetchone()
            if row:
                album_id = row['id']
            else:
                cursor.execute("INSERT INTO albums (artistid, dir, name) VALUES (?, ?, ?)",
                               (artist_id, libpath, album))
                album_id = cursor.lastrowid

        new_files = False
        for filename in files:
            if not filename.endswith(music_suffixes):
                continue
            fpath = os.path.join(libpath, filename)
            cursor.execute("SELECT id FROM songs WHERE file=?", (fpath, ))

            if not cursor.fetchall():
                # We leave most fields blank now and return later
                cursor.execute("INSERT INTO songs (albumid, file, size, title) "
                               "VALUES (?, ?, ?, ?)",
                               (album_id,
                                fpath,
                                os.stat(os.path.join(root, fpath)).st_size,
                                filename, ))
                new_files = True

        # Create cover entry TODO we can probably skip this if there were no new audio files?
        if album_id:
            for filename in files:
                if not filename.endswith(image_suffixes):
                    continue
                fpath = os.path.join(libpath, filename)
                cursor.execute("SELECT id FROM covers WHERE path=?", (fpath, ))
                if not cursor.fetchall():
                    # We leave most fields blank now and return later
                    cursor.execute("INSERT INTO covers (path) VALUES (?);", (fpath, ))
                    cursor.execute("UPDATE albums SET coverid=? WHERE id=?", (cursor.lastrowid, album_id))
                    # NOTE(review): the nesting of this break could not be recovered from the
                    # source view; it is assumed to stop the loop once a new cover is registered.
                    break

        if new_files:  # Commit after each dir IF audio files were found. no audio == dump the artist
            cursor.execute("COMMIT")
|
|
|
|
|
|
|
|
|
def split_path(self, path):
    """
    Given a path like /foo/bar, return ['foo', 'bar'].

    Trailing separators are ignored, so /foo/bar/ also yields ['foo', 'bar'].
    An empty path or a bare root yields an empty list.

    :param path: path string to break into components
    :return: list of path components, outermost first
    """
    parts = []
    remaining = path
    while True:
        head, tail = os.path.split(remaining)
        if tail:
            parts.append(tail)
        elif head == remaining:
            # Nothing left to strip: reached the root or an empty string
            break
        # An empty tail with a shorter head means a trailing separator was
        # dropped; keep splitting instead of stopping early.
        remaining = head
    parts.reverse()
    return parts
|
|
|
|
|
|
|
|
|
def scan_metadata(self, pid, root, freshonly=False):
    """
    Iterate through files in the library and update metadata.

    Each selected ``songs`` row is passed to ``scan_file_metadata``; rows for
    which it returns nothing are skipped. Known song columns are copied from
    the returned metadata and ``lastscan`` is stamped with the current time.
    If the metadata carries an album or artist name, the related ``albums`` /
    ``artists`` row is renamed as well.

    :param pid: library id; not used by the query, which therefore selects
        songs regardless of library
    :param root: library root path that each song's ``file`` value is joined onto
    :param freshonly: only update metadata on files that have never been scanned before
    """
    # Song columns that may be copied from scanned metadata; meta may have
    # additional keys that aren't in the songs table. lastscan is always
    # stamped explicitly below, so it is never taken from the metadata.
    song_attrs = ("title", "format", "length", "bitrate", "track", "year")
    batch_size = 50

    select_q = "SELECT * FROM songs "
    if freshonly:
        select_q += "WHERE lastscan = -1 "
    select_q += "ORDER BY albumid"

    with closing(self.library.db.db.cursor()) as reader, \
            closing(self.library.db.db.cursor()) as writer:
        processed = 0  # commit batching counter
        for row in reader.execute(select_q):
            # Find meta, bail if the file was unreadable
            # TODO file metadata scanning could be done in parallel
            meta = self.scan_file_metadata(os.path.join(root, row['file']))
            if not meta:
                continue

            song_meta = {k: v for k, v in meta.items() if k in song_attrs}

            # Update the song row; column names come only from song_attrs
            assignments = ["{}=?".format(key) for key in song_meta] + ["lastscan=?"]
            update_q = "UPDATE songs SET {} WHERE id=?".format(", ".join(assignments))
            params = list(song_meta.values()) + [int(time()), row["id"]]
            writer.execute(update_q, params)

            # If the metadata has an artist or album name, update the relevant items
            if "album" in meta:
                writer.execute("UPDATE albums SET name=? WHERE id=?", (meta["album"], row["albumid"]))
            if "artist" in meta:
                album = writer.execute("SELECT artistid FROM albums WHERE id=?", (row['albumid'], )).fetchone()
                # Songs without an album row have no artist to resolve
                if album:
                    writer.execute("UPDATE artists SET name=? WHERE id=?", (meta["artist"], album["artistid"]))

            # Commit every batch_size items
            processed += 1
            if processed >= batch_size:
                writer.execute("COMMIT")
                processed = 0

        # Flush whatever is left of the last partial batch
        if processed != 0:
            writer.execute("COMMIT")
|
|
|
|
|
|
|
|
|
def scan_file_metadata(self, fpath):
    """
    Scan the file for metadata.

    The MIME type is guessed from the file name. Only types listed in
    MUSIC_TYPES are handed on to ``scan_mutagen_metadata``.

    :param fpath: path to the file to scan
    :return: the result of scan_mutagen_metadata for music types, otherwise None
    """
    guessed_type = mimetypes.guess_type(fpath)[0]
    if guessed_type not in MUSIC_TYPES:
        return None
    return self.scan_mutagen_metadata(fpath, guessed_type)
|
|
|
|
|
|
|
|
|
def scan_mutagen_metadata(self, fpath, ftype):
    """
    Read audio metadata from a file using mutagen.

    The reader is chosen by ``ftype``: MP3 for MPX_TYPES, FLAC for FLAC_TYPES
    and ID3 for anything else. Every field is optional; a field that is
    missing or cannot be converted is left out of the result.

    :param fpath: path to the file to read
    :param ftype: MIME type of the file, stored in the result as "format"
    :return: dict that always has "format" and may have "length", "bitrate",
        "track", "artist", "album", "title" and "year"; None if the file has
        no ID3 header or mutagen fails to read it
    """
    meta = {"format": ftype}

    # Open file with mutagen
    try:
        if ftype in MPX_TYPES:
            audio = MP3(fpath)
            if audio.info.sketchy:
                logging.warning("media reported as sketchy: %s", fpath)
        elif ftype in FLAC_TYPES:
            audio = FLAC(fpath)
        else:
            audio = ID3(fpath)
    except ID3NoHeaderError:
        return None
    except MutagenError as err:
        logging.error("failed to read audio information: %s", err)
        return None

    # Stream properties; the opened object may not expose .info at all
    try:
        meta["length"] = int(audio.info.length)
    except (ValueError, AttributeError):
        pass
    try:
        meta["bitrate"] = int(audio.info.bitrate)
    except (ValueError, AttributeError):
        pass

    # Track number: first run of digits found in the TRCK text
    try:
        meta["track"] = int(RE_NUMBERS.findall(''.join(audio['TRCK'].text))[0])
    except (KeyError, IndexError):
        pass

    # Plain text frames, each copied verbatim when present
    for field, frame in (("artist", "TPE1"), ("album", "TALB"), ("title", "TIT2")):
        try:
            meta[field] = ''.join(audio[frame].text)
        except KeyError:
            pass

    try:
        meta["year"] = audio['TDRC'].text[0].year
    except (KeyError, IndexError):
        pass

    logging.info("got all media info from %s", fpath)

    return meta
|
|
|
|