"""Filesystem scanner that populates the Pysonic library database with dirs, artists, albums, songs and covers."""
import logging
import mimetypes
import os
import re
from contextlib import closing
from threading import Thread
from time import time

from mutagen import MutagenError
from mutagen.flac import FLAC
from mutagen.id3 import ID3
from mutagen.id3._util import ID3NoHeaderError
from mutagen.mp3 import MP3

from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES, MUSIC_EXTENSIONS, IMAGE_EXTENSIONS, IMAGE_TYPES


# Module-wide logger. NOTE: this rebinds the name `logging`, so below this line `logging` is the
# "scanner" Logger instance and no longer the stdlib module.
logging = logging.getLogger("scanner")

# Captures the run of digits at the very start of a string (used to read a track number tag).
RE_NUMBERS = re.compile(r'^([0-9]+)')


class PysonicFilesystemScanner(object):
    """
    Walks the library roots on disk and records what it finds (dirs, artists, albums, songs, covers) in the
    library database, then fills in per-song metadata read from the audio files.
    """

    def __init__(self, library):
        """
        :param library: object exposing `db.get_libraries()` and a `db.db` connection that provides `cursor()`
        """
        self.library = library

    def init_scan(self):
        """
        Start a full rescan on a background daemon thread and return immediately. The thread is kept on
        `self.scanner`.
        """
        self.scanner = Thread(target=self.rescan, daemon=True)
        self.scanner.start()

    def rescan(self):
        """
        Perform a full scan of the media library's files
        """
        start = time()
        logging.warning("Beginning library rescan")
        for parent in self.library.db.get_libraries():
            logging.info("Scanning %s", parent["path"])
            self.scan_root(parent["id"], parent["path"])
        logging.warning("Rescan complete in %ss", round(time() - start, 3))

    def scan_root(self, pid, root):
        """
        Scan a single root of the library: first record files found on disk, then read metadata for songs
        that have never been scanned.
        :param pid: parent ID
        :param root: absolute path to scan
        """
        logging.warning("Beginning file scan for library %s", pid)
        root_depth = len(self.split_path(root))
        for path, dirs, files in os.walk(root):
            # Path of the walked directory relative to the root, as a list of dir names
            child = self.split_path(path)[root_depth:]
            self.scan_dir(pid, root, child, dirs, files)

        logging.warning("Beginning metadata scan for library %s", pid)
        self.scan_metadata(pid, root, freshonly=True)

        logging.warning("Finished scan for library %s", pid)

    def create_or_get_dbdir_tree(self, cursor, pid, path):
        """
        Return the ID of the directory specified by `path`. The path will be created as necessary. The dirs table
        exists only to serve Subsonic, and could be removed without affecting the rest of the scan.
        :param cursor: database cursor to run the queries on
        :param pid: root parent the path resides in
        :param path: single-file tree as a list of dir names under the root parent
        :type path list
        :return: id of the deepest directory in `path`
        """
        assert path
        parent_id = 0  # 0 indicates a top level item in the library
        for name in path:
            parent_id = self.create_or_get_dbdir(cursor, pid, parent_id, name)
        return parent_id

    def create_or_get_dbdir(self, cursor, pid, parent_id, name):
        """
        Return the id of the `dirs` row matching (library, parent, name), inserting the row if it is missing.
        :param cursor: database cursor to run the queries on
        :param pid: library the directory belongs to
        :param parent_id: id of the parent `dirs` row, or 0 for a top level item
        :param name: directory name
        """
        cursor.execute("SELECT * FROM dirs WHERE library=? and parent=? and name=?",
                       (pid, parent_id, name, ))
        row = cursor.fetchone()
        if row is not None:
            return row['id']
        cursor.execute("INSERT INTO dirs (library, parent, name) VALUES (?, ?, ?)", (pid, parent_id, name))
        return cursor.lastrowid

    def scan_dir(self, pid, root, path, dirs, files):
        """
        Scan a single directory in the library.
        :param pid: parent id
        :param root: library root path
        :param path: scan location path, as a list of subdirs within the root
        :param dirs: dirs in the current path (not used here)
        :param files: files in the current path
        """
        # Nothing to do without files, or for files sitting directly in the library root
        if not files or len(path) == 0:
            return

        # Guess an artist from the top-level dir
        artist = path[0]

        # Guess an album from the deepest dir, if there is one below the artist
        album = None
        if len(path) > 1:
            album = path[-1]

        # str.endswith accepts a tuple, so build the suffix sets once instead of once per file
        music_suffixes = tuple(".{}".format(ext) for ext in MUSIC_EXTENSIONS)
        image_suffixes = tuple(".{}".format(ext) for ext in IMAGE_EXTENSIONS)

        with closing(self.library.db.db.cursor()) as cursor:
            # Create artist entry
            artist_dirid = self.create_or_get_dbdir_tree(cursor, pid, [path[0]])
            cursor.execute("SELECT * FROM artists WHERE dir = ?", (artist_dirid, ))
            row = cursor.fetchone()
            if row:
                artist_id = row['id']
            else:
                cursor.execute("INSERT INTO artists (libraryid, dir, name) VALUES (?, ?, ?)",
                               (pid, artist_dirid, artist))
                artist_id = cursor.lastrowid

            # Create album entry
            album_id = None
            album_dirid = self.create_or_get_dbdir_tree(cursor, pid, path)
            libpath = os.path.join(*path)  # location relative to the library root
            if album:
                cursor.execute("SELECT * FROM albums WHERE artistid = ? AND dir = ?", (artist_id, album_dirid, ))
                row = cursor.fetchone()
                if row:
                    album_id = row['id']
                else:
                    cursor.execute("INSERT INTO albums (artistid, dir, name) VALUES (?, ?, ?)",
                                   (artist_id, album_dirid, path[-1]))
                    album_id = cursor.lastrowid

            # Create song entries for audio files not yet in the database
            new_files = False
            for file in files:
                if not file.endswith(music_suffixes):
                    continue
                fpath = os.path.join(libpath, file)
                cursor.execute("SELECT id FROM songs WHERE file=?", (fpath, ))

                if not cursor.fetchall():
                    # We leave most fields blank now and return later
                    cursor.execute("INSERT INTO songs (albumid, file, size, title) "
                                   "VALUES (?, ?, ?, ?)",
                                   (album_id,
                                    fpath,
                                    os.stat(os.path.join(root, fpath)).st_size,
                                    file, ))
                    new_files = True

            # Create cover entry TODO we can probably skip this if there were no new audio files?
            if album_id:
                for file in files:
                    if not file.endswith(image_suffixes):
                        continue
                    fpath = os.path.join(libpath, file)
                    cursor.execute("SELECT id FROM covers WHERE path=?", (fpath, ))
                    if not cursor.fetchall():
                        # We leave most fields blank now and return later
                        cursor.execute("INSERT INTO covers (path) VALUES (?);", (fpath, ))
                        cursor.execute("UPDATE albums SET coverid=? WHERE id=?", (cursor.lastrowid, album_id))
                        break

            # Commit after each dir IF audio files were found. no audio == dump the artist
            # NOTE(review): nothing here rolls back when no audio was found, so whether the pending rows are
            # really dropped depends on how the connection handles transactions - confirm.
            if new_files:
                cursor.execute("COMMIT")

    def split_path(self, path):
        """
        Given a path like /foo/bar, return ['foo', 'bar']. Trailing separators are ignored, so /foo/bar/ gives
        the same result.
        """
        parts = []
        head = path
        while True:
            new_head, tail = os.path.split(head)
            if tail:
                parts.append(tail)
            elif new_head == head:
                # Nothing left to strip: reached the root/drive or an empty string
                break
            # An empty tail with a shorter head means os.path.split only dropped trailing separators
            head = new_head
        parts.reverse()
        return parts

    def scan_metadata(self, pid, root, freshonly=False):
        """
        Iterate through files in the library and update metadata
        :param pid: parent id (not used by the query below)
        :param root: library root path, joined with each song's stored relative path
        :param freshonly: only update metadata on files that have never been scanned before
        """
        # NOTE(review): the SELECT is not restricted to library `pid`, so songs of every library are joined
        # with this `root` - confirm whether that is intended.
        # Meta may have additional keys that arent in the songs table, only these are written
        song_attrs = ("title", "lastscan", "format", "length", "bitrate", "track", "year")

        select_q = "SELECT * FROM songs "
        if freshonly:
            select_q += "WHERE lastscan = -1 "
        select_q += "ORDER BY albumid"

        with closing(self.library.db.db.cursor()) as reader, \
                closing(self.library.db.db.cursor()) as writer:
            processed = 0  # commit batching counter
            for row in reader.execute(select_q):
                # Find meta, bail if the file was unreadable
                # TODO file metadata scanning could be done in parallel
                meta = self.scan_file_metadata(os.path.join(root, row['file']))
                if not meta:
                    continue
                song_meta = {k: v for k, v in meta.items() if k in song_attrs}

                # Update the song row
                update_q = "UPDATE songs SET "
                params = []
                for key, value in song_meta.items():
                    update_q += "{}=?, ".format(key)
                    params.append(value)
                update_q += "lastscan=? WHERE id=?"
                params += [int(time()), row["id"]]
                writer.execute(update_q, params)

                # If the metadata has an artist or album name, update the relevant items
                if "album" in meta:
                    writer.execute("UPDATE albums SET name=? WHERE id=?", (meta["album"], row["albumid"]))
                if "artist" in meta:
                    album = writer.execute("SELECT artistid FROM albums WHERE id=?", (row['albumid'], )).fetchone()
                    # Songs stored directly in an artist dir have no album row; skip instead of crashing
                    if album is not None:
                        writer.execute("UPDATE artists SET name=? WHERE id=?", (meta["artist"], album["artistid"]))

                # Commit every 50 items
                processed += 1
                if processed >= 50:
                    writer.execute("COMMIT")
                    processed = 0

            # Commit whatever is left from the last partial batch
            if processed != 0:
                writer.execute("COMMIT")

    def scan_file_metadata(self, fpath):
        """
        Scan the file for metadata.
        :param fpath: path to the file to scan
        :return: dict of metadata, or None if the guessed mime type is not in MUSIC_TYPES or the file could not
                 be read
        """
        ftype, extra = mimetypes.guess_type(fpath)

        if ftype in MUSIC_TYPES:
            return self.scan_mutagen_metadata(fpath, ftype)

    def scan_mutagen_metadata(self, fpath, ftype):
        """
        Read metadata from an audio file using mutagen.
        :param fpath: path to the file to scan
        :param ftype: mime type of the file, stored as the "format" value
        :return: dict always holding "format" plus whichever of "length", "bitrate", "track", "artist", "album",
                 "title", "year" could be read; None if mutagen failed to open the file
        """
        meta = {"format": ftype}
        try:
            # Open file with mutagen
            if ftype in MPX_TYPES:
                audio = MP3(fpath)
                if audio.info.sketchy:
                    logging.warning("media reported as sketchy: %s", fpath)
            elif ftype in FLAC_TYPES:
                audio = FLAC(fpath)
            else:
                audio = ID3(fpath)
        except ID3NoHeaderError:
            return
        except MutagenError as m:
            logging.error("failed to read audio information: %s", m)
            return

        # Each field is best-effort: a missing attribute or tag just leaves the key out of the result
        try:
            meta["length"] = int(audio.info.length)
        except (ValueError, AttributeError):
            pass
        try:
            bitrate = int(audio.info.bitrate)
            meta["bitrate"] = bitrate
        except (ValueError, AttributeError):
            pass
        try:
            # Keep only the leading digits of the track tag
            meta["track"] = int(RE_NUMBERS.findall(''.join(audio['TRCK'].text))[0])
        except (KeyError, IndexError):
            pass
        try:
            meta["artist"] = ''.join(audio['TPE1'].text)
        except KeyError:
            pass
        try:
            meta["album"] = ''.join(audio['TALB'].text)
        except KeyError:
            pass
        try:
            meta["title"] = ''.join(audio['TIT2'].text)
        except KeyError:
            pass
        try:
            meta["year"] = audio['TDRC'].text[0].year
        except (KeyError, IndexError):
            pass
        logging.info("got all media info from %s", fpath)

        return meta