Alternative backend for the Subsonic music server
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

287 lines
11 KiB

import os
import re
import logging
from contextlib import closing
import mimetypes
from time import time
from threading import Thread
from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES, MUSIC_EXTENSIONS, IMAGE_EXTENSIONS, IMAGE_TYPES
from mutagen.id3 import ID3
from mutagen import MutagenError
from mutagen.id3._util import ID3NoHeaderError
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
# Logger used by everything in this module.
# NOTE: this rebinds the name `logging`, so from here on `logging` is the "scanner" Logger instance and
# the stdlib `logging` module itself is no longer reachable under that name in this file.
logging = logging.getLogger("scanner")
# Captures the run of digits at the very start of a string (no match if the string starts with a non-digit)
RE_NUMBERS = re.compile(r'^([0-9]+)')
class PysonicFilesystemScanner(object):
    """
    Walks the library roots on disk, records directories, artists, albums, songs and covers in the library
    database, and then fills in per-song metadata read from the audio files' tags.
    """

    # scan_metadata commits its pending updates each time this many songs have been processed
    COMMIT_BATCH_SIZE = 50

    def __init__(self, library):
        """
        :param library: library object; the scanner uses `library.db.get_libraries()` and the database
                        connection at `library.db.db`
        """
        self.library = library

    def init_scan(self):
        """
        Start a full rescan on a background daemon thread and return without waiting for it
        """
        self.scanner = Thread(target=self.rescan, daemon=True)
        self.scanner.start()

    def rescan(self):
        """
        Perform a full scan of the media library's files
        """
        start = time()
        logging.warning("Beginning library rescan")
        for parent in self.library.db.get_libraries():
            logging.info("Scanning %s", parent["path"])
            self.scan_root(parent["id"], parent["path"])
        logging.warning("Rescan complete in %ss", round(time() - start, 3))

    def scan_root(self, pid, root):
        """
        Scan a single root of the library: first record the files found on disk, then read metadata for
        songs that have never been scanned.
        :param pid: parent ID
        :param root: absolute path to scan
        """
        logging.warning("Beginning file scan for library %s", pid)
        # Number of leading path components that belong to the root itself and must be stripped
        root_depth = len(self.split_path(root))
        for path, dirs, files in os.walk(root):
            child = self.split_path(path)[root_depth:]
            self.scan_dir(pid, root, child, dirs, files)
        logging.warning("Beginning metadata scan for library %s", pid)
        self.scan_metadata(pid, root, freshonly=True)
        logging.warning("Finished scan for library %s", pid)

    def create_or_get_dbdir_tree(self, cursor, pid, path):
        """
        Return the ID of the directory specified by `path`. The path will be created as necessary. This exists
        only to serve Subsonic, and can easily be lopped off.
        :param cursor: database cursor used for the lookups and inserts
        :param pid: root parent the path resides in
        :param path: single-file tree as a list of dir names under the root parent
        :type path list
        :return: ID of the `dirs` row for the last component of `path`
        """
        assert path
        parent_id = 0  # 0 indicates a top level item in the library
        for name in path:
            parent_id = self.create_or_get_dbdir(cursor, pid, parent_id, name)
        return parent_id

    def create_or_get_dbdir(self, cursor, pid, parent_id, name):
        """
        Return the ID of the `dirs` row matching (library, parent, name), inserting the row if it is missing.
        :param cursor: database cursor used for the lookup and insert
        :param pid: library the directory belongs to
        :param parent_id: ID of the parent `dirs` row, 0 for a top level directory
        :param name: directory name
        """
        cursor.execute("SELECT * FROM dirs WHERE library=? and parent=? and name=?", (pid, parent_id, name, ))
        row = cursor.fetchone()
        if row is not None:
            return row['id']
        cursor.execute("INSERT INTO dirs (library, parent, name) VALUES (?, ?, ?)", (pid, parent_id, name))
        return cursor.lastrowid

    def scan_dir(self, pid, root, path, dirs, files):
        """
        Scan a single directory in the library. The first component of `path` is taken as the artist; when
        the directory is nested deeper than that, its last component is taken as the album.
        :param pid: parent id
        :param root: library root path
        :param path: scan location path, as a list of subdirs within the root
        :param dirs: dirs in the current path (currently unused)
        :param files: files in the current path
        """
        # Nothing to record for a directory without files, or for the library root itself
        if not files or not path:
            return

        # Guess an artist, and if possible an album, from the directory names
        artist = path[0]
        album = path[-1] if len(path) > 1 else None
        libpath = os.path.join(*path)

        # str.endswith accepts a tuple of suffixes; build both tuples once instead of once per file
        music_suffixes = tuple(".{}".format(ext) for ext in MUSIC_EXTENSIONS)
        image_suffixes = tuple(".{}".format(ext) for ext in IMAGE_EXTENSIONS)

        with closing(self.library.db.db.cursor()) as cursor:
            # Create artist entry
            artist_dirid = self.create_or_get_dbdir_tree(cursor, pid, [artist])
            cursor.execute("SELECT * FROM artists WHERE dir = ?", (artist_dirid, ))
            row = cursor.fetchone()
            if row:
                artist_id = row['id']
            else:
                cursor.execute("INSERT INTO artists (libraryid, dir, name) VALUES (?, ?, ?)",
                               (pid, artist_dirid, artist))
                artist_id = cursor.lastrowid

            # Create album entry. The directory tree row is created even when there is no album.
            album_id = None
            album_dirid = self.create_or_get_dbdir_tree(cursor, pid, path)
            if album:
                cursor.execute("SELECT * FROM albums WHERE artistid = ? AND dir = ?", (artist_id, album_dirid, ))
                row = cursor.fetchone()
                if row:
                    album_id = row['id']
                else:
                    cursor.execute("INSERT INTO albums (artistid, dir, name) VALUES (?, ?, ?)",
                                   (artist_id, album_dirid, album))
                    album_id = cursor.lastrowid

            # Create song entries for audio files that are not in the database yet
            new_files = False
            for fname in files:
                if not fname.endswith(music_suffixes):
                    continue
                fpath = os.path.join(libpath, fname)
                cursor.execute("SELECT id FROM songs WHERE file=?", (fpath, ))
                if cursor.fetchall():
                    continue
                try:
                    size = os.stat(os.path.join(root, fpath)).st_size
                except OSError as e:
                    # e.g. a broken symlink or a file removed mid-scan; skip it rather than abort the scan
                    logging.warning("unable to stat %s, skipping: %s", fpath, e)
                    continue
                # We leave most fields blank now and return later
                cursor.execute("INSERT INTO songs (albumid, file, size, title) "
                               "VALUES (?, ?, ?, ?)",
                               (album_id, fpath, size, fname, ))
                new_files = True

            # Create cover entry TODO we can probably skip this if there were no new audio files?
            # NOTE(review): the first image that is not yet in `covers` becomes the album's cover, so a
            # rescan of a directory holding several images may change the cover - confirm this is intended.
            if album_id:
                for fname in files:
                    if not fname.endswith(image_suffixes):
                        continue
                    fpath = os.path.join(libpath, fname)
                    cursor.execute("SELECT id FROM covers WHERE path=?", (fpath, ))
                    if not cursor.fetchall():
                        cursor.execute("INSERT INTO covers (path) VALUES (?);", (fpath, ))
                        cursor.execute("UPDATE albums SET coverid=? WHERE id=?", (cursor.lastrowid, album_id))
                        break

            # Commit after each dir only if new audio files were recorded. Otherwise nothing is committed
            # here; note that nothing is explicitly rolled back either.
            if new_files:
                cursor.execute("COMMIT")

    def split_path(self, path):
        """
        Given a path like /foo/bar, return ['foo', 'bar']. Trailing separators are ignored, so /foo/bar/
        gives the same result.
        :param path: path to split
        :return: list of path components, outermost first
        """
        parts = []
        head = path
        while True:
            new_head, tail = os.path.split(head)
            if tail:
                parts.append(tail)
            elif new_head == head:
                # No tail and nothing left to strip: we are at the root (or the path was empty)
                break
            # An empty tail with a changed head means a trailing separator was dropped; keep going
            head = new_head
        parts.reverse()
        return parts

    def scan_metadata(self, pid, root, freshonly=False):
        """
        Iterate through files in the library and update metadata
        NOTE(review): `pid` is not used by the query, so songs are selected regardless of library and their
        paths are all resolved against `root` - confirm this is intended when several libraries exist.
        :param pid: parent id
        :param root: library root path that song file paths are relative to
        :param freshonly: only update metadata on files that have never been scanned before
        """
        select_sql = "SELECT * FROM songs "
        if freshonly:
            select_sql += "WHERE lastscan = -1 "
        select_sql += "ORDER BY albumid"

        # Meta may have additional keys that arent in the songs table, only these are written
        song_attrs = ("title", "lastscan", "format", "length", "bitrate", "track", "year")

        with closing(self.library.db.db.cursor()) as reader, \
                closing(self.library.db.db.cursor()) as writer:
            processed = 0  # commit batching counter
            for row in reader.execute(select_sql):
                # Find meta, bail if the file was unreadable
                # TODO file metadata scanning could be done in parallel
                meta = self.scan_file_metadata(os.path.join(root, row['file']))
                if not meta:
                    continue

                # Update the song row. Column names come from the fixed whitelist above, values are bound.
                song_meta = {k: v for k, v in meta.items() if k in song_attrs}
                assignments = ["{}=?".format(key) for key in song_meta]
                assignments.append("lastscan=?")
                update_sql = "UPDATE songs SET " + ", ".join(assignments) + " WHERE id=?"
                params = list(song_meta.values()) + [int(time()), row["id"]]
                writer.execute(update_sql, params)

                # If the metadata has an artist or album name, update the relevant items
                if "album" in meta:
                    writer.execute("UPDATE albums SET name=? WHERE id=?", (meta["album"], row["albumid"]))
                if "artist" in meta:
                    album = writer.execute("SELECT artistid FROM albums WHERE id=?", (row['albumid'], )).fetchone()
                    # Songs stored without an album have no album row, so there is no artist to update
                    if album is not None:
                        writer.execute("UPDATE artists SET name=? WHERE id=?", (meta["artist"], album["artistid"]))

                # Commit every COMMIT_BATCH_SIZE items
                processed += 1
                if processed >= self.COMMIT_BATCH_SIZE:
                    writer.execute("COMMIT")
                    processed = 0

            # Commit whatever is left over from the last partial batch
            if processed != 0:
                writer.execute("COMMIT")

    def scan_file_metadata(self, fpath):
        """
        Scan the file for metadata.
        :param fpath: path to the file to scan
        :return: dict of metadata, or None if the guessed mime type is not a known music type or the file
                 could not be read
        """
        ftype, _ = mimetypes.guess_type(fpath)
        if ftype in MUSIC_TYPES:
            return self.scan_mutagen_metadata(fpath, ftype)
        return None

    def scan_mutagen_metadata(self, fpath, ftype):
        """
        Read audio properties and tags from a file using mutagen. Every field is optional; a field that
        cannot be read is left out of the result.
        NOTE(review): tags are looked up by ID3 frame name (TRCK, TPE1, ...); files opened as FLAC presumably
        name their tags differently and would yield none of them - confirm.
        :param fpath: path to the file to scan
        :param ftype: mime type of the file, stored under the "format" key
        :return: dict with "format" and any of "length", "bitrate", "track", "artist", "album", "title",
                 "year"; None if the file could not be opened
        """
        meta = {"format": ftype}

        try:
            # Open file with mutagen
            if ftype in MPX_TYPES:
                audio = MP3(fpath)
                if audio.info.sketchy:
                    logging.warning("media reported as sketchy: %s", fpath)
            elif ftype in FLAC_TYPES:
                audio = FLAC(fpath)
            else:
                audio = ID3(fpath)
        except ID3NoHeaderError:
            return None
        except MutagenError as m:
            logging.error("failed to read audio information: %s", m)
            return None

        # AttributeError covers objects without stream info (`audio.info`)
        try:
            meta["length"] = int(audio.info.length)
        except (ValueError, AttributeError):
            pass
        try:
            meta["bitrate"] = int(audio.info.bitrate)
        except (ValueError, AttributeError):
            pass

        # Only the leading digits of the track frame are used
        try:
            meta["track"] = int(RE_NUMBERS.findall(''.join(audio['TRCK'].text))[0])
        except (KeyError, IndexError):
            pass

        # Plain text frames
        for key, frame in (("artist", 'TPE1'), ("album", 'TALB'), ("title", 'TIT2')):
            try:
                meta[key] = ''.join(audio[frame].text)
            except KeyError:
                pass

        try:
            meta["year"] = audio['TDRC'].text[0].year
        except (KeyError, IndexError):
            pass

        logging.info("got all media info from %s", fpath)
        return meta