pysonic/pysonic/scanner.py

262 lines
9.7 KiB
Python
Raw Normal View History

2017-08-13 18:56:13 -07:00
import os
2017-08-14 00:15:19 -07:00
import re
2017-08-13 21:13:46 -07:00
import logging
2018-04-02 21:58:48 -07:00
from contextlib import closing
2017-08-13 21:13:46 -07:00
import mimetypes
from time import time
2017-08-13 18:56:13 -07:00
from threading import Thread
2018-04-02 21:58:48 -07:00
from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES, MUSIC_EXTENSIONS, IMAGE_EXTENSIONS, IMAGE_TYPES
2017-08-13 23:54:37 -07:00
from mutagen.id3 import ID3
2017-08-14 00:32:27 -07:00
from mutagen import MutagenError
2017-08-13 23:54:37 -07:00
from mutagen.id3._util import ID3NoHeaderError
2017-08-19 22:03:09 -07:00
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
2017-08-13 18:56:13 -07:00
2017-08-13 21:13:46 -07:00
logging = logging.getLogger("scanner")
2017-08-14 00:15:19 -07:00
RE_NUMBERS = re.compile(r'^([0-9]+)')
2017-08-13 21:13:46 -07:00
2017-08-13 18:56:13 -07:00
class PysonicFilesystemScanner(object):
def __init__(self, library):
self.library = library
def init_scan(self):
self.scanner = Thread(target=self.rescan, daemon=True)
self.scanner.start()
def rescan(self):
2018-04-02 21:58:48 -07:00
"""
Perform a full scan of the media library's files
"""
2017-08-13 21:13:46 -07:00
start = time()
2018-04-02 21:58:48 -07:00
logging.warning("Beginning library rescan")
for parent in self.library.db.get_libraries():
logging.info("Scanning {}".format(parent["path"]))
self.scan_root(parent["id"], parent["path"])
logging.warning("Rescan complete in %ss", round(time() - start, 3))
def scan_root(self, pid, root):
"""
Scan a single root the library
:param pid: parent ID
:param root: absolute path to scan
"""
logging.warning("Beginning file scan for library %s", pid)
root_depth = len(self.split_path(root))
for path, dirs, files in os.walk(root):
child = self.split_path(path)[root_depth:]
self.scan_dir(pid, root, child, dirs, files)
logging.warning("Beginning metadata scan for library %s", pid)
self.scan_metadata(pid, root, freshonly=True)
logging.warning("Finished scan for library %s", pid)
def scan_dir(self, pid, root, path, dirs, files):
"""
Scan a single directory in the library.
:param pid: parent id
:param root: library root path
:param path: scan location path, as a list of subdirs within the root
:param dirs: dirs in the current path
:param files: files in the current path
"""
# If there are no files then just bail
if not files:
return
# If it is the library root just bail
if len(path) == 0:
return
# Guess an artist from the dir
artist = path[0]
# Guess an album from the dir, if possible
album = None
if len(path) > 1:
album = path[-1]
with closing(self.library.db.db.cursor()) as cursor:
# Create artist entry
cursor.execute("SELECT * FROM artists WHERE dir = ?", (artist, ))
row = cursor.fetchone()
if row:
artist_id = row['id']
else:
cursor.execute("INSERT INTO artists (libraryid, dir, name) VALUES (?, ?, ?)",
(pid, artist, artist))
artist_id = cursor.lastrowid
# Create album entry
album_id = None
libpath = os.path.join(*path)
if album:
cursor.execute("SELECT * FROM albums WHERE artistid = ? AND dir = ?", (artist_id, libpath, ))
row = cursor.fetchone()
if row:
album_id = row['id']
else:
cursor.execute("INSERT INTO albums (artistid, dir, name) VALUES (?, ?, ?)",
(artist_id, libpath, path[-1]))
album_id = cursor.lastrowid
new_files = False
for file in files:
if not any([file.endswith(".{}".format(i)) for i in MUSIC_EXTENSIONS]):
continue
fpath = os.path.join(libpath, file)
cursor.execute("SELECT id FROM songs WHERE file=?", (fpath, ))
if not cursor.fetchall():
# We leave most fields blank now and return later
cursor.execute("INSERT INTO songs (albumid, file, size, title) "
"VALUES (?, ?, ?, ?)",
(album_id,
fpath,
os.stat(os.path.join(root, fpath)).st_size,
file, ))
new_files = True
# Create cover entry TODO we can probably skip this if there were no new audio files?
if album_id:
for file in files:
if not any([file.endswith(".{}".format(i)) for i in IMAGE_EXTENSIONS]):
2017-08-13 18:56:13 -07:00
continue
2018-04-02 21:58:48 -07:00
fpath = os.path.join(libpath, file)
cursor.execute("SELECT id FROM covers WHERE path=?", (fpath, ))
if not cursor.fetchall():
# We leave most fields blank now and return later
cursor.execute("INSERT INTO covers (path) VALUES (?);", (fpath, ))
cursor.execute("UPDATE albums SET coverid=? WHERE id=?", (cursor.lastrowid, album_id))
break
if new_files: # Commit after each dir IF audio files were found. no audio == dump the artist
cursor.execute("COMMIT")
def split_path(self, path):
"""
Given a path like /foo/bar, return ['foo', 'bar']
"""
parts = []
head = path
while True:
head, tail = os.path.split(head)
if tail:
parts.append(tail)
else:
break
parts.reverse()
return parts
def scan_metadata(self, pid, root, freshonly=False):
"""
Iterate through files in the library and update metadata
:param freshonly: only update metadata on files that have never been scanned before
"""
q = "SELECT * FROM songs "
if freshonly:
q += "WHERE lastscan = -1 "
q += "ORDER BY albumid"
with closing(self.library.db.db.cursor()) as reader, \
closing(self.library.db.db.cursor()) as writer:
processed = 0 # commit batching counter
for row in reader.execute(q):
# Find meta, bail if the file was unreadable
# TODO file metadata scanning could be done in parallel
meta = self.scan_file_metadata(os.path.join(root, row['file']))
if not meta:
continue
# Meta may have additional keys that arent in the songs table, omit them
song_attrs = ["title", "lastscan", "format", "length", "bitrate", "track", "year"]
song_meta = {k: v for k, v in meta.items() if k in song_attrs}
# Update the song row
q = "UPDATE songs SET "
params = []
for key, value in song_meta.items():
q += "{}=?, ".format(key)
params.append(value)
q += "lastscan=? WHERE id=?"
params += [int(time()), row["id"]]
writer.execute(q, params)
# If the metadata has an artist or album name, update the relevant items
if "album" in meta:
writer.execute("UPDATE albums SET name=? WHERE id=?", (meta["album"], row["albumid"]))
if "artist" in meta:
album = writer.execute("SELECT artistid FROM albums WHERE id=?", (row['albumid'], )).fetchone()
writer.execute("UPDATE artists SET name=? WHERE id=?", (meta["artist"], album["artistid"]))
# Commit every 50 items
processed += 1
if processed > 50:
writer.execute("COMMIT")
processed = 0
if processed != 0:
writer.execute("COMMIT")
def scan_file_metadata(self, fpath):
"""
Scan the file for metadata.
:param fpath: path to the file to scan
"""
ftype, extra = mimetypes.guess_type(fpath)
if ftype in MUSIC_TYPES:
return self.scan_mutagen_metadata(fpath, ftype)
def scan_mutagen_metadata(self, fpath, ftype):
meta = {"format": ftype}
try:
# Open file with mutagen
if ftype in MPX_TYPES:
audio = MP3(fpath)
if audio.info.sketchy:
logging.warning("media reported as sketchy: %s", fpath)
elif ftype in FLAC_TYPES:
audio = FLAC(fpath)
else:
audio = ID3(fpath)
except ID3NoHeaderError:
return
except MutagenError as m:
logging.error("failed to read audio information: %s", m)
return
try:
meta["length"] = int(audio.info.length)
except (ValueError, AttributeError):
pass
try:
bitrate = int(audio.info.bitrate)
meta["bitrate"] = bitrate
# meta["kbitrate"] = int(bitrate / 1024)
except (ValueError, AttributeError):
pass
try:
meta["track"] = int(RE_NUMBERS.findall(''.join(audio['TRCK'].text))[0])
except (KeyError, IndexError):
pass
try:
meta["artist"] = ''.join(audio['TPE1'].text)
except KeyError:
pass
try:
meta["album"] = ''.join(audio['TALB'].text)
except KeyError:
pass
try:
meta["title"] = ''.join(audio['TIT2'].text)
except KeyError:
pass
try:
meta["year"] = audio['TDRC'].text[0].year
except (KeyError, IndexError):
pass
logging.info("got all media info from %s", fpath)
return meta