pysonic/pysonic/scanner.py

176 lines
8.9 KiB
Python
Raw Normal View History

2017-08-13 18:56:13 -07:00
import os
2017-08-14 00:15:19 -07:00
import re
2017-08-13 21:13:46 -07:00
import logging
import mimetypes
from time import time
2017-08-13 18:56:13 -07:00
from threading import Thread
2017-08-19 22:03:09 -07:00
from pysonic.types import KNOWN_MIMES, MUSIC_TYPES, MPX_TYPES, FLAC_TYPES, WAV_TYPES
2017-08-13 23:54:37 -07:00
from mutagen.id3 import ID3
2017-08-14 00:32:27 -07:00
from mutagen import MutagenError
2017-08-13 23:54:37 -07:00
from mutagen.id3._util import ID3NoHeaderError
2017-08-19 22:03:09 -07:00
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
2017-08-13 18:56:13 -07:00
2017-08-13 21:13:46 -07:00
# Logger for this module, named "scanner". NOTE: this rebinds the name
# `logging`, so from here on `logging` refers to the Logger instance rather
# than the stdlib `logging` module imported above.
logging = logging.getLogger("scanner")
2017-08-14 00:15:19 -07:00
# Captures a run of decimal digits anchored at the start of a string; the
# scanner applies it to the text of a TRCK tag to extract the track number.
RE_NUMBERS = re.compile(r'^([0-9]+)')
2017-08-13 21:13:46 -07:00
2017-08-13 18:56:13 -07:00
class PysonicFilesystemScanner(object):
    """Synchronise the library database with the directories on disk.

    A scan walks every library root, creates/prunes database nodes so they
    mirror the filesystem, then fills in metadata in two passes: a "simple"
    pass derived from directory and file names, and a media pass that reads
    tag / stream information from the audio files with mutagen.
    """

    def __init__(self, library):
        """Store the library object whose roots and database will be scanned.

        :param library: object providing ``get_libraries()``, ``get_filepath()``
            and a ``db`` attribute, as used by the methods below.
        """
        self.library = library

    def init_scan(self):
        """Start :meth:`rescan` on a daemon thread and return immediately."""
        self.scanner = Thread(target=self.rescan, daemon=True)
        self.scanner.start()

    def rescan(self):
        """Run a full scan of every library root and log the elapsed time.

        For each root, in order: sync database nodes with the filesystem,
        add name-based metadata, then add tag-based media metadata.
        """
        logging.warning("Beginning library rescan")
        start = time()
        for parent in self.library.get_libraries():
            meta = parent["metadata"]
            logging.info("Scanning {}".format(meta["fspath"]))
            # Populate all files for this top-level root
            self._sync_directory(meta["fspath"], parent)
            self._add_simple_metadata(parent)
            self._add_media_metadata(parent)
        logging.warning("Library scan complete in {}s".format(round(time() - start, 2)))

    def _sync_directory(self, path, parent):
        """Recursively make the children of ``parent`` match directory ``path``.

        Compares the set of names on disk with the set of names stored under
        ``parent`` in the database, then creates missing nodes, prunes stale
        ones, flags size changes for a metadata rescan and finally descends
        into every surviving directory node.

        :param path: filesystem path of the directory to scan
        :param parent: database node (indexed by ``"id"``) that owns the entries
        """
        logging.info("Scanning {}".format(path))
        db = self.library.db

        # create or update the database of nodes by comparing sets of names
        fs_entries = set(os.listdir(path))
        db_entries = db.getnodes(parent["id"])
        db_names = {node["name"] for node in db_entries}
        to_delete = db_names - fs_entries
        to_create = fs_entries - db_names

        # If any size has changed, mark the file to be rescanned
        for entry in db_entries:
            if entry["name"] in to_delete:
                # No longer on disk: os.stat would raise FileNotFoundError and
                # abort the scan before the node gets pruned below.
                continue
            finfo = os.stat(os.path.join(path, entry["name"]))
            if finfo.st_size != entry["size"]:
                logging.info("{} has changed in size, marking for meta rescan".format(entry["id"]))
                db.update_metadata(entry['id'], id3_done=False, size=finfo.st_size)

        # Create any nodes not found in the db
        for name in to_create:
            new_finfo = os.stat(os.path.join(path, name))
            new_node = db.addnode(parent["id"], path, name, size=new_finfo.st_size)
            logging.info("Added {}".format(os.path.join(path, name)))
            # Appended so the recursion loop below also descends into new dirs
            db_entries.append(new_node)

        # Delete any db nodes not found on disk
        for name in to_delete:
            # %-style placeholders: extra positional args without placeholders
            # make the logging module fail while formatting the record.
            logging.info("Prune %s in parent %s", name, path)
            node = [i for i in db_entries if i["name"] == name]
            if node:
                deleted = db.delnode(node[0]["id"])
                logging.info("Pruned {}, deleting total of {}".format(node, deleted))

        for entry in db_entries:
            if entry["name"] in to_delete:
                continue
            if int(entry['isdir']):  # 1 means dir
                self._sync_directory(os.path.join(path, entry["name"]), entry)

    def _add_simple_metadata(self, parent):
        """Fill in metadata that can be derived from names alone.

        Treats the tree under ``parent`` as ``artist/album/track``: directory
        and file names become artist / album / title where those fields are
        empty, ``cover.jpg`` / ``cover.png`` becomes the album cover, and a
        missing track type is guessed from the file name via ``mimetypes``.

        :param parent: library root node (indexed by ``"id"``)
        """
        db = self.library.db
        for artist_dir in db.getnodes(parent["id"]):
            artist = artist_dir["name"]
            for album_dir in db.getnodes(artist_dir["id"]):
                album = album_dir["name"]
                album_meta = album_dir["metadata"]
                for track_file in db.getnodes(album_dir["id"]):
                    title = track_file["name"]
                    if not track_file["title"]:
                        db.update_metadata(track_file["id"], artist=artist, album=album, title=title)
                        logging.info("Adding simple metadata for {}/{}/{} #{}".format(artist, album,
                                                                                      title, track_file["id"]))
                    # The album/artist checks sit inside the track loop, so an
                    # album or artist directory with no children is left as is.
                    if not album_dir["album"]:
                        db.update_metadata(album_dir["id"], artist=artist, album=album)
                        logging.info("Adding simple metadata for {}/{} #{}".format(artist, album, album_dir["id"]))
                    if not artist_dir["artist"]:
                        db.update_metadata(artist_dir["id"], artist=artist)
                        logging.info("Adding simple metadata for {} #{}".format(artist, artist_dir["id"]))
                    if title in ["cover.jpg", "cover.png"] and 'cover' not in album_meta:
                        # add cover art
                        db.update_metadata(album_dir["id"], cover=track_file["id"])
                        logging.info("added cover for {}".format(album_dir['id']))
                    if track_file["type"] is None:
                        fpath = self.library.get_filepath(track_file['id'])
                        ftype, extra = mimetypes.guess_type(fpath)
                        if ftype in KNOWN_MIMES:
                            db.update_metadata(track_file["id"], type=ftype)
                            logging.info("added type {} for {}".format(ftype, track_file['id']))
                        else:
                            logging.warning("Ignoring unreadable file at {}, unknown ftype ({}, {})"
                                            .format(fpath, ftype, extra))

    def _add_media_metadata(self, parent):
        """Add id3 / media info metadata to music tracks not yet processed.

        Tracks whose metadata already has ``id3_done`` set, or whose type is
        not in ``MUSIC_TYPES``, are skipped. Tracks that fail to parse are not
        updated, so ``id3_done`` stays unset for them.

        :param parent: library root node (indexed by ``"id"``)
        """
        db = self.library.db
        for artist_dir in db.getnodes(parent["id"]):
            for album_dir in db.getnodes(artist_dir["id"]):
                for track_file in db.getnodes(album_dir["id"]):
                    track_meta = track_file['metadata']
                    ftype = track_file.get("type", None)
                    if track_meta.get('id3_done', False) or ftype not in MUSIC_TYPES:
                        continue
                    fpath = self.library.get_filepath(track_file["id"])
                    tags = self._read_media_tags(fpath, ftype)
                    if tags is None:
                        continue
                    db.update_metadata(track_file["id"], **tags)

    @staticmethod
    def _read_media_tags(fpath, ftype):
        """Read stream info and tag frames from one audio file.

        :param fpath: path of the audio file
        :param ftype: the track's stored type, used to pick the mutagen reader
        :returns: dict of metadata to store (always contains ``id3_done``), or
            ``None`` when mutagen could not read the file
        """
        tags = {'id3_done': True}
        try:
            if ftype in MPX_TYPES:
                audio = MP3(fpath)
                if audio.info.sketchy:
                    logging.warning("media reported as sketchy: %s", fpath)
            elif ftype in FLAC_TYPES:
                audio = FLAC(fpath)
            else:
                audio = ID3(fpath)

            # Every field is optional: a missing attribute or frame just
            # leaves the corresponding key out of the result.
            try:
                tags["media_length"] = int(audio.info.length)
            except (ValueError, AttributeError):
                pass
            try:
                bitrate = int(audio.info.bitrate)
                tags["media_bitrate"] = bitrate
                tags["media_kbitrate"] = int(bitrate / 1024)
            except (ValueError, AttributeError):
                pass
            try:
                tags["track"] = int(RE_NUMBERS.findall(''.join(audio['TRCK'].text))[0])
            except (KeyError, IndexError):
                pass
            for key, frame in (("id3_artist", 'TPE1'), ("id3_album", 'TALB'), ("id3_title", 'TIT2')):
                try:
                    tags[key] = ''.join(audio[frame].text)
                except KeyError:
                    pass
            try:
                tags["id3_year"] = audio['TDRC'].text[0].year
            except (KeyError, IndexError):
                pass
            logging.info("got all media info from %s", fpath)
        except ID3NoHeaderError:
            # No ID3 header: still return the tags so the track is marked done.
            pass
        except MutagenError as m:
            logging.error("failed to read audio information: %s", m)
            return None
        return tags