Compare commits
3 Commits
| Author | SHA1 | Date |
| --- | --- | --- |
|  | 122addbfa9 | 4 years ago |
|  | 30c641fbea | 4 years ago |
|  | c8a9ae89e1 | 4 years ago |
6 changed files with 446 additions and 86 deletions
@@ -0,0 +1,194 @@
from threading import Thread, Timer
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
import shutil
import logging
import os
import requests
import feedparser
import time


class PodcastSettings(object):
    """seconds between updating podcasts"""
    refresh_interval = 3  # 60 * 60
    """how many seconds to wait after initialization to start refreshing podcasts"""
    startup_delay = 30
    """how many podcasts can be scanned at once"""
    scan_threads = 4
    """root path of downloaded podcasts"""
    path = "podcasts"
    """how many of the most recent episodes to download"""
    download_episodes = 2


class PodcastManager(Thread):
    def __init__(self, db):
        super().__init__()
        self.daemon = True

        self.db = db
        self.settings = PodcastSettings
        self.q = Queue()
        self.start()

    def run(self):
        """
        Loop forever, waiting for a scan request (i.e. a queue item) as the signal to scan all
        podcasts for new episodes.
        """
        self.schedule_rescan()
        while True:
            self.q.get()
            self.refresh_podcasts()

    def interval_scan(self):
        """
        Request that a scan be executed, and schedule the next automated rescan.
        """
        self.request_rescan()
        # self.schedule_rescan()

    def schedule_rescan(self):
        """
        Schedule interval_scan() to run after the configured refresh interval.
        """
        t = Timer(self.settings.refresh_interval, self.interval_scan)
        t.daemon = True
        t.start()

    def request_rescan(self):
        """
        Add an item to the queue, signaling the run() loop to begin a scan.
        """
        self.q.put(None)

    def refresh_podcasts(self):
        """
        Refresh all the podcasts.
        """
        logging.info("rescanning podcasts")
        # Any episode still marked "downloading" is stale state left over from before a crash
        # TODO this should happen earlier than the scan
        for entry in self.db.get_podcast_episodes(status="downloading"):
            self.db.set_podcast_episode_status(entry['id'], "new")

        futures = []
        # TODO the ThreadPoolExecutor doesn't die as a daemon thread :|
        with ThreadPoolExecutor(max_workers=self.settings.scan_threads) as pool:
            for item in self.db.get_podcasts():
                futures.append(pool.submit(self.refresh_podcast, item))
            for item in futures:
                e = item.exception()
                if e:
                    raise e
        # for item in self.db.get_podcasts():
        #     self.refresh_podcast(item)
        logging.info("podcast refresh complete")
        # TODO all episodes in 'new' status change to 'skipped'

    def refresh_podcast(self, podcast):
        """
        Refresh all metadata and episodes of a single podcast.
        """
        logging.info("updating podcast %s '%s'", podcast['id'], podcast['title'])
        feed = self.get_feed(podcast['url'])
        for entry in feed['entries']:
            self.refresh_podcast_entry(podcast['id'], entry)
        self.refresh_podcast_episodes(podcast['id'])

        # TODO update the feed's description
        # self.update_feed_meta(feed['feed'])
        # 'image': {'href': 'http://sysadministrivia.com/images/1.jpg',
        #           'link': 'http://sysadministrivia.com/',
        #           'links': [{'href': 'http://sysadministrivia.com/',
        #                      'rel': 'alternate',
        #                      'type': 'text/html'}],
        #           'title': 'The Sysadministrivia Podcast',
        #           'title_detail': {'base': '',
        #                            'language': 'en',
        #                            'type': 'text/plain',
        #                            'value': 'The Sysadministrivia Podcast'}},
        # 'link': 'http://sysadministrivia.com/',
        # 'subtitle': 'We podcast all things system administration/engineering/infosec, '
        #             'with a strong focus on GNU/Linux. We use F/OSS software whenever '
        #             'possible in the production of these podcasts. Please be sure to '
        #             'view our show notes on the site!',
        # 'title': 'The Sysadministrivia Podcast',

    def refresh_podcast_episodes(self, podcast_id):
        """
        Check that the most recent X episodes are downloaded. Start downloads if not.
        """
        for entry in self.db.get_podcast_episodes(podcast_id=podcast_id, limit=self.settings.download_episodes):
            if entry["status"] == "new":
                self.download_episode(entry)

    def download_episode(self, episode):
        """
        Download the episode:
        - mark its status as downloading
        - clean up any tmp files from previous failures
        - create the destination dir
        - stream the url to a temp file
        - rename the temp file to the final location
        - mark the episode as downloaded
        """
        self.db.set_podcast_episode_status(episode['id'], "downloading")

        ep_dir = os.path.join(self.settings.path, str(episode['podcastid']))
        ep_path = os.path.join(ep_dir, "{}.mp3".format(episode['id']))
        ep_tmppath = os.path.join(ep_dir, ".{}.mp3".format(episode['id']))

        os.makedirs(ep_dir, exist_ok=True)
        if os.path.exists(ep_path):
            os.unlink(ep_path)  # previous failed download
        if os.path.exists(ep_tmppath):
            os.unlink(ep_tmppath)  # previous failed download

        logging.info("fetching %s", episode['url'])
        r = requests.get(episode['url'], stream=True)
        r.raise_for_status()
        with open(ep_tmppath, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        os.rename(ep_tmppath, ep_path)
        # TODO verify or update MIME from that of the url
        self.db.set_podcast_episode_status(episode['id'], "completed")

    def get_feed(self, rss_url):
        """
        Download the given URL and return a parsed feed.
        """
        feed_body = requests.get(rss_url, timeout=30)
        return feedparser.parse(feed_body.text)

    def refresh_podcast_entry(self, podcast_id, entry):
        """
        Update the database for the given podcast entry, adding it if it doesn't exist. Note: the
        episode TITLE is used as the uniqueness check against the database.
        """
        existing = self.db.get_podcast_episodes(podcast_id=podcast_id, title=entry['title'])
        if existing:
            return

        # find the media file url
        url = None
        mime = None
        for link in entry['links']:
            if link['type'] in ["audio/mpeg", "audio/mp3"]:  # TODO more formats
                url = link['href']
                mime = link['type']
                break

        if not url:
            logging.warning("could not find url for episode in podcast %s", podcast_id)
            return

        # create the entry
        ep_id = self.db.add_podcast_episode(podcast_id,
                                            time.mktime(entry['published_parsed']),
                                            entry['title'],
                                            entry['summary'],
                                            url,
                                            mime)
        logging.info("added episode %s '%s'", ep_id, entry['title'])
@@ -0,0 +1,111 @@
table_quers = ["""CREATE TABLE 'libraries' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'name' TEXT,
                    'path' TEXT UNIQUE);""",

               """CREATE TABLE 'dirs' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'library' INTEGER,
                    'parent' INTEGER,
                    'name' TEXT,
                    UNIQUE(parent, name)
                    )""",

               """CREATE TABLE 'genres' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'name' TEXT UNIQUE)""",

               """CREATE TABLE 'artists' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'libraryid' INTEGER,
                    'dir' INTEGER UNIQUE,
                    'name' TEXT)""",

               """CREATE TABLE 'albums' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'artistid' INTEGER,
                    'coverid' INTEGER,
                    'dir' INTEGER,
                    'name' TEXT,
                    'added' INTEGER NOT NULL DEFAULT -1,
                    'played' INTEGER,
                    'plays' INTEGER NOT NULL DEFAULT 0,
                    UNIQUE (artistid, dir));""",

               """CREATE TABLE 'songs' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'library' INTEGER,
                    'albumid' INTEGER,
                    'genre' INTEGER DEFAULT NULL,
                    'file' TEXT UNIQUE, -- path from the library root
                    'size' INTEGER NOT NULL DEFAULT -1,
                    'title' TEXT NOT NULL,
                    'lastscan' INTEGER NOT NULL DEFAULT -1,
                    'format' TEXT,
                    'length' INTEGER,
                    'bitrate' INTEGER,
                    'track' INTEGER,
                    'year' INTEGER
                    )""",

               """CREATE TABLE 'covers' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'library' INTEGER,
                    'type' TEXT,
                    'size' TEXT,
                    'path' TEXT UNIQUE);""",

               """CREATE TABLE 'users' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    'username' TEXT UNIQUE NOT NULL,
                    'password' TEXT NOT NULL,
                    'admin' BOOLEAN DEFAULT 0,
                    'email' TEXT)""",

               """CREATE TABLE 'stars' (
                    'userid' INTEGER,
                    'songid' INTEGER,
                    PRIMARY KEY ('userid', 'songid'))""",

               """CREATE TABLE 'playlists' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    'ownerid' INTEGER,
                    'name' TEXT,
                    'public' BOOLEAN,
                    'created' INTEGER,
                    'changed' INTEGER,
                    'cover' INTEGER,
                    UNIQUE ('ownerid', 'name'))""",

               """CREATE TABLE 'playlist_entries' (
                    'playlistid' INTEGER,
                    'songid' INTEGER,
                    'order' FLOAT)""",

               """CREATE TABLE 'podcasts' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'lastscan' INTEGER NOT NULL DEFAULT 0,
                    'interval' INTEGER NOT NULL DEFAULT 60,
                    'url' TEXT UNIQUE,
                    'title' TEXT NOT NULL,
                    'description' TEXT,
                    'cover' INTEGER,
                    'rss_cover' TEXT,
                    'status' TEXT)""",

               """CREATE TABLE 'podcast_episodes' (
                    'id' INTEGER PRIMARY KEY AUTOINCREMENT,
                    'podcastid' INTEGER,
                    'date' INTEGER,
                    'title' TEXT NOT NULL,
                    'description' TEXT,
                    'url' TEXT,
                    'format' TEXT,
                    'status' TEXT,
                    UNIQUE('podcastid', 'title'))""",

               """CREATE TABLE 'meta' (
                    'key' TEXT PRIMARY KEY NOT NULL,
                    'value' TEXT);""",

               """INSERT INTO meta VALUES ('db_version', '1');"""]
@@ -1,12 +1,17 @@
backports.functools-lru-cache==1.5
beautifulsoup4==4.6.0
bs4==0.0.1
cheroot==6.0.0
certifi==2018.1.18
chardet==3.0.4
cheroot==6.2.0
CherryPy==14.0.1
lxml==4.2.1
feedparser==5.2.1
idna==2.6
lxml==3.8.0
more-itertools==4.1.0
mutagen==1.40.0
mutagen==1.38
portend==2.2
pysonic==0.0.1
pytz==2018.3
requests==2.18.4
six==1.11.0
tempora==1.11
urllib3==1.22