Files
osrsgamestats/scrape.py
dave 639283be0d
All checks were successful
Gitea/osrsgamestats/pipeline/head This commit looks good
allow multiple influxdb output urls
2024-10-18 17:10:57 -07:00

94 lines
2.4 KiB
Python

#!/usr/bin/env python3
import sys
import re
import os
import logging
from threading import Thread
from urllib.parse import urlparse
import requests
from influxdb import InfluxDBClient
def get_rs_players(session):
    """Return the total RuneScape player count (RS3 + OSRS combined).

    Scrapes the official player-count JSONP endpoint, whose response looks
    like ``jQuery...(104370)``; the integer in parentheses is the count.

    :param session: a requests.Session-like object exposing ``get``.
    :raises ValueError: if the response does not contain a player count
        (previously this surfaced as an opaque ``IndexError``).
    """
    r = session.get("https://www.runescape.com/player_count.js?varname=iPlayerCount&callback=jQuery3600011173447649542423_1624513024284&_=1624513024285")
    # re.search with an explicit check gives a clear failure mode if the
    # endpoint's response format ever changes.
    m = re.search(r'.+\((\d+)\)', r.text)
    if m is None:
        raise ValueError("could not parse player count from response: %r" % r.text[:200])
    return int(m.group(1))
def get_osrs_players(session):
    """Return the current Old School RuneScape player count.

    Scrapes the OSRS homepage for the "There are currently N people
    playing" banner; the number uses thousands separators, which are
    stripped before conversion to int.

    :param session: a requests.Session-like object exposing ``get``.
    :raises ValueError: if the banner text is not found
        (previously this surfaced as an opaque ``IndexError``).
    """
    r = session.get("https://oldschool.runescape.com")
    m = re.search(r'There are currently ([\d,]+) people playing', r.text)
    if m is None:
        raise ValueError("could not find player count banner on OSRS homepage")
    return int(m.group(1).replace(",", ""))
def get_influx(influx_urls):
    """Build one connected InfluxDBClient per URL in *influx_urls*.

    Each URL is expected to look like ``scheme://user:pass@host:port/dbname``;
    the database named by the path is created (a no-op if it already
    exists) and selected before the client is returned.

    :param influx_urls: iterable of InfluxDB URL strings.
    :returns: list of ready-to-write InfluxDBClient instances, in the
        same order as the input URLs.
    """
    # Connect timeout is configurable via the environment; default 5s.
    timeout = int(os.environ.get("RSSCRAPE_CONNECT_TIMEOUT", 5))
    clients = []
    for raw_url in influx_urls:
        parsed = urlparse(raw_url)
        database = parsed.path[1:]  # drop the leading "/" from the path
        client = InfluxDBClient(parsed.hostname, parsed.port,
                                parsed.username, parsed.password,
                                timeout=timeout)
        client.create_database(database)
        client.switch_database(database)
        clients.append(client)
    return clients
def main():
    """Scrape RuneScape player counts and submit them to InfluxDB.

    Reads a comma-separated list of InfluxDB URLs from the
    ``RSSCRAPE_INFLUX_URL`` environment variable (exits with status 1 if
    unset), scrapes the combined and OSRS-only player counts, derives the
    RS3-only count, and writes a single "players" measurement to every
    configured database in parallel.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    url_setting = os.environ.get("RSSCRAPE_INFLUX_URL")
    if not url_setting:
        logging.critical("must set RSSCRAPE_INFLUX_URL")
        sys.exit(1)
    urls = url_setting.split(",")
    clients = get_influx(urls)

    session = requests.session()
    # Present a desktop-browser User-Agent; the sites may treat obvious
    # bot traffic differently.
    session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:85.0) Gecko/20100101 Firefox/85.0"
    session.headers["DNT"] = "1"

    total = get_rs_players(session)
    osrs = get_osrs_players(session)
    # The combined counter includes OSRS, so RS3-only is the difference.
    rs3 = total - osrs
    body = [
        {
            "measurement": "players",
            "tags": {},
            "fields": {
                "rs3": rs3,
                "osrs": osrs,
                "both_games": total,
            }
        },
    ]
    print(body)
    parallel_submit(clients, urls, body)
def parallel_submit(influxes, influx_urls, body):
    """Write *body* to every InfluxDB client concurrently.

    Spawns one daemon thread per client so a slow server does not delay
    the others, then joins each thread in order and reports the
    corresponding URL once its write has finished.

    :param influxes: list of InfluxDBClient instances, parallel to influx_urls.
    :param influx_urls: list of URL strings, used only for progress output.
    :param body: points payload passed to ``InfluxDBClient.write_points``.
    """
    workers = []
    for client in influxes:
        worker = Thread(target=client.write_points, args=(body, ), daemon=True)
        worker.start()
        workers.append(worker)
    for idx, worker in enumerate(workers):
        worker.join()
        print("done:", influx_urls[idx])
# Script entry point: run the scrape only when executed directly.
if __name__ == "__main__":
    main()