Files
osrsgamestats/scrape.py
dave 639283be0d
All checks were successful
Gitea/osrsgamestats/pipeline/head This commit looks good
allow multiple influxdb output urls
2024-10-18 17:10:57 -07:00

94 lines
2.4 KiB
Python

#!/usr/bin/env python3
import sys
import re
import os
import logging
from threading import Thread
from urllib.parse import urlparse
import requests
from influxdb import InfluxDBClient
def get_rs_players(session):
    """Return the total RuneScape player count (RS3 + OSRS combined).

    Scrapes the official player-count JSONP endpoint, whose response looks
    like ``jQuery...(104370)``; the integer in parentheses is the count.

    :param session: a requests.Session-like object exposing ``get``.
    :raises ValueError: if the response does not contain a player count
        (previously this surfaced as an opaque ``IndexError``).
    """
    r = session.get("https://www.runescape.com/player_count.js?varname=iPlayerCount&callback=jQuery3600011173447649542423_1624513024284&_=1624513024285")
    # re.search with an explicit check gives a clear failure mode if the
    # endpoint's response format ever changes.
    m = re.search(r'.+\((\d+)\)', r.text)
    if m is None:
        raise ValueError("could not parse player count from response: %r" % r.text[:200])
    return int(m.group(1))
def get_osrs_players(session):
    """Return the current Old School RuneScape player count.

    Scrapes the OSRS homepage for the "There are currently N people
    playing" banner; the number uses thousands separators, which are
    stripped before conversion to int.

    :param session: a requests.Session-like object exposing ``get``.
    :raises ValueError: if the banner text is not found
        (previously this surfaced as an opaque ``IndexError``).
    """
    r = session.get("https://oldschool.runescape.com")
    m = re.search(r'There are currently ([\d,]+) people playing', r.text)
    if m is None:
        raise ValueError("could not find player count banner on OSRS homepage")
    return int(m.group(1).replace(",", ""))
def get_influx(influx_urls):
    """Build one connected InfluxDBClient per URL in *influx_urls*.

    Each URL is expected to look like ``scheme://user:pass@host:port/dbname``;
    the database named by the path is created (a no-op if it already
    exists) and selected before the client is returned.

    :param influx_urls: iterable of InfluxDB URL strings.
    :returns: list of ready-to-write InfluxDBClient instances, in the
        same order as the input URLs.
    """
    # Connect timeout is configurable via the environment; default 5s.
    timeout = int(os.environ.get("RSSCRAPE_CONNECT_TIMEOUT", 5))
    clients = []
    for raw_url in influx_urls:
        parsed = urlparse(raw_url)
        database = parsed.path[1:]  # drop the leading "/" from the path
        client = InfluxDBClient(parsed.hostname, parsed.port,
                                parsed.username, parsed.password,
                                timeout=timeout)
        client.create_database(database)
        client.switch_database(database)
        clients.append(client)
    return clients
def main():
    """Scrape RuneScape player counts and submit them to InfluxDB.

    Reads a comma-separated list of InfluxDB URLs from the
    ``RSSCRAPE_INFLUX_URL`` environment variable (exits with status 1 if
    unset), scrapes the combined and OSRS-only player counts, derives the
    RS3-only count, and writes a single "players" measurement to every
    configured database in parallel.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    url_setting = os.environ.get("RSSCRAPE_INFLUX_URL")
    if not url_setting:
        logging.critical("must set RSSCRAPE_INFLUX_URL")
        sys.exit(1)
    urls = url_setting.split(",")
    clients = get_influx(urls)

    session = requests.session()
    # Present a desktop-browser User-Agent; the sites may treat obvious
    # bot traffic differently.
    session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:85.0) Gecko/20100101 Firefox/85.0"
    session.headers["DNT"] = "1"

    total = get_rs_players(session)
    osrs = get_osrs_players(session)
    # The combined counter includes OSRS, so RS3-only is the difference.
    rs3 = total - osrs
    body = [
        {
            "measurement": "players",
            "tags": {},
            "fields": {
                "rs3": rs3,
                "osrs": osrs,
                "both_games": total,
            }
        },
    ]
    print(body)
    parallel_submit(clients, urls, body)
def parallel_submit(influxes, influx_urls, body):
    """Write *body* to every InfluxDB client concurrently.

    Spawns one daemon thread per client so a slow server does not delay
    the others, then joins each thread in order and reports the
    corresponding URL once its write has finished.

    :param influxes: list of InfluxDBClient instances, parallel to influx_urls.
    :param influx_urls: list of URL strings, used only for progress output.
    :param body: points payload passed to ``InfluxDBClient.write_points``.
    """
    workers = []
    for client in influxes:
        worker = Thread(target=client.write_points, args=(body, ), daemon=True)
        worker.start()
        workers.append(worker)
    for idx, worker in enumerate(workers):
        worker.join()
        print("done:", influx_urls[idx])
# Script entry point: run the scrape only when executed directly.
if __name__ == "__main__":
    main()