osrsgamestats/scrape.py
2021-06-26 10:51:33 -07:00

72 lines
1.8 KiB
Python

#!/usr/bin/env python3
import sys
import requests
import re
from influxdb import InfluxDBClient
from urllib.parse import urlparse
import os
import logging
def get_rs_players(session):
    """Return the player count published by runescape.com's player_count.js.

    The endpoint answers with a JSONP-style payload; the count is the run of
    digits inside the trailing parentheses.

    Args:
        session: A ``requests.Session``-like object; only ``get`` is used.

    Returns:
        The player count as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the body does not contain a ``(<digits>)`` group.
    """
    # A timeout keeps an unattended run from hanging forever on a stalled
    # connection (requests applies no timeout by default).
    r = session.get(
        "https://www.runescape.com/player_count.js?varname=iPlayerCount&callback=jQuery3600011173447649542423_1624513024284&_=1624513024285",
        timeout=30,
    )
    # Fail on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    match = re.search(r'.+\((\d+)\)', r.text)
    if match is None:
        raise ValueError(
            "could not find a player count in player_count.js response: %r"
            % r.text[:200]
        )
    return int(match.group(1))
def get_osrs_players(session):
    """Return the Old School RuneScape player count shown on its home page.

    The count is read from the sentence
    ``There are currently N people playing`` in the page HTML; thousands
    separators (commas) are removed before conversion.

    Args:
        session: A ``requests.Session``-like object; only ``get`` is used.

    Returns:
        The player count as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the expected sentence is not present in the page.
    """
    # A timeout keeps an unattended run from hanging forever on a stalled
    # connection (requests applies no timeout by default).
    r = session.get("https://oldschool.runescape.com", timeout=30)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    match = re.search(r'There are currently ([\d,]+) people playing', r.text)
    if match is None:
        raise ValueError(
            "could not find a player count on the oldschool.runescape.com page"
        )
    return int(match.group(1).replace(",", ""))
def get_influx():
    """Create an InfluxDB client from the ``RSSCRAPE_INFLUX_URL`` variable.

    The URL supplies the host, port, credentials and, as its path, the
    database name (e.g. ``http://user:pass@host:8086/dbname``). The database
    is created if needed and selected on the returned client.

    Returns:
        An ``InfluxDBClient`` switched to the configured database.

    Exits:
        Calls ``sys.exit(1)`` after logging a critical message when the
        variable is unset/empty or when the URL carries no database name.
    """
    url_s = os.environ.get("RSSCRAPE_INFLUX_URL")
    if not url_s:
        logging.critical("must set RSSCRAPE_INFLUX_URL")
        sys.exit(1)
    url = urlparse(url_s)
    # The path component is "/<dbname>"; drop the surrounding slashes.
    db_name = url.path.strip("/")
    if not db_name:
        # Without this check an empty name would be sent to create_database().
        logging.critical("RSSCRAPE_INFLUX_URL must include a database name in its path")
        sys.exit(1)
    # urlparse yields port=None when the URL omits it; fall back to 8086,
    # InfluxDBClient's documented default, rather than passing None through.
    port = url.port if url.port is not None else 8086
    influx = InfluxDBClient(url.hostname, port, url.username, url.password)
    influx.create_database(db_name)
    influx.switch_database(db_name)
    return influx
def main():
    """Scrape the current player counts and write one point to InfluxDB.

    Configures logging, connects to InfluxDB via ``get_influx()``, fetches
    both counts over a single HTTP session, prints the resulting point and
    writes it to the ``players`` measurement.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
    # Connect first so a missing/invalid configuration aborts before any
    # scraping request is made.
    influx = get_influx()
    # The context manager closes the session (and its pooled connections)
    # even if one of the scrapes raises.
    with requests.Session() as s:
        s.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:85.0) Gecko/20100101 Firefox/85.0"
        s.headers["DNT"] = "1"
        all_players = get_rs_players(s)
        osrs_players = get_osrs_players(s)
    # The RS3 figure is derived: the first count minus the OSRS count.
    rs3_players = all_players - osrs_players
    body = [
        {
            "measurement": "players",
            "tags": {},
            "fields": {
                "rs3": rs3_players,
                "osrs": osrs_players,
                "both_games": all_players,
            },
        },
    ]
    print(body)
    influx.write_points(body)
if __name__ == "__main__":
    # Run the scrape only when executed as a script, not when imported.
    main()