osrsscrape/scrape.py

168 lines
4.4 KiB
Python
Raw Normal View History

2021-06-23 20:20:25 -07:00
#!/usr/bin/env python3
import logging
from influxdb import InfluxDBClient
import requests
import os
from random import randint
from time import sleep
import sys
import csv
import traceback
from collections import namedtuple
# HTTP request timeout handed to requests.get(); overridable through the
# RSSCRAPE_TIMEOUT environment variable, falling back to 60 when unset.
timeout = int(os.getenv("RSSCRAPE_TIMEOUT", 60))
def get_influx():
    """Build an InfluxDBClient from environment configuration.

    Reads INFLUX_HOST, INFLUX_PORT and INFLUX_DB from the environment. If any
    of them is missing or empty, a critical message is logged and the process
    exits with status 1. Otherwise a client is constructed for the host and
    port, ``create_database`` and ``switch_database`` are called with the
    configured database name, and the client is returned.
    """
    settings = [os.environ.get(key)
                for key in ("INFLUX_HOST", "INFLUX_PORT", "INFLUX_DB")]
    if any(not value for value in settings):
        logging.critical("must set INFLUX_HOST, INFLUX_PORT and INFLUX_DB")
        sys.exit(1)

    host, port, database = settings
    client = InfluxDBClient(host, port)  # user, password
    client.create_database(database)
    client.switch_database(database)
    return client
# Skill names in the order their rows appear in the stats data; row i of the
# response is labelled with STATS[i].
STATS = [
    "Overall", "Attack", "Defence", "Strength",
    "Hitpoints", "Ranged", "Prayer", "Magic",
    "Cooking", "Woodcutting", "Fletching", "Fishing",
    "Firemaking", "Crafting", "Smithing", "Mining",
    "Herblore", "Agility", "Thieving", "Slayer",
    "Farming", "Runecraft", "Hunter", "Construction",
]
# Cumulative XP thresholds. LEVELS[i] is the XP at which level i + 1 starts,
# so LEVELS[level] is the XP total needed to advance beyond `level`.
LEVELS = [
    0, 83, 174, 276, 388, 512, 650, 801, 969, 1154,  # levels 1-10
    1358, 1584, 1833, 2107, 2411, 2746, 3115, 3523, 3973, 4470,  # 11-20
    5018, 5624, 6291, 7028, 7842, 8740, 9730, 10824, 12031, 13363,  # 21-30
    14833, 16456, 18247, 20224, 22406, 24815, 27473, 30408, 33648, 37224,  # 31-40
    41171, 45529, 50339, 55649, 61512, 67983, 75127, 83014, 91721, 101333,  # 41-50
    111945, 123660, 136594, 150872, 166636, 184040, 203254, 224466, 247886, 273742,  # 51-60
    302288, 333804, 368599, 407015, 449428, 496254, 547953, 605032, 668051, 737627,  # 61-70
    814445, 899257, 992895, 1096278, 1210421, 1336443, 1475581, 1629200, 1798808, 1986068,  # 71-80
    2192818, 2421087, 2673114, 2951373, 3258594, 3597792, 3972294, 4385776, 4842295, 5346332,  # 81-90
    5902831, 6517253, 7195629, 7944614, 8771558, 9684577, 10692629, 11805606, 13034431,  # 91-99
]
# One parsed hiscore row: the skill name, the three values from the response
# (rank, level, xp) and the derived XP remaining until the next level.
StatData = namedtuple("StatData", ["skill", "rank", "level", "xp", "xp_next"])
def _fetch_hiscore_text(name):
    """Download the raw hiscore response body for *name*, retrying on failure.

    Up to five attempts are made. Before attempt number ``trynum`` the
    function sleeps ``randint(5, 30) * trynum`` seconds, so the first attempt
    is immediate and later ones back off. An attempt is retried when the
    request raises, the status code is not 200, or the body is empty or does
    not start with a digit or ``-``.

    Returns the response text, or None if every attempt failed.
    """
    for trynum in range(0, 5):
        sleep(randint(5, 30) * trynum)
        print(f"try #{trynum}")
        try:
            r = requests.get("https://secure.runescape.com/m=hiscore_oldschool/a=13/index_lite.ws",
                             params={"player": name},
                             timeout=timeout)
        except requests.RequestException:
            # Only network/HTTP-level failures are retried; KeyboardInterrupt
            # and SystemExit must still be able to stop the script.
            print(traceback.format_exc())
            continue
        if r.status_code != 200:
            print(f"retrying, status code = {r.status_code}")
            continue
        body = r.text
        print(body)
        # A usable response is CSV whose first field is an integer.
        if not body or body[0] not in "-1234567890":
            print("retrying, body must start with a number")
            continue
        return body
    return None


def _parse_hiscore_text(data):
    """Convert the CSV hiscore body into a list of StatData, one per STATS entry."""
    reader = csv.reader(data.splitlines(), delimiter=',')
    ret = []
    # Rows are "rank, level, xp" in STATS order; zip() ignores any rows that
    # follow the last known skill.
    for skill, row in zip(STATS, reader):
        rank = int(row[0])
        level = int(row[1])
        xp = int(row[2])
        xp_next = -1
        # The "Overall" row carries the total level, not a skill level, so the
        # per-skill table does not apply to it. The lower bound keeps negative
        # levels (the -1 unranked sentinel) from indexing LEVELS from the end.
        if skill != "Overall" and 1 <= level < len(LEVELS):
            xp_next = LEVELS[level] - xp
        ret.append(StatData(skill.lower(), rank, level, xp, xp_next))
    return ret


def get_player(name):
    """Fetch and parse the hiscore stats for the player *name*.

    Args:
        name: player name sent as the ``player`` query parameter.

    Returns:
        A list of StatData (skill name lower-cased, rank, level, xp, xp_next)
        in STATS order, or None if the data could not be fetched after all
        retries. ``xp_next`` is -1 when it cannot be derived from LEVELS.

    Raises:
        IndexError, ValueError: if a skill row has fewer than three fields or
            a field is not an integer.
    """
    data = _fetch_hiscore_text(name)
    if data is None:
        return None
    return _parse_hiscore_text(data)
def main():
    """Scrape one player's stats and write them to InfluxDB.

    Configures logging, connects via get_influx(), reads the player name from
    the PLAYER_NAME environment variable (lower-cased before use), fetches the
    stats with get_player() and writes one point per measurement ("rank",
    "level", "xp", "xp_next"), each tagged with the username and carrying one
    field per skill.

    Exits with status 1 if PLAYER_NAME is unset or empty, or if the player's
    stats could not be fetched.
    """
    logging.basicConfig(level=logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
    influx = get_influx()

    # .get() rather than [] so an unset variable reaches the friendly error
    # below instead of raising KeyError.
    player_name = os.environ.get("PLAYER_NAME")
    if not player_name:
        logging.critical("must set PLAYER_NAME")
        sys.exit(1)
    player_name = player_name.lower()

    stats = get_player(player_name)
    if stats is None:
        print("failed to fetch player!")
        sys.exit(1)

    body = [
        {"measurement": measurement,
         "tags": {"username": player_name},
         "fields": {stat.skill: getattr(stat, measurement) for stat in stats}}
        for measurement in ("rank", "level", "xp", "xp_next")
    ]
    influx.write_points(body)
    print("done!")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()