From c9160cf23e540e5e43c32062e2399f91f9b64123 Mon Sep 17 00:00:00 2001 From: dave Date: Wed, 23 Jun 2021 20:20:25 -0700 Subject: [PATCH] initial commit of original project --- .dockerignore | 1 + Dockerfile | 14 ++ cron.yml | 347 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 9 ++ scrape.py | 167 +++++++++++++++++++++++ 5 files changed, 538 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 cron.yml create mode 100644 requirements.txt create mode 100644 scrape.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e8a422e --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +testenv \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..21066b0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:focal + +RUN apt-get update && \ + apt-get install -y python3-pip + +ADD requirements.txt /tmp/requirements.txt + +RUN pip3 install -r /tmp/requirements.txt + +ADD scrape.py /usr/local/bin/scrape.py + +RUN chmod +x /usr/local/bin/scrape.py + +ENTRYPOINT ["/usr/local/bin/scrape.py"] diff --git a/cron.yml b/cron.yml new file mode 100644 index 0000000..c9f1621 --- /dev/null +++ b/cron.yml @@ -0,0 +1,347 @@ +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-mop + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: moptop650 + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-imam + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: 
+ spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "grand imam" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-spoon + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "sword spoons" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-mop1 + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "moptop 651" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-botusername + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "botusername" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-uim + namespace: jobs +spec: + schedule: "*/10 
* * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "uim mop" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-habibi + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "super habibi" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-gerbi + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: gerbi7 + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-swamp + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: Swampletics + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 
+kind: CronJob +metadata: + name: rsscrape-soulja + namespace: jobs +spec: + schedule: "*/5 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "souljaboysod" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-pp + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "hardcorepp2" + restartPolicy: Never + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: rsscrape-ceng2 + namespace: jobs +spec: + schedule: "*/10 * * * *" + concurrencyPolicy: Replace + startingDeadlineSeconds: 60 + jobTemplate: + spec: + template: + spec: + containers: + - name: scrape + image: dockermirror:5000/dpedu/rsscrape + imagePullPolicy: Always + env: + - name: INFLUX_HOST + value: "influx01" + - name: INFLUX_PORT + value: "8086" + - name: INFLUX_DB + value: osrs + - name: PLAYER_NAME + value: "hm for off" + restartPolicy: Never diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..984620c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +certifi==2019.11.28 +chardet==3.0.4 +idna==2.8 +influxdb==5.2.3 +python-dateutil==2.8.1 +pytz==2019.3 +requests==2.22.0 +six==1.13.0 +urllib3==1.25.7 diff --git a/scrape.py b/scrape.py new file mode 100644 index 0000000..606d876 --- /dev/null +++ b/scrape.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 + +import 
import logging
from influxdb import InfluxDBClient
import requests
import os
from random import randint
from time import sleep
import sys
import csv
import traceback
from collections import namedtuple


# Per-request timeout in seconds for the hiscore lookup; override with RSSCRAPE_TIMEOUT.
timeout = int(os.environ.get("RSSCRAPE_TIMEOUT", 60))

# Endpoint queried by get_player(); the player name is passed as a query parameter.
HISCORE_URL = "https://secure.runescape.com/m=hiscore_oldschool/a=13/index_lite.ws"

# Maximum number of fetch attempts made by get_player() before giving up.
MAX_TRIES = 5


def get_influx():
    """Return an InfluxDBClient switched to the configured database.

    Connection settings are read from the INFLUX_HOST, INFLUX_PORT and
    INFLUX_DB environment variables. If any of them is missing or empty a
    critical message is logged and the process exits with status 1.
    """
    influx_host = os.environ.get("INFLUX_HOST")
    influx_port = os.environ.get("INFLUX_PORT")
    influx_db = os.environ.get("INFLUX_DB")

    if not all([influx_host, influx_port, influx_db]):
        logging.critical("must set INFLUX_HOST, INFLUX_PORT and INFLUX_DB")
        sys.exit(1)

    influx = InfluxDBClient(influx_host, influx_port)  # user, password
    # The create call is issued on every run, before the database is selected.
    influx.create_database(influx_db)
    influx.switch_database(influx_db)

    return influx


# order of the skills in the stats data
STATS = ["Overall",
         "Attack",
         "Defence",
         "Strength",
         "Hitpoints",
         "Ranged",
         "Prayer",
         "Magic",
         "Cooking",
         "Woodcutting",
         "Fletching",
         "Fishing",
         "Firemaking",
         "Crafting",
         "Smithing",
         "Mining",
         "Herblore",
         "Agility",
         "Thieving",
         "Slayer",
         "Farming",
         "Runecraft",
         "Hunter",
         "Construction"]

# Cumulative xp thresholds: LEVELS[n] is the xp at which level n + 1 begins, so
# LEVELS[level] is the xp needed to advance past `level`. Levels at or beyond
# len(LEVELS) have no further threshold in this table.
LEVELS = [0, 83, 174, 276, 388, 512, 650, 801, 969, 1154,
          1358, 1584, 1833, 2107, 2411, 2746, 3115, 3523, 3973, 4470,
          5018, 5624, 6291, 7028, 7842, 8740, 9730, 10824, 12031, 13363,
          14833, 16456, 18247, 20224, 22406, 24815, 27473, 30408, 33648, 37224,
          41171, 45529, 50339, 55649, 61512, 67983, 75127, 83014, 91721, 101333,
          111945, 123660, 136594, 150872, 166636, 184040, 203254, 224466, 247886, 273742,
          302288, 333804, 368599, 407015, 449428, 496254, 547953, 605032, 668051, 737627,
          814445, 899257, 992895, 1096278, 1210421, 1336443, 1475581, 1629200, 1798808, 1986068,
          2192818, 2421087, 2673114, 2951373, 3258594, 3597792, 3972294, 4385776, 4842295, 5346332,
          5902831, 6517253, 7195629, 7944614, 8771558, 9684577, 10692629, 11805606, 13034431]


# One parsed hiscore row; xp_next is the xp remaining to the next level, or -1
# when it cannot be computed.
StatData = namedtuple("StatData", "skill rank level xp xp_next")


def _xp_to_next_level(skill, level, xp):
    """Return the xp still needed to advance past `level`, or -1 if not applicable."""
    # The "Overall" row reports a combined level across all skills, so the
    # per-skill threshold table does not describe it.
    if skill == "Overall":
        return -1
    # A negative level would silently index LEVELS from its end, and a negative
    # xp (the body is allowed to start with "-") would inflate the remainder.
    if xp < 0 or not 1 <= level < len(LEVELS):
        return -1
    return LEVELS[level] - xp


def _parse_stats(data):
    """Parse a hiscore CSV body into a list of StatData, one per known skill.

    Rows are matched positionally against STATS; any rows beyond the known
    skills are ignored. Each used row must hold at least three integer
    columns (rank, level, xp), otherwise IndexError or ValueError is raised.
    """
    rows = csv.reader(data.splitlines(), delimiter=',')
    ret = []
    for skill, row in zip(STATS, rows):  # rank, level, xp
        rank = int(row[0])
        level = int(row[1])
        xp = int(row[2])
        ret.append(StatData(skill.lower(), rank, level, xp,
                            _xp_to_next_level(skill, level, xp)))
    return ret


def get_player(name):
    """Fetch and parse the hiscore entry for player `name`.

    Up to MAX_TRIES requests are made. Before attempt k (counting from 0) the
    function sleeps for a random 5-30 seconds multiplied by k, so the first
    attempt is immediate and later ones back off. An attempt is retried when
    the request raises, the status code is not 200, or the body does not start
    with a digit or a minus sign.

    Returns a list of StatData in STATS order, or None if every attempt failed.
    """
    data = None

    for trynum in range(0, MAX_TRIES):
        sleep(randint(5, 30) * trynum)
        print(f"try #{trynum}")

        try:
            r = requests.get(HISCORE_URL,
                             params={"player": name},
                             timeout=timeout)
        except requests.RequestException:
            # Only transport-level failures are retried; KeyboardInterrupt and
            # SystemExit must still be able to stop the job.
            print(traceback.format_exc())
            continue

        if r.status_code != 200:
            print(f"retrying, status code = {r.status_code}")
            continue

        body = r.text
        print(body)

        if not body or body[0] not in "-1234567890":
            print("retrying, body must start with a number")
            continue

        data = body
        break

    if data is None:
        return None

    return _parse_stats(data)


def _build_points(player_name, stats):
    """Build one InfluxDB point per measurement, with one field per skill."""
    return [
        {"measurement": measurement,
         "tags": {"username": player_name},
         "fields": {stat.skill: getattr(stat, measurement) for stat in stats}}
        for measurement in ("rank", "level", "xp", "xp_next")
    ]


def main():
    """Scrape the player named by PLAYER_NAME and write the stats to InfluxDB.

    Exits with status 1 when PLAYER_NAME or the InfluxDB settings are missing,
    or when the player's stats could not be fetched.
    """
    logging.basicConfig(level=logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # Validate configuration before any network call. .get() is used so that
    # an unset variable reaches the error message instead of raising KeyError.
    player_name = os.environ.get("PLAYER_NAME")

    if not player_name:
        logging.critical("must set PLAYER_NAME")
        sys.exit(1)

    player_name = player_name.lower()

    influx = get_influx()

    stats = get_player(player_name)

    if stats is None:
        print("failed to fetch player!")
        sys.exit(1)

    influx.write_points(_build_points(player_name, stats))

    print("done!")


if __name__ == '__main__':
    main()