initial commit of original project

This commit is contained in:
dave 2021-06-23 20:20:25 -07:00
commit c9160cf23e
5 changed files with 538 additions and 0 deletions

1
.dockerignore Normal file
View File

@ -0,0 +1 @@
# Keep the local `testenv` path out of the Docker build context
# (presumably a local virtualenv used for development -- TODO confirm).
testenv

14
Dockerfile Normal file
View File

@ -0,0 +1,14 @@
# Image that runs scrape.py once per container start and then exits.
FROM ubuntu:focal

# Install pip (pulls in python3). Skip recommended packages (compilers, headers)
# and drop the apt package lists in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest on its own so the pip layer stays cached until
# requirements.txt changes. COPY is used because no ADD-only feature is needed.
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

COPY scrape.py /usr/local/bin/scrape.py
RUN chmod +x /usr/local/bin/scrape.py

# The scraper only makes outbound network calls, so it does not need root.
# 65534 is the stock "nobody" account; a numeric id lets runAsNonRoot verify it.
USER 65534:65534

ENTRYPOINT ["/usr/local/bin/scrape.py"]

347
cron.yml Normal file
View File

@ -0,0 +1,347 @@
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME=moptop650.
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-mop
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: moptop650
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="grand imam".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-imam
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "grand imam"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="sword spoons".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-spoon
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "sword spoons"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="moptop 651".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-mop1
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "moptop 651"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="botusername".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-botusername
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "botusername"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="uim mop".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-uim
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "uim mop"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="super habibi".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-habibi
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "super habibi"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME=gerbi7.
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-gerbi
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: gerbi7
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME=Swampletics.
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-swamp
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: Swampletics
          restartPolicy: Never
---
# Runs the rsscrape image every 5 minutes with PLAYER_NAME="souljaboysod".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-soulja
  namespace: jobs
spec:
  schedule: "*/5 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "souljaboysod"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="hardcorepp2".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-pp
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "hardcorepp2"
          restartPolicy: Never
---
# Runs the rsscrape image every 10 minutes with PLAYER_NAME="hm for off".
# batch/v1 is the stable CronJob API (Kubernetes 1.21+); batch/v1beta1 was
# removed in Kubernetes 1.25.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: rsscrape-ceng2
  namespace: jobs
spec:
  schedule: "*/10 * * * *"
  # If the previous run is still active at the next tick, it is replaced.
  concurrencyPolicy: Replace
  # Skip a run that cannot be started within 60s of its scheduled time.
  startingDeadlineSeconds: 60
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: scrape
            image: dockermirror:5000/dpedu/rsscrape
            imagePullPolicy: Always
            env:
            - name: INFLUX_HOST
              value: "influx01"
            - name: INFLUX_PORT
              value: "8086"
            - name: INFLUX_DB
              value: osrs
            - name: PLAYER_NAME
              value: "hm for off"
          restartPolicy: Never

9
requirements.txt Normal file
View File

@ -0,0 +1,9 @@
# Exact-version pins installed into the image with `pip3 install -r`.
# scrape.py imports `influxdb` and `requests` directly; the remaining pins are
# presumably their transitive dependencies -- TODO confirm before pruning.
certifi==2019.11.28
chardet==3.0.4
idna==2.8
influxdb==5.2.3
python-dateutil==2.8.1
pytz==2019.3
requests==2.22.0
six==1.13.0
urllib3==1.25.7

167
scrape.py Normal file
View File

@ -0,0 +1,167 @@
#!/usr/bin/env python3
import logging
from influxdb import InfluxDBClient
import requests
import os
from random import randint
from time import sleep
import sys
import csv
import traceback
from collections import namedtuple
# Timeout in seconds passed to the HTTP request; RSSCRAPE_TIMEOUT overrides the default of 60.
timeout = int(os.getenv("RSSCRAPE_TIMEOUT", 60))
def get_influx():
    """
    Build an InfluxDB client from the environment.

    Reads INFLUX_HOST, INFLUX_PORT and INFLUX_DB. If any of them is missing or
    empty, logs a critical message and exits the process with status 1.
    Otherwise creates the database, selects it on the client, and returns the
    client.
    """
    settings = [os.environ.get(key) for key in ("INFLUX_HOST", "INFLUX_PORT", "INFLUX_DB")]
    if not all(settings):
        logging.critical("must set INFLUX_HOST, INFLUX_PORT and INFLUX_DB")
        sys.exit(1)

    host, port, database = settings
    client = InfluxDBClient(host, port)  # user, password
    client.create_database(database)
    client.switch_database(database)
    return client
# Skill names in the order their rows appear in the stats data.
STATS = (
    "Overall Attack Defence Strength Hitpoints Ranged Prayer Magic "
    "Cooking Woodcutting Fletching Fishing Firemaking Crafting Smithing Mining "
    "Herblore Agility Thieving Slayer Farming Runecraft Hunter Construction"
).split()

# XP thresholds indexed by level: get_player computes the XP still needed as
# LEVELS[level] - xp, so LEVELS[level] is the threshold that follows `level`.
LEVELS = [
    0, 83, 174, 276, 388, 512, 650, 801, 969, 1154,
    1358, 1584, 1833, 2107, 2411, 2746, 3115, 3523, 3973, 4470,
    5018, 5624, 6291, 7028, 7842, 8740, 9730, 10824, 12031, 13363,
    14833, 16456, 18247, 20224, 22406, 24815, 27473, 30408, 33648, 37224,
    41171, 45529, 50339, 55649, 61512, 67983, 75127, 83014, 91721, 101333,
    111945, 123660, 136594, 150872, 166636, 184040, 203254, 224466, 247886, 273742,
    302288, 333804, 368599, 407015, 449428, 496254, 547953, 605032, 668051, 737627,
    814445, 899257, 992895, 1096278, 1210421, 1336443, 1475581, 1629200, 1798808, 1986068,
    2192818, 2421087, 2673114, 2951373, 3258594, 3597792, 3972294, 4385776, 4842295, 5346332,
    5902831, 6517253, 7195629, 7944614, 8771558, 9684577, 10692629, 11805606, 13034431,
]

# One parsed stats row: lower-cased skill name, rank, level, xp, and the XP
# remaining to the next threshold (-1 when not computed).
StatData = namedtuple("StatData", ["skill", "rank", "level", "xp", "xp_next"])
def _fetch_hiscores(name):
    """
    Download the raw hiscore text for ``name``.

    Makes up to five attempts. Before attempt ``n`` (0-based) it sleeps a
    random 5-30 seconds multiplied by ``n``, so the first attempt is
    immediate. An attempt is retried when the request raises, the status is
    not 200, or the body is empty or does not start with a digit or '-'.

    Returns the response body, or None when every attempt failed.
    """
    for trynum in range(5):
        sleep(randint(5, 30) * trynum)
        print(f"try #{trynum}")
        try:
            r = requests.get("https://secure.runescape.com/m=hiscore_oldschool/a=13/index_lite.ws",
                             params={"player": name},
                             timeout=timeout)
        except Exception:
            # Not a bare `except:`, so Ctrl-C and sys.exit() still end the job.
            print(traceback.format_exc())
            continue
        if r.status_code != 200:
            print(f"retrying, status code = {r.status_code}")
            continue
        body = r.text
        print(body)
        if len(body) == 0 or body[0] not in "-1234567890":
            print("retrying, body must start with a number")
            continue
        return body
    return None


def _parse_hiscores(data):
    """
    Turn hiscore CSV text into a list of StatData, one per entry in STATS.

    Each row is ``rank,level,xp``. Rows beyond len(STATS) are ignored.
    """
    ret = []
    rows = csv.reader(data.splitlines(), delimiter=',')
    for skill, row in zip(STATS, rows):
        rank, level, xp = int(row[0]), int(row[1]), int(row[2])
        xp_next = -1
        # Negative values (e.g. a "-1,-1,-1" row) must not reach the table
        # lookup: a negative level would index LEVELS from the end and produce
        # a bogus xp_next. Such rows keep the -1 sentinel instead.
        # NOTE: the "Overall" row carries a total level, so it only skips the
        # lookup when that total is >= len(LEVELS).
        if 0 < level < len(LEVELS) and xp >= 0:
            xp_next = LEVELS[level] - xp
        ret.append(StatData(skill.lower(), rank, level, xp, xp_next))
    return ret


def get_player(name):
    """
    Fetch and parse the hiscore stats for player ``name``.

    Returns a list of StatData in STATS order, or None when the hiscore text
    could not be downloaded after all retries.
    """
    data = _fetch_hiscores(name)
    if data is None:
        return None
    return _parse_hiscores(data)
def main():
    """
    Scrape one player's stats and write them to InfluxDB.

    Requires PLAYER_NAME in the environment (lower-cased before use). Exits
    with status 1 when PLAYER_NAME is unset or empty, or when the stats could
    not be fetched. On success writes four points (rank, level, xp, xp_next),
    each tagged with the username and holding one field per skill.
    """
    logging.basicConfig(level=logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # .get() rather than [] so an unset variable reaches the friendly error
    # below instead of dying with a KeyError. Checked before touching InfluxDB
    # so a misconfigured job does no remote work.
    player_name = os.environ.get("PLAYER_NAME")
    if not player_name:
        logging.critical("must set PLAYER_NAME")
        sys.exit(1)
    player_name = player_name.lower()

    influx = get_influx()

    stats = get_player(player_name)
    if stats is None:
        print("failed to fetch player!")
        sys.exit(1)

    # One point per measurement; its fields map skill name -> that measurement's value.
    body = [
        {"measurement": measurement,
         "tags": {"username": player_name},
         "fields": {stat.skill: getattr(stat, measurement) for stat in stats}}
        for measurement in ("rank", "level", "xp", "xp_next")
    ]

    influx.write_points(body)
    print("done!")


if __name__ == '__main__':
    main()