support writing to multiple destinations
All checks were successful
Gitea/osrsscrape/pipeline/head This commit looks good

This commit is contained in:
dave 2024-10-15 16:49:45 -07:00
parent 61bd01a852
commit e3c0338ce2
2 changed files with 33 additions and 11 deletions

View File

@ -29,7 +29,7 @@ variable "players" {
variable "influx_url" {
type = string
description = "influxdb connection url"
description = "influxdb connection urls - can be multiple, comma separated"
}
variable "namespace" {

View File

@ -2,33 +2,42 @@ import sys
import json
import logging
from urllib.parse import urlparse
from threading import Thread
from influxdb import InfluxDBClient
import click
from osrsscrape.scrape import get_player
def get_influx(influx_url):
url = urlparse(influx_url)
def get_influx(influx_urls):
clients = []
db_name = url.path[1:]
for influx_url in influx_urls:
url = urlparse(influx_url)
influx = InfluxDBClient(url.hostname, url.port, url.username, url.password)
influx.create_database(db_name)
influx.switch_database(db_name)
db_name = url.path[1:]
return influx
influx = InfluxDBClient(url.hostname, url.port, url.username, url.password)
influx.create_database(db_name)
influx.switch_database(db_name)
clients.append(influx)
return clients
@click.command(help="tool for scraping osrs highscores")
@click.argument('player_name', envvar='RSSCRAPE_PLAYER_NAME')
@click.option('--influx-url', envvar='RSSCRAPE_INFLUX_URL', required=True, help="influxdb server url")
@click.option('--influx-url', envvar='RSSCRAPE_INFLUX_URL', required=True, help="influxdb server url") # can be multiple, comma separated
@click.option('-a', '--alias', envvar='RSSCRAPE_PLAYER_ALIAS', help="value for player alias field instead of username")
@click.option('--timeout', '-t', envvar='RSSCRAPE_TIMEOUT', type=int, default=60, help="scrape timeout")
@click.option('--dry-run', '-n', is_flag=True, envvar='RSSCRAPE_DRYRUN', help='dry run mode')
def main(player_name, alias, influx_url, timeout, dry_run):
logging.basicConfig(level=logging.WARNING,
format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
influx = get_influx(influx_url)
influx_urls = influx_url.split(",")
influxes = get_influx(influx_urls)
player_name = player_name.lower()
stats = get_player(player_name, timeout)
@ -57,6 +66,19 @@ def main(player_name, alias, influx_url, timeout, dry_run):
click.echo(json.dumps(body, indent=4))
click.echo("not submitting points to influxdb due to dry run mode")
else:
influx.write_points(body)
parallel_submit(influxes, influx_urls, body)
click.echo("done!")
def parallel_submit(influxes, influx_urls, body):
threads = []
for client in influxes:
t = Thread(target=client.write_points, args=(body, ), daemon=True)
t.start()
threads.append(t)
for i, t in enumerate(threads):
t.join()
print("done:", influx_urls[i])