#!/usr/bin/env python3
"""Ingest CSSE COVID-19 US daily report CSV files into an InfluxDB database."""

import csv
import datetime
from urllib.parse import urlparse

from influxdb import InfluxDBClient


def f2int(x):
    """Convert a number or numeric string such as ``"12.0"`` to ``int``.

    The value goes through ``float`` first, so any fractional part is
    truncated toward zero.
    """
    return int(float(x))


# Converter applied to the text of each known CSV column.  A converter of
# ``None`` means the column is left out of the converted row.
row_fields = {
    'Hospitalization_Rate': float,
    'People_Hospitalized': f2int,
    'Incident_Rate': float,
    'Province_State': str,
    'FIPS': f2int,
    'People_Tested': f2int,
    'Lat': float,
    'Long_': float,
    'ISO3': str,
    'Testing_Rate': float,
    'Deaths': f2int,
    'Mortality_Rate': float,
    'Recovered': f2int,
    'Confirmed': f2int,
    'UID': f2int,
    'Last_Update': None,
    'Active': f2int,
    'Country_Region': str,
}

# Province_State values grouped by the "color" tag attached to their points.
# https://www.nytimes.com/elections/2016/results/president
states = {
    'red': {
        'Georgia', 'Ohio', 'Montana', 'Pennsylvania', 'South Dakota',
        'Tennessee', 'Nebraska', 'North Dakota', 'Mississippi', 'Utah',
        'Missouri', 'Alaska', 'Idaho', 'Arkansas', 'Wyoming', 'Alabama',
        'Indiana', 'Kentucky', 'Louisiana', 'Kansas', 'Florida', 'Iowa',
        'Oklahoma', 'Texas', 'West Virginia', 'Arizona', 'South Carolina',
        'Wisconsin', 'North Carolina', 'Michigan',
    },
    'blue': {
        'Minnesota', 'New Mexico', 'Oregon', 'Nevada', 'New Jersey',
        'Colorado', 'Washington', 'New Hampshire', 'District of Columbia',
        'Maryland', 'Virginia', 'California', 'Hawaii', 'Massachusetts',
        'New York', 'Rhode Island', 'Vermont', 'Connecticut', 'Delaware',
        'Illinois', 'Maine',
    },
    'other': {
        'American Samoa', 'Guam', 'Puerto Rico', 'Diamond Princess',
        'Virgin Islands', 'Grand Princess', 'Northern Mariana Islands',
    },
}

# Reverse lookup: Province_State name -> color.  Built with names that do not
# rebind the module-level ``states`` mapping.
states_bycolor = {
    state: color
    for color, members in states.items()
    for state in members
}


def convert(func, inp):
    """Apply converter *func* to the CSV cell text *inp*.

    An empty cell is converted as if it contained ``0`` (so ``float`` gives
    ``0.0`` and ``str`` gives ``"0"``).
    """
    if inp == "":
        return func(0)
    return func(inp)


def _read_records(fpath):
    """Yield each data line of the CSV at *fpath* as a ``{header: text}`` dict.

    The first line is taken as the header.  Blank lines are skipped, and a
    line shorter than the header simply yields fewer keys.
    """
    # newline="" lets the csv module handle line endings itself.
    with open(fpath, "r", newline="") as f:
        reader = csv.reader(f)
        headers = next(reader, None)
        if headers is None:
            return
        for line in reader:
            if not line:
                continue
            yield dict(zip(headers, line))


def _convert_record(record):
    """Return *record* with every known, enabled column converted.

    Columns missing from ``row_fields`` or mapped to ``None`` are omitted.
    """
    return {
        name: convert(row_fields[name], text)
        for name, text in record.items()
        if row_fields.get(name)
    }


def get_rows(fpath):
    """Yield one converted ``{column: value}`` dict per data line of *fpath*.

    Values are converted with the callables in ``row_fields``.  Columns whose
    converter is ``None`` (``Last_Update``) and columns that are not listed in
    ``row_fields`` at all are left out instead of raising ``KeyError``.
    """
    for record in _read_records(fpath):
        yield _convert_record(record)


def get_data_for_influx(fpath, assigned_date=None):
    """Build the list of ``covid`` measurement points for the CSV at *fpath*.

    Args:
        fpath: Path of a daily report CSV file.
        assigned_date: Timestamp used for every point.  When falsy, the
            row's raw ``Last_Update`` text is used instead.

    Returns:
        A list of point dicts with ``measurement``, ``tags``, ``time`` and
        ``fields`` keys.  Rows whose ``Province_State`` is ``"Recovered"``
        are skipped.
    """
    data = []
    for record in _read_records(fpath):
        row = _convert_record(record)
        state = row["Province_State"]
        if state == "Recovered":
            continue
        data.append({
            "measurement": "covid",
            "tags": {
                "state": state,
                "iso3": row["ISO3"],
                # Names missing from the table fall into the existing
                # "other" bucket rather than aborting the whole file.
                "color": states_bycolor.get(state, "other"),
            },
            # Last_Update is excluded from the converted row, so it has to
            # be read from the raw record.
            "time": assigned_date or record.get("Last_Update"),
            "fields": row,
        })
    return data


def ingest_file(influx_client, fname, assigned_date):
    """Write every point parsed from *fname* through *influx_client*.

    Args:
        influx_client: Client object exposing ``write_points``.
        fname: Path of the CSV file to ingest.
        assigned_date: Timestamp given to every point of the file.
    """
    influx_client.write_points(get_data_for_influx(fname, assigned_date))


def main():
    """Create and select the ``covid`` database, then ingest the reports.

    One file is read for every day from 2020-04-12 up to, but not including,
    today.  Each file's points are timestamped ``00:00:00Z`` on its date.
    """
    influx_uri = urlparse("http://localhost:10019/")
    # No credentials are passed to the client.
    influx_client = InfluxDBClient(influx_uri.hostname, str(influx_uri.port))
    influx_client.create_database("covid")
    influx_client.switch_database("covid")

    when = datetime.date(2020, 4, 12)
    now = datetime.date.today()
    one_day = datetime.timedelta(days=1)

    while when < now:
        daystring = when.strftime("%m-%d-%Y")
        fname = f"COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/{daystring}.csv"
        print(fname)
        ingest_file(influx_client, fname, when.strftime("%Y-%m-%dT%H:%M:%SZ"))
        when += one_day


if __name__ == '__main__':
    main()