#!/usr/bin/env python3
# Loads the csse_covid_19_daily_reports_us CSV files into the "covid" InfluxDB database.

import csv
|
|
from influxdb import InfluxDBClient
|
|
from urllib.parse import urlparse
|
|
import datetime
|
|
|
|
def f2int(x):
    """Parse a numeric string such as ``"12.0"`` into an ``int``.

    The value goes through ``float`` first so decimal-formatted numbers are
    accepted; ``int`` then truncates any fractional part toward zero.
    """
    return int(float(x))


# Maps each recognised CSV column name to the callable that converts its raw
# string cell.  A value of ``None`` marks a column that is recognised but
# dropped: the row builder only keeps columns whose converter is truthy.
row_fields = {
    'Hospitalization_Rate': float,
    'People_Hospitalized': f2int,
    'Incident_Rate': float,
    'Province_State': str,
    'FIPS': f2int,
    'People_Tested': f2int,
    'Lat': float,
    'Long_': float,
    'ISO3': str,
    'Testing_Rate': float,
    'Deaths': f2int,
    'Mortality_Rate': float,
    'Recovered': f2int,
    'Confirmed': f2int,
    'UID': f2int,
    'Last_Update': None,
    'Active': f2int,
    'Country_Region': str,
    'Total_Test_Results': f2int,
    'Case_Fatality_Ratio': float,
    'Cases_28_Days': f2int,
    'Deaths_28_Days': f2int,
}
|
|
|
|
# https://www.nytimes.com/elections/2016/results/president
# Province_State names grouped by the colour taken from the 2016 results
# linked above; "other" holds the names that are in neither colour group.
states = dict(
    red={
        'Alabama',
        'Alaska',
        'Arizona',
        'Arkansas',
        'Florida',
        'Georgia',
        'Idaho',
        'Indiana',
        'Iowa',
        'Kansas',
        'Kentucky',
        'Louisiana',
        'Michigan',
        'Mississippi',
        'Missouri',
        'Montana',
        'Nebraska',
        'North Carolina',
        'North Dakota',
        'Ohio',
        'Oklahoma',
        'Pennsylvania',
        'South Carolina',
        'South Dakota',
        'Tennessee',
        'Texas',
        'Utah',
        'West Virginia',
        'Wisconsin',
        'Wyoming',
    },
    blue={
        'California',
        'Colorado',
        'Connecticut',
        'Delaware',
        'District of Columbia',
        'Hawaii',
        'Illinois',
        'Maine',
        'Maryland',
        'Massachusetts',
        'Minnesota',
        'Nevada',
        'New Hampshire',
        'New Jersey',
        'New Mexico',
        'New York',
        'Oregon',
        'Rhode Island',
        'Vermont',
        'Virginia',
        'Washington',
    },
    other={
        'American Samoa',
        'Diamond Princess',
        'Grand Princess',
        'Guam',
        'Northern Mariana Islands',
        'Puerto Rico',
        'Virgin Islands',
    },
)
|
|
|
|
# https://upload.wikimedia.org/wikipedia/commons/4/49/ElectoralCollege2020.svg
# Province_State names grouped by the colour taken from the 2020 map linked
# above; "other" holds the names that are in neither colour group.
# Missouri belongs under "red" for 2020 (it was previously listed as "blue").
states_2020 = {
    'red': {
        'Alabama',
        'Alaska',
        'Arkansas',
        'Florida',
        'Idaho',
        'Indiana',
        'Iowa',
        'Kansas',
        'Kentucky',
        'Louisiana',
        'Mississippi',
        'Missouri',
        'Montana',
        'Nebraska',
        'North Carolina',
        'North Dakota',
        'Ohio',
        'Oklahoma',
        'South Carolina',
        'South Dakota',
        'Tennessee',
        'Texas',
        'Utah',
        'West Virginia',
        'Wyoming',
    },
    'blue': {
        'Arizona',
        'California',
        'Colorado',
        'Connecticut',
        'Delaware',
        'District of Columbia',
        'Georgia',
        'Hawaii',
        'Illinois',
        'Maine',
        'Maryland',
        'Massachusetts',
        'Michigan',
        'Minnesota',
        'Nevada',
        'New Hampshire',
        'New Jersey',
        'New Mexico',
        'New York',
        'Oregon',
        'Pennsylvania',
        'Rhode Island',
        'Vermont',
        'Virginia',
        'Washington',
        'Wisconsin',
    },
    'other': {
        'American Samoa',
        'Diamond Princess',
        'Grand Princess',
        'Guam',
        'Northern Mariana Islands',
        'Puerto Rico',
        'Virgin Islands',
    },
}
|
|
|
|
def _index_by_color(groups):
    """Invert a ``{color: {name, ...}}`` mapping into ``{name: color}``."""
    lookup = {}
    for color, members in groups.items():
        for name in members:
            lookup[name] = color
    return lookup


# Per-name colour lookups.  Built through a helper so that the module-level
# ``states`` mapping is not rebound by a loop variable of the same name.
states_bycolor = _index_by_color(states)
states_bycolor_2020 = _index_by_color(states_2020)
|
|
|
|
|
|
def convert(func, inp):
    """Apply *func* to *inp*, substituting ``0`` when *inp* is an empty string.

    Args:
        func: Callable used to convert the value.
        inp: Raw value to convert.

    Returns:
        ``func(0)`` if *inp* equals ``""``, otherwise ``func(inp)``.
    """
    value = 0 if inp == "" else inp
    return func(value)
|
|
|
|
|
|
def get_rows(fpath):
    """Yield one converted dict per data row of the CSV file at *fpath*.

    The first row is taken as the header.  For every later row, each cell
    whose column has a truthy converter in ``row_fields`` is passed through
    ``convert`` and stored under its column name.  Columns mapped to ``None``
    and columns that are not listed in ``row_fields`` are left out.

    Args:
        fpath: Path of the CSV file to read.

    Yields:
        dict: Column name -> converted value for one data row.

    Raises:
        IndexError: If a data row is too short to contain a kept column.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation requires for file objects handed to csv.reader.
    with open(fpath, "r", newline="") as f:
        reader = csv.reader(f)
        headers = next(reader, None)
        if headers is None:
            return  # empty file: there is no header and nothing to yield

        # Resolve the kept columns once per file instead of once per cell.
        # .get() makes an unrecognised column behave like a ``None`` entry
        # rather than aborting the whole file with a KeyError.
        kept = [
            (index, name, row_fields[name])
            for index, name in enumerate(headers)
            if row_fields.get(name)
        ]

        for line in reader:
            if not line:
                continue  # csv.reader yields [] for a blank line
            yield {name: convert(func, line[index]) for index, name, func in kept}
|
|
|
|
|
|
def get_data_for_influx(fpath, assigned_date=None):
    """Build the list of InfluxDB point dicts for the CSV file at *fpath*.

    Each row from ``get_rows`` becomes one point of measurement ``"covid"``,
    tagged with its state name, ISO3 code and both colour lookups, and
    carrying the whole converted row as its fields.  Rows whose
    ``Province_State`` is ``"Recovered"`` are skipped.

    Args:
        fpath: Path of the CSV file to read.
        assigned_date: Timestamp to stamp on every point.  When falsy, the
            row's ``Last_Update`` value is used if the row has one.

    Returns:
        list[dict]: Points in the shape passed to ``write_points``.

    Raises:
        KeyError: If a row lacks ``Province_State``/``ISO3`` or names a state
            missing from ``states_bycolor`` / ``states_bycolor_2020``.
    """
    data = []
    for row in get_rows(fpath):
        state = row["Province_State"]
        if state == "Recovered":
            continue

        point = {
            "measurement": "covid",
            "tags": {
                "state": state,
                "iso3": row["ISO3"],
                "color": states_bycolor[state],
                "color_2020": states_bycolor_2020[state],
            },
        }

        # Rows may not carry "Last_Update" (get_rows drops columns without a
        # converter), so look it up with .get() instead of indexing.  With no
        # timestamp at all, "time" is left out of the point entirely.
        timestamp = assigned_date or row.get("Last_Update")
        if timestamp:
            point["time"] = timestamp

        point["fields"] = row
        data.append(point)
    return data
|
|
|
|
|
|
def ingest_file(influx_client, fname, assigned_date):
    """Convert one CSV file into points and write them through *influx_client*.

    Args:
        influx_client: Client object whose ``write_points`` receives the points.
        fname: Path of the CSV file to ingest.
        assigned_date: Timestamp forwarded to ``get_data_for_influx``.
    """
    influx_client.write_points(get_data_for_influx(fname, assigned_date))
|
|
|
|
|
|
def main():
    """Ingest every daily CSV from 2020-04-12 up to (not including) today.

    Connects to the InfluxDB endpoint at ``http://localhost:10019/``, creates
    and selects the ``covid`` database, then for each day prints the CSV path
    and ingests that file, stamping its points with the day's date formatted
    as ``%Y-%m-%dT%H:%M:%SZ``.
    """
    endpoint = urlparse("http://localhost:10019/")
    influx_client = InfluxDBClient(endpoint.hostname, str(endpoint.port))  # user, password)
    influx_client.create_database("covid")
    influx_client.switch_database("covid")

    first_day = datetime.date(year=2020, month=4, day=12)
    today = datetime.date.today()

    # A non-positive day count gives an empty range, matching a loop that
    # only runs while the current day is strictly before today.
    for offset in range((today - first_day).days):
        day = first_day + datetime.timedelta(days=offset)
        daystring = day.strftime("%m-%d-%Y")
        fname = f"COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/{daystring}.csv"
        print(fname)

        ingest_file(influx_client, fname, day.strftime("%Y-%m-%dT%H:%M:%SZ"))


if __name__ == '__main__':
    main()
|