My tools for viewing COVID data.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

176 lines
4.1 KiB

  1. #!/usr/bin/env python3
  2. import csv
  3. from influxdb import InfluxDBClient
  4. from urllib.parse import urlparse
  5. import datetime
  6. row_fields = ["State", "Country", "Last_Update", "Confirmed", "Deaths", "Recovered", "Active", "Latitude", "Longitude"]
  7. f2int = lambda x: int(float(x))
  8. row_fields = {
  9. 'Hospitalization_Rate': float,
  10. 'People_Hospitalized': f2int,
  11. 'Incident_Rate': float,
  12. 'Province_State': str,
  13. 'FIPS': f2int,
  14. 'People_Tested': f2int,
  15. 'Lat': float,
  16. 'Long_': float,
  17. 'ISO3': str,
  18. 'Testing_Rate': float,
  19. 'Deaths': f2int,
  20. 'Mortality_Rate': float,
  21. 'Recovered': f2int,
  22. 'Confirmed': f2int,
  23. 'UID': f2int,
  24. 'Last_Update': None,
  25. 'Active': f2int,
  26. 'Country_Region': str,
  27. 'Total_Test_Results': f2int,
  28. 'Case_Fatality_Ratio': float,
  29. }
  30. # https://www.nytimes.com/elections/2016/results/president
  31. states = {
  32. 'red': {
  33. 'Georgia',
  34. 'Ohio',
  35. 'Montana',
  36. 'Pennsylvania',
  37. 'South Dakota',
  38. 'Tennessee',
  39. 'Nebraska',
  40. 'North Dakota',
  41. 'Mississippi',
  42. 'Utah',
  43. 'Missouri',
  44. 'Alaska',
  45. 'Idaho',
  46. 'Arkansas',
  47. 'Wyoming',
  48. 'Alabama',
  49. 'Indiana',
  50. 'Kentucky',
  51. 'Louisiana',
  52. 'Kansas',
  53. 'Florida',
  54. 'Iowa',
  55. 'Oklahoma',
  56. 'Texas',
  57. 'West Virginia',
  58. 'Arizona',
  59. 'South Carolina',
  60. 'Wisconsin',
  61. 'North Carolina',
  62. 'Michigan',
  63. },
  64. 'blue': {
  65. 'Minnesota',
  66. 'New Mexico',
  67. 'Oregon',
  68. 'Nevada',
  69. 'New Jersey',
  70. 'Colorado',
  71. 'Washington',
  72. 'New Hampshire',
  73. 'District of Columbia',
  74. 'Maryland',
  75. 'Virginia',
  76. 'California',
  77. 'Hawaii',
  78. 'Massachusetts',
  79. 'New York',
  80. 'Rhode Island',
  81. 'Vermont',
  82. 'Connecticut',
  83. 'Delaware',
  84. 'Illinois',
  85. 'Maine',
  86. },
  87. 'other': {
  88. 'American Samoa',
  89. 'Guam',
  90. 'Puerto Rico',
  91. 'Diamond Princess',
  92. 'Virgin Islands',
  93. 'Grand Princess',
  94. 'Northern Mariana Islands',
  95. }
  96. }
  97. states_bycolor = {}
  98. for color, states in states.items():
  99. for state in states:
  100. states_bycolor[state] = color
  101. def convert(func, inp):
  102. if inp == "":
  103. return func(0)
  104. return func(inp)
  105. def get_rows(fpath):
  106. first = True
  107. headers = None
  108. with open(fpath, "r") as f:
  109. r = csv.reader(f)
  110. for line in r:
  111. if first:
  112. first = False
  113. headers = line
  114. continue
  115. yield {headers[i]: convert(row_fields[headers[i]], line[i])
  116. for i in range(0, len(headers))
  117. if row_fields[headers[i]]}
  118. def get_data_for_influx(fpath, assigned_date=None):
  119. data = []
  120. for row in get_rows(fpath):
  121. if row["Province_State"] == "Recovered":
  122. continue
  123. data.append({
  124. "measurement": "covid",
  125. "tags": {
  126. "state": row["Province_State"],
  127. "iso3": row["ISO3"],
  128. "color": states_bycolor[row["Province_State"]]
  129. },
  130. "time": assigned_date or row["Last_Update"],
  131. "fields": row
  132. })
  133. return data
  134. def ingest_file(influx_client, fname, assigned_date):
  135. d = get_data_for_influx(fname, assigned_date)
  136. # import json
  137. # print(json.dumps(d, indent=4))
  138. influx_client.write_points(d)
  139. def main():
  140. influx_uri = urlparse("http://localhost:10019/")
  141. influx_client = InfluxDBClient(influx_uri.hostname, str(influx_uri.port)) # user, password)
  142. influx_client.create_database("covid")
  143. influx_client.switch_database("covid")
  144. when = datetime.date(month=4, day=12, year=2020)
  145. now = datetime.date.today()
  146. while when < now:
  147. daystring = when.strftime("%m-%d-%Y")
  148. fname = f"COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/{daystring}.csv"
  149. print(fname)
  150. ingest_file(influx_client, fname, when.strftime("%Y-%m-%dT%H:%M:%SZ"))
  151. when = when + datetime.timedelta(days=1)
  152. if __name__ == '__main__':
  153. main()