My tools for viewing COVID data.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

174 lines
4.0 KiB

  1. #!/usr/bin/env python3
  2. import csv
  3. from influxdb import InfluxDBClient
  4. from urllib.parse import urlparse
  5. import datetime
  6. row_fields = ["State", "Country", "Last_Update", "Confirmed", "Deaths", "Recovered", "Active", "Latitude", "Longitude"]
  7. f2int = lambda x: int(float(x))
  8. row_fields = {
  9. 'Hospitalization_Rate': float,
  10. 'People_Hospitalized': f2int,
  11. 'Incident_Rate': float,
  12. 'Province_State': str,
  13. 'FIPS': f2int,
  14. 'People_Tested': f2int,
  15. 'Lat': float,
  16. 'Long_': float,
  17. 'ISO3': str,
  18. 'Testing_Rate': float,
  19. 'Deaths': f2int,
  20. 'Mortality_Rate': float,
  21. 'Recovered': f2int,
  22. 'Confirmed': f2int,
  23. 'UID': f2int,
  24. 'Last_Update': None,
  25. 'Active': f2int,
  26. 'Country_Region': str,
  27. }
  28. # https://www.nytimes.com/elections/2016/results/president
  29. states = {
  30. 'red': {
  31. 'Georgia',
  32. 'Ohio',
  33. 'Montana',
  34. 'Pennsylvania',
  35. 'South Dakota',
  36. 'Tennessee',
  37. 'Nebraska',
  38. 'North Dakota',
  39. 'Mississippi',
  40. 'Utah',
  41. 'Missouri',
  42. 'Alaska',
  43. 'Idaho',
  44. 'Arkansas',
  45. 'Wyoming',
  46. 'Alabama',
  47. 'Indiana',
  48. 'Kentucky',
  49. 'Louisiana',
  50. 'Kansas',
  51. 'Florida',
  52. 'Iowa',
  53. 'Oklahoma',
  54. 'Texas',
  55. 'West Virginia',
  56. 'Arizona',
  57. 'South Carolina',
  58. 'Wisconsin',
  59. 'North Carolina',
  60. 'Michigan',
  61. },
  62. 'blue': {
  63. 'Minnesota',
  64. 'New Mexico',
  65. 'Oregon',
  66. 'Nevada',
  67. 'New Jersey',
  68. 'Colorado',
  69. 'Washington',
  70. 'New Hampshire',
  71. 'District of Columbia',
  72. 'Maryland',
  73. 'Virginia',
  74. 'California',
  75. 'Hawaii',
  76. 'Massachusetts',
  77. 'New York',
  78. 'Rhode Island',
  79. 'Vermont',
  80. 'Connecticut',
  81. 'Delaware',
  82. 'Illinois',
  83. 'Maine',
  84. },
  85. 'other': {
  86. 'American Samoa',
  87. 'Guam',
  88. 'Puerto Rico',
  89. 'Diamond Princess',
  90. 'Virgin Islands',
  91. 'Grand Princess',
  92. 'Northern Mariana Islands',
  93. }
  94. }
  95. states_bycolor = {}
  96. for color, states in states.items():
  97. for state in states:
  98. states_bycolor[state] = color
  99. def convert(func, inp):
  100. if inp == "":
  101. return func(0)
  102. return func(inp)
  103. def get_rows(fpath):
  104. first = True
  105. headers = None
  106. with open(fpath, "r") as f:
  107. r = csv.reader(f)
  108. for line in r:
  109. if first:
  110. first = False
  111. headers = line
  112. continue
  113. yield {headers[i]: convert(row_fields[headers[i]], line[i])
  114. for i in range(0, len(headers))
  115. if row_fields[headers[i]]}
  116. def get_data_for_influx(fpath, assigned_date=None):
  117. data = []
  118. for row in get_rows(fpath):
  119. if row["Province_State"] == "Recovered":
  120. continue
  121. data.append({
  122. "measurement": "covid",
  123. "tags": {
  124. "state": row["Province_State"],
  125. "iso3": row["ISO3"],
  126. "color": states_bycolor[row["Province_State"]]
  127. },
  128. "time": assigned_date or row["Last_Update"],
  129. "fields": row
  130. })
  131. return data
  132. def ingest_file(influx_client, fname, assigned_date):
  133. d = get_data_for_influx(fname, assigned_date)
  134. # import json
  135. # print(json.dumps(d, indent=4))
  136. influx_client.write_points(d)
  137. def main():
  138. influx_uri = urlparse("http://localhost:10019/")
  139. influx_client = InfluxDBClient(influx_uri.hostname, str(influx_uri.port)) # user, password)
  140. influx_client.create_database("covid")
  141. influx_client.switch_database("covid")
  142. when = datetime.date(month=4, day=12, year=2020)
  143. now = datetime.date.today()
  144. while when < now:
  145. daystring = when.strftime("%m-%d-%Y")
  146. fname = f"COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/{daystring}.csv"
  147. print(fname)
  148. ingest_file(influx_client, fname, when.strftime("%Y-%m-%dT%H:%M:%SZ"))
  149. when = when + datetime.timedelta(days=1)
  150. if __name__ == '__main__':
  151. main()