import csv
import os
import math
import requests
import matplotlib.pyplot as plt
from datetime import date, timedelta
import concurrent.futures
import sys
import requests_cache
import pathlib
# TODO: add more thorough comments to this code
# I know, I should use pandas for the CSV handling (Fiat Pandas are famously
# reliable), but the stdlib csv module keeps dependencies light.
SMOOTH_DATA_DAYS_FACTOR = 3  # days considered before and after the current one when smoothing;
# with a value of 1, each day is smoothed together with the day before and the day after
STDDEV_CRISPNESS = 2  # standard deviation of the Gaussian smoothing kernel; with 1, about 68%
# of the Gaussian mass falls within one day of the window center
MAX_CONNECTIONS = 6  # https://stackoverflow.com/questions/985431/max-parallel-http-connections-in-a-browser
SAVE_IMAGE_DPI = 300  # image saving quality
DAY_LABEL = 15  # add a tick label to the plot every DAY_LABEL days
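# With the defaults above, each day is smoothed over a 7-day window:
# SMOOTH_DATA_DAYS_FACTOR days on each side plus the day itself.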
# Possible future work: model the epidemic phases with a Markov chain, e.g.
# no_infections -> exponential growth -> plateau -> decline -> no_infections
class Plot:
def __init__(self):
self.x = list()
self.y = list()
def save_plot(self, title, xlabel, ylabel, path):
return self.__plot(title, xlabel, ylabel, save=True, show=False, path=path)
def show_plot(self, title, xlabel, ylabel):
return self.__plot(title, xlabel, ylabel, save=False, show=True, path=None)
def __plot(self, title, xlabel, ylabel, save=True, show=False, path=None):
        if isinstance(path, str) and save:
            path = pathlib.Path(path)
plt.plot(self.x, self.y, linestyle='dashed', linewidth=1, marker='o', markerfacecolor='blue', markersize=2)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.title(title)
plt.grid(True)
        font = {'weight': 'normal', 'size': 8}
        plt.rc('font', **font)
        n_values = len(self.x)
        how_many_labels = math.ceil(n_values / DAY_LABEL)  # one tick label every DAY_LABEL days
        sliding_offset = (n_values - 1) % DAY_LABEL  # shift the ticks so that the last day always gets one
        ticks = [(sliding_offset + i * DAY_LABEL) for i in range(how_many_labels)]
        last_tick = n_values - 1
        assert last_tick in ticks
plt.xticks(ticks, rotation="vertical")
if save:
            plt.savefig(f'{path / title}.png', dpi=SAVE_IMAGE_DPI, bbox_inches='tight')
if show:
plt.show()
plt.clf()
def append(self, x, y):
self.x.append(x)
self.y.append(y)
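# Example usage of Plot (illustrative only, with made-up values):
#   p = Plot()
#   p.append("01/03/20", 10)
#   p.append("02/03/20", 12)
#   p.show_plot("Demo", "Day", "New infections")
#   p.save_plot("Demo", "Day", "New infections", ".")  # writes ./Demo.png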
def fdr_norm(value, dev_std, avg=0):  # "fdr" = funzione di ripartizione, i.e. the CDF
    z = (value - avg) / dev_std  # standardize
    return 0.5 * (1 + math.erf(z / math.sqrt(2)))  # cumulative distribution function of the normal distribution
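# Sanity checks (standard normal CDF values):
#   fdr_norm(0, 1) == 0.5
#   fdr_norm(1, 1) ~= 0.8413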
def data_norm(values, center_index=None):
    # Gaussian-weighted smoothing of a window of daily values: each value is
    # weighted by the normal probability mass of its one-day-wide slot.
    assert isinstance(values, list)
    if center_index is None:
        center_index = math.floor(len(values) / 2)  # by default center on the middle day,
        # so that data relevance decays symmetrically, as shown here:
        # https://commons.wikimedia.org/wiki/File:Gaussian_Filter.svg
    normalized_value = 0
    coeff_sum = 0
    for i, val in enumerate(values):
        upper_bound = (i - center_index + 0.5)  # unused for the last element
        lower_bound = upper_bound - 1  # unused for the first element
        if i == len(values) - 1 and i == 0:  # single-element window
            coeff = 1
        elif i == len(values) - 1:  # the last element absorbs the whole right tail
            coeff = 1 - fdr_norm(lower_bound, STDDEV_CRISPNESS)
        elif i == 0:  # the first element absorbs the whole left tail
            coeff = fdr_norm(upper_bound, STDDEV_CRISPNESS)
        else:
            coeff = (fdr_norm(upper_bound, STDDEV_CRISPNESS) - fdr_norm(lower_bound, STDDEV_CRISPNESS))
        normalized_value += coeff * val
        coeff_sum += coeff
    # the weights telescope to exactly 1; allow a small tolerance for floating-point rounding
    assert math.isclose(coeff_sum, 1.0, rel_tol=1e-9)
    return normalized_value
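# Worked example: with STDDEV_CRISPNESS = 2 and a full 7-day window
# (SMOOTH_DATA_DAYS_FACTOR = 3), the weights come out to roughly
#   [0.106, 0.121, 0.175, 0.197, 0.175, 0.121, 0.106]
# i.e. the central day counts the most and the edge days absorb the tails.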
def pre_processing(csv_as_a_list):
    # Normalize territory names so that DPC and ISTAT data match and every name
    # is safe to use in a file name (no spaces, accents or apostrophes).
    rename = ["Ascoli Piceno", "La Spezia", "Reggio Calabria",
              "Reggio Emilia", "Vibo Valentia", "Sud Sardegna", "Friuli-Venezia Giulia", "Friuli Venezia Giulia"]
    for row in csv_as_a_list:
        if "denominazione_regione" in row:  # DPC provinces and regions files
            region_name = row["denominazione_regione"]
            if "P.A." in region_name:  # keep the autonomous provinces separate for higher precision
                row["denominazione_regione"] = region_name.replace("P.A. ", "PA-")
            elif region_name == "Valle d'Aosta":
                row["denominazione_regione"] = "Valle-d-Aosta"
            elif region_name in rename:
                row["denominazione_regione"] = region_name.replace(" ", "-")
        if "Territorio" in row:  # ISTAT population file
            row["Territorio"] = row["Territorio"].split(' /')[0]  # drop the bilingual alternative name
            territory = row["Territorio"]
            if territory == "Valle d'Aosta":
                if len(row['\ufeff"ITTER107"']) == 4:  # a 4-character ITTER107 code denotes a region
                    row["Territorio"] = "Valle-d-Aosta"
                else:
                    row["Territorio"] = "Aosta"
            elif territory == "L'Aquila":
                row["Territorio"] = "Aquila"  # avoid apostrophes in file names
            elif territory == "Forlì-Cesena":
                row["Territorio"] = "Forli-Cesena"  # same here, avoid the accented character
            elif territory == "Monza e della Brianza":
                row["Territorio"] = "Monza-Brianza"
            elif territory == "Pesaro e Urbino":
                row["Territorio"] = "Pesaro-Urbino"
            elif territory == "Reggio di Calabria":
                row["Territorio"] = "Reggio-Calabria"
            elif territory == "Reggio nell'Emilia":
                row["Territorio"] = "Reggio-Emilia"
            elif territory in rename:
                row["Territorio"] = territory.replace(" ", "-")  # avoid spaces in file names
            elif territory in ("Provincia Autonoma Bolzano", "Provincia Autonoma Trento"):
                row["Territorio"] = territory.replace("Provincia Autonoma ", "PA-")
    return csv_as_a_list
def download_csv(url):
    r = requests.get(url, stream=True)
    if r.status_code != 200:  # ISTAT answers with a 302 redirect to a warning page on failure
        print(f"Downloading failed for {url}!")
        return None
    file_content = bytearray()
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:
            file_content += chunk
    csv_lines = file_content.decode("UTF-8").splitlines()
    csv_as_a_list = list(csv.DictReader(csv_lines))
    return pre_processing(csv_as_a_list)  # normalize the territory names before returning
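# NB: requests_cache is installed in __main__, so repeated runs should be served
# from the local sqlite cache instead of re-downloading every daily CSV.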
fn = ["data", "stato", "codice_regione", "denominazione_regione", "codice_provincia", "denominazione_provincia",
"sigla_provincia", "lat", "long", "totale_casi", "note_it", "note_en"]
def get_population_per_territory_csv(territory_names):
    # The ISTAT download service changed its format, so the online fetch below is
    # disabled and a locally saved copy of the CSV is used instead.
    # url = "https://dati.istat.it/Download.ashx?type=csv&Delimiter=%2c&IncludeTimeSeriesIdentifiers=False&LabelType" \
    #       "=CodeAndLabel&LanguageCode=it "
    # csv_as_a_list = download_csv(url)
    csv_as_a_list = None
    if csv_as_a_list is None:
        with open("DCIS_POPRES1_14102020171844268.csv", "r") as f:
            csv_file_splitted = f.read().splitlines()
        csv_file_reader = csv.DictReader(csv_file_splitted)
        csv_as_a_list = pre_processing(list(csv_file_reader))
    reduced_csv = list()
    for entry in csv_as_a_list:
        if "Territorio" in entry:
            # keep only the totals: all sexes (SEXISTAT1 == 9), all ages (ETA1 == TOTAL),
            # any marital status (STATCIV2 == 99), restricted to the requested territories
            if entry["SEXISTAT1"] == '9' and entry["ETA1"] == "TOTAL" and entry["STATCIV2"] == '99' and \
                    entry["Territorio"] in territory_names:
                reduced_csv.append(entry)
        else:
            # print(f"Unrecognized format. Url: {url}")
            exit(-1)  # unrecognized population CSV format
    return reduced_csv
def get_provinces_csv(d):
    return get_provinces_csv_multithread(d)[0]
def get_regions_csv(d):
    return get_regions_csv_multithread(d)[0]
def get_regions_csv_multithread(d):
    # returns a (csv, date) pair so that results can be matched to their day
    # even when the thread pool completes them out of order
    assert isinstance(d, date)
    url = f"https://raw.github.com/pcm-dpc/COVID-19/master/dati-regioni/" \
          f"dpc-covid19-ita-regioni-{d.year}{d.month:02}{d.day:02}.csv"
    print(f"Fetching {d} regions data...")
    return download_csv(url), d
def get_provinces_csv_multithread(d):
    assert isinstance(d, date)
    url = f"https://raw.github.com/pcm-dpc/COVID-19/master/dati-province/" \
          f"dpc-covid19-ita-province-{d.year}{d.month:02}{d.day:02}.csv"
    print(f"Fetching {d} provinces data...")
    return download_csv(url), d
def get_province_infections_from_csv(c):
    assert isinstance(c, list)
    assert len(c) > 0
    assert isinstance(c[0], dict)  # a dumb assert, but it has saved me a few times
    infections = {}
    for row in c:
        if "sigla_provincia" in row:
            infections[row["sigla_provincia"]] = int(row["totale_casi"])
    return infections
def diff_infections_between_csv(csv0, csv1, provinces_abbr):  # convenient for batch downloads
    assert csv0 is not None and csv1 is not None
    assert isinstance(csv0, list) and isinstance(csv1, list)
    assert len(csv0) > 0
    infections = get_province_infections_from_csv(csv0)
    infections1 = get_province_infections_from_csv(csv1)
    for p in provinces_abbr:
        if p not in infections or p not in infections1:
            infections[p] = None  # data missing for this province on one of the two days
        else:
            infections[p] = infections1[p] - infections[p]  # new cases between the two snapshots
    return infections
def diff_infections_per_date(d0, d1, provinces_abbr):  # convenient for one-off downloads
    assert isinstance(d0, date) and isinstance(d1, date)
    assert d0 < d1
    d0_data = get_provinces_csv(d0)
    d1_data = get_provinces_csv(d1)
    if d0_data is not None and d1_data is not None:
        return diff_infections_between_csv(d0_data, d1_data, provinces_abbr)
def diff_infections_per_day(d, provinces_abbr):  # convenient for one-off downloads
    d0 = d - timedelta(days=1)  # the previous day
    return diff_infections_per_date(d0, d, provinces_abbr)
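# Example (illustrative): new cases registered on 2020-03-10 for two provinces:
#   diff_infections_per_day(date(2020, 3, 10), ["MI", "RM"])
#   -> {"MI": <new cases in Milano>, "RM": <new cases in Roma>}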
def get_provinces_data_csv_indexed(starting_date, ending_date):
    distance = (ending_date - starting_date).days
    indexed_csv = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONNECTIONS) as executor:
        future_results = (executor.submit(get_provinces_csv_multithread, starting_date + timedelta(days=x)) for x in
                          range(distance + 1))
        for future in concurrent.futures.as_completed(future_results):
            data = future.result()  # (csv, date) pair
            indexed_csv[data[1]] = data[0]  # index each day's csv by its date
    return indexed_csv
def get_regions_data_csv_indexed(starting_date, ending_date):
distance = (ending_date - starting_date).days
indexed_csv = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONNECTIONS) as executor:
future_results = (executor.submit(get_regions_csv_multithread, starting_date + timedelta(days=x)) for x in
range(distance + 1))
for future in concurrent.futures.as_completed(future_results):
data = future.result()
indexed_csv[data[1]] = data[0]
return indexed_csv
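# Example: get_regions_data_csv_indexed(date(2020, 3, 1), date(2020, 3, 3))
# returns a dict with the parsed csv of each of the three days, keyed by date
# (both endpoints included).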
def get_prov_region_mapping(prov_csv_sample, provinces_name, provinces_abbr):
    prov_region_mapping = {}
    for row in prov_csv_sample:
        if "sigla_provincia" in row and row['sigla_provincia'] in provinces_abbr:
            corresponding_prov_name = provinces_name[provinces_abbr.index(row["sigla_provincia"])]
            region_name = row["denominazione_regione"]
            prov_region_mapping[corresponding_prov_name] = region_name
            prov_region_mapping[row["sigla_provincia"]] = region_name  # indexed by both name and abbreviation for convenience
    assert len(prov_region_mapping) == 2 * len(provinces_name)  # one entry per name plus one per abbreviation
    return prov_region_mapping
def get_province_ratio(pop_csv, provinces_name, provinces_abbr, regions, prov_region_mapping):
    assert len(provinces_name) == len(provinces_abbr)  # the two lists must be aligned index by index
    pop_provs = {}
    pop_regions = {}
    # the '\ufeff' in the key below is the BOM that ISTAT prepends to the CSV header
    for row in pop_csv:
        if row["Territorio"] in regions and len(row['\ufeff"ITTER107"']) == 4:  # a 4-char code identifies a region
            assert row["Territorio"] not in pop_regions
            pop_regions[row["Territorio"]] = int(row["Value"])
        if row["Territorio"] in provinces_name and len(row['\ufeff"ITTER107"']) == 5:  # a 5-char code identifies a province
            assert row["Territorio"] not in pop_provs  # must not have been inserted yet
            assert row["Territorio"] in prov_region_mapping  # it must appear in the province/region mapping
            pop_provs[row["Territorio"]] = int(row["Value"])  # indexed by full province name
    assert len(pop_regions) == len(regions)
    assert len(pop_provs) == len(provinces_name)
    # with the population data in hand, compute each province's share of its region
    ratio_per_province = {}
    for p_abb, p_name in zip(provinces_abbr, provinces_name):
        region = prov_region_mapping[p_abb]
        pop_r = pop_regions[region]
        pop_p = pop_provs[p_name]  # indexed by full name
        ratio = pop_p / pop_r
        ratio_per_province[p_abb] = ratio
        ratio_per_province[p_name] = ratio
    # any post-processing of the ratios (e.g. a Gaussian redistribution) would go here
    return ratio_per_province
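# The ratio is what splits regional test counts among provinces, assuming tests
# are distributed proportionally to population:
#   tests_province ~= tests_region * (pop_province / pop_region)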
def estimated_cumulative_tests_per_province(provinces_abbr, prov_region_mapping, ratio, daily_regs_csv):
    if daily_regs_csv is None:
        return None
    tests_per_province = {}
    tests_per_region = {}
    for lin in daily_regs_csv:
        tests_per_region[lin["denominazione_regione"]] = int(lin["tamponi"])  # "tamponi" = cumulative swab tests
    for p in provinces_abbr:
        tests_per_province[p] = tests_per_region[prov_region_mapping[p]] * ratio[p]
    return tests_per_province
def estimated_daily_tests_per_province(provinces_abbr, prov_region_mapping, ratio, daily_regs_csv0, daily_regs_csv):
    # daily tests = difference between the cumulative estimates of two consecutive days
    t0 = estimated_cumulative_tests_per_province(provinces_abbr, prov_region_mapping, ratio, daily_regs_csv0)
    t = estimated_cumulative_tests_per_province(provinces_abbr, prov_region_mapping, ratio, daily_regs_csv)
    if t is None or t0 is None:
        return None
    tests_per_province = {}
    for p in provinces_abbr:
        tests_per_province[p] = t[p] - t0[p]
    return tests_per_province
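# Worked example (made-up numbers): if a region reported 10000 cumulative tests
# yesterday and 10500 today, a province with ratio 0.25 is credited with
# 2500 and 2625 cumulative tests respectively, i.e. 125 estimated tests for the day.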
def save_graphs_multiprocess(plots, provinces_name, provinces_abbr):
    # (title template, y-axis label, output sub-directory) for each plot category
    specs = {
        "infect": ("Covid new infections per day in {} {}", "New infections", "Covid"),
        "infect_n": ("Covid new infections per day in {} {} normalized", "New infections", "Covid_n"),
        "infects per tests": ("Covid infections per tests in {} {}", "% new infections/tests",
                              "Covid_infection_per_test_est"),
        "infects per tests_n": ("Covid infections per tests in {} {} normalized", "% new infections/tests",
                                "Covid_infection_per_test_est_n"),
        "tests": ("Covid estimated tests per day in {} {}", "Tests", "Covid_Tests_est"),
        "tests_n": ("Covid estimated tests per day in {} {} normalized", "Tests", "Covid_Tests_est_n"),
    }
    total_saves = len(plots) * len(provinces_name)
    path = pathlib.Path(__file__).parent
    fr = list()
    # leave one core free to avoid making the system completely unusable; note that
    # os.sched_getaffinity is Linux-only
    with concurrent.futures.ProcessPoolExecutor(max_workers=max(1, len(os.sched_getaffinity(0)) - 1)) as executor:
        for k in plots:
            title_fmt, ylabel, subdir = specs[k]
            for p_name, p_abbr in zip(provinces_name, provinces_abbr):
                fr.append(executor.submit(plots[k][p_abbr].save_plot, title_fmt.format(p_name, p_abbr),
                                          'Day', ylabel, path / subdir))
        j = 0
        sys.stdout.write("\r0% done")
        sys.stdout.flush()
        for future in concurrent.futures.as_completed(fr):
            try:
                future.result()
            finally:
                j += 1
                sys.stdout.write(f"\r{int(j / total_saves * 100)}% done")
                sys.stdout.flush()
def get_provinces_abbr_list(filename="provinces.txt"):
    # each line of the file is expected to be "<ProvinceName> <ABBR>"
    provinces_abbr = list()
    with open(filename, "r") as f:
        for lin in f.read().splitlines():
            provinces_abbr.append(lin.split(' ')[1])
    return provinces_abbr
def get_provinces_name_list(filename="provinces.txt"):
    provinces_name = list()
    with open(filename, "r") as f:
        for lin in f.read().splitlines():
            provinces_name.append(lin.split(' ')[0])
    return provinces_name
def get_regions_list(filename="regions.txt"):
    with open(filename, "r") as f:
        regions = list(f.read().splitlines())
    return regions
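def infections_per_tests_percent(inf_val, test_val):
    # Shared helper for the normalized plots in main(): clamps the
    # new-infections / tests ratio into [0, 100]%. Out-of-range values are
    # impossible in theory, but imprecise or revised source data can produce
    # them, so they are saturated instead of being plotted raw.
    if test_val == 0 and inf_val > 0:
        return 100
    elif test_val == 0 and inf_val <= 0:
        return 0
    elif inf_val / test_val > 1:
        return 100
    return inf_val / test_val * 100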
def main():
    starting_date = date(2020, 2, 24)  # first day covered by the DPC dataset
    ending_date = date.today()
    provinces_name = get_provinces_name_list()
    provinces_abbr = get_provinces_abbr_list()
    regions = get_regions_list()
    pop_csv = get_population_per_territory_csv(provinces_name + regions)
    prov_region_mapping = get_prov_region_mapping(get_provinces_csv(date(2020, 2, 26)), provinces_name,
                                                  provinces_abbr)  # maps every province to its region
    ratio = get_province_ratio(pop_csv, provinces_name, provinces_abbr, regions, prov_region_mapping)
    provs_indexed_csv = get_provinces_data_csv_indexed(starting_date, ending_date)
    regs_indexed_csv = get_regions_data_csv_indexed(starting_date, ending_date)
    plots = {}
    categories = ["infect", "infects per tests", "tests", "infect_n", "infects per tests_n", "tests_n"]
    for c in categories:
        plots[c] = {p: Plot() for p in provinces_abbr}
    # sliding windows used for the normalized ("_n") plots
    distance = (ending_date - starting_date).days
    window_size = SMOOTH_DATA_DAYS_FACTOR * 2 + 1  # days to smooth over, centered on the current day
    infection_window = list()
    test_window = list()
    last_index = distance - 2
    central_element_index = math.floor(window_size / 2)
    for x in range(distance):
        d0 = starting_date + timedelta(days=x)
        d = starting_date + timedelta(days=x + 1)
        tests = estimated_daily_tests_per_province(provinces_abbr, prov_region_mapping, ratio, regs_indexed_csv[d0],
                                                   regs_indexed_csv[d])
        if tests is None or provs_indexed_csv[d] is None or provs_indexed_csv[d0] is None:
            continue  # skip days where either the province or the region data is missing
        newInfections = diff_infections_between_csv(provs_indexed_csv[d0], provs_indexed_csv[d], provinces_abbr)
        # sliding-window management: keep at most the last window_size days
        test_window.append(tests)
        if len(test_window) > window_size:
            test_window.pop(0)
        infection_window.append(newInfections)
        if len(infection_window) > window_size:
            infection_window.pop(0)
        for p_a, p_n in zip(provinces_abbr, provinces_name):
            # raw (non-normalized) plots
            formatted_date_str = f"{d.day:02}/{d.month:02}/{d.year % 100:02}"
            if newInfections[p_a] is not None:
                plots["infect"][p_a].append(formatted_date_str, newInfections[p_a])
            if tests[p_a] is not None:
                if newInfections[p_a] is not None:
                    if newInfections[p_a] == 0:
                        plots["infects per tests"][p_a].append(formatted_date_str, 0)
                    elif tests[p_a] == 0:  # crude: infections reported without tests count as 100%
                        plots["infects per tests"][p_a].append(formatted_date_str, 100)
                    else:
                        val = min(max(newInfections[p_a] / tests[p_a], 0), 1)  # clamp noisy data into [0, 1]
                        plots["infects per tests"][p_a].append(formatted_date_str, val * 100)
                plots["tests"][p_a].append(formatted_date_str, tests[p_a])
            # normalized plots: this province's slice of the sliding windows
            province_inf_window = [ni[p_a] for ni in infection_window]
            province_test_window = [ti[p_a] for ti in test_window]
            if x == window_size - 1:  # the window has just been filled for the first time
                skipped_days = math.ceil(window_size / 2)  # days at the start not plotted yet
                for sd in range(skipped_days):
                    iterator_d = d - timedelta(days=skipped_days - (sd + 1))
                    formatted_date_str = f"{iterator_d.day:02}/{iterator_d.month:02}/{iterator_d.year % 100:02}"
                    inf_val = data_norm(province_inf_window, sd)
                    plots["infect_n"][p_a].append(formatted_date_str, inf_val)
                    test_val = data_norm(province_test_window, sd)
                    plots["tests_n"][p_a].append(formatted_date_str, test_val)
                    rat_val = infections_per_tests_percent(inf_val, test_val)
                    plots["infects per tests_n"][p_a].append(formatted_date_str, rat_val)
            elif x >= window_size and x + central_element_index <= last_index:  # steady state: centered window
                inf_val = data_norm(province_inf_window)
                plots["infect_n"][p_a].append(formatted_date_str, inf_val)
                test_val = data_norm(province_test_window)
                plots["tests_n"][p_a].append(formatted_date_str, test_val)
                rat_val = infections_per_tests_percent(inf_val, test_val)
                plots["infects per tests_n"][p_a].append(formatted_date_str, rat_val)
            elif x + central_element_index > last_index:  # near the end the window can no longer stay centered
                # progressively shift the Gaussian center toward the end of the window,
                # mirroring the backfill done when the window first filled up; less precise
                index = central_element_index + (x + central_element_index - last_index)
                inf_val = data_norm(province_inf_window, index)
                plots["infect_n"][p_a].append(formatted_date_str, inf_val)
                test_val = data_norm(province_test_window, index)
                plots["tests_n"][p_a].append(formatted_date_str, test_val)
                rat_val = infections_per_tests_percent(inf_val, test_val)
                plots["infects per tests_n"][p_a].append(formatted_date_str, rat_val)
print("Saving infection graphs, may require some time...")
save_graphs_multiprocess(plots, provinces_name, provinces_abbr)
print("\nDone!")
if __name__ == "__main__":
    requests_cache.install_cache('req_cache.sqlite')  # cache HTTP responses between runs
main()