forked from rodralez/covid-19
-
Notifications
You must be signed in to change notification settings - Fork 0
/
improve_covid_global_hopkins.m
113 lines (76 loc) · 3.05 KB
/
improve_covid_global_hopkins.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
function tableData = improve_covid_global_hopkins( tableData, filename_time, filename_pop )
% improve_covid_global_hopkins
% Sorts the global COVID-19 table from the Johns Hopkins University [1] and
% adds data to it: a country-wide total row for countries that are listed
% only by province, a Population column, and date-based names for the daily
% columns.
%
% INPUT
%   tableData:     table with the variables ProvinceState and CountryRegion
%                  in columns 1 and 2, two numeric columns in 3 and 4
%                  (presumably Lat and Long -- they are set to 0 in the
%                  total rows), and one numeric column per day from
%                  column 5 onwards.
%   filename_time: comma-separated text file whose first line is a header
%                  with the first date in its 5th field, written as
%                  month/day/two-digit-year (e.g. 1/22/20).
%   filename_pop:  (optional) population CSV file with the variables
%                  Location, Time, Variant and PopTotal. PopTotal is
%                  multiplied by 1000 (presumably it is in thousands).
%                  Default: './hopkins/WPP2019_TotalPopulationBySex.csv'.
%
% OUTPUT
%   tableData:     the input table sorted by CountryRegion, with the extra
%                  total rows, a trailing Population column (Inf when the
%                  country is not found) and the daily columns renamed to
%                  Day_dd_mm_yy.
%
% References:
% [1] https://github.com/CSSEGISandData/COVID-19
%
% Version: 001
% Date: 2020/04/02
% Author: Rodrigo Gonzalez <[email protected]>
% URL: https://github.com/rodralez/navego

% The first four columns are metadata; daily counts start right after them.
N_META = 4;

if ( nargin < 3 || isempty(filename_pop) )
    filename_pop = './hopkins/WPP2019_TotalPopulationBySex.csv';
end

% Sorting by country makes all the rows of a country contiguous.
tableData = sortrows(tableData, 2);

%% Some countries have only data for provinces but not for the whole country as China.

nRows   = size(tableData, 1);
newRows = tableData([], :);     % total rows are collected here and appended at the end

idx = 1;
while ( idx <= nRows )

    countryCell = tableData.CountryRegion(idx);
    countryName = char( countryCell );

    % Exact match: a substring match would mix up e.g. Niger and Nigeria.
    ldx = strcmp( tableData.CountryRegion, countryName );
    province_count = sum( ldx );

    if ( province_count > 1 )

        province_missing = sum( ismissing( tableData.ProvinceState(ldx), '') );

        % if there are several rows for a country but no row for the country only...
        if ( province_missing == 0 )

            country_total = sum( table2array( tableData(ldx, N_META+1:end) ), 1 );

            totalRow = [ table( {''}, countryCell, 0, 0 ), array2table( country_total ) ];
            totalRow.Properties.VariableNames = tableData.Properties.VariableNames;

            newRows = [newRows; totalRow]; %#ok<AGROW>
        end
    end

    % The current row always matches itself, so province_count >= 1 and
    % this jumps to the first row of the next country.
    idx = idx + province_count;
end

tableData = [tableData; newRows];

%% ADD POPULATION

opts = detectImportOptions(filename_pop);
tablePop = readtable(filename_pop, opts);

% Only year 2020 and only medium values
keep = ismember( tablePop.Time, 2020 ) & ismember( tablePop.Variant, 'Medium' );
tablePop = tablePop( keep, : );

nRows = size(tableData, 1);
population = Inf(nRows, 1);     % Inf marks a country that was not found

for idx = 1:nRows

    country = char( tableData.CountryRegion(idx) );

    % Special cases
    switch country
        case 'US',           country = 'United States of America';
        case 'Vietnam',      country = 'Viet Nam';
        case 'Korea, South', country = 'Republic of Korea';
    end

    % An empty name would match every location in the fallback below.
    if ( isempty(country) )
        continue
    end

    ldx = strcmp( tablePop.Location, country );

    if ( ~any(ldx) )
        % Fall back to a partial match, e.g. Iran -> Iran (Islamic Republic of)
        ldx = contains( tablePop.Location, country );
    end

    % A partial match can hit several locations; keep the first one so the
    % assignment is always a scalar.
    match = find( ldx, 1, 'first' );

    if ( ~isempty(match) )
        population(idx) = tablePop.PopTotal( match ) * 1000;
    end
end

tableData.Population = population;

%%

tableData = sortrows(tableData, 2);

%% CHANGE VariableNames WITH DATETIME

fid = fopen(filename_time);
if ( fid < 0 )
    error('improve_covid_global_hopkins:fileOpen', ...
        'Cannot open file %s', filename_time);
end
closeFile = onCleanup( @() fclose(fid) );   % closes the file even if an error occurs

header = fgetl(fid);
clear closeFile

if ( ~ischar(header) )
    error('improve_covid_global_hopkins:emptyHeader', ...
        'File %s has no header line', filename_time);
end

fields = strsplit( header, ',' );
if ( numel(fields) < N_META + 1 )
    error('improve_covid_global_hopkins:badHeader', ...
        'Header of %s has fewer than %d fields', filename_time, N_META + 1);
end

% Two-digit years are mapped to 2000-2099.
FIRST_DAY = datetime( strtrim( fields{N_META+1} ), ...
    'InputFormat', 'M/d/yy', 'PivotYear', 2000 );

% Daily columns lie between the metadata columns and the Population
% column, which is the last one and must keep its name.
DAYS = size(tableData, 2) - N_META - 1;

% Daytime cells
for ddx = 1:DAYS

    time_s = ['Day_', datestr( FIRST_DAY + caldays(ddx-1), 'dd_mm_yy') ];

    tableData.Properties.VariableNames(N_META+ddx) = { time_s };
end