Globale trends¶

Data importeren¶

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import date,datetime,timedelta
from matplotlib.ticker import ScalarFormatter
import numpy as np
from matplotlib.colors import LinearSegmentedColormap, to_rgba_array
In [2]:
gemeentes = pd.read_csv('https://data.rivm.nl/covid-19/COVID-19_aantallen_gemeente_cumulatief.csv', parse_dates=['Date_of_report', 'Date_of_publication'], sep=';').drop(columns=['Hospital_admission', 'Deceased', 'Version'])
gemeentes.loc[gemeentes.Municipality_code.isnull(), 'Municipality_code'] = 'Onbekend'
gemeentes.loc[gemeentes.Province.isnull(), 'Province'] = 'Onbekend'
gemeentes.loc[gemeentes.Province == 'Friesland', 'Province'] = 'Fryslân'
gemeentes = gemeentes.fillna(0)
gemeentes.Date_of_report = gemeentes.Date_of_publication
gemeentes
Out[2]:
Date_of_report Date_of_publication Municipality_code Municipality_name Province Total_reported
0 2020-03-13 10:00:00 2020-03-13 10:00:00 GM0003 Appingedam Groningen 0
1 2020-03-13 10:00:00 2020-03-13 10:00:00 GM0010 Delfzijl Groningen 0
2 2020-03-13 10:00:00 2020-03-13 10:00:00 GM0014 Groningen Groningen 3
3 2020-03-13 10:00:00 2020-03-13 10:00:00 GM0024 Loppersum Groningen 0
4 2020-03-13 10:00:00 2020-03-13 10:00:00 GM0034 Almere Flevoland 1
... ... ... ... ... ... ...
403201 2023-03-31 10:00:00 2023-03-31 10:00:00 Onbekend 0 Overijssel 4871
403202 2023-03-31 10:00:00 2023-03-31 10:00:00 Onbekend 0 Utrecht 4751
403203 2023-03-31 10:00:00 2023-03-31 10:00:00 Onbekend 0 Zeeland 1607
403204 2023-03-31 10:00:00 2023-03-31 10:00:00 Onbekend 0 Zuid-Holland 17335
403205 2023-03-31 10:00:00 2023-03-31 10:00:00 Onbekend 0 Onbekend 139

403206 rows × 6 columns

In [3]:
totalPositiveTestRatio = pd.read_csv('https://raw.githubusercontent.com/mzelst/covid-19/master/data-dashboards/percentage-positive-daily-national.csv', parse_dates=['date'])
totalPositiveTestRatio['infected_percentage_7d'] =  totalPositiveTestRatio['values.infected_percentage'].rolling(7).mean()
totalPositiveTestRatio['total_7d'] =  totalPositiveTestRatio['values.tested_total'].rolling(7).mean()
totalPositiveTestRatio['positive_7d'] =  totalPositiveTestRatio['values.infected'].rolling(7).mean()
totalPositiveTestRatio
Out[3]:
date values.tested_total values.infected values.infected_percentage tests.7d.avg tests.7d_lagged_7 pos.rate.3d.avg pos.rate.7d.avg values.infected_7d.avg values.infected_7d_lagged growth_infected growth_tests infected_percentage_7d total_7d positive_7d
0 2020-06-01 1551 73 4.706641 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2020-06-02 6821 203 2.976103 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2020-06-03 8880 165 1.858108 NaN NaN 2.6 NaN NaN NaN NaN NaN NaN NaN NaN
3 2020-06-04 9352 173 1.849872 NaN NaN 2.2 NaN NaN NaN NaN NaN NaN NaN NaN
4 2020-06-05 9478 135 1.424351 NaN NaN 1.7 NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1015 2023-03-13 108 72 66.666667 171.428571 199.571429 62.3 62.3 62.354688 59.827485 104.2 85.9 62.354688 171.428571 106.714286
1016 2023-03-14 98 57 58.163265 154.142857 194.571429 62.1 61.9 61.792233 60.037562 102.9 79.2 61.792233 154.142857 95.428571
1017 2023-03-15 116 77 66.379310 131.142857 200.000000 64.0 63.8 63.074888 60.467193 104.3 65.6 63.074888 131.142857 83.714286
1018 2023-03-16 113 49 43.362832 119.857143 200.428571 56.0 60.7 59.894578 60.998655 98.2 59.8 59.894578 119.857143 72.714286
1019 2023-03-17 144 86 59.722222 106.714286 202.428571 56.8 59.0 59.104290 61.311680 96.4 52.7 59.104290 106.714286 63.000000

1020 rows × 15 columns

In [4]:
sewer_detail = pd.read_csv('https://data.rivm.nl/covid-19/COVID-19_rioolwaterdata.csv', parse_dates=['Date_measurement'], sep=';').drop(columns=['Date_of_report', 'Version', 'RWZI_AWZI_code'])
sewer_detail.rename(columns={'Date_measurement': 'Date', 'RWZI_AWZI_name':'Station'}, inplace=True)
dateOfStatsDetail = sewer_detail.Date.max()
sewer_detail
Out[4]:
Date Station RNA_flow_per_100000
0 2020-03-30 Tilburg 34621959326395
1 2020-03-31 Assen 41199039310795
2 2020-03-31 Garmerwolde 10473596079208
3 2020-03-31 Leeuwarden 11002680177775
4 2020-03-31 Stadskanaal 3802954816430
... ... ... ...
117936 2023-04-19 Asten 14144676039699
117937 2023-04-19 Dinteloord 7575368694212
117938 2023-04-19 Dinther 12578231882919
117939 2023-04-19 Dokhaven 44366369820223
117940 2023-04-16 Nieuw-Vossemeer 10721634301093

117941 rows × 3 columns

In [5]:
opnames_nice = pd.read_csv("https://github.com/mzelst/covid-19/raw/master/data-nice/age/leeftijdsverdeling_datum_Klinisch_IC_long.csv", parse_dates=["Datum"])
opnames_nice = opnames_nice[opnames_nice.Type == "Klinisch"].sort_values("Datum")
opnames_nice.drop(columns=['Type'], inplace=True)
opnames_nice.rename(columns={'Datum': 'Date'}, inplace=True)
opnames_nice
Out[5]:
Date <20 20 - 24 25 - 29 30 - 34 35 - 39 40 - 44 45 - 49 50 - 54 55 - 59 60 - 64 65 - 69 70 - 74 75 - 79 80 - 84 85 - 89 >90
185 2020-11-03 507 151 290 353 440 562 935 1479 1723 1944 1979 2617 2739 2466 1646 657
187 2020-11-04 523 151 290 361 445 568 940 1485 1747 1976 1997 2641 2759 2494 1676 662
189 2020-11-05 531 151 293 363 444 575 953 1494 1759 1994 2007 2687 2799 2532 1697 673
191 2020-11-06 529 151 299 366 448 573 958 1507 1776 2020 2025 2688 2822 2564 1714 678
193 2020-11-07 531 154 299 368 453 585 973 1514 1796 2022 2041 2703 2850 2582 1729 681
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1718 2023-03-22 5013 1233 2102 2874 2923 3201 4557 7127 8949 10111 11460 14676 16520 15338 10986 5361
1720 2023-03-23 5023 1234 2104 2873 2925 3203 4561 7132 8954 10130 11473 14706 16543 15365 11013 5367
1722 2023-03-24 5027 1234 2104 2876 2925 3205 4562 7137 8962 10139 11477 14719 16562 15387 11026 5380
1724 2023-03-27 5034 1236 2105 2882 2926 3205 4566 7144 8969 10155 11497 14762 16612 15427 11054 5400
1726 2023-03-28 5039 1236 2106 2883 2929 3205 4567 7148 8971 10158 11510 14781 16631 15452 11066 5406

772 rows × 17 columns

In [6]:
#display(pd.read_csv("https://raw.githubusercontent.com/mzelst/covid-19/master/data/lcps_by_day.csv", parse_dates=["date"]))
opnames_lcps = pd.read_csv("https://raw.githubusercontent.com/mzelst/covid-19/master/data/lcps_by_day.csv", parse_dates=["date"])[["date", "kliniek_bezetting_covid", "IC_bezetting_covid"]].sort_values("date")
opnames_lcps
Out[6]:
date kliniek_bezetting_covid IC_bezetting_covid
844 2021-01-01 1780.0 642.0
843 2021-01-02 1819.0 650.0
842 2021-01-03 1826.0 643.0
841 2021-01-04 1895.0 651.0
840 2021-01-05 1905.0 661.0
... ... ... ...
4 2023-04-21 323.0 23.0
3 2023-04-22 NaN NaN
2 2023-04-23 NaN NaN
1 2023-04-24 357.0 22.0
0 2023-04-25 333.0 24.0

845 rows × 3 columns

Data voorbereiden¶

In [7]:
from datetime import datetime
totals = gemeentes.groupby(by=['Date_of_report']).sum().diff().reset_index()
totals['rolling'] = totals.Total_reported.rolling(window=7).mean()
#totals['rolling_100k'] = totals['rolling'] * (100000 / 17454856) 
totals['rolling14'] = totals.Total_reported.rolling(window=14).mean()
#totals['rolling14_100k'] = totals.rolling14 *  (100000 / 17454856)
totals['Growth'] = totals.Total_reported.rolling(window=14).mean().pct_change().rolling(window = 14).mean() * 100
totals
Out[7]:
Date_of_report Total_reported rolling rolling14 Growth
0 2020-03-13 10:00:00 NaN NaN NaN NaN
1 2020-03-14 10:00:00 155.0 NaN NaN NaN
2 2020-03-15 10:00:00 176.0 NaN NaN NaN
3 2020-03-16 10:00:00 278.0 NaN NaN NaN
4 2020-03-17 10:00:00 292.0 NaN NaN NaN
... ... ... ... ... ...
1109 2023-03-27 10:00:00 187.0 286.714286 337.000000 -4.026833
1110 2023-03-28 10:00:00 473.0 264.285714 303.285714 -4.662717
1111 2023-03-29 10:00:00 297.0 251.285714 299.357143 -4.425063
1112 2023-03-30 10:00:00 182.0 236.142857 281.428571 -4.716153
1113 2023-03-31 10:00:00 203.0 219.714286 261.785714 -5.143211

1114 rows × 5 columns

In [8]:
perWeek = totals.copy().drop(columns=['Growth'])
perWeek['week'] = perWeek.Date_of_report.dt.isocalendar().week
perWeek['year'] = perWeek.Date_of_report.dt.isocalendar().year
weekTotals = perWeek.groupby(by=['year', 'week']).mean()
weekTotals.drop(columns=['rolling', 'rolling14'], inplace=True)
weekTotals.rename(columns={'Total_reported': 'WeekTotalReported', 'Hospital_admission': 'WeekHospitalAdmission', 'Deceased' : 'WeekDeceased'}, inplace=True)
perWeek = perWeek.set_index(['year','week']).join(weekTotals).reset_index()
perWeek['Reported_ratio'] = (perWeek.Total_reported / perWeek.WeekTotalReported)

dayOfWeek={0:'Maandag', 1:'Dinsdag', 2:'Woensdag', 3:'Donderdag', 4:'Vrijdag', 5:'Zaterdag', 6:'Zondag'}
perWeek['weekday'] = perWeek.Date_of_report.dt.dayofweek.map(dayOfWeek)
perWeek['month'] = perWeek.Date_of_report.dt.month
perWeek['date'] = pd.to_datetime(perWeek.year.apply(str) + '-W' + perWeek.week.apply(str) + '-7', format='%G-W%V-%u')
display(perWeek)

dayToWeekAvg = totals.copy()
dayToWeekAvg['week'] = dayToWeekAvg.Date_of_report.dt.isocalendar().week
dayToWeekAvg['year'] = dayToWeekAvg.Date_of_report.dt.isocalendar().year
dayToWeekAvg['date'] = pd.to_datetime(dayToWeekAvg.year.apply(str) + '-W' + dayToWeekAvg.week.apply(str) + '-7', format='%G-W%V-%u')
dayToWeekAvg['RatioWeekAvg'] = dayToWeekAvg.Total_reported / dayToWeekAvg['rolling'].shift(1)
dayToWeekAvg['AboveAverage'] = dayToWeekAvg.RatioWeekAvg > 1
dayToWeekAvg['weekday'] = dayToWeekAvg.Date_of_report.dt.dayofweek.map(dayOfWeek)
display(dayToWeekAvg)
year week Date_of_report Total_reported rolling rolling14 WeekTotalReported Reported_ratio weekday month date
0 2020 11 2020-03-13 10:00:00 NaN NaN NaN 165.500000 NaN Vrijdag 3 2020-03-15
1 2020 11 2020-03-14 10:00:00 155.0 NaN NaN 165.500000 0.936556 Zaterdag 3 2020-03-15
2 2020 11 2020-03-15 10:00:00 176.0 NaN NaN 165.500000 1.063444 Zondag 3 2020-03-15
3 2020 12 2020-03-16 10:00:00 278.0 NaN NaN 438.428571 0.634083 Maandag 3 2020-03-22
4 2020 12 2020-03-17 10:00:00 292.0 NaN NaN 438.428571 0.666015 Dinsdag 3 2020-03-22
... ... ... ... ... ... ... ... ... ... ... ...
1109 2023 13 2023-03-27 10:00:00 187.0 286.714286 337.000000 268.400000 0.696721 Maandag 3 2023-04-02
1110 2023 13 2023-03-28 10:00:00 473.0 264.285714 303.285714 268.400000 1.762295 Dinsdag 3 2023-04-02
1111 2023 13 2023-03-29 10:00:00 297.0 251.285714 299.357143 268.400000 1.106557 Woensdag 3 2023-04-02
1112 2023 13 2023-03-30 10:00:00 182.0 236.142857 281.428571 268.400000 0.678092 Donderdag 3 2023-04-02
1113 2023 13 2023-03-31 10:00:00 203.0 219.714286 261.785714 268.400000 0.756334 Vrijdag 3 2023-04-02

1114 rows × 11 columns

Date_of_report Total_reported rolling rolling14 Growth week year date RatioWeekAvg AboveAverage weekday
0 2020-03-13 10:00:00 NaN NaN NaN NaN 11 2020 2020-03-15 NaN False Vrijdag
1 2020-03-14 10:00:00 155.0 NaN NaN NaN 11 2020 2020-03-15 NaN False Zaterdag
2 2020-03-15 10:00:00 176.0 NaN NaN NaN 11 2020 2020-03-15 NaN False Zondag
3 2020-03-16 10:00:00 278.0 NaN NaN NaN 12 2020 2020-03-22 NaN False Maandag
4 2020-03-17 10:00:00 292.0 NaN NaN NaN 12 2020 2020-03-22 NaN False Dinsdag
... ... ... ... ... ... ... ... ... ... ... ...
1109 2023-03-27 10:00:00 187.0 286.714286 337.000000 -4.026833 13 2023 2023-04-02 0.642927 False Maandag
1110 2023-03-28 10:00:00 473.0 264.285714 303.285714 -4.662717 13 2023 2023-04-02 1.649726 True Dinsdag
1111 2023-03-29 10:00:00 297.0 251.285714 299.357143 -4.425063 13 2023 2023-04-02 1.123784 True Woensdag
1112 2023-03-30 10:00:00 182.0 236.142857 281.428571 -4.716153 13 2023 2023-04-02 0.724275 False Donderdag
1113 2023-03-31 10:00:00 203.0 219.714286 261.785714 -5.143211 13 2023 2023-04-02 0.859649 False Vrijdag

1114 rows × 11 columns

In [9]:
sewer_detail_mean = sewer_detail.copy().groupby(by=['Date']).mean().reset_index().rolling(on="Date", window=14).mean()
sewer_detail_mean['RNA_flow_per_100000_Growth_7d'] = sewer_detail_mean.RNA_flow_per_100000.pct_change().replace([np.inf, -np.inf], 0).fillna(0).rolling(window=14).mean() * 100
sewer_detail_mean = sewer_detail_mean.reset_index()
sewer_detail_mean
Out[9]:
index Date RNA_flow_per_100000 RNA_flow_per_100000_Growth_7d
0 0 2020-03-30 NaN NaN
1 1 2020-03-31 NaN NaN
2 2 2020-04-01 NaN NaN
3 3 2020-04-02 NaN NaN
4 4 2020-04-05 NaN NaN
... ... ... ... ...
1088 1088 2023-04-16 8.729763e+13 -3.327250
1089 1089 2023-04-17 8.599119e+13 -3.124042
1090 1090 2023-04-18 8.399785e+13 -2.962682
1091 1091 2023-04-19 8.074352e+13 -2.845877
1092 1092 2023-04-20 7.632153e+13 -2.965786

1093 rows × 4 columns

In [10]:
totalPositiveTestRatio['infected_percentage_14d_growth'] =  totalPositiveTestRatio['values.infected_percentage'].rolling(14).mean().pct_change().rolling(14).mean() * 100
totalPositiveTestRatio
Out[10]:
date values.tested_total values.infected values.infected_percentage tests.7d.avg tests.7d_lagged_7 pos.rate.3d.avg pos.rate.7d.avg values.infected_7d.avg values.infected_7d_lagged growth_infected growth_tests infected_percentage_7d total_7d positive_7d infected_percentage_14d_growth
0 2020-06-01 1551 73 4.706641 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2020-06-02 6821 203 2.976103 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2020-06-03 8880 165 1.858108 NaN NaN 2.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 2020-06-04 9352 173 1.849872 NaN NaN 2.2 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 2020-06-05 9478 135 1.424351 NaN NaN 1.7 NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1015 2023-03-13 108 72 66.666667 171.428571 199.571429 62.3 62.3 62.354688 59.827485 104.2 85.9 62.354688 171.428571 106.714286 -0.090232
1016 2023-03-14 98 57 58.163265 154.142857 194.571429 62.1 61.9 61.792233 60.037562 102.9 79.2 61.792233 154.142857 95.428571 -0.141993
1017 2023-03-15 116 77 66.379310 131.142857 200.000000 64.0 63.8 63.074888 60.467193 104.3 65.6 63.074888 131.142857 83.714286 -0.000574
1018 2023-03-16 113 49 43.362832 119.857143 200.428571 56.0 60.7 59.894578 60.998655 98.2 59.8 59.894578 119.857143 72.714286 -0.157609
1019 2023-03-17 144 86 59.722222 106.714286 202.428571 56.8 59.0 59.104290 61.311680 96.4 52.7 59.104290 106.714286 63.000000 -0.216527

1020 rows × 16 columns

In [11]:
opnames_nice7 = pd.melt(opnames_nice.set_index('Date').diff().reset_index(), id_vars='Date', var_name=['Age'], value_name='Opnames')
opnames_nice7 = opnames_nice7.groupby(by=["Date"]).sum().reset_index()
opnames_nice7 = opnames_nice7.rolling(on="Date", window=14).mean().reset_index()
opnames_nice7['Growth'] = opnames_nice7.Opnames.pct_change().rolling(window= 14).mean() * 100
opnames_nice7
Out[11]:
index Date Opnames Growth
0 0 2020-11-03 NaN NaN
1 1 2020-11-04 NaN NaN
2 2 2020-11-05 NaN NaN
3 3 2020-11-06 NaN NaN
4 4 2020-11-07 NaN NaN
... ... ... ... ...
767 767 2023-03-22 183.642857 2.917497
768 768 2023-03-23 186.142857 3.071618
769 769 2023-03-24 176.642857 1.945280
770 770 2023-03-27 179.000000 1.381823
771 771 2023-03-28 174.000000 0.829903

772 rows × 4 columns

In [12]:
opnames_lcps14 = opnames_lcps.fillna(0).rolling(on="date", window=14).mean()
opnames_lcps14['IC_growth'] = opnames_lcps14.IC_bezetting_covid.pct_change().rolling(window=14).mean() * 100
opnames_lcps14['Kliniek_growth'] = opnames_lcps14.kliniek_bezetting_covid.pct_change().rolling(window=14).mean() * 100
opnames_lcps14
Out[12]:
date IC_bezetting_covid kliniek_bezetting_covid IC_growth Kliniek_growth
844 2021-01-01 NaN NaN NaN NaN
843 2021-01-02 NaN NaN NaN NaN
842 2021-01-03 NaN NaN NaN NaN
841 2021-01-04 NaN NaN NaN NaN
840 2021-01-05 NaN NaN NaN NaN
... ... ... ... ... ...
4 2023-04-21 15.071429 253.642857 -3.288960 -3.416603
3 2023-04-22 15.071429 253.642857 -3.288960 -3.416603
2 2023-04-23 15.071429 253.642857 -3.288960 -3.416603
1 2023-04-24 16.642857 279.142857 -1.869955 -1.831727
0 2023-04-25 16.571429 269.571429 -1.644678 -1.786014

845 rows × 5 columns

Plots configureren¶

In [13]:
sns.set_style("whitegrid")
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 3, "font.size":10,"axes.titlesize":24,"axes.labelsize":18})
sns.set_palette(sns.color_palette(['#CC6677', '#332288', '#DDCC77', '#117733', '#88CCEE', \
                    '#882255', '#44AA99', '#999933', '#AA4499', '#DDDDDD', \
                    '#000000']))

def discretemap(colormap, hexclrs):
    """
    Produce a colormap from a list of discrete colors without interpolation.
    """
    clrs = to_rgba_array(hexclrs)
    clrs = np.vstack([clrs[0], clrs, clrs[-1]])
    cdict = {}
    for ki, key in enumerate(('red','green','blue')):
        cdict[key] = [ (i/(len(clrs)-2.), clrs[i, ki], clrs[i+1, ki]) for i in range(len(clrs)-1) ]
    return LinearSegmentedColormap(colormap, cdict)

def __rainbow_WhBr():
    """
    Define colormap 'rainbow_WhBr'.
    """
    clrs = ['#E8ECFB', '#DDD8EF', '#D1C1E1', '#C3A8D1', '#B58FC2',
            '#A778B4', '#9B62A7', '#8C4E99', '#6F4C9B', '#6059A9',
            '#5568B8', '#4E79C5', '#4D8AC6', '#4E96BC', '#549EB3',
            '#59A5A9', '#60AB9E', '#69B190', '#77B77D', '#8CBC68',
            '#A6BE54', '#BEBC48', '#D1B541', '#DDAA3C', '#E49C39',
            '#E78C35', '#E67932', '#E4632D', '#DF4828', '#DA2222',
            '#B8221E', '#95211B', '#721E17', '#521A13']
    return LinearSegmentedColormap.from_list("Rainbo_WhBr", clrs)
    #self.cmap.set_bad('#666666')

def __iridescent():
    """
    Define colormap 'iridescent'.
    """
    clrs = ['#FEFBE9', '#FCF7D5', '#F5F3C1', '#EAF0B5', '#DDECBF',
            '#D0E7CA', '#C2E3D2', '#B5DDD8', '#A8D8DC', '#9BD2E1',
            '#8DCBE4', '#81C4E7', '#7BBCE7', '#7EB2E4', '#88A5DD',
            '#9398D2', '#9B8AC4', '#9D7DB2', '#9A709E', '#906388',
            '#805770', '#684957', '#46353A']
    return LinearSegmentedColormap.from_list("iridescent", clrs)
    #self.cmap.set_bad('#999999')

def __YlOrBr():
    """
    Define colormap 'YlOrBr'.
    """
    clrs = ['#FFFFE5', '#FFF7BC', '#FEE391', '#FEC44F', '#FB9A29',
            '#EC7014', '#CC4C02', '#993404', '#662506']
    return LinearSegmentedColormap.from_list('YlOrBr', clrs)
    #self.cmap.set_bad('#888888')

def __BuRd():
    """
    Define colormap 'BuRd'.
    """
    clrs = ['#2166AC', '#4393C3', '#92C5DE', '#D1E5F0', '#F7F7F7',
            '#FDDBC7', '#F4A582', '#D6604D', '#B2182B']
    return LinearSegmentedColormap.from_list('BuRd', clrs)

def __rainbow_discrete(self, lut=None):
    """
    Define colormap 'rainbow_discrete'.
    """
    clrs = ['#E8ECFB', '#D9CCE3', '#D1BBD7', '#CAACCB', '#BA8DB4',
            '#AE76A3', '#AA6F9E', '#994F88', '#882E72', '#1965B0',
            '#437DBF', '#5289C7', '#6195CF', '#7BAFDE', '#4EB265',
            '#90C987', '#CAE0AB', '#F7F056', '#F7CB45', '#F6C141',
            '#F4A736', '#F1932D', '#EE8026', '#E8601C', '#E65518',
            '#DC050C', '#A5170E', '#72190E', '#42150A']
    indexes = [[9], [9, 25], [9, 17, 25], [9, 14, 17, 25], [9, 13, 14, 17,
            25], [9, 13, 14, 16, 17, 25], [8, 9, 13, 14, 16, 17, 25], [8,
            9, 13, 14, 16, 17, 22, 25], [8, 9, 13, 14, 16, 17, 22, 25, 27],
            [8, 9, 13, 14, 16, 17, 20, 23, 25, 27], [8, 9, 11, 13, 14, 16,
            17, 20, 23, 25, 27], [2, 5, 8, 9, 11, 13, 14, 16, 17, 20, 23,
            25], [2, 5, 8, 9, 11, 13, 14, 15, 16, 17, 20, 23, 25], [2, 5,
            8, 9, 11, 13, 14, 15, 16, 17, 19, 21, 23, 25], [2, 5, 8, 9, 11,
            13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 8, 9, 11,
            13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 7, 8, 9, 11,
            13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 7, 8, 9, 11,
            13, 14, 15, 16, 17, 19, 21, 23, 25, 26, 27], [1, 3, 4, 6, 7, 8,
            9, 11, 13, 14, 15, 16, 17, 19, 21, 23, 25, 26, 27], [1, 3, 4,
            6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 23, 25, 26,
            27], [1, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20,
            22, 24, 25, 26, 27], [1, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15,
            16, 17, 18, 20, 22, 24, 25, 26, 27, 28], [0, 1, 3, 4, 6, 7, 8,
            9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28]]
    if lut == None or lut < 1 or lut > 23:
        lut = 22
    self.cmap = discretemap(self.cname, [ clrs[i] for i in indexes[lut-1] ])
    if lut == 23:
        self.cmap.set_bad('#777777')
    else:
        self.cmap.set_bad('#FFFFFF')
In [14]:
def tuftefy(ax):
    """Remove spines and tick position markers to reduce ink."""
    # 
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_visible(True)
    ax.spines["bottom"].set_color('grey')

    ax.grid(color="w", alpha=0.5)
    ax.get_yaxis().grid(True)
    ax.get_xaxis().grid(False)

def fancy_titles(t1, t2, ax=None):
  if ax:
    ax.set_title(t1, loc='left', fontsize=18)
    ax.set_title(t2, loc='right', fontsize=13, color='grey')
  else:
    plt.title(t1, loc='left', fontsize=18)
    plt.title(t2, loc='right', fontsize=13, color='grey')

Plots¶

Week gemiddeld¶

In [15]:
#sns.set_context("talk")
fig = plt.gcf()
fig.set_size_inches(15, 6)
e =sns.violinplot(x="weekday", linewidth=2, y="Reported_ratio", data=perWeek[perWeek.Date_of_report >= '2020-09-01'], order=["Maandag", "Dinsdag", "Woensdag", "Donderdag", "Vrijdag", "Zaterdag", "Zondag"], inner=None, color='#ccc', scale_hue=False, cut=0.5, bw=0.4)
e =sns.stripplot(x="weekday", y="Reported_ratio", data=perWeek[perWeek.Date_of_report >= '2020-09-01'],order=["Maandag", "Dinsdag", "Woensdag", "Donderdag", "Vrijdag", "Zaterdag", "Zondag"], color='black', alpha=0.5)
e.set(ylabel='Verhouding tov weekgemiddelde', title='Verhouding tussen positieve COVID-19 testen en de weekdag', xlabel='')
plt.title("bron: RIVM update " + (totals.Date_of_report.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.01)
#tuftefy(e)
e.yaxis.grid(True)
sns.despine(bottom=True)
In [16]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
e = sns.countplot(x="weekday", hue="AboveAverage", data=dayToWeekAvg[dayToWeekAvg.Date_of_report >= '2020-09-01'], order=["Maandag", "Dinsdag", "Woensdag", "Donderdag", "Vrijdag", "Zaterdag", "Zondag"])

e.set(ylabel='Aantal weken', title='Aantal keer dag boven week gemiddelde', xlabel='Weekdag')
plt.title("bron: RIVM update " + (totals.Date_of_report.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)
e.yaxis.grid(True)
sns.despine(bottom=True)
In [17]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
ax = sns.lineplot(x='date', y='Reported_ratio', hue='weekday', style='weekday', linewidth=2, hue_order=['Maandag', 'Dinsdag', 'Woensdag', 'Donderdag', 'Vrijdag', 'Zaterdag', 'Zondag'], data=perWeek[perWeek.Date_of_report >= '2021-01-01'])
ax.set_ylim(0.0,2)
plt.axhline(y=0.6, ls=':', c='.8', linewidth=1, zorder=1)
plt.axhline(y=0.8, ls=':', c='.8', linewidth=1, zorder=1)
plt.axhline(y=1.0, ls='-', c='.85', linewidth=1, zorder=1)
plt.axhline(y=1.2, ls=':', c='.8', linewidth=1, zorder=1)
plt.axhline(y=1.6, ls=':', c='.8', linewidth=1, zorder=1)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=3))
ax.set(ylabel='Verhouding tov weekgemiddelde', title='Verhouding tussen positieve COVID-19 testen en de weekdag', xlabel='Week')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
tuftefy(ax)
In [18]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
ax = sns.lineplot(x='date', y='RatioWeekAvg', hue='weekday', style='weekday', linewidth=2, hue_order=['Maandag', 'Dinsdag', 'Woensdag', 'Donderdag', 'Vrijdag', 'Zaterdag', 'Zondag'], data=dayToWeekAvg[dayToWeekAvg.Date_of_report >= '2021-01-01'])
ax.set_ylim(0.3,2)
plt.axhline(y=0.8, ls=':', c='.8', linewidth=1, zorder=1)
plt.axhline(y=1.0, ls='-', c='.85', linewidth=1, zorder=1)
plt.axhline(y=1.2, ls=':', c='.8', linewidth=1, zorder=1)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=3))
ax.set(ylabel='Verhouding tov weekgemiddelde', title='Verhouding tussen positieve COVID-19 testen en de 7d gemiddelde', xlabel='Week')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
tuftefy(ax)

Totals¶

In [19]:
fig = plt.figure()
fig.set_size_inches(15, 10)

#ax = sns.lineplot(data=totalPositiveTestRatio[totalPositiveTestRatio.date >= '2020-08-01'], x='date', y='total_7d', color='#004488', label='Aantal testen')

ax = sns.lineplot(data=totalPositiveTestRatio, x='date', y='total_7d', color='#004488', label='Aantal testen')
plt.legend(bbox_to_anchor=(0.2, 1),borderaxespad=0)
ax2 = ax.twinx()
#sns.lineplot(data=totalPositiveTestRatio[totalPositiveTestRatio.date >= '2020-08-01'], x='date', y='positive_7d', color='#ddaa33', label='Aantal positief', ax = ax2)
sns.lineplot(data=totalPositiveTestRatio, x='date', y='positive_7d', color='#ddaa33', label='Aantal positief', ax = ax2)
plt.legend(bbox_to_anchor=(0.9, 1),borderaxespad=0)
#sns.lineplot(data=totals[totals.Date_of_report > '2020-06-01'], x='Date_of_report',y='rolling', color='red',  ax=ax2)
ax.set_ylabel("Aantal testen")
ax2.set_ylabel("Positieve testen")
ax.set_xlabel("")
ax.set_title('Voortschrijdend gemiddelde (7d) van aantal uitgevoerde en positieve testen', pad=30)
#ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
#ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=2))
ax.set(ylim=(0, None))
ax2.set(ylim=(0, None))
#plt.axvline(date(2020,10,14), ls=":", c=".5")
#plt.axvline(date(2020,12,18), ls=":", c=".5")
#plt.axvline(date(2021,2,8), ls=":", c=".5")
#plt.axhline(0.0, ls="-", c=".8", zorder=1)
#g = g.map(plt.axvline, x=date(2020,10,14), ls=":", c=".5")
plt.title("bron: Dashboard data t/m " + (totalPositiveTestRatio.date.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)


tuftefy(ax)
tuftefy(ax2)
In [20]:
fig = plt.figure()
fig.set_size_inches(15, 10)

ax = sns.lineplot(data=sewer_detail_mean, x='Date', y='RNA_flow_per_100000', color='#004488', label='Riooldeeltjes')
plt.legend(bbox_to_anchor=(0.2, 1),borderaxespad=0)
ax2 = ax.twinx()
#sns.lineplot(data=totalPositiveTestRatio[totalPositiveTestRatio.date >= '2020-08-01'], x='date', y='positive_7d', color='#ddaa33', label='Aantal positief', ax = ax2)
sns.lineplot(data=totalPositiveTestRatio, x='date', y='positive_7d', color='#ddaa33', label='Aantal positief', ax = ax2)
plt.legend(bbox_to_anchor=(0.9, 1),borderaxespad=0)
#sns.lineplot(data=totals[totals.Date_of_report > '2020-06-01'], x='Date_of_report',y='rolling', color='red',  ax=ax2)
ax.set_ylabel("Riooldeeltjes per 100.000 inwoners")
ax2.set_ylabel("Positieve testen")
ax.set_xlabel("")
ax.set_title('Voortschrijdend gemiddelde (7d) van positieve testen en riooldeeltjes', pad=30)
#ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
#ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=2))
ax.set(ylim=(0, None))
ax2.set(ylim=(0, None))
#plt.axvline(date(2020,10,14), ls=":", c=".5")
#plt.axvline(date(2020,12,18), ls=":", c=".5")
#plt.axvline(date(2021,2,8), ls=":", c=".5")
#plt.axhline(0.0, ls="-", c=".8", zorder=1)
#g = g.map(plt.axvline, x=date(2020,10,14), ls=":", c=".5")
plt.title("bron: Dashboard data t/m " + (totalPositiveTestRatio.date.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)


tuftefy(ax)
tuftefy(ax2)
In [21]:
fig = plt.figure()
fig.set_size_inches(15, 10)

ax = sns.lineplot(data=sewer_detail_mean, x='Date', y='RNA_flow_per_100000', color='#004488', label='Riooldeeltjes')
plt.legend(bbox_to_anchor=(0.2, 1),borderaxespad=0)
ax2 = ax.twinx()
sns.lineplot(data=opnames_nice7, x='Date', y='Opnames', color='#ddaa33', label='Nieuwe opnames', ax = ax2)
plt.legend(bbox_to_anchor=(0.9, 1),borderaxespad=0)
#sns.lineplot(data=totals[totals.Date_of_report > '2020-06-01'], x='Date_of_report',y='rolling', color='red',  ax=ax2)
ax.set_ylabel("Riooldeeltjes per 100.000 inwoners")
ax2.set_ylabel("Nieuwe opnames")
ax.set_xlabel("")
ax.set_title('Voortschrijdend gemiddelde (14d) van opnames en riooldeeltjes', pad=30)
#ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
#ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=2))
ax.set(ylim=(0, None))
ax2.set(ylim=(0, None))
#plt.axvline(date(2020,10,14), ls=":", c=".5")
#plt.axvline(date(2020,12,18), ls=":", c=".5")
#plt.axvline(date(2021,2,8), ls=":", c=".5")
#plt.axhline(0.0, ls="-", c=".8", zorder=1)
#g = g.map(plt.axvline, x=date(2020,10,14), ls=":", c=".5")
plt.title("bron: Dashboard data t/m " + (totalPositiveTestRatio.date.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)


tuftefy(ax)
tuftefy(ax2)
In [22]:
from matplotlib.patches import Rectangle

fig = plt.figure()
fig.set_size_inches(15, 10)

#ax = sns.lineplot(data=totalPositiveTestRatio[totalPositiveTestRatio.date >= '2020-08-01'], x='date', y='total_7d', color='#4477AA', label='Aantal testen')

ax = sns.lineplot(data=totals, x='Date_of_report', y='rolling', color='#332288')
#sns.lineplot(data=totals[totals.Date_of_report > '2020-06-01'], x='Date_of_report',y='rolling', color='red',  ax=ax2)
ax.set_ylabel("Positieve testen")
#ax2.set_ylabel("Positieve testen")
ax.set_xlabel("")
ax.set_title('Positieve testen in relatie tot sluiten en openen van basisscholen', pad=30)
#ax.xaxis.set_major_formatter(mdates.DateFormatter('%V'))
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
#ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.SUNDAY, interval=2))
ax.set(ylim=(0, None))
# scholen sluiten
plt.axvline(date(2020,3,16), ls=":", c="#999933")
plt.annotate("Sluiten", xy=(date(2020,3,18), 10000), fontsize="x-large", color="#999933")
plt.axvline(date(2020,12,14), ls=":", c="#999933")
plt.axvline(date(2021,12,20), ls=":", c="#999933")
# scholen open
plt.axvline(date(2020,5,11), ls=":", c="#aa4499")
plt.annotate("Openen", xy=(date(2020,5,13), 10000), fontsize="x-large", color="#aa4499")
plt.axvline(date(2021,2,8), ls=":", c="#aa4499")
plt.axvline(date(2022,1,10), ls=":", c="#aa4499")

#ax.add_patch(Rectangle(date(2020,3,16), date(2020,5,20), color="#228833"))
def add_holiday(start, end):
    plt.axvspan(start, end, color="#117733", alpha=0.1)

plt.annotate("Vakantie", xy=(date(2020,7,9), 10000), fontsize="x-large")
# zomer 2020
add_holiday(date(2020, 7,4), date(2020, 8,16))
add_holiday(date(2020, 7,18), date(2020, 8,30))
add_holiday(date(2020, 7,11), date(2020, 8,23))

# herfst 2020
add_holiday(date(2020, 10,10), date(2020, 10,18))
add_holiday(date(2020, 10,17), date(2020, 10,25))
add_holiday(date(2020, 10,17), date(2020, 10,25))

# kerst 2020
add_holiday(date(2020, 12,19), date(2021, 1,3))
add_holiday(date(2020, 12,19), date(2021, 1,3))
add_holiday(date(2020, 12,19), date(2021, 1,3))

# voorjaar 2021
add_holiday(date(2021, 2,20), date(2021, 2,28))
add_holiday(date(2021, 2,20), date(2021, 2,28))
add_holiday(date(2021, 2,13), date(2021, 2,21))

# mei 2021
add_holiday(date(2021, 5,1), date(2021, 5,9))
add_holiday(date(2021, 5,1), date(2021, 5,9))
add_holiday(date(2021, 5,1), date(2021, 5,9))

# zomer 2021
add_holiday(date(2021, 7,10), date(2021, 8,22))
add_holiday(date(2021, 7,17), date(2021, 8,29))
add_holiday(date(2021, 7,24), date(2021, 9,5))

# herfst 2021
add_holiday(date(2021, 10,16), date(2021, 10,24))
add_holiday(date(2021, 10,16), date(2021, 10,24))
add_holiday(date(2021, 10,23), date(2021, 10,31))

# kerst 2021
add_holiday(date(2021, 12,25), date(2022, 1,9))
add_holiday(date(2021, 12,25), date(2022, 1,9))
add_holiday(date(2021, 12,25), date(2022, 1,9))

def annotate_date(d, labelxpos, txt):
    xval = totals[totals.Date_of_report.dt.date == d]["rolling"].to_list()[0]

    plt.annotate(txt, xy=(d, xval), xytext=(d - timedelta(days=3), labelxpos),
            arrowprops=dict(facecolor='black', shrink=0.05),
            horizontalalignment='center', verticalalignment='top',
            fontsize="x-large", bbox=dict(color='white', alpha=0.8)
            )

annotate_date(date(2021,12,28), 10000, "Omikron\ndominant")
annotate_date(date(2021,6,23), 8000, "Delta\ndominant")
annotate_date(date(2021,7,19), 16000, "Dansen met\nJansen")
#annotate_date(date(2021,1,5), 2000, "Alpha dominant")

#plt.legend(bbox_to_anchor=(0.2, 0.1),borderaxespad=0)
#plt.axvline(date(2020,12,18), ls=":", c=".5")
#plt.axvline(date(2021,2,8), ls=":", c=".5")
#plt.axhline(0.0, ls="-", c=".8", zorder=1)
#g = g.map(plt.axvline, x=date(2020,10,14), ls=":", c=".5")
plt.title("bron: RIVM " + (totals.Date_of_report.to_list()[-1]).strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)

tuftefy(ax)

Groei¶

In [23]:
fig = plt.figure()
fig.set_size_inches(15, 10)

since= '2022-01-01'
g = sns.lineplot(data=totals[totals.Date_of_report >= since], x='Date_of_report', y='Growth', label="Positieve testen")
sns.lineplot(data=totalPositiveTestRatio[totalPositiveTestRatio.date >= since], x='date', y='infected_percentage_14d_growth', label="Percentage positief")
sns.lineplot(data=opnames_nice7[opnames_nice7.Date >= since], x='Date', y='Growth', label="Opnames (NICE)")
sns.lineplot(data=opnames_lcps14[opnames_lcps14.date >= since], x='date', y='Kliniek_growth', label="Opnames (LCPS)")
sns.lineplot(data=sewer_detail_mean[sewer_detail_mean.Date >= since], x='Date', y='RNA_flow_per_100000_Growth_7d', label="Riooldeeltjes")

g.set_ylabel("Groei percentage")
g.set_xlabel("")
g.set_title('Groei (14d gemiddelde) van verschillende signaal waardes (14d gemiddelde)', pad=30)
g.xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
g.set(ylim=(-12.5, 12.5))
plt.axhline(0.0, ls="-", c=".8", zorder=1)
#g = g.map(plt.axvline, x=date(2020,10,14), ls=":", c=".5")
plt.title("bronnen: RIVM/LCSP/NICE data t/m " + opnames_lcps14.date.max().strftime('%Y-%m-%d'), loc='right', fontsize=12, color='grey', y=0.0)


tuftefy(g)