notebook version: 3 (4 Jul 2024)
new in this version: added check if downloaded data is indeed in 30MIN time resolution

  • This notebook can be used to download data from the database InfluxDB

  • Data are stored to a .csv file in this folder


Auto-settings#

Data settings#

DIRCONF = r'L:\Sync\luhk_work\20 - CODING\22 - POET\configs'
# DIRCONF = r'P:\Flux\RDS_calculations\_scripts\_configs\configs'  # Folder with configuration files: needed e.g. for connection to database
TIMEZONE_OFFSET_TO_UTC_HOURS = 1  # Timezone, e.g. "1" is translated to timezone "UTC+01:00" (CET, winter time)
REQUIRED_TIME_RESOLUTION = '30min'  # 30MIN time resolution

Imports#

import importlib.metadata
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
from diive.core.io.files import save_parquet

sns.set_theme('notebook')
from diive.core.plotting.timeseries import TimeSeries
from dbc_influxdb import dbcInflux
from diive.core.plotting.heatmap_datetime import HeatmapDateTime
from diive.core.times.times import TimestampSanitizer
from diive.pkgs.createvar.potentialradiation import potrad
from diive.pkgs.gapfilling.xgboost_ts import XGBoostTS
from diive.core.io.files import load_parquet
import warnings

warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)
dt_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
version_diive = importlib.metadata.version("diive")
print(f"diive version: v{version_diive}")
version_dbc = importlib.metadata.version("dbc_influxdb")
print(f"dbc-influxdb version: v{version_dbc}")
dbc = dbcInflux(dirconf=DIRCONF)  # Connect to database
diive version: v0.85.0
dbc-influxdb version: v0.12.0
Reading configuration files was successful.
Connection to database works.

LOAD DATA from files#

filename_data_simple_2005_2020 = "13.1_CH-CHA_FEIGENW_meteo_2005-2020.parquet"
data_simple_2005_2020 = load_parquet(filepath=filename_data_simple_2005_2020)
data_simple_2005_2020
Loaded .parquet file 13.1_CH-CHA_FEIGENW_meteo_2005-2020.parquet (0.203 seconds).
    --> Detected time resolution of <30 * Minutes> / 30min 
G_GF1_0.03_1 LW_IN_T1_2_1 LW_OUT_T1_2_1 PA_GF1_0.9_1 FLAG_PA_GF1_0.9_1_ISFILLED PPFD_IN_T1_2_2 ... SW_OUT_T1_2_1 TA_T1_2_1 FLAG_TA_T1_2_1_ISFILLED TS_GF1_0.04_1 TS_GF1_0.15_1 TS_GF1_0.4_1
TIMESTAMP_MIDDLE
2005-01-01 00:15:00 NaN NaN NaN 978.100000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 00:45:00 NaN NaN NaN 977.933333 1.0 NaN ... NaN 1.533333 3.0 NaN NaN NaN
2005-01-01 01:15:00 NaN NaN NaN 977.900000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 01:45:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 02:15:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.500000 3.0 NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2020-12-31 21:45:00 -5.246918 327.0590 305.8658 958.613800 0.0 0.012177 ... -13.17378 -0.059646 0.0 7.059905 4.751553 5.665280
2020-12-31 22:15:00 -5.688780 325.6822 299.9425 958.361800 0.0 0.015688 ... -14.05419 -0.747990 0.0 7.037867 4.753286 5.666638
2020-12-31 22:45:00 -6.256253 325.8016 303.5924 958.257100 0.0 0.015338 ... -13.52161 -0.606677 0.0 7.021161 4.754451 5.668180
2020-12-31 23:15:00 -6.492525 325.8260 307.0624 958.275800 0.0 0.010785 ... -12.03449 -0.063648 0.0 6.998506 4.753085 5.669790
2020-12-31 23:45:00 -6.446632 328.6548 309.0171 958.237500 0.0 0.007877 ... -12.31394 0.177185 0.0 6.990282 4.749334 5.671485

280512 rows × 21 columns

filename_data_simple_2021_2024 = "14.1_CH-CHA_DATABASE_meteo6_2021-2024.parquet"
data_simple_2021_2024 = load_parquet(filepath=filename_data_simple_2021_2024)
data_simple_2021_2024
Loaded .parquet file 14.1_CH-CHA_DATABASE_meteo6_2021-2024.parquet (0.006 seconds).
    --> Detected time resolution of <30 * Minutes> / 30min 
LW_IN_T1_2_1 PA_GF1_0.9_1 PPFD_IN_T1_2_2 RH_T1_2_1 SW_IN_T1_2_1 TA_T1_2_1
TIMESTAMP_MIDDLE
2021-01-01 00:15:00 332.179450 958.107523 0.0 100.000000 0.0 0.156080
2021-01-01 00:45:00 329.543557 958.088840 0.0 100.000000 0.0 0.102816
2021-01-01 01:15:00 328.928400 958.323920 0.0 100.000000 0.0 0.105138
2021-01-01 01:45:00 331.492063 958.425240 0.0 99.998978 0.0 0.168548
2021-01-01 02:15:00 332.917923 958.625800 0.0 99.999973 0.0 0.214706
... ... ... ... ... ... ...
2024-12-31 21:45:00 304.613900 983.370890 0.0 99.997990 0.0 -1.919472
2024-12-31 22:15:00 303.039890 983.052160 0.0 99.997990 0.0 -2.104678
2024-12-31 22:45:00 302.093633 982.851140 0.0 99.997990 0.0 -2.089444
2024-12-31 23:15:00 302.217307 982.896827 0.0 99.997990 0.0 -2.355761
2024-12-31 23:45:00 298.392973 982.856613 0.0 99.997990 0.0 -2.578839

70128 rows × 6 columns



Merge data#

# Merge data on index
data_simple_merged = pd.concat([data_simple_2005_2020, data_simple_2021_2024], axis=0)
data_simple_merged
G_GF1_0.03_1 LW_IN_T1_2_1 LW_OUT_T1_2_1 PA_GF1_0.9_1 FLAG_PA_GF1_0.9_1_ISFILLED PPFD_IN_T1_2_2 ... SW_OUT_T1_2_1 TA_T1_2_1 FLAG_TA_T1_2_1_ISFILLED TS_GF1_0.04_1 TS_GF1_0.15_1 TS_GF1_0.4_1
TIMESTAMP_MIDDLE
2005-01-01 00:15:00 NaN NaN NaN 978.100000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 00:45:00 NaN NaN NaN 977.933333 1.0 NaN ... NaN 1.533333 3.0 NaN NaN NaN
2005-01-01 01:15:00 NaN NaN NaN 977.900000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 01:45:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 02:15:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.500000 3.0 NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2024-12-31 21:45:00 NaN 304.613900 NaN 983.370890 NaN 0.0 ... NaN -1.919472 NaN NaN NaN NaN
2024-12-31 22:15:00 NaN 303.039890 NaN 983.052160 NaN 0.0 ... NaN -2.104678 NaN NaN NaN NaN
2024-12-31 22:45:00 NaN 302.093633 NaN 982.851140 NaN 0.0 ... NaN -2.089444 NaN NaN NaN NaN
2024-12-31 23:15:00 NaN 302.217307 NaN 982.896827 NaN 0.0 ... NaN -2.355761 NaN NaN NaN NaN
2024-12-31 23:45:00 NaN 298.392973 NaN 982.856613 NaN 0.0 ... NaN -2.578839 NaN NaN NaN NaN

350640 rows × 21 columns


Sanitize timestamp#

data_simple_merged = TimestampSanitizer(data=data_simple_merged, output_middle_timestamp=False).get()
data_simple_merged
G_GF1_0.03_1 LW_IN_T1_2_1 LW_OUT_T1_2_1 PA_GF1_0.9_1 FLAG_PA_GF1_0.9_1_ISFILLED PPFD_IN_T1_2_2 ... SW_OUT_T1_2_1 TA_T1_2_1 FLAG_TA_T1_2_1_ISFILLED TS_GF1_0.04_1 TS_GF1_0.15_1 TS_GF1_0.4_1
TIMESTAMP_MIDDLE
2005-01-01 00:15:00 NaN NaN NaN 978.100000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 00:45:00 NaN NaN NaN 977.933333 1.0 NaN ... NaN 1.533333 3.0 NaN NaN NaN
2005-01-01 01:15:00 NaN NaN NaN 977.900000 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 01:45:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.566667 3.0 NaN NaN NaN
2005-01-01 02:15:00 NaN NaN NaN 977.833333 1.0 NaN ... NaN 1.500000 3.0 NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2024-12-31 21:45:00 NaN 304.613900 NaN 983.370890 NaN 0.0 ... NaN -1.919472 NaN NaN NaN NaN
2024-12-31 22:15:00 NaN 303.039890 NaN 983.052160 NaN 0.0 ... NaN -2.104678 NaN NaN NaN NaN
2024-12-31 22:45:00 NaN 302.093633 NaN 982.851140 NaN 0.0 ... NaN -2.089444 NaN NaN NaN NaN
2024-12-31 23:15:00 NaN 302.217307 NaN 982.896827 NaN 0.0 ... NaN -2.355761 NaN NaN NaN NaN
2024-12-31 23:45:00 NaN 298.392973 NaN 982.856613 NaN 0.0 ... NaN -2.578839 NaN NaN NaN NaN

350640 rows × 21 columns

Plot#

data_simple_merged.plot(subplots=True, x_compat=True, title="Merged data", figsize=(20, 14));
../../_images/4b864f6d5936b055fd05f5780fbccab34c10e2420697b5dc5f8f3c50c92d5d89.png

SAVE TO FILE#

OUTNAME = "15.1_CH-CHA_meteo6_2005-2024"
OUTPATH = r""
filepath = save_parquet(filename=OUTNAME, data=data_simple_merged, outpath=OUTPATH)
# data_simple_merged.to_csv(Path(OUTPATH) / f"{OUTNAME}.csv")
Saved file 15.1_CH-CHA_meteo6_2005-2024.parquet (0.333 seconds).

End of notebook.#

dt_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Finished. {dt_string}")
Finished. 2025-01-20 16:22:00