notebook version: 3
(4 Jul 2024)
new in this version: added check if downloaded data is indeed in 30MIN time resolution
This notebook can be used to download data from the database
InfluxDB
Data are stored to a
.csv
file in this folder
Auto-settings#
Data settings#
DIRCONF = r'L:\Sync\luhk_work\20 - CODING\22 - POET\configs'
# DIRCONF = r'P:\Flux\RDS_calculations\_scripts\_configs\configs' # Folder with configuration files: needed e.g. for connection to database
TIMEZONE_OFFSET_TO_UTC_HOURS = 1 # Timezone, e.g. "1" is translated to timezone "UTC+01:00" (CET, winter time)
REQUIRED_TIME_RESOLUTION = '30min' # 30MIN time resolution
Imports#
import importlib.metadata
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
from diive.core.io.files import save_parquet
sns.set_theme('notebook')
from diive.core.plotting.timeseries import TimeSeries
from dbc_influxdb import dbcInflux
from diive.core.plotting.heatmap_datetime import HeatmapDateTime
from diive.core.times.times import TimestampSanitizer
from diive.pkgs.createvar.potentialradiation import potrad
from diive.pkgs.gapfilling.xgboost_ts import XGBoostTS
from diive.core.io.files import load_parquet
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)
dt_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
version_diive = importlib.metadata.version("diive")
print(f"diive version: v{version_diive}")
version_dbc = importlib.metadata.version("dbc_influxdb")
print(f"dbc-influxdb version: v{version_dbc}")
dbc = dbcInflux(dirconf=DIRCONF) # Connect to database
diive version: v0.85.0
dbc-influxdb version: v0.12.0
Reading configuration files was successful.
Connection to database works.
LOAD DATA from files#
filename_data_simple_2005_2020 = "13.1_CH-CHA_FEIGENW_meteo_2005-2020.parquet"
data_simple_2005_2020 = load_parquet(filepath=filename_data_simple_2005_2020)
data_simple_2005_2020
Loaded .parquet file 13.1_CH-CHA_FEIGENW_meteo_2005-2020.parquet (0.203 seconds).
--> Detected time resolution of <30 * Minutes> / 30min
G_GF1_0.03_1 | LW_IN_T1_2_1 | LW_OUT_T1_2_1 | PA_GF1_0.9_1 | FLAG_PA_GF1_0.9_1_ISFILLED | PPFD_IN_T1_2_2 | ... | SW_OUT_T1_2_1 | TA_T1_2_1 | FLAG_TA_T1_2_1_ISFILLED | TS_GF1_0.04_1 | TS_GF1_0.15_1 | TS_GF1_0.4_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TIMESTAMP_MIDDLE | |||||||||||||
2005-01-01 00:15:00 | NaN | NaN | NaN | 978.100000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 00:45:00 | NaN | NaN | NaN | 977.933333 | 1.0 | NaN | ... | NaN | 1.533333 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:15:00 | NaN | NaN | NaN | 977.900000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:45:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 02:15:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.500000 | 3.0 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2020-12-31 21:45:00 | -5.246918 | 327.0590 | 305.8658 | 958.613800 | 0.0 | 0.012177 | ... | -13.17378 | -0.059646 | 0.0 | 7.059905 | 4.751553 | 5.665280 |
2020-12-31 22:15:00 | -5.688780 | 325.6822 | 299.9425 | 958.361800 | 0.0 | 0.015688 | ... | -14.05419 | -0.747990 | 0.0 | 7.037867 | 4.753286 | 5.666638 |
2020-12-31 22:45:00 | -6.256253 | 325.8016 | 303.5924 | 958.257100 | 0.0 | 0.015338 | ... | -13.52161 | -0.606677 | 0.0 | 7.021161 | 4.754451 | 5.668180 |
2020-12-31 23:15:00 | -6.492525 | 325.8260 | 307.0624 | 958.275800 | 0.0 | 0.010785 | ... | -12.03449 | -0.063648 | 0.0 | 6.998506 | 4.753085 | 5.669790 |
2020-12-31 23:45:00 | -6.446632 | 328.6548 | 309.0171 | 958.237500 | 0.0 | 0.007877 | ... | -12.31394 | 0.177185 | 0.0 | 6.990282 | 4.749334 | 5.671485 |
280512 rows × 21 columns
filename_data_simple_2021_2024 = "14.1_CH-CHA_DATABASE_meteo6_2021-2024.parquet"
data_simple_2021_2024 = load_parquet(filepath=filename_data_simple_2021_2024)
data_simple_2021_2024
Loaded .parquet file 14.1_CH-CHA_DATABASE_meteo6_2021-2024.parquet (0.006 seconds).
--> Detected time resolution of <30 * Minutes> / 30min
LW_IN_T1_2_1 | PA_GF1_0.9_1 | PPFD_IN_T1_2_2 | RH_T1_2_1 | SW_IN_T1_2_1 | TA_T1_2_1 | |
---|---|---|---|---|---|---|
TIMESTAMP_MIDDLE | ||||||
2021-01-01 00:15:00 | 332.179450 | 958.107523 | 0.0 | 100.000000 | 0.0 | 0.156080 |
2021-01-01 00:45:00 | 329.543557 | 958.088840 | 0.0 | 100.000000 | 0.0 | 0.102816 |
2021-01-01 01:15:00 | 328.928400 | 958.323920 | 0.0 | 100.000000 | 0.0 | 0.105138 |
2021-01-01 01:45:00 | 331.492063 | 958.425240 | 0.0 | 99.998978 | 0.0 | 0.168548 |
2021-01-01 02:15:00 | 332.917923 | 958.625800 | 0.0 | 99.999973 | 0.0 | 0.214706 |
... | ... | ... | ... | ... | ... | ... |
2024-12-31 21:45:00 | 304.613900 | 983.370890 | 0.0 | 99.997990 | 0.0 | -1.919472 |
2024-12-31 22:15:00 | 303.039890 | 983.052160 | 0.0 | 99.997990 | 0.0 | -2.104678 |
2024-12-31 22:45:00 | 302.093633 | 982.851140 | 0.0 | 99.997990 | 0.0 | -2.089444 |
2024-12-31 23:15:00 | 302.217307 | 982.896827 | 0.0 | 99.997990 | 0.0 | -2.355761 |
2024-12-31 23:45:00 | 298.392973 | 982.856613 | 0.0 | 99.997990 | 0.0 | -2.578839 |
70128 rows × 6 columns
Merge data#
# Merge data on index
data_simple_merged = pd.concat([data_simple_2005_2020, data_simple_2021_2024], axis=0)
data_simple_merged
G_GF1_0.03_1 | LW_IN_T1_2_1 | LW_OUT_T1_2_1 | PA_GF1_0.9_1 | FLAG_PA_GF1_0.9_1_ISFILLED | PPFD_IN_T1_2_2 | ... | SW_OUT_T1_2_1 | TA_T1_2_1 | FLAG_TA_T1_2_1_ISFILLED | TS_GF1_0.04_1 | TS_GF1_0.15_1 | TS_GF1_0.4_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TIMESTAMP_MIDDLE | |||||||||||||
2005-01-01 00:15:00 | NaN | NaN | NaN | 978.100000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 00:45:00 | NaN | NaN | NaN | 977.933333 | 1.0 | NaN | ... | NaN | 1.533333 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:15:00 | NaN | NaN | NaN | 977.900000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:45:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 02:15:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.500000 | 3.0 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2024-12-31 21:45:00 | NaN | 304.613900 | NaN | 983.370890 | NaN | 0.0 | ... | NaN | -1.919472 | NaN | NaN | NaN | NaN |
2024-12-31 22:15:00 | NaN | 303.039890 | NaN | 983.052160 | NaN | 0.0 | ... | NaN | -2.104678 | NaN | NaN | NaN | NaN |
2024-12-31 22:45:00 | NaN | 302.093633 | NaN | 982.851140 | NaN | 0.0 | ... | NaN | -2.089444 | NaN | NaN | NaN | NaN |
2024-12-31 23:15:00 | NaN | 302.217307 | NaN | 982.896827 | NaN | 0.0 | ... | NaN | -2.355761 | NaN | NaN | NaN | NaN |
2024-12-31 23:45:00 | NaN | 298.392973 | NaN | 982.856613 | NaN | 0.0 | ... | NaN | -2.578839 | NaN | NaN | NaN | NaN |
350640 rows × 21 columns
Sanitize timestamp#
data_simple_merged = TimestampSanitizer(data=data_simple_merged, output_middle_timestamp=False).get()
data_simple_merged
G_GF1_0.03_1 | LW_IN_T1_2_1 | LW_OUT_T1_2_1 | PA_GF1_0.9_1 | FLAG_PA_GF1_0.9_1_ISFILLED | PPFD_IN_T1_2_2 | ... | SW_OUT_T1_2_1 | TA_T1_2_1 | FLAG_TA_T1_2_1_ISFILLED | TS_GF1_0.04_1 | TS_GF1_0.15_1 | TS_GF1_0.4_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TIMESTAMP_MIDDLE | |||||||||||||
2005-01-01 00:15:00 | NaN | NaN | NaN | 978.100000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 00:45:00 | NaN | NaN | NaN | 977.933333 | 1.0 | NaN | ... | NaN | 1.533333 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:15:00 | NaN | NaN | NaN | 977.900000 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 01:45:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.566667 | 3.0 | NaN | NaN | NaN |
2005-01-01 02:15:00 | NaN | NaN | NaN | 977.833333 | 1.0 | NaN | ... | NaN | 1.500000 | 3.0 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2024-12-31 21:45:00 | NaN | 304.613900 | NaN | 983.370890 | NaN | 0.0 | ... | NaN | -1.919472 | NaN | NaN | NaN | NaN |
2024-12-31 22:15:00 | NaN | 303.039890 | NaN | 983.052160 | NaN | 0.0 | ... | NaN | -2.104678 | NaN | NaN | NaN | NaN |
2024-12-31 22:45:00 | NaN | 302.093633 | NaN | 982.851140 | NaN | 0.0 | ... | NaN | -2.089444 | NaN | NaN | NaN | NaN |
2024-12-31 23:15:00 | NaN | 302.217307 | NaN | 982.896827 | NaN | 0.0 | ... | NaN | -2.355761 | NaN | NaN | NaN | NaN |
2024-12-31 23:45:00 | NaN | 298.392973 | NaN | 982.856613 | NaN | 0.0 | ... | NaN | -2.578839 | NaN | NaN | NaN | NaN |
350640 rows × 21 columns
Plot#
data_simple_merged.plot(subplots=True, x_compat=True, title="Merged data", figsize=(20, 14));

SAVE TO FILE#
OUTNAME = "15.1_CH-CHA_meteo6_2005-2024"
OUTPATH = r""
filepath = save_parquet(filename=OUTNAME, data=data_simple_merged, outpath=OUTPATH)
# data_simple_merged.to_csv(Path(OUTPATH) / f"{OUTNAME}.csv")
Saved file 15.1_CH-CHA_meteo6_2005-2024.parquet (0.333 seconds).
End of notebook.#
dt_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Finished. {dt_string}")
Finished. 2025-01-20 16:22:00