lop

Format meteo data for EddyPro flux processing

Notebook version: 1 (24 Feb 2025)
Author: Lukas Hörtnagl (holukas@ethz.ch)


Background#

  • Formats meteo data to be used in EddyPro flux processing

More info:


Settings#

Variables#

# Column names of the meteo variables as they appear in the original data file
TA = 'TA_T1_47_1_gfXG'
RH = 'RH_T1_47_1'
PA = 'PA_T1_47_1'
SW_IN = 'SW_IN_T1_47_1_gfXG'
LW_IN = 'LW_IN_T1_47_1'
PPFD_IN = 'PPFD_IN_T1_47_1_gfXG'

# Mapping for EddyPro: original column name -> (new variable name, units)
rename_dict = {
    TA: ('Ta_1_1_1', 'C'),
    SW_IN: ('Rg_1_1_1', 'W+1m-2'),
    RH: ('RH_1_1_1', '%'),
    LW_IN: ('Lwin_1_1_1', 'W+1m-2'),
    PA: ('Pa_1_1_1', 'kPa'),
    PPFD_IN: ('PPFD_1_1_1', 'umol+1m-2s-1'),
}

Imports#

import importlib.metadata
import warnings
from datetime import datetime

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np

from dbc_influxdb import dbcInflux

from diive.core.plotting.heatmap_datetime import HeatmapDateTime  # For plotting heatmaps
from diive.core.plotting.timeseries import TimeSeries  # For simple (interactive) time series plotting
from diive.pkgs.formats.meteo import FormatMeteoForEddyProFluxProcessing  # Class to format output files for upload
from diive.core.io.files import load_parquet, save_parquet

# Suppress all FutureWarning and UserWarning messages for the rest of the notebook
for _category in (FutureWarning, UserWarning):
    warnings.filterwarnings(action='ignore', category=_category)

# Print the installed diive version so the run can be reproduced later
version_diive = importlib.metadata.version("diive")
print(f"diive version: v{version_diive}")
diive version: v0.87.0

Docstring#

# help(FormatMeteoForEddyProFluxProcessing)

Load data#

# Parquet file with the gap-filled meteo data
SOURCEFILE = r"../10_METEO/12.5_METEO7_GAPFILLED_2004-2024.parquet"
df = load_parquet(filepath=SOURCEFILE)

# Keep only the variables selected in the settings. The names are taken from
# the constants defined there (instead of repeating the string literals) so
# this selection cannot drift apart from `rename_dict`. The explicit list
# fixes the column order of the resulting dataframe.
keepcols = [SW_IN, RH, PPFD_IN, LW_IN, TA, PA]
df = df[keepcols].copy()
Loaded .parquet file ../10_METEO/12.5_METEO7_GAPFILLED_2004-2024.parquet (0.027 seconds).
    --> Detected time resolution of <30 * Minutes> / 30min 

The dataframe with original data looks like this:

df
SW_IN_T1_47_1_gfXG RH_T1_47_1 PPFD_IN_T1_47_1_gfXG LW_IN_T1_47_1 TA_T1_47_1_gfXG PA_T1_47_1
TIMESTAMP_MIDDLE
2004-09-20 10:45:00 602.874084 80.503701 1070.543364 NaN 13.300000 NaN
2004-09-20 11:15:00 703.762207 77.503701 881.543364 NaN 13.390000 NaN
2004-09-20 11:45:00 537.947327 75.803704 1022.543364 NaN 13.810000 NaN
2004-09-20 12:15:00 634.747437 71.503701 1284.543364 NaN 14.470000 NaN
2004-09-20 12:45:00 634.747437 72.703705 742.543364 NaN 13.980000 NaN
... ... ... ... ... ... ...
2024-12-31 21:45:00 0.000000 87.254008 0.000000 232.595527 -0.504794 94.211806
2024-12-31 22:15:00 0.000000 87.430236 0.000000 232.609777 -0.296828 94.189013
2024-12-31 22:45:00 0.000000 89.787920 0.000000 232.345020 -0.392922 94.169525
2024-12-31 23:15:00 0.000000 81.809355 0.000000 234.211100 0.792661 94.168413
2024-12-31 23:45:00 0.000000 88.311314 0.000000 231.760533 -0.422600 94.170793

355563 rows × 6 columns


Apply formatting#

# Set up the formatter with the selected data and the rename/units mapping,
# then run the formatting
f = FormatMeteoForEddyProFluxProcessing(df=df, cols=rename_dict)
f.run()
Sanitizing timestamp ...
>>> Validating timestamp naming of timestamp column TIMESTAMP_MIDDLE ... Timestamp name OK.
>>> Converting timestamp TIMESTAMP_MIDDLE to datetime ... OK
>>> All rows have timestamp TIMESTAMP_MIDDLE, no rows removed.
>>> Sorting timestamp TIMESTAMP_MIDDLE ascending ...
>>> Removing data records with duplicate indexes ... OK (no duplicates found in timestamp index)
>>> Creating continuous <30 * Minutes> timestamp index for timestamp TIMESTAMP_MIDDLE between 2004-09-20 10:45:00 and 2024-12-31 23:45:00 ...
Splitting timestamp into two separate columns ('TIMESTAMP_1', 'yyyy-mm-dd') and ('TIMESTAMP_2', 'HH:MM')
Filling missing values with -9999 ...
Renaming columns ...
# Fetch the formatted dataframe from the formatter
res = f.get_results()
# Bare expression: shows the dataframe as the notebook cell output
res
TIMESTAMP_1 TIMESTAMP_2 Rg_1_1_1 RH_1_1_1 PPFD_1_1_1 Lwin_1_1_1 Ta_1_1_1 Pa_1_1_1
yyyy-mm-dd HH:MM W+1m-2 % umol+1m-2s-1 W+1m-2 C kPa
TIMESTAMP_MIDDLE
2004-09-20 10:45:00 2004-09-20 10:45 602.874084 80.503701 1070.543364 -9999.000000 13.300000 -9999.000000
2004-09-20 11:15:00 2004-09-20 11:15 703.762207 77.503701 881.543364 -9999.000000 13.390000 -9999.000000
2004-09-20 11:45:00 2004-09-20 11:45 537.947327 75.803704 1022.543364 -9999.000000 13.810000 -9999.000000
2004-09-20 12:15:00 2004-09-20 12:15 634.747437 71.503701 1284.543364 -9999.000000 14.470000 -9999.000000
2004-09-20 12:45:00 2004-09-20 12:45 634.747437 72.703705 742.543364 -9999.000000 13.980000 -9999.000000
... ... ... ... ... ... ... ... ...
2024-12-31 21:45:00 2024-12-31 21:45 0.000000 87.254008 0.000000 232.595527 -0.504794 94.211806
2024-12-31 22:15:00 2024-12-31 22:15 0.000000 87.430236 0.000000 232.609777 -0.296828 94.189013
2024-12-31 22:45:00 2024-12-31 22:45 0.000000 89.787920 0.000000 232.345020 -0.392922 94.169525
2024-12-31 23:15:00 2024-12-31 23:15 0.000000 81.809355 0.000000 234.211100 0.792661 94.168413
2024-12-31 23:45:00 2024-12-31 23:45 0.000000 88.311314 0.000000 231.760533 -0.422600 94.170793

355563 rows × 8 columns


Plot all variables#

# Select everything except the timestamp columns; the first element of each
# column key is checked for the "TIMESTAMP" marker
plotcols = [plotcol for plotcol in res.columns if "TIMESTAMP" not in plotcol[0]]
plotdf = res[plotcols].copy()

# One heatmap figure per variable
for col in plotdf.columns:
    # Turn the -9999 missing-value code back into NaN so that it does not
    # distort the quantile-based colour limits below
    series = plotdf[col].replace(to_replace=-9999, value=np.nan)
    fig = plt.figure(facecolor='white', figsize=(4, 8))
    try:
        gs = gridspec.GridSpec(1, 1)  # rows, cols
        ax = fig.add_subplot(gs[0, 0])
        # Clip the colour scale to the 1st-99th percentile range
        vmin = series.quantile(.01)
        vmax = series.quantile(.99)
        HeatmapDateTime(ax=ax, series=series, vmin=vmin, vmax=vmax).plot()
        ax.set_title(col, color='black')
        fig.show()
    except TypeError as err:
        # Best effort: a variable that cannot be plotted is skipped, but the
        # half-built figure is released and the skip is reported instead of
        # being silently ignored
        plt.close(fig)
        print(f"Skipping heatmap for {col}: {err}")
../../_images/6778a53b6926becb678734e8ff5cb25030e5b1b17eca9e6cceb848fb14c32302.png ../../_images/fa700d9efe4e36c7dfdf65194a59f7ab717749e344544226d4d209fe4d63b797.png ../../_images/59439244a0fa20be89af88bbbb17db918d5a4555720827fccc6c7123b6d2605f.png ../../_images/749e6e22fe45cbf05e4322da04dd27028625bd9680ad87bce2546a57689dea81.png ../../_images/1aa2f247a6751bff3e8233b05fd1c9e49db996cd4c12ebde463efe22ffc6ee8c.png ../../_images/b0881046fb47c98806bb519599bbca2e0e41bfe0d886d800300aacfa0f03030e.png

Save to CSV#

# Write the formatted data to a CSV file in the current working directory;
# index=False leaves the dataframe index out of the file
res.to_csv("CH-LAE_2004-2024_meteo_aux.csv", index=False)

End of notebook#

# Record when the notebook finished running (local time, second resolution)
dt_string = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
print(f"Finished {dt_string}")
Finished 2025-06-13 17:29:48