Verification#

Load Data#

import os
import xarray as xr
import pandas as pd
import numpy as np
import gsw

Model#

dir_mod = '/data/local/marine-training/data/MATPEL_05/cawo_out'
paths_mod = []
for file in os.listdir(dir_mod):
    if file.endswith('.nc') and 'cawo' in file:
        paths_mod.append(os.path.join(dir_mod, file))
paths_mod.sort()
ds_mod = xr.open_mfdataset(paths_mod, chunks='auto')
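
Before moving on, a quick structural check can confirm what the rest of this notebook assumes of the model output, namely `date`, `depth`, `lat`, and `lon` dimensions and an `sw_temp` variable (names taken from the selection calls used below):

# Sanity check (sketch): dimensions and variables the later cells rely on
print(ds_mod.dims)             # expect date, depth, lat, lon
print(list(ds_mod.data_vars))  # expect 'sw_temp' among the variables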

Argo#

path_argo_nc = '/data/local/marine-training/data/MATPEL_05/argo_data/nc_argo/GL_PR_PF_2902800.nc'
ds_argo = xr.open_dataset(path_argo_nc)

Drifter#

path_drifter_csv = '/data/local/marine-training/data/MATPEL_05/drifter_data/drifter_6hour_qc_f5dd_5de7_fd9d.csv'
df_drifter = pd.read_csv(path_drifter_csv)
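
A quick peek at the raw table helps explain the tidying done later: the first row appears to hold units rather than data, which is typical of ERDDAP CSV exports (an assumption suggested by the hashed filename):

# Inspect the header and the first rows (sketch)
print(df_drifter.head(3))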

Verification#

Verification using Argo float data#

Selecting data by QC flags and the Indonesian region#

ds_argo_filtered = (ds_argo
                    .where(ds_argo['PRES_ADJUSTED_QC'] == 1)
                    .where(ds_argo['TEMP_ADJUSTED_QC'] == 1)
                    .where(ds_argo['PSAL_ADJUSTED_QC'] == 1)
                   )
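
Each `.where()` call keeps the dataset shape and masks values whose QC flag is not 1 (good data) to NaN. The three chained calls are equivalent to applying one combined mask, sketched here:

# Equivalent combined form (sketch): keep points where all three QC flags are 1
qc_good = ((ds_argo['PRES_ADJUSTED_QC'] == 1)
           & (ds_argo['TEMP_ADJUSTED_QC'] == 1)
           & (ds_argo['PSAL_ADJUSTED_QC'] == 1))
ds_argo_filtered = ds_argo.where(qc_good)  # same result as the chained .where() calls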

t_start = pd.to_datetime("2024-02-01")
t_end = pd.to_datetime("2025-03-31")
ext_indo = [90, 145, -15, 15]  # [lon_min, lon_max, lat_min, lat_max]

# 1. Extract the coordinate values
lat = ds_argo_filtered['LATITUDE'].values
lon = ds_argo_filtered['LONGITUDE'].values
tm = pd.to_datetime(ds_argo_filtered['TIME'].values)

# 2. Location mask: keep only positions inside the Indonesian bounds
mask_pos = (lon >= ext_indo[0]) & (lon <= ext_indo[1]) & (lat >= ext_indo[2]) & (lat <= ext_indo[3])

# 3. Time mask: keep only times within the chosen range
mask_tm = (tm >= t_start) & (tm <= t_end)

# 4. Combine the two masks (location and time)
mask_combined = mask_pos & mask_tm

# 5. Indices of the profiles that pass the selection
valid_indices = np.where(mask_combined)[0]
# print(f"Indices/cycles passing the selection: {valid_indices}")

ds_argo_filtered = ds_argo_filtered.sel(TIME=mask_combined, LATITUDE=mask_combined, LONGITUDE=mask_combined, POSITION=valid_indices)
ds_argo_filtered['POSITION'] = ('TIME', ds_argo['POSITION'].values[valid_indices])
ds_argo_filtered
<xarray.Dataset> Size: 2MB
Dimensions:                   (TIME: 60, DEPTH: 103, POSITION: 60,
                               LATITUDE: 60, LONGITUDE: 60)
Coordinates:
  * TIME                      (TIME) datetime64[ns] 480B 2024-02-05T01:30:26 ...
  * LATITUDE                  (LATITUDE) float32 240B 4.764 4.568 ... 4.038
  * LONGITUDE                 (LONGITUDE) float32 240B 130.1 129.8 ... 131.1
    POSITION                  (TIME) int64 480B 159 160 161 162 ... 216 217 218
Dimensions without coordinates: DEPTH
Data variables: (12/23)
    TIME_QC                   (TIME, DEPTH) float32 25kB 1.0 1.0 1.0 ... nan nan
    POSITION_QC               (POSITION, TIME, DEPTH) float32 1MB 1.0 ... nan
    DC_REFERENCE              (TIME, DEPTH) object 49kB b'93359759' ... nan
    DIRECTION                 (TIME, DEPTH) object 49kB b'A' b'A' ... nan nan
    VERTICAL_SAMPLING_SCHEME  (TIME, DEPTH) object 49kB b'Synthetic sampling'...
    PRES                      (TIME, DEPTH) float32 25kB 0.0 0.6 1.9 ... nan nan
    ...                        ...
    PSAL                      (TIME, DEPTH) float64 49kB 34.02 34.06 ... nan nan
    PSAL_QC                   (TIME, DEPTH) float32 25kB 1.0 1.0 1.0 ... nan nan
    PSAL_ADJUSTED             (TIME, DEPTH) float64 49kB 34.02 34.06 ... nan nan
    PSAL_ADJUSTED_QC          (TIME, DEPTH) float32 25kB 1.0 1.0 1.0 ... nan nan
    PSAL_ADJUSTED_DM          (TIME, DEPTH) object 49kB b'D' b'D' ... nan nan
    PSAL_ADJUSTED_ERROR       (TIME, DEPTH) float64 49kB 0.009 0.009 ... nan nan
Attributes: (12/49)
    data_type:                      OceanSITES vertical profile
    format_version:                 1.4
    platform_code:                  2902800
    institution:                    First Institute of Oceanography - Ministr...
    institution_edmo_code:          4640
    site_code:                       
    ...                             ...
    last_date_observation:          2025-04-05T01:30:25Z
    last_latitude_observation:      4.24300
    last_longitude_observation:     130.82900
    date_update:                    2025-04-15T09:50:18Z
    history:                        2025-04-15T09:50:18Z : Creation
    data_mode:                      M

Selecting based on the model's depth levels#

# Extract the observation arrays
pressure = ds_argo_filtered["PRES_ADJUSTED"].values        # (n_profile, n_levels)
latitude = ds_argo_filtered["LATITUDE"].values             # (n_profile,)
longitude = ds_argo_filtered["LONGITUDE"].values           # (n_profile,)
temperature = ds_argo_filtered["TEMP_ADJUSTED"].values     # (n_profile, n_levels)
salinity = ds_argo_filtered['PSAL_ADJUSTED'].values        # (n_profile, n_levels)

# Target depths to sample at (the model depth levels)
target_depths = np.abs(ds_mod.depth.values)

# Convert pressure to depth, profile by profile
n_profiles = pressure.shape[0]
depth = np.empty_like(pressure)

for i in range(n_profiles):
    depth[i, :] = -gsw.z_from_p(pressure[i, :], latitude[i])

# Function to pick each variable's value at the nearest sampled depth
def get_nearest_profile_values(var, depth_arr, target_depths):
    result = []
    for prof_idx in range(var.shape[0]):
        prof_values = []
        for td in target_depths:
            # Consider only non-NaN values
            valid_indices = ~np.isnan(depth_arr[prof_idx]) & ~np.isnan(var[prof_idx])
            if not np.any(valid_indices):
                prof_values.append(np.nan)  # If every value is NaN, append NaN
                continue
            valid_depth = depth_arr[prof_idx][valid_indices]
            valid_var = var[prof_idx][valid_indices]
            # Index of the nearest sampled depth
            idx = (np.abs(valid_depth - td)).argmin()
            prof_values.append(valid_var[idx])
        result.append(prof_values)
    return np.array(result)

# Sample temperature, salinity, and pressure at the target depths
temp_at_target_depths = get_nearest_profile_values(temperature, depth, target_depths)
sali_at_target_depths = get_nearest_profile_values(salinity, depth, target_depths)
press_at_target_depths = get_nearest_profile_values(pressure, depth, target_depths)

# Build DataFrames of the sampled values, one column per target depth
df_temp_at_model_depths = pd.DataFrame(temp_at_target_depths, columns=target_depths)
df_sali_at_model_depths = pd.DataFrame(sali_at_target_depths, columns=target_depths)
df_press_at_model_depths = pd.DataFrame(press_at_target_depths, columns=target_depths)
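
As a side note, GSW functions broadcast over NumPy arrays, so the per-profile conversion loop above can be collapsed into a single call; a sketch with the same sign convention (depth positive downward):

# Vectorized alternative to the loop: latitude broadcast across levels
depth = -gsw.z_from_p(pressure, latitude[:, np.newaxis])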

Converting in-situ temperature to potential temperature#

Ocean models typically carry potential temperature rather than in-situ temperature, so the Argo in-situ values are converted (referenced to 0 dbar) for a like-for-like comparison.

# Pull temperature, salinity, and pressure from the DataFrames
temp = df_temp_at_model_depths.values            # (n_profiles, n_depths)
sali = df_sali_at_model_depths.values            # (n_profiles, n_depths)
press = df_press_at_model_depths.values          # (n_profiles, n_depths)

# Profile positions
latitude = ds_argo_filtered["LATITUDE"].values   # (n_profiles,)
longitude = ds_argo_filtered["LONGITUDE"].values # (n_profiles,)

# Shapes
n_profiles, n_depths = temp.shape

# Broadcast latitude and longitude to shape (n_profiles, n_depths)
lat_2d = np.repeat(latitude[:, np.newaxis], n_depths, axis=1)
lon_2d = np.repeat(longitude[:, np.newaxis], n_depths, axis=1)

# Convert Practical Salinity to Absolute Salinity
SA = gsw.SA_from_SP(sali, press, lon_2d, lat_2d)

# Compute potential temperature referenced to the sea surface (0 dbar)
pt = gsw.pt_from_t(SA, temp, press, p_ref=0)

# Assemble the result: rows = depth, columns = profile time
df_pt_at_model_depths = pd.DataFrame(pt, columns=target_depths)
df_pt_at_model_depths['TIME'] = pd.to_datetime(ds_argo_filtered['TIME'].values)
df_pt_at_model_depths.set_index('TIME', inplace=True)
df_pt_at_model_depths = df_pt_at_model_depths.transpose()
df_pt_at_model_depths.index.name = 'Depth (m)'
df_pt_at_model_depths.columns.name = 'Time'

Extracting the model data#

# Extract model data at the observed lat/lon/time
latitude = ds_argo_filtered['LATITUDE'].values
longitude = ds_argo_filtered['LONGITUDE'].values
argo_time = pd.to_datetime(ds_argo_filtered['TIME'].values)

mod_temps_val = []

for i, dt in enumerate(argo_time):
    argLat = latitude[i]
    argLon = longitude[i]
    ds0 = ds_mod.sel(date=dt, lat=argLat, lon=argLon, method='nearest')
    mod_temps_val.append(ds0['sw_temp'].values)

# Build a DataFrame of model temperature at the target depths
df_pt_mod = pd.DataFrame(mod_temps_val, columns=np.abs(ds_mod.depth.values))
df_pt_mod['TIME'] = argo_time
df_pt_mod.set_index('TIME', inplace=True)
df_pt_mod = df_pt_mod.transpose()
df_pt_mod.index.name = 'Depth (m)'
df_pt_mod.columns.name = 'Time'
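
The profile loop above is simple but runs point by point in Python; xarray also offers vectorized pointwise selection with DataArray indexers that share a common dimension. A sketch under the same dimension names (`obs` is an arbitrary new dimension introduced for the lookup):

# One vectorized nearest-neighbour lookup instead of the loop (sketch)
points = ds_mod['sw_temp'].sel(
    date=xr.DataArray(argo_time, dims='obs'),
    lat=xr.DataArray(latitude, dims='obs'),
    lon=xr.DataArray(longitude, dims='obs'),
    method='nearest',
)
# 'points' holds one entry per profile with the depth dimension retained,
# matching mod_temps_val row by row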

Computing the verification metrics#

# Verifying potential temperature:
from mods import verification_metrics
metrics_verif_temp = verification_metrics(df_pt_at_model_depths, df_pt_mod)
print("Temperature verification:")
for k, v in metrics_verif_temp.items():
    print(f"{k}: {v:.3f}")
Temperature verification:
Correlation: 0.996
MAPE (%): 4.088
Bias: -0.070
RMSE: 1.019
Std Obs: 10.804
Std Model: 10.685
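
`verification_metrics` lives in the local `mods` module, so its definition is not shown on this page. A minimal sketch of what it plausibly computes, judging only from the printed keys (an assumption, not the actual implementation):

# Hypothetical stand-in for mods.verification_metrics (sketch)
def verification_metrics_sketch(df_obs, df_mod):
    obs, mod = df_obs.values.ravel(), df_mod.values.ravel()
    ok = ~np.isnan(obs) & ~np.isnan(mod)   # score only jointly valid pairs
    obs, mod = obs[ok], mod[ok]
    return {
        'Correlation': np.corrcoef(obs, mod)[0, 1],
        'MAPE (%)': np.mean(np.abs((mod - obs) / obs)) * 100,
        'Bias': np.mean(mod - obs),
        'RMSE': np.sqrt(np.mean((mod - obs) ** 2)),
        'Std Obs': np.std(obs),
        'Std Model': np.std(mod),
    }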

Verification using drifter data#

Selecting the drifter data#

# The data were already subset at acquisition; only dtype fixes and table tidying are needed
# Drop the first row (a units row in ERDDAP-style CSVs, not data) and keep the existing header
new_header = np.asarray(df_drifter.columns)
df_drifter_adjs = df_drifter[1:].copy()
df_drifter_adjs.columns = new_header

# Drop rows with missing ID, latitude, or longitude
df_drifter_adjs = df_drifter_adjs.dropna(subset=["ID", "latitude", "longitude"])

# Convert the required dtypes
df_drifter_adjs["ID"] = df_drifter_adjs["ID"].astype(int)
df_drifter_adjs["latitude"] = df_drifter_adjs["latitude"].astype(float)
df_drifter_adjs["longitude"] = df_drifter_adjs["longitude"].astype(float)
for col in ["time", "start_date", "deploy_date", "end_date", "drogue_lost_date"]:
    df_drifter_adjs[col] = pd.to_datetime(df_drifter_adjs[col], format="%Y-%m-%dT%H:%M:%SZ")

# Unique drifter IDs
unique_ids = df_drifter_adjs["ID"].unique()

Selecting data by ID#

df_drifter2verif = df_drifter_adjs.loc[df_drifter_adjs['ID'] == 300234061473430][['ID', 'time', 'latitude', 'longitude', 'sst']]
df_drifter2verif['time'] = pd.to_datetime(df_drifter2verif['time']).dt.normalize()  # or .dt.strftime('%Y-%m-%d')
df_drifter2verif.set_index(['time'], inplace=True)
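
The platform ID above is hard-coded to one of the drifters present in this file; when reusing the notebook with another dataset, the available IDs can be listed first:

# List the available platforms before choosing one to verify (sketch)
print(unique_ids)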

Extracting the model data#

# Extract model data at the drifter lat/lon/time
lons, lats = df_drifter2verif['longitude'].values, df_drifter2verif['latitude'].values
drf_time = pd.to_datetime(df_drifter2verif.index.values)

mod_temps_val = []
mod_tms = []

for i, dt in enumerate(drf_time):
    drfLat = lats[i]
    drfLon = lons[i]
    ds0 = ds_mod.sel(date=dt.strftime('%Y-%m-%d'), depth=0., lat=drfLat, lon=drfLon, method='nearest')
    mod_temps_val.append(ds0['sw_temp'].values)
    mod_tms.append(ds0.date.values)

# Build a DataFrame of model surface temperature at the drifter points
df_mod_temp = pd.DataFrame(
    data={
        'time': pd.to_datetime(mod_tms),
        'latitude': lats,
        'longitude': lons,
        'sst': mod_temps_val
    }
)
df_mod_temp.set_index(['time'], inplace=True)
df_drifter2verif['sst'] = df_drifter2verif['sst'].astype(float)
df_mod_temp['sst'] = df_mod_temp['sst'].astype(float)
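
Both series were built in the same record order, one model value per drifter record, so they can be paired positionally for a quick visual check before scoring; a sketch:

# Side-by-side obs/model SST pairs (sketch)
paired = pd.DataFrame({
    'sst_obs': df_drifter2verif['sst'].values,
    'sst_mod': df_mod_temp['sst'].values,
}, index=df_mod_temp.index)
print(paired.head())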

metrics_verif_temp_drifter = verification_metrics(df_drifter2verif, df_mod_temp)
print("Temperature verification:")
for k, v in metrics_verif_temp_drifter.items():
    print(f"{k}: {v:.3f}")
Temperature verification:
Correlation: 1.000
MAPE (%): 0.632
Bias: 0.184
RMSE: 0.392
Std Obs: 66.363
Std Model: 66.299

Note that the standard deviations above are far larger than a plausible SST spread: both frames passed to verification_metrics still share latitude and longitude columns, so if the function scores all shared columns, the coordinates enter the statistics and inflate the spread and correlation. Restricting both inputs to their sst column (e.g. df_drifter2verif[['sst']] and df_mod_temp[['sst']]) would score SST alone.

Visualizing the verification results#

Creating a Taylor diagram to visualize the model and Argo verification#

from mods import TaylorDiagram
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

# Statistics from the Argo verification above
ccorr = metrics_verif_temp['Correlation']
stdv_mod = metrics_verif_temp['Std Model']
stdv_arg = metrics_verif_temp['Std Obs']

fig = plt.figure(figsize=(5.5, 5.5))
dia = TaylorDiagram(stdv_arg, fig=fig, rect=111, label='', grid=True)
dia.add_sample(stdv_arg, 1, label="Obs", marker=".", ls="", mec="red", mfc="red", mew=2)
dia.add_sample(stdv_mod, ccorr, label="Mod", marker="o", ls="", mfc="green", mec="none", markersize=8)
ref_line = Line2D([0], [0], color="red", linestyle=":")
handles = dia.samplePoints + [ref_line]
labels = [p.get_label() for p in dia.samplePoints] + ["Ref. Std. Dev"]
fig.legend(handles[1:], labels[1:], numpoints=1, prop=dict(size="medium"))
contours = dia.add_contours()
plt.clabel(contours, inline=1, fontsize=10)
plt.show()
[Figure: Taylor diagram of the potential temperature verification, with the model point (green) plotted against the Argo reference (red).]