Source code for hydroravens.priors

"""
hydroravens.priors
~~~~~~~~~~~~~~~~~~
Data-driven prior estimation for HydroRaVENS calibration.

:func:`suggest_priors` wraps :class:`~hydroravens.BrutsaertNieber` and
:class:`~hydroravens.HydrographSeparation` to produce a coherent set of
parameter starting points from an observed discharge record, before any
model run or calibration is attempted.

The returned :class:`Priors` object holds:

* Reservoir e-folding timescales (from spectral + recession decomposition).
* Power-law recession exponents (from the B–N recession cloud).
* Initial reservoir storage depths (from hydrograph separation).

It also exposes the underlying analysis objects for deeper inspection and
prints a human-readable summary.

Example
-------
>>> import pandas as pd
>>> from hydroravens import suggest_priors
>>> df = pd.read_csv('input.csv', parse_dates=['Date'])
>>> Q = df['Specific Discharge [mm/day]'].values
>>> P = df['Precipitation [mm/day]'].values
>>> pr = suggest_priors(Q, P=P, n_reservoirs=3)
>>> pr.summary()
"""

import warnings

import numpy as np

from .recession import BrutsaertNieber
from .hydrograph_separation import HydrographSeparation

# Theoretical Brutsaert & Nieber (1977) long-time baseflow recession exponent
# (b_HR in HydroRaVENS notation, corresponding to B-N slope n ≈ 1.55).
# Appropriate as a fixed prior for the slow (karst/groundwater) reservoir.
_B_KARST_THEORETICAL = 2.203


[docs] class Priors: """ Data-driven parameter priors for HydroRaVENS. Produced by :func:`suggest_priors`; not intended to be instantiated directly. Attributes ---------- t_efold : list of float Suggested e-folding residence times [days], fastest reservoir first. ``None`` entries indicate that the timescale could not be estimated from the data (fall back to calibration defaults). recession_exponents : list of float Suggested power-law recession exponents, fastest reservoir first. The fastest reservoir uses the catchment-integrated B–N estimate; the slow (karst) reservoir uses the theoretical value (Brutsaert & Nieber 1977, b ≈ 2.203); deeper reservoirs default to 1.0 (linear). initial_depths : list of float Estimated initial storage depths [mm], fastest reservoir first. log_t_efold_bounds : dict Calibration bounds in log10(days) for each ``log__t_efold_*`` parameter, as returned by :meth:`~hydroravens.HydrographSeparation.get_parameter_priors`. bn : BrutsaertNieber Fitted Brutsaert & Nieber object; inspect ``bn.n_``, ``bn.a_``, or call ``bn.plot()`` for the recession cloud. hs : HydrographSeparation Fitted HydrographSeparation object; call ``hs.summary()`` for full spectral decomposition detail. n_reservoirs : int Number of reservoirs these priors are intended for. """ def __init__(self, t_efold, recession_exponents, initial_depths, log_t_efold_bounds, bn, hs, n_reservoirs): self.t_efold = t_efold self.recession_exponents = recession_exponents self.initial_depths = initial_depths self.log_t_efold_bounds = log_t_efold_bounds self.bn = bn self.hs = hs self.n_reservoirs = n_reservoirs
[docs] def summary(self): """ Print a human-readable summary of all suggested priors. Includes timescales, recession exponents, initial depths, and calibration bounds, alongside guidance on which values to fix versus calibrate. """ n = self.n_reservoirs labels = (['soil'] + ['karst'] + ['deep'] * (n - 2))[:n] print("=" * 60) print("HydroRaVENS data-driven priors") print("=" * 60) print("\nE-folding residence times (fastest → slowest):") for label, tau in zip(labels, self.t_efold): if tau is None: print(f" {label:8s}: could not be estimated — use calibration default") else: print(f" {label:8s}: {tau:.1f} days") print("\nPower-law recession exponents (fastest → slowest):") b_note = { labels[0]: "B–N data-driven estimate — calibrate", labels[-2] if n > 1 else labels[0]: f"theoretical B–N 2.203 — consider fixing", } if n > 2: b_note[labels[-1]] = "linear (b=1) — deep reservoir default" for label, b in zip(labels, self.recession_exponents): note = b_note.get(label, "") print(f" {label:8s}: {b:.3f} ({note})") print("\nInitial storage depths [mm] (fastest → slowest):") for label, h0 in zip(labels, self.initial_depths): print(f" {label:8s}: {h0:.1f} mm") print("\nCalibration bounds (log10 days) for params.yml:") if self.log_t_efold_bounds: for key, bounds in self.log_t_efold_bounds.items(): if bounds is None: print(f" {key}: not estimated — keep params.yml defaults") else: print(f" {key}: initial={bounds['initial']:.3f}," f" lower={bounds['lower']:.3f}," f" upper={bounds['upper']:.3f}") else: print(" (not available)") print("\nBrutsaert–Nieber recession cloud:") print(f" slope n = {self.bn.n_:.4f}") print(f" coeff a = {self.bn.a_:.4g}") print(f" → b_HR = {self.bn.to_reservoir_exponent():.3f} " f"(used as b_{labels[0]} prior)") print("=" * 60)
[docs] def to_yaml_snippet(self): """ Return a YAML string snippet for the ``reservoirs:`` and ``initial_conditions:`` sections, populated with these priors. The snippet is a starting point only; review and adjust before running calibration. Returns ------- str """ n = self.n_reservoirs labels = (['soil'] + ['karst'] + ['deep'] * (n - 2))[:n] def _fmt(v): return f"{v:.1f}" if v is not None else "null # could not be estimated" tau_lines = "\n".join(f" - {_fmt(t)} # {l}" for t, l in zip(self.t_efold, labels)) exfilt_lines = "\n".join( f" - {0.8 if i == 0 else (0.5 if i < n - 1 else 1.0)}" f" # {l} — placeholder; calibrate" for i, l in enumerate(labels) ) hmax_lines = "\n".join(f" - .inf # {l}" for l in labels) b_lines = "\n".join(f" - {b:.3f} # {l}" for b, l in zip(self.recession_exponents, labels)) h0_lines = "\n".join(f" - {h:.1f} # {l}" for h, l in zip(self.initial_depths, labels)) snippet = ( f"reservoirs:\n" f" e_folding_residence_times__days:\n{tau_lines}\n" f" exfiltration_fractions:\n{exfilt_lines}\n" f" maximum_effective_depths__mm:\n{hmax_lines}\n" f" recession_exponents:\n{b_lines}\n" f"\ninitial_conditions:\n" f" water_reservoir_effective_depths__mm:\n{h0_lines}\n" f" snowpack__mm_SWE: 0\n" ) return snippet
[docs] def suggest_priors(Q, P=None, n_reservoirs=3, dt=1.0, min_recession_days=3): """ Estimate HydroRaVENS parameter priors from an observed discharge record. Combines :class:`~hydroravens.BrutsaertNieber` recession analysis with :class:`~hydroravens.HydrographSeparation` to produce timescale estimates, recession exponents, and initial storage depths without running any model. Parameters ---------- Q : array-like Observed specific discharge time series [mm/day]. Must be non-negative and at daily resolution. P : array-like, optional Observed precipitation time series [mm/day], same length as *Q*. Improves the HydrographSeparation spectral decomposition when provided. Default ``None``. n_reservoirs : int, optional Number of reservoirs in the intended model structure (2 or 3). Default ``3``. dt : float, optional Timestep [days]. Default ``1.0``. min_recession_days : int, optional Minimum recession length passed to :class:`~hydroravens.BrutsaertNieber`. Default ``3``. Returns ------- Priors Object containing timescales, recession exponents, initial depths, calibration bounds, and the underlying analysis objects. Notes ----- **Recession exponents** are assigned as follows: * *Fastest reservoir* (soil): the catchment-integrated B–N estimate ``b = 1 / (2 − n)``, where *n* is the slope of the log(−dQ/dt) vs log(Q) cloud. Treat as a calibration starting point. * *Slow reservoir* (karst): the theoretical Brutsaert & Nieber (1977) long-time value, b ≈ 2.203. Consider fixing rather than calibrating. * *Deep reservoir* (if present): b = 1.0 (linear). **Timescales** come from the spectral + recession decomposition in :class:`~hydroravens.HydrographSeparation`. ``None`` entries in ``Priors.t_efold`` mean the timescale could not be resolved from the data; fall back to calibration defaults in that case. The B–N slope and the overall b estimate reflect the *catchment- integrated* recession, not individual reservoir responses. They anchor the soil exponent but cannot distinguish the karst component directly — that is why the karst exponent defaults to the theoretical value. Examples -------- >>> import pandas as pd >>> from hydroravens import suggest_priors >>> df = pd.read_csv('input.csv', parse_dates=['Date']) >>> Q = df['Specific Discharge [mm/day]'].values >>> P = df['Precipitation [mm/day]'].values >>> pr = suggest_priors(Q, P=P, n_reservoirs=3) >>> pr.summary() >>> print(pr.to_yaml_snippet()) """ Q = np.asarray(Q, dtype=float) if P is not None: P = np.asarray(P, dtype=float) # --- Brutsaert & Nieber recession analysis --------------------------- bn = BrutsaertNieber(Q, dt=dt, min_recession_days=min_recession_days) try: bn.fit() b_fast = bn.to_reservoir_exponent() if not np.isfinite(b_fast): warnings.warn( "B–N slope n ≥ 2; cannot convert to a finite recession " "exponent. Defaulting b_soil prior to 2.0.", UserWarning, stacklevel=2, ) b_fast = 2.0 except ValueError as e: warnings.warn( f"BrutsaertNieber fit failed ({e}); defaulting b_soil prior to 2.0.", UserWarning, stacklevel=2, ) b_fast = 2.0 # Build recession_exponents list: soil / karst / deep(linear) if n_reservoirs == 1: recession_exponents = [b_fast] elif n_reservoirs == 2: recession_exponents = [b_fast, _B_KARST_THEORETICAL] else: recession_exponents = ([b_fast, _B_KARST_THEORETICAL] + [1.0] * (n_reservoirs - 2)) # --- Hydrograph separation ------------------------------------------ hs = HydrographSeparation(Q, n_reservoirs=n_reservoirs, precip=P) try: hs.fit() ic = hs.get_initial_conditions() bounds = hs.get_parameter_priors() h0_list = ic['H0'] # fastest first # Convert log-scale bounds back to linear for t_efold display t_efold = [] for i in range(n_reservoirs): key = list(bounds.keys())[i] if i < len(bounds) else None if key and bounds[key] is not None: t_efold.append(round(10 ** bounds[key]['initial'], 1)) else: t_efold.append(None) except Exception as e: warnings.warn( f"HydrographSeparation fit failed ({e}); " f"timescales and initial depths will be None.", UserWarning, stacklevel=2, ) h0_list = [None] * n_reservoirs t_efold = [None] * n_reservoirs bounds = {} return Priors( t_efold = t_efold, recession_exponents = recession_exponents, initial_depths = [round(float(h), 1) if h is not None else None for h in h0_list], log_t_efold_bounds = bounds, bn = bn, hs = hs, n_reservoirs = n_reservoirs, )