import math
import numpy as np
import pandas as pd
__all__ = [
'gumbel1',
'simple_forecast',
'forecast_stats',
'daily_averages',
'monthly_averages',
'annual_averages',
'daily_stats',
'daily_variance',
'daily_flow_anomaly',
'return_periods',
'low_return_periods'
]
[docs]
def gumbel1(rp: int, xbar: float, std: float) -> float:
"""
Solves the Gumbel Type 1 distribution
Args:
rp: return period (years)
xbar: average of the dataset
std: standard deviation of the dataset
Returns:
float: solution to gumbel distribution
"""
return round(-math.log(-math.log(1 - (1 / rp))) * std * .7797 + xbar - (.45 * std), 2)
[docs]
def simple_forecast(ens: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the simple forecast from a dataframe of forecast ensembles
Args:
ens: a dataframe of forecast ensembles
Returns:
pandas DataFrame with datetime index and columns flow_uncertainty_upper, flow_median, flow_uncertainty_lower
"""
df = (
ens
.drop(columns=['ensemble_52'])
.dropna()
)
df = pd.DataFrame({
f'flow_uncertainty_upper': np.nanpercentile(df.values, 80, axis=1),
f'flow_median': np.median(df.values, axis=1),
f'flow_uncertainty_lower': np.nanpercentile(df.values, 20, axis=1),
}, index=df.index)
return df
[docs]
def forecast_stats(ens: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the statistics for a dataframe of forecast ensembles
Args:
ens: a dataframe of forecast ensembles
Returns:
pandas DataFrame with an index of datetime and columns min, max, mean, median, 25%, 75%
"""
df = (
ens
.drop(columns=['ensemble_52'])
.dropna()
)
return (
pd
.DataFrame(
{
'flow_min': df.min(axis=1),
'flow_25p': df.quantile(.25, axis=1),
'flow_avg': df.mean(axis=1),
'flow_med': df.median(axis=1),
'flow_75p': df.quantile(.75, axis=1),
'flow_max': df.max(axis=1),
},
index=df.index
)
.merge(
ens[['ensemble_52']]
.rename(columns={'ensemble_52': 'high_res'}),
left_index=True,
right_index=True,
how='outer'
)
.sort_index(ascending=True)
)
[docs]
def daily_averages(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the daily average of a dataframe with a datetime index
Args:
df: a dataframe of retrospective simulation data
Returns:
pandas DataFrame with an index of "%m/%d" dates for each day of the year
"""
return df.groupby(df.index.strftime('%m/%d')).mean()
[docs]
def monthly_averages(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the monthly average of a dataframe with a datetime index
Args:
df: a dataframe of retrospective simulation data
Returns:
pandas DataFrame with an index of "%m" values for each month
"""
return df.groupby(df.index.strftime('%m')).mean()
[docs]
def annual_averages(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the annual average of a dataframe with a datetime index
Args:
df: a dataframe of retrospective simulation data
Returns:
pandas DataFrame with an index of "%Y" values for each year
"""
# select years with >= 365 entries
df = df.groupby(df.index.strftime('%Y')).filter(lambda x: len(x) >= 365)
return df.groupby(df.index.strftime('%Y')).mean()
[docs]
def daily_variance(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculate the daily standard deviation of a dataframe with a datetime index
Args:
df: a dataframe of retrospective simulation data
Returns:
pandas DataFrame with an index of "%m/%d" dates for each day of the year
"""
return df.groupby(df.index.strftime('%m/%d')).std()
[docs]
def daily_stats(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the statistics for a datafame given the day of year
Args:
df: historical data to compute daily statistics from
Return:
pd.DataFrame: dataframe with average, min, 25% values, median, 75% value and max value for each day of year
"""
daily_grouped = df.groupby(df.index.strftime('%m/%d'))
return (
daily_grouped
.mean()
.merge(daily_grouped.min(), left_index=True, right_index=True, suffixes=('_avg', '_min'))
.merge(daily_grouped.quantile(.25), left_index=True, right_index=True)
.merge(daily_grouped.median(), left_index=True, right_index=True, suffixes=('_25%', '_med'))
.merge(daily_grouped.quantile(.75), left_index=True, right_index=True)
.merge(daily_grouped.max(), left_index=True, right_index=True, suffixes=('_75%', '_max'))
)
[docs]
def daily_flow_anomaly(stats: pd.DataFrame, day_avgs: pd.DataFrame, daily: bool = True) -> pd.DataFrame:
"""
Compute the anomaly between the average forecasted flow and the daily average
Args:
stats: the csv response from the ForecastStats data service
day_avgs: the csv response from the DailyAverages data service
daily: if true, aggregate the hourly forecast to a daily average before computing the anomaly
Returns:
pandas DataFrame with a datetime index and a column labeled 'anomaly_m^3/s'
"""
anomaly_df = pd.DataFrame(stats['flow_avg_m^3/s'], index=stats.index)
anomaly_df = anomaly_df.dropna()
if daily:
anomaly_df = anomaly_df.resample('D').mean()
anomaly_df['datetime'] = anomaly_df.index
anomaly_df.index = anomaly_df.index.strftime("%m/%d")
anomaly_df = anomaly_df.join(day_avgs, how="inner")
anomaly_df['anomaly_m^3/s'] = anomaly_df['flow_avg_m^3/s'] - anomaly_df['streamflow_m^3/s']
anomaly_df.index = anomaly_df['datetime']
del anomaly_df['flow_avg_m^3/s'], anomaly_df['datetime'], anomaly_df['streamflow_m^3/s']
return anomaly_df
[docs]
def return_periods(df: pd.DataFrame, rps: int or tuple = (2, 5, 10, 25, 50, 100)) -> dict:
"""
Solves the Gumbel Type-I distribution using the annual maximum flow from the historic simulation
Args:
df: a dataframe of retrospective simulation data
rps: an integer or iterable of integer return period numbers to compute
Returns:
dict with keys 'max_simulated' and 'return_period_{year}' for each year with float values to 2 decimals
"""
annual_max_flow_list = df.groupby(df.index.strftime('%Y')).max().values
xbar = np.mean(annual_max_flow_list)
std = np.std(annual_max_flow_list)
if type(rps) is int:
rps = (rps,)
ret_pers = {
'max_simulated': round(np.max(annual_max_flow_list), 2)
}
ret_pers.update({f'return_period_{rp}': round(gumbel1(rp, xbar, std), 2) for rp in rps})
return ret_pers
[docs]
def low_return_periods(hist: pd.DataFrame, rps: tuple = (2, 5, 10, 25, 50, 100)) -> dict:
"""
Solves the Gumbel Type-I distribution using the annual minimum flow from the historic simulation
Args:
hist: the csv response from the HistoricSimulation streamflow data service
rps: a tuple of integer return period numbers to compute
Returns:
dictionary with keys labeled f'{return_period}_year' and float values
"""
annual_min_flow_list = []
low_flows = {}
year_min = hist.index.min().year
year_max = hist.index.max().year
for y in range(year_min, year_max + 1):
annual_min_flow_list.append(hist[hist.index.year == int(y)].min())
annual_min_flow_list = np.array(annual_min_flow_list)
xbar = np.mean(annual_min_flow_list)
std = np.std(annual_min_flow_list)
for rp in rps:
value = gumbel1(rp, xbar, std)
value = value if value > 0 else 0
low_flows[f'return_period_{rp}'] = value
return low_flows