Skip to content

utils

_to_idx(arr, bins)

Convert an array of values to an array of indices into a set of bins.

Parameters

arr: An N-dimensional array of values to convert. bins: A 1-dimensional array of bins. Bins must be sorted, increasing, and not contain duplicates. Every value in arr will be converted to an index into bins, with values outside of bins being clipped to the nearest bin edge. Bins are 0-indexed and left-inclusive, so idx 0 contains values from -infinity to bins[1], idx 1 contains values from bins[1] to bins[2], etc.

Returns:

Type Description

The array of indices into the bins.

Source code in src/climate_data/special/utils.py
def _to_idx(arr, bins):
    """Map each value in ``arr`` to the 0-based index of the bin it falls in.

    Parameters
    ----------
    arr
        An N-dimensional array of values to convert.
    bins
        A 1-dimensional array of bin edges. Edges must be sorted in
        increasing order and must not contain duplicates. Bins are
        left-inclusive: index ``i`` holds values ``v`` with
        ``bins[i] <= v < bins[i + 1]``. Values outside the edges are
        clipped to the nearest bin, so index 0 also absorbs everything
        below ``bins[0]`` and the last index absorbs everything at or
        above ``bins[-1]``.

    Returns
    -------
    The array of indices into ``bins``, with the same shape as ``arr``.
    """
    # np.digitize is 1-based for in-range values: it yields 0 below the
    # first edge and len(bins) at or above the last edge.
    one_based = np.digitize(arr, bins)
    # Fold the underflow position (0) into the first bin before shifting
    # everything down to 0-based indices.
    bounded = np.clip(one_based, 1, len(bins))
    return bounded - 1

aggregate_to_hierarchy(data: pd.DataFrame, hierarchy: pd.DataFrame) -> pd.DataFrame

Create all aggregate climate values for a given hierarchy from most-detailed data.

Parameters

data The most-detailed climate data to aggregate. hierarchy The hierarchy to aggregate the data to.

Returns

pd.DataFrame The climate data with values for all levels of the hierarchy.

Source code in src/climate_data/special/utils.py
def aggregate_to_hierarchy(data: pd.DataFrame, hierarchy: pd.DataFrame) -> pd.DataFrame:
    """Create all aggregate climate values for a given hierarchy from most-detailed data.

    Values are summed upward one hierarchy level at a time, starting at the
    deepest level, so that each parent's total includes the totals already
    computed for its non-leaf children.

    Parameters
    ----------
    data
        The most-detailed climate data to aggregate. Must contain the
        columns ``location_id``, ``year_id`` and ``temperature_zone``;
        every other column is treated as a value column and summed.
    hierarchy
        The hierarchy to aggregate the data to. Must contain the columns
        ``location_id``, ``parent_id`` and ``level``. Locations at level 0
        are never rolled up any further.

    Returns
    -------
    pd.DataFrame
        The climate data with values for all levels of the hierarchy,
        sorted by ``location_id``, ``year_id`` and ``temperature_zone``.
    """
    id_cols = ["location_id", "year_id", "temperature_zone"]
    agg_cols = sorted(set(data.columns) - set(id_cols))

    results = data.set_index("location_id")

    # Most detailed locations can be at multiple levels of the hierarchy,
    # so we loop over all levels from most detailed to global, aggregating
    # level by level and appending the results to the data.
    for level in range(hierarchy.level.max(), 0, -1):
        parent_map = (
            hierarchy.loc[hierarchy.level == level]
            .set_index("location_id")
            .parent_id
        )

        # The index holds one entry per (year, zone) row of a location, so
        # it is not unique. A boolean membership mask selects every such
        # row exactly once and keeps the original row order.
        children = results.loc[results.index.isin(parent_map.index)]
        if children.empty:
            # Nothing to roll up at this level; skipping also avoids
            # concatenating an empty frame below.
            continue

        # `assign` aligns parent_map on location_id and returns a new
        # frame, so the selection above is never modified in place.
        children = children.assign(parent_id=parent_map)

        parent_values = (
            children.groupby(["year_id", "parent_id", "temperature_zone"])[agg_cols]
            .sum()
            .reset_index()
            .rename(columns={"parent_id": "location_id"})
            .set_index("location_id")
        )
        results = pd.concat([results, parent_values])

    return (
        results.reset_index()
        .sort_values(id_cols)
        .reset_index(drop=True)
    )