Skip to content

pixel_hierarchy

aggregate_climate_to_hierarchy(data: pd.DataFrame, hierarchy: pd.DataFrame) -> pd.DataFrame

Create all aggregate climate values for a given hierarchy from most-detailed data.

Parameters

data: The most-detailed climate data to aggregate. hierarchy: The hierarchy to aggregate the data to.

Returns

pd.DataFrame: The climate data with values for all levels of the hierarchy.

Source code in src/idd_forecast_mbp/map_to_admin_2/pixel_hierarchy.py
def aggregate_climate_to_hierarchy(
    data: pd.DataFrame, hierarchy: pd.DataFrame
) -> pd.DataFrame:
    """Create all aggregate climate values for a given hierarchy from most-detailed data.

    ``weighted_climate`` and ``population`` are summed from child locations to
    their parents, one hierarchy level at a time, and the aggregated rows are
    appended to the data. ``value`` is then computed for every row as
    ``weighted_climate / population``.

    Parameters
    ----------
    data
        The most-detailed climate data to aggregate. Must contain the columns
        ``location_id``, ``year_id``, ``weighted_climate`` and ``population``.
    hierarchy
        The hierarchy to aggregate the data to. Must contain the columns
        ``location_id``, ``parent_id`` and ``level``.

    Returns
    -------
    pd.DataFrame
        The climate data with values for all levels of the hierarchy, sorted
        by ``location_id`` and ``year_id``.

    Raises
    ------
    ValueError
        If a location at some level has no rows in the (partially aggregated)
        data and its parent has no rows either.
    """
    results = data.set_index("location_id").copy()

    # Most detailed locations can be at multiple levels of the hierarchy,
    # so we loop over all levels from most detailed to global, aggregating
    # level by level and appending the results to the data.
    for level in range(hierarchy.level.max(), 0, -1):
        level_mask = hierarchy.level == level
        parent_map = hierarchy.loc[level_mask].set_index("location_id").parent_id

        # A location at this level may legitimately be absent from the results
        # as long as its parent is already present. If neither the location
        # nor its parent is present, the parent can never be built, so fail.
        absent_locations = parent_map.index.difference(results.index)
        if len(absent_locations) > 0:
            parents_of_absent = parent_map.loc[absent_locations].unique()
            missing_parents = parents_of_absent[
                ~np.isin(parents_of_absent, results.index)
            ]
            if len(missing_parents) > 0:
                msg = f"Some parent locations are not in the results: {missing_parents}"
                raise ValueError(msg)

        # Continue aggregation only on the locations that are present.
        present_parent_map = parent_map.loc[parent_map.index.isin(results.index)]
        if present_parent_map.empty:
            # Nothing to aggregate at this level.
            continue

        # Explicit copy so that adding the parent_id column never writes
        # through to (or warns about) the accumulated results frame.
        subset = results.loc[present_parent_map.index].copy()
        # results has one row per (location, year); the assignment aligns the
        # per-location parent ids onto the repeated location_id index.
        subset["parent_id"] = present_parent_map

        parent_values = (
            subset.groupby(["year_id", "parent_id"])[["weighted_climate", "population"]]
            .sum()
            .reset_index()
            .rename(columns={"parent_id": "location_id"})
            .set_index("location_id")
        )
        results = pd.concat([results, parent_values])

    results = results.reset_index().sort_values(["location_id", "year_id"])
    # Compute the value on the full result set so that the aggregated rows
    # (which only carry the summed columns) get a value as well.
    results["value"] = results.weighted_climate / results.population
    return results

load_subset_hierarchy(subset_hierarchy: str) -> pd.DataFrame

Load a subset location hierarchy.

The subset hierarchy might be equal to the full aggregation hierarchy, but it might also be a subset of the full aggregation hierarchy. These hierarchies are used to provide different views of aggregated climate data.

Parameters

subset_hierarchy: The administrative hierarchy to load (e.g. "gbd_2021").

Returns

pd.DataFrame: The hierarchy data with parent-child relationships.

Source code in src/idd_forecast_mbp/map_to_admin_2/pixel_hierarchy.py
def load_subset_hierarchy(subset_hierarchy: str) -> pd.DataFrame:
    """Read one of the known subset location hierarchies from disk.

    A subset hierarchy may coincide with the full aggregation hierarchy or
    cover only part of it; each one provides a different view of the
    aggregated climate data.

    Parameters
    ----------
    subset_hierarchy
        Name of the administrative hierarchy to load (e.g. "gbd_2021").

    Returns
    -------
    pd.DataFrame
        The contents of the hierarchy parquet file.

    Raises
    ------
    ValueError
        If ``subset_hierarchy`` is not one of the known hierarchy names.
    """
    known_hierarchies = ("gbd_2021", "fhs_2021", "lsae_1209", "lsae_1285")
    if subset_hierarchy in known_hierarchies:
        raking_root = Path(
            "/mnt/team/rapidresponse/pub/population-model/admin-inputs/raking"
        )
        hierarchy_file = (
            raking_root / "gbd-inputs" / f"hierarchy_{subset_hierarchy}.parquet"
        )
        return pd.read_parquet(hierarchy_file)

    # Anything outside the known names is rejected before touching the disk.
    raise ValueError(f"Unknown admin hierarchy: {subset_hierarchy}")

post_process(df: pd.DataFrame, pop_df: pd.DataFrame) -> pd.DataFrame

Rename 000 to {summary_covariate}_per_capita. Merge in population. Create {summary_covariate} as {summary_covariate}_per_capita * population.

Source code in src/idd_forecast_mbp/map_to_admin_2/pixel_hierarchy.py
def post_process(df: pd.DataFrame, pop_df: pd.DataFrame) -> pd.DataFrame:
    """Turn the per-capita "000" column into a per-capita and a total column.

    Steps:

    1. Rename column "000" to ``{summary_covariate}_per_capita``.
    2. Left-merge ``pop_df`` onto ``df`` by ``location_id`` and ``year_id``.
    3. Add ``{summary_covariate}`` as per-capita value times ``population``,
       stored as float32.

    NOTE(review): ``summary_covariate`` is not a parameter; it is read from
    the surrounding scope, so it must be defined there before calling.

    Parameters
    ----------
    df
        Data with columns "000", ``location_id`` and ``year_id``.
    pop_df
        Population data with ``location_id``, ``year_id`` and ``population``.

    Returns
    -------
    pd.DataFrame
        The merged data with the per-capita and total columns.
    """
    # TODO: Fix this for other summary_variable/variable/etc
    per_capita_col = f"{summary_covariate}_per_capita"
    renamed = df.rename(columns={"000": per_capita_col})

    merged = renamed.merge(pop_df, how="left", on=["location_id", "year_id"])

    # The merge must neither add nor drop rows, locations, or years.
    assert renamed.shape[0] == merged.shape[0]
    assert renamed.location_id.nunique() == merged.location_id.nunique()
    assert renamed.year_id.nunique() == merged.year_id.nunique()

    totals = merged[per_capita_col] * merged["population"]
    merged[summary_covariate] = totals.astype(np.float32)

    return merged