Commit ec4395e1 authored by tchervec
Browse files

Impute the canton ID directly from the shapefile instead of deriving it from the imputed municipalities

parent 5cf50d87
import pandas as pd import geopandas as gpd
def configure(context): def configure(context):
context.config("data_path") context.config("data_path")
def execute(context): def execute(context):
# Load data # Load data
data_path = context.config("data_path") data_path = context.config("data_path")
df_cantons = pd.read_excel("%s/spatial_structure_2018.xlsx" % data_path, df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
names=["municipality_id", "canton_id"], encoding="latin1"
usecols=[0, 2], ).to_crs("EPSG:2056")
skiprows=6,
nrows=2229, df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1)
) df = df[["canton_id", "canton_name", "geometry"]]
return df_cantons return df
def impute(df_cantons, df):
    """Attach the columns of ``df_cantons`` to ``df`` via ``municipality_id``.

    Args:
        df_cantons: Lookup frame keyed by ``municipality_id``
            (presumably carrying a ``canton_id`` column -- confirm
            against the stage that produces it).
        df: Frame to enrich; must contain a ``municipality_id`` column.

    Returns:
        A new frame holding the left join of ``df`` with ``df_cantons``
        on ``municipality_id``.

    Raises:
        AssertionError: If ``df`` has no ``municipality_id`` column.
    """
    assert "municipality_id" in df.columns, \
        "df must contain a 'municipality_id' column"

    # Left join: every row of df is kept, whether or not its
    # municipality appears in the lookup table.
    return pd.merge(df, df_cantons, on="municipality_id", how="left")
SP_REGION_1 = [25, 12, 13, 1, 2, 14, 9] SP_REGION_1 = [25, 12, 13, 1, 2, 14, 9]
SP_REGION_2 = [21, 26, 15, 16, 22, 11, 24, 3, 6, 7] SP_REGION_2 = [21, 26, 15, 16, 22, 11, 24, 3, 6, 7]
SP_REGION_3 = [17, 19, 10, 23, 20, 5, 18, 4, 8] SP_REGION_3 = [17, 19, 10, 23, 20, 5, 18, 4, 8]
def impute_sp_region(df): def impute_sp_region(df):
assert("canton_id" in df.columns) assert ("canton_id" in df.columns)
assert("sp_region" not in df.columns) assert ("sp_region" not in df.columns)
df["sp_region"] = 0 df["sp_region"] = 0
df.loc[df["canton_id"].isin(SP_REGION_1), "sp_region"] = 1 df.loc[df["canton_id"].isin(SP_REGION_1), "sp_region"] = 1
...@@ -39,5 +38,5 @@ def impute_sp_region(df): ...@@ -39,5 +38,5 @@ def impute_sp_region(df):
# Especially, we need a consistent spatial system. It probably would make # Especially, we need a consistent spatial system. It probably would make
# more sense to impute the SP region in another way # more sense to impute the SP region in another way
#assert(not np.any(df["sp_region"] == 0)) # assert(not np.any(df["sp_region"] == 0))
return df return df
...@@ -90,27 +90,39 @@ def execute(context): ...@@ -90,27 +90,39 @@ def execute(context):
df_zones = context.stage("data.spatial.zones") df_zones = context.stage("data.spatial.zones")
df_municipality_types = context.stage("data.spatial.municipality_types") df_municipality_types = context.stage("data.spatial.municipality_types")
df_quarters = context.stage("data.spatial.quarters") df_quarters = context.stage("data.spatial.quarters")
df_cantons = context.stage("data.spatial.cantons")
df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]]) df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]])
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y") df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")
# Impute municipalities
df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[ df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[
"person_id", "municipality_id", "geometry" "person_id", "municipality_id", "geometry"
]] ]]
df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int)
# Impute quarters
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id",
fix_by_distance=False)[
["person_id", "municipality_id", "quarter_id", "geometry"]]
)
df_spatial = \ # Impute cantons
data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", fix_by_distance=False)[[ df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[
"person_id", "municipality_id", "quarter_id", "geometry" "person_id", "municipality_id", "quarter_id", "canton_id", "geometry"
]] ]]
# Impute municipality types
df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types) df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)
# Impute zones
df_spatial = data.spatial.zones.impute(df_spatial, df_zones) df_spatial = data.spatial.zones.impute(df_spatial, df_zones)
assert (len(df) == len(df_spatial)) assert (len(df) == len(df_spatial))
del df["municipality_id"] del df["municipality_id"]
df = pd.merge( df = pd.merge(
df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id"]], df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id", "canton_id"]],
on="person_id" on="person_id"
) )
...@@ -119,9 +131,6 @@ def execute(context): ...@@ -119,9 +131,6 @@ def execute(context):
df["home_quarter_id"] = df["quarter_id"] df["home_quarter_id"] = df["quarter_id"]
# Impute SP region # Impute SP region
df["municipality_id"] = df["municipality_id"].astype(np.int)
df_cantons = context.stage("data.spatial.cantons")
df = data.spatial.cantons.impute(df_cantons, df)
df = data.spatial.cantons.impute_sp_region(df) df = data.spatial.cantons.impute_sp_region(df)
# Impute population density # Impute population density
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment