Commit ec4395e1 authored by tchervec

impute canton id directly using the shapefile instead of basing it on imputed municipalities

parent 5cf50d87
 import pandas as pd
+import geopandas as gpd

 def configure(context):
     context.config("data_path")

 def execute(context):
     # Load data
     data_path = context.config("data_path")

-    df_cantons = pd.read_excel("%s/spatial_structure_2018.xlsx" % data_path,
-        names=["municipality_id", "canton_id"],
-        usecols=[0, 2],
-        skiprows=6,
-        nrows=2229,
-    )
+    df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
+        encoding="latin1"
+    ).to_crs("EPSG:2056")
+    df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1)
+    df = df[["canton_id", "canton_name", "geometry"]]

-    return df_cantons
+    return df

-def impute(df_cantons, df):
-    assert("municipality_id" in df.columns)
-    return pd.merge(df, df_cantons, on = "municipality_id", how = "left")

 SP_REGION_1 = [25, 12, 13, 1, 2, 14, 9]
 SP_REGION_2 = [21, 26, 15, 16, 22, 11, 24, 3, 6, 7]
 SP_REGION_3 = [17, 19, 10, 23, 20, 5, 18, 4, 8]

 def impute_sp_region(df):
-    assert("canton_id" in df.columns)
-    assert("sp_region" not in df.columns)
+    assert ("canton_id" in df.columns)
+    assert ("sp_region" not in df.columns)

     df["sp_region"] = 0
     df.loc[df["canton_id"].isin(SP_REGION_1), "sp_region"] = 1

@@ -39,5 +38,5 @@ def impute_sp_region(df):
     # Especially, we need a consistent spatial system. It probably would make
     # more sense to impute the SP region in another way
-    #assert(not np.any(df["sp_region"] == 0))
+    # assert(not np.any(df["sp_region"] == 0))
     return df
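The data.spatial.utils.impute helper used by the downstream stage below is not part of this diff. Assuming it performs a point-in-polygon join of the person home coordinates against the canton polygons returned by execute above, a minimal geopandas sketch of that canton lookup could look as follows; lookup_canton and the column layout of df_persons are illustrative rather than taken from the repository, and older geopandas releases use op= instead of predicate=:

import geopandas as gpd

def lookup_canton(df_persons, df_cantons):
    # df_persons: person_id, home_x, home_y in LV95 (EPSG:2056) coordinates
    # df_cantons: canton_id, canton_name, geometry as returned by execute above
    points = gpd.GeoDataFrame(
        df_persons[["person_id"]],
        geometry=gpd.points_from_xy(df_persons["home_x"], df_persons["home_y"]),
        crs="EPSG:2056"
    )

    # Point-in-polygon join: each home location inherits the canton it falls into
    joined = gpd.sjoin(points, df_cantons[["canton_id", "geometry"]],
                       how="left", predicate="within")

    return joined[["person_id", "canton_id"]]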
@@ -90,27 +90,39 @@ def execute(context):
     df_zones = context.stage("data.spatial.zones")
     df_municipality_types = context.stage("data.spatial.municipality_types")
     df_quarters = context.stage("data.spatial.quarters")
+    df_cantons = context.stage("data.spatial.cantons")

     df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]])
     df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")

     # Impute municipalities
     df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[
         "person_id", "municipality_id", "geometry"
     ]]
     df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int)

     # Impute quarters
-    df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id",
-                                            fix_by_distance=False)[
-        ["person_id", "municipality_id", "quarter_id", "geometry"]]
-    )
+    df_spatial = \
+        data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", fix_by_distance=False)[[
+            "person_id", "municipality_id", "quarter_id", "geometry"
+        ]]

+    # Impute cantons
+    df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[
+        "person_id", "municipality_id", "quarter_id", "canton_id", "geometry"
+    ]]

     # Impute municipality types
     df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)

     # Impute zones
     df_spatial = data.spatial.zones.impute(df_spatial, df_zones)

     assert (len(df) == len(df_spatial))
     del df["municipality_id"]

     df = pd.merge(
-        df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id"]],
+        df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id", "canton_id"]],
         on="person_id"
     )
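The fix_by_distance flag passed in the quarter imputation above is also defined outside this diff; presumably it decides whether points that fall into no polygon (for example exactly on a border or over a lake) are attached to the nearest geometry instead of staying unmatched. A rough sketch of such a fallback, with hypothetical names and a simple linear scan, might be:

# Illustrative fallback: attach unmatched points to the nearest polygon.
# nearest_fallback and the column names are assumptions, not data.spatial.utils code.
def nearest_fallback(df_points, df_polygons, id_column):
    missing = df_points[id_column].isna()

    for index in df_points[missing].index:
        point = df_points.loc[index, "geometry"]
        # Distance from this point to every polygon, then pick the closest one
        distances = df_polygons.geometry.distance(point)
        df_points.loc[index, id_column] = df_polygons.loc[distances.idxmin(), id_column]

    return df_points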
@@ -119,9 +131,6 @@ def execute(context):
     df["home_quarter_id"] = df["quarter_id"]

     # Impute SP region
-    df["municipality_id"] = df["municipality_id"].astype(np.int)
-    df_cantons = context.stage("data.spatial.cantons")
-    df = data.spatial.cantons.impute(df_cantons, df)
     df = data.spatial.cantons.impute_sp_region(df)

     # Impute population density
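Since the three SP_REGION lists above together contain all 26 Swiss canton ids, every record that obtains a canton_id from the shapefile should also obtain a non-zero sp_region, which is what the commented-out assertion in the cantons stage checks. Assuming the collapsed lines of impute_sp_region assign regions 2 and 3 analogously to region 1, a small illustrative check would be:

import pandas as pd

# Every BFS canton number 1..26 is covered by exactly one of the three lists,
# so no row should keep the default sp_region value of 0.
assert set(SP_REGION_1 + SP_REGION_2 + SP_REGION_3) == set(range(1, 27))

df_check = pd.DataFrame({"canton_id": range(1, 27)})
df_check = impute_sp_region(df_check)
assert not (df_check["sp_region"] == 0).any()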