Commit ca911963 authored by tchervec
Browse files

Merge branch '42-canton-id-1' into 'develop'

Impute the canton ID directly from the shapefile instead of deriving it from the imputed municipalities

See merge request ivt-vpl/populations/ch-zh-synpop!86
parents 5cf50d87 ec4395e1
import pandas as pd
import geopandas as gpd
def configure(context):
    """Request the ``data_path`` option from the given context.

    The returned value is discarded here; ``execute`` reads the same option
    again when it actually loads the data.
    """
    context.config("data_path")
def execute(context):
    """Load canton geometries from the 2018 border shapefile.

    Reads ``g1k18.shp`` below the configured ``data_path``, reprojects it to
    EPSG:2056 and renames the ``KTNR`` / ``KTNAME`` attributes to
    ``canton_id`` / ``canton_name``.

    Args:
        context: Object whose ``config("data_path")`` yields the root
            directory that contains the ``municipality_borders`` folder.

    Returns:
        The frame produced by ``geopandas.read_file``, restricted to the
        columns ``canton_id``, ``canton_name`` and ``geometry``.
    """
    data_path = context.config("data_path")

    # NOTE(review): an earlier revision read a municipality -> canton table
    # from "spatial_structure_2018.xlsx" and returned that instead, which left
    # the shapefile load below as dead code. Canton ids are meant to be imputed
    # spatially, so the geometries are what this stage returns.
    df = gpd.read_file(
        "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
        encoding="latin1",
    ).to_crs("EPSG:2056")

    df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1)
    return df[["canton_id", "canton_name", "geometry"]]
def impute(df_cantons, df):
    """Attach the columns of ``df_cantons`` to ``df`` via ``municipality_id``.

    A left join is used, so every row of ``df`` is kept; rows whose
    ``municipality_id`` has no match in ``df_cantons`` get missing values in
    the added columns.

    Args:
        df_cantons: Lookup table that contains a ``municipality_id`` column.
        df: Frame to enrich; must contain a ``municipality_id`` column.

    Returns:
        A new merged DataFrame.

    Raises:
        AssertionError: If ``df`` has no ``municipality_id`` column.
    """
    has_key = "municipality_id" in df.columns
    assert has_key

    return df.merge(df_cantons, how="left", on="municipality_id")
# Canton ids that make up each of the three SP regions.
SP_REGION_1 = [25, 12, 13, 1, 2, 14, 9]
SP_REGION_2 = [21, 26, 15, 16, 22, 11, 24, 3, 6, 7]
SP_REGION_3 = [17, 19, 10, 23, 20, 5, 18, 4, 8]


def impute_sp_region(df):
    """Add an ``sp_region`` column derived from ``canton_id``.

    Rows whose canton is listed in ``SP_REGION_1`` / ``SP_REGION_2`` /
    ``SP_REGION_3`` get the value 1 / 2 / 3; all other rows keep 0.

    Args:
        df: DataFrame with a ``canton_id`` column and no ``sp_region``
            column. It is modified in place.

    Returns:
        The same ``df`` object, now carrying the ``sp_region`` column.

    Raises:
        AssertionError: If ``canton_id`` is missing or ``sp_region`` already
            exists.
    """
    assert "canton_id" in df.columns
    assert "sp_region" not in df.columns

    df["sp_region"] = 0

    regions = (SP_REGION_1, SP_REGION_2, SP_REGION_3)
    for region, canton_ids in enumerate(regions, start=1):
        df.loc[df["canton_id"].isin(canton_ids), "sp_region"] = region

    # TODO: Rows with a canton id outside the three lists stay at region 0.
    # We need a consistent spatial system; it would probably make more sense
    # to impute the SP region in another way. Until then, the strict check
    # that no row is left at region 0 is deliberately not enforced.
    return df
......@@ -90,27 +90,39 @@ def execute(context):
df_zones = context.stage("data.spatial.zones")
df_municipality_types = context.stage("data.spatial.municipality_types")
df_quarters = context.stage("data.spatial.quarters")
df_cantons = context.stage("data.spatial.cantons")
df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]])
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")
# Impute municipalities
df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[
"person_id", "municipality_id", "geometry"
]]
df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int)
# Impute quarters
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id",
fix_by_distance=False)[
["person_id", "municipality_id", "quarter_id", "geometry"]]
)
df_spatial = \
data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", fix_by_distance=False)[[
"person_id", "municipality_id", "quarter_id", "geometry"
# Impute cantons
df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[
"person_id", "municipality_id", "quarter_id", "canton_id", "geometry"
]]
# Impute municipality types
df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)
# Impute zones
df_spatial = data.spatial.zones.impute(df_spatial, df_zones)
assert (len(df) == len(df_spatial))
del df["municipality_id"]
df = pd.merge(
df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id"]],
df, df_spatial[["person_id", "zone_id", "municipality_type", "municipality_id", "quarter_id", "canton_id"]],
on="person_id"
)
......@@ -119,9 +131,6 @@ def execute(context):
df["home_quarter_id"] = df["quarter_id"]
# Impute SP region
df["municipality_id"] = df["municipality_id"].astype(np.int)
df_cantons = context.stage("data.spatial.cantons")
df = data.spatial.cantons.impute(df_cantons, df)
df = data.spatial.cantons.impute_sp_region(df)
# Impute population density
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment