Commit 3feefe7c authored by tchervec's avatar tchervec
Browse files

Merge branch '61-crs-of-frames-being-joined-does-not-match' into 'develop'

Resolve "CRS of frames being joined does not match"

See merge request ivt-vpl/populations/ch-zh-synpop!92
parents a5b9e0d2 292e7dbe
......@@ -3,11 +3,11 @@ import pyproj
# TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that.
# It will not only be more readable but will also bring a speedup!
CH1903 = pyproj.Proj("EPSG:21781")
CH1903 = pyproj.Proj("epsg:21781")
LV05 = CH1903
CH1903_PLUS = pyproj.Proj("EPSG:2056")
CH1903_PLUS = pyproj.Proj("epsg:2056")
LV95 = CH1903_PLUS
WGS84 = pyproj.Proj("EPSG:4326")
WGS84 = pyproj.Proj("epsg:4326")
MAXIMUM_HOUSEHOLD_SIZE = 12
MINIMUM_AGE_PER_HOUSEHOLD = 16
......
......@@ -9,9 +9,12 @@ def execute(context):
# Load data
data_path = context.config("data_path")
df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
encoding="latin1"
).to_crs("EPSG:2056")
df = gpd.read_file(
"%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
encoding="latin1"
).to_crs("epsg:2056")
df.crs = "epsg:2056"
df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1)
df = df[["canton_id", "canton_name", "geometry"]]
......
......@@ -2,6 +2,7 @@ import geopandas as gpd
import numpy as np
import pandas as pd
from sklearn.neighbors import KDTree
import data.spatial.utils
def configure(context):
......@@ -35,7 +36,10 @@ def execute(context):
df = gpd.read_file(
"%s/%s" % (data_path, shapefile),
encoding="latin1"
).to_crs("EPSG:2056")
).to_crs("epsg:2056")
df.crs = "epsg:2056"
df.loc[:, "municipality_id"] = df[id_field]
df.loc[:, "municipality_name"] = df[name_field]
df.loc[:, "year"] = year
......
......@@ -34,8 +34,8 @@ def execute(context):
"%s/%s" % (data_path, shapefile),
encoding="utf-8"
)
df.crs = "EPSG:4326"
df = df.to_crs("EPSG:2056")
df.crs = "epsg:4326"
df = df.to_crs("epsg:2056")
df.loc[:, "nuts_id"] = df[id_field]
df.loc[:, "nuts_name"] = df[name_field]
......
......@@ -11,7 +11,7 @@ def configure(context):
def execute(context):
input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path")
df = gpd.read_file(input_path)
df.crs = "EPSG:2056"
df.crs = "epsg:2056"
df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1)
return df
......
......@@ -9,10 +9,12 @@ def execute(context):
df = gpd.read_file(
"%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path,
encoding = "latin1"
).to_crs("EPSG:2056")
).to_crs("epsg:2056")
df.crs = "epsg:2056"
df["postal_code"] = df["PLZ"]
df = df.sort_values(by="postal_code").reset_index()
df = df[["postal_code", "geometry"]]
return df
\ No newline at end of file
return df
......@@ -12,7 +12,9 @@ def execute(context):
df = gpd.read_file(
"%s/statistical_quarter_borders/shp/quart17.shp" % data_path,
encoding = "latin1"
).to_crs("EPSG:2056")
).to_crs("epsg:2056")
df.crs = "epsg:2056"
df["quarter_id"] = df["GMDEQNR"]
df["quarter_name"] = df["NAME"]
......
......@@ -9,6 +9,8 @@ def execute(context):
df = gpd.read_file(
"%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path,
encoding = "latin1"
).to_crs("EPSG:2056")
).to_crs("epsg:2056")
return df["geometry"]
\ No newline at end of file
df.crs = "epsg:2056"
return df["geometry"]
......@@ -4,7 +4,6 @@ import pandas as pd
import shapely.geometry as geo
from sklearn.neighbors import KDTree
def sample_coordinates(row, count):
samples = []
bounds = row["geometry"].bounds
......@@ -19,22 +18,24 @@ def sample_coordinates(row, count):
return np.array(list(map(lambda p: (p.x, p.y), samples[:count])))
def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"):
def to_gpd(context, df, x="x", y="y", crs="epsg:2056", coord_type=""):
df["geometry"] = [
geo.Point(*coord) for coord in context.progress(
zip(df[x], df[y]), total=len(df),
label="Converting coordinates"
label="Converting %s coordinates" % coord_type
)]
df = gpd.GeoDataFrame(df)
df.crs = crs
if not crs == "EPSG:2056":
df = df.to_crs("EPSG:2056")
if not crs == "epsg:2056":
df = df.to_crs("epsg:2056")
df.crs = "epsg:2056"
return df
def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000):
def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000,
zone_type="", point_type=""):
assert (type(df_points) == gpd.GeoDataFrame)
assert (type(df_zones) == gpd.GeoDataFrame)
......@@ -46,7 +47,8 @@ def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_d
df_points = df_points[[point_id_field, "geometry"]]
df_zones = df_zones[[zone_id_field, "geometry"]]
print("Imputing %d zones into %d points by spatial join..." % (len(df_zones), len(df_points)))
print("Imputing %d %s zones onto %d %s points by spatial join..."
% (len(df_zones), zone_type, len(df_points), point_type))
result = []
chunk_count = max(1, int(len(df_points) / chunk_size))
......
......@@ -93,24 +93,23 @@ def execute(context):
df_cantons = context.stage("data.spatial.cantons")
df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]])
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y", coord_type="home")
# Impute municipalities
df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[
"person_id", "municipality_id", "geometry"
]]
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id",
zone_type="municipality", point_type="home")[
["person_id", "municipality_id", "geometry"]])
df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int)
# Impute quarters
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id",
fix_by_distance=False)[
["person_id", "municipality_id", "quarter_id", "geometry"]]
)
fix_by_distance=False, zone_type="quarter", point_type="home")[
["person_id", "municipality_id", "quarter_id", "geometry"]])
# Impute cantons
df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[
"person_id", "municipality_id", "quarter_id", "canton_id", "geometry"
]]
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id",
zone_type="canton", point_type="home")[
["person_id", "municipality_id", "quarter_id", "canton_id", "geometry"]])
# Impute municipality types
df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)
......
......@@ -12,7 +12,7 @@ def execute(context):
# Create MATSim schedule
java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [
"%s/hafas" % context.config("data_path"), "EPSG:2056",
"%s/hafas" % context.config("data_path"), "epsg:2056",
"%s/transit_schedule.xml.gz" % context.cache_path,
"%s/transit_vehicles.xml.gz" % context.cache_path,
context.config("hafas_date")
......
......@@ -23,7 +23,7 @@ def execute(context):
)
content = content.replace(
'<param name="outputCoordinateSystem" value="null" />',
'<param name="outputCoordinateSystem" value="EPSG:2056" />'
'<param name="outputCoordinateSystem" value="epsg:2056" />'
)
content = content.replace(
'<param name="outputNetworkFile" value="null" />',
......
......@@ -14,7 +14,7 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [
"-Djava.io.tmpdir=%s/java_tmp" % tmp_path,
paths["schedule"], paths["network"], "EPSG:2056", context.cache_path
paths["schedule"], paths["network"], "epsg:2056", context.cache_path
], cwd = context.cache_path)
assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.cache_path))
......
Supports Markdown
0% — Attach a file by drag &amp; drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment