Commit 3feefe7c authored by tchervec's avatar tchervec
Browse files

Merge branch '61-crs-of-frames-being-joined-does-not-match' into 'develop'

Resolve "CRS of frames being joined does not match"

See merge request ivt-vpl/populations/ch-zh-synpop!92
parents a5b9e0d2 292e7dbe
...@@ -3,11 +3,11 @@ import pyproj ...@@ -3,11 +3,11 @@ import pyproj
# TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that. # TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that.
# It will not only be more readable but will also bring a speedup! # It will not only be more readable but will also bring a speedup!
CH1903 = pyproj.Proj("EPSG:21781") CH1903 = pyproj.Proj("epsg:21781")
LV05 = CH1903 LV05 = CH1903
CH1903_PLUS = pyproj.Proj("EPSG:2056") CH1903_PLUS = pyproj.Proj("epsg:2056")
LV95 = CH1903_PLUS LV95 = CH1903_PLUS
WGS84 = pyproj.Proj("EPSG:4326") WGS84 = pyproj.Proj("epsg:4326")
MAXIMUM_HOUSEHOLD_SIZE = 12 MAXIMUM_HOUSEHOLD_SIZE = 12
MINIMUM_AGE_PER_HOUSEHOLD = 16 MINIMUM_AGE_PER_HOUSEHOLD = 16
......
...@@ -9,9 +9,12 @@ def execute(context): ...@@ -9,9 +9,12 @@ def execute(context):
# Load data # Load data
data_path = context.config("data_path") data_path = context.config("data_path")
df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, df = gpd.read_file(
encoding="latin1" "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path,
).to_crs("EPSG:2056") encoding="latin1"
).to_crs("epsg:2056")
df.crs = "epsg:2056"
df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1) df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1)
df = df[["canton_id", "canton_name", "geometry"]] df = df[["canton_id", "canton_name", "geometry"]]
......
...@@ -2,6 +2,7 @@ import geopandas as gpd ...@@ -2,6 +2,7 @@ import geopandas as gpd
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from sklearn.neighbors import KDTree from sklearn.neighbors import KDTree
import data.spatial.utils
def configure(context): def configure(context):
...@@ -35,7 +36,10 @@ def execute(context): ...@@ -35,7 +36,10 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/%s" % (data_path, shapefile), "%s/%s" % (data_path, shapefile),
encoding="latin1" encoding="latin1"
).to_crs("EPSG:2056") ).to_crs("epsg:2056")
df.crs = "epsg:2056"
df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_id"] = df[id_field]
df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "municipality_name"] = df[name_field]
df.loc[:, "year"] = year df.loc[:, "year"] = year
......
...@@ -34,8 +34,8 @@ def execute(context): ...@@ -34,8 +34,8 @@ def execute(context):
"%s/%s" % (data_path, shapefile), "%s/%s" % (data_path, shapefile),
encoding="utf-8" encoding="utf-8"
) )
df.crs = "EPSG:4326" df.crs = "epsg:4326"
df = df.to_crs("EPSG:2056") df = df.to_crs("epsg:2056")
df.loc[:, "nuts_id"] = df[id_field] df.loc[:, "nuts_id"] = df[id_field]
df.loc[:, "nuts_name"] = df[name_field] df.loc[:, "nuts_name"] = df[name_field]
......
...@@ -11,7 +11,7 @@ def configure(context): ...@@ -11,7 +11,7 @@ def configure(context):
def execute(context): def execute(context):
input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path") input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path")
df = gpd.read_file(input_path) df = gpd.read_file(input_path)
df.crs = "EPSG:2056" df.crs = "epsg:2056"
df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1) df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1)
return df return df
......
...@@ -9,10 +9,12 @@ def execute(context): ...@@ -9,10 +9,12 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path, "%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs("EPSG:2056") ).to_crs("epsg:2056")
df.crs = "epsg:2056"
df["postal_code"] = df["PLZ"] df["postal_code"] = df["PLZ"]
df = df.sort_values(by="postal_code").reset_index() df = df.sort_values(by="postal_code").reset_index()
df = df[["postal_code", "geometry"]] df = df[["postal_code", "geometry"]]
return df return df
\ No newline at end of file
...@@ -12,7 +12,9 @@ def execute(context): ...@@ -12,7 +12,9 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/statistical_quarter_borders/shp/quart17.shp" % data_path, "%s/statistical_quarter_borders/shp/quart17.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs("EPSG:2056") ).to_crs("epsg:2056")
df.crs = "epsg:2056"
df["quarter_id"] = df["GMDEQNR"] df["quarter_id"] = df["GMDEQNR"]
df["quarter_name"] = df["NAME"] df["quarter_name"] = df["NAME"]
......
...@@ -9,6 +9,8 @@ def execute(context): ...@@ -9,6 +9,8 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path, "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs("EPSG:2056") ).to_crs("epsg:2056")
return df["geometry"] df.crs = "epsg:2056"
\ No newline at end of file
return df["geometry"]
...@@ -4,7 +4,6 @@ import pandas as pd ...@@ -4,7 +4,6 @@ import pandas as pd
import shapely.geometry as geo import shapely.geometry as geo
from sklearn.neighbors import KDTree from sklearn.neighbors import KDTree
def sample_coordinates(row, count): def sample_coordinates(row, count):
samples = [] samples = []
bounds = row["geometry"].bounds bounds = row["geometry"].bounds
...@@ -19,22 +18,24 @@ def sample_coordinates(row, count): ...@@ -19,22 +18,24 @@ def sample_coordinates(row, count):
return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) return np.array(list(map(lambda p: (p.x, p.y), samples[:count])))
def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"): def to_gpd(context, df, x="x", y="y", crs="epsg:2056", coord_type=""):
df["geometry"] = [ df["geometry"] = [
geo.Point(*coord) for coord in context.progress( geo.Point(*coord) for coord in context.progress(
zip(df[x], df[y]), total=len(df), zip(df[x], df[y]), total=len(df),
label="Converting coordinates" label="Converting %s coordinates" % coord_type
)] )]
df = gpd.GeoDataFrame(df) df = gpd.GeoDataFrame(df)
df.crs = crs df.crs = crs
if not crs == "EPSG:2056": if not crs == "epsg:2056":
df = df.to_crs("EPSG:2056") df = df.to_crs("epsg:2056")
df.crs = "epsg:2056"
return df return df
def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000): def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000,
zone_type="", point_type=""):
assert (type(df_points) == gpd.GeoDataFrame) assert (type(df_points) == gpd.GeoDataFrame)
assert (type(df_zones) == gpd.GeoDataFrame) assert (type(df_zones) == gpd.GeoDataFrame)
...@@ -46,7 +47,8 @@ def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_d ...@@ -46,7 +47,8 @@ def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_d
df_points = df_points[[point_id_field, "geometry"]] df_points = df_points[[point_id_field, "geometry"]]
df_zones = df_zones[[zone_id_field, "geometry"]] df_zones = df_zones[[zone_id_field, "geometry"]]
print("Imputing %d zones into %d points by spatial join..." % (len(df_zones), len(df_points))) print("Imputing %d %s zones onto %d %s points by spatial join..."
% (len(df_zones), zone_type, len(df_points), point_type))
result = [] result = []
chunk_count = max(1, int(len(df_points) / chunk_size)) chunk_count = max(1, int(len(df_points) / chunk_size))
......
...@@ -93,24 +93,23 @@ def execute(context): ...@@ -93,24 +93,23 @@ def execute(context):
df_cantons = context.stage("data.spatial.cantons") df_cantons = context.stage("data.spatial.cantons")
df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]]) df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]])
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y") df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y", coord_type="home")
# Impute municipalities # Impute municipalities
df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[ df_spatial = (data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id",
"person_id", "municipality_id", "geometry" zone_type="municipality", point_type="home")[
]] ["person_id", "municipality_id", "geometry"]])
df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int) df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int)
# Impute quarters # Impute quarters
df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id",
fix_by_distance=False)[ fix_by_distance=False, zone_type="quarter", point_type="home")[
["person_id", "municipality_id", "quarter_id", "geometry"]] ["person_id", "municipality_id", "quarter_id", "geometry"]])
)
# Impute cantons # Impute cantons
df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[ df_spatial = (data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id",
"person_id", "municipality_id", "quarter_id", "canton_id", "geometry" zone_type="canton", point_type="home")[
]] ["person_id", "municipality_id", "quarter_id", "canton_id", "geometry"]])
# Impute municipality types # Impute municipality types
df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types) df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)
......
...@@ -12,7 +12,7 @@ def execute(context): ...@@ -12,7 +12,7 @@ def execute(context):
# Create MATSim schedule # Create MATSim schedule
java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [ java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [
"%s/hafas" % context.config("data_path"), "EPSG:2056", "%s/hafas" % context.config("data_path"), "epsg:2056",
"%s/transit_schedule.xml.gz" % context.cache_path, "%s/transit_schedule.xml.gz" % context.cache_path,
"%s/transit_vehicles.xml.gz" % context.cache_path, "%s/transit_vehicles.xml.gz" % context.cache_path,
context.config("hafas_date") context.config("hafas_date")
......
...@@ -23,7 +23,7 @@ def execute(context): ...@@ -23,7 +23,7 @@ def execute(context):
) )
content = content.replace( content = content.replace(
'<param name="outputCoordinateSystem" value="null" />', '<param name="outputCoordinateSystem" value="null" />',
'<param name="outputCoordinateSystem" value="EPSG:2056" />' '<param name="outputCoordinateSystem" value="epsg:2056" />'
) )
content = content.replace( content = content.replace(
'<param name="outputNetworkFile" value="null" />', '<param name="outputNetworkFile" value="null" />',
......
...@@ -14,7 +14,7 @@ def execute(context): ...@@ -14,7 +14,7 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [ java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [
"-Djava.io.tmpdir=%s/java_tmp" % tmp_path, "-Djava.io.tmpdir=%s/java_tmp" % tmp_path,
paths["schedule"], paths["network"], "EPSG:2056", context.cache_path paths["schedule"], paths["network"], "epsg:2056", context.cache_path
], cwd = context.cache_path) ], cwd = context.cache_path)
assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.cache_path)) assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.cache_path))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment