From 148d5cc1f523942ac6be76ae7e68b0d61769ba6f Mon Sep 17 00:00:00 2001 From: tchervec Date: Thu, 5 Nov 2020 09:56:09 +0100 Subject: [PATCH 1/3] replace EPSG by epsg --- data/constants.py | 6 +++--- data/spatial/cantons.py | 2 +- data/spatial/municipalities.py | 2 +- data/spatial/nuts.py | 4 ++-- data/spatial/ovgk.py | 2 +- data/spatial/postal_codes.py | 2 +- data/spatial/quarters.py | 2 +- data/spatial/swiss_border.py | 2 +- data/spatial/utils.py | 6 +++--- matsim/network/convert_hafas.py | 2 +- matsim/network/convert_osm.py | 2 +- matsim/network/plausibility.py | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/data/constants.py b/data/constants.py index 2bfaeec..93f9d0c 100644 --- a/data/constants.py +++ b/data/constants.py @@ -3,11 +3,11 @@ import pyproj # TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that. # It will not only be more readable but will also bring a speedup! -CH1903 = pyproj.Proj("EPSG:21781") +CH1903 = pyproj.Proj("epsg:21781") LV05 = CH1903 -CH1903_PLUS = pyproj.Proj("EPSG:2056") +CH1903_PLUS = pyproj.Proj("epsg:2056") LV95 = CH1903_PLUS -WGS84 = pyproj.Proj("EPSG:4326") +WGS84 = pyproj.Proj("epsg:4326") MAXIMUM_HOUSEHOLD_SIZE = 12 MINIMUM_AGE_PER_HOUSEHOLD = 16 diff --git a/data/spatial/cantons.py b/data/spatial/cantons.py index e434e3c..e895fa6 100644 --- a/data/spatial/cantons.py +++ b/data/spatial/cantons.py @@ -11,7 +11,7 @@ def execute(context): df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, encoding="latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1) df = df[["canton_id", "canton_name", "geometry"]] diff --git a/data/spatial/municipalities.py b/data/spatial/municipalities.py index 1dafdfc..43c322b 100644 --- a/data/spatial/municipalities.py +++ b/data/spatial/municipalities.py @@ -35,7 +35,7 @@ def execute(context): df = gpd.read_file( "%s/%s" % (data_path, shapefile), encoding="latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "year"] = year diff --git a/data/spatial/nuts.py b/data/spatial/nuts.py index 2f1bce0..abdbb91 100644 --- a/data/spatial/nuts.py +++ b/data/spatial/nuts.py @@ -34,8 +34,8 @@ def execute(context): "%s/%s" % (data_path, shapefile), encoding="utf-8" ) - df.crs = "EPSG:4326" - df = df.to_crs("EPSG:2056") + df.crs = "epsg:4326" + df = df.to_crs("epsg:2056") df.loc[:, "nuts_id"] = df[id_field] df.loc[:, "nuts_name"] = df[name_field] diff --git a/data/spatial/ovgk.py b/data/spatial/ovgk.py index c49f9c0..99431bd 100644 --- a/data/spatial/ovgk.py +++ b/data/spatial/ovgk.py @@ -11,7 +11,7 @@ def configure(context): def execute(context): input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path") df = gpd.read_file(input_path) - df.crs = "EPSG:2056" + df.crs = "epsg:2056" df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1) return df diff --git a/data/spatial/postal_codes.py b/data/spatial/postal_codes.py index 263f373..77f7a26 100644 --- a/data/spatial/postal_codes.py +++ b/data/spatial/postal_codes.py @@ -9,7 +9,7 @@ def execute(context): df = gpd.read_file( "%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") df["postal_code"] = df["PLZ"] df = df.sort_values(by="postal_code").reset_index() diff --git a/data/spatial/quarters.py b/data/spatial/quarters.py index 3a0d285..d73bab3 100644 --- a/data/spatial/quarters.py +++ b/data/spatial/quarters.py @@ -12,7 +12,7 @@ def execute(context): df = gpd.read_file( "%s/statistical_quarter_borders/shp/quart17.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") df["quarter_id"] = df["GMDEQNR"] df["quarter_name"] = df["NAME"] diff --git a/data/spatial/swiss_border.py b/data/spatial/swiss_border.py index a236c9b..93f6404 100644 --- a/data/spatial/swiss_border.py +++ b/data/spatial/swiss_border.py @@ -9,6 +9,6 @@ def execute(context): df = gpd.read_file( "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") return df["geometry"] \ No newline at end of file diff --git a/data/spatial/utils.py b/data/spatial/utils.py index 7610625..ebd012c 100644 --- a/data/spatial/utils.py +++ b/data/spatial/utils.py @@ -19,7 +19,7 @@ def sample_coordinates(row, count): return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) -def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"): +def to_gpd(context, df, x="x", y="y", crs="epsg:2056"): df["geometry"] = [ geo.Point(*coord) for coord in context.progress( zip(df[x], df[y]), total=len(df), @@ -28,8 +28,8 @@ def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"): df = gpd.GeoDataFrame(df) df.crs = crs - if not crs == "EPSG:2056": - df = df.to_crs("EPSG:2056") + if not crs == "epsg:2056": + df = df.to_crs("epsg:2056") return df diff --git a/matsim/network/convert_hafas.py b/matsim/network/convert_hafas.py index cdd98f5..ef85939 100644 --- a/matsim/network/convert_hafas.py +++ b/matsim/network/convert_hafas.py @@ -12,7 +12,7 @@ def execute(context): # Create MATSim schedule java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [ - "%s/hafas" % context.config("data_path"), "EPSG:2056", + "%s/hafas" % context.config("data_path"), "epsg:2056", "%s/transit_schedule.xml.gz" % context.cache_path, "%s/transit_vehicles.xml.gz" % context.cache_path, context.config("hafas_date") diff --git a/matsim/network/convert_osm.py b/matsim/network/convert_osm.py index 14e4674..1d4e54f 100644 --- a/matsim/network/convert_osm.py +++ b/matsim/network/convert_osm.py @@ -23,7 +23,7 @@ def execute(context): ) content = content.replace( '', - '' + '' ) content = content.replace( '', diff --git a/matsim/network/plausibility.py b/matsim/network/plausibility.py index 82706da..4638599 100644 --- a/matsim/network/plausibility.py +++ b/matsim/network/plausibility.py @@ -14,7 +14,7 @@ def execute(context): java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [ "-Djava.io.tmpdir=%s/java_tmp" % tmp_path, - paths["schedule"], paths["network"], "EPSG:2056", context.cache_path + paths["schedule"], paths["network"], "epsg:2056", context.cache_path ], cwd = context.cache_path) assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.cache_path)) -- GitLab From dbd1c0b117ee10490e137c294b02f036df2ec7c0 Mon Sep 17 00:00:00 2001 From: tchervec Date: Thu, 5 Nov 2020 11:39:03 +0100 Subject: [PATCH 2/3] make spatial imputations more verbose --- data/spatial/utils.py | 10 ++++++---- data/statpop/statpop.py | 19 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/data/spatial/utils.py b/data/spatial/utils.py index ebd012c..903bb60 100644 --- a/data/spatial/utils.py +++ b/data/spatial/utils.py @@ -19,11 +19,11 @@ def sample_coordinates(row, count): return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) -def to_gpd(context, df, x="x", y="y", crs="epsg:2056"): +def to_gpd(context, df, x="x", y="y", crs="epsg:2056", coord_type=""): df["geometry"] = [ geo.Point(*coord) for coord in context.progress( zip(df[x], df[y]), total=len(df), - label="Converting coordinates" + label="Converting %s coordinates" % coord_type )] df = gpd.GeoDataFrame(df) df.crs = crs @@ -34,7 +34,8 @@ def to_gpd(context, df, x="x", y="y", crs="epsg:2056"): return df -def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000): +def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000, + zone_type="", point_type=""): assert (type(df_points) == gpd.GeoDataFrame) assert (type(df_zones) == gpd.GeoDataFrame) @@ -46,7 +47,8 @@ def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_d df_points = df_points[[point_id_field, "geometry"]] df_zones = df_zones[[zone_id_field, "geometry"]] - print("Imputing %d zones into %d points by spatial join..." % (len(df_zones), len(df_points))) + print("Imputing %d %s zones onto %d %s points by spatial join..." + % (len(df_zones), zone_type, len(df_points), point_type)) result = [] chunk_count = max(1, int(len(df_points) / chunk_size)) diff --git a/data/statpop/statpop.py b/data/statpop/statpop.py index 8982fae..411eddd 100644 --- a/data/statpop/statpop.py +++ b/data/statpop/statpop.py @@ -93,24 +93,23 @@ def execute(context): df_cantons = context.stage("data.spatial.cantons") df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]]) - df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y") + df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y", coord_type="home") # Impute municipalities - df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[ - "person_id", "municipality_id", "geometry" - ]] + df_spatial = (data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id", + zone_type="municipality", point_type="home")[ + ["person_id", "municipality_id", "geometry"]]) df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int) # Impute quarters df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", - fix_by_distance=False)[ - ["person_id", "municipality_id", "quarter_id", "geometry"]] - ) + fix_by_distance=False, zone_type="quarter", point_type="home")[ + ["person_id", "municipality_id", "quarter_id", "geometry"]]) # Impute cantons - df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[ - "person_id", "municipality_id", "quarter_id", "canton_id", "geometry" - ]] + df_spatial = (data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id", + zone_type="canton", point_type="home")[ + ["person_id", "municipality_id", "quarter_id", "canton_id", "geometry"]]) # Impute municipality types df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types) -- GitLab From de7f895291e0dfa9a4eb363131b8bca5a2294b5c Mon Sep 17 00:00:00 2001 From: Christopher Tchervenkov Date: Thu, 5 Nov 2020 14:41:27 +0100 Subject: [PATCH 3/3] correctly set crs --- data/spatial/cantons.py | 9 ++++++--- data/spatial/municipalities.py | 4 ++++ data/spatial/postal_codes.py | 4 +++- data/spatial/quarters.py | 2 ++ data/spatial/swiss_border.py | 4 +++- data/spatial/utils.py | 2 +- 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/data/spatial/cantons.py b/data/spatial/cantons.py index e895fa6..fdb9230 100644 --- a/data/spatial/cantons.py +++ b/data/spatial/cantons.py @@ -9,9 +9,12 @@ def execute(context): # Load data data_path = context.config("data_path") - df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, - encoding="latin1" - ).to_crs("epsg:2056") + df = gpd.read_file( + "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, + encoding="latin1" + ).to_crs("epsg:2056") + + df.crs = "epsg:2056" df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1) df = df[["canton_id", "canton_name", "geometry"]] diff --git a/data/spatial/municipalities.py b/data/spatial/municipalities.py index 43c322b..35754d5 100644 --- a/data/spatial/municipalities.py +++ b/data/spatial/municipalities.py @@ -2,6 +2,7 @@ import geopandas as gpd import numpy as np import pandas as pd from sklearn.neighbors import KDTree +import data.spatial.utils def configure(context): @@ -36,6 +37,9 @@ def execute(context): "%s/%s" % (data_path, shapefile), encoding="latin1" ).to_crs("epsg:2056") + + df.crs = "epsg:2056" + df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "year"] = year diff --git a/data/spatial/postal_codes.py b/data/spatial/postal_codes.py index 77f7a26..7a73a2f 100644 --- a/data/spatial/postal_codes.py +++ b/data/spatial/postal_codes.py @@ -11,8 +11,10 @@ def execute(context): encoding = "latin1" ).to_crs("epsg:2056") + df.crs = "epsg:2056" + df["postal_code"] = df["PLZ"] df = df.sort_values(by="postal_code").reset_index() df = df[["postal_code", "geometry"]] - return df \ No newline at end of file + return df diff --git a/data/spatial/quarters.py b/data/spatial/quarters.py index d73bab3..8cca366 100644 --- a/data/spatial/quarters.py +++ b/data/spatial/quarters.py @@ -14,6 +14,8 @@ def execute(context): encoding = "latin1" ).to_crs("epsg:2056") + df.crs = "epsg:2056" + df["quarter_id"] = df["GMDEQNR"] df["quarter_name"] = df["NAME"] df = df[["quarter_id", "quarter_name", "geometry"]] diff --git a/data/spatial/swiss_border.py b/data/spatial/swiss_border.py index 93f6404..785ba04 100644 --- a/data/spatial/swiss_border.py +++ b/data/spatial/swiss_border.py @@ -11,4 +11,6 @@ def execute(context): encoding = "latin1" ).to_crs("epsg:2056") - return df["geometry"] \ No newline at end of file + df.crs = "epsg:2056" + + return df["geometry"] diff --git a/data/spatial/utils.py b/data/spatial/utils.py index 903bb60..f4b1c33 100644 --- a/data/spatial/utils.py +++ b/data/spatial/utils.py @@ -4,7 +4,6 @@ import pandas as pd import shapely.geometry as geo from sklearn.neighbors import KDTree - def sample_coordinates(row, count): samples = [] bounds = row["geometry"].bounds @@ -30,6 +29,7 @@ def to_gpd(context, df, x="x", y="y", crs="epsg:2056", coord_type=""): if not crs == "epsg:2056": df = df.to_crs("epsg:2056") + df.crs = "epsg:2056" return df -- GitLab