diff --git a/data/constants.py b/data/constants.py index 2bfaeec564c2cec4d1e3548bcfa606b04ed4ec2d..93f9d0ca941ea4f4b80a8a7d566fb54a7ae051ff 100644 --- a/data/constants.py +++ b/data/constants.py @@ -3,11 +3,11 @@ import pyproj # TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that. # It will not only be more readable but will also bring a speedup! -CH1903 = pyproj.Proj("EPSG:21781") +CH1903 = pyproj.Proj("epsg:21781") LV05 = CH1903 -CH1903_PLUS = pyproj.Proj("EPSG:2056") +CH1903_PLUS = pyproj.Proj("epsg:2056") LV95 = CH1903_PLUS -WGS84 = pyproj.Proj("EPSG:4326") +WGS84 = pyproj.Proj("epsg:4326") MAXIMUM_HOUSEHOLD_SIZE = 12 MINIMUM_AGE_PER_HOUSEHOLD = 16 diff --git a/data/spatial/cantons.py b/data/spatial/cantons.py index e434e3c9664641ffd7600ff21ba8d52856424fa2..fdb923060406ddbaa62bb1bed9d39798e239c532 100644 --- a/data/spatial/cantons.py +++ b/data/spatial/cantons.py @@ -9,9 +9,12 @@ def execute(context): # Load data data_path = context.config("data_path") - df = gpd.read_file("%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, - encoding="latin1" - ).to_crs("EPSG:2056") + df = gpd.read_file( + "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1k18.shp" % data_path, + encoding="latin1" + ).to_crs("epsg:2056") + + df.crs = "epsg:2056" df = df.rename({"KTNR": "canton_id", "KTNAME": "canton_name"}, axis=1) df = df[["canton_id", "canton_name", "geometry"]] diff --git a/data/spatial/municipalities.py b/data/spatial/municipalities.py index 1dafdfc0f0d6509378e6da2efe91094df939d31d..35754d5444dab34ff3bcfa809297e489ba7096b2 100644 --- a/data/spatial/municipalities.py +++ b/data/spatial/municipalities.py @@ -2,6 +2,7 @@ import geopandas as gpd import numpy as np import pandas as pd from sklearn.neighbors import KDTree +import data.spatial.utils def configure(context): @@ -35,7 +36,10 @@ def execute(context): df = gpd.read_file( "%s/%s" % (data_path, shapefile), encoding="latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") + + df.crs = "epsg:2056" + df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "year"] = year diff --git a/data/spatial/nuts.py b/data/spatial/nuts.py index 2f1bce07bfa5213d3815cf68df65044a633a774b..abdbb9127d0a97eae4fab946f7dfb0ada8b3426d 100644 --- a/data/spatial/nuts.py +++ b/data/spatial/nuts.py @@ -34,8 +34,8 @@ def execute(context): "%s/%s" % (data_path, shapefile), encoding="utf-8" ) - df.crs = "EPSG:4326" - df = df.to_crs("EPSG:2056") + df.crs = "epsg:4326" + df = df.to_crs("epsg:2056") df.loc[:, "nuts_id"] = df[id_field] df.loc[:, "nuts_name"] = df[name_field] diff --git a/data/spatial/ovgk.py b/data/spatial/ovgk.py index c49f9c0834ce6aead2e78ce97985035ca8fcf9f3..99431bdfb0b3f120478879654ac137d3e61e789a 100644 --- a/data/spatial/ovgk.py +++ b/data/spatial/ovgk.py @@ -11,7 +11,7 @@ def configure(context): def execute(context): input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path") df = gpd.read_file(input_path) - df.crs = "EPSG:2056" + df.crs = "epsg:2056" df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1) return df diff --git a/data/spatial/postal_codes.py b/data/spatial/postal_codes.py index 263f37302f18601d85bad436badd56798836d5ce..7a73a2f4c85563679a3e527e901366386cb70008 100644 --- a/data/spatial/postal_codes.py +++ b/data/spatial/postal_codes.py @@ -9,10 +9,12 @@ def execute(context): df = gpd.read_file( "%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") + + df.crs = "epsg:2056" df["postal_code"] = df["PLZ"] df = df.sort_values(by="postal_code").reset_index() df = df[["postal_code", "geometry"]] - return df \ No newline at end of file + return df diff --git a/data/spatial/quarters.py b/data/spatial/quarters.py index 3a0d285fec31232fe2e04135ccaab2dc799629c3..8cca366c767619ea15ac5830f3eb654c3dfb527c 100644 --- a/data/spatial/quarters.py +++ b/data/spatial/quarters.py @@ -12,7 +12,9 @@ def execute(context): df = gpd.read_file( "%s/statistical_quarter_borders/shp/quart17.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") + + df.crs = "epsg:2056" df["quarter_id"] = df["GMDEQNR"] df["quarter_name"] = df["NAME"] diff --git a/data/spatial/swiss_border.py b/data/spatial/swiss_border.py index a236c9b5607ad1e6cac167762c9f6b4e97e9888b..785ba04a3f6f1a932266139d08234366e89b3501 100644 --- a/data/spatial/swiss_border.py +++ b/data/spatial/swiss_border.py @@ -9,6 +9,8 @@ def execute(context): df = gpd.read_file( "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path, encoding = "latin1" - ).to_crs("EPSG:2056") + ).to_crs("epsg:2056") - return df["geometry"] \ No newline at end of file + df.crs = "epsg:2056" + + return df["geometry"] diff --git a/data/spatial/utils.py b/data/spatial/utils.py index 761062557cccd8006318ce430eb2fcf425cf769d..f4b1c338ca9e4e6ded19c3371f51db1c3fe368b8 100644 --- a/data/spatial/utils.py +++ b/data/spatial/utils.py @@ -4,7 +4,6 @@ import pandas as pd import shapely.geometry as geo from sklearn.neighbors import KDTree - def sample_coordinates(row, count): samples = [] bounds = row["geometry"].bounds @@ -19,22 +18,24 @@ def sample_coordinates(row, count): return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) -def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"): +def to_gpd(context, df, x="x", y="y", crs="epsg:2056", coord_type=""): df["geometry"] = [ geo.Point(*coord) for coord in context.progress( zip(df[x], df[y]), total=len(df), - label="Converting coordinates" + label="Converting %s coordinates" % coord_type )] df = gpd.GeoDataFrame(df) df.crs = crs - if not crs == "EPSG:2056": - df = df.to_crs("EPSG:2056") + if not crs == "epsg:2056": + df = df.to_crs("epsg:2056") + df.crs = "epsg:2056" return df -def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000): +def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_distance=True, chunk_size=10000, + zone_type="", point_type=""): assert (type(df_points) == gpd.GeoDataFrame) assert (type(df_zones) == gpd.GeoDataFrame) @@ -46,7 +47,8 @@ def impute(context, df_points, df_zones, point_id_field, zone_id_field, fix_by_d df_points = df_points[[point_id_field, "geometry"]] df_zones = df_zones[[zone_id_field, "geometry"]] - print("Imputing %d zones into %d points by spatial join..." % (len(df_zones), len(df_points))) + print("Imputing %d %s zones onto %d %s points by spatial join..." + % (len(df_zones), zone_type, len(df_points), point_type)) result = [] chunk_count = max(1, int(len(df_points) / chunk_size)) diff --git a/data/statpop/statpop.py b/data/statpop/statpop.py index 8982faeac20d03c663428c4c21a83803e8be1fb5..411eddda7465c5650430ab93670c54a079d877b1 100644 --- a/data/statpop/statpop.py +++ b/data/statpop/statpop.py @@ -93,24 +93,23 @@ def execute(context): df_cantons = context.stage("data.spatial.cantons") df_spatial = pd.DataFrame(df[["person_id", "home_x", "home_y"]]) - df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y") + df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y", coord_type="home") # Impute municipalities - df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")[[ - "person_id", "municipality_id", "geometry" - ]] + df_spatial = (data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id", + zone_type="municipality", point_type="home")[ + ["person_id", "municipality_id", "geometry"]]) df_spatial["municipality_id"] = df_spatial["municipality_id"].astype(np.int) # Impute quarters df_spatial = (data.spatial.utils.impute(context, df_spatial, df_quarters, "person_id", "quarter_id", - fix_by_distance=False)[ - ["person_id", "municipality_id", "quarter_id", "geometry"]] - ) + fix_by_distance=False, zone_type="quarter", point_type="home")[ + ["person_id", "municipality_id", "quarter_id", "geometry"]]) # Impute cantons - df_spatial = data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id")[[ - "person_id", "municipality_id", "quarter_id", "canton_id", "geometry" - ]] + df_spatial = (data.spatial.utils.impute(context, df_spatial, df_cantons, "person_id", "canton_id", + zone_type="canton", point_type="home")[ + ["person_id", "municipality_id", "quarter_id", "canton_id", "geometry"]]) # Impute municipality types df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types) diff --git a/matsim/network/convert_hafas.py b/matsim/network/convert_hafas.py index cdd98f54afcfa7ba5fb5d0e365e4bda1cd2eff32..ef8593902fe46a45d91d5e771e059bcc7050f37e 100644 --- a/matsim/network/convert_hafas.py +++ b/matsim/network/convert_hafas.py @@ -12,7 +12,7 @@ def execute(context): # Create MATSim schedule java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [ - "%s/hafas" % context.config("data_path"), "EPSG:2056", + "%s/hafas" % context.config("data_path"), "epsg:2056", "%s/transit_schedule.xml.gz" % context.cache_path, "%s/transit_vehicles.xml.gz" % context.cache_path, context.config("hafas_date") diff --git a/matsim/network/convert_osm.py b/matsim/network/convert_osm.py index 14e467419d69a889395d5656151d206f2fa63386..1d4e54f265da4be82e6f73e057fee8913c03ef1c 100644 --- a/matsim/network/convert_osm.py +++ b/matsim/network/convert_osm.py @@ -23,7 +23,7 @@ def execute(context): ) content = content.replace( '', - '' + '' ) content = content.replace( '', diff --git a/matsim/network/plausibility.py b/matsim/network/plausibility.py index 82706da97a10b318c94b0bfb5f41ad96fb1251f3..463859915cd9354dd6808e1abe2f3aa1527286d9 100644 --- a/matsim/network/plausibility.py +++ b/matsim/network/plausibility.py @@ -14,7 +14,7 @@ def execute(context): java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [ "-Djava.io.tmpdir=%s/java_tmp" % tmp_path, - paths["schedule"], paths["network"], "EPSG:2056", context.cache_path + paths["schedule"], paths["network"], "epsg:2056", context.cache_path ], cwd = context.cache_path) assert(os.path.exists("%s/allPlausibilityWarnings.csv" % context.cache_path))