diff --git a/config.yml b/config.yml index 179a7d65f3ba6ab99c515abeb0183f0247e48b3c..9e906249162e2f801ce7d54a95f99fc391176eb1 100644 --- a/config.yml +++ b/config.yml @@ -3,8 +3,9 @@ working_directory: /home/tchervec/Documents/data/switzerland/cache # Requested stages run: - - data.statpop.projections.households - - data.statpop.scaled + - data.microcensus.trips +# - data.statpop.projections.households +# - data.statpop.scaled # These are configuration options that we use in the pipeline config: diff --git a/data/constants.py b/data/constants.py index 4fbfb89a82219f4dfe6cc874feb611a3548a9f6e..2bfaeec564c2cec4d1e3548bcfa606b04ed4ec2d 100644 --- a/data/constants.py +++ b/data/constants.py @@ -3,11 +3,11 @@ import pyproj # TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that. # It will not only be more readable but will also bring a speedup! -CH1903 = pyproj.Proj(init = "EPSG:21781") +CH1903 = pyproj.Proj("EPSG:21781") LV05 = CH1903 -CH1903_PLUS = pyproj.Proj(init = "EPSG:2056") +CH1903_PLUS = pyproj.Proj("EPSG:2056") LV95 = CH1903_PLUS -WGS84 = pyproj.Proj(init = "EPSG:4326") +WGS84 = pyproj.Proj("EPSG:4326") MAXIMUM_HOUSEHOLD_SIZE = 12 MINIMUM_AGE_PER_HOUSEHOLD = 16 diff --git a/data/spatial/municipalities.py b/data/spatial/municipalities.py index 110f7fe7bf5e6ef6b2055b7793866db741de8266..1dafdfc0f0d6509378e6da2efe91094df939d31d 100644 --- a/data/spatial/municipalities.py +++ b/data/spatial/municipalities.py @@ -35,7 +35,7 @@ def execute(context): df = gpd.read_file( "%s/%s" % (data_path, shapefile), encoding="latin1" - ).to_crs({'init': 'EPSG:2056'}) + ).to_crs("EPSG:2056") df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "year"] = year diff --git a/data/spatial/nuts.py b/data/spatial/nuts.py index efcc2da02ae364851354ab9c4efbdbc825dffb99..2f1bce07bfa5213d3815cf68df65044a633a774b 100644 --- a/data/spatial/nuts.py +++ b/data/spatial/nuts.py @@ -33,9 +33,9 @@ def execute(context): df = gpd.read_file( "%s/%s" % (data_path, shapefile), encoding="utf-8" - ) # .to_crs({'init': 'EPSG:2056'}) - df.crs = {'init': 'EPSG:4326'} - df = df.to_crs({'init': 'EPSG:2056'}) + ) + df.crs = "EPSG:4326" + df = df.to_crs("EPSG:2056") df.loc[:, "nuts_id"] = df[id_field] df.loc[:, "nuts_name"] = df[name_field] diff --git a/data/spatial/ovgk.py b/data/spatial/ovgk.py index 17c611eff60fe0ef95e8551400656c8402e6b4f0..c49f9c0834ce6aead2e78ce97985035ca8fcf9f3 100644 --- a/data/spatial/ovgk.py +++ b/data/spatial/ovgk.py @@ -11,7 +11,7 @@ def configure(context): def execute(context): input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path") df = gpd.read_file(input_path) - df.crs = {"init": "EPSG:2056"} + df.crs = "EPSG:2056" df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1) return df diff --git a/data/spatial/postal_codes.py b/data/spatial/postal_codes.py index f936c1a89f9dc127b1c5de784ab10647aa593822..263f37302f18601d85bad436badd56798836d5ce 100644 --- a/data/spatial/postal_codes.py +++ b/data/spatial/postal_codes.py @@ -9,7 +9,7 @@ def execute(context): df = gpd.read_file( "%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path, encoding = "latin1" - ).to_crs({'init': 'EPSG:2056'}) + ).to_crs("EPSG:2056") df["postal_code"] = df["PLZ"] df = df.sort_values(by="postal_code").reset_index() diff --git a/data/spatial/quarters.py b/data/spatial/quarters.py index 788692b2cd3daeb6541e21453d97edb19dbe0dc3..3a0d285fec31232fe2e04135ccaab2dc799629c3 100644 --- a/data/spatial/quarters.py +++ b/data/spatial/quarters.py @@ -12,7 +12,7 @@ def execute(context): df = gpd.read_file( "%s/statistical_quarter_borders/shp/quart17.shp" % data_path, encoding = "latin1" - ).to_crs({'init': 'EPSG:2056'}) + ).to_crs("EPSG:2056") df["quarter_id"] = df["GMDEQNR"] df["quarter_name"] = df["NAME"] diff --git a/data/spatial/swiss_border.py b/data/spatial/swiss_border.py index 1960d24546cafa3fd4cd4f2fe51445a2f2b1d993..a236c9b5607ad1e6cac167762c9f6b4e97e9888b 100644 --- a/data/spatial/swiss_border.py +++ b/data/spatial/swiss_border.py @@ -9,6 +9,6 @@ def execute(context): df = gpd.read_file( "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path, encoding = "latin1" - ).to_crs({'init': 'EPSG:2056'}) + ).to_crs("EPSG:2056") return df["geometry"] \ No newline at end of file diff --git a/data/spatial/utils.py b/data/spatial/utils.py index 11000ba0386b53edebff61f36dd552ae23ca5e5d..761062557cccd8006318ce430eb2fcf425cf769d 100644 --- a/data/spatial/utils.py +++ b/data/spatial/utils.py @@ -19,7 +19,7 @@ def sample_coordinates(row, count): return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) -def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}): +def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"): df["geometry"] = [ geo.Point(*coord) for coord in context.progress( zip(df[x], df[y]), total=len(df), @@ -28,8 +28,8 @@ def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}): df = gpd.GeoDataFrame(df) df.crs = crs - if not crs == {"init": "EPSG:2056"}: - df = df.to_crs({"init": "EPSG:2056"}) + if not crs == "EPSG:2056": + df = df.to_crs("EPSG:2056") return df