Commit 7979593c authored by tchervec
Browse files

Merge branch '56-update-to-new-pyproj-syntax-for-coordinate-systems' into 'develop'

Removed all `init:` usages in pyproj calls; the EPSG string is now used directly without...

See merge request ivt-vpl/populations/ch-zh-synpop!83
parents f69fb0b5 dd4d3c01
...@@ -3,8 +3,9 @@ working_directory: /home/tchervec/Documents/data/switzerland/cache ...@@ -3,8 +3,9 @@ working_directory: /home/tchervec/Documents/data/switzerland/cache
# Requested stages # Requested stages
run: run:
- data.statpop.projections.households - data.microcensus.trips
- data.statpop.scaled # - data.statpop.projections.households
# - data.statpop.scaled
# These are configuration options that we use in the pipeline # These are configuration options that we use in the pipeline
config: config:
......
...@@ -3,11 +3,11 @@ import pyproj ...@@ -3,11 +3,11 @@ import pyproj
# TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that. # TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that.
# It will not only be more readable but will also bring a speedup! # It will not only be more readable but will also bring a speedup!
CH1903 = pyproj.Proj(init = "EPSG:21781") CH1903 = pyproj.Proj("EPSG:21781")
LV05 = CH1903 LV05 = CH1903
CH1903_PLUS = pyproj.Proj(init = "EPSG:2056") CH1903_PLUS = pyproj.Proj("EPSG:2056")
LV95 = CH1903_PLUS LV95 = CH1903_PLUS
WGS84 = pyproj.Proj(init = "EPSG:4326") WGS84 = pyproj.Proj("EPSG:4326")
MAXIMUM_HOUSEHOLD_SIZE = 12 MAXIMUM_HOUSEHOLD_SIZE = 12
MINIMUM_AGE_PER_HOUSEHOLD = 16 MINIMUM_AGE_PER_HOUSEHOLD = 16
......
...@@ -35,7 +35,7 @@ def execute(context): ...@@ -35,7 +35,7 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/%s" % (data_path, shapefile), "%s/%s" % (data_path, shapefile),
encoding="latin1" encoding="latin1"
).to_crs({'init': 'EPSG:2056'}) ).to_crs("EPSG:2056")
df.loc[:, "municipality_id"] = df[id_field] df.loc[:, "municipality_id"] = df[id_field]
df.loc[:, "municipality_name"] = df[name_field] df.loc[:, "municipality_name"] = df[name_field]
df.loc[:, "year"] = year df.loc[:, "year"] = year
......
...@@ -33,9 +33,9 @@ def execute(context): ...@@ -33,9 +33,9 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/%s" % (data_path, shapefile), "%s/%s" % (data_path, shapefile),
encoding="utf-8" encoding="utf-8"
) # .to_crs({'init': 'EPSG:2056'}) )
df.crs = {'init': 'EPSG:4326'} df.crs = "EPSG:4326"
df = df.to_crs({'init': 'EPSG:2056'}) df = df.to_crs("EPSG:2056")
df.loc[:, "nuts_id"] = df[id_field] df.loc[:, "nuts_id"] = df[id_field]
df.loc[:, "nuts_name"] = df[name_field] df.loc[:, "nuts_name"] = df[name_field]
......
...@@ -11,7 +11,7 @@ def configure(context): ...@@ -11,7 +11,7 @@ def configure(context):
def execute(context): def execute(context):
input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path") input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path")
df = gpd.read_file(input_path) df = gpd.read_file(input_path)
df.crs = {"init": "EPSG:2056"} df.crs = "EPSG:2056"
df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1) df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1)
return df return df
......
...@@ -9,7 +9,7 @@ def execute(context): ...@@ -9,7 +9,7 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path, "%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs({'init': 'EPSG:2056'}) ).to_crs("EPSG:2056")
df["postal_code"] = df["PLZ"] df["postal_code"] = df["PLZ"]
df = df.sort_values(by="postal_code").reset_index() df = df.sort_values(by="postal_code").reset_index()
......
...@@ -12,7 +12,7 @@ def execute(context): ...@@ -12,7 +12,7 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/statistical_quarter_borders/shp/quart17.shp" % data_path, "%s/statistical_quarter_borders/shp/quart17.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs({'init': 'EPSG:2056'}) ).to_crs("EPSG:2056")
df["quarter_id"] = df["GMDEQNR"] df["quarter_id"] = df["GMDEQNR"]
df["quarter_name"] = df["NAME"] df["quarter_name"] = df["NAME"]
......
...@@ -9,6 +9,6 @@ def execute(context): ...@@ -9,6 +9,6 @@ def execute(context):
df = gpd.read_file( df = gpd.read_file(
"%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path, "%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path,
encoding = "latin1" encoding = "latin1"
).to_crs({'init': 'EPSG:2056'}) ).to_crs("EPSG:2056")
return df["geometry"] return df["geometry"]
\ No newline at end of file
...@@ -19,7 +19,7 @@ def sample_coordinates(row, count): ...@@ -19,7 +19,7 @@ def sample_coordinates(row, count):
return np.array(list(map(lambda p: (p.x, p.y), samples[:count]))) return np.array(list(map(lambda p: (p.x, p.y), samples[:count])))
def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}): def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"):
df["geometry"] = [ df["geometry"] = [
geo.Point(*coord) for coord in context.progress( geo.Point(*coord) for coord in context.progress(
zip(df[x], df[y]), total=len(df), zip(df[x], df[y]), total=len(df),
...@@ -28,8 +28,8 @@ def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}): ...@@ -28,8 +28,8 @@ def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}):
df = gpd.GeoDataFrame(df) df = gpd.GeoDataFrame(df)
df.crs = crs df.crs = crs
if not crs == {"init": "EPSG:2056"}: if not crs == "EPSG:2056":
df = df.to_crs({"init": "EPSG:2056"}) df = df.to_crs("EPSG:2056")
return df return df
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment