Commit 7979593c authored by tchervec
Browse files

Merge branch '56-update-to-new-pyproj-syntax-for-coordinate-systems' into 'develop'

Removed all `init:` usages in pyproj calls; the EPSG string is now used directly without...

See merge request ivt-vpl/populations/ch-zh-synpop!83
parents f69fb0b5 dd4d3c01
......@@ -3,8 +3,9 @@ working_directory: /home/tchervec/Documents/data/switzerland/cache
# Requested stages
run:
- data.statpop.projections.households
- data.statpop.scaled
- data.microcensus.trips
# - data.statpop.projections.households
# - data.statpop.scaled
# These are configuration options that we use in the pipeline
config:
......
......@@ -3,11 +3,11 @@ import pyproj
# TODO: Pandas is quite good at working with categorical data. Refactor everything to make use of that.
# It will not only be more readable but will also bring a speedup!
CH1903 = pyproj.Proj(init = "EPSG:21781")
CH1903 = pyproj.Proj("EPSG:21781")
LV05 = CH1903
CH1903_PLUS = pyproj.Proj(init = "EPSG:2056")
CH1903_PLUS = pyproj.Proj("EPSG:2056")
LV95 = CH1903_PLUS
WGS84 = pyproj.Proj(init = "EPSG:4326")
WGS84 = pyproj.Proj("EPSG:4326")
MAXIMUM_HOUSEHOLD_SIZE = 12
MINIMUM_AGE_PER_HOUSEHOLD = 16
......
......@@ -35,7 +35,7 @@ def execute(context):
df = gpd.read_file(
"%s/%s" % (data_path, shapefile),
encoding="latin1"
).to_crs({'init': 'EPSG:2056'})
).to_crs("EPSG:2056")
df.loc[:, "municipality_id"] = df[id_field]
df.loc[:, "municipality_name"] = df[name_field]
df.loc[:, "year"] = year
......
......@@ -33,9 +33,9 @@ def execute(context):
df = gpd.read_file(
"%s/%s" % (data_path, shapefile),
encoding="utf-8"
) # .to_crs({'init': 'EPSG:2056'})
df.crs = {'init': 'EPSG:4326'}
df = df.to_crs({'init': 'EPSG:2056'})
)
df.crs = "EPSG:4326"
df = df.to_crs("EPSG:2056")
df.loc[:, "nuts_id"] = df[id_field]
df.loc[:, "nuts_name"] = df[name_field]
......
......@@ -11,7 +11,7 @@ def configure(context):
def execute(context):
input_path = "%s/ov_guteklasse/LV95/Oev_Gueteklassen_ARE.shp" % context.config("data_path")
df = gpd.read_file(input_path)
df.crs = {"init": "EPSG:2056"}
df.crs = "EPSG:2056"
df = df[["KLASSE", "geometry"]].rename({"KLASSE": "ovgk"}, axis=1)
return df
......
......@@ -9,7 +9,7 @@ def execute(context):
df = gpd.read_file(
"%s/postal_codes/PLZO_SHP_LV95/PLZO_PLZ.shp" % data_path,
encoding = "latin1"
).to_crs({'init': 'EPSG:2056'})
).to_crs("EPSG:2056")
df["postal_code"] = df["PLZ"]
df = df.sort_values(by="postal_code").reset_index()
......
......@@ -12,7 +12,7 @@ def execute(context):
df = gpd.read_file(
"%s/statistical_quarter_borders/shp/quart17.shp" % data_path,
encoding = "latin1"
).to_crs({'init': 'EPSG:2056'})
).to_crs("EPSG:2056")
df["quarter_id"] = df["GMDEQNR"]
df["quarter_name"] = df["NAME"]
......
......@@ -9,6 +9,6 @@ def execute(context):
df = gpd.read_file(
"%s/municipality_borders/gd-b-00.03-875-gg18/ggg_2018-LV95/shp/g1l18.shp" % data_path,
encoding = "latin1"
).to_crs({'init': 'EPSG:2056'})
).to_crs("EPSG:2056")
return df["geometry"]
\ No newline at end of file
......@@ -19,7 +19,7 @@ def sample_coordinates(row, count):
return np.array(list(map(lambda p: (p.x, p.y), samples[:count])))
def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}):
def to_gpd(context, df, x="x", y="y", crs="EPSG:2056"):
df["geometry"] = [
geo.Point(*coord) for coord in context.progress(
zip(df[x], df[y]), total=len(df),
......@@ -28,8 +28,8 @@ def to_gpd(context, df, x="x", y="y", crs={"init": "EPSG:2056"}):
df = gpd.GeoDataFrame(df)
df.crs = crs
if not crs == {"init": "EPSG:2056"}:
df = df.to_crs({"init": "EPSG:2056"})
if not crs == "EPSG:2056":
df = df.to_crs("EPSG:2056")
return df
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment