Commit f69fb0b5 authored by tchervec's avatar tchervec
Browse files

Merge branch '54-port-pipeline-to-synpp' into develop

parents 7295912e 6c746bbc
import pandas as pd
import numpy as np
import data.constants as c
from tqdm import tqdm
import functools
def configure(context, require):
    """Declare the upstream stages this stage depends on (legacy pipeline API)."""
    for stage_name in (
        "data.microcensus.commute",
        "data.od.matrix",
        "data.od.distances",
        "population.sociodemographics",
        "data.spatial.zones",
    ):
        require.stage(stage_name)
import numpy as np
import pandas as pd
def configure(context):
    """Declare the upstream stages this stage depends on (synpp API)."""
    for stage_name in (
        "data.microcensus.commute",
        "data.od.matrix",
        "data.od.distances",
        "population.sociodemographics",
        "data.spatial.zones",
    ):
        context.stage(stage_name)
# TODO: We only assign work here through OD matrices. However, we *can* generate
# OD matrices for education as well (the STATPOP information is available). What
......@@ -26,7 +27,7 @@ def execute(context):
# Load commute information for work
df_commute = pd.DataFrame(context.stage("data.microcensus.commute")[[
"person_id", "commute_mode", "commute_home_distance", "commute_purpose"
]], copy = True)
]], copy=True)
df_commute = df_commute[df_commute["commute_purpose"] == "work"]
df_commute["mz_person_id"] = df_commute["person_id"]
del df_commute["person_id"]
......@@ -38,10 +39,10 @@ def execute(context):
# Merge commute information into the persons
df = pd.merge(
df_persons, df_commute, on = "mz_person_id"
df_persons, df_commute, on="mz_person_id"
)
df_demand = df.groupby(["commute_mode", "home_zone_id"]).size().reset_index(name = "count")
df_demand = df.groupby(["commute_mode", "home_zone_id"]).size().reset_index(name="count")
pdf_matrices, cdf_matrices = context.stage("data.od.matrix")
commute_counts = {}
......@@ -51,31 +52,31 @@ def execute(context):
origin_counts = np.array([
np.sum(df_demand.loc[
(df_demand["commute_mode"] == mode) & (df_demand["home_zone_id"] == origin_zone), "count"
]) for origin_zone in tqdm(df_zones["zone_id"], desc = mode)
(df_demand["commute_mode"] == mode) & (df_demand["home_zone_id"] == origin_zone), "count"
]) for origin_zone in context.progress(df_zones["zone_id"], label=mode)
])[:, np.newaxis]
counts = np.zeros(pdf_matrices[source_mode].shape, dtype = np.int)
counts = np.zeros(pdf_matrices[source_mode].shape, dtype=np.int)
for i in range(len(df_zones)):
if origin_counts[i] > 0:
assert(~np.any(np.isnan(pdf_matrices[source_mode][i])))
counts[i,:] = np.random.multinomial(origin_counts[i], pdf_matrices[source_mode][i,:])
assert (~np.any(np.isnan(pdf_matrices[source_mode][i])))
counts[i, :] = np.random.multinomial(origin_counts[i], pdf_matrices[source_mode][i, :])
commute_counts[mode] = counts
assert(len(counts) == len(df_zones))
assert (len(counts) == len(df_zones))
distances = context.stage("data.od.distances")
work_zones = np.zeros((len(df),), dtype = np.int)
work_zones = np.zeros((len(df),), dtype=np.int)
zone_ids = list(df_zones["zone_id"])
with tqdm(desc = "Assigning work zones", total = 5 * len(df_zones)) as progress:
with context.progress(label="Assigning work zones", total=5 * len(df_zones)) as progress:
for mode in ["car", "pt", "bike", "walk", "car_passenger"]:
mode_f = df["commute_mode"] == mode
for origin_index, origin_zone in enumerate(zone_ids):
destination_counts = commute_counts[mode][origin_index,:]
destination_order = np.argsort(distances[origin_index,:])
destination_counts = commute_counts[mode][origin_index, :]
destination_order = np.argsort(distances[origin_index, :])
destinations = [[zone_ids[i]] * destination_counts[i] for i in destination_order]
destinations = functools.reduce(lambda x, y: x + y, destinations)
......@@ -89,6 +90,6 @@ def execute(context):
df.loc[:, "work_zone_id"] = work_zones
df = df[["person_id", "work_zone_id", "commute_mode"]]
assert(len(df) == len(df.dropna()))
assert (len(df) == len(df.dropna()))
return df
import pandas as pd
import numpy as np
from tqdm import tqdm
import pandas as pd
import data.spatial.zone_shapes
def configure(context, require):
    """Register the stage dependencies for work-location assignment (legacy pipeline API)."""
    for stage_name in (
        "population.spatial.by_person.primary_zones",
        "data.statent.statent",
        "data.spatial.zones",
        "data.spatial.zone_shapes",
    ):
        require.stage(stage_name)
def configure(context):
    """Register the stage dependencies for work-location assignment (synpp API)."""
    for stage_name in (
        "population.spatial.by_person.primary_zones",
        "data.statent.statent",
        "data.spatial.zones",
        "data.spatial.zone_shapes",
    ):
        context.stage(stage_name)
def execute(context):
df = context.stage("population.spatial.by_person.primary_zones")
......@@ -16,14 +18,15 @@ def execute(context):
df_zones = context.stage("data.spatial.zones")
df_zones["work_zone_id"] = df_zones["zone_id"]
df_demand = df.groupby("work_zone_id").size().reset_index(name = "count")
df_demand = df.groupby("work_zone_id").size().reset_index(name="count")
df_demand = pd.merge(df_demand, df_zones[["work_zone_id", "zone_level"]])
# First handle the national commuters
df_national = df_demand[df_demand["zone_level"].isin(("municipality", "quarter"))]
empty_zones = []
for zone_id, count in tqdm(zip(df_national["work_zone_id"], df_national["count"]), desc = "Assigning national locations ...", total = len(df_demand)):
for zone_id, count in context.progress(zip(df_national["work_zone_id"], df_national["count"]),
label="Assigning national locations ...", total=len(df_demand)):
indices = np.where(df_statent["zone_id"] == zone_id)[0]
weights = df_statent.iloc[indices]["number_employees"]
weights /= np.sum(weights)
......@@ -45,12 +48,12 @@ def execute(context):
df_shapes = context.stage("data.spatial.zone_shapes")
for zone_id in tqdm(empty_zones, desc = "Assigning national locations for empty zones ..."):
for zone_id in context.progress(empty_zones, label="Assigning national locations for empty zones ..."):
count = df_national[df_national["work_zone_id"] == zone_id]["count"].iloc[0]
row = df_shapes[df_shapes["zone_id"] == zone_id].iloc[0]
coordinates = data.spatial.zone_shapes.sample_coordinates(row, count)
df.loc[df["work_zone_id"] == zone_id, "work_x"] = coordinates[:,0]
df.loc[df["work_zone_id"] == zone_id, "work_y"] = coordinates[:,1]
df.loc[df["work_zone_id"] == zone_id, "work_x"] = coordinates[:, 0]
df.loc[df["work_zone_id"] == zone_id, "work_y"] = coordinates[:, 1]
# Second, handle the international commuters
print("TODO: We do not handle commuter traffic at the moment.")
......@@ -58,8 +61,8 @@ def execute(context):
# For now, make sure that we do not have any international traffic
df_international = df_demand[df_demand["zone_level"] == "country"]
assert(len(df_international) == 0)
assert(len(df) == len(df.dropna()))
assert (len(df_international) == 0)
assert (len(df) == len(df.dropna()))
df = df[[
"person_id", "work_x", "work_y", "work_location_id"
......
import gzip
from tqdm import tqdm
import pandas as pd
import numpy as np
def configure(context, require):
    """Register stage dependencies for combining activities with locations (legacy pipeline API)."""
    for stage_name in (
        "population.activities",
        "population.spatial.by_activity.primary_locations",
        "population.spatial.by_activity.subprimary_locations",
    ):
        require.stage(stage_name)
def configure(context):
    """Register stage dependencies for combining activities with locations (synpp API)."""
    for stage_name in (
        "population.activities",
        "population.spatial.by_activity.primary_locations",
        "population.spatial.by_activity.subprimary_locations",
    ):
        context.stage(stage_name)
def execute(context):
df_activities = context.stage("population.activities")
......
import gzip
from tqdm import tqdm
import pandas as pd
import numpy as np
import data.constants as c
def configure(context, require):
    """Register stage dependencies for the trips stage (legacy pipeline API)."""
    for stage_name in (
        "population.sociodemographics",
        "data.microcensus.trips",
        "data.microcensus.commute",
    ):
        require.stage(stage_name)
def configure(context):
    """Register stage dependencies for the trips stage (synpp API)."""
    for stage_name in (
        "population.sociodemographics",
        "data.microcensus.trips",
        "data.microcensus.commute",
    ):
        context.stage(stage_name)
def execute(context):
df_persons = context.stage("population.sociodemographics")[[
......
matplotlib==3.1.3
pandas==0.25.3
scipy==1.2.1
numpy==1.17.4
geopandas==0.7.0
numba==0.49.0
palettable==3.3.0
scikit_learn==0.22.2.post1
Shapely==1.7.0
simpledbf==0.2.6
tqdm==4.45.0
synpp==1.2.2
tables==3.6.1
xlrd==1.2.0
pytest==5.4.2
xlwt==1.3.0
pysal==1.14.4.post1
import sys

import tqdm
import yaml

import pipeline

# Entry point: load the YAML configuration and run the pipeline stages.
# The configuration path defaults to "config.yml" but can be overridden by
# the first command-line argument.
config_path = "config.yml"
if len(sys.argv) > 1:
    config_path = sys.argv[1]

with open(config_path) as f:
    # safe_load refuses arbitrary Python object tags; plain yaml.load without
    # an explicit Loader is deprecated and unsafe on untrusted input.
    config = yaml.safe_load(f)

# Replace tqdm with a no-op variant when progress bars are disabled
# (e.g. to keep batch-job logs clean).
if "disable_progress_bar" in config and config["disable_progress_bar"]:
    tqdm.tqdm = pipeline.safe_tqdm

pipeline.run(
    config["stages"],
    target_path=config["target_path"],
    config=config)
......@@ -26,9 +26,9 @@ class JavaRunner:
else:
return sp.check_call(command_line, cwd = cwd)
def configure(context, require):
    """Declare default runtime options for the Java runner (legacy pipeline API)."""
    defaults = (
        ("java_memory", "10G"),
        ("java_binary", "java"),
    )
    for key, value in defaults:
        require.config(key, value)
def configure(context):
    """Declare default runtime options for the Java runner (synpp API)."""
    defaults = (
        ("java_memory", "10G"),
        ("java_binary", "java"),
    )
    for key, value in defaults:
        context.config(key, value)
# Not ideal, because we assume that "java" is the right binary.
# This should better go into a "validate" step between configure and
......@@ -36,4 +36,4 @@ def configure(context, require):
assert("1.8.0" in sp.check_output(["java", "-version"], stderr = sp.STDOUT).decode("utf-8"))
def execute(context):
return JavaRunner(context.config["java_binary"], context.config["java_memory"])
return JavaRunner(context.config("java_binary"), context.config("java_memory"))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment