Commit f69fb0b5 authored by tchervec's avatar tchervec
Browse files

Merge branch '54-port-pipeline-to-synpp' into develop

parents 7295912e 6c746bbc
import requests
from tqdm import tqdm
import subprocess as sp
import os.path
def configure(context, require):
require.stage("utils.java")
def configure(context):
context.stage("utils.java")
def execute(context):
java = context.stage("utils.java")
......
import requests
from tqdm import tqdm
import subprocess as sp
import os.path
def configure(context, require):
require.stage("utils.java")
def configure(context):
context.stage("utils.java")
def execute(context):
java = context.stage("utils.java")
......
import gzip
from tqdm import tqdm
import pandas as pd
import numpy as np
import data.constants as c
def configure(context, require):
require.stage("data.microcensus.persons")
require.stage("data.microcensus.trips")
def configure(context):
context.stage("data.microcensus.persons")
context.stage("data.microcensus.trips")
def execute(context):
df_trips = pd.DataFrame(context.stage("data.microcensus.trips"), copy = True)
......
import gzip
from tqdm import tqdm
import data.constants as c
import numpy as np
import io
import matsim.writers
import numpy as np
import pandas as pd
def configure(context, require):
require.stage("data.microcensus.persons")
require.stage("data.microcensus.trips")
require.stage("matsim.mz.activities")
import matsim.writers
def configure(context):
context.stage("data.microcensus.persons")
context.stage("data.microcensus.trips")
context.stage("matsim.mz.activities")
class PersonWriter:
def __init__(self, person):
......@@ -91,7 +92,7 @@ def execute(context):
writer = matsim.writers.PopulationWriter(raw_writer)
writer.start_population()
with tqdm(total = len(df_persons), desc = "Writing persons ...") as progress:
with context.progress(total = len(df_persons), label = "Writing persons ...") as progress:
try:
while True:
person = next(person_iterator)
......
import subprocess as sp
import os.path
def configure(context, require):
require.stage("matsim.java.pt2matsim")
require.stage("utils.java")
require.config("raw_data_path")
def configure(context):
context.stage("matsim.java.pt2matsim")
context.stage("utils.java")
context.config("data_path")
def execute(context):
jar, tmp_path = context.stage("matsim.java.pt2matsim")
......@@ -13,10 +12,10 @@ def execute(context):
# Create MATSim schedule
java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [
"%s/hafas" % context.config["raw_data_path"], "EPSG:2056",
"%s/hafas" % context.config("data_path"), "EPSG:2056",
"%s/transit_schedule.xml.gz" % context.cache_path,
"%s/transit_vehicles.xml.gz" % context.cache_path,
context.config["hafas_date"]
context.config("hafas_date")
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
assert(os.path.exists("%s/transit_schedule.xml.gz" % context.cache_path))
......
import subprocess as sp
import os.path
def configure(context, require):
require.stage("matsim.java.pt2matsim")
require.stage("utils.java")
require.config("raw_data_path")
def configure(context):
context.stage("matsim.java.pt2matsim")
context.stage("utils.java")
context.config("data_path")
def execute(context):
jar, tmp_path = context.stage("matsim.java.pt2matsim")
......@@ -20,7 +19,7 @@ def execute(context):
content = content.replace(
'<param name="osmFile" value="null" />',
'<param name="osmFile" value="%s/osm/switzerland-latest.osm.gz" />' % context.config["raw_data_path"]
'<param name="osmFile" value="%s/osm/switzerland-latest.osm.gz" />' % context.config("data_path")
)
content = content.replace(
'<param name="outputCoordinateSystem" value="null" />',
......
import subprocess as sp
import os.path
def configure(context, require):
require.stage("matsim.java.pt2matsim")
require.stage("utils.java")
require.stage("matsim.network.convert_osm")
require.stage("matsim.network.convert_hafas")
def configure(context):
context.stage("matsim.java.pt2matsim")
context.stage("utils.java")
context.stage("matsim.network.convert_osm")
context.stage("matsim.network.convert_hafas")
def execute(context):
jar, tmp_path = context.stage("matsim.java.pt2matsim")
......@@ -32,7 +31,7 @@ def execute(context):
)
content = content.replace(
'<param name="numOfThreads" value="2" />',
'<param name="numOfThreads" value="%d" />' % context.config["threads"]
'<param name="numOfThreads" value="%d" />' % context.config("threads")
)
content = content.replace(
'<param name="outputNetworkFile" value="" />',
......
import subprocess as sp
import os.path
def configure(context, require):
require.stage("matsim.java.pt2matsim")
require.stage("matsim.network.mapped")
require.stage("utils.java")
def configure(context):
context.stage("matsim.java.pt2matsim")
context.stage("matsim.network.mapped")
context.stage("utils.java")
def execute(context):
java = context.stage("utils.java")
......
import gzip
from tqdm import tqdm
import data.constants as c
import numpy as np
import io
import matsim.writers
import numpy as np
import pandas as pd
def configure(context, require):
require.stage("population.sociodemographics")
require.stage("population.trips")
require.stage("population.activities")
require.stage("population.spatial.locations")
require.config("use_freight", default=False)
require.stage("freight.trips")
import matsim.writers
def configure(context):
context.stage("population.sociodemographics")
context.stage("population.trips")
context.stage("population.activities")
context.stage("population.spatial.locations")
context.config("use_freight", default=False)
context.stage("freight.trips")
class PersonWriter:
def __init__(self, person):
......@@ -45,13 +47,14 @@ class PersonWriter:
writer.end_attributes()
# Plan
writer.start_plan(selected = True)
writer.start_plan(selected=True)
home_location = writer.location(self.activities[0][8], self.activities[0][9], "home%s" % self.person[13])
for i in range(len(self.activities)):
activity = self.activities[i]
location = home_location if np.isnan(activity[10]) else writer.location(activity[8], activity[9], int(activity[10]))
location = home_location if np.isnan(activity[10]) else writer.location(activity[8], activity[9],
int(activity[10]))
start_time = activity[3] if not np.isnan(activity[3]) else None
end_time = activity[4] if not np.isnan(activity[4]) else None
......@@ -65,6 +68,7 @@ class PersonWriter:
writer.end_plan()
writer.end_person()
class FreightWriter:
def __init__(self, freight_agent):
self.freight_agent = freight_agent
......@@ -80,7 +84,7 @@ class FreightWriter:
writer.end_attributes()
# Plan
writer.start_plan(selected = True)
writer.start_plan(selected=True)
start_location = writer.location(self.freight_agent[2], self.freight_agent[3], None)
end_location = writer.location(self.freight_agent[4], self.freight_agent[5], None)
......@@ -97,7 +101,7 @@ class FreightWriter:
writer.add_leg(str(self.freight_agent[7]), departure_time, arrival_time - departure_time)
# unloading activity
writer.start_activity("freight_unloading", end_location, arrival_time, 30*3600)
writer.start_activity("freight_unloading", end_location, arrival_time, 30 * 3600)
writer.start_attributes()
writer.end_attributes()
writer.end_activity()
......@@ -105,8 +109,14 @@ class FreightWriter:
writer.end_plan()
writer.end_person()
PERSON_FIELDS = ["person_id", "age", "car_availability", "employed", "driving_license", "sex", "home_x", "home_y", "subscriptions_ga", "subscriptions_halbtax", "subscriptions_verbund", "subscriptions_strecke", "household_id", "is_car_passenger", "statpop_person_id", "statpop_household_id", "mz_person_id", "mz_head_id"]
ACTIVITY_FIELDS = ["person_id", "activity_id", "start_time", "end_time", "duration", "purpose", "is_last", "location_x", "location_y", "location_id", "following_mode"]
PERSON_FIELDS = ["person_id", "age", "car_availability", "employed", "driving_license", "sex", "home_x", "home_y",
"subscriptions_ga", "subscriptions_halbtax", "subscriptions_verbund", "subscriptions_strecke",
"household_id", "is_car_passenger", "statpop_person_id", "statpop_household_id", "mz_person_id",
"mz_head_id"]
ACTIVITY_FIELDS = ["person_id", "activity_id", "start_time", "end_time", "duration", "purpose", "is_last", "location_x",
"location_y", "location_id", "following_mode"]
def execute(context):
cache_path = context.cache_path
......@@ -114,17 +124,17 @@ def execute(context):
df_activities = context.stage("population.activities")
# Attach following modes to activities
df_trips = pd.DataFrame(context.stage("population.trips"), copy = True)[["person_id", "trip_id", "mode"]]
df_trips = pd.DataFrame(context.stage("population.trips"), copy=True)[["person_id", "trip_id", "mode"]]
df_trips.columns = ["person_id", "activity_id", "following_mode"]
df_activities = pd.merge(df_activities, df_trips, on = ["person_id", "activity_id"], how = "left")
df_activities = pd.merge(df_activities, df_trips, on=["person_id", "activity_id"], how="left")
# Attach locations to activities
df_locations = context.stage("population.spatial.locations")
df_activities = pd.merge(df_activities, df_locations, on = ["person_id", "activity_id"], how = "left")
df_activities = pd.merge(df_activities, df_locations, on=["person_id", "activity_id"], how="left")
# Bring in correct order (although it should already be)
df_persons = df_persons.sort_values(by = "person_id")
df_activities = df_activities.sort_values(by = ["person_id", "activity_id"])
df_persons = df_persons.sort_values(by="person_id")
df_activities = df_activities.sort_values(by=["person_id", "activity_id"])
df_persons = df_persons[PERSON_FIELDS]
df_activities = df_activities[ACTIVITY_FIELDS]
......@@ -136,11 +146,11 @@ def execute(context):
number_of_written_activities = 0
with gzip.open("%s/population.xml.gz" % cache_path, "w+") as f:
with io.BufferedWriter(f, buffer_size = 1024 * 1024 * 1024 * 2) as raw_writer:
with io.BufferedWriter(f, buffer_size=1024 * 1024 * 1024 * 2) as raw_writer:
writer = matsim.writers.PopulationWriter(raw_writer)
writer.start_population()
with tqdm(total = len(df_persons), desc = "Writing persons ...") as progress:
with context.progress(total=len(df_persons), label="Writing persons ...") as progress:
try:
while True:
person = next(person_iterator)
......@@ -151,7 +161,7 @@ def execute(context):
while not is_last:
activity = next(activity_iterator)
is_last = activity[7]
assert(person[1] == activity[1])
assert (person[1] == activity[1])
person_writer.add_activity(activity)
number_of_written_activities += 1
......@@ -165,13 +175,13 @@ def execute(context):
assert (number_of_written_activities == len(df_activities))
assert (number_of_written_persons == len(df_persons))
if context.config["use_freight"]:
if context.config("use_freight"):
df_freight = context.stage("freight.trips")
freight_iterator = iter(df_freight.itertuples())
number_of_written_freight = 0
with tqdm(total = len(df_freight), desc = "Writing freight agents ...") as progress:
with context.progress(total=len(df_freight), label="Writing freight agents ...") as progress:
try:
while True:
freight = next(freight_iterator)
......
import shutil
import os.path
def configure(context, require):
require.stage("matsim.secondary_locations")
require.stage("matsim.households")
require.stage("matsim.facilities")
require.stage("matsim.network.mapped")
require.stage("matsim.java.eqasim")
require.stage("utils.java")
def configure(context):
context.stage("matsim.secondary_locations")
context.stage("matsim.households")
context.stage("matsim.facilities")
context.stage("matsim.network.mapped")
context.stage("matsim.java.eqasim")
context.stage("utils.java")
def execute(context):
# Some files we just copy
......@@ -45,7 +45,7 @@ def execute(context):
"--output-population-path", population_prepared_path,
"--input-network-path", network_input_path,
"--output-network-path", network_output_path,
"--threads", str(context.config["threads"])
"--threads", str(context.config("threads"))
], cwd = context.cache_path)
java(
......@@ -53,9 +53,9 @@ def execute(context):
"org.eqasim.core.scenario.config.RunGenerateConfig", [
"--output-path", config_output_path,
"--prefix", "switzerland_",
"--sample-size", str(context.config["input_downsampling"]),
"--sample-size", str(context.config("input_downsampling")),
"--random-seed", str(1000),
"--threads", str(context.config["threads"])
"--threads", str(context.config("threads"))
], cwd = context.cache_path)
java(
......@@ -70,7 +70,7 @@ def execute(context):
"org.eqasim.core.scenario.routing.RunPopulationRouting", [
"--config-path", config_output_path,
"--output-path", population_output_path,
"--threads", str(context.config["threads"]),
"--threads", str(context.config("threads")),
"--config:plans.inputPlansFile", population_prepared_path
], cwd = context.cache_path)
......
import gzip
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.neighbors import KDTree
import numpy.linalg as la
import os
import eqasim.location_assignment as eqla
import pandas as pd
def configure(context, require):
require.stage("matsim.population")
require.stage("matsim.facilities")
require.stage("data.microcensus.trips")
require.stage("data.microcensus.persons")
require.stage("matsim.java.eqasim")
require.stage("utils.java")
def configure(context):
context.stage("matsim.population")
context.stage("matsim.facilities")
context.stage("data.microcensus.trips")
context.stage("data.microcensus.persons")
context.stage("matsim.java.eqasim")
context.stage("utils.java")
def execute(context):
primary_activities = ["home", "work", "education"]
......@@ -56,7 +53,7 @@ def execute(context):
input_facilities_path = context.stage("matsim.facilities")
output_population_path = "%s/population_with_locations.xml.gz" % context.cache_path
number_of_threads = context.config["threads"]
number_of_threads = context.config("threads")
java(
context.stage("matsim.java.eqasim"),
......
import importlib
import os.path, shutil
import pickle
import itertools
import time
class safe_tqdm:
def __init__(self, iterator = None, total = None, position = None, desc = " Running something without description ...", *a, **k):
self.desc = desc
self.iterator = iterator
self.total = total
self.current = 0
self.last_time = 0.0
if position is not None:
self.desc = self.desc + "(%d)" % (position + 1)
self._print()
def _print(self):
current_time = time.time()
if current_time - self.last_time > 10.0:
self.last_time = current_time
if self.total is None:
if self.current == 0:
print("%s: running" % self.desc)
else:
print("%s: %d" % (self.desc, self.current))
else:
print("%s: %d/%d (%.2f%%)" % (self.desc, self.current, self.total, 100.0 * self.current / self.total))
def _print_done(self):
print("%s: done" % self.desc)
def __enter__(self):
return self
def __exit__(self ,type, value, traceback):
self._print_done()
def __iter__(self):
def loop():
for element in self.iterator:
yield element
self.update()
self._print_done()
return loop()
#return iter(self.iterator)
def update(self, count = 1, *a, **k):
self.current += count
self._print()
class Require:
def __init__(self):
self.config_defaults = {}
self.config_names = list()
self.stage_names = list()
self.cache = True
def config(self, name, default = None):
self.config_names.append(name)
if default is not None:
self.config_defaults[name] = default
def stage(self, name):
self.stage_names.append(name)
class Context:
def __init__(self, target_path, config):
self.stages = {}
self.target_path = target_path
self.config = config
self.cache_path = None
def cache_path(self, name):
return "%s/%s_cache" % (self.target_path, name)
def stage_path(self, name):
return "%s/%s.p" % (self.target_path, name)
def save(self, name, data, cache = True):
self.stages[name] = data
if cache:
with open(self.stage_path(name), "wb+") as f:
pickle.dump(data, f)
def stage(self, name):
if not name in self.stages:
with open(self.stage_path(name), "rb") as f:
self.stages[name] = pickle.load(f)
return self.stages[name]
def compute_dag(dependencies):
linear = []
remaining = list(dependencies.keys())
while len(remaining) > 0:
prior_count = len(remaining)
for item in remaining[:]:
insertable = True
for dependency in dependencies[item]:
insertable = insertable & (dependency in linear)
if insertable:
linear.append(item)
remaining.remove(item)
posterior_count = len(remaining)
if prior_count == posterior_count:
raise RuntimeError()
parents = {}
for seed in linear:
parents[seed] = set(dependencies[seed])
for parent in parents[seed]:
parents[seed] = parents[seed] | parents[parent]
direct_children = {}
for seed in linear:
direct_children[seed] = set()
for candidate in linear:
if seed in dependencies[candidate]:
direct_children[seed].add(candidate)
children = {}
for seed in linear[::-1]:
children[seed] = set(direct_children[seed])
for child in children[seed]:
children[seed] = children[seed] | children[child]
return { "sequence" : linear, "parents" : parents, "children" : children }
def run(requested_stages, target_path = "target", config = {}):
target_path = os.path.abspath(target_path)
stage_names = requested_stages[:]
stages = {}
requirements = {}
dependencies = {}
config_defaults = {}
while len(stage_names) > 0:
stage_name = stage_names[0]
del stage_names[0]
if not stage_name in stages:
require = Require()
stage = importlib.import_module(stage_name)
stage.configure(None, require)
stages[stage_name] = stage
requirements[stage_name] = require
dependencies[stage_name] = list(set(require.stage_names))
stage_names += require.stage_names
config_defaults = {}
multiple_defaults = []
for stage_name, require in requirements.items():
for config_name in require.config_defaults:
if config_name in config_defaults:
raise RuntimError("Multiple defaults")
else:
config_defaults[config_name] = require.config_defaults[config_name]
for config_name, config_value in config_defaults.items():
if not config_name in config:
config[config_name] = config_value
missing_config_values = []
for stage_name, require in requirements.items():