Commit 7295912e authored by tchervec's avatar tchervec
Browse files

fix merge conflicts

parents 042ac4a0 feafdd7b
test:
script:
- source environment/activate.sh env
- rm -rf output cache tmp
- mkdir output cache tmp
- export TMPDIR=${CI_PROJECT_DIR}/tmp
- rm -rf output cache
- mkdir output cache
- python3 -u run.py config_gitlab.yml
before_script:
......
**Version v2 (master)**
- Update Maven dependency
- Update eqasim to 1.0.5
- Upgrade to a new pt2matsim version to allow selecting the HAFAS date
- Store STATPOP and MZ ids as agent attributes
- Fix Java /tmp bug in shared environments (IVT servers)
- Make it possible to run the pipeline in a VM, add instructions
- Added ÖV Güteklasse for home locations
- Remove imputation of ÖV Güteklasse for activities
......
......@@ -2,6 +2,9 @@ This repository contains all the scripts that are used to create the
IVT Switzerland / Zurich MATSim scenario. It uses a custom build pipeline
with `python` modules that call each other in the sense of incremental builds.
A more flexible version is being made public on github.com. The documentation there is
more thorough and may be helpful: [synpp](https://github.com/eqasim-org/synpp).
# Installation
Two bash scripts which set up everything that is needed to run the pipeline on our servers, as well as a requirements.txt file, can be found in `environment`:
......
......@@ -9,6 +9,7 @@ input_downsampling: 0.01
enable_scaling: true
scaling_year: 2045
use_freight: true
hafas_date: 01.10.2018
stages:
- matsim.run
- matsim.mz.population
......
......@@ -129,6 +129,10 @@ def execute(context):
df_spatial = data.spatial.ovgk.impute(df_ovgk, df_spatial, ["person_id"])
df = pd.merge(df, df_spatial[["person_id", "ovgk"]], on = ["person_id"], how = "left")
# Save original statpop person and household ids
df["statpop_person_id"] = df["person_id"].astype(int)
df["statpop_household_id"] = df["household_id"].astype(int)
# Wrap everything up
df = df[[
"person_id", "household_id",
......@@ -137,7 +141,9 @@ def execute(context):
"marital_status", "nationality",
"household_size",
"age_class", "household_size_class", "home_zone_id", "municipality_type",
"home_municipality_id", "home_quarter_id", "canton_id", "population_density", "sp_region", "ovgk"]]
"home_municipality_id", "home_quarter_id", "canton_id", "population_density", "sp_region", "ovgk",
"statpop_person_id", "statpop_household_id"]]
df = data.statpop.head_of_household.impute(df)
return df
......@@ -12,9 +12,9 @@ jdk_version="8u212"
jdk_url="https://github.com/AdoptOpenJDK/openjdk8-binaries/releases/download/jdk8u212-b03/OpenJDK8U-jdk_x64_linux_hotspot_8u212b03.tar.gz"
jdk_sha256="dd28d6d2cde2b931caf94ac2422a2ad082ea62f0beee3bf7057317c53093de93"
maven_version="3.6.1"
maven_version="3.6.3"
maven_url="http://mirror.easyname.ch/apache/maven/maven-3/${maven_version}/binaries/apache-maven-${maven_version}-bin.tar.gz"
maven_sha512="b4880fb7a3d81edd190a029440cdf17f308621af68475a4fe976296e71ff4a4b546dd6d8a58aaafba334d309cc11e638c52808a4b0e818fc0fd544226d952544"
maven_sha512="c35a1803a6e70a126e80b2b3ae33eed961f83ed74d18fcd16909b2d44d7dada3203f1ffe726c17ef8dcca2dcaa9fca676987befeadc9b9f759967a8cb77181c0"
# Define Python requirements
python_requirements=$(cat <<EOF
......
......@@ -5,18 +5,23 @@ import numpy as np
import io
import matsim.writers
def configure(context, require):
require.stage("population.sociodemographics")
FIELDS = ["household_id", "person_id", "income_class", "age", "number_of_cars_class", "number_of_bikes_class", "municipality_type", "sp_region", "canton_id", "ovgk"]
FIELDS = ["household_id", "person_id", "income_class", "age", "number_of_cars_class", "number_of_bikes_class",
"municipality_type", "sp_region", "canton_id", "ovgk"]
INCOME_VALUES = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000]
def write_number_of_cars_class(value):
    """Return the textual form of a number-of-cars class value.

    The top class (``c.MAX_NUMBER_OF_CARS_CLASS``) is open-ended and is
    written with a trailing plus sign; every other value is written via
    ``str``.
    """
    top_class = c.MAX_NUMBER_OF_CARS_CLASS

    # Guard clause: anything below the open-ended top class is written as-is.
    if value != top_class:
        return str(value)

    return "%d+" % top_class
def write_bike_availability(value):
if value == c.BIKE_AVAILABILITY_FOR_ALL:
return "FOR_ALL"
......@@ -25,6 +30,7 @@ def write_bike_availability(value):
else:
return "FOR_NONE"
def add_household(writer, household, member_ids):
writer.start_household(household[1])
writer.add_members(member_ids)
......@@ -45,27 +51,28 @@ def add_household(writer, household, member_ids):
writer.end_household()
def execute(context):
cache_path = context.cache_path
df_persons = context.stage("population.sociodemographics").sort_values(by = ["household_id", "person_id"])
df_persons = context.stage("population.sociodemographics").sort_values(by=["household_id", "person_id"])
df_persons = df_persons[FIELDS]
with gzip.open("%s/households.xml.gz" % cache_path, "w+") as f:
with io.BufferedWriter(f, buffer_size = 1024 * 1024 * 1024 * 2) as raw_writer:
with io.BufferedWriter(f, buffer_size=1024 * 1024 * 1024 * 2) as raw_writer:
writer = matsim.writers.HouseholdsWriter(raw_writer)
writer.start_households()
household = [None, None]
member_ids = []
for item in tqdm(df_persons.itertuples(), total = len(df_persons)):
if True: #item[4] >= c.MZ_AGE_THRESHOLD: # Here we filter out young person without actvity chain
if not household[1] == item[1]:
if household[0] is not None: add_household(writer, household, member_ids)
household, member_ids = item, [item[2]]
else:
member_ids.append(item[2])
for item in tqdm(df_persons.itertuples(), total=len(df_persons)):
# if item[4] >= c.MZ_AGE_THRESHOLD: # Here we filter out young person without activity chain
if not household[1] == item[1]:
if household[0] is not None: add_household(writer, household, member_ids)
household, member_ids = item, [item[2]]
else:
member_ids.append(item[2])
if household[0] is not None: add_household(writer, household, member_ids)
......
......@@ -14,12 +14,12 @@ def execute(context):
], cwd = context.cache_path)
sp.check_call([
"git", "checkout", "v1.0.2"
"git", "checkout", "v1.0.5"
], cwd = "%s/eqasim-java" % context.cache_path)
sp.check_call([
"mvn", "-Pstandalone", "package"
], cwd = "%s/eqasim-java" % context.cache_path)
jar = "%s/eqasim-java/switzerland/target/switzerland-1.0.2.jar" % context.cache_path
jar = "%s/eqasim-java/switzerland/target/switzerland-1.0.5.jar" % context.cache_path
return jar
......@@ -9,21 +9,24 @@ def configure(context, require):
def execute(context):
    """Build the pt2matsim jar and set up a private Java temp directory.

    Clones pt2matsim into the stage cache, checks out tag ``v19.10``, builds
    it with Maven and smoke-tests the shaded jar by generating a default OSM
    config.

    Returns:
        A ``(jar, tmp_path)`` tuple: the path of the shaded jar and the path
        of the ``java_tmp`` directory inside the cache. Dependent stages
        unpack both values and hand ``tmp_path`` to the JVM as
        ``java.io.tmpdir``.
    """
    java = context.stage("utils.java")

    # Dedicated temp directory inside the cache, so that Java does not depend
    # on the shared system /tmp.
    tmp_path = "%s/java_tmp" % context.cache_path
    os.mkdir(tmp_path)

    repository_path = "%s/pt2matsim" % context.cache_path

    sp.check_call([
        "git", "clone", "https://github.com/matsim-org/pt2matsim.git"
    ], cwd = context.cache_path)

    # Only the current tag is checked out; keeping the superseded "v19.5"
    # line next to it would silently concatenate adjacent string literals.
    sp.check_call([
        "git", "checkout", "v19.10"
    ], cwd = repository_path)

    sp.check_call([
        "mvn", "-Djava.io.tmpdir=%s" % tmp_path, "package"
    ], cwd = repository_path)

    jar = "%s/target/pt2matsim-19.10-shaded.jar" % repository_path

    # Smoke test: the jar must be runnable and able to write a config file.
    # The temp directory is passed here as well, consistent with every other
    # pt2matsim invocation in the pipeline.
    java(jar, "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", ["test_config.xml"],
        cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])

    assert(os.path.exists("%s/test_config.xml" % context.cache_path))
    # NOTE(review): presumably GeoTools creates this directory under
    # java.io.tmpdir, which would prove the redirect took effect - confirm.
    assert(os.path.exists("%s/GeoTools" % tmp_path))

    return jar, tmp_path
......@@ -7,7 +7,7 @@ def configure(context, require):
require.config("raw_data_path")
def execute(context):
jar = context.stage("matsim.java.pt2matsim")
jar, tmp_path = context.stage("matsim.java.pt2matsim")
java = context.stage("utils.java")
# Create MATSim schedule
......@@ -15,8 +15,9 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.Hafas2TransitSchedule", [
"%s/hafas" % context.config["raw_data_path"], "EPSG:2056",
"%s/transit_schedule.xml.gz" % context.cache_path,
"%s/transit_vehicles.xml.gz" % context.cache_path
], cwd = context.cache_path)
"%s/transit_vehicles.xml.gz" % context.cache_path,
context.config["hafas_date"]
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
assert(os.path.exists("%s/transit_schedule.xml.gz" % context.cache_path))
assert(os.path.exists("%s/transit_vehicles.xml.gz" % context.cache_path))
......
......@@ -7,14 +7,14 @@ def configure(context, require):
require.config("raw_data_path")
def execute(context):
jar = context.stage("matsim.java.pt2matsim")
jar, tmp_path = context.stage("matsim.java.pt2matsim")
java = context.stage("utils.java")
# Create MATSim network
java(jar, "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", [
"convert_network_template.xml"
], cwd = context.cache_path)
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
content = open("%s/convert_network_template.xml" % context.cache_path).read()
......@@ -58,7 +58,7 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.Osm2MultimodalNetwork", [
"convert_network.xml"
], cwd = context.cache_path)
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
assert(os.path.exists("%s/converted_network.xml.gz" % context.cache_path))
return "%s/converted_network.xml.gz" % context.cache_path
......@@ -8,7 +8,7 @@ def configure(context, require):
require.stage("matsim.network.convert_hafas")
def execute(context):
jar = context.stage("matsim.java.pt2matsim")
jar, tmp_path = context.stage("matsim.java.pt2matsim")
java = context.stage("utils.java")
unmapped_network_path = context.stage("matsim.network.convert_osm")
......@@ -18,7 +18,7 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.CreateDefaultPTMapperConfig", [
"map_network_template.xml"
], cwd = context.cache_path)
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
content = open("%s/map_network_template.xml" % context.cache_path).read()
......@@ -57,7 +57,7 @@ def execute(context):
java(jar, "org.matsim.pt2matsim.run.PublicTransitMapper", [
"map_network.xml"
], cwd = context.cache_path)
], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
assert(os.path.exists("%s/mapped_network.xml.gz" % context.cache_path))
assert(os.path.exists("%s/mapped_schedule.xml.gz" % context.cache_path))
......
......@@ -8,12 +8,13 @@ def configure(context, require):
def execute(context):
    """Run pt2matsim's CheckMappedSchedulePlausibility on the mapped outputs."""
    java = context.stage("utils.java")
    # The pt2matsim stage provides the jar together with the Java temp directory.
    jar, tmp_path = context.stage("matsim.java.pt2matsim")
    paths = context.stage("matsim.network.mapped")

    # Do plausibility checks
    # java.io.tmpdir is a JVM option, so it is passed through vm_arguments and
    # not inside the program argument list, where it would be taken as the
    # first positional argument. tmp_path already is the temp directory
    # itself, so no further "java_tmp" component is appended.
    java(jar, "org.matsim.pt2matsim.run.CheckMappedSchedulePlausibility", [
        paths["schedule"], paths["network"], "EPSG:2056", context.cache_path
    ], cwd = context.cache_path, vm_arguments = ["-Djava.io.tmpdir=%s" % tmp_path])
......
......@@ -37,6 +37,11 @@ class PersonWriter:
writer.add_attribute("ptHasVerbund", "java.lang.Boolean", writer.true_false(self.person[11]))
writer.add_attribute("ptHasStrecke", "java.lang.Boolean", writer.true_false(self.person[12]))
writer.add_attribute("isCarPassenger", "java.lang.Boolean", writer.true_false(self.person[14]))
writer.add_attribute("statpopPersonId", "java.lang.Long", str(self.person[15]))
writer.add_attribute("statpopHouseholdId", "java.lang.Long", str(self.person[16]))
writer.add_attribute("mzPersonId", "java.lang.Long", str(self.person[17]))
writer.add_attribute("mzHeadId", "java.lang.Long", str(self.person[18]))
writer.end_attributes()
# Plan
......@@ -100,7 +105,7 @@ class FreightWriter:
writer.end_plan()
writer.end_person()
PERSON_FIELDS = ["person_id", "age", "car_availability", "employed", "driving_license", "sex", "home_x", "home_y", "subscriptions_ga", "subscriptions_halbtax", "subscriptions_verbund", "subscriptions_strecke", "household_id", "is_car_passenger"]
PERSON_FIELDS = ["person_id", "age", "car_availability", "employed", "driving_license", "sex", "home_x", "home_y", "subscriptions_ga", "subscriptions_halbtax", "subscriptions_verbund", "subscriptions_strecke", "household_id", "is_car_passenger", "statpop_person_id", "statpop_household_id", "mz_person_id", "mz_head_id"]
ACTIVITY_FIELDS = ["person_id", "activity_id", "start_time", "end_time", "duration", "purpose", "is_last", "location_x", "location_y", "location_id", "following_mode"]
def execute(context):
......
......@@ -56,4 +56,11 @@ def execute(context):
# Make sure we have no NaNs included (commented out, because home_quarter_id MAY be NaN deliberately)
#assert(len(df_persons.drop(["mz_person_id", "mz_head_id"], axis = 1).dropna()) == len(df_matching))
# Make sure all mz_id == NaN are agents under threshold age
assert(np.sum(df_persons[df_persons["mz_person_id"].isna()]["age"] >= c.MZ_AGE_THRESHOLD) == 0)
# Set mz_person_id == NaN to -1 and format ids to int
df_persons["mz_person_id"] = df_persons["mz_person_id"].fillna(-1).astype(int)
df_persons["mz_head_id"] = df_persons["mz_head_id"].fillna(-1).astype(int)
return df_persons
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment