Commit f69fb0b5 authored by tchervec

Merge branch '54-port-pipeline-to-synpp' into develop

parents 7295912e 6c746bbc
test:
  script:
    - source environment/activate.sh env
    - rm -rf output cache
    - mkdir output cache
    - python3 -u run.py config_gitlab.yml
    - python3 --version
    - java -version
    - mvn -version
    - python3 -m synpp config_gitlab.yml
  before_script:
    - sh environment/setup.sh env
    - sh environment/activate.sh env
    # Anaconda
    - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
    - bash miniconda.sh -b -p $HOME/miniconda
    - source "$HOME/miniconda/etc/profile.d/conda.sh"
    - conda config --set always_yes yes --set changeps1 no
    - conda update -q conda
    - conda env create -f environment.yml
    - conda activate switzerland
    # Java
    - wget https://github.com/AdoptOpenJDK/openjdk11-binaries/releases/download/jdk-11.0.7%2B10/OpenJDK11U-jdk_x64_linux_hotspot_11.0.7_10.tar.gz -O java.tar.gz
    - tar xf java.tar.gz
    - export PATH=$HOME/jdk-11.0.7+10/bin:$PATH
    # Maven
    - wget http://mirror.easyname.ch/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -O maven.tar.gz
    - tar xf maven.tar.gz
    - export PATH=$HOME/apache-maven-3.6.3/bin:$PATH
    # eqasim-python
    - git clone https://github.com/eqasim-org/eqasim-python.git $HOME/eqasim-python
    - sh -c "cd $HOME/eqasim-python; git pull origin master; python3 setup.py install"
  cache:
    key: environment_v2
......
**Version v2 (master)**

**3.0.0**

- Port code to use `synpp` (see the migration sketch below)

**2.0.0**

- Update Maven dependency
- Update eqasim to 1.0.5
......
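The heart of this merge is the stage-API migration that repeats through every diff below, so it is worth summarizing once. Here is a minimal, hypothetical stage module in both styles (`data.example.dependency` and the config keys are placeholders, not actual pipeline stages): the pre-synpp pipeline declared dependencies on a separate `require` object passed to `configure` and exposed configuration as a dict, while synpp declares dependencies through calls on the `context` itself and reads configuration with `context.config(...)`.

```python
# Before the port: dependencies are declared on a separate `require` object,
# and configuration is read dict-style from the context.
def configure(context, require):
    require.config("raw_data_path")             # declare a config dependency
    require.stage("data.example.dependency")    # declare a stage dependency

def execute(context):
    raw_data_path = context.config["raw_data_path"]   # dict-style access
    return context.stage("data.example.dependency")   # consume upstream output

# After the port to synpp: `configure` receives only the context, and the same
# `context.config(...)` / `context.stage(...)` calls both declare dependencies
# (in configure) and retrieve values (in execute).
def configure(context):
    context.config("data_path")
    context.stage("data.example.dependency")

def execute(context):
    data_path = context.config("data_path")            # call-style access
    return context.stage("data.example.dependency")
```

Note that stage access in `execute` is unchanged; what moves is the declaration site and the config accessor.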
v3.0.0
raw_data_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/data
target_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/temp
output_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/temp
threads: 4
hot_deck_matching_runners: 2
disable_progress_bar: false
java_memory: 10G
input_downsampling: 0.01
enable_scaling: true
scaling_year: 2020
use_freight: true
stages:
# General pipeline settings
working_directory: /home/tchervec/Documents/data/switzerland/cache
# Requested stages
run:
  - data.statpop.projections.households
  - data.statpop.scaled
  #- data.microcensus.trips
  #- population.spatial.locations
  #- population.trips
  #- data.microcensus.persons
  #- data.microcensus.households
  #- population.sociodemographics
  #- matsim.population
  #- matsim.population_attributes
  #- matsim.households
  #- matsim.population
  #- data.microcensus.commute_extrapolation
  #- population.spatial.education_locations
  #- population.spatial.by_person.education_locations
  #- population.spatial.by_activity.subprimary_locations
  #- population.spatial.activity_locations
  #- population.opportunities
  #- population.activities
  #- population.commute
  #- population.secondary_locations
  #- population.fixed_locations
  #- population.commute_ratios
# These are configuration options that we use in the pipeline
config:
  threads: 4
  hot_deck_matching_runners: 2
  disable_progress_bar: false
  java_memory: 10G
  input_downsampling: 0.01
  enable_scaling: true
  scaling_year: 2020
  use_freight: true
  hafas_date: 01.10.2018
  data_path: /home/tchervec/Documents/data/switzerland/data
raw_data_path: /data
data_path: /data
target_path: /cache
output_path: /cache
threads: 4
......
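The configuration files in this merge follow the new synpp layout: pipeline-level settings such as `working_directory`, the list of requested stages under `run`, and the options handed to the stages under `config` (each key there matches a `context.config(...)` declaration in some stage). A minimal sketch of inspecting such a file, assuming only PyYAML and a placeholder filename; this is not the pipeline's own loader:

```python
import yaml

# Load a synpp-style config and report what would be run.
with open("config_gitlab.yml") as f:  # placeholder path
    settings = yaml.safe_load(f)

print("working directory:", settings["working_directory"])
print("requested stages: ", settings["run"])
print("stage options:    ", sorted(settings["config"]))
```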
raw_data_path: /nas/ivtmatsim/scenarios/switzerland/data
target_path: temp
output_path: output
threads: 24
hot_deck_matching_runners: 24
disable_progress_bar: true
java_memory: 100G
input_downsampling: 0.01
enable_scaling: true
scaling_year: 2045
use_freight: true
hafas_date: 01.10.2018
stages:
# General pipeline settings
working_directory: /nas/ivtmatsim/scenarios/switzerland/data
# Requested stages
run:
  - matsim.run
  - matsim.mz.population
  - data.microcensus.csv
# These are configuration options that we use in the pipeline
config:
  threads: 24
  hot_deck_matching_runners: 24
  disable_progress_bar: true
  java_memory: 100G
  input_downsampling: 0.01
  enable_scaling: true
  scaling_year: 2045
  use_freight: true
  hafas_date: 01.10.2018
  data_path: /nas/ivtmatsim/scenarios/switzerland/data
import os
from datetime import datetime
import jinja2
import yaml
from datetime import datetime
import os

def configure(context, require):
def configure(context):
    pass

def min_date(dates):
......
import pandas as pd

def configure(context, require):
    require.config("raw_data_path")
def configure(context):
    context.config("data_path")

def execute(context):
    raw_data_path = context.config["raw_data_path"]
    df = pd.read_csv("%s/freight/departure_times.csv" % raw_data_path, sep=";")
    data_path = context.config("data_path")
    df = pd.read_csv("%s/freight/departure_times.csv" % data_path, sep=";")
    return df
\ No newline at end of file
import pandas as pd

RENAMES = {"ORIGIN": "origin_nuts_id",
           "DESTINATION": "destination_nuts_id",
           "CH_MUNICIPALITY_ORIGIN": "origin_municipality",
@@ -32,9 +30,9 @@ VEHICLE_TYPES = {
}

def configure(context, require):
    require.stage("data.freight.gqgv.raw")
    require.stage("data.spatial.nuts")
def configure(context):
    context.stage("data.freight.gqgv.raw")
    context.stage("data.spatial.nuts")

def execute(context):
......
import pandas as pd
import numpy as np
import pandas as pd

def configure(context, require):
    require.stage("data.freight.gqgv.cleaned")
    require.stage("data.freight.scaling_factor")
    require.config("enable_scaling")
def configure(context):
    context.stage("data.freight.gqgv.cleaned")
    context.stage("data.freight.scaling_factor")
    context.config("enable_scaling")

def execute(context):
@@ -32,7 +32,7 @@ def execute(context):
    demands[vehicle_type] = int(np.round(np.sum(matrix_values) / number_of_days))

    # scale demand
    if context.config["enable_scaling"]:
    if context.config("enable_scaling"):
        demands[vehicle_type] *= context.stage("data.freight.scaling_factor")

    # make sure each from sums up to one
......
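The demand hunk above collapses a yearly OD matrix into one daily vehicle count and optionally applies the projection factor. A self-contained sketch of that arithmetic with made-up numbers (the real inputs come from the GQGV data and the `data.freight.scaling_factor` stage):

```python
import numpy as np

# Hypothetical yearly OD matrix and assumed stand-ins for pipeline inputs.
matrix_values = np.array([[120000.0, 80000.0],
                          [60000.0, 40000.0]])
number_of_days = 365
scaling_factor = 1.25    # stand-in for data.freight.scaling_factor
enable_scaling = True    # stand-in for the enable_scaling config option

# As in the diff: total yearly trips divided by days, rounded to an integer.
demand = int(np.round(np.sum(matrix_values) / number_of_days))  # -> 822

# Optional projection scaling, guarded by enable_scaling.
if enable_scaling:
    demand *= scaling_factor  # -> 1027.5

print(demand)
```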
import pandas as pd

def configure(context, require):
    require.config("raw_data_path")
def configure(context):
    context.config("data_path")

def execute(context):
    raw_data_path = context.config["raw_data_path"]
    df = pd.read_csv("%s/freight/gqgv/GQGV_2014/GQGV_2014_Mikrodaten.csv" % raw_data_path, sep=";")
    data_path = context.config("data_path")
    df = pd.read_csv("%s/freight/gqgv/GQGV_2014/GQGV_2014_Mikrodaten.csv" % data_path, sep=";")
    return df
......
@@ -27,9 +27,9 @@ VEHICLE_TYPES = {
}

def configure(context, require):
    require.stage("data.freight.gte.raw")
    require.stage("data.spatial.nuts")
def configure(context):
    context.stage("data.freight.gte.raw")
    context.stage("data.spatial.nuts")

def execute(context):
......
import pandas as pd
import numpy as np
import pandas as pd

def configure(context, require):
    require.stage("data.freight.gte.cleaned")
    require.stage("data.freight.scaling_factor")
    require.config("enable_scaling")
def configure(context):
    context.stage("data.freight.gte.cleaned")
    context.stage("data.freight.scaling_factor")
    context.config("enable_scaling")

def execute(context):
@@ -37,7 +37,7 @@ def execute(context):
    demands[vehicle_type] = int(np.round(np.sum(matrix_values) / number_of_weeks / number_of_weekdays))

    # scale demand
    if context.config["enable_scaling"]:
    if context.config("enable_scaling"):
        demands[vehicle_type] *= context.stage("data.freight.scaling_factor")

    # make sure each from sums up to one
......
import pandas as pd

def configure(context, require):
    require.config("raw_data_path")
def configure(context):
    context.config("data_path")

def execute(context):
    raw_data_path = context.config["raw_data_path"]
    data_path = context.config("data_path")
    df_transport = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/transport.csv" % raw_data_path, sep=";", low_memory=False)
    df_journey = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/journeych.csv" % raw_data_path, sep=";", low_memory=False)
    df_week = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/week.csv" % raw_data_path, sep=";", low_memory=False)
    df_transport = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/transport.csv" % data_path, sep=";", low_memory=False)
    df_journey = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/journeych.csv" % data_path, sep=";", low_memory=False)
    df_week = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/week.csv" % data_path, sep=";", low_memory=False)
    return df_transport, df_journey, df_week
......
import pandas as pd
import numpy as np
import pandas as pd
import data.constants as c

INDEX_RENAMES = {0: "total",
@@ -7,20 +8,20 @@ INDEX_RENAMES = {0: "total",
                 2: "delivery_van"}

def configure(context, require):
    require.config("raw_data_path")
    require.config("scaling_year")
def configure(context):
    context.config("data_path")
    context.config("scaling_year")

def execute(context):
    raw_data_path = context.config["raw_data_path"]
    data_path = context.config("data_path")

    # Select year in the future to project to
    scaling_year = np.max([c.BASE_SCALING_YEAR, context.config["scaling_year"]])
    scaling_year = np.max([c.BASE_SCALING_YEAR, context.config("scaling_year")])

    # Load excel for projections
    df = pd.read_excel(
        "%s/projections/are/freight/Verkehrsperspektiven_2040_Ergebnisse_Gueterverkehr_de.xlsx" % raw_data_path,
        "%s/projections/are/freight/Verkehrsperspektiven_2040_Ergebnisse_Gueterverkehr_de.xlsx" % data_path,
        sheet_name="Fahrzeugkilometer_Referenz", header=9,
        index_col=None, nrows=3
    ).dropna(axis=1)[[2010, 2020, 2030, 2040]].rename(index=INDEX_RENAMES).reset_index().rename(columns={"index": "type"})
......
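One detail in the projections hunk: `np.max([c.BASE_SCALING_YEAR, context.config("scaling_year")])` clamps the configured year so it never falls before the base year of the ARE projections. A tiny illustration with an assumed base year (the real value lives in `data.constants` and is not shown here):

```python
import numpy as np

BASE_SCALING_YEAR = 2017  # assumed stand-in for c.BASE_SCALING_YEAR

# Take the later of the base year and the configured scaling year.
print(np.max([BASE_SCALING_YEAR, 2045]))  # 2045: config value kept
print(np.max([BASE_SCALING_YEAR, 2010]))  # 2017: clamped up to the base year
```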
import pandas as pd
import numpy as np

def configure(context, require):
    require.stage("data.freight.gte.cleaned")
    require.stage("data.freight.gqgv.cleaned")
    require.stage("data.freight.projections")
def configure(context):
    context.stage("data.freight.gte.cleaned")
    context.stage("data.freight.gqgv.cleaned")
    context.stage("data.freight.projections")

def execute(context):
......
import pandas as pd
import numpy as np
import data.constants as c
import pandas as pd

def configure(context, require):
    require.stage("data.microcensus.trips")
    require.stage("data.microcensus.persons")
def configure(context):
    context.stage("data.microcensus.trips")
    context.stage("data.microcensus.persons")

def execute(context):
    df_trips = context.stage("data.microcensus.trips")
......
import gzip
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.neighbors import KDTree
import numpy.linalg as la
import pandas as pd

def configure(context, require):
    require.stage("data.microcensus.trips")
    require.stage("data.microcensus.persons")
    require.stage("data.microcensus.commute")
def configure(context):
    context.stage("data.microcensus.trips")
    context.stage("data.microcensus.persons")
    context.stage("data.microcensus.commute")

# TODO: Merge this into data.microcensus.commute
......
import pandas as pd
import numpy as np
import data.utils
import data.spatial.utils
import data.constants as c
import pyproj
import geopandas as gpd

def configure(context, require):
    require.stage("data.microcensus.persons")
    require.stage("data.microcensus.trips")
    require.stage("data.microcensus.transit")
def configure(context):
    context.stage("data.microcensus.persons")
    context.stage("data.microcensus.trips")
    context.stage("data.microcensus.transit")

def execute(context):
    df_persons = context.stage("data.microcensus.persons")
......
import pandas as pd
import numpy as np
import data.utils
import data.constants as c
import pandas as pd
import pyproj
import data.constants as c
import data.spatial.cantons
import data.spatial.municipalities
import data.spatial.zones
import data.utils
import data.spatial.utils
import data.spatial.municipality_types
import data.spatial.cantons
import data.spatial.ovgk
import data.spatial.utils
import data.spatial.zones
import data.utils
import data.utils

def configure(context):
    context.config("data_path")
    context.stage("data.spatial.municipalities")
    context.stage("data.spatial.zones")
    context.stage("data.spatial.municipality_types")
    context.stage("data.statpop.density")
    context.stage("data.spatial.ovgk")
def configure(context, require):
    require.config("raw_data_path")
    require.stage("data.spatial.municipalities")
    require.stage("data.spatial.zones")
    require.stage("data.spatial.municipality_types")
    require.stage("data.statpop.density")
    require.stage("data.spatial.ovgk")

def execute(context):
    raw_data_path = context.config["raw_data_path"]
    data_path = context.config("data_path")

    df_mz_households = pd.read_csv(
        "%s/microcensus/haushalte.csv" % raw_data_path, sep = ",", encoding = "latin1")
        "%s/microcensus/haushalte.csv" % data_path, sep=",", encoding="latin1")

    # Simple attributes
    df_mz_households["home_structure"] = df_mz_households["W_STRUKTUR_AGG_2000"]
@@ -34,12 +37,12 @@ def execute(context):
    df_mz_households["household_weight"] = df_mz_households["WM"]

    # Income
    df_mz_households["income_class"] = df_mz_households["F20601"] - 1 # Turn into zero-based class
    df_mz_households["income_class"] = np.maximum(-1, df_mz_households["income_class"]) # Make all "invalid" entries -1
    df_mz_households["income_class"] = df_mz_households["F20601"] - 1  # Turn into zero-based class
    df_mz_households["income_class"] = np.maximum(-1, df_mz_households["income_class"])  # Make all "invalid" entries -1

    # Convert coordinates to LV95
    coords = df_mz_households[["W_X_CH1903", "W_Y_CH1903"]].values
    x, y = pyproj.transform(c.CH1903, c.CH1903_PLUS, coords[:,0], coords[:,1])
    x, y = pyproj.transform(c.CH1903, c.CH1903_PLUS, coords[:, 0], coords[:, 1])
    df_mz_households.loc[:, "home_x"] = x
    df_mz_households.loc[:, "home_y"] = y
@@ -50,7 +53,8 @@ def execute(context):
    # Bike availability depends on household size. (TODO: Would it make sense to use the same concept for cars?)
    df_mz_households["number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_NONE
    df_mz_households.loc[df_mz_households["number_of_bikes"] > 0, "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_SOME
    df_mz_households.loc[
        df_mz_households["number_of_bikes"] > 0, "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_SOME
    df_mz_households.loc[
        df_mz_households["number_of_bikes"] >= df_mz_households["household_size"],
        "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_ALL
@@ -69,26 +73,26 @@ def execute(context):
    df_municipality_types = context.stage("data.spatial.municipality_types")

    df_spatial = pd.DataFrame(df_mz_households[["person_id", "home_x", "home_y"]])
    df_spatial = data.spatial.utils.to_gpd(df_spatial, "home_x", "home_y")
    df_spatial = data.spatial.utils.impute(df_spatial, df_municipalities, "person_id", "municipality_id")
    df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")
    df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")
    df_spatial = data.spatial.zones.impute(df_spatial, df_zones)
    df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)

    df_mz_households = pd.merge(
        df_mz_households, df_spatial[["person_id", "zone_id", "municipality_type"]],
        on = "person_id"
        on="person_id"
    )
    df_mz_households["home_zone_id"] = df_mz_households["zone_id"]

    # Impute density
    data.statpop.density.impute(context.stage("data.statpop.density"), df_mz_households, "home_x", "home_y")
    data.statpop.density.impute(context, context.stage("data.statpop.density"), df_mz_households, "home_x", "home_y")

    # Impute OV Guteklasse
    print("Imputing ÖV Güteklasse ...")
    df_ovgk = context.stage("data.spatial.ovgk")
    df_spatial = data.spatial.ovgk.impute(df_ovgk, df_spatial, ["person_id"])
    df_mz_households = pd.merge(df_mz_households, df_spatial[["person_id", "ovgk"]], on = ["person_id"], how = "left")
    df_spatial = data.spatial.utils.impute(context, df_ovgk, df_spatial, ["person_id"])
    df_mz_households = pd.merge(df_mz_households, df_spatial[["person_id", "ovgk"]], on=["person_id"], how="left")

    # Wrap it up
    return df_mz_households[[
......
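A pattern that recurs in the households diff (and throughout this merge) is that the synpp `context` is now threaded through the spatial helpers, e.g. `data.spatial.utils.to_gpd(context, ...)` and `data.spatial.utils.impute(context, ...)`. The helpers themselves are not part of this diff; a hypothetical helper with the new signature might look as follows, merely carrying the context so utilities can reach pipeline services:

```python
import geopandas as gpd
import pandas as pd

def to_gpd(context, df, x="x", y="y"):
    # Hypothetical sketch, not the repository's implementation. The context is
    # carried along so helpers could use pipeline services (configuration,
    # progress reporting); this sketch does not touch it.
    df = pd.DataFrame(df)
    df["geometry"] = gpd.points_from_xy(df[x], df[y])
    # EPSG:2056 is the Swiss LV95 frame that the household coordinates are
    # converted to earlier in this stage.
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:2056")
```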