Commit f69fb0b5 authored by tchervec

Merge branch '54-port-pipeline-to-synpp' into develop

parents 7295912e 6c746bbc
 test:
   script:
-    - source environment/activate.sh env
-    - rm -rf output cache
-    - mkdir output cache
-    - python3 -u run.py config_gitlab.yml
+    - python3 --version
+    - java -version
+    - mvn -version
+    - python3 -m synpp config_gitlab.yml
   before_script:
-    - sh environment/setup.sh env
-    - sh environment/activate.sh env
+    # Anaconda
+    - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+    - bash miniconda.sh -b -p $HOME/miniconda
+    - source "$HOME/miniconda/etc/profile.d/conda.sh"
+    - conda config --set always_yes yes --set changeps1 no
+    - conda update -q conda
+    - conda env create -f environment.yml
+    - conda activate switzerland
+    # Java
+    - wget https://github.com/AdoptOpenJDK/openjdk11-binaries/releases/download/jdk-11.0.7%2B10/OpenJDK11U-jdk_x64_linux_hotspot_11.0.7_10.tar.gz -O java.tar.gz
+    - tar xf java.tar.gz
+    - export PATH=$HOME/jdk-11.0.7+10/bin:$PATH
+    # Maven
+    - wget http://mirror.easyname.ch/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -O maven.tar.gz
+    - tar xf maven.tar.gz
+    - export PATH=$HOME/apache-maven-3.6.3/bin:$PATH
+    # eqasim-python
+    - git clone https://github.com/eqasim-org/eqasim-python.git $HOME/eqasim-python
+    - sh -c "cd $HOME/eqasim-python; git pull origin master; python3 setup.py install"
   cache:
     key: environment_v2
......
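The test job no longer goes through the project's own run.py entry point; the pipeline is now started with synpp's module runner, `python3 -m synpp config_gitlab.yml`. Conceptually this amounts to loading the YAML file and handing its three sections to synpp's programmatic entry point, roughly as in the sketch below (assuming synpp's documented `synpp.run(definitions, config=..., working_directory=...)` signature; this is not synpp's actual source):

# Rough equivalent of `python3 -m synpp config_gitlab.yml` (a sketch,
# not synpp's real implementation).
import sys

import synpp
import yaml

with open(sys.argv[1]) as f:
    settings = yaml.safe_load(f)

synpp.run(
    # every entry under `run:` becomes a stage definition
    [dict(descriptor=stage) for stage in settings["run"]],
    # values under `config:` are served to stages via context.config(...)
    config=settings.get("config", {}),
    # stage results are cached here between runs
    working_directory=settings.get("working_directory"),
)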
-**Version v2 (master)**
+**3.0.0**
+
+- Port code to use `synpp`
+
+**2.0.0**
 
 - Update Maven dependency
 - Update eqasim to 1.0.5
......
-raw_data_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/data
-target_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/temp
-output_path: /run/media/sebastian/shoerl_data/scenarios/switzerland/temp
-threads: 4
-hot_deck_matching_runners: 2
-disable_progress_bar: false
-java_memory: 10G
-input_downsampling: 0.01
-enable_scaling: true
-scaling_year: 2020
-use_freight: true
-stages:
+# General pipeline settings
+working_directory: /home/tchervec/Documents/data/switzerland/cache
+
+# Requested stages
+run:
 - data.statpop.projections.households
 - data.statpop.scaled
-#- data.microcensus.trips
-#- population.spatial.locations
-#- population.trips
-#- data.microcensus.persons
-#- data.microcensus.households
-#- population.sociodemographics
-#- matsim.population
-#- matsim.population_attributes
-#- matsim.households
-#- matsim.population
-#- data.microcensus.commute_extrapolation
-#- population.spatial.education_locations
-#- population.spatial.by_person.education_locations
-#- population.spatial.by_activity.subprimary_locations
-#- population.spatial.activity_locations
-#- population.opportunities
-#- population.activities
-#- population.commute
-#- population.secondary_locations
-#- population.fixed_locations
-#- population.commute_ratios
+
+# These are configuration options that we use in the pipeline
+config:
+  threads: 4
+  hot_deck_matching_runners: 2
+  disable_progress_bar: false
+  java_memory: 10G
+  input_downsampling: 0.01
+  enable_scaling: true
+  scaling_year: 2020
+  use_freight: true
+  hafas_date: 01.10.2018
+  data_path: /home/tchervec/Documents/data/switzerland/data
-raw_data_path: /data
+data_path: /data
 target_path: /cache
 output_path: /cache
 threads: 4
......
-raw_data_path: /nas/ivtmatsim/scenarios/switzerland/data
-target_path: temp
-output_path: output
-threads: 24
-hot_deck_matching_runners: 24
-disable_progress_bar: true
-java_memory: 100G
-input_downsampling: 0.01
-enable_scaling: true
-scaling_year: 2045
-use_freight: true
-hafas_date: 01.10.2018
-stages:
+# General pipeline settings
+working_directory: /nas/ivtmatsim/scenarios/switzerland/data
+
+# Requested stages
+run:
 - matsim.run
 - matsim.mz.population
 - data.microcensus.csv
+
+# These are configuration options that we use in the pipeline
+config:
+  threads: 24
+  hot_deck_matching_runners: 24
+  disable_progress_bar: true
+  java_memory: 100G
+  input_downsampling: 0.01
+  enable_scaling: true
+  scaling_year: 2045
+  use_freight: true
+  hafas_date: 01.10.2018
+  data_path: /nas/ivtmatsim/scenarios/switzerland/data
+import os
+from datetime import datetime
+
 import jinja2
 import yaml
-from datetime import datetime
-import os
 
-def configure(context, require):
+def configure(context):
     pass
 
 def min_date(dates):
......
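Every Python hunk below applies the same mechanical migration: the old runner passed a separate `require` object to `configure` and exposed configuration as a dictionary (`context.config["key"]`), while synpp declares dependencies and config requirements on the context itself and uses call syntax everywhere, in both `configure` and `execute`. A minimal sketch of the pattern with placeholder names:

# Before (old runner), shown for contrast:
#
#   def configure(context, require):
#       require.stage("some.other.stage")   # declare a stage dependency
#       require.config("data_path")         # declare a config requirement
#
#   def execute(context):
#       path = context.config["data_path"]  # dict-style lookup

# After (synpp):
def configure(context):
    context.stage("some.other.stage")  # declare a stage dependency
    context.config("data_path")        # declare a required config value

def execute(context):
    path = context.config("data_path")            # call-style lookup
    upstream = context.stage("some.other.stage")  # cached result of the dependency
    return upstream, path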
 import pandas as pd
 
-def configure(context, require):
-    require.config("raw_data_path")
+def configure(context):
+    context.config("data_path")
 
 def execute(context):
-    raw_data_path = context.config["raw_data_path"]
+    data_path = context.config("data_path")
 
-    df = pd.read_csv("%s/freight/departure_times.csv" % raw_data_path, sep=";")
+    df = pd.read_csv("%s/freight/departure_times.csv" % data_path, sep=";")
 
     return df
\ No newline at end of file
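Because the stage declares `data_path` in `configure`, it can be run on its own, with synpp caching the returned DataFrame in the working directory. A hypothetical stand-alone invocation (the descriptor `data.freight.departure_times` is a guess at the module path, and it assumes `synpp.run` returns one result per requested definition):

import synpp

# Hypothetical: run the raw departure-times stage in isolation.
results = synpp.run(
    [dict(descriptor="data.freight.departure_times")],
    config=dict(data_path="/data"),  # satisfies context.config("data_path")
    working_directory="cache",       # cached stage output lands here
)

df = results[0]  # the DataFrame returned by execute()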
 import pandas as pd
 
 RENAMES = {"ORIGIN":"origin_nuts_id",
            "DESTINATION":"destination_nuts_id",
            "CH_MUNICIPALITY_ORIGIN": "origin_municipality",
@@ -32,9 +30,9 @@ VEHICLE_TYPES = {
 }
 
-def configure(context, require):
-    require.stage("data.freight.gqgv.raw")
-    require.stage("data.spatial.nuts")
+def configure(context):
+    context.stage("data.freight.gqgv.raw")
+    context.stage("data.spatial.nuts")
 
 def execute(context):
......
-import pandas as pd
 import numpy as np
+import pandas as pd
 
-def configure(context, require):
-    require.stage("data.freight.gqgv.cleaned")
-    require.stage("data.freight.scaling_factor")
-    require.config("enable_scaling")
+def configure(context):
+    context.stage("data.freight.gqgv.cleaned")
+    context.stage("data.freight.scaling_factor")
+    context.config("enable_scaling")
 
 def execute(context):
@@ -32,7 +32,7 @@ def execute(context):
         demands[vehicle_type] = int(np.round(np.sum(matrix_values) / number_of_days))
 
         # scale demand
-        if context.config["enable_scaling"]:
+        if context.config("enable_scaling"):
             demands[vehicle_type] *= context.stage("data.freight.scaling_factor")
 
         # make sure each from sums up to one
......
 import pandas as pd
 
-def configure(context, require):
-    require.config("raw_data_path")
+def configure(context):
+    context.config("data_path")
 
 def execute(context):
-    raw_data_path = context.config["raw_data_path"]
+    data_path = context.config("data_path")
 
-    df = pd.read_csv("%s/freight/gqgv/GQGV_2014/GQGV_2014_Mikrodaten.csv" % raw_data_path, sep=";")
+    df = pd.read_csv("%s/freight/gqgv/GQGV_2014/GQGV_2014_Mikrodaten.csv" % data_path, sep=";")
 
     return df
......
@@ -27,9 +27,9 @@ VEHICLE_TYPES = {
 }
 
-def configure(context, require):
-    require.stage("data.freight.gte.raw")
-    require.stage("data.spatial.nuts")
+def configure(context):
+    context.stage("data.freight.gte.raw")
+    context.stage("data.spatial.nuts")
 
 def execute(context):
......
-import pandas as pd
 import numpy as np
+import pandas as pd
 
-def configure(context, require):
-    require.stage("data.freight.gte.cleaned")
-    require.stage("data.freight.scaling_factor")
-    require.config("enable_scaling")
+def configure(context):
+    context.stage("data.freight.gte.cleaned")
+    context.stage("data.freight.scaling_factor")
+    context.config("enable_scaling")
 
 def execute(context):
@@ -37,7 +37,7 @@ def execute(context):
         demands[vehicle_type] = int(np.round(np.sum(matrix_values) / number_of_weeks / number_of_weekdays))
 
         # scale demand
-        if context.config["enable_scaling"]:
+        if context.config("enable_scaling"):
             demands[vehicle_type] *= context.stage("data.freight.scaling_factor")
 
         # make sure each from sums up to one
......
 import pandas as pd
 
-def configure(context, require):
-    require.config("raw_data_path")
+def configure(context):
+    context.config("data_path")
 
 def execute(context):
-    raw_data_path = context.config["raw_data_path"]
+    data_path = context.config("data_path")
 
-    df_transport = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/transport.csv" % raw_data_path, sep=";", low_memory=False)
-    df_journey = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/journeych.csv" % raw_data_path, sep=";", low_memory=False)
-    df_week = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/week.csv" % raw_data_path, sep=";", low_memory=False)
+    df_transport = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/transport.csv" % data_path, sep=";", low_memory=False)
+    df_journey = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/journeych.csv" % data_path, sep=";", low_memory=False)
+    df_week = pd.read_csv("%s/freight/gte/GTE_2017/Donnees/week.csv" % data_path, sep=";", low_memory=False)
 
     return df_transport, df_journey, df_week
......
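A stage's result is simply whatever its `execute` returns; here it is a tuple of three DataFrames. A downstream stage such as `data.freight.gte.cleaned` (declared above) can unpack them straight from `context.stage(...)`, since synpp hands back the cached return value unchanged. A sketch of the consuming side:

# Sketch of a consumer of the three-frame result of data.freight.gte.raw.
def configure(context):
    context.stage("data.freight.gte.raw")

def execute(context):
    # unpack exactly what the upstream execute() returned
    df_transport, df_journey, df_week = context.stage("data.freight.gte.raw")
    # ... clean, filter and merge the three tables ...
    return df_transport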
-import pandas as pd
 import numpy as np
+import pandas as pd
+
 import data.constants as c
 
 INDEX_RENAMES = {0: "total",
@@ -7,20 +8,20 @@ INDEX_RENAMES = {0: "total",
                  2: "delivery_van"}
 
-def configure(context, require):
-    require.config("raw_data_path")
-    require.config("scaling_year")
+def configure(context):
+    context.config("data_path")
+    context.config("scaling_year")
 
 def execute(context):
-    raw_data_path = context.config["raw_data_path"]
+    data_path = context.config("data_path")
 
     # Select year in the future to project to
-    scaling_year = np.max([c.BASE_SCALING_YEAR, context.config["scaling_year"]])
+    scaling_year = np.max([c.BASE_SCALING_YEAR, context.config("scaling_year")])
 
     # Load excel for projections
     df = pd.read_excel(
-        "%s/projections/are/freight/Verkehrsperspektiven_2040_Ergebnisse_Gueterverkehr_de.xlsx" % raw_data_path,
+        "%s/projections/are/freight/Verkehrsperspektiven_2040_Ergebnisse_Gueterverkehr_de.xlsx" % data_path,
         sheet_name="Fahrzeugkilometer_Referenz", header=9,
         index_col=None, nrows=3
     ).dropna(axis=1)[[2010,2020,2030,2040]].rename(index=INDEX_RENAMES).reset_index().rename(columns={"index":"type"})
......
-import pandas as pd
 import numpy as np
 
-def configure(context, require):
-    require.stage("data.freight.gte.cleaned")
-    require.stage("data.freight.gqgv.cleaned")
-    require.stage("data.freight.projections")
+def configure(context):
+    context.stage("data.freight.gte.cleaned")
+    context.stage("data.freight.gqgv.cleaned")
+    context.stage("data.freight.projections")
 
 def execute(context):
......
-import pandas as pd
 import numpy as np
-import data.constants as c
+import pandas as pd
 
-def configure(context, require):
-    require.stage("data.microcensus.trips")
-    require.stage("data.microcensus.persons")
+def configure(context):
+    context.stage("data.microcensus.trips")
+    context.stage("data.microcensus.persons")
 
 def execute(context):
     df_trips = context.stage("data.microcensus.trips")
......
-import gzip
-from tqdm import tqdm
-import pandas as pd
-import numpy as np
-from sklearn.neighbors import KDTree
 import numpy.linalg as la
+import pandas as pd
 
-def configure(context, require):
-    require.stage("data.microcensus.trips")
-    require.stage("data.microcensus.persons")
-    require.stage("data.microcensus.commute")
+def configure(context):
+    context.stage("data.microcensus.trips")
+    context.stage("data.microcensus.persons")
+    context.stage("data.microcensus.commute")
 
 # TODO: Merge this into data.microcensus.commute
......
-import pandas as pd
-import numpy as np
-import data.utils
-import data.spatial.utils
-import data.constants as c
-import pyproj
-import geopandas as gpd
-
-def configure(context, require):
-    require.stage("data.microcensus.persons")
-    require.stage("data.microcensus.trips")
-    require.stage("data.microcensus.transit")
+def configure(context):
+    context.stage("data.microcensus.persons")
+    context.stage("data.microcensus.trips")
+    context.stage("data.microcensus.transit")
 
 def execute(context):
     df_persons = context.stage("data.microcensus.persons")
......
-import pandas as pd
 import numpy as np
-import data.utils
-import data.constants as c
+import pandas as pd
 import pyproj
+
+import data.constants as c
+import data.spatial.cantons
 import data.spatial.municipalities
-import data.spatial.zones
-import data.spatial.utils
 import data.spatial.municipality_types
-import data.spatial.cantons
 import data.spatial.ovgk
+import data.spatial.utils
+import data.spatial.zones
 import data.utils
 
-def configure(context, require):
-    require.config("raw_data_path")
-    require.stage("data.spatial.municipalities")
-    require.stage("data.spatial.zones")
-    require.stage("data.spatial.municipality_types")
-    require.stage("data.statpop.density")
-    require.stage("data.spatial.ovgk")
+def configure(context):
+    context.config("data_path")
+    context.stage("data.spatial.municipalities")
+    context.stage("data.spatial.zones")
+    context.stage("data.spatial.municipality_types")
+    context.stage("data.statpop.density")
+    context.stage("data.spatial.ovgk")
 
 def execute(context):
-    raw_data_path = context.config["raw_data_path"]
+    data_path = context.config("data_path")
 
     df_mz_households = pd.read_csv(
-        "%s/microcensus/haushalte.csv" % raw_data_path, sep = ",", encoding = "latin1")
+        "%s/microcensus/haushalte.csv" % data_path, sep=",", encoding="latin1")
 
     # Simple attributes
     df_mz_households["home_structure"] = df_mz_households["W_STRUKTUR_AGG_2000"]
@@ -34,12 +37,12 @@ def execute(context):
     df_mz_households["household_weight"] = df_mz_households["WM"]
 
     # Income
-    df_mz_households["income_class"] = df_mz_households["F20601"] - 1 # Turn into zero-based class
-    df_mz_households["income_class"] = np.maximum(-1, df_mz_households["income_class"]) # Make all "invalid" entries -1
+    df_mz_households["income_class"] = df_mz_households["F20601"] - 1  # Turn into zero-based class
+    df_mz_households["income_class"] = np.maximum(-1, df_mz_households["income_class"])  # Make all "invalid" entries -1
 
     # Convert coordinates to LV95
     coords = df_mz_households[["W_X_CH1903", "W_Y_CH1903"]].values
-    x, y = pyproj.transform(c.CH1903, c.CH1903_PLUS, coords[:,0], coords[:,1])
+    x, y = pyproj.transform(c.CH1903, c.CH1903_PLUS, coords[:, 0], coords[:, 1])
 
     df_mz_households.loc[:, "home_x"] = x
     df_mz_households.loc[:, "home_y"] = y
@@ -50,7 +53,8 @@ def execute(context):
     # Bike availability depends on household size. (TODO: Would it make sense to use the same concept for cars?)
     df_mz_households["number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_NONE
-    df_mz_households.loc[df_mz_households["number_of_bikes"] > 0, "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_SOME
+    df_mz_households.loc[
+        df_mz_households["number_of_bikes"] > 0, "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_SOME
     df_mz_households.loc[
         df_mz_households["number_of_bikes"] >= df_mz_households["household_size"],
         "number_of_bikes_class"] = c.BIKE_AVAILABILITY_FOR_ALL
@@ -69,26 +73,26 @@ def execute(context):
     df_municipality_types = context.stage("data.spatial.municipality_types")
 
     df_spatial = pd.DataFrame(df_mz_households[["person_id", "home_x", "home_y"]])
-    df_spatial = data.spatial.utils.to_gpd(df_spatial, "home_x", "home_y")
-    df_spatial = data.spatial.utils.impute(df_spatial, df_municipalities, "person_id", "municipality_id")
+    df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "home_x", "home_y")
+    df_spatial = data.spatial.utils.impute(context, df_spatial, df_municipalities, "person_id", "municipality_id")
     df_spatial = data.spatial.zones.impute(df_spatial, df_zones)
     df_spatial = data.spatial.municipality_types.impute(df_spatial, df_municipality_types)
 
     df_mz_households = pd.merge(
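Note that `to_gpd` and `impute` now receive the pipeline context as their first argument (presumably so the helpers can report progress or read configuration). Their bodies are not part of this diff; the following stand-ins sketch what the two calls plausibly amount to with geopandas. The CRS and join keyword are assumptions: LV95 corresponds to EPSG:2056, and recent geopandas versions spell the containment test `predicate="within"` (older ones use `op="within"`).

import geopandas as gpd

# Illustrative stand-in for data.spatial.utils.to_gpd (the real helper
# lives in the repository and additionally takes the synpp context).
def to_gpd(df, x="home_x", y="home_y", crs="EPSG:2056"):
    # wrap x/y columns into point geometries in the LV95 coordinate system
    return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[x], df[y]), crs=crs)

# Illustrative stand-in for data.spatial.utils.impute.
def impute(df_points, df_zones, point_id, zone_id):
    # attach the id of the containing polygon to each point
    return gpd.sjoin(
        df_points, df_zones[[zone_id, "geometry"]], predicate="within"
    ).drop(columns="index_right")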