trips.py 2.44 KB
Newer Older
Sebastian Hörl's avatar
Sebastian Hörl committed
1
import pandas as pd
2

Sebastian Hörl's avatar
Sebastian Hörl committed
3
import data.constants as c
Sebastian Hörl's avatar
Sebastian Hörl committed
4

5

6
7
8
9
def configure(context):
    """Request every stage whose result ``execute`` reads from the context."""
    required_stages = (
        "population.sociodemographics",
        "data.microcensus.trips",
        "data.microcensus.commute",
    )

    # Same names, same order as the explicit one-call-per-line form.
    for stage_name in required_stages:
        context.stage(stage_name)
Sebastian Hörl's avatar
Sebastian Hörl committed
10
11

def execute(context):
    """Attach microcensus trips to the synthetic persons and jitter their times.

    Reads three stages from ``context``:

    * ``population.sociodemographics`` -- uses ``person_id``, ``mz_person_id``
      and ``age``.
    * ``data.microcensus.trips`` -- uses ``person_id``, ``trip_id``,
      ``departure_time``, ``arrival_time``, ``mode`` and ``purpose``.
    * ``data.microcensus.commute`` -- uses ``person_id`` and
      ``commute_trip_id``.

    Returns:
        A ``pandas.DataFrame`` sorted by ``person_id`` and ``trip_id`` with the
        columns ``person_id``, ``trip_id``, ``departure_time``,
        ``arrival_time``, ``travel_time``, ``mode``, ``following_purpose`` and
        ``is_commute``. ``travel_time`` is computed before the random shift,
        so it reflects the original (unrounded) trip duration.

    Raises:
        AssertionError: if a row without ``mz_person_id`` belongs to a person
            whose age is not above ``c.MZ_AGE_THRESHOLD``.
    """
    # The module header does not import numpy, so bring it into scope here;
    # without this every ``np.`` call below raises NameError.
    import numpy as np

    df_persons = context.stage("population.sociodemographics")[[
        "person_id", "mz_person_id", "age"
    ]]

    # Work on a copy so the cached stage result is never modified.
    df_trips = pd.DataFrame(context.stage("data.microcensus.trips"), copy=True)[[
        "person_id", "trip_id", "departure_time", "arrival_time", "mode", "purpose"
    ]]

    # In the microcensus the person key is called "person_id"; here it is the
    # foreign key "mz_person_id" of the synthetic persons.
    df_trips = df_trips.rename(columns={
        "person_id": "mz_person_id",
        "purpose": "following_purpose",
    })

    # NOTE(review): pd.merge defaults to how="inner", so persons without any
    # matching trip are already dropped by the merge itself -- confirm whether
    # a left join was intended for the check below.
    df_trips = pd.merge(df_persons, df_trips, on="mz_person_id")

    # Children do not have any trips from the microcensus
    f = np.isnan(df_trips["mz_person_id"])
    assert (df_trips[f]["age"] > c.MZ_AGE_THRESHOLD).all()

    # We deliberately delete them here, since other persons also may not have any
    # trips. May be improved later. TODO
    # Explicit copy: the columns added below must be written to an independent
    # frame, not to a filtered selection of the merge result.
    df_trips = df_trips[~f].copy()

    df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"]

    # Impute commuting information
    df_commute = pd.DataFrame(context.stage("data.microcensus.commute"), copy=True)[[
        "person_id", "commute_trip_id"
    ]]
    df_commute.columns = ["mz_person_id", "commute_trip_id"]
    df_trips = pd.merge(df_trips, df_commute, on="mz_person_id", how="left")

    # Persons without a commute entry get NaN here, which compares unequal to
    # every trip id, so none of their trips is flagged.
    df_trips["is_commute"] = df_trips["trip_id"] == df_trips["commute_trip_id"]

    df_trips = df_trips[[
        "person_id", "trip_id", "departure_time", "arrival_time",
        "travel_time", "mode", "following_purpose", "is_commute"
    ]].sort_values(by=["person_id", "trip_id"])

    # Diversify departure times: every person gets one random shift that is
    # applied to all of their trips, so the trip chain stays consistent.
    trips_by_person = df_trips.groupby("person_id")
    counts = trips_by_person.size().values

    # If first departure time is just 5min after midnight, we only add a
    # deviation of 5min (the shift is capped at 1800.0 time units --
    # presumably seconds, i.e. 30 minutes; confirm against the data stage).
    interval = np.minimum(1800.0, trips_by_person["departure_time"].min().values)

    # Uniform in [-interval, interval) per person, then expanded to one value
    # per trip. This lines up with the rows because both the groups and the
    # frame are ordered by person_id.
    offset = np.random.random(size=(len(counts),)) * interval * 2.0 - interval
    offset = np.repeat(offset, counts)

    df_trips["departure_time"] = np.round(df_trips["departure_time"] + offset)
    df_trips["arrival_time"] = np.round(df_trips["arrival_time"] + offset)

    return df_trips