# output.py
# Exports the synthesized households, persons, activities and trips to CSV and GeoPackage files.
import geopandas as gpd
import pandas as pd
import numpy as np
import shapely.geometry as geo
import os, datetime, json

# Stage functions: configure(), validate() and execute().
def configure(context):
    """Declare the upstream stages and configuration options used by this stage.

    Args:
        context: Object exposing ``stage(name)`` and ``config(name)``; both are
            called here only for their registration effect, the return values
            are ignored.
    """
    context.stage("synthesis.population.enriched")
    context.stage("synthesis.population.activities")
    context.stage("synthesis.population.trips")
    context.stage("synthesis.population.spatial.locations")

    for option in ("output_path", "input_downsampling"):
        context.config(option)

def validate(context):
    """Check that the configured output directory exists.

    Args:
        context: Object exposing ``config(name)``; ``output_path`` is read.

    Raises:
        RuntimeError: If ``output_path`` is not an existing directory.
    """
    output_path = context.config("output_path")

    if not os.path.isdir(output_path):
        raise RuntimeError("Output directory must exist: %s" % output_path)

def execute(context):
    """Export the synthetic population as CSV and GeoPackage files.

    Reads the stages ``synthesis.population.enriched``, ``.activities``,
    ``.trips`` and ``.spatial.locations`` from ``context`` and writes the
    following files into the configured ``output_path``:

    * ``households.csv``
    * ``persons<suffix>.csv``, ``activities<suffix>.csv``, ``trips<suffix>.csv``
    * ``activities.gpkg``, ``trips.gpkg``
    * ``trips_with_distance<suffix>.csv``
    * ``meta.json``

    All CSV files use ``;`` as separator and are written without the index.

    Args:
        context: Object exposing ``config(name)`` and ``stage(name)``.
    """
    output_path = context.config("output_path")
    output_suffix = "_no_employeesAll"

    def target(file_name):
        # All outputs live directly inside the configured output directory.
        return os.path.join(output_path, file_name)

    # The enriched population feeds both the household and the person table.
    # It is never modified here, in case the context hands out a shared object.
    df_enriched = context.stage("synthesis.population.enriched")

    # Prepare households (one row per household)
    df_households = df_enriched.drop_duplicates("household_id")[[
        "household_id",
        "number_of_cars_class",
        "income_class",
        "household_size"
    ]]

    df_households.to_csv(target("households.csv"), sep = ";", index = False)

    # Prepare persons
    subscription_columns = [
        "subscriptions_ga", "subscriptions_halbtax", "subscriptions_verbund",
        "subscriptions_strecke", "subscriptions_gleis7", "subscriptions_junior",
        "subscriptions_other", "subscriptions_ga_class",
        "subscriptions_verbund_class", "subscriptions_strecke_class"
    ]

    # A person has a PT subscription if any of the columns is truthy.
    # NOTE(review): the *_class columns are OR-ed by truthiness as well;
    # verify that "no subscription" is encoded as 0/False there.
    has_subscription = np.zeros(len(df_enriched), dtype = bool)

    for column in subscription_columns:
        has_subscription = np.logical_or(
            has_subscription, df_enriched[column].values.tolist()
        )

    person_columns = [
        "person_id", "household_id",
        "age", "employed", "sex", "has_pt_subscription",
        "driving_license",
        "mz_person_id"
    ]

    # Select first, then assign: the frame obtained from the context is left untouched.
    df_persons = df_enriched[[
        column for column in person_columns if column != "has_pt_subscription"
    ]].assign(has_pt_subscription = has_subscription)[person_columns]

    df_persons.to_csv(target(f"persons{output_suffix}.csv"), sep = ";", index = False)

    # Prepare activities (rename returns a new frame)
    df_activities = context.stage("synthesis.population.activities").rename(
        columns = { "activity_index": "activity_id" }
    )

    activity_ids = df_activities["activity_id"].values
    df_activities["is_first"] = activity_ids == 1

    # An activity is the last one of a plan if the next row starts a new plan
    # (activity_id == 1) or if it is the final row of the table.
    # Assumes rows are ordered by person and activity — TODO confirm.
    is_last = np.ones(len(activity_ids), dtype = bool)
    is_last[:-1] = activity_ids[1:] == 1
    df_activities["is_last"] = is_last

    df_activities = df_activities[[
        "person_id", "activity_id",
        "purpose", "start_time", "end_time",
        "is_first", "is_last"
    ]]

    df_activities.to_csv(target(f"activities{output_suffix}.csv"), sep = ";", index = False)

    # Prepare trips
    df_trips = context.stage("synthesis.population.trips").rename(
        columns = { "trip_index": "trip_id" }
    )

    # Trip k connects activity k with activity k + 1.
    df_trips["preceeding_activity_index"] = df_trips["trip_id"]
    df_trips["following_activity_index"] = df_trips["trip_id"] + 1

    df_trips = df_trips[[
        "person_id", "trip_id",
        "preceeding_activity_index", "following_activity_index",
        "departure_time", "arrival_time", "mode",
        "preceding_purpose",
        "following_purpose"
    ]]

    df_trips.to_csv(target(f"trips{output_suffix}.csv"), sep = ";", index = False)

    # Prepare spatial data sets
    # Work on a copy: the column labels are replaced below and that must not
    # leak into the object held by the context.
    df_locations = context.stage("synthesis.population.spatial.locations").copy()

    # Positional relabelling: requires exactly four columns in this order.
    df_locations.columns = ["person_id", "activity_id", "destination_id", "geometry"]
    df_geometry = df_locations[["person_id", "activity_id", "geometry"]]

    # NOTE(review): the `init=` form is deprecated in pyproj >= 2; consider
    # "EPSG:2056" once the minimum supported geopandas version allows it.
    crs = dict(init = "epsg:2056")

    df_activities = pd.merge(
        df_activities, df_geometry,
        how = "left", on = ["person_id", "activity_id"]
    )

    # Write spatial activities
    df_spatial = gpd.GeoDataFrame(df_activities, crs = crs)
    df_spatial["purpose"] = df_spatial["purpose"].astype(str)
    df_spatial.to_file(target("activities.gpkg"), driver = "GPKG")

    # Write spatial trips: attach origin and destination geometry to each trip
    df_spatial = pd.merge(df_trips, df_geometry.rename(columns = {
        "activity_id": "preceeding_activity_index",
        "geometry": "preceeding_geometry"
    }), how = "left", on = ["person_id", "preceeding_activity_index"])

    df_spatial = pd.merge(df_spatial, df_geometry.rename(columns = {
        "activity_id": "following_activity_index",
        "geometry": "following_geometry"
    }), how = "left", on = ["person_id", "following_activity_index"])

    # Straight line from origin to destination.
    df_spatial["geometry"] = [
        geo.LineString(od)
        for od in zip(df_spatial["preceeding_geometry"], df_spatial["following_geometry"])
    ]

    df_spatial = df_spatial.drop(columns = ["preceeding_geometry", "following_geometry"])

    df_spatial = gpd.GeoDataFrame(df_spatial, crs = crs)
    df_spatial["following_purpose"] = df_spatial["following_purpose"].astype(str)
    df_spatial["mode"] = df_spatial["mode"].astype(str)
    df_spatial.to_file(target("trips.gpkg"), driver = "GPKG")

    print("\n\n GPKG done")

    # Crow-fly distance is the length of the origin-destination line.
    df_spatial["crowfly_distance"] = [line.length for line in df_spatial["geometry"]]
    df_spatial = df_spatial.drop(columns = ["geometry"])
    df_spatial.to_csv(target(f"trips_with_distance{output_suffix}.csv"), sep = ";", index = False)

    # Write meta information
    information = dict(
        sampling_rate = context.config("input_downsampling"),
        created = datetime.datetime.now(datetime.timezone.utc).isoformat()
    )

    with open(target("meta.json"), "w") as f:
        json.dump(information, f, indent = 4)