primary_zones.py 3.79 KB
Newer Older
tchervec's avatar
tchervec committed
1
2
import functools

Sebastian Hörl's avatar
Sebastian Hörl committed
3
import numpy as np
tchervec's avatar
tchervec committed
4
5
import pandas as pd

Sebastian Hörl's avatar
Sebastian Hörl committed
6

7
8
9
10
11
12
def configure(context):
    """Request every upstream stage that this stage reads in ``execute``.

    Each name is passed to ``context.stage`` once, in the order listed.
    """
    required_stages = (
        "data.microcensus.commute",
        "data.od.matrix",
        "data.od.distances",
        "population.sociodemographics",
        "data.spatial.zones",
    )

    for stage_name in required_stages:
        context.stage(stage_name)
Sebastian Hörl's avatar
Sebastian Hörl committed
13

14

Sebastian Hörl's avatar
Sebastian Hörl committed
15
16
17
# TODO: We only assign work here through OD matrices. However, we *can* generate
# OD matrices for education as well (the STATPOP information is available). What
# would need to be done is to adjust data.od.matrix to produce two kinds of
# matrices, and then we would need to use this information here. In
# data.microcensus.commute we already produce information on education commutes.

# However, for now we will recover the simple scheme from Kirill!

def execute(context):
    """Assign a work zone to every person that has a work commute.

    Steps:

    1. Work commutes from ``data.microcensus.commute`` are joined onto the
       persons of ``population.sociodemographics`` via ``mz_person_id``
       (inner join, so persons without a work commute are dropped).
    2. For every commute mode and home zone, the number of commuters is
       distributed over destination zones with one multinomial draw from the
       corresponding row of the OD probability matrix. ``car_passenger``
       uses the ``car`` matrix.
    3. Per mode and home zone, the drawn destinations are ordered by distance
       from the home zone and handed to the persons ordered by their reported
       commute distance, so the shortest reported commute receives the
       closest drawn destination.

    Returns a data frame with the columns ``person_id``, ``work_zone_id`` and
    ``commute_mode``.

    Notes:
        * The order of ``np.random.multinomial`` calls (modes outer, zones
          inner) is kept stable so results stay reproducible for a fixed
          NumPy seed.
        * Persons whose mode is not one of the handled modes, or whose home
          zone is not listed in ``data.spatial.zones``, keep the initial
          ``work_zone_id`` of 0.
        * NOTE(review): the code assumes that the rows/columns of the OD
          matrices and of the distance matrix follow the row order of
          ``data.spatial.zones`` - confirm against the producing stages.
    """
    modes = ["car", "pt", "bike", "walk", "car_passenger"]

    df_zones = context.stage("data.spatial.zones")

    # Load commute information for work. The person_id of the microcensus is
    # what the synthetic population refers to as mz_person_id.
    df_commute = pd.DataFrame(context.stage("data.microcensus.commute")[[
        "person_id", "commute_mode", "commute_home_distance", "commute_purpose"
    ]], copy=True)
    df_commute = df_commute[df_commute["commute_purpose"] == "work"]
    df_commute = df_commute.rename(columns={"person_id": "mz_person_id"})

    # Load person information
    df_persons = context.stage("population.sociodemographics")[[
        "person_id", "household_id", "mz_person_id", "home_zone_id"
    ]]

    # Merge commute information into the persons
    df = pd.merge(df_persons, df_commute, on="mz_person_id")

    # Number of commuters per (mode, home zone)
    df_demand = df.groupby(["commute_mode", "home_zone_id"]).size().reset_index(name="count")

    # Only the probability matrices are needed here
    pdf_matrices, _ = context.stage("data.od.matrix")
    commute_counts = {}

    print("Computing commute counts ...")
    for mode in modes:
        source_mode = "car" if mode == "car_passenger" else mode
        pdf_matrix = pdf_matrices[source_mode]

        # Filter the demand table once per mode instead of once per zone
        df_mode_demand = df_demand[df_demand["commute_mode"] == mode]
        count_by_zone = dict(zip(df_mode_demand["home_zone_id"], df_mode_demand["count"]))

        origin_counts = np.array([
            count_by_zone.get(origin_zone, 0)
            for origin_zone in context.progress(df_zones["zone_id"], label=mode)
        ])

        # np.int was removed from NumPy; the builtin int is the same dtype
        counts = np.zeros(pdf_matrix.shape, dtype=int)

        for origin_index, origin_count in enumerate(origin_counts):
            if origin_count > 0:
                assert not np.any(np.isnan(pdf_matrix[origin_index]))
                # Pass a plain int as the number of trials
                counts[origin_index, :] = np.random.multinomial(
                    int(origin_count), pdf_matrix[origin_index, :]
                )

        commute_counts[mode] = counts
        assert len(counts) == len(df_zones)

    distances = context.stage("data.od.distances")
    work_zones = np.zeros((len(df),), dtype=int)
    zone_ids = df_zones["zone_id"].values

    with context.progress(label="Assigning work zones", total=len(modes) * len(df_zones)) as progress:
        for mode in modes:
            mode_f = df["commute_mode"] == mode

            for origin_index, origin_zone in enumerate(zone_ids):
                destination_counts = commute_counts[mode][origin_index, :]
                destination_order = np.argsort(distances[origin_index, :])

                # Destination zone ids, closest first, each repeated as often
                # as it was drawn for this origin.
                destinations = np.repeat(
                    zone_ids[destination_order], destination_counts[destination_order]
                )

                if len(destinations) > 0:
                    f = (mode_f & (df["home_zone_id"] == origin_zone)).values
                    person_indices = np.flatnonzero(f)

                    # Sort the raw values: argsort on a pandas Series marks
                    # missing values with -1 instead of a usable position.
                    person_order = np.argsort(df.loc[f, "commute_home_distance"].values)
                    work_zones[person_indices[person_order]] = destinations

                progress.update()

    df["work_zone_id"] = work_zones
    df = df[["person_id", "work_zone_id", "commute_mode"]]
    assert len(df) == len(df.dropna())

    return df