Commit 4232cc0f authored by Aurore Sallard
Browse files

Adding secondary locations

parent 9bd54326
Pipeline #86225 failed with stage
...@@ -12,35 +12,40 @@ from tqdm import tqdm ...@@ -12,35 +12,40 @@ from tqdm import tqdm
def configure(context): def configure(context):
context.config("output_path") context.config("output_path")
context.config("raw_data_path") context.config("data_path")
context.config("analysis_path") context.config("analysis_path")
context.stage("analysis.import_syn_trips") #context.stage("analysis.import_syn_trips")
def import_data_synthetic(context): def import_data_synthetic(context):
filepath = "%s/syn_trips.csv" % context.config["output_path"] filepath = "%s/trips_with_distance.csv" % context.config("output_path")
df_trips = pd.read_csv(filepath, encoding = "latin1") df_trips = pd.read_csv(filepath, encoding = "latin1", sep = ";")
filepath = "%s/persons.csv" % context.config["output_path"] filepath = "%s/persons.csv" % context.config("output_path")
df_persons = pd.read_csv(filepath, encoding = "latin1", sep = ";") df_persons = pd.read_csv(filepath, encoding = "latin1", sep = ";")
df_syn = df_trips.merge(df_persons, left_on="person_id", right_on="person_id") df_syn = df_persons.merge(df_trips, left_on="person_id", right_on="person_id")
df_syn = df_syn[df_syn["age"] >= 6]
t_id = df_syn["person_id"].values.tolist() t_id = df_syn["person_id"].values.tolist()
df_persons_no_trip = df_persons[np.logical_not(df_persons["person_id"].isin(t_id))] df_persons_no_trip = df_persons[np.logical_not(df_persons["person_id"].isin(t_id))]
df_persons_no_trip = df_persons_no_trip.set_index(["person_id"]) df_persons_no_trip = df_persons_no_trip.set_index(["person_id"])
print("Synthetic: ", len(df_syn), ", ", len(df_persons_no_trip)) df_persons_no_trip = df_persons_no_trip[df_persons_no_trip["age"] >= 6]
print("Synthetic: ", len(list(set(df_syn["person_id"].values.tolist()))), ", ", len(df_persons_no_trip))
return df_syn, df_persons_no_trip return df_syn, df_persons_no_trip
def import_data_actual(context): def import_data_actual(context):
df_act_persons = pd.read_csv( df_act_persons = pd.read_csv(
"%s/microcensus/zielpersonen.csv" % context.config["raw_data_path"], "%s/microcensus/zielpersonen.csv" % context.config("data_path"),
sep = ",", encoding = "latin1", parse_dates = ["USTag"] sep = ",", encoding = "latin1", parse_dates = ["USTag"]
) )
filepath = "%s/microcensus_trips.csv" % context.config["output_path"] filepath = "%s/microcensus_trips.csv" % context.config("output_path")
df_act_trips = pd.read_csv(filepath, encoding = "latin1") df_act_trips = pd.read_csv(filepath, encoding = "latin1")
# Merging with person information, correcting trips with erroneous purpose # Merging with person information, correcting trips with erroneous purpose
...@@ -50,6 +55,11 @@ def import_data_actual(context): ...@@ -50,6 +55,11 @@ def import_data_actual(context):
df_act_persons["weight_person"] = df_act_persons["WP"] df_act_persons["weight_person"] = df_act_persons["WP"]
df_act_persons["date"] = df_act_persons["USTag"] df_act_persons["date"] = df_act_persons["USTag"]
df_act_persons["weekend"] = False
df_act_persons.loc[df_act_persons["tag"] == 6, "weekend"] = True
df_act_persons.loc[df_act_persons["tag"] == 7, "weekend"] = True
df_act_persons = df_act_persons[~df_act_persons["weekend"]]
# Driving license # Driving license
df_act_persons["driving_license"] = df_act_persons["f20400a"] == 1 df_act_persons["driving_license"] = df_act_persons["f20400a"] == 1
...@@ -66,9 +76,9 @@ def import_data_actual(context): ...@@ -66,9 +76,9 @@ def import_data_actual(context):
df_act_persons["age_class"] = np.digitize(df_act_persons["age"], c.AGE_CLASS_UPPER_BOUNDS) df_act_persons["age_class"] = np.digitize(df_act_persons["age"], c.AGE_CLASS_UPPER_BOUNDS)
df_act_persons.rename(columns = {"binary_car_availability":"car_availability"}, inplace = True) df_act_persons.rename(columns = {"binary_car_availability":"car_availability"}, inplace = True)
df_act = df_act_trips.merge(df_act_persons[["person_id", "weight_person", "employed", df_px = df_act_persons[["person_id", "weight_person", "employed",
"age", "sex", "car_availability"]], "age", "sex", "car_availability"]]
on=["person_id"], how='left') df_act = df_act_trips.merge(df_px, on=["person_id"], how='left')
df_act.loc[(df_act["purpose"]=='work') & (df_act["age"] < 16), "purpose"]="other" df_act.loc[(df_act["purpose"]=='work') & (df_act["age"] < 16), "purpose"]="other"
# Only keep the persons that could have been used in activity chain matching # Only keep the persons that could have been used in activity chain matching
...@@ -80,27 +90,10 @@ def import_data_actual(context): ...@@ -80,27 +90,10 @@ def import_data_actual(context):
df_persons_no_trip = df_act_persons[np.logical_not(df_act_persons["person_id"].isin(t_id))] df_persons_no_trip = df_act_persons[np.logical_not(df_act_persons["person_id"].isin(t_id))]
df_persons_no_trip = df_persons_no_trip.set_index(["person_id"]) df_persons_no_trip = df_persons_no_trip.set_index(["person_id"])
print(df_act.columns)
return df_act, df_persons_no_trip return df_act, df_persons_no_trip
def aux_data_frame(df_act, df_syn): def aux_data_frame(df_act, df_syn):
pers_ids = list(set(df_syn["person_id"].values.tolist()))
ids = []
chains = []
for pid in tqdm(pers_ids):
df = df_syn[df_syn["person_id"] == pid]
purposes = df["following_purpose"].values.tolist()
chain = "h-" + "-".join([purpose[0] for purpose in purposes])
#print(chain)
ids.append(pid)
chains.append(chain)
df_aux_syn = pd.DataFrame.from_dict({"person_id": ids, "weights": [1 for i in range(len(ids))], "chain": chains})
df_act["person_id"] = df_act.index df_act["person_id"] = df_act.index
pers_ids = list(set(df_act["person_id"].values.tolist())) pers_ids = list(set(df_act["person_id"].values.tolist()))
...@@ -112,12 +105,26 @@ def aux_data_frame(df_act, df_syn): ...@@ -112,12 +105,26 @@ def aux_data_frame(df_act, df_syn):
df = df_act[df_act["person_id"] == pid] df = df_act[df_act["person_id"] == pid]
weight = np.mean(df["weight_person"].values.tolist()) weight = np.mean(df["weight_person"].values.tolist())
purposes = df["purpose"].values.tolist() purposes = df["purpose"].values.tolist()
chain = "h-" + "-".join([purpose[0] for purpose in purposes]) chain = "home-" + "-".join([purpose for purpose in purposes])
ids.append(pid) ids.append(pid)
weights.append(weight) weights.append(weight)
chains.append(chain) chains.append(chain)
df_aux_act = pd.DataFrame.from_dict({"person_id": ids, "weight_person":weights, "chain": chains}) df_aux_act = pd.DataFrame.from_dict({"person_id": ids, "weight_person":weights, "chain": chains})
pers_ids = list(set(df_syn["person_id"].values.tolist()))
ids = []
chains = []
for pid in tqdm(pers_ids):
df = df_syn[df_syn["person_id"] == pid]
purposes = df["following_purpose"].values.tolist()
chain = "home-" + "-".join([purpose for purpose in purposes])
ids.append(pid)
chains.append(chain)
df_aux_syn = pd.DataFrame.from_dict({"person_id": ids, "weights": [1 for i in range(len(ids))], "chain": chains})
return df_aux_act, df_aux_syn return df_aux_act, df_aux_syn
...@@ -127,6 +134,7 @@ def activity_chains_comparison(context, all_CC, suffix = None): ...@@ -127,6 +134,7 @@ def activity_chains_comparison(context, all_CC, suffix = None):
all_CC["synthetic Count"] = all_CC ["synthetic Count"] / all_CC["synthetic Count"].sum() *100 all_CC["synthetic Count"] = all_CC ["synthetic Count"] / all_CC["synthetic Count"].sum() *100
all_CC["actual Count"] = all_CC["actual Count"] / all_CC["actual Count"].sum() *100 all_CC["actual Count"] = all_CC["actual Count"] / all_CC["actual Count"].sum() *100
all_CC = all_CC.sort_values(by=['actual Count'], ascending=False) all_CC = all_CC.sort_values(by=['actual Count'], ascending=False)
all_CC.to_csv("%s/actchains_DF.csv" % context.config("analysis_path"))
# First step done: plot activity chain counts # First step done: plot activity chain counts
title_plot = "Synthetic and HTS activity chain comparison" title_plot = "Synthetic and HTS activity chain comparison"
...@@ -415,24 +423,23 @@ def execute(context): ...@@ -415,24 +423,23 @@ def execute(context):
syn_CC.columns = ["Chain", "synthetic Count"] syn_CC.columns = ["Chain", "synthetic Count"]
# 1. ACTIVITY CHAINS # 1. ACTIVITY CHAINS
# Creating the new dataframes with activity chain counts # Creating the new dataframes with activity chain counts
#syn_CC = myutils.process_synthetic_activity_chain_counts(df_syn) #syn_CC = myutils.process_synthetic_activity_chain_counts(df_syn)
syn_CC.loc[len(syn_CC) + 1] = pd.Series({"Chain": "h", "synthetic Count": df_syn_no_trip.shape[0] }) syn_CC.loc[len(syn_CC) + 1] = pd.Series({"Chain": "home", "synthetic Count": df_syn_no_trip.shape[0] })
#act_CC = myutils.process_actual_activity_chain_counts(df_act, df_aux) #act_CC = myutils.process_actual_activity_chain_counts(df_act, df_aux)
act_CC.loc[len(act_CC) + 1] = pd.Series({"Chain": "h", "actual Count": np.sum(df_act_no_trip["weight_person"].values.tolist())}) act_CC.loc[len(act_CC) + 1] = pd.Series({"Chain": "home", "actual Count": np.sum(df_act_no_trip["weight_person"].values.tolist())})
# Merging together, comparing # Merging together, comparing
all_CC = pd.merge(syn_CC, act_CC, on = "Chain", how = "left") all_CC = pd.merge(syn_CC, act_CC, on = "Chain", how = "outer")
activity_chains_comparison(context, all_CC) activity_chains_comparison(context, all_CC)
# Number of activities # Number of activities
activity_counts_comparison(context, all_CC) #activity_counts_comparison(context, all_CC)
# Number of activities per purposes # Number of activities per purposes
activity_counts_per_purpose(context, all_CC) #activity_counts_per_purpose(context, all_CC)
# 2. MODE AND DESTINATION PURPOSE # 2. MODE AND DESTINATION PURPOSE
#mode_purpose_comparison(context, df_syn, df_act) #mode_purpose_comparison(context, df_syn, df_act)
...@@ -440,21 +447,21 @@ def execute(context): ...@@ -440,21 +447,21 @@ def execute(context):
# 3. CROWFLY DISTANCES # 3. CROWFLY DISTANCES
# 3.1. Compute the distances # 3.1. Compute the distances
df_syn_dist = compute_distances_synthetic(df_syn) #df_syn_dist = compute_distances_synthetic(df_syn)
df_act_dist = compute_distances_actual(df_act) #df_act_dist = compute_distances_actual(df_act)
print(list(set(df_syn["following_purpose"].values.tolist()))) #print(list(set(df_syn["following_purpose"].values.tolist())))
# 3.2 Prepare for plotting # 3.2 Prepare for plotting
df_act_dist["x"] = df_act_dist["weight_person"] * df_act_dist["crowfly_distance"] #df_act_dist["x"] = df_act_dist["weight_person"] * df_act_dist["crowfly_distance"]
act = df_act_dist.groupby(["purpose"]).sum()["x"] / df_act_dist.groupby(["purpose"]).sum()["weight_person"] #act = df_act_dist.groupby(["purpose"]).sum()["x"] / df_act_dist.groupby(["purpose"]).sum()["weight_person"]
syn = df_syn_dist.groupby(["following_purpose"]).mean()["crowfly_distance"] #syn = df_syn_dist.groupby(["following_purpose"]).mean()["crowfly_distance"]
print(syn) #print(syn)
# 3.3 Ready to plot! # 3.3 Ready to plot!
myplottools.plot_comparison_bar(context, imtitle = "distancepurpose.png", plottitle = "Crowfly distance", ylabel = "Mean crowfly distance [km]", xlabel = "", lab = syn.index, actual = act, synthetic = syn, t = None, xticksrot = True ) #myplottools.plot_comparison_bar(context, imtitle = "distancepurpose.png", plottitle = "Crowfly distance", ylabel = "Mean crowfly distance [km]", xlabel = "", lab = syn.index, actual = act, synthetic = syn, t = None, xticksrot = True )
all_the_plot_distances(context, df_act_dist, df_syn_dist) #all_the_plot_distances(context, df_act_dist, df_syn_dist)
# 3.4 Distance from home to education # 3.4 Distance from home to education
#syn_0, act_0, act_w0 = compare_dist_educ(context, df_syn, df_act) #syn_0, act_0, act_w0 = compare_dist_educ(context, df_syn, df_act)
......
...@@ -7,16 +7,16 @@ import shutil ...@@ -7,16 +7,16 @@ import shutil
def configure(context): def configure(context):
context.config("output_path") context.config("output_path")
context.config("raw_data_path") context.config("data_path")
context.config("analysis_path") context.config("analysis_path")
def import_data(context): def import_data(context):
with gzip.open("%s/population_with_secondary_locations.xml.gz" % context.config["output_path"], 'rb') as f_in: with gzip.open("%s/population_with_secondary_locations.xml.gz" % context.config("output_path"), 'rb') as f_in:
with open("%s/population_complete.xml" % context.config["output_path"], 'wb') as f_out: with open("%s/population_complete.xml" % context.config("output_path"), 'wb') as f_out:
shutil.copyfileobj(f_in, f_out) shutil.copyfileobj(f_in, f_out)
xml_tree = ET.parse("%s/population_complete.xml" % context.config["output_path"]) xml_tree = ET.parse("%s/population_complete.xml" % context.config("output_path"))
population = xml_tree.getroot() population = xml_tree.getroot()
return population return population
......
...@@ -63,9 +63,6 @@ def plot_comparison_bar(context, imtitle, plottitle, ylabel, xlabel, lab, actual ...@@ -63,9 +63,6 @@ def plot_comparison_bar(context, imtitle, plottitle, ylabel, xlabel, lab, actual
labels = lab labels = lab
actual_means = actual actual_means = actual
synthetic_means = synthetic synthetic_means = synthetic
print("SYNTHETIC MEANS")
print(synthetic_means)
x = np.arange(len(labels)) # the label locations x = np.arange(len(labels)) # the label locations
width = w # the width of the bars width = w # the width of the bars
...@@ -91,25 +88,26 @@ def plot_comparison_bar(context, imtitle, plottitle, ylabel, xlabel, lab, actual ...@@ -91,25 +88,26 @@ def plot_comparison_bar(context, imtitle, plottitle, ylabel, xlabel, lab, actual
#plt.rcParams.update({'font.size': 12}) #plt.rcParams.update({'font.size': 12})
plt.xticks(rotation=30) plt.xticks(rotation=30)
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + imtitle) plt.savefig("%s/" % context.config("analysis_path") + imtitle)
#plt.rcParams.update({'font.size': 18}) #plt.rcParams.update({'font.size': 18})
def plot_comparison_hist_purpose(context, title, actual_df, synthetic_df, bins = np.linspace(0,25,120), dpi = 300, cols = 3, rows = 2): def plot_comparison_hist_purpose(context, title, actual_df, synthetic_df, bins = np.linspace(0,25,120), dpi = 300, cols = 3, rows = 2):
modelist = synthetic_df["following_purpose"].unique() modelist = synthetic_df["following_purpose"].unique()
print(modelist) rows = (len(modelist) // 3) + (len(modelist) % 3 != 0)
plt.rcParams['figure.dpi'] = dpi plt.rcParams['figure.dpi'] = dpi
fig, axes = plt.subplots(nrows=rows, ncols=cols) fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize = (5*cols, 3*rows))
idx=0 idx=0
for r in range(rows): for r in range(rows):
for c in range(cols): for c in range(cols):
x = synthetic_df[synthetic_df["following_purpose"]==modelist[idx]]["crowfly_distance"] if idx < len(modelist):
y = actual_df[actual_df["purpose"]==modelist[idx]][["crowfly_distance", "weight_person"]] x = synthetic_df[synthetic_df["following_purpose"]==modelist[idx]]["crowfly_distance"]
axes = add_small_hist(axes, r, c, modelist[idx], x, y, bins) y = actual_df[actual_df["purpose"]==modelist[idx]][["crowfly_distance", "weight_person"]]
idx = idx + 1 axes = add_small_hist(axes, r, c, modelist[idx], x, y, bins)
idx = idx + 1
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + title) plt.savefig("%s/" % context.config("analysis_path") + title)
...@@ -129,24 +127,26 @@ def plot_comparison_hist_mode(context, title, actual_df, synthetic_df, bins = np ...@@ -129,24 +127,26 @@ def plot_comparison_hist_mode(context, title, actual_df, synthetic_df, bins = np
fig.delaxes(axes[1,2]) fig.delaxes(axes[1,2])
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + title) plt.savefig("%s/" % context.config("analysis_path") + title)
def plot_comparison_cdf_purpose(context, title, actual_df, synthetic_df, dpi = 300, cols = 3, rows = 2): def plot_comparison_cdf_purpose(context, title, actual_df, synthetic_df, dpi = 300, cols = 3, rows = 2):
modelist = synthetic_df["following_purpose"].unique() modelist = synthetic_df["following_purpose"].unique()
rows = (len(modelist) // 3) + (len(modelist) % 3 != 0)
plt.rcParams['figure.dpi'] = dpi plt.rcParams['figure.dpi'] = dpi
fig, axes = plt.subplots(nrows=rows, ncols=cols) fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize = (5*cols, 3*rows))
idx=0 idx=0
for r in range(rows): for r in range(rows):
for c in range(cols): for c in range(cols):
x = synthetic_df[synthetic_df["following_purpose"]==modelist[idx]]["crowfly_distance"] if idx < len(modelist):
y = actual_df[actual_df["purpose"]==modelist[idx]][["crowfly_distance", "weight_person"]] x = synthetic_df[synthetic_df["following_purpose"]==modelist[idx]]["crowfly_distance"]
axes = add_small_cdf(axes, r, c, modelist[idx], x, y) y = actual_df[actual_df["purpose"]==modelist[idx]][["crowfly_distance", "weight_person"]]
idx = idx + 1 axes = add_small_cdf(axes, r, c, modelist[idx], x, y)
idx = idx + 1
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + title) plt.savefig("%s/" % context.config("analysis_path") + title)
def plot_comparison_cdf_mode(context, title, actual_df, synthetic_df, bins = np.linspace(0,25,120), dpi = 300, cols = 3, rows = 2): def plot_comparison_cdf_mode(context, title, actual_df, synthetic_df, bins = np.linspace(0,25,120), dpi = 300, cols = 3, rows = 2):
...@@ -165,7 +165,7 @@ def plot_comparison_cdf_mode(context, title, actual_df, synthetic_df, bins = np. ...@@ -165,7 +165,7 @@ def plot_comparison_cdf_mode(context, title, actual_df, synthetic_df, bins = np.
fig.delaxes(axes[1,2]) fig.delaxes(axes[1,2])
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + title) plt.savefig("%s/" % context.config("analysis_path") + title)
...@@ -200,7 +200,7 @@ def plot_mode_share(context, title, df_syn, df2, amdf2, dpi = 300): ...@@ -200,7 +200,7 @@ def plot_mode_share(context, title, df_syn, df2, amdf2, dpi = 300):
autolabel(rects2) autolabel(rects2)
fig.tight_layout() fig.tight_layout()
plt.savefig("%s/" % context.config["analysis_path"] + title) plt.savefig("%s/" % context.config("analysis_path") + title)
plt.show() plt.show()
......
# General pipeline settings # General pipeline settings
working_directory: /home/asallard/Scenarios/Switzerland/cache working_directory: /nas/asallard/Switzerland/cache_170221/act_true
flowchart_path: /home/asallard/Scenarios/Switzerland/cache/flowchart.json flowchart_path: /nas/asallard/Switzerland/output_170221/flowchart.json
dryrun: false dryrun: false
# Requested stages # Requested stages
run: run:
# - data.statpop.persons # - data.statpop.persons
# - data.statpop.projections.households # - data.statpop.projections.households
# - data.statpop.scaled - data.statpop.statpop
# - population.matched # - population.matched
# - population.output # - data.microcensus.trips
- matsim.run # - population.destinations
# - synthesis.population.destinations
# - synthesis.population.spatial.secondary.locations
# - matsim.facilities
# - matsim.population
# - matsim.households
# - matsim.run
# - analysis.analysis
# - population.output
# These are configuration options that we use in the pipeline # These are configuration options that we use in the pipeline
config: config:
...@@ -18,11 +26,13 @@ config: ...@@ -18,11 +26,13 @@ config:
random_seed: 0 random_seed: 0
hot_deck_matching_runners: 2 hot_deck_matching_runners: 2
disable_progress_bar: false disable_progress_bar: false
java_memory: 10G java_memory: 80G
input_downsampling: 0.01 input_downsampling: 0.1
enable_scaling: true enable_scaling: true
scaling_year: 2020 scaling_year: 2020
use_freight: true use_freight: true
use_detailed_activities: false
hafas_date: 01.10.2018 hafas_date: 01.10.2018
data_path: /home/asallard/Scenarios/Switzerland/Data data_path: /nas/ivtmatsim/scenarios/switzerland/data
output_path: /home/asallard/Scenarios/Switzerland/output output_path: /nas/asallard/Switzerland/output_170221/act_true
analysis_path: /nas/asallard/Switzerland/analysis_170221/act_true
...@@ -130,7 +130,8 @@ def execute(context): ...@@ -130,7 +130,8 @@ def execute(context):
remove_ids = set(df_mz_persons["person_id"]) - set(df_mz_trips["person_id"]) remove_ids = set(df_mz_persons["person_id"]) - set(df_mz_trips["person_id"])
initial_size = len(df_mz_persons) initial_size = len(df_mz_persons)
df_mz_persons = df_mz_persons[~df_mz_persons["person_id"].isin(remove_ids)] # TODO Uncomment if you DO WANT to exclude people staying at home all day
#df_mz_persons = df_mz_persons[~df_mz_persons["person_id"].isin(remove_ids)]
# Note: Around 7000 of them are those, which do not even have an activity chain in the first place # Note: Around 7000 of them are those, which do not even have an activity chain in the first place
# because they have not been asked. # because they have not been asked.
......
...@@ -7,9 +7,11 @@ from tqdm import tqdm ...@@ -7,9 +7,11 @@ from tqdm import tqdm
def configure(context): def configure(context):
context.config("data_path") context.config("data_path")
context.config("output_path") context.config("output_path")
context.config("use_detailed_activities")
def execute(context): def execute(context):
data_path = context.config("data_path") data_path = context.config("data_path")
det_activities = context.config("use_detailed_activities")
df_mz_trips = pd.read_csv("%s/microcensus/wege.csv" % data_path, encoding = "latin1") df_mz_trips = pd.read_csv("%s/microcensus/wege.csv" % data_path, encoding = "latin1")
df_mz_stages = pd.read_csv("%s/microcensus/etappen.csv" % data_path, encoding = "latin1") df_mz_stages = pd.read_csv("%s/microcensus/etappen.csv" % data_path, encoding = "latin1")
...@@ -17,7 +19,7 @@ def execute(context): ...@@ -17,7 +19,7 @@ def execute(context):
df_mz_trips = df_mz_trips[[ df_mz_trips = df_mz_trips[[
"HHNR", "WEGNR", "f51100", "f51400", "wzweck1", "wzweck2", "wmittel", "HHNR", "WEGNR", "f51100", "f51400", "wzweck1", "wzweck2", "wmittel",
"S_X_CH1903", "S_Y_CH1903", "Z_X_CH1903", "Z_Y_CH1903", "W_X_CH1903", "W_Y_CH1903", "S_X_CH1903", "S_Y_CH1903", "Z_X_CH1903", "Z_Y_CH1903", "W_X_CH1903", "W_Y_CH1903",
"w_rdist" "w_rdist", "f51800a", "f51700_weg"
]] ]]
df_mz_stages = df_mz_stages[[ df_mz_stages = df_mz_stages[[
...@@ -74,31 +76,36 @@ def execute(context): ...@@ -74,31 +76,36 @@ def execute(context):
df_mz_trips.loc[df_mz_trips["wzweck1"] == 12, "purpose"] = "unknown" # Other df_mz_trips.loc[df_mz_trips["wzweck1"] == 12, "purpose"] = "unknown" # Other
df_mz_trips.loc[df_mz_trips["wzweck1"] == 13, "purpose"] = "border" # Going out of country df_mz_trips.loc[df_mz_trips["wzweck1"] == 13, "purpose"] = "border" # Going out of country
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] == 1), "purpose"] = "grocery" if det_activities == "true":
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] >= 2), "purpose"] = "other(S)" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] == 1), "purpose"] = "grocery"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] <= 0), "purpose"] = "other(S)" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] >= 2), "purpose"] = "other(S)"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 4, df_mz_trips["f51800a"] <= 0), "purpose"] = "other(S)"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] <= 0), "purpose"] = "other(L)"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 1), "purpose"] = "visits" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] <= 0), "purpose"] = "other(L)"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 2), "purpose"] = "gastronomy" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 1), "purpose"] = "visits"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 3), "purpose"] = "sport" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 2), "purpose"] = "gastronomy"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 4), "purpose"] = "outdoor" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 3), "purpose"] = "sport"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 5), "purpose"] = "outdoor" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 4), "purpose"] = "outdoor"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 6), "purpose"] = "sport" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 5), "purpose"] = "outdoor"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 7), "purpose"] = "outdoor" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 6), "purpose"] = "sport"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 8), "purpose"] = "sport" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 7), "purpose"] = "outdoor"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 9), "purpose"] = "culture" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 8), "purpose"] = "sport"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 10), "purpose"] = "volunteer" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 9), "purpose"] = "culture"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 11), "purpose"] = "volunteer" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 10), "purpose"] = "volunteer"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 12), "purpose"] = "culture" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 11), "purpose"] = "volunteer"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 13), "purpose"] = "religion" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 12), "purpose"] = "culture"
df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips["f51700_weg"] == 14), "purpose"] = "visits" df_mz_trips.loc[np.logical_and(df_mz_trips["wzweck1"] == 8, df_mz_trips[