Commit 6711b639 authored by kaghog's avatar kaghog
Browse files

Fix bug: a few new facilities added for the detailed activity scenario

parent 655e948e
import pandas as pd
import numpy as np
import data.spatial.utils as spatial_utils
import geopandas as gpd
def configure(context):
    """Declare the pipeline stages and configuration values this stage depends on."""
    for stage_name in ("synthesis.population.destinations",
                       "synthesis.population.spatial.home.locations",
                       "synthesis.population.destinations_schedule"):
        context.stage(stage_name)
    context.config("use_detailed_activities")
def execute(context):
    """Return the destination facilities for location assignment.

    Without detailed activities, this is simply a copy of the standard
    destinations. With detailed activities, one pseudo-facility per household
    is appended to the scheduled destinations so that home-based activities
    (e.g. loop trips / visits) have a destination to point at.
    """
    detailed = context.config("use_detailed_activities")
    df = pd.DataFrame(context.stage("synthesis.population.destinations"), copy=True)

    if not detailed:
        return df

    # With detailed activities, add additional facilities for activities
    # such as loop trips: one home facility per household.
    df_destinations = context.stage("synthesis.population.destinations_schedule")
    # First identifier beyond the existing destination ids, so the new
    # home facilities do not collide with scheduled destinations.
    next_id = np.max(df_destinations["destination_id"].values.tolist()) + 1

    df_home = context.stage("synthesis.population.spatial.home.locations").copy()[
        ["household_id", "geometry"]].rename({"household_id": "destination_id"}, axis=1)
    df_home.loc[:, "destination_id"] = np.array(range(next_id, next_id + len(df_home), 1))

    # Homes only offer "visits"; every other activity flag is switched off.
    # NOTE(review): "education_type" is set to False like the offer flags —
    # mirrors the sibling copy of this code elsewhere in the project.
    df_home.loc[:, "offers_visits"] = True
    for flag in ("offers_work", "offers_education", "offers_leisure",
                 "offers_grocery", "offers_other(S)", "offers_culture",
                 "education_type", "offers_religion", "offers_gastronomy",
                 "offers_sport", "offers_other(L)", "offers_other",
                 "offers_volunteer", "offers_outdoor"):
        df_home.loc[:, flag] = False

    df_home.loc[:, "destination_x"] = df_home["geometry"].apply(lambda g: g.x).values
    df_home.loc[:, "destination_y"] = df_home["geometry"].apply(lambda g: g.y).values
    df_home.loc[:, "number_employees"] = 1

    # A home facility is available around the clock.
    for window in ("0-3", "3-6", "6-9", "9-12", "12-15", "15-18", "18-21", "21-24"):
        df_home.loc[:, "open_%s" % window] = 1

    df_home = pd.DataFrame(df_home)
    return pd.concat([df_destinations, df_home])
......@@ -10,8 +10,6 @@ import matplotlib.pyplot as plt
def configure(context):
context.stage("synthesis.population.trips")
context.stage("synthesis.population.sampled")
context.stage("synthesis.population.spatial.home.locations")
context.stage("synthesis.population.spatial.primary.weekend.pry_distance_distributions")
context.stage("synthesis.population.destinations")
......
......@@ -11,8 +11,7 @@ import matplotlib.pyplot as plt
def configure(context):
context.stage("synthesis.population.trips")
context.stage("synthesis.population.sampled")
# context.stage("synthesis.population.sampled")
context.stage("synthesis.population.spatial.home.locations")
context.stage("synthesis.population.spatial.primary.weekend.pry_distance_distributions")
context.stage("synthesis.population.destinations")
......@@ -29,7 +28,7 @@ def prepare_home_locations(context):
df_home = df_home.rename(columns={"geometry": "home"})
df_locations = context.stage("synthesis.population.sampled")[["person_id", "household_id"]]
df_locations = context.stage("synthesis.population.enriched")[["person_id", "household_id"]]
df_locations = pd.merge(df_locations, df_home[["household_id", "home"]], how="left", on="household_id")
return df_locations[["person_id", "home"]].sort_values(by="person_id")
......@@ -56,43 +55,43 @@ def prepare_destinations(context):
return data, df_destinations
def prepare_radius_from_cdf(cdf, midpoint_bins, random_values):
#random_values are random values generated matching the length of the synthetic population which will be
# random_values are random values generated matching the length of the synthetic population which will be
# used to sample radius randomly to be used for sampling distances for each person
#here we are sampling locations for each person not for each trip
# here we are sampling locations for each person not for each trip
value_bins = np.searchsorted(cdf, random_values)
radius_from_cdf = midpoint_bins[value_bins]
return radius_from_cdf
def find_locations(home_coordinates, destination_coordinates, radius):
tree = KDTree(destination_coordinates)
indices, distances = tree.query_radius(home_coordinates, r = radius, return_distance = True, sort_results=True)
indices, distances = tree.query_radius(home_coordinates, r=radius, return_distance=True, sort_results=True)
#when no facility is found
# when no facility is found
for i in range(len(indices)):
l = indices[i]
if len(l) == 0:
dist, ind = tree.query(np.array(home_coordinates[i]).reshape(1,-1), 2, return_distance = True, sort_results=True)
dist, ind = tree.query(np.array(home_coordinates[i]).reshape(1, -1), 2, return_distance=True,
sort_results=True)
fac = ind[0][1]
indices[i] = [fac]
distances[i] = [dist[0][1]]
return indices, distances
def impute_nearest_work_locations(context):
# Prepare work persons
df_persons = context.stage("synthesis.population.enriched")
df_trips = context.stage("synthesis.population.trips")#.sort_values(by=["person_id", "trip_index"])
df_trips = context.stage("synthesis.population.trips") # .sort_values(by=["person_id", "trip_index"])
df_trips_work = df_trips[df_trips["following_purpose"] == "work"].copy()
#df_syn_persons_work = df_persons.copy()
# df_syn_persons_work = df_persons.copy()
df_syn_persons_work = df_persons[df_persons["person_id"].isin(df_trips_work["person_id"].unique())]
#prepare home coordinates
# prepare home coordinates
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
# Prepare work destinations
......@@ -119,7 +118,7 @@ def impute_nearest_work_locations(context):
(df_syn_persons_work["home_y"] - df_syn_persons_work["work_y"]) ** 2
)
#plots to check sampled distance distribution
# plots to check sampled distance distribution
# plt.hist(distances, bins = 200)
# plt.savefig("%s/my_raw_sampled_distances.png" % context.cache_path)
#
......@@ -135,10 +134,10 @@ def impute_nearest_work_locations(context):
# plt.savefig("%s/my_persons_imputed_distances_25km.png" % context.cache_path)
df_syn_persons_work = df_syn_persons_work[["person_id",
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
df_syn_persons_work = spatial_utils.to_gpd(context, df_syn_persons_work, coord_type="work")
......@@ -148,18 +147,18 @@ def impute_nearest_work_locations(context):
def impute_work_locations_method1(context, distributions, destinations, df_syn_persons_work, df_destinations):
# Method1 takes the last distance sampled from the distance distribution based on the radius
#prepare the distances used for sampling based on the cdf (this is the radius variable)
# prepare the distances used for sampling based on the cdf (this is the radius variable)
cdf = distributions["work"]["cdf"]
midpoint_bins = distributions["work"]["midpoint_bins"]
random_values = np.random.rand(len(df_syn_persons_work))
value_bins = np.searchsorted(cdf, random_values)
radius = midpoint_bins[value_bins]
#plt.hist(radius, bins=200)
#plt.savefig("%s/my_dist_hist_selected.png" % context.cache_path)
# plt.hist(radius, bins=200)
# plt.savefig("%s/my_dist_hist_selected.png" % context.cache_path)
#prepare the home and destination coordinates
# prepare the home and destination coordinates
destination_coordinates = destinations["work"]["locations"]
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
tree = KDTree(destination_coordinates)
indices, distances = tree.query_radius(home_coordinates, r=radius, return_distance=True, sort_results=True)
......@@ -174,7 +173,7 @@ def impute_work_locations_method1(context, distributions, destinations, df_syn_p
indices[i] = [fac]
distances[i] = [dist[0][1]]
#indices, distances = find_locations(home_coordinates, destination_coordinates, radius)
# indices, distances = find_locations(home_coordinates, destination_coordinates, radius)
# Select the distances
# Method 1: Select the last index and distance for each person. ToDo note:Make the threshold distance zero
......@@ -184,8 +183,8 @@ def impute_work_locations_method1(context, distributions, destinations, df_syn_p
discrete_d = np.array(discrete_d)
#make a plot to look at the distance distribution with this method for smaller distances
plt.hist(discrete_d[discrete_d <25000], bins=200)
# make a plot to look at the distance distribution with this method for smaller distances
plt.hist(discrete_d[discrete_d < 25000], bins=200)
#
plt.savefig("%s/my_dist_hist_outputQuery_Method1.png" % context.cache_path)
......@@ -199,52 +198,53 @@ def impute_work_locations_method1(context, distributions, destinations, df_syn_p
df_work_persons["distance"] = np.sqrt(
(df_work_persons["home_x"] - df_work_persons["work_x"]) ** 2 +
(df_work_persons["home_y"] - df_work_persons["work_y"] ) ** 2
(df_work_persons["home_y"] - df_work_persons["work_y"]) ** 2
)
#plt.hist(df_work_persons["distance"], bins = 200)
#plt.savefig("%s/my_dist_hist_persons_imputedQuery.png" % context.cache_path)
# plt.hist(df_work_persons["distance"], bins = 200)
# plt.savefig("%s/my_dist_hist_persons_imputedQuery.png" % context.cache_path)
#df_work_persons.loc[:,"distance_km"] = 0.001 * df_work_persons["distance"]
#df_test = df_work_persons[df_work_persons["distance_km"] < 17].copy()
# df_work_persons.loc[:,"distance_km"] = 0.001 * df_work_persons["distance"]
# df_test = df_work_persons[df_work_persons["distance_km"] < 17].copy()
#plt.hist(df_test["distance_km"], bins = 200)
# plt.hist(df_test["distance_km"], bins = 200)
#plt.savefig("%s/my_dist_hist_persons_imputed_25km.png" % context.cache_path)
# plt.savefig("%s/my_dist_hist_persons_imputed_25km.png" % context.cache_path)
df_work_persons = df_work_persons[["person_id",
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
df_work_persons = spatial_utils.to_gpd(context, df_work_persons, coord_type="work")
return df_work_persons[["person_id", "destination_id", "geometry"]]
def impute_work_locations(context, distributions, destinations, df_syn_persons_work, df_destinations):
# This method selects based on a threshold buffer for the radius and weighted based
# on number of employees in a location
#prepare the distances used for sampling based on the cdf (this is the radius variable)
# prepare the distances used for sampling based on the cdf (this is the radius variable)
cdf = distributions["work"]["cdf"]
midpoint_bins = distributions["work"]["midpoint_bins"]
random_values = np.random.rand(len(df_syn_persons_work))
value_bins = np.searchsorted(cdf, random_values)
radius = midpoint_bins[value_bins]
#plt.hist(radius, bins=200)
#plt.savefig("%s/my_dist_hist_selected.png" % context.cache_path)
#radius = prepare_radius_from_cdf(cdf, midpoint_bins, random_values)
# plt.hist(radius, bins=200)
# plt.savefig("%s/my_dist_hist_selected.png" % context.cache_path)
# radius = prepare_radius_from_cdf(cdf, midpoint_bins, random_values)
#define a threshold distance to be added to the target distance that serves as maximum distance for a person
# define a threshold distance to be added to the target distance that serves as maximum distance for a person
# the target distance in this case is the midpoint distance of the histogram plus a maximum threshold distance
#this is because we want to select distances within a boundary of this threshold
threshold = distributions["work"]["threshold_buffer"] #in meters
# this is because we want to select distances within a boundary of this threshold
threshold = distributions["work"]["threshold_buffer"] # in meters
radius = radius + threshold
#prepare the home and destination coordinates
# prepare the home and destination coordinates
destination_coordinates = destinations["work"]["locations"]
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
tree = KDTree(destination_coordinates)
indices, distances = tree.query_radius(home_coordinates, r=radius, return_distance=True, sort_results=True)
......@@ -259,10 +259,9 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
indices[i] = [fac]
distances[i] = [dist[0][1]]
# indices, distances = find_locations(home_coordinates, destination_coordinates, radius)
#indices, distances = find_locations(home_coordinates, destination_coordinates, radius)
#Select the distances
# Select the distances
discrete_indices = []
discrete_distances = []
......@@ -271,7 +270,7 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
# In order not to possibly sample distances that are smaller than target distance, is to have a minimum
# threshold below the target distance and consider locations between the threshold and target (donut shape)
for ind, dist in zip(indices,distances):
for ind, dist in zip(indices, distances):
# limit to distances (indices) within for random selection so that we can use np.choose which works with 32 items
if len(ind) > 2:
......@@ -287,13 +286,13 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
# print("filter3: ", (dist >= minimum_selection_bound) & (dist <= maximum_selection_bound))
ind = ind[(dist >= minimum_selection_bound) & (dist <= maximum_selection_bound)]
#Select a location based on number of employee as weights
# Select a location based on number of employee as weights
weights = destinations["work"]["no_employees"][ind]
weights = weights.astype(float) # have to specify that it is a float or it raises an error...need to check why
weights = weights.astype(float) # have to specify that it is a float or it raises an error...need to check why
weights /= np.sum(weights)
#query_size = len(ind)
#selector = np.random.choice(query_size, p=weights)
#index = np.choose(selector, ind.T)
# query_size = len(ind)
# selector = np.random.choice(query_size, p=weights)
# index = np.choose(selector, ind.T)
index = np.random.choice(ind, p=weights)
discrete_indices.append(destinations["work"]["identifiers"][index])
......@@ -307,28 +306,26 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
df_work_persons["distance"] = np.sqrt(
(df_work_persons["home_x"] - df_work_persons["work_x"]) ** 2 +
(df_work_persons["home_y"] - df_work_persons["work_y"] ) ** 2
(df_work_persons["home_y"] - df_work_persons["work_y"]) ** 2
)
plt.hist(df_work_persons["distance"], bins = 200)
plt.hist(df_work_persons["distance"], bins=200)
plt.savefig("%s/my_dist_hist_persons_imputedQuery.png" % context.cache_path)
df_work_persons.loc[:,"distance_km"] = 0.001 * df_work_persons["distance"]
df_work_persons.loc[:, "distance_km"] = 0.001 * df_work_persons["distance"]
#df_test = df_work_persons[df_work_persons["distance_km"] < 17].copy()
# df_test = df_work_persons[df_work_persons["distance_km"] < 17].copy()
#plt.hist(df_work_persons["distance"], bins=200)
#plt.hist(df_test["distance_km"], bins = 200)
# plt.hist(df_work_persons["distance"], bins=200)
# plt.hist(df_test["distance_km"], bins = 200)
#plt.savefig("%s/my_dist_hist_persons_imputed_25km.png" % context.cache_path)
# plt.savefig("%s/my_dist_hist_persons_imputed_25km.png" % context.cache_path)
df_work_persons = df_work_persons[["person_id",
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
"work_x", "work_y",
"destination_id"]].rename({"work_x": "x",
"work_y": "y"},
axis=1)
df_work_persons = spatial_utils.to_gpd(context, df_work_persons, coord_type="work")
......@@ -336,21 +333,18 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
def execute(context):
# Prepare data
distance_distributions = context.stage("synthesis.population.spatial.primary.weekend.pry_distance_distributions")
df_home = prepare_home_locations(context)
destinations, df_destinations = prepare_destinations(context)
#prepare synthetic persons for work and education
# prepare synthetic persons for work and education
# Load person information
df_persons = context.stage("synthesis.population.enriched")
df_persons = pd.merge(df_persons, df_home, on="person_id")
#Todo: the above df_persons merge is proprably a duplicate, need to sort it out when my brain is sharper than now
# Todo: the above df_persons merge is probably a duplicate, need to sort it out when my brain is sharper than now
# Something to do with the df_homes using the syn.pop.enriched
# Load trips and get persons that do work trips #Todo maybe i could sort this out using mz.commute info?
......@@ -359,15 +353,16 @@ def execute(context):
df_trips_work = df_trips[df_trips["following_purpose"] == "work"].copy()
df_syn_persons_work = df_persons[df_persons["person_id"].isin(df_trips_work["person_id"].unique())]
#create work locations for work persons
# create work locations for work persons
df_locations = impute_work_locations(context, distance_distributions, destinations, df_syn_persons_work, df_destinations)
#df_locations = impute_nearest_work_locations(context)
df_locations = impute_work_locations(context, distance_distributions, destinations, df_syn_persons_work,
df_destinations)
# df_locations = impute_nearest_work_locations(context)
return df_locations
#todo: some things not considered and assumptions made:
# todo: some things not considered and assumptions made:
# 1. work distances are not by zone (any need to differentiate same zone work locations and different zones work locs?)
# 2. international vs national trips
# 3. would there be influence of mode usage on the work distances like in the secondary activities\
# I should test if this works for weekdays and compare with what is given from the zone. How to validate the flows?
\ No newline at end of file
# I should test if this works for weekdays and compare with what is given from the zone. How to validate the flows?
......@@ -42,6 +42,7 @@ def execute(context):
#distance distribution for weekend scenario
is_weekend_scenario = context.config("weekend_scenario")
if is_weekend_scenario:
df_persons = df_persons[df_persons["weekend"]]
print("INFO: creating distance distribution for weekend...")
......
......@@ -13,11 +13,13 @@ def configure(context):
context.stage("synthesis.population.trips")
context.stage("synthesis.population.sampled")
context.stage("synthesis.population.destinations")
context.stage("synthesis.population.spatial.home.locations")
context.stage("synthesis.population.spatial.primary.locations")
context.stage("synthesis.population.spatial.secondary.distance_distributions")
context.stage("synthesis.population.destinations_schedule")
context.stage("synthesis.population.destinations_detailed")
context.config("random_seed")
context.config("threads")
......@@ -34,6 +36,7 @@ def prepare_locations(context):
df_work = df_work.rename(columns={"geometry": "work"})
df_education = df_education.rename(columns={"geometry": "education"})
df_locations = context.stage("synthesis.population.sampled")[["person_id", "household_id"]]
df_locations = pd.merge(df_locations, df_home[["household_id", "home"]], how="left", on="household_id")
df_locations = pd.merge(df_locations, df_work[["person_id", "work"]], how="left", on="person_id")
......@@ -44,42 +47,11 @@ def prepare_locations(context):
def prepare_destinations(context):
df_destinations = context.stage("synthesis.population.destinations_schedule")
M = np.max(df_destinations["destination_id"].values.tolist()) + 1
det_activities = context.config("use_detailed_activities")
data = {}
if det_activities:# == "true":
df_home = context.stage("synthesis.population.spatial.home.locations").copy()[["household_id", "geometry"]].rename({"household_id": "destination_id"}, axis = 1)
df_home.loc[:, "destination_id"] = np.array(range(M, M + len(df_home), 1))
df_home.loc[:, "offers_visits"] = True
df_home.loc[:, "offers_work"] = False
df_home.loc[:, "offers_education"] = False
df_home.loc[:, "offers_leisure"] = False
df_home.loc[:, "offers_grocery"] = False
df_home.loc[:, "offers_other(S)"] = False
df_home.loc[:, "offers_culture"] = False
df_home.loc[:, "education_type"] = False
df_home.loc[:, "offers_religion"] = False
df_home.loc[:, "offers_gastronomy"] = False
df_home.loc[:, "offers_sport"] = False
df_home.loc[:, "offers_other(L)"] = False
df_home.loc[:, "offers_other"] = False
df_home.loc[:, "offers_volunteer"] = False
df_home.loc[:, "offers_outdoor"] = False
df_home.loc[:, "destination_x"] = df_home["geometry"].apply(lambda x: x.x).values
df_home.loc[:, "destination_y"] = df_home["geometry"].apply(lambda x: x.y).values
df_home.loc[:, "number_employees"] = 1
df_home.loc[:, "open_0-3"] = 1
df_home.loc[:, "open_3-6"] = 1
df_home.loc[:, "open_6-9"] = 1
df_home.loc[:, "open_9-12"] = 1
df_home.loc[:, "open_12-15"] = 1
df_home.loc[:, "open_15-18"] = 1
df_home.loc[:, "open_18-21"] = 1
df_home.loc[:, "open_21-24"] = 1
df_home = pd.DataFrame(df_home)
df_destinations = pd.concat([df_destinations, df_home])
if det_activities:
df_destinations = context.stage("synthesis.population.destinations_detailed")
identifiers = df_destinations["destination_id"].values
locations = np.vstack(df_destinations["geometry"].apply(lambda x: np.array([x.x, x.y])).values)
nb_employees = df_destinations["number_employees"].values
......@@ -156,6 +128,7 @@ def execute(context):
df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"]
df_primary = prepare_locations(context)
# Prepare data
distance_distributions = context.stage("synthesis.population.spatial.secondary.distance_distributions")
destinations = prepare_destinations(context)
......@@ -199,9 +172,22 @@ def execute(context):
df_locations = pd.concat(df_locations).sort_values(by=["person_id", "trip_index"])
df_convergence = pd.concat(df_convergence)
#print([type(x) for x in df_locations["destination_id"].values[:5]])
df_locations["destination_id"] = [x[0] if type(x) == np.ndarray else x for x in df_locations["destination_id"].values]
print("Success rate:", df_convergence["valid"].mean())
#validation
df_destinations = context.stage("synthesis.population.destinations_detailed")
# print(df_destinations["destination_id"].describe())
#
# L = np.unique(df_locations["destination_id"].values)
# M = np.unique(df_destinations["destination_id"].values)
#
# print([l if l not in M else 0 for l in L])
assert all(
items in df_destinations["destination_id"].values for items in df_locations["destination_id"].values)
return df_locations, df_convergence
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment