Commit d1062112 authored by kaghog's avatar kaghog
Browse files

Create a donut-shaped boundary of the indexes from which distances are sampled

parent 45ed99a7
......@@ -3,12 +3,6 @@ import numpy as np
import pandas as pd
import shapely.geometry as geo
from synthesis.population.spatial.primary.weekend.components import CustomDistanceSampler, CustomDiscretizationSolver
from synthesis.population.spatial.primary.weekend.problems import find_assignment_problems
from synthesis.population.spatial.secondary.rda import AssignmentSolver, DiscretizationErrorObjective, \
GravityChainSolver
def configure(context):
context.stage("synthesis.population.spatial.primary.weekend.work_locations")
context.stage("synthesis.population.spatial.primary.weekend.education_locations")
......
......@@ -5,11 +5,6 @@ import shapely.geometry as geo
from sklearn.neighbors import KDTree
import data.spatial.utils as spatial_utils
from synthesis.population.spatial.primary.weekend.components import CustomDistanceSampler, CustomDiscretizationSolver
from synthesis.population.spatial.primary.weekend.problems import find_assignment_problems
from synthesis.population.spatial.secondary.rda import AssignmentSolver, DiscretizationErrorObjective, \
GravityChainSolver
def configure(context):
context.stage("synthesis.population.trips")
......@@ -91,9 +86,14 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
#prepare the distances used for sampling based on the cdf (this is the radius variable)
cdf = distributions["work"]["cdf"]
midpoint_bins = distributions["work"]["midpoint_bins"]
random_values = len(df_syn_persons_work)
random_values = np.random.rand(len(df_syn_persons_work))
radius = prepare_radius_from_cdf(cdf, midpoint_bins, random_values)
#define a threshold distance to be added to the target distance that serves as maximum distance for a person
#this is because we want to select distances within a boundary of this threshold
threshold = 500 #in meters #Todo maybe get this value from the average midpoint of the distance bins
radius = radius + threshold
#prepare the home and destination coordinates
destination_coordinates = destinations["work"]["locations"]
home_coordinates = np.vstack([df_syn_persons_work["home_x"], df_syn_persons_work["home_y"]]).T
......@@ -101,26 +101,45 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
indices, distances = find_locations(home_coordinates, destination_coordinates, radius)
# Select the last index and distance for each person
discrete_indices = [l[-1] for l in indices]
discrete_distances = [d[-1] for d in distances]
#ToDo choose location from indices using weights based on number of employees identified in the locations
#use number of employees attributes in destinations
#Select the distances
discrete_indices = []
discrete_distances = []
for ind in indices:
weights = destinations["work"]["no_employees"][ind][0]
# Method 1: Select the last index and distance for each person. ToDo note:Make the threshold distance zero
#discrete_indices = [l[-1] for l in indices]
#discrete_distances = [d[-1] for d in distances]
# Method 2: select distances based on number of employees identified in the locations
# In order to avoid sampling distances smaller than the target distance, we use a minimum
# threshold below the target distance and consider locations between the threshold and target (donut shape)
# the target distance in this case is the midpoint distance of the histogram plus a maximum threshold distance
for ind, dist in zip(indices,distances):
# limit to distances (indices) within for random selection so that we can use np.choose which works with 32 items
farthest_dist = dist[-1]
minimum_selection_bound = max((farthest_dist - 2*threshold), dist[0])
maximum_selection_bound = farthest_dist
ind = ind[(dist >= minimum_selection_bound) & (dist <= maximum_selection_bound)]
#Select a location based on number of employee as weights
weights = destinations["work"]["no_employees"][ind]
weights = weights.astype(float) # have to specify that it is a float or it raises an error...need to check why
weights /= np.sum(weights)
selector = np.random.choice(ind.len, p=weights)
index = np.choose(selector, ind.T)
#query_size = len(ind)
#selector = np.random.choice(query_size, p=weights)
#index = np.choose(selector, ind.T)
index = np.random.choice(ind, p=weights)
discrete_indices.append(destinations["work"]["identifiers"][index])
discrete_distances.append(destinations["work"]["locations"][index])
print("INFO: imputing work locations...")
df_candidates = df_destinations[df_destinations["offers_work"]].copy()
df_work_persons = df_syn_persons_work.copy()
df_work_persons["work_x"] = df_candidates.iloc[discrete_indices]["x"].values
df_work_persons["work_y"] = df_candidates.iloc[discrete_indices]["y"].values
df_work_persons["work_x"] = df_candidates.iloc[discrete_indices]["destination_x"].values
df_work_persons["work_y"] = df_candidates.iloc[discrete_indices]["destination_y"].values
df_work_persons["destination_id"] = df_candidates.iloc[discrete_indices]["destination_id"].values
df_work_persons = df_work_persons[["person_id",
......@@ -147,13 +166,14 @@ def execute(context):
# Load person information
df_persons = context.stage("synthesis.population.enriched")
df_persons = pd.merge(df_persons, df_home, on="person_id")
#Todo the above df_persons merge is probably a duplicate, need to sort it out when my brain is sharper than now
#Todo: the above df_persons merge is probably a duplicate, need to sort it out when my brain is sharper than now
# Something to do with the df_homes using the syn.pop.enriched
# Load trips and get persons that do work trips #Todo maybe i could sort this out using mz.commute info?
df_trips = context.stage("synthesis.population.trips").sort_values(by=["person_id", "trip_index"])
df_trips_work = df_trips[(df_trips["preceding_purpose"].isin(["home", "work"])
& df_trips["following_purpose"].isin(["home", "work"]))].copy()
# df_trips_work = df_trips[(df_trips["preceding_purpose"].isin(["home", "work"])
# & df_trips["following_purpose"].isin(["home", "work"]))].copy()
df_trips_work = df_trips[df_trips["following_purpose"] == "work"].copy()
df_syn_persons_work = df_persons[df_persons["person_id"].isin(df_trips_work["person_id"].unique())]
#create work locations for work persons
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment