Commit d7d567b1 authored by kaghog

draw from fixed distance distribution
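This stage now draws trip distances from a fixed, pre-estimated distance distribution instead of leaving the search radius open. A minimal sketch of the underlying idea, inverse-transform sampling on an empirical distance CDF (the helper name and the arrays below are illustrative, not part of the diff):

    import numpy as np

    def sample_distances(cdf, values, count, random):
        # Draw u ~ U(0, 1) and map it through the empirical CDF; the result
        # follows the fixed distance distribution encoded by (cdf, values).
        u = random.random_sample(count)
        return values[np.searchsorted(cdf, u)]

    random = np.random.RandomState(0)
    cdf = np.array([0.2, 0.5, 0.8, 1.0])                # monotone, ends at 1.0
    values = np.array([500.0, 1000.0, 2000.0, 5000.0])  # distance bins in meters
    print(sample_distances(cdf, values, 3, random))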

parent f161127b
@@ -50,7 +50,7 @@ def execute(context):
     df_trips.loc[df_trips["trip_id"] == 1, "preceding_purpose"] = "home"
 
     # Filtering for only work and education
-    primary_activities = ["work", "education"]
+    primary_activities = ["home", "work", "education"]
 
     df_trips = df_trips[(df_trips["preceding_purpose"].isin(primary_activities)
         & df_trips["following_purpose"].isin(primary_activities))]
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely.geometry as geo
from sklearn.neighbors import KDTree
from synthesis.population.spatial.primary.weekend.components import CustomDistanceSampler, CustomDiscretizationSolver
from synthesis.population.spatial.primary.weekend.problems import find_assignment_problems
from synthesis.population.spatial.secondary.rda import AssignmentSolver, DiscretizationErrorObjective, \
    GravityChainSolver
def configure(context):
    context.stage("synthesis.population.trips")
    context.stage("synthesis.population.sampled")
    context.stage("synthesis.population.spatial.home.locations")
    context.stage("synthesis.population.spatial.primary.weekend.distance_distributions")
    context.stage("synthesis.population.destinations")

    context.config("random_seed")
    context.config("threads")
    context.config("output_path")
def prepare_locations(context):
    # Load persons and their primary locations
    df_home = context.stage("synthesis.population.spatial.home.locations")
    df_home = df_home.rename(columns={"geometry": "home"})

    df_locations = context.stage("synthesis.population.sampled")[["person_id", "household_id"]]
    df_locations = pd.merge(df_locations, df_home[["household_id", "home"]], how="left", on="household_id")

    return df_locations[["person_id", "home"]].sort_values(by="person_id")
def prepare_destinations(context):
    df_destinations = context.stage("synthesis.population.destinations")

    M = np.max(df_destinations["destination_id"].values.tolist()) + 1

    data = {}
    identifiers = df_destinations["destination_id"].values
    locations = np.vstack(df_destinations["geometry"].apply(lambda x: np.array([x.x, x.y])).values)

    for purpose in ("work", "education"):
        f = df_destinations["offers_%s" % purpose].values

        data[purpose] = dict(
            identifiers=identifiers[f],
            locations=locations[f]
        )

        print(purpose, len(identifiers[f]))

    return data
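# For each purpose, "identifiers" and "locations" are aligned arrays: row i of
# locations holds the projected x/y coordinates of destination identifiers[i].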
def resample_cdf(cdf, factor):
    if factor >= 0.0:
        cdf = cdf * (1.0 + factor * np.arange(1, len(cdf) + 1) / len(cdf))
    else:
        cdf = cdf * (1.0 + abs(factor) - abs(factor) * np.arange(1, len(cdf) + 1) / len(cdf))

    cdf /= cdf[-1]
    return cdf
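# A positive factor deflates the lower part of the renormalized CDF, shifting
# probability mass towards longer distances; a negative factor does the
# opposite. With factor == 0 the weights are constant, so after
# renormalization the CDF is returned unchanged.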
def resample_distributions(distributions, factors):
    for mode, mode_distributions in distributions.items():
        for distribution in mode_distributions["distributions"]:
            distribution["cdf"] = resample_cdf(distribution["cdf"], factors[mode])
# Work in progress (not yet called from execute() below): draw one distance per
# person from the fixed distance distribution, then query candidate work places
# around each home. The sampling step is an assumed completion that relies on
# aligned "cdf" / "values" arrays in the distribution.
def impute_work_locations(df_trips, destinations, df_primary, distributions):
    work_coordinates = destinations["work"]["locations"]
    tree = KDTree(work_coordinates)
    home_coordinates = np.vstack(df_primary["home"].apply(lambda p: np.array([p.x, p.y])).values)

    # Inverse-transform sampling on the fixed distance CDF
    distance_cdf = distributions["cdf"]
    radii = distributions["values"][np.searchsorted(distance_cdf, np.random.random_sample(len(home_coordinates)))]

    indices, distances = tree.query_radius(home_coordinates, r=radii, return_distance=True)
    return indices, distances
def execute(context):
    # Load trips and primary locations
    df_trips = context.stage("synthesis.population.trips").sort_values(by=["person_id", "trip_index"])
    df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"]

    df_primary = prepare_locations(context)

    # Prepare data
    distance_distributions = context.stage("synthesis.population.spatial.primary.weekend.distance_distributions")
    destinations = prepare_destinations(context)

    # Resampling for calibration
    resample_distributions(distance_distributions, dict(
        car=0.8, car_passenger=1.0, pt=1.0, bike=0.0, walk=0.0
    ))

    # Segment into subsamples
    processes = context.config("threads")

    unique_person_ids = df_trips["person_id"].unique()
    number_of_persons = len(unique_person_ids)
    unique_person_ids = np.array_split(unique_person_ids, processes)

    random = np.random.RandomState(context.config("random_seed"))
    random_seeds = random.randint(10000, size=processes)

    # Create batch problems for parallelization
    batches = []

    for index in range(processes):
        batches.append((
            df_trips[df_trips["person_id"].isin(unique_person_ids[index])],
            df_primary[df_primary["person_id"].isin(unique_person_ids[index])],
            random_seeds[index]
        ))

    # Run algorithm in parallel
    with context.progress(label="Assigning locations to persons", total=number_of_persons):
        with context.parallel(processes=processes, data=dict(
            distance_distributions=distance_distributions,
            destinations=destinations
        )) as parallel:
            df_locations, df_convergence = [], []

            for df_locations_item, df_convergence_item in parallel.imap_unordered(process, batches):
                df_locations.append(df_locations_item)
                df_convergence.append(df_convergence_item)

    df_locations = pd.concat(df_locations).sort_values(by=["person_id", "trip_index"])
    df_convergence = pd.concat(df_convergence)

    print("Success rate:", df_convergence["valid"].mean())
    return df_locations, df_convergence
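# Parallel worker: receives one batch (trips, primary locations, seed) prepared
# in execute() above and returns located trips plus per-problem convergence flags.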
def process(context, arguments):
    df_trips, df_primary, random_seed = arguments

    # Set up RNG (seeded per batch so parallel workers draw independent streams)
    random = np.random.RandomState(random_seed)

    # Set up distance sampler
    distance_distributions = context.data("distance_distributions")

    distance_sampler = CustomDistanceSampler(
        maximum_iterations=1000,
        random=random,
        distributions=distance_distributions)

    # Set up relaxation solver; currently, we do not consider tail problems.
    relaxation_solver = GravityChainSolver(
        random=random, eps=10.0, lateral_deviation=10.0, alpha=0.1
    )

    # Set up discretization solver
    destinations = context.data("destinations")
    discretization_solver = CustomDiscretizationSolver(destinations)

    # Set up assignment solver
    thresholds = dict(
        car=200.0, car_passenger=200.0, pt=200.0,
        bike=100.0, walk=100.0
    )

    assignment_objective = DiscretizationErrorObjective(thresholds=thresholds)
    assignment_solver = AssignmentSolver(
        distance_sampler=distance_sampler,
        relaxation_solver=relaxation_solver,
        discretization_solver=discretization_solver,
        objective=assignment_objective,
        maximum_iterations=20
    )

    df_locations = []
    df_convergence = []

    last_person_id = None

    for problem in find_assignment_problems(df_trips, df_primary):
        result = assignment_solver.solve(problem)

        starting_trip_index = problem["trip_index"]

        for index, (identifier, location) in enumerate(
                zip(result["discretization"]["identifiers"], result["discretization"]["locations"])):
            df_locations.append((
                problem["person_id"], starting_trip_index + index, identifier, geo.Point(location)
            ))

        df_convergence.append((
            result["valid"], problem["size"]
        ))

        if problem["person_id"] != last_person_id:
            last_person_id = problem["person_id"]
            context.progress.update()

    df_locations = pd.DataFrame.from_records(df_locations,
        columns=["person_id", "trip_index", "destination_id", "geometry"])
    df_locations = gpd.GeoDataFrame(df_locations, crs=dict(init="epsg:2154"))

    df_convergence = pd.DataFrame.from_records(df_convergence, columns=["valid", "size"])
    return df_locations, df_convergence