Commit 46eea1c7 authored by Grace Orowo Kagho's avatar Grace Orowo Kagho
Browse files

make naming flexible for output files and calibration value changes

parent 371d8e66
......@@ -10,9 +10,11 @@ run:
# - data.statpop.scaled
# - synthesis.population.matched
# - data.microcensus.households
# - data.microcensus.csv
# - population.destinations
# - synthesis.population.destinations
# - synthesis.population.spatial.secondary.locations
# - synthesis.population.spatial.primary.weekend.locations
# - matsim.facilities
# - matsim.population
# - matsim.households
......
......@@ -8,6 +8,6 @@ def execute(context):
df_trips = context.stage("data.microcensus.trips")
df_transit = context.stage("data.microcensus.transit")
df_persons.to_csv("%s/persons.csv" % context.cache_path, sep = ";", index = None)
df_trips.to_csv("%s/trips.csv" % context.cache_path, sep = ";", index = None)
df_transit.to_csv("%s/transit.csv" % context.cache_path, sep = ";", index = None)
df_persons.to_csv("%s/mz_persons.csv" % context.cache_path, sep = ";", index = None)
df_trips.to_csv("%s/mz_trips.csv" % context.cache_path, sep = ";", index = None)
df_transit.to_csv("%s/mz_transit.csv" % context.cache_path, sep = ";", index = None)
......@@ -21,6 +21,7 @@ def validate(context):
def execute(context):
output_path = context.config("output_path")
output_suffix = "_no_employeesAll"
# Prepare households
df_households = context.stage("synthesis.population.enriched").rename(
......@@ -67,7 +68,7 @@ def execute(context):
"mz_person_id"
]]
df_persons.to_csv("%s/persons.csv" % output_path, sep = ";", index = None)
df_persons.to_csv(f"%s/persons{output_suffix}.csv" % output_path, sep = ";", index = None)
# Prepare activities
df_activities = context.stage("synthesis.population.activities").rename(
......@@ -87,7 +88,7 @@ def execute(context):
#df_activities = df_activities.astype({"is_last": int})
df_activities.to_csv("%s/activities.csv" % output_path, sep = ";", index = None)
df_activities.to_csv(f"%s/activities{output_suffix}.csv" % output_path, sep = ";", index = None)
# Prepare trips
df_trips = context.stage("synthesis.population.trips").rename(
......@@ -112,7 +113,7 @@ def execute(context):
#"is_first", "is_last"
]]
df_trips.to_csv("%s/trips.csv" % output_path, sep = ";", index = None)
df_trips.to_csv(f"%s/trips{output_suffix}.csv" % output_path, sep = ";", index = None)
# Prepare spatial data sets
df_locations = context.stage("synthesis.population.spatial.locations")#[[
......@@ -168,7 +169,7 @@ def execute(context):
geom = df_spatial["geometry"].values
df_spatial["crowfly_distance"] = [geom[i].length for i in range(len(geom))]
df_spatial = df_spatial.drop(columns = ["geometry"])
df_spatial.to_csv("%s/trips_with_distance.csv" % output_path, sep = ";", index = None)
df_spatial.to_csv(f"%s/trips_with_distance{output_suffix}.csv" % output_path, sep = ";", index = None)
# Write meta information
information = dict(
......
import synthesis.population.spatial.secondary.rda as rda
import sklearn.neighbors
import numpy as np
class CustomDistanceSampler(rda.FeasibleDistanceSampler):
    """
    Distance sampler that draws one distance per trip from per-mode
    distributions selected by the trip's travel time.
    """

    def __init__(self, random, distributions, maximum_iterations = 1000):
        """
        :param random: random source; only its ``random_sample()`` method is
            used in this class (presumably a numpy RandomState - confirm).
        :param distributions: mapping from mode to a dict with the keys
            "bounds" and "distributions"; each entry of "distributions" is
            itself a dict with the keys "values" and "cdf".
        :param maximum_iterations: forwarded unchanged to the base class.
        """
        super().__init__(random = random, maximum_iterations = maximum_iterations)

        self.distributions = distributions
        self.random = random

    def sample_distances(self, problem):
        """
        Sample one distance for every (mode, travel time) pair of ``problem``.

        The returned array has ``problem["size"] + 1`` entries; entries that
        have no corresponding mode / travel time pair stay at zero.
        """
        sampled = np.zeros((problem["size"] + 1))
        legs = zip(problem["modes"], problem["travel_times"])

        for leg, (mode, travel_time) in enumerate(legs):
            per_mode = self.distributions[mode]

            # The number of bounds exceeded by the travel time selects the distribution
            bin_index = np.count_nonzero(travel_time > per_mode["bounds"])
            selected = per_mode["distributions"][bin_index]

            # The number of cdf entries below a uniform draw selects the value
            # (presumably inverse-CDF sampling over an ascending cdf - confirm)
            draw = self.random.random_sample()
            value_index = np.count_nonzero(draw > selected["cdf"])

            sampled[leg] = selected["values"][value_index]

        return sampled
class CustomDiscretizationSolver(rda.DiscretizationSolver):
    """
    Discretization solver that snaps every continuous location to the nearest
    candidate of the matching purpose using one KD-tree per purpose.
    """

    def __init__(self, data):
        """
        :param data: mapping from purpose to a dict with the keys "locations"
            (coordinates used to build the KD-tree) and "identifiers"
            (indexed in parallel with "locations").

        Note that the base-class initializer is not invoked here.
        """
        self.data = data
        self.query_size = 5 # Not used anywhere in this class
        self.indices = {}

        for purpose, candidates in data.items():
            print("Constructing spatial index for %s ..." % purpose)
            self.indices[purpose] = sklearn.neighbors.KDTree(candidates["locations"])

    def solve(self, problem, locations):
        """
        Replace each entry of ``locations`` by its nearest candidate for the
        corresponding entry of ``problem["purposes"]``.

        :return: dict with ``valid`` (always True), ``locations`` (the stacked
            candidate coordinates) and ``identifiers`` (list of the matching
            candidate identifiers).
        """
        identifiers = []
        coordinates = []

        for point, purpose in zip(locations, problem["purposes"]):
            # The tree expects a 2D query array; take the single nearest neighbor
            neighbors = self.indices[purpose].query(point.reshape(1, -1), return_distance = False)
            nearest = neighbors[0][0]

            candidates = self.data[purpose]
            identifiers.append(candidates["identifiers"][nearest])
            coordinates.append(candidates["locations"][nearest])

        return dict(
            valid = True, locations = np.vstack(coordinates), identifiers = identifiers
        )
import numpy as np
# Columns read from the trip table, in the order in which each row is unpacked below.
FIELDS = [
    "person_id", "trip_index", "preceding_purpose", "following_purpose",
    "mode", "travel_time", "activity_duration", "arrival_time"
]

# Purposes that close the problem currently being built once they are reached.
FIXED_PURPOSES = ["home"]


def find_bare_assignment_problems(df):
    """
    Split the trip table into consecutive "problems" and yield them one by one.

    A problem collects the trips of one person until a trip arrives at a fixed
    purpose (see FIXED_PURPOSES) or until the rows switch to another person.
    A problem that is still open when the table ends is yielded as well, so
    the trailing trips of the last person are not lost.

    The rows are consumed in their given order; they are presumably expected
    to be grouped by person and sorted by trip index (verify against caller).

    :param df: data frame providing at least the columns listed in FIELDS.
    :return: generator of dicts with the keys ``person_id``, ``trip_index``
        (both taken from the first trip of the problem), ``purposes`` (the
        preceding purpose of the first trip followed by the following purpose
        of every trip), ``modes``, ``travel_times``, ``activity_duration`` and
        ``activity_start_time`` (one entry per trip; the latter is filled from
        the ``arrival_time`` column).
    """
    problem = None

    for row in df[FIELDS].itertuples(index=False):
        person_id, trip_index, preceding_purpose, following_purpose, mode, travel_time, act_dur, arr_time = row

        if problem is not None and person_id != problem["person_id"]:
            # We switch person, but we're still tracking a problem. This is a tail!
            yield problem
            problem = None

        if problem is None:
            # Start a new problem
            problem = dict(
                person_id=person_id, trip_index=trip_index, purposes=[preceding_purpose],
                modes=[], travel_times=[], activity_duration=[], activity_start_time=[]
            )

        problem["purposes"].append(following_purpose)
        problem["modes"].append(mode)
        problem["travel_times"].append(travel_time)
        problem["activity_duration"].append(act_dur)
        problem["activity_start_time"].append(arr_time)

        if problem["purposes"][-1] in FIXED_PURPOSES:
            # The current chain (or initial tail) ends with a fixed activity.
            yield problem
            problem = None

    if problem is not None:
        # The table ended while a problem was still open: emit the final tail,
        # exactly as it is done for every other person when the person changes.
        yield problem
# Columns read from the location table. The position of a purpose in this list
# is also the position of its location inside each row tuple.
LOCATION_FIELDS = ["person_id", "home"]


def find_assignment_problems(df, df_locations):
    """
    Enriches assignment problems with:
    - Locations of the fixed activities
    - Size of the problem
    - Reduces purposes to the variable ones

    Problems without any variable activity are skipped.

    The location table is read strictly forward, so every person with a
    non-empty problem must appear in ``df_locations`` in the same order as the
    problems are produced from ``df``.

    :param df: trip table passed on to ``find_bare_assignment_problems``.
    :param df_locations: data frame providing the columns in LOCATION_FIELDS;
        the fixed locations are read through their ``x`` and ``y`` attributes.
    :return: generator of problem dicts, extended by ``size``, ``origin`` and
        ``destination`` (each either None or an array of shape (1, 2)).
    :raises RuntimeError: if a problem neither starts nor ends with a fixed
        purpose, or if no location row can be found for a person.
    """
    location_iterator = df_locations[LOCATION_FIELDS].itertuples(index=False)
    current_location = None

    for problem in find_bare_assignment_problems(df):
        origin_purpose = problem["purposes"][0]
        destination_purpose = problem["purposes"][-1]

        origin_fixed = origin_purpose in FIXED_PURPOSES
        destination_fixed = destination_purpose in FIXED_PURPOSES

        if not (origin_fixed or destination_fixed):
            raise RuntimeError("The presented 'problem' is neither a chain nor a tail")

        # Reduce purposes: drop the fixed activities at either end
        first = 1 if origin_fixed else 0
        last = -1 if destination_fixed else None
        problem["purposes"] = problem["purposes"][first:last]

        # Define size
        problem["size"] = len(problem["purposes"])

        if problem["size"] == 0:
            continue # We can skip if there are no variable activities

        # Advance location iterator until we arrive at the current problem's person
        while current_location is None or current_location[0] != problem["person_id"]:
            try:
                current_location = next(location_iterator)
            except StopIteration:
                # Inside a generator, a leaking StopIteration is converted into an
                # uninformative RuntimeError; raise a descriptive one instead.
                raise RuntimeError(
                    "No location found for person %s. The locations must cover all persons "
                    "in the same order as the trips." % problem["person_id"]
                ) from None

        # Define origin and destination locations if they have fixed purposes
        problem["origin"] = None
        problem["destination"] = None

        if origin_fixed:
            origin = current_location[LOCATION_FIELDS.index(origin_purpose)] # Shapely POINT
            problem["origin"] = np.array([[origin.x, origin.y]])

        if destination_fixed:
            destination = current_location[LOCATION_FIELDS.index(destination_purpose)] # Shapely POINT
            problem["destination"] = np.array([[destination.x, destination.y]])

        yield problem
......@@ -119,11 +119,11 @@ def impute_work_locations(context, distributions, destinations, df_syn_persons_w
# limit to distances (indices) within for random selection so that we can use np.choose which works with 32 items
farthest_dist = dist[-1]
min_threshold = 1000 # I am testing this arbitrarily - need to calibrate
min_threshold = 3000 # I am testing this arbitrarily - need to calibrate properly
min_threshold_band = farthest_dist - min_threshold
minimum_selection_bound = max(min_threshold_band, dist[0])
maximum_selection_bound = farthest_dist
ind = ind[(dist >= minimum_selection_bound) & (dist <= maximum_selection_bound)]
#ind = ind[(dist >= minimum_selection_bound) & (dist <= maximum_selection_bound)]
#Select a location based on number of employee as weights
weights = destinations["work"]["no_employees"][ind]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment