Commit 85ef56cd authored by Aurore Sallard
Browse files

Correctly propagating the number of employees

parent 75017f6e
Pipeline #104976 failed with stage
......@@ -48,7 +48,7 @@ def execute(context):
df_nuts = context.stage("data.spatial.nuts")
df_postal_codes = context.stage("data.spatial.postal_codes")
df_spatial = pd.DataFrame(df[["enterprise_id", "x", "y"]])
df_spatial = pd.DataFrame(df[["enterprise_id", "x", "y", "number_employees"]])
df_spatial = data.spatial.utils.to_gpd(context, df_spatial, "x", "y")
df_spatial = df_spatial.drop(["x", "y"], axis=1)
......
......@@ -11,9 +11,9 @@ def configure(context):
def execute(context):
det_activities = context.config("use_detailed_activities")
df = pd.DataFrame(context.stage("data.statent.statent")[["enterprise_id", "x", "y", "noga"]],
df = pd.DataFrame(context.stage("data.statent.statent")[["enterprise_id", "x", "y", "noga", "number_employees"]],
copy=True)
df.columns = ["destination_id", "destination_x", "destination_y", "noga"]
df.columns = ["destination_id", "destination_x", "destination_y", "noga", "number_employees"]
df.loc[:, "offers_work"] = True
df.loc[:, "offers_other"] = True
......@@ -89,6 +89,7 @@ def execute(context):
outdoor["offers_volunteer"] = False
outdoor["offers_outdoor"] = True
outdoor["offers_services"] = True
outdoor["nb_employees"] = 1
print(initial_crs)
outdoor = spatial_utils.to_gpd(context, outdoor, x="destination_x", y="destination_y", crs=initial_crs, coord_type="outdoor")
......@@ -102,8 +103,8 @@ def execute(context):
return df[["destination_id", "destination_x", "destination_y",
"offers_work", "offers_education", "offers_leisure", "offers_grocery", "offers_other(S)", "offers_culture", "education_type", "offers_religion", "offers_gastronomy", "offers_sport", "offers_other(L)", "offers_other", "offers_visits", "offers_volunteer",
"offers_outdoor", "offers_services", "geometry"]]
"offers_outdoor", "offers_services", "geometry", "number_employees"]]
else:
return df[["destination_id", "destination_x", "destination_y",
"offers_work", "offers_education", "offers_leisure", "education_type", "offers_shop", "offers_other", "geometry"]]
"offers_work", "offers_education", "offers_leisure", "education_type", "offers_shop", "offers_other", "geometry", "number_employees"]]
......@@ -41,12 +41,11 @@ class CustomDiscretizationSolver(rda.DiscretizationSolver):
for location, purpose in zip(locations, problem["purposes"]):
#index = self.indices[purpose].query(location.reshape(1, -1), return_distance = False)[0][0]
distances, indices = self.indices[purpose].query(location.reshape(1, -1), self.query_size, return_distance=True)
candidates_nboemployees = self.data[purpose]["number_employees"][indices]
candidates_nboemployees = self.data[purpose]["number_employees"][indices][0]
weights = candidates_nboemployees / np.sum(candidates_nboemployees)
selector = np.random.choice(self.query_size, p=weights)
index = np.choose(selector, indices[j].T)
index = np.choose(selector, indices.T)
discretized_identifiers.append(self.data[purpose]["identifiers"][index])
discretized_locations.append(self.data[purpose]["locations"][index])
......
......@@ -67,21 +67,25 @@ def prepare_destinations(context):
df_home.loc[:, "offers_outdoor"] = False
df_home.loc[:, "destination_x"] = df_home["geometry"].apply(lambda x: x.x).values
df_home.loc[:, "destination_y"] = df_home["geometry"].apply(lambda x: x.y).values
df_home.loc[:, "number_employees"] = 1
df_home = pd.DataFrame(df_home)
df_destinations = pd.concat([df_destinations, df_home])
identifiers = df_destinations["destination_id"].values
locations = np.vstack(df_destinations["geometry"].apply(lambda x: np.array([x.x, x.y])).values)
nb_employees = df_destinations["number_employees"].values
df_destinations["offers_outdoor"] = df_destinations['offers_outdoor'].fillna(False)
df_destinations["offers_services"] = df_destinations['offers_services'].fillna(False)
df_destinations["number_employees"] = df_destinations['number_employees'].fillna(1)
for purpose in ("grocery", "other(S)", "culture", "gastronomy", "religion", "sport", "other(L)", "other", "visits", "volunteer", "outdoor", "services"):
f = df_destinations["offers_%s" % purpose].values
print(list(set(f)))
#print(list(set(f)))
data[purpose] = dict(
identifiers=identifiers[f],
locations=locations[f]
locations=locations[f],
number_employees = nb_employees[f]
)
print(purpose, len(identifiers[f]))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment