diff --git a/synthesis/population/matched.py b/synthesis/population/matched.py index 4c884a844a6ce5cc0d67c4604e7898ed61340a6b..31fca89708e0ec927ad7df7d336d9588d21fbb2a 100644 --- a/synthesis/population/matched.py +++ b/synthesis/population/matched.py @@ -18,6 +18,7 @@ def configure(context): context.config("matching_minimum_observations", 20) context.config("weekend_scenario", False) context.config("specific_weekend_scenario", "all") # options are "all", "saturday", "sunday" + context.config("specific_day_scenario", "avgworkday") #options can be any of the days of the week or "avgworkday" context.stage("data.microcensus.persons") context.stage("synthesis.population.sampled") @@ -161,20 +162,29 @@ def execute(context): df_mz = context.stage("data.microcensus.persons") is_weekend_scenario = context.config("weekend_scenario") specific_weekend_scenario = context.config("specific_weekend_scenario") + specific_day = context.config("specific_day_scenario") + is_specific_day_scenario = specific_day != "avgworkday" # Source are the MZ observations, for each STATPOP person, a sample is drawn from there + + #specify day of the week the scenario will be generated for df_source = pd.DataFrame(df_mz[ - (is_weekend_scenario & df_mz[ - "weekend"]) # use only weekend samples for a weekend scenario - | - (~is_weekend_scenario & ~df_mz["weekend"]) # and only weekday samples for a weekday - ]) + (is_weekend_scenario & df_mz[ + "weekend"]) # use only weekend samples for a weekend scenario - maybe not needed + | + (~is_weekend_scenario & ~is_specific_day_scenario & (~df_mz["weekend"])) # and only weekday samples for a weekday + | + #add options for different days of the week scenarios that are not weekend + (~is_weekend_scenario & (specific_day != "avgworkday") & (df_mz["day"] == specific_day)) + ]) #If specific weekend context is needed for saturday or sunday if (is_weekend_scenario & (specific_weekend_scenario != "all")): df_source = pd.DataFrame(df_source[((specific_weekend_scenario == "saturday") & df_source["saturday"]) | ((specific_weekend_scenario == "sunday") & df_source["sunday"])]) + print("INFO: The scenario will be generated for: ", df_source["day"].unique(), " day (s) of the week") + df_population = context.stage("synthesis.population.sampled") number_of_statpop_persons = len(np.unique(df_population["person_id"])) number_of_statpop_households = len(np.unique(df_population["household_id"]))