Commit effa95c5 authored by luroth's avatar luroth
Browse files

plant count est

parent 6c777569
......@@ -133,9 +133,7 @@ def plant_prediction(image, delta_to_BBCH30, GSD):
return(w_plant_count_total, gc_plant_counts_total)
def process_plant_count(path_campaign_date_5,
path_campaign_date_10,
path_campaign_date_15,
def process_plant_count(path_campaign, delta_to_BBCH30,
GSD):
"""
Process folder to perform plant count estimation
......@@ -143,75 +141,62 @@ def process_plant_count(path_campaign_date_5,
:param campaign_date: Date of campaign
"""
print("Process campaigns ", path_campaign_date_5, path_campaign_date_10)
paths = {5: path_campaign_date_5,
10: path_campaign_date_10,
15: path_campaign_date_15}
print("Process campaign ", path_campaign)
# Container for results
plant_counts = {}
for delta, path_campaign in paths.items():
campaign_date = path_campaign.parts[-2]
campaign_date = path_campaign.parts[-2]
path_GC_AC_folder = path_campaign / 'GC_AC'
path_GC_AC_folder = path_campaign / 'GC_AC'
LA_images = sorted(path_GC_AC_folder.glob('*_?????????*.tif'))
LA_images = sorted(path_GC_AC_folder.glob('*_?????????*.tif'))
for LA_image in LA_images:
parts = LA_image.name.split("_")
if (len(parts) == 3):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2][:-4]
elif (len(parts) == 4):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2] + "_" + parts[3][:-4]
else:
Exception("Do not know how to parse name " + LA_image.name)
for LA_image in LA_images:
parts = LA_image.name.split("_")
if (len(parts) == 3):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2][:-4]
elif (len(parts) == 4):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2] + "_" + parts[3][:-4]
else:
Exception("Do not know how to parse name " + LA_image.name)
print("Process LA image for plot", plot_label)
print("Process LA image for plot", plot_label)
LA_img = imageio.imread(LA_image)
LA_img = imageio.imread(LA_image)
w_counts, gc_counts = plant_prediction(LA_img, delta, GSD)
w_counts, gc_counts = plant_prediction(LA_img, delta_to_BBCH30, GSD)
# plant count estimation is absolut number, divide by sampling area size in mm^2
w_counts_scaled = round(w_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD)**2))
gc_counts_scaled = round(gc_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))
# plant count estimation is absolut number, divide by sampling area size in mm^2
w_counts_scaled = round(w_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD)**2))
gc_counts_scaled = round(gc_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))
print("Plants: w:", w_counts_scaled, "gc: ", gc_counts_scaled, "per m²")
print("Plants: w:", w_counts_scaled, "gc: ", gc_counts_scaled, "per m²")
plant_counts_ = {}
plant_counts_['plot_label'] = plot_label
plant_counts_['campaign_date'] = campaign_date
plant_counts_['watershed_plant_count_estimation'] = w_counts_scaled
plant_counts_['watershed_plant_count_estimation_abs'] = w_counts
plant_counts_['groundcoverage_plant_count_estimation'] = gc_counts_scaled
plant_counts_['groundcoverage_plant_count_estimation_abs'] = gc_counts
plant_counts_['delta'] = delta
plant_counts_ = {}
plant_counts_['plot_label'] = plot_label
plant_counts_['campaign_date'] = campaign_date
plant_counts_['watershed_plant_count_estimation'] = w_counts_scaled
plant_counts_['watershed_plant_count_estimation_abs'] = w_counts
plant_counts_['groundcoverage_plant_count_estimation'] = gc_counts_scaled
plant_counts_['groundcoverage_plant_count_estimation_abs'] = gc_counts
plant_counts_['delta'] = delta_to_BBCH30
if design_label not in plant_counts:
plant_counts[design_label] = []
if design_label not in plant_counts:
plant_counts[design_label] = []
plant_counts[design_label].append(plant_counts_)
plant_counts[design_label].append(plant_counts_)
path_trait_csvs = path_campaign_date_10.parent.parent / 'trait_csvs'
path_trait_csvs = path_campaign / 'trait_csvs'
path_trait_csvs.mkdir(exist_ok=True)
for design_label, plant_region_data_ in plant_counts.items():
print("Write plant region trait csv for", design_label)
df_regions = pd.DataFrame(plant_region_data_)
idx_w = df_regions.groupby(['plot_label'])['watershed_plant_count_estimation_abs'].transform(np.median) == \
df_regions['watershed_plant_count_estimation_abs']
df_regions_w = df_regions[idx_w].copy()
idx_gc = df_regions.groupby(['plot_label'])['groundcoverage_plant_count_estimation'].transform(np.median) == \
df_regions['groundcoverage_plant_count_estimation']
df_regions_gc = df_regions[idx_gc].copy()
df_regions_w = pd.DataFrame(plant_region_data_)
df_regions_gc = pd.DataFrame(plant_region_data_)
# watershed data
df_regions_w['trait'] = "PntDen"
......@@ -232,24 +217,15 @@ def process_plant_count(path_campaign_date_5,
df_regions_gc.to_csv(path_trait_csvs / (design_label + "_gc_plants.csv"), index=False)
def process_tiller_count(path_campaign, campaign_date, GSD, path_plant_count_estimation, path_BBCH30_estimation = None,
def process_tiller_count(path_campaign, GSD, path_BBCH30_estimation = None,
paths_BBCH30_measured = None):
print("Process campaigns ", path_campaign)
campaign_date_long = campaign_date[0:4] + "-" + campaign_date[4:6] + "-" + campaign_date[6:8]
# Read plant counts for later calculation of tiller rate per plant
plant_count_files = sorted(path_plant_count_estimation.glob('*_plants.csv'))
plant_counts = {}
for plant_count_file in plant_count_files:
df_plant_counts = pd.read_csv(plant_count_file)
design_label = plant_count_file.name[:-11]
campaign_date = path_campaign.parts[-2]
if design_label not in plant_counts:
plant_counts[design_label] = []
plant_counts[design_label].append(df_plant_counts.to_dict('records'))
campaign_date_long = campaign_date[0:4] + "-" + campaign_date[4:6] + "-" + campaign_date[6:8]
# Read GDD
path_GDD_csv = path_campaign.parent.parent / 'covariates' / 'GDD.csv'
......@@ -264,10 +240,9 @@ def process_tiller_count(path_campaign, campaign_date, GSD, path_plant_count_est
for BBCH30_estimation_file in BBCH30_estimation_files:
df_BBCH30_estimation = pd.read_csv(BBCH30_estimation_file)
# Join with GDD to get delta to BBCH30 in GDD
# Join with GDD to get delta to BBCH30 in GDD
df_BBCH30 = df_BBCH30_estimation[df_BBCH30_estimation.value == 30]
df_BBCH30 = df_BBCH30.sort_values(by=['timestamp']).groupby("plot_label").first()
# TODO: Something is very buggy with the label here...
df_GGD_at_BBCH30 = pd.merge(df_BBCH30, df_GDDs, left_on="timestamp", right_on="campaign_date")
df_GGD_at_BBCH30['GDD_BBCH30'] = GDD - df_GGD_at_BBCH30.GDD
......@@ -277,6 +252,8 @@ def process_tiller_count(path_campaign, campaign_date, GSD, path_plant_count_est
BBCH30_estimations[design_label] = []
BBCH30_estimations[design_label].append(df_GGD_at_BBCH30.to_dict('records'))
df_BBCH30_estimations = pd.DataFrame.from_records(BBCH30_estimations)
# Read BBCH30 measurement
if paths_BBCH30_measured is not None:
......@@ -293,50 +270,31 @@ def process_tiller_count(path_campaign, campaign_date, GSD, path_plant_count_est
df_GGD_at_BBCH30['GDD_BBCH30'] = GDD - df_GGD_at_BBCH30.GDD
BBCH30_measurements.append(df_GGD_at_BBCH30.to_dict('records'))
df_BBCH30_measurements = pd.concat(BBCH30_measurements)
# Read / calc traits
path_GC_AC_folder = path_campaign / 'GC_AC'
# Read LA and calc tiller estimation
path_trait_csvs = path_campaign / 'trait_csvs'
LA_files = path_trait_csvs.glob("*_LA.csv")
df_LA.to_csv(path_trait_csvs / (design_label + "_LA.csv"), index=False)
for design_label, LA in LA_data.items():
print("Write tiller trait csv for", design_label)
df_ = pd.DataFrame(LA)
df_['trait'] = "PltTilDen"
df_['trait_id'] = 34
df_['value'] = df_['tiller_estimation']
if GDD >= 400 and GDD <= 600:
for design_label, LA in LA_data.items():
print("Write tiller trait csv for", design_label)
df_ = pd.DataFrame(LA)
df_['trait'] = "PltTilDen"
df_['trait_id'] = 34
df_['value'] = df_['tiller_estimation']
df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df_.to_csv(path_trait_csvs / (design_label + "_tillers.csv"), index=False)
if GDD >= 400 and GDD <= 600:
for design_label, LA in LA_data.items():
print("Write tiller rate trait csv for", design_label)
df_ = pd.DataFrame(LA)
# get plant counts
df_plant_counts_ = pd.DataFrame(plant_counts[design_label][0])
df_plant_counts_ = df_plant_counts_[['plot_label', 'value']].copy()
df_plant_counts_.columns = ['plot_label', 'plant_count_estimate']
df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df_ = pd.merge(df_, df_plant_counts_, on="plot_label")
df_.to_csv(path_trait_csvs / (design_label + "_tillers.csv"), index=False)
df_['trait'] = "PntShtDen"
df_['trait_id'] = 40
df_['value'] = df_['tiller_estimation'] / df_['plant_count_estimate']
df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df_.to_csv(path_trait_csvs / (design_label + "_tiller_rates.csv"), index=False)
def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
def process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD):
campaign_date = path_campaign.parts[-2]
# Trains SVM
df_training = pd.read_csv('./GroundAerialCoverage/svm_SE_training.csv')
......@@ -469,7 +427,7 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD):
df_LA = pd.DataFrame(LA)
df_LA['value_json'] = ""
df_LA['trait'] = "PltAI"
df_LA['trait'] = "PltSE"
df_LA['trait_id'] = 41
X = [list(d.values()) for d in df_LA['gt_bins']]
......@@ -483,11 +441,10 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD):
df_LA.to_csv(path_trait_csvs / (design_label + "_AI.csv"), index=False)
### TODO: Dynamic traits, should be based on spatial corrected values from DB
def process_begin_stem_elongation(paths_campaigns, graph=True):
path_global_trait_csvs = paths_campaigns[0].parent.parent / 'trait_csvs'
path_global_trait_csvs.mkdir(exist_ok=True)
......@@ -588,11 +545,157 @@ def process_begin_stem_elongation(paths_campaigns, graph=True):
df_begin_SE.to_csv(path_global_trait_csvs / (design_label + "_BBCH30.csv"), index=False)
def process_plant_count_dynamics(path_campaign_date_5,
                                 path_campaign_date_10,
                                 path_campaign_date_15,
                                 GSD):
    """
    Perform plant count estimation over three campaigns and write the
    per-design trait CSVs ("<design>_watershed_plants.csv" and
    "<design>_gc_plants.csv").

    :param path_campaign_date_5: campaign folder for delta value 5
    :param path_campaign_date_10: campaign folder for delta value 10
    :param path_campaign_date_15: campaign folder for delta value 15
    :param GSD: ground sampling distance, used to scale counts to per-m²
                (unit assumed consistent with plant_prediction — TODO confirm)
    """
    print("Process campaigns ", path_campaign_date_5, path_campaign_date_10)

    # Map each delta (5/10/15, passed through to plant_prediction) to its
    # campaign folder.
    paths = {5: path_campaign_date_5,
             10: path_campaign_date_10,
             15: path_campaign_date_15}

    # Container for results, keyed by design label.
    plant_counts = {}

    for delta, path_campaign in paths.items():
        # Folder layout is <...>/<campaign_date>/<campaign>/GC_AC/*.tif
        campaign_date = path_campaign.parts[-2]

        # FIX: the original referenced `path_GC_AC_folder` and `LA_images`
        # without defining them (NameError); define them as in
        # process_plant_count. The unused `plant_count_trait_files`
        # assignment was dropped.
        path_GC_AC_folder = path_campaign / 'GC_AC'
        LA_images = sorted(path_GC_AC_folder.glob('*_?????????*.tif'))

        for LA_image in LA_images:
            # Names are "<a>_<b>_<plot>.tif" or "<a>_<b>_<p1>_<p2>.tif";
            # [:-4] strips the ".tif" suffix.
            parts = LA_image.name.split("_")
            if len(parts) == 3:
                design_label = parts[0] + "_" + parts[1]
                plot_label = parts[2][:-4]
            elif len(parts) == 4:
                design_label = parts[0] + "_" + parts[1]
                plot_label = parts[2] + "_" + parts[3][:-4]
            else:
                # FIX: the original built the Exception but never raised it,
                # silently reusing the previous iteration's labels.
                raise Exception("Do not know how to parse name " + LA_image.name)

            print("Process LA image for plot", plot_label)

            LA_img = imageio.imread(LA_image)

            w_counts, gc_counts = plant_prediction(LA_img, delta, GSD)

            # plant count estimation is absolut number, divide by sampling area size in mm^2
            w_counts_scaled = round(w_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))
            gc_counts_scaled = round(gc_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))

            print("Plants: w:", w_counts_scaled, "gc: ", gc_counts_scaled, "per m²")

            plant_counts_ = {}
            plant_counts_['plot_label'] = plot_label
            plant_counts_['campaign_date'] = campaign_date
            plant_counts_['watershed_plant_count_estimation'] = w_counts_scaled
            plant_counts_['watershed_plant_count_estimation_abs'] = w_counts
            plant_counts_['groundcoverage_plant_count_estimation'] = gc_counts_scaled
            plant_counts_['groundcoverage_plant_count_estimation_abs'] = gc_counts
            plant_counts_['delta'] = delta

            if design_label not in plant_counts:
                plant_counts[design_label] = []
            plant_counts[design_label].append(plant_counts_)

    # Trait CSVs live two levels above the campaign folder.
    path_trait_csvs = path_campaign_date_10.parent.parent / 'trait_csvs'
    path_trait_csvs.mkdir(exist_ok=True)

    for design_label, plant_region_data_ in plant_counts.items():
        print("Write plant region trait csv for", design_label)
        df_regions = pd.DataFrame(plant_region_data_)

        # Keep, per plot, the row whose absolute count equals the per-plot
        # median across the three deltas.
        # NOTE(review): with an even row count the interpolated median may
        # match no row, and ties may keep several — confirm intended.
        idx_w = df_regions.groupby(['plot_label'])['watershed_plant_count_estimation_abs'].transform(np.median) == \
                df_regions['watershed_plant_count_estimation_abs']
        df_regions_w = df_regions[idx_w].copy()
        idx_gc = df_regions.groupby(['plot_label'])['groundcoverage_plant_count_estimation'].transform(np.median) == \
                 df_regions['groundcoverage_plant_count_estimation']
        df_regions_gc = df_regions[idx_gc].copy()

        # watershed data
        df_regions_w['trait'] = "PntDen"
        df_regions_w['trait_id'] = 1
        df_regions_w['value'] = df_regions_w['watershed_plant_count_estimation']
        df_regions_w['timestamp'] = pd.to_datetime(df_regions_w['campaign_date'], format="%Y%m%d")
        df_regions_w.to_csv(path_trait_csvs / (design_label + "_watershed_plants.csv"), index=False)

        # GC data
        df_regions_gc['trait'] = "PntDen"
        df_regions_gc['trait_id'] = 1
        df_regions_gc['value'] = df_regions_gc['groundcoverage_plant_count_estimation']
        df_regions_gc['timestamp'] = pd.to_datetime(df_regions_gc['campaign_date'], format="%Y%m%d")
        df_regions_gc.to_csv(path_trait_csvs / (design_label + "_gc_plants.csv"), index=False)
def process_tiller_dynamics():
    # NOTE(review): work in progress — this function takes no parameters,
    # yet reads `GDD`, `LA_data`, `campaign_date`, `path_trait_csvs` and
    # `path_plant_count_estimation`, none of which are defined in this
    # scope. It cannot run as-is; it presumably needs a signature like
    # process_tiller_count's — confirm with the author.
    # Only compute the tiller rate once plants have stopped emerging but
    # before canopy closure (400–600 GDD window, presumably growing degree
    # days — TODO confirm units).
    if GDD >= 400 and GDD <= 600:
        for design_label, LA in LA_data.items():
            print("Write tiller rate trait csv for", design_label)
            df_ = pd.DataFrame(LA)

            # get plant counts
            # NOTE(review): `plant_counts` is consumed here but only built
            # by the block *below* — the ordering looks inverted.
            df_plant_counts_ = pd.DataFrame(plant_counts[design_label][0])
            df_plant_counts_ = df_plant_counts_[['plot_label', 'value']].copy()
            df_plant_counts_.columns = ['plot_label', 'plant_count_estimate']

            df_ = pd.merge(df_, df_plant_counts_, on="plot_label")

            # Tillers per plant = tiller estimate / plant count estimate.
            df_['trait'] = "PntShtDen"
            df_['trait_id'] = 40
            df_['value'] = df_['tiller_estimation'] / df_['plant_count_estimate']
            df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
            df_.to_csv(path_trait_csvs / (design_label + "_tiller_rates.csv"), index=False)

    # Read plant counts for later calculation of tiller rate per plant
    plant_count_files = sorted(path_plant_count_estimation.glob('*_plants.csv'))
    plant_counts = {}
    for plant_count_file in plant_count_files:
        df_plant_counts = pd.read_csv(plant_count_file)
        # [:-11] strips the "_plants.csv" suffix to recover the design label.
        design_label = plant_count_file.name[:-11]

        if design_label not in plant_counts:
            plant_counts[design_label] = []
        plant_counts[design_label].append(df_plant_counts.to_dict('records'))
def clean_trait_folder(path_campaign, patterns):
    """
    Remove files from <path_campaign>/trait_csvs that match any of the
    given glob patterns.

    :param path_campaign: campaign folder containing a 'trait_csvs' subfolder
    :param patterns: iterable of glob patterns (e.g. '*_plants.csv')
    """
    trait_dir = path_campaign / 'trait_csvs'

    # Collect every match across all patterns, then delete them.
    matched = [hit for pattern in patterns for hit in trait_dir.glob(pattern)]
    for stale_file in matched:
        print("deleting", stale_file)
        stale_file.unlink()
def clean_main_folder(path_campaign, patterns):
path_trait_csvs = path_campaign
files = []
for pattern in patterns:
files.extend(path_trait_csvs.glob(pattern))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment