Commit 16b9a6b3 authored by luroth's avatar luroth
Browse files

new training for svm

parent bc5b4331
......@@ -23,11 +23,16 @@ from sklearn.preprocessing import StandardScaler
def tiller_prediction(LA, GDD_BBCH30):
"""
Predicts tiller count based on multi-view leaf area
:param LA: Multi-view leaf area, scaled to plot size of 125 * 50 * 3 mm
:param LA: Multi-view leaf area per m^2
:param GDD: Growing degree days since sowing
:return: Estimation of tiller count
"""
# scale to plot size of 125 * 50 * 3 (mm)
LA = (LA / 1.0) * (0.125 * 0.15)
# scale to mm^2
LA = LA * 10**(6)
#Asym: Model was fit to plot size of 125 * 50 * 3 (mm)
Asym = 125 * 50 * 3
......@@ -47,6 +52,9 @@ def tiller_prediction(LA, GDD_BBCH30):
# x = (a/y - 1)^(-s) (e^m - (a/y - 1)^s)
# Therefore:
tiller_count = (Asym / LA - 1)**(-scal) * (math.exp(xmid) - (Asym / LA - 1) ** scal)
# Scale count to 1 m^2
tiller_count = (tiller_count / (0.125 * 0.15) ) * 1.0
return(tiller_count)
......@@ -220,77 +228,151 @@ def process_plant_count(path_campaign, delta_to_BBCH30,
def process_tiller_count(path_campaign, GSD, path_BBCH30_estimation = None,
paths_BBCH30_measured = None):
def process_tiller_count(path_campaign, path_BBCH30_estimation = None,
path_BBCH30_measured = None, design=None):
print("Process campaigns ", path_campaign)
campaign_date = path_campaign.parts[-2]
#print("Process campaigns ", path_campaign)
campaign_date_long = campaign_date[0:4] + "-" + campaign_date[4:6] + "-" + campaign_date[6:8]
# Read GDD
path_GDD_csv = path_campaign.parent.parent / 'covariates' / 'GDD.csv'
df_GDDs = pd.read_csv(path_GDD_csv)
GDD = df_GDDs.loc[df_GDDs.campaign_date == campaign_date_long, 'GDD'].values[0]
# Read BBCH30 estimation
if path_BBCH30_estimation is not None:
BBCH30_estimation_files = sorted(path_BBCH30_estimation.glob('*_BBCH30.csv'))
# Read LA
path_trait_csvs = path_campaign / 'trait_csvs'
LA_files = sorted(path_trait_csvs.glob("*_LA.csv"))
BBCH30_estimations = {}
for BBCH30_estimation_file in BBCH30_estimation_files:
df_BBCH30_estimation = pd.read_csv(BBCH30_estimation_file)
if len(LA_files) > 0:
campaign_date = path_campaign.parts[-2]
campaign_date_long = campaign_date[0:4] + "-" + campaign_date[4:6] + "-" + campaign_date[6:8]
# Read GDD
path_GDD_csv = path_campaign.parent.parent / 'covariates' / 'GDD.csv'
df_GDDs = pd.read_csv(path_GDD_csv)
GDD = df_GDDs.loc[df_GDDs.campaign_date == campaign_date_long, 'GDD'].values[0]
# Read BBCH30 estimation
df_BBCH30_estimations = None
if path_BBCH30_estimation is not None:
df_BBCH30 = pd.read_csv(path_BBCH30_estimation)
# Join with GDD to get delta to BBCH30 in GDD
df_BBCH30 = df_BBCH30_estimation[df_BBCH30_estimation.value == 30]
df_BBCH30 = df_BBCH30.sort_values(by=['timestamp']).groupby("plot_label").first()
# TODO: Something is very buggy with the label here...
df_GGD_at_BBCH30 = pd.merge(df_BBCH30, df_GDDs, left_on="timestamp", right_on="campaign_date")
df_GGD_at_BBCH30['GDD_BBCH30'] = GDD - df_GGD_at_BBCH30.GDD
design_label = BBCH30_estimation_file.name[:-11]
if design_label not in BBCH30_estimations:
BBCH30_estimations[design_label] = []
BBCH30_estimations[design_label].append(df_GGD_at_BBCH30.to_dict('records'))
df_BBCH30['GDD_date'] = pd.to_datetime(df_BBCH30['trait_value.timestamp']).dt.strftime("%Y-%m-%d")
df_GGD_at_BBCH30 = pd.merge(df_BBCH30, df_GDDs, left_on="GDD_date", right_on="campaign_date")
df_GGD_at_BBCH30['GDD_BBCH30_estimation'] = GDD - df_GGD_at_BBCH30.GDD
df_BBCH30_estimations = df_GGD_at_BBCH30
df_BBCH30_estimations = pd.DataFrame.from_records(BBCH30_estimations)
# Read BBCH30 measurement
if paths_BBCH30_measured is not None:
BBCH30_measurements = []
for path_BBCH30_measured in paths_BBCH30_measured:
df_BBCH30_measured = pd.read_csv(path_BBCH30_measured)
# Read BBCH30 measurement
df_BBCH30_measurements = None
if path_BBCH30_measured is not None:
df_BBCH30 = pd.read_csv(path_BBCH30_measured)
# Join with GDD to get delta to BBCH30 in GDD
df_BBCH30 = df_BBCH30_measured[df_BBCH30_measured.value== 30]
df_BBCH30 = df_BBCH30.sort_values(by=['timestamp']).groupby("plot_label").first()
df_BBCH30['GDD_date'] = pd.to_datetime(df_BBCH30['trait_value.timestamp']).dt.strftime("%Y-%m-%d")
df_GGD_at_BBCH30 = pd.merge(df_BBCH30, df_GDDs, left_on="GDD_date", right_on="campaign_date")
df_GGD_at_BBCH30['GDD_BBCH30_measurement'] = GDD - df_GGD_at_BBCH30.GDD
df_BBCH30_measurements = df_GGD_at_BBCH30
# Read LA
LA_data = {}
for LA_file in LA_files:
df_LA = pd.read_csv(LA_file)
if path_BBCH30_estimation is not None:
df_LA = pd.merge(df_LA, df_BBCH30_estimations, left_on="plot_label", right_on='plot.plot_label')
if path_BBCH30_measured is not None:
df_LA = pd.merge(df_LA, df_BBCH30_measurements, left_on="plot_label", right_on='plot.plot_label')
parts = LA_file.name.split("_")
if (len(parts) == 3):
if parts[1] == "lp01":
print(LA_file, " is lp01 overview image, skip")
continue
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2][:-4]
elif (len(parts) == 2):
design_label = parts[0]
plot_label = parts[1][:-4]
elif (len(parts) == 4):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2] + "_" + parts[3][:-4]
else:
raise Exception("Do not know how to parse name " + LA_file.name)
df_GGD_at_BBCH30 = pd.merge(df_BBCH30, df_GDDs, left_on="timestamp", right_on="campaign_date")
df_GGD_at_BBCH30['GDD_BBCH30'] = GDD - df_GGD_at_BBCH30.GDD
LA_data[design_label] = df_LA
BBCH30_measurements.append(df_GGD_at_BBCH30.to_dict('records'))
df_BBCH30_measurements = pd.concat(BBCH30_measurements)
# Calc tiller count
for design_label, df_ in LA_data.items():
if design is not None and design_label != design:
print("Skip design", design_label)
# Read LA and calc tiller estimation
path_trait_csvs = path_campaign / 'trait_csvs'
LA_files = path_trait_csvs.glob("*_LA.csv")
df_LA.to_csv(path_trait_csvs / (design_label + "_LA.csv"), index=False)
path_trait_file = path_trait_csvs / (design_label + "_bbch30measurement-tillers.csv")
for design_label, LA in LA_data.items():
print("Write tiller trait csv for", design_label)
df_ = pd.DataFrame(LA)
df_['trait'] = "PltTilDen"
df_['trait_id'] = 34
df_['value'] = df_['tiller_estimation']
df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
continue
df_.to_csv(path_trait_csvs / (design_label + "_tillers.csv"), index=False)
print("-----------------------\nGenerate tiller trait csv for", design_label, "for date", campaign_date)
if path_BBCH30_estimation is not None:
# Filter out too far GDD_BBCH30 values
df__ = df_.loc[(df_['GDD_BBCH30_estimation'] > - 160) & (df_['GDD_BBCH30_estimation'] < -40)].copy()
df__['tiller_estimation_bbch_estimation'] = [
tiller_prediction(row['value'], row['GDD_BBCH30_estimation']) for index, row in df__.iterrows()]
df__['trait'] = "PltTilDen"
df__['trait_id'] = 34
df__['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df__['GDD_BBCH30'] = df__['GDD_BBCH30_estimation']
df__ = df__[['trait', 'trait_id', 'tiller_estimation_bbch_estimation', 'timestamp', 'GDD_BBCH30', 'plot_label']].copy()
df__['value'] = df__['tiller_estimation_bbch_estimation']
df__['value_json'] = df__.apply(lambda row: row.iloc[4:5].to_json(), axis=1)
path_trait_file = path_trait_csvs / (design_label + "_bbch30estimation-tillers.csv")
if len(df__) > 0:
print("Calculated tillers, bbch30 estimation: median",
np.nanmedian(df__['tiller_estimation_bbch_estimation']))
print("Median GDD_BBCH30 estimated:", df__['GDD_BBCH30'].median())
df__.to_csv(path_trait_csvs / (design_label + "_bbch30estimation-tillers.csv"), index=False)
else:
# remove trait file if no measurements found
print("No estimated BBCH30 measurements left after filtering, delete potential trait file")
try:
path_trait_file.unlink()
except FileNotFoundError:
print("No file")
if path_BBCH30_measured is not None:
# Filter out too far GDD_BBCH30 values
df__ = df_.loc[(df_['GDD_BBCH30_measurement'] > - 160) & (df_['GDD_BBCH30_measurement'] < -40)].copy()
df__['tiller_estimation_bbch_measurement'] = [
tiller_prediction(row['value'], row['GDD_BBCH30_measurement']) for index, row in df__.iterrows()]
df__['trait'] = "PltTilDen"
df__['trait_id'] = 34
df__['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df__['GDD_BBCH30'] = df__['GDD_BBCH30_measurement']
df__ = df__[['trait', 'trait_id', 'tiller_estimation_bbch_measurement', 'timestamp', 'GDD_BBCH30', 'plot_label']].copy()
df__['value'] = df__['tiller_estimation_bbch_measurement']
df__['value_json'] = df__.apply(lambda row: row.iloc[4:5].to_json(), axis=1)
path_trait_file = path_trait_csvs / (design_label + "_bbch30measurement-tillers.csv")
if len(df__) > 0:
print("Calcualted tillers, bbch30 measurement: median",
np.nanmedian(df__['tiller_estimation_bbch_measurement']))
print("Median GDD_BBCH30 measured:", df__['GDD_BBCH30'].median())
df__.to_csv(path_trait_file, index=False)
else:
print("No measured BBCH30 measurements left after filtering, delete potential trait file")
try:
path_trait_file.unlink()
except FileNotFoundError:
print("No file")
else:
print("-------------\nNo LA data found, skip campaign", str(path_campaign))
......@@ -461,144 +543,6 @@ def process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD):
df_LA.to_csv(path_trait_csvs / (design_label + "_AI.csv"), index=False)
### TODO: Dynamic traits, should be based on spatial corrected values from DB
def process_plant_count_dynamics(path_campaign_date_5,
path_campaign_date_10,
path_campaign_date_15,
GSD):
"""
Process folder to perform plant count estimation
:param path_campaign: path of campaigns
:param campaign_date: Date of campaign
"""
print("Process campaigns ", path_campaign_date_5, path_campaign_date_10)
paths = {5: path_campaign_date_5,
10: path_campaign_date_10,
15: path_campaign_date_15}
# Container for results
plant_counts = {}
for delta, path_campaign in paths.items():
campaign_date = path_campaign.parts[-2]
path_trait_csvs = path_campaign / 'trait_csvs'
plant_count_trait_files = sorted(path_GC_AC_folder.glob('*_?????????*.tif'))
for LA_image in LA_images:
parts = LA_image.name.split("_")
if (len(parts) == 3):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2][:-4]
elif (len(parts) == 4):
design_label = parts[0] + "_" + parts[1]
plot_label = parts[2] + "_" + parts[3][:-4]
else:
Exception("Do not know how to parse name " + LA_image.name)
print("Process LA image for plot", plot_label)
LA_img = imageio.imread(LA_image)
w_counts, gc_counts = plant_prediction(LA_img, delta, GSD)
# plant count estimation is an absolute number, divide by sampling area size in mm^2
w_counts_scaled = round(w_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))
gc_counts_scaled = round(gc_counts / (LA_img.shape[0] * LA_img.shape[1] * (GSD) ** 2))
print("Plants: w:", w_counts_scaled, "gc: ", gc_counts_scaled, "per m²")
plant_counts_ = {}
plant_counts_['plot_label'] = plot_label
plant_counts_['campaign_date'] = campaign_date
plant_counts_['watershed_plant_count_estimation'] = w_counts_scaled
plant_counts_['watershed_plant_count_estimation_abs'] = w_counts
plant_counts_['groundcoverage_plant_count_estimation'] = gc_counts_scaled
plant_counts_['groundcoverage_plant_count_estimation_abs'] = gc_counts
plant_counts_['delta'] = delta
if design_label not in plant_counts:
plant_counts[design_label] = []
plant_counts[design_label].append(plant_counts_)
path_trait_csvs = path_campaign_date_10.parent.parent / 'trait_csvs'
path_trait_csvs.mkdir(exist_ok=True)
for design_label, plant_region_data_ in plant_counts.items():
print("Write plant region trait csv for", design_label)
df_regions = pd.DataFrame(plant_region_data_)
idx_w = df_regions.groupby(['plot_label'])['watershed_plant_count_estimation_abs'].transform(np.median) == \
df_regions['watershed_plant_count_estimation_abs']
df_regions_w = df_regions[idx_w].copy()
idx_gc = df_regions.groupby(['plot_label'])['groundcoverage_plant_count_estimation'].transform(np.median) == \
df_regions['groundcoverage_plant_count_estimation']
df_regions_gc = df_regions[idx_gc].copy()
# watershed data
df_regions_w['trait'] = "PntDen"
df_regions_w['trait_id'] = 1
df_regions_w['value'] = df_regions_w['watershed_plant_count_estimation']
df_regions_w['timestamp'] = pd.to_datetime(df_regions_w['campaign_date'], format="%Y%m%d")
df_regions_w.to_csv(path_trait_csvs / (design_label + "_watershed_plants.csv"), index=False)
# GC data
df_regions_gc['trait'] = "PntDen"
df_regions_gc['trait_id'] = 1
df_regions_gc['value'] = df_regions_gc['groundcoverage_plant_count_estimation']
df_regions_gc['timestamp'] = pd.to_datetime(df_regions_gc['campaign_date'], format="%Y%m%d")
df_regions_gc.to_csv(path_trait_csvs / (design_label + "_gc_plants.csv"), index=False)
def process_tiller_dynamics():
if GDD >= 400 and GDD <= 600:
for design_label, LA in LA_data.items():
print("Write tiller rate trait csv for", design_label)
df_ = pd.DataFrame(LA)
# get plant counts
df_plant_counts_ = pd.DataFrame(plant_counts[design_label][0])
df_plant_counts_ = df_plant_counts_[['plot_label', 'value']].copy()
df_plant_counts_.columns = ['plot_label', 'plant_count_estimate']
df_ = pd.merge(df_, df_plant_counts_, on="plot_label")
df_['trait'] = "PntShtDen"
df_['trait_id'] = 40
df_['value'] = df_['tiller_estimation'] / df_['plant_count_estimate']
df_['timestamp'] = datetime.strptime(campaign_date, "%Y%m%d")
df_.to_csv(path_trait_csvs / (design_label + "_tiller_rates.csv"), index=False)
# Read plant counts for later calculation of tiller rate per plant
plant_count_files = sorted(path_plant_count_estimation.glob('*_plants.csv'))
plant_counts = {}
for plant_count_file in plant_count_files:
df_plant_counts = pd.read_csv(plant_count_file)
design_label = plant_count_file.name[:-11]
if design_label not in plant_counts:
plant_counts[design_label] = []
plant_counts[design_label].append(df_plant_counts.to_dict('records'))
def clean_trait_folder(path_campaign, patterns):
......
This diff is collapsed.
from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path

# Root of the processed-data share (Windows mount kept for reference).
#path_p = Path("P:/")
path_p = Path("/home/luroth/public")

if __name__ == "__main__":
    # Campaign base folder for the ETHZ Eschikon FPWW022 lot1 experiment.
    base_path_campaign = path_p / 'Evaluation/UAV/_Processed_/ETHZ_eschikon_FPWW022_lot1'
    # BBCH30 estimation export; no measured BBCH30 data exists for this lot.
    path_BBCH30_estimation = base_path_campaign / "dynamic_traits" / "FPWW022_lot1_trait_id_45_method_id_312.csv"
    path_BBCH30_measured = None
    # One campaign-date folder (YYYYMMDD...) per flight; process each in turn.
    for date_folder in base_path_campaign.glob('[0-9]*'):
        path_campaign = base_path_campaign / date_folder.name / '28m_M600P'
        CanopyAnalysis.process_tiller_count(path_campaign,
                                            path_BBCH30_estimation=path_BBCH30_estimation,
                                            path_BBCH30_measured=path_BBCH30_measured,
                                            design="FPWW022_lot1")
from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path

# Root of the processed-data share (Windows mount kept for reference).
#path_p = Path("P:/")
path_p = Path("/home/luroth/public")

if __name__ == "__main__":
    # Campaign base folder for the ETHZ Eschikon FPWW022 lot3 experiment.
    base_path_campaign = path_p / 'Evaluation/UAV/_Processed_/ETHZ_eschikon_FPWW022_lot3'
    # BBCH30 sources: method 312 = estimation, method 314 = field measurement.
    path_BBCH30_estimation = base_path_campaign / "dynamic_traits" / "FPWW022_lot3_trait_id_45_method_id_312.csv"
    path_BBCH30_measured = base_path_campaign / "dynamic_traits" / "FPWW022_lot3_trait_id_45_method_id_314.csv"
    # One campaign-date folder (YYYYMMDD...) per flight; process each in turn.
    for date_folder in base_path_campaign.glob('[0-9]*'):
        path_campaign = base_path_campaign / date_folder.name / '28m_M600P'
        CanopyAnalysis.process_tiller_count(path_campaign,
                                            path_BBCH30_estimation=path_BBCH30_estimation,
                                            path_BBCH30_measured=path_BBCH30_measured,
                                            design="FPWW022_lot3")
from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path

# Root of the processed-data share (Windows mount kept for reference).
#path_p = Path("P:/")
path_p = Path("/home/luroth/public")

if __name__ == "__main__":
    # Campaign base folder for the DSP Genevey DSWW001 experiment.
    base_path_campaign = path_p / 'Evaluation/UAV/_Processed_/DSP_genevey_DSWW001'
    # BBCH30 sources: method 312 = estimation, method 314 = field measurement.
    path_BBCH30_estimation = base_path_campaign / "dynamic_traits" / "DSWW001_trait_id_45_method_id_312.csv"
    path_BBCH30_measured = base_path_campaign / "dynamic_traits" / "DSWW001_trait_id_45_method_id_314.csv"
    # One campaign-date folder (YYYYMMDD...) per flight; process each in turn.
    for date_folder in base_path_campaign.glob('[0-9]*'):
        path_campaign = base_path_campaign / date_folder.name / '28m_M600P'
        CanopyAnalysis.process_tiller_count(path_campaign,
                                            path_BBCH30_estimation=path_BBCH30_estimation,
                                            path_BBCH30_measured=path_BBCH30_measured,
                                            design="DSWW001")
from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path

# Root of the processed-data share (Windows mount kept for reference).
#path_p = Path("P:/")
path_p = Path("/home/luroth/public")

if __name__ == "__main__":
    # Campaign base folder for the ETHZ Eschikon FPWW024 lot2 experiment.
    base_path_campaign = path_p / 'Evaluation/UAV/_Processed_/ETHZ_eschikon_FPWW024_lot2'
    # BBCH30 sources: method 312 = estimation, method 314 = field measurement.
    path_BBCH30_estimation = base_path_campaign / "dynamic_traits" / "FPWW024_lot2_trait_id_45_method_id_312.csv"
    path_BBCH30_measured = base_path_campaign / "dynamic_traits" / "FPWW024_lot2_trait_id_45_method_id_314.csv"
    # One campaign-date folder (YYYYMMDD...) per flight; process each in turn.
    for date_folder in base_path_campaign.glob('[0-9]*'):
        path_campaign = base_path_campaign / date_folder.name / '28m_M600P'
        CanopyAnalysis.process_tiller_count(path_campaign,
                                            path_BBCH30_estimation=path_BBCH30_estimation,
                                            path_BBCH30_measured=path_BBCH30_measured,
                                            design="FPWW024_lot2")
from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path

# Root of the processed-data share (Windows mount kept for reference).
#path_p = Path("P:/")
path_p = Path("/home/luroth/public")

if __name__ == "__main__":
    # Campaign base folder for the ETHZ Eschikon FPWW024 lot4 experiment.
    base_path_campaign = path_p / 'Evaluation/UAV/_Processed_/ETHZ_eschikon_FPWW024_lot4'
    # BBCH30 sources: method 312 = estimation, method 314 = field measurement.
    path_BBCH30_estimation = base_path_campaign / "dynamic_traits" / "FPWW024_lot4_trait_id_45_method_id_312.csv"
    path_BBCH30_measured = base_path_campaign / "dynamic_traits" / "FPWW024_lot4_trait_id_45_method_id_314.csv"
    # One campaign-date folder (YYYYMMDD...) per flight; process each in turn.
    for date_folder in base_path_campaign.glob('[0-9]*'):
        path_campaign = base_path_campaign / date_folder.name / '28m_M600P'
        CanopyAnalysis.process_tiller_count(path_campaign,
                                            path_BBCH30_estimation=path_BBCH30_estimation,
                                            path_BBCH30_measured=path_BBCH30_measured,
                                            design="FPWW024_lot4")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment