Commit bc5b4331 authored by luroth

new training for svm

parent 28120d50
@@ -294,7 +294,7 @@ def process_tiller_count(path_campaign, GSD, path_BBCH30_estimation = None,
-def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
+def process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD):
campaign_date = path_campaign.parts[-2]
@@ -308,9 +308,10 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
sc_x.fit(X)
X = sc_x.transform(X)
-y = df_training['delta_to_BBCH']
+# Log delta to weight close-to-zero values higher
+y = np.log(np.abs(df_training['delta_to_BBCH']) + 1) * np.sign(df_training['delta_to_BBCH'])
-svm_predictor = svm.SVR(C=32, kernel='rbf', gamma = 0.015625, epsilon = 0.1)
+svm_predictor = svm.SVR(C=32, kernel='rbf', gamma = 0.125, epsilon = 0.1)
svm_predictor.fit(X, y)
print("Process campaign ", path_campaign)
@@ -395,9 +396,20 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
LA_img_powered = np.power(LA_img, 2)
gt_bins = {}
-for gt_bin in np.arange(100):
+for gt_bin in np.arange(start=0, stop=100, step=1):
gt_bins[gt_bin] = np.sum(LA_img > gt_bin/100) / (LA_img.shape[0] * LA_img.shape[1])
+# normalize
+gt_bins_norm = {}
+for gt_bin in np.arange(start=0, stop=100, step=10):
+sum_a = np.sum([val for val in gt_bins.values()])
+if sum_a != 0:
+gt_bins_norm[gt_bin] = gt_bins[gt_bin] / sum_a
+else:
+gt_bins_norm[gt_bin] = 0
+# check normalization:
+#np.sum([val for val in gt_bins_norm.values()])
# Calc LA and aerial index
LA = np.sum(LA_img_powered)
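Below, a standalone sketch of the cover-fraction binning and the normalisation added in this hunk, assuming `LA_img` is a 2-D array of per-pixel values in [0, 1]; note that the committed loop normalises every tenth bin, so `gt_bins_norm` carries 10 features.

```python
import numpy as np

def cover_fraction_bins(LA_img):
    """Fraction of pixels whose value exceeds each threshold bin (sketch)."""
    n_px = LA_img.shape[0] * LA_img.shape[1]
    gt_bins = {b: np.sum(LA_img > b / 100) / n_px for b in range(100)}

    # Normalise every tenth bin by the sum over all 100 bins, as in the diff
    sum_a = sum(gt_bins.values())
    gt_bins_norm = {b: (gt_bins[b] / sum_a if sum_a != 0 else 0)
                    for b in range(0, 100, 10)}
    return gt_bins, gt_bins_norm

# Example with a synthetic image
LA_img = np.random.default_rng(1).random((64, 64))
gt_bins, gt_bins_norm = cover_fraction_bins(LA_img)
```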
@@ -408,6 +420,7 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
# LA is absolute, divide by sampling area size in mm^2
LA_data_['LA'] = LA / (LA_img.shape[0] * LA_img.shape[1] * (GSD*1000)**2)
LA_data_['gt_bins'] = gt_bins
+LA_data_['gt_bins_norm'] = gt_bins_norm
if design_label not in LA_data:
LA_data[design_label] = []
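A quick unit check on the `LA` normalisation above, assuming `GSD` is the ground sampling distance in metres per pixel: the divisor `shape[0] * shape[1] * (GSD*1000)**2` is the sampled area in mm². Hypothetical numbers only:

```python
# Hypothetical values, only to illustrate the divisor's units
GSD = 0.003            # assumed: 3 mm per pixel, i.e. GSD in m/px
h, w = 1000, 1500      # image size in pixels
area_mm2 = h * w * (GSD * 1000) ** 2   # pixel count * (mm/px)^2 = sampling area in mm^2
print(area_mm2)        # 13,500,000 mm^2 = 13.5 m^2
```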
@@ -429,16 +442,17 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
df_LA.to_csv(path_trait_csvs / (design_label + "_LA.csv"), index=False)
for design_label, LA in LA_data.items():
print("Write aerial leaf coverage trait csv for", design_label)
print("Write bbch30 trait csv for", design_label)
df_LA = pd.DataFrame(LA)
df_LA['value_json'] = ""
df_LA['trait'] = "PltSE"
df_LA['trait_id'] = 41
-X = [list(d.values()) for d in df_LA['gt_bins']]
+X = [list(d.values()) for d in df_LA['gt_bins_norm']]
value = -svm_predictor.predict(sc_x.transform(X))
+value = (np.exp(value) + 1) * np.sign(value)
df_LA['value'] = value
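The target was trained as y = log(|Δ| + 1) · sign(Δ), so the exact back-transform is Δ = (exp(|y|) − 1) · sign(y). A small sketch of that round trip (an assumption about the intended inverse; note the committed line above uses `np.exp(value) + 1`):

```python
import numpy as np

def forward(delta):
    # log-sign compression used for the SVR target
    return np.log(np.abs(delta) + 1) * np.sign(delta)

def inverse(y):
    # exact inverse of the compression above (assumed intent of the back-transform)
    return (np.exp(np.abs(y)) - 1) * np.sign(y)

delta = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
print(np.allclose(inverse(forward(delta)), delta))  # True
```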
@@ -449,107 +463,6 @@ def process_NadirCC_LCCC_LA_AI(path_campaign, GSD):
### TODO: Dynamic traits, should be based on spatial corrected values from DB
def process_begin_stem_elongation(paths_campaigns, graph=True):
path_global_trait_csvs = paths_campaigns[0].parent.parent / 'trait_csvs'
path_global_trait_csvs.mkdir(exist_ok=True)
# Container for results
AI_index = {}
for path_campaign in paths_campaigns:
path_trait_csvs = path_campaign / 'trait_csvs'
print("Processing", path_trait_csvs)
AI_files = path_trait_csvs.glob('*_AI.csv')
for AI_file in AI_files:
print("Processing", AI_file)
if AI_file.name[-11:-7] == "lp01":
print("Lp01 subplots, skip")
break
design_label = AI_file.name[:-7]
df_AI = pd.read_csv(AI_file)
if design_label not in AI_index:
AI_index[design_label] = []
AI_index[design_label].extend(df_AI.to_dict('records'))
for design_label, list_AI in AI_index.items():
df_AI = pd.DataFrame.from_records(list_AI)
df_AI['value_abs'] = df_AI.value.abs()
# Find transition from negative to positive index (begin SE) with lm
df_AI_smooth = df_AI.copy()
df_AI_smooth['yday'] = pd.to_numeric(pd.to_datetime(df_AI_smooth.timestamp).dt.strftime('%j'), downcast="float")
df_AI_smooth['year'] = pd.to_numeric(pd.to_datetime(df_AI_smooth.timestamp).dt.strftime('%Y'))
bbch30_dates = {}
plot_groups = df_AI_smooth.groupby("plot_label")
for plot_label, df_plot_group in plot_groups:
print("Estimating transition trough zero for plot", plot_label)
# For model: find closest point before zero and two closes points after zero
df_before_zero = df_plot_group[df_plot_group.value < -1].nlargest(1, 'campaign_date')
df_after_zero = df_plot_group[df_plot_group.value >= -1].nsmallest(2, 'campaign_date')
# If the earliest after-zero date is older than the before-zero date: pick an earlier before-zero point
i_before_zero = 1
while np.min(df_after_zero.campaign_date) < np.min(df_before_zero.campaign_date) and i_before_zero < 10:
i_before_zero += 1
df_before_zero = df_plot_group[(df_plot_group.value < -1) & (df_plot_group.campaign_date < np.min(df_before_zero.campaign_date))].nlargest(1, 'campaign_date')
df_plot_group_lm = pd.concat([df_before_zero, df_after_zero])
lm = None
if len(df_plot_group_lm) < 2:
# Set date to max date if less than two points for lm
lm_zero = np.max(df_plot_group.yday)
else:
# Build lm, calculate zero transition
lm = LinearRegression().fit([[v] for v in df_plot_group_lm.yday], df_plot_group_lm.value)
lm_zero_ = - lm.intercept_ / lm.coef_[0]
# If zero transition is outside max date: correct extrapolation
lm_zero = lm_zero_ if (lm_zero_ < np.max(df_plot_group.yday) and lm.coef_[0] > 0) else np.max(df_plot_group.yday)
# Plot
if graph:
path_graphs = path_global_trait_csvs / "stem_elongation_plots"
path_graphs.mkdir(exist_ok=True)
plt.scatter(df_plot_group.yday, df_plot_group.value)
x_range = np.arange(np.min(df_plot_group.yday), np.max(df_plot_group.yday), 1)
plt.scatter(df_plot_group_lm.yday, df_plot_group_lm.value)
if lm:
plt.plot(x_range, x_range * lm.coef_[0] + lm.intercept_, label='lm')
plt.plot(lm_zero, -1, marker = 'x', c='black')
plt.hlines(y=-1, xmin=np.min(x_range), xmax=np.max(x_range))
plt.title(plot_label)
plt.xlabel("Day of year")
plt.ylabel("Predicted delta to BBCH 30 (days)")
#plt.show()
plt.savefig(path_graphs / (plot_label + ".png"))
plt.close()
# Convert back to date
year = np.unique(df_plot_group.year)[0]
bbch30_date = datetime(year, 1, 1) + timedelta(np.round(lm_zero) - 1)
bbch30_dates[plot_label] = bbch30_date
df_begin_SE = pd.DataFrame(list(bbch30_dates.items()), columns=["plot_label", "bbch30_date"])
print("Write begin stem elongation trait csv for", design_label)
df_begin_SE['value_json'] = ""
df_begin_SE['trait'] = "BBCH"
df_begin_SE['trait_id'] = 21
df_begin_SE['value'] = 30
df_begin_SE['timestamp'] = df_begin_SE['bbch30_date']
df_begin_SE.to_csv(path_global_trait_csvs / (design_label + "_BBCH30.csv"), index=False)
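For context, a compact sketch of the zero-crossing estimate performed in `process_begin_stem_elongation` above: fit a linear model through the predicted delta-to-BBCH 30 values around the sign change, solve for the day of year where the fitted line crosses zero, and convert that day back into a calendar date. The points below are synthetic, and the original's before/after point selection is omitted.

```python
from datetime import datetime, timedelta
import numpy as np
from sklearn.linear_model import LinearRegression

# Synthetic (day-of-year, predicted delta-to-BBCH30) pairs around the sign change
yday = np.array([95.0, 102.0, 109.0])
value = np.array([-6.0, -2.0, 3.0])

lm = LinearRegression().fit(yday.reshape(-1, 1), value)
lm_zero = -lm.intercept_ / lm.coef_[0]   # day of year where the fitted line crosses zero

year = 2018
bbch30_date = datetime(year, 1, 1) + timedelta(days=float(np.round(lm_zero)) - 1)
print(bbch30_date.date())
```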
def process_plant_count_dynamics(path_campaign_date_5,
path_campaign_date_10,
......
This diff is collapsed.
@@ -16,7 +16,7 @@ if __name__ == "__main__":
path_campaign = base_path_campaign / campaign_date / '28m_M600P'
-if int(campaign_date) <= 20180420:
+if int(campaign_date) <= 20180509:
print("Processing", campaign_date)
-CanopyAnalysis.process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD)
\ No newline at end of file
+CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD)
\ No newline at end of file
@@ -2,7 +2,7 @@ from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path
path_p = Path("P:")
-#path_p = Path("/home/luroth/public")
+path_p = Path("/home/luroth/public")
if __name__ == "__main__":
@@ -16,7 +16,7 @@ if __name__ == "__main__":
path_campaign = base_path_campaign / campaign_date / '28m_M600P'
-if int(campaign_date) <= 20180420:
+if int(campaign_date) <= 20180509:
print("Processing", campaign_date)
-CanopyAnalysis.process_NadirCC_LCCC_LA_AI(path_campaign, GSD)
\ No newline at end of file
+CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD)
\ No newline at end of file
@@ -2,7 +2,7 @@ from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path
path_p = Path("P:")
-#path_p = Path("/home/luroth/public")
+path_p = Path("/home/luroth/public")
if __name__ == "__main__":
@@ -20,4 +20,4 @@ if __name__ == "__main__":
path_campaign = base_path_campaign / campaign_date / '28m_M600P'
-CanopyAnalysis.process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD)
\ No newline at end of file
+CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, campaign_date, GSD)
\ No newline at end of file
@@ -2,7 +2,7 @@ from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path
path_p = Path("P:")
-#path_p = Path("/home/luroth/public")
+path_p = Path("/home/luroth/public")
if __name__ == "__main__":
@@ -14,10 +14,10 @@ if __name__ == "__main__":
for date_folder in date_folders:
campaign_date = date_folder.name
-if int(campaign_date) <= 20190418:
+if int(campaign_date) <= 20190501:
print("Processing", campaign_date)
path_campaign = base_path_campaign / campaign_date / '28m_M600P'
-CanopyAnalysis.process_NadirCC_LCCC_LA_AI(path_campaign, GSD)
\ No newline at end of file
+CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD)
\ No newline at end of file
@@ -2,7 +2,7 @@ from GroundAerialCoverage import CanopyAnalysis
from pathlib import Path
path_p = Path("P:")
-#path_p = Path("/home/luroth/public")
+path_p = Path("/home/luroth/public")
if __name__ == "__main__":
@@ -14,10 +14,10 @@ if __name__ == "__main__":
for date_folder in date_folders:
campaign_date = date_folder.name
-if int(campaign_date) <= 20190418:
+if int(campaign_date) <= 20190501:
print("Processing", campaign_date)
path_campaign = base_path_campaign / campaign_date / '28m_M600P'
-CanopyAnalysis.process_NadirCC_LCCC_LA_AI(path_campaign, campaign_date, GSD)
\ No newline at end of file
+CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD)
\ No newline at end of file
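The driver scripts above all follow the same pattern: walk the campaign-date folders, keep only dates up to a cut-off, and run the BBCH 30 processing on the '28m_M600P' flight folder. A minimal sketch of that loop with a hypothetical base path, GSD and cut-off (the real scripts define these per experiment, and some call the three-argument variant):

```python
from pathlib import Path
from GroundAerialCoverage import CanopyAnalysis

# Hypothetical values for illustration; the real scripts set these per experiment
base_path_campaign = Path("/home/luroth/public") / "some_experiment" / "campaigns"
GSD = 0.003           # assumed ground sampling distance in m/px
date_cutoff = 20180509

if __name__ == "__main__":
    for date_folder in sorted(base_path_campaign.iterdir()):
        campaign_date = date_folder.name
        if not campaign_date.isdigit() or int(campaign_date) > date_cutoff:
            continue
        print("Processing", campaign_date)
        path_campaign = base_path_campaign / campaign_date / '28m_M600P'
        CanopyAnalysis.process_NadirCC_LCCC_LA_BBCH30(path_campaign, GSD)
```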