Commit 09d16817 authored by Sebastian Hörl's avatar Sebastian Hörl
Browse files

Update scripts

parent 64c3e18b
......@@ -38,7 +38,7 @@
"df_counts = gpd.read_file(\"astra_count_stations.geojson\")[[\"id\", \"name\", \"personal_count\", \"geometry\"]]\n",
"df_counts = df_counts.rename(columns = {\"personal_count\": \"reference\"})\n",
"\n",
"df_flows = gpd.read_file(\"/run/media/sebastian/shoerl_data/astra1802/analysis/base10_flows.shp\")[[\"link_id\", \"day_pcu\", \"geometry\"]]\n",
"df_flows = gpd.read_file(\"/run/media/sebastian/shoerl_data/switzerland_freight_w_departures_1pm/flow.shp\")[[\"link_id\", \"day_pcu\", \"geometry\"]]\n",
"df_flows = df_flows.rename(columns = {\"day_pcu\": \"simulation\"})\n",
"df_flows[\"simulation\"] *= SCALING_FACTOR"
]
......
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry as geo
import geopandas as gpd
from tqdm import tqdm_notebook
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# Global factor applied to simulated flows before comparing with ASTRA counts:
#   x 2    -> ASTRA gives counts in both directions
#   x 10   -> sample upscaling for the MATSim simulation
#   x 0.92 -> correction factor
SCALING_FACTOR = 1 * 2 * 10 * 0.92

# Search radius for matching count stations to network links
# (units of the projected CRS used below).
MATCHING_RADIUS = 100
```
%% Cell type:code id: tags:
``` python
# Reference data: ASTRA count stations with daily personal (non-freight) counts.
df_counts = gpd.read_file("astra_count_stations.geojson")[["id", "name", "personal_count", "geometry"]]
df_counts = df_counts.rename(columns = {"personal_count": "reference"})

# Simulated link flows from the MATSim run.
# NOTE(review): a previous read of ".../astra1802/analysis/base10_flows.shp"
# was dead code here (its result was immediately overwritten), so it has been
# removed to avoid a pointless shapefile read.
df_flows = gpd.read_file("/run/media/sebastian/shoerl_data/switzerland_freight_w_departures_1pm/flow.shp")[["link_id", "day_pcu", "geometry"]]
df_flows = df_flows.rename(columns = {"day_pcu": "simulation"})

# Bring the simulated PCU flows onto the same scale as the reference counts.
df_flows["simulation"] *= SCALING_FACTOR
```
%% Cell type:code id: tags:
``` python
# Score candidate global scaling factors: for each factor, spatially match
# every count station to candidate simulated links, keep the best-matching
# link per station, and use the summed absolute deviation as the score.
factors = [1.0] # This is just for calibration, for production leave at 1.0
scores = []

for factor in tqdm_notebook(factors):
    df_match = df_counts.copy()
    # Buffer station points into discs so the spatial join finds all links
    # within MATCHING_RADIUS (in CRS units — presumably meters; confirm CRS).
    df_match["geometry"] = df_match["geometry"].buffer(MATCHING_RADIUS)
    df_match = gpd.sjoin(df_match, df_flows, op = "intersects", how = "left").drop(columns = ["index_right"])

    # Deviation between scaled simulated flow and the reference count.
    df_match["delta"] = factor * df_match["simulation"] - df_match["reference"]
    df_match["abs_delta"] = np.abs(df_match["delta"])

    # Restore point geometry (centroid of the buffered disc = original station).
    df_match["geometry"] = df_match["geometry"].centroid

    # Per station id, keep only the candidate link with the smallest |delta|
    # (sort ascending, then drop_duplicates keeps the first row per id).
    df_match = df_match.sort_values(by = ["id", "abs_delta"])
    df_match = df_match.drop_duplicates("id")

    scores.append(df_match["abs_delta"].sum())

# Only plot a score curve when more than one factor was actually swept.
if len(factors) > 1:
    plt.plot(factors, scores)
```
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
# Append two dummy features (ids -1 and -2) carrying +max|delta| and
# -max|delta| — presumably anchors so a GIS viewer applies a symmetric
# color scale to "delta"; TODO confirm. They clone station id 2 and are
# not real observations.
dummy = df_match.loc[df_match["id"] == 2].copy()
dummy["id"] = -1
dummy["delta"] = np.max(np.abs(df_match["delta"]))
df_match = pd.concat([df_match, dummy])

dummy = df_match.loc[df_match["id"] == 2].copy()
dummy["id"] = -2
dummy["delta"] = -np.max(np.abs(df_match["delta"]))
df_match = pd.concat([df_match, dummy])

# Write the per-station match results (including the two anchors) as GeoJSON.
df_match.to_file("match.geojson", driver = "GeoJSON")
```
%% Cell type:code id: tags:
``` python
# Scatter reference vs. scaled simulated counts on log-log axes, with the
# identity line and a +/-20% band for orientation.
plt.figure(dpi = 120, figsize = (6,4))
plt.loglog(df_match["reference"], df_match["simulation"], 'x')
plt.loglog([0, 120000], [0, 120000], 'k--')  # identity (perfect match) line
x = np.linspace(0, 120000, 100)
plt.fill_between(x, 0.8 * x, 1.2 * x, color = 'k', alpha = 0.2)  # +/-20% band
plt.grid()
plt.xlabel("Reference counts")
plt.ylabel("Scaled simulation counts")
```
%%%% Output: execute_result
Text(0, 0.5, 'Scaled simulation counts')
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:code id: tags:
``` python
# Build a QA layer: one LineString per matched (station, link) pair,
# connecting the count station to the centroid of its matched link, with the
# straight-line matching distance as an attribute.
df_comparison = df_match.copy()
df_comparison = df_comparison.rename(columns = { "geometry": "count_geometry" })

# Collapse each flow link to its centroid for the connector endpoints.
df_centroids = df_flows.copy()
df_centroids["geometry"] = df_centroids["geometry"].centroid
df_centroids = df_centroids.rename(columns = { "geometry": "link_geometry" })

df_comparison = pd.merge(df_comparison, df_centroids[["link_id", "link_geometry"]], on = "link_id")
# Connector line and distance between station point and link centroid.
df_comparison["geometry"] = [geo.LineString([a, b]) for a, b in zip(df_comparison["count_geometry"], df_comparison["link_geometry"])]
df_comparison["dist"] = [a.distance(b) for a, b in zip(df_comparison["count_geometry"], df_comparison["link_geometry"])]
del df_comparison["link_geometry"]
del df_comparison["count_geometry"]

df_comparison = gpd.GeoDataFrame(df_comparison, crs = {"init": "EPSG:2056"})
df_comparison.to_file("comparison.geojson", driver = "GeoJSON")
```
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
......@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
......@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -48,7 +48,7 @@
"df_counts[\"id\"] = df_counts[\"id\"].fillna(method = \"pad\").astype(np.int)\n",
"df_counts = df_counts[df_counts[\"slot\"].str.startswith(\"DWV\")]\n",
"\n",
"df_counts[\"freight\"] = df_counts[\"slot\"].isin([\"DWV SV\", \"DWV SGF\"])\n",
"df_counts[\"freight\"] = df_counts[\"slot\"] == \"DWV SGF\"\n",
"df_counts = df_counts.drop(columns = [\"slot\"])\n",
"df_counts = df_counts.groupby([\"id\", \"freight\"]).sum().reset_index()\n",
"\n",
......@@ -66,18 +66,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/sebastian/.local/lib/python3.6/site-packages/geopandas/io/file.py:108: FionaDeprecationWarning: Use fiona.Env() instead.\n",
" with fiona.drivers():\n"
]
}
],
"outputs": [],
"source": [
"df = pd.concat([df_locations, df_counts], axis = 1).dropna()\n",
"df.reset_index().to_file(\"astra_count_stations.geojson\", driver = \"GeoJSON\")"
......
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry as geo
import geopandas as gpd
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# Read the ASTRA count-station directory and build a GeoDataFrame of station
# points, reprojected from EPSG:21781 (CH1903/LV03) to EPSG:2056 (CH1903+/LV95).
raw = pd.read_excel("messstellenverzeichnis.xlsx", skiprows = 6)
raw = raw.rename({
    "Zählstellen-Bezeichnung": "name",
    "Nr": "id", "Koordinate Ost": "x", "Koordinate Nord": "y"
}, axis = 1)
df_locations = raw[["id", "name", "x", "y"]].set_index("id")

# One point per station from its east/north coordinates.
df_locations["geometry"] = [geo.Point(east, north) for east, north in zip(df_locations["x"], df_locations["y"])]
df_locations = gpd.GeoDataFrame(df_locations)
df_locations.crs = {"init": "EPSG:21781"}
df_locations = df_locations.to_crs({"init": "EPSG:2056"})
df_locations = df_locations[["name", "geometry"]]
```
%% Cell type:code id: tags:
``` python
# Load the 2017 yearly count results (one row per station/slot) and reduce
# them to one row per station with total, freight and personal counts.
df_counts = pd.read_excel(
    "Jahresergebnisse-2017.xlsx",
    skiprows = 7, sheet_name = "Klassendaten", usecols = [0, 5, 23],
    names = ["id", "slot", "count"]
)

# The station id is only present on the first row of each station block in
# the sheet; forward-fill it down. FIX: use .ffill() (fillna(method="pad") is
# deprecated) and builtin int — the np.int alias was removed in NumPy 1.24
# and is identical to int on older versions.
df_counts["id"] = df_counts["id"].ffill().astype(int)

# Keep only the average-workday ("DWV") slots.
df_counts = df_counts[df_counts["slot"].str.startswith("DWV")]

# Freight flag: only the "DWV SGF" slot counts as freight. FIX: a previous
# line also including "DWV SV" via isin() was dead code (immediately
# overwritten) and has been removed.
df_counts["freight"] = df_counts["slot"] == "DWV SGF"
df_counts = df_counts.drop(columns = ["slot"])
df_counts = df_counts.groupby(["id", "freight"]).sum().reset_index()

# Pivot the freight flag into two count columns, one row per station.
df_total = df_counts[~df_counts["freight"]][["id", "count"]].rename({"count": "total_count"}, axis = 1)
df_freight = df_counts[df_counts["freight"]][["id", "count"]].rename({"count": "freight_count"}, axis = 1)
df_counts = pd.merge(df_total, df_freight, how = "left", on = "id")
assert(len(df_counts) == len(df_counts["id"].unique()))

# Personal traffic = everything that is not freight; drop stations with no data.
df_counts["personal_count"] = df_counts["total_count"] - df_counts["freight_count"]
df_counts = df_counts[df_counts["total_count"] > 0]
df_counts = df_counts.set_index("id")
```
%% Cell type:code id: tags:
``` python
# Join station locations and counts on their shared station-id index, keep
# only stations present in both tables (dropna), and write the result.
df = pd.concat([df_locations, df_counts], axis = 1).dropna()
df.reset_index().to_file("astra_count_stations.geojson", driver = "GeoJSON")
```
%%%% Output: stream
/home/sebastian/.local/lib/python3.6/site-packages/geopandas/io/file.py:108: FionaDeprecationWarning: Use fiona.Env() instead.
with fiona.drivers():
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment