Skip to content
Snippets Groups Projects
Verified Commit 92e370c6 authored by Marco Aceti
Browse files

Add 'timetable' stat

parent bfb7d96f
No related branches found
No related tags found
No related merge requests found
......@@ -26,7 +26,7 @@ from dateparser import parse
from joblib import Parallel, delayed
from pandas.core.groupby.generic import DataFrameGroupBy
from src.analysis import groupby, stat, trajectories_map
from src.analysis import groupby, stat, timetable, trajectories_map
from src.analysis.filter import *
from src.analysis.load_data import read_station_csv, read_train_csv, tag_lines
......@@ -84,9 +84,16 @@ def register_args(parser: argparse.ArgumentParser):
"day_train_count",
"trajectories_map",
"detect_lines",
"timetable",
),
default="describe",
)
parser.add_argument(
"--timetable-collapse",
help="collapse the train stop times in the graph, relative to the first (only for 'timetable' stat). Defaults to False.",
action=argparse.BooleanOptionalAction,
default=False,
)
parser.add_argument(
"station_csv",
help="exported station CSV",
......@@ -172,11 +179,23 @@ def main(args: argparse.Namespace):
stat.delay_boxplot(df)
elif args.stat == "day_train_count":
stat.day_train_count(df)
elif args.stat == "trajectories_map":
if not isinstance(df, pd.DataFrame):
raise ValueError("can't use trajectories_map with unaggregated data")
if args.stat in [
"trajectories_map",
"detect_lines",
"timetable",
] and not isinstance(df, pd.DataFrame):
raise ValueError(f"can't use {args.stat} with unaggregated data")
assert isinstance(df, pd.DataFrame)
if args.stat == "trajectories_map":
trajectories_map.build_map(stations, df)
elif args.stat == "detect_lines":
if not isinstance(df, pd.DataFrame):
raise ValueError("can't use detect_lines with unaggregated data")
stat.detect_lines(df, stations)
elif args.stat == "timetable":
if not timetable.same_line(df):
raise ValueError(
f"can't use timetable if --railway-lines filter is not used"
)
timetable.timetable_graph(df, stations, args.timetable_collapse)
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import timple
from src.const import TIMEZONE, TIMEZONE_GMT
def same_line(df: pd.DataFrame) -> bool:
    """Check if the trains in the provided DataFrame are ALL on the same line.

    Only the ``line`` column is inspected. Missing values in that column are
    ignored when counting distinct lines (pandas ``nunique`` default).

    Args:
        df (pd.DataFrame): the trains to check

    Return:
        bool: True if exactly one distinct line is present, False otherwise
            (including when the frame is empty or has no ``line`` column)
    """
    # Without a 'line' column there is nothing to prove the trains share a line.
    if "line" not in df.columns:
        return False

    # Count distinct values of the one relevant column only: counting every
    # column is wasted work and fails on columns holding unhashable values.
    return bool(df["line"].nunique() == 1)
def timetable_train(train: pd.DataFrame, expected: bool = False, collapse: bool = True):
    """Plot the timetable line of a single train with ``plt.plot``.

    The frame is read through its ``value``, ``variable``, ``long_name``,
    ``category`` and ``number`` columns: ``value`` goes on the x axis and
    ``long_name`` on the y axis.

    Args:
        train (pd.DataFrame): the train stop data to consider. It is not
            modified by this function.
        expected (bool, optional): determines whether to consider the 'expected'
            or 'actual' arrival/departure times (rows whose ``variable`` ends
            with that word). Defaults to False.
        collapse (bool, optional): determines whether to _collapse_ the times in
            the graph, i.e. subtract the smallest ``value`` of the train from
            every ``value``. Defaults to True.
    """
    if collapse:
        # Shift on a copy: the caller usually passes a groupby slice, and
        # writing into it would mutate (or warn about mutating) its data.
        # The minimum is taken over ALL rows, before the expected/actual filter.
        train = train.assign(value=train["value"] - train["value"].min())

    suffix = "expected" if expected else "actual"
    train_f = train.loc[train["variable"].str.endswith(suffix)]

    if expected:
        label = "expected"
    else:
        first_stop = train.iloc[0]
        label = f"{first_stop['category']} {first_stop['number']}"

    # The expected schedule is drawn as a thick solid black line on top
    # (higher zorder); actual runs are thinner dashed lines underneath.
    plt.plot(
        train_f["value"],
        train_f["long_name"],
        "ko" if expected else "o",
        linestyle="-" if expected else "--",
        linewidth=3 if expected else 2,
        label=label,
        zorder=10 if expected else 5,
    )
def timetable_graph(trains: pd.DataFrame, st: pd.DataFrame, collapse: bool = True):
    """Generate a timetable graph of trains in a line and show it.

    Stops are joined with the station data, melted into one row per
    (stop, time kind) and plotted train by train via ``timetable_train``.
    The plot title is the ``line`` value of the first row of ``trains``.

    Args:
        trains (pd.DataFrame): the train stop data to consider
        st (pd.DataFrame): the station data, joined on ``stop_station_code``
        collapse (bool, optional): determines whether to _collapse_ the times in
            the graph, relative to the first. When True, one 'expected' line per
            origin is also drawn. Defaults to True.
    """
    # NOTE(review): timple is presumably enabled so matplotlib can plot the
    # timedelta values produced by collapsing — confirm against timple docs.
    tmpl = timple.Timple()
    tmpl.enable()

    trains_j = (
        trains.sort_values(by="stop_number")
        .join(st, on="stop_station_code")
        .reset_index(drop=True)
    )

    # Long format: one row per stop and per arrival/departure, expected/actual
    # time; rows with any missing value are dropped.
    trains_m = (
        pd.melt(
            trains_j,
            id_vars=[
                "long_name",
                "stop_number",
                "train_hash",
                "category",
                "number",
                "origin",
            ],
            value_vars=[
                "departure_expected",
                "departure_actual",
                "arrival_expected",
                "arrival_actual",
            ],
        )
        .sort_values(["stop_number", "variable"])
        .dropna()
    )

    # expected: one reference schedule per origin, taken from the first train
    # (in groupby order) departing from that origin
    if collapse:
        for origin in trains_m["origin"].unique():
            from_origin = trains_m.loc[trains_m["origin"] == origin]
            # Only the first group is needed, so don't materialise them all.
            _, train = next(iter(from_origin.groupby("train_hash")))
            timetable_train(train, True, True)

    # actual
    for _, train in trains_m.groupby("train_hash"):
        timetable_train(train, False, collapse)

    plt.title(trains.iloc[0].line)
    plt.ylabel("Station")
    plt.xlabel("Time")

    ax = plt.gca()
    ax.invert_yaxis()
    # Collapsed values are offsets rather than wall-clock times, so they are
    # formatted with the GMT zone constant instead of the local one.
    tick_tz = TIMEZONE_GMT if collapse else TIMEZONE
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M", tick_tz))  # type: ignore
    plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment