Exemplo n.º 1
0
def smooth_sg(trj: TrajaDataFrame, w: int = None, p: int = 3):
    """Returns ``DataFrame`` of trajectory after Savitzky-Golay filtering.

    The input is copied first; the ``x`` and ``y`` columns of the copy are
    filtered independently and the copy is then passed through
    ``fill_in_traj`` before being returned.

    Args:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
      w (int): window size, must be odd. When ``None`` it defaults to
        ``p + 3 - p % 2``, which is odd for every integer ``p``.
      p (int): polynomial order (Default value = 3)

    Returns:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
      ValueError: if ``w`` is not odd.

    .. doctest::

        >> df = traja.generate()
        >> traja.smooth_sg(df, w=101).head()
                   x          y  time
        0 -11.194803  12.312742  0.00
        1 -10.236337  10.613720  0.02
        2  -9.309282   8.954952  0.04
        3  -8.412910   7.335925  0.06
        4  -7.546492   5.756128  0.08

    """
    if w is None:
        w = p + 3 - p % 2

    if w % 2 != 1:
        # ValueError subclasses Exception, so existing ``except Exception``
        # handlers keep working.
        raise ValueError(f"Invalid smoothing parameter w ({w}): w must be odd")

    _trj = trj.copy()
    for axis_name in ("x", "y"):
        _trj[axis_name] = signal.savgol_filter(
            _trj[axis_name], window_length=w, polyorder=p, axis=0
        )
    return fill_in_traj(_trj)
Exemplo n.º 2
0
def grid_coordinates(
    trj: TrajaDataFrame,
    bins: Union[int, tuple] = None,
    xlim: tuple = None,
    ylim: tuple = None,
    assign: bool = False,
):
    """Returns ``DataFrame`` of trajectory discretized into 2D lattice grid coordinates.

    Rows with a missing ``x`` or ``y`` are dropped before binning. With
    ``assign=True`` this happens in place on ``trj`` (the same object is
    returned with ``xbin``/``ybin`` columns added); with ``assign=False`` the
    caller's frame is left untouched.

    Args:
        trj (~`traja.frame.TrajaDataFrame`): Trajectory
        bins (tuple or int): forwarded to ``_bins_to_tuple``; element 0 is used
            for x and element 1 for y
        xlim (tuple): ``(xmin, xmax)``; when given, x is binned with
            ``np.digitize`` against ``np.linspace(xmin, xmax, bins[0])``,
            otherwise with ``pd.cut(..., labels=False)``
        ylim (tuple): ``(ymin, ymax)``; same treatment as ``xlim`` for y
        assign (bool): Return updated original dataframe

    Returns:
        trj (~`traja.frame.TrajaDataFrame`): Trajectory if assign=True otherwise pd.DataFrame

    """
    # Drop nan for converting to int. Only touch the caller's object when the
    # caller asked for it to be updated.
    if assign:
        trj.dropna(subset=["x", "y"], inplace=True)
    else:
        trj = trj.dropna(subset=["x", "y"])

    bins = _bins_to_tuple(trj, bins)

    # NOTE(review): np.digitize is 1-based for in-range values while pd.cut
    # labels are 0-based, and linspace(…, bins[i]) yields bins[i] edges rather
    # than bins[i] cells — confirm this asymmetry is intended.
    if not xlim:
        xbin = pd.cut(trj.x, bins[0], labels=False)
    else:
        xmin, xmax = xlim
        xbin = np.digitize(trj.x, np.linspace(xmin, xmax, bins[0]))

    if not ylim:
        ybin = pd.cut(trj.y, bins[1], labels=False)
    else:
        ymin, ymax = ylim
        ybin = np.digitize(trj.y, np.linspace(ymin, ymax, bins[1]))

    if assign:
        trj["xbin"] = xbin
        trj["ybin"] = ybin
        return trj
    return pd.DataFrame({"xbin": xbin, "ybin": ybin})
Exemplo n.º 3
0
    def test_dataframe_to_trajadataframe(self):
        """Wrapping a plain ``DataFrame`` yields a ``TrajaDataFrame``."""
        n_rows = len(self.df)
        plain = pd.DataFrame(
            {"x": range(n_rows), "y": range(n_rows)}, index=self.df.index
        )
        wrapped = TrajaDataFrame(plain)

        # The source frame is still a pandas frame; the wrapper is the subclass.
        assert isinstance(plain, pd.DataFrame)
        assert isinstance(wrapped, TrajaDataFrame)
Exemplo n.º 4
0
def angles(trj: TrajaDataFrame, lag: int = 1):
    """Add ``angle``, ``heading`` and ``turn_angle`` columns to ``trj`` in place.

    ``trj`` must already contain ``dx``, ``dy`` and ``distance`` columns.

    * ``angle`` is ``arccos(|dx| / distance)`` in degrees.
    * ``heading`` signs/reflects ``angle`` according to the quadrant given by
      the signs of ``dx`` and ``dy``. Rows where both are zero (or NaN) match
      no quadrant and are not assigned.
    * ``turn_angle`` is the row-to-row difference of ``heading``, wrapped by
      subtracting 360 where it is >= 180 and adding 360 where it is < -180.

    Args:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory, modified in place
      lag (int): only ``1`` is supported (Default value = 1)

    Returns:
      None

    Raises:
      NotImplementedError: if ``lag`` is anything other than 1.

    """
    # Only a lag of exactly 1 is implemented; reject 0 and negatives too
    # instead of silently computing lag-1 results for them.
    if lag != 1:
        raise NotImplementedError("Lag must be 1.")

    trj["angle"] = np.rad2deg(np.arccos(np.abs(trj["dx"]) / trj["distance"]))

    # Get heading from angle, one quadrant at a time
    mask = (trj["dx"] > 0) & (trj["dy"] >= 0)
    trj.loc[mask, "heading"] = trj["angle"][mask]
    mask = (trj["dx"] >= 0) & (trj["dy"] < 0)
    trj.loc[mask, "heading"] = -trj["angle"][mask]
    mask = (trj["dx"] < 0) & (trj["dy"] <= 0)
    trj.loc[mask, "heading"] = -(180 - trj["angle"][mask])
    mask = (trj["dx"] <= 0) & (trj["dy"] > 0)
    trj.loc[mask, "heading"] = (180 - trj["angle"])[mask]

    trj["turn_angle"] = trj["heading"].diff()
    # Correction for 360-degree angle range
    trj.loc[trj["turn_angle"] >= 180, "turn_angle"] -= 360
    trj.loc[trj["turn_angle"] < -180, "turn_angle"] += 360
Exemplo n.º 5
0
def _resample_time(trj: TrajaDataFrame,
                   step_time: Union[float, int, str],
                   errors="coerce"):
    """Resample ``trj`` to ``step_time`` with order-2 spline interpolation.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory whose index is
            datetime or timedelta
        step_time: rule forwarded to ``trj.resample``
        errors (str): what to do when resampling fails because of duplicate
            index values. ``"coerce"`` logs a warning, keeps the first row of
            each duplicated index value and retries (back-filling at most one
            row before interpolating). Any other value logs an error and raises.

    Returns:
        The resampled, interpolated frame.

    Raises:
        Exception: if the index is neither datetime nor timedelta.
        ValueError: on duplicate index values when ``errors != "coerce"``, or
            any other ``ValueError`` raised by pandas while resampling.

    """
    if not is_datetime_or_timedelta_dtype(trj.index):
        raise Exception(f"{trj.index.dtype} is not datetime or timedelta.")
    try:
        return trj.resample(step_time).interpolate(method="spline", order=2)
    except ValueError as err:
        if "cannot reindex from a duplicate axis" not in str(err):
            # Not the failure handled here: surface it rather than falling
            # through to an unbound result.
            raise
        if errors != "coerce":
            logger.error("Error: duplicate time indices")
            raise ValueError("Duplicate values in indices") from err

    logger.warning("Duplicate time indices, keeping first")
    deduplicated = trj.loc[~trj.index.duplicated(keep="first")]
    return (deduplicated.resample(step_time).bfill(limit=1).interpolate(
        method="spline", order=2))
Exemplo n.º 6
0
def from_df(df: pd.DataFrame, xcol=None, ycol=None, time_col=None, **kwargs):
    """Returns a :class:`traja.frame.TrajaDataFrame` from a :class:`pandas DataFrame<pandas.DataFrame>`.

    Args:
      df (:class:`pandas.DataFrame`): Trajectory as pandas ``DataFrame``
      xcol (str): column copied (as numeric) into ``x``; used only when
        ``ycol`` is also given
      ycol (str): column copied (as numeric) into ``y``; used only when
        ``xcol`` is also given
      time_col (str): column converted to timedelta in place, using the
        ``time_units`` keyword argument (default ``"s"``) as the unit
      **kwargs: stored as attributes on the returned frame

    Returns:
      traj_df (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    .. doctest::

        >>> df = pd.DataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.from_df(df)
           x  y
        0  0  1
        1  1  2
        2  2  3

    """
    traj_df = TrajaDataFrame(df)

    # User-named coordinate columns; unparseable values become NaN
    if xcol and ycol:
        for target, source in (("x", xcol), ("y", ycol)):
            traj_df[target] = pd.to_numeric(traj_df[source], errors="coerce")

    if time_col:
        units = kwargs.get("time_units", "s")
        traj_df[time_col] = pd.to_timedelta(traj_df[time_col], unit=units)
        kwargs["time_col"] = time_col

    # Make sure every declared metadata field exists
    for name in traj_df._metadata:
        if hasattr(traj_df, name):
            continue
        traj_df.__dict__[name] = None

    # Remaining keyword arguments become metadata
    traj_df.__dict__.update(kwargs)
    return traj_df
Exemplo n.º 7
0
def smooth_sg(trj: TrajaDataFrame, w: int = None, p: int = 3):
    """Returns ``DataFrame`` of trajectory after Savitzky-Golay filtering.

    The input is copied first so the caller's ``x`` and ``y`` columns are not
    overwritten; the filtered copy is passed through ``fill_in_traj`` before
    being returned.

    Args:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
      w (int): window size, must be odd. When ``None`` it defaults to
        ``p + 3 - p % 2``, which is odd for every integer ``p``.
      p (int): polynomial order (Default value = 3)

    Returns:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
      ValueError: if ``w`` is not odd.

    """
    if w is None:
        w = p + 3 - p % 2

    if w % 2 != 1:
        # ValueError subclasses Exception, so existing ``except Exception``
        # handlers keep working.
        raise ValueError(f"Invalid smoothing parameter w ({w}): w must be odd")

    # Smooth a copy rather than mutating the argument.
    _trj = trj.copy()
    _trj.x = signal.savgol_filter(_trj.x, window_length=w, polyorder=p, axis=0)
    _trj.y = signal.savgol_filter(_trj.y, window_length=w, polyorder=p, axis=0)
    _trj = fill_in_traj(_trj)
    return _trj
Exemplo n.º 8
0
def calc_heading(trj: TrajaDataFrame):
    """Calculate trajectory heading.

    Uses the existing ``angle`` column when present (converted to degrees if
    it carries a ``unit`` attribute equal to ``"radians"``), otherwise calls
    ``calc_angle``. The result is written to the ``heading`` column of ``trj``
    and that column is returned.

    Args:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Returns:
        heading (:class:`pandas.Series`): heading as a ``Series``

    .. doctest::

        >>> df = traja.TrajaDataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.calc_heading(df)
        0     NaN
        1    45.0
        2    45.0
        Name: heading, dtype: float64

    """
    if _has_cols(trj, ["angle"]):
        angle = trj.angle
        if hasattr(angle, "unit") and angle.unit == "radians":
            angle = np.rad2deg(angle)
    else:
        angle = calc_angle(trj)

    step_x = trj.x.diff()
    step_y = trj.y.diff()

    # (quadrant selector, heading for that quadrant) in assignment order
    quadrants = (
        ((step_x > 0) & (step_y >= 0), angle),
        ((step_x >= 0) & (step_y < 0), -angle),
        ((step_x < 0) & (step_y <= 0), -(180 - angle)),
        ((step_x <= 0) & (step_y > 0), 180 - angle),
    )
    for selector, values in quadrants:
        trj.loc[selector, "heading"] = values[selector]
    return trj.heading
Exemplo n.º 9
0
def resample_time(trj: TrajaDataFrame, step_time: str, new_fps: bool = None):
    """Returns a ``TrajaDataFrame`` resampled to consistent `step_time` intervals.

    If the time column reported by ``_get_time_col`` is the index and that
    index is datetime or timedelta, the frame is resampled directly. Otherwise
    the time column is moved to the index, interpreted as seconds, resampled,
    and moved back to a regular column.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval (eg, '1s')
        new_fps (bool, optional): new fps (currently unused)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: if no time column is found, or if ``step_time``
            is a string containing ``"."``.

    .. doctest::

        >>> from traja import generate
        >>> from traja.trajectory import resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '2s')
        >>> resampled.head()
              time          x          y
        0 00:00:00  14.555071 -26.482614
        1 00:00:02  -3.582797  -6.491297
        2 00:00:04  -4.299709  26.937443
        3 00:00:06 -25.337042  42.131848
        4 00:00:08  33.069915  32.780830

    """
    time_col = _get_time_col(trj)

    # Compare by value: identity of string literals is an interpreter detail.
    if time_col == "index" and (
        is_datetime64_any_dtype(trj.index) or is_timedelta64_dtype(trj.index)
    ):
        return _resample_time(trj, step_time)

    if not time_col:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected data type."
        )

    # Raise the specific message directly; it used to be caught and replaced
    # by a generic one.
    if isinstance(step_time, str) and "." in step_time:
        raise NotImplementedError("Fractional step time not implemented.")

    _trj = trj.set_index(time_col)
    _trj.index = pd.to_timedelta(_trj.index, unit="s")
    _trj = _resample_time(_trj, step_time)
    _trj.reset_index(inplace=True)
    return _trj
Exemplo n.º 10
0
def calc_heading(trj: TrajaDataFrame):
    """Calculate trajectory heading.

    Uses the existing ``angle`` column when present, otherwise calls
    ``calc_angle``. The result is written to the ``heading`` column of ``trj``
    and that column is returned.

    Args:
      trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Returns:
        heading (:class:`pandas.Series`): heading as a ``Series``

    .. doctest::

        >>> df = traja.TrajaDataFrame({'x':[0,1,2],'y':[1,2,3]})
        >>> traja.calc_heading(df)
        0     NaN
        1    45.0
        2    45.0
        Name: heading, dtype: float64

    """
    angle = trj.angle if _has_cols(trj, ["angle"]) else calc_angle(trj)

    delta_x = trj.x.diff()
    delta_y = trj.y.diff()

    def _store(rows, values):
        # Write the heading for one quadrant of movement directions.
        trj.loc[rows, "heading"] = values

    # Get heading from angle, one quadrant at a time
    rows = (delta_x > 0) & (delta_y >= 0)
    _store(rows, angle[rows])
    rows = (delta_x >= 0) & (delta_y < 0)
    _store(rows, -angle[rows])
    rows = (delta_x < 0) & (delta_y <= 0)
    _store(rows, -(180 - angle[rows]))
    rows = (delta_x <= 0) & (delta_y > 0)
    _store(rows, 180 - angle[rows])
    return trj.heading
Exemplo n.º 11
0
def polar_bar(
    trj: TrajaDataFrame,
    feature: str = "turn_angle",
    bin_size: int = 2,
    overlap: bool = True,
    ax: Optional[matplotlib.axes.Axes] = None,
    **plot_kws: str,
):
    """Plot polar bar chart.

    A ``displacement`` column is written onto ``trj``, rows at or below a
    small displacement threshold are discarded, the requested feature is
    computed on what remains, and rows with a null feature or displacement are
    removed before plotting with ``_polar_bar``.

    Args:
        trj: Trajectory
        feature (str): Options: 'turn_angle', 'heading'
        bin_size (int): width of bins
        overlap (bool): Overlapping shows all values, if set to false is a histogram
        ax: axes forwarded to ``_polar_bar``
        **plot_kws: forwarded to ``_polar_bar``

    Returns:
        ax

    """
    DIST_THRESHOLD = 0.001

    # Get displacement and keep only steps that actually moved
    trj["displacement"] = traja.trajectory.calc_displacement(trj)
    trj = trj.loc[trj.displacement > DIST_THRESHOLD]

    if feature == "turn_angle":
        trj["turn_angle"] = traja.trajectory.calc_turn_angle(trj)
        trj["turn_angle"] = trj["turn_angle"].shift(-1)
    elif feature == "heading":
        trj[feature] = traja.trajectory.calc_heading(trj)

    trj = trj[trj[feature].notnull()]
    trj = trj[trj["displacement"].notnull()]

    assert len(trj) > 0, "Dataframe is empty after filtering, check coordinates"

    return _polar_bar(
        trj.displacement,
        trj[feature],
        bin_size=bin_size,
        overlap=overlap,
        ax=ax,
        **plot_kws,
    )
Exemplo n.º 12
0
def resample_time(trj: TrajaDataFrame,
                  step_time: str,
                  new_fps: Optional[bool] = None):
    """Returns a ``TrajaDataFrame`` resampled to consistent `step_time` intervals.

    ``step_time`` should be expressed as a number-time unit combination, eg "2S" for 2 seconds and "2100L" for 2100 milliseconds.

    If the time column reported by ``_get_time_col`` is a datetime index the
    frame is resampled directly. A timedelta index is converted to datetime on
    a copy, resampled, and converted back. Otherwise the time column is moved
    to the index and parsed as datetime using the frame's ``time_units``
    attribute (default ``"s"``) before resampling.

    Args:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory
        step_time (str): step time interval / offset string (eg, '2S' (seconds), '50L' (milliseconds), '50N' (nanoseconds))
        new_fps (bool, optional): new fps (currently unused)

    Returns:
        trj (:class:`~traja.frame.TrajaDataFrame`): Trajectory

    Raises:
        NotImplementedError: if no time column is found, or if ``step_time``
            is a string containing ``"."``.

    .. doctest::

        >>> from traja import generate, resample_time
        >>> df = generate()
        >>> resampled = resample_time(df, '50L') # 50 milliseconds
        >>> resampled.head()
                                         x         y
        time                                        
        1970-01-01 00:00:00.000   0.000000  0.000000
        1970-01-01 00:00:00.050   0.999571  4.293384
        1970-01-01 00:00:00.100  -1.298510  5.423373
        1970-01-01 00:00:00.150  -6.056916  4.874502
        1970-01-01 00:00:00.200 -10.347759  2.108385

    """
    time_col = _get_time_col(trj)

    if time_col == "index" and is_datetime64_any_dtype(trj.index):
        return _resample_time(trj, step_time)

    if time_col == "index" and is_timedelta64_dtype(trj.index):
        # Convert the index on a copy so the caller's timedelta index is not
        # rewritten as a side effect.
        _trj = trj.copy()
        _trj.index = pd.to_datetime(_trj.index)
        _trj = _resample_time(_trj, step_time)
        _trj.index = pd.to_timedelta(_trj.index)
        return _trj

    if not time_col:
        raise NotImplementedError(
            f"Time column ({time_col}) not of expected datasets type.")

    # Raise the specific guidance directly; it used to be caught and replaced
    # by a generic message.
    if isinstance(step_time, str) and "." in step_time:
        raise NotImplementedError(
            "Fractional step time not implemented.\n"
            "For milliseconds/microseconds/nanoseconds use:\n"
            "  L       milliseconds\n"
            "  U       microseconds\n"
            "  N       nanoseconds\n"
            "eg, step_time='2100L'"
        )

    _trj = trj.set_index(time_col)
    time_units = _trj.__dict__.get("time_units", "s")
    _trj.index = pd.to_datetime(_trj.index, unit=time_units)
    return _resample_time(_trj, step_time)
Exemplo n.º 13
0
def apply_all(trj: TrajaDataFrame, method: Callable, id_col: str, **kwargs):
    """Group ``trj`` by ``id_col`` and apply ``method`` to every group.

    Extra keyword arguments are forwarded to ``method``; the result of the
    grouped ``apply`` is returned unchanged.
    """
    per_trajectory = trj.groupby(by=id_col)
    return per_trajectory.apply(method, **kwargs)
Exemplo n.º 14
0
def read_file(
    filepath: str,
    id: Optional[str] = None,
    xcol: Optional[str] = None,
    ycol: Optional[str] = None,
    parse_dates: Union[str, bool] = False,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    spatial_units: str = "m",
    fps: Optional[float] = None,
    **kwargs,
):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    The first 10 rows are read once to infer per-column converters and dtypes
    (float columns are downcast to ``float32``) and to locate the first column
    whose name contains "time". The full file is then read with those
    settings and wrapped in a ``TrajaDataFrame``.

    Args:
      filepath (str): path to csv file with `x`, `y` and `time` (optional) columns
      id (str): id for trajectory
      xcol (str): name of column containing x coordinates
      ycol (str): name of column containing y coordinates
      parse_dates (Union[list,bool]): The behavior is as follows:
                                    - boolean. if True -> try parsing the index.
                                    - list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a
                                    separate date column.
      xlim (tuple): x limits (min,max) for plotting
      ylim (tuple): y limits (min,max) for plotting
      spatial_units (str): for plotting (eg, 'cm')
      fps (float): for time calculations
      **kwargs: Additional arguments for :meth:`pandas.read_csv`. ``title``,
        ``xlabel`` and ``ylabel`` are taken out and stored as metadata;
        ``date_parser``, ``converters`` and ``dtype`` are merged with the
        inferred settings.

    Returns:
        traj_df (:class:`~traja.main.TrajaDataFrame`): Trajectory

    Raises:
        Exception: if ``xcol`` or ``ycol`` is given but either is not a header.
        NotImplementedError: if ``filepath`` does not contain "csv".

    """
    # Local import: the ``pd.datetime`` alias no longer exists in pandas.
    from datetime import datetime

    date_parser = kwargs.pop("date_parser", None)
    # Plot labels are metadata, not read_csv options: take them out before
    # ``kwargs`` is forwarded to pandas.
    title = kwargs.pop("title", None)
    xlabel = kwargs.pop("xlabel", None)
    ylabel = kwargs.pop("ylabel", None)

    # TODO: Set index to first column containing 'time'
    df_test = pd.read_csv(
        filepath, nrows=10, parse_dates=parse_dates, infer_datetime_format=True
    )

    if xcol is not None or ycol is not None:
        if xcol not in df_test or ycol not in df_test:
            raise Exception(f"{xcol} or {ycol} not found as headers.")

    # Strip whitespace
    whitespace_cols = [c for c in df_test if " " in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop("converters", {})}

    # Downcast to float32 # TODO: Benchmark float32 vs float64 for very big dataset
    # ``np.float`` was an alias of the builtin ``float`` and has been removed.
    float_cols = df_test.select_dtypes(include=[float]).columns
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to sequence_ids
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: "category" for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop("dtype", {})}

    # Parse time column if present
    time_cols = [col for col in df_test.columns if "time" in col.lower()]
    time_col = time_cols[0] if time_cols else None

    def _strptime_parser(fmt):
        # Bind ``fmt`` per candidate so each parser keeps its own format.
        return lambda value: datetime.strptime(value, fmt)

    if parse_dates and not date_parser and time_col:
        # try different parsers
        format_strs = (
            "%Y-%m-%d %H:%M:%S:%f",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S",
        )
        for format_str in format_strs:
            candidate = _strptime_parser(format_str)
            try:
                sample = pd.read_csv(
                    filepath, date_parser=candidate, nrows=10, parse_dates=[time_col]
                )
            except ValueError:
                # This format does not fit the file; judge only samples that
                # were actually parsed with the candidate.
                continue
            parsed = sample[time_col]
            if is_datetime64_any_dtype(parsed) or is_timedelta64_dtype(parsed):
                date_parser = candidate
                break

    if "csv" not in filepath:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = pd.read_csv(
        filepath,
        date_parser=date_parser,
        parse_dates=(parse_dates or [time_col]) if date_parser else False,
        converters=converters,
        dtype=dtype,
        **kwargs,
    )

    # TODO: Replace default column renaming with user option if needed
    if time_col:
        # ``rename`` returns a new frame; skip it if it would duplicate "time".
        if time_col != "time" and "time" not in trj.columns:
            trj = trj.rename(columns={time_col: "time"})
    elif fps is not None:
        trj["time"] = np.asarray(trj.index, dtype=int) / fps
    # else: leave index as int frames

    if xcol and ycol:
        trj = trj.rename(columns={xcol: "x", ycol: "y"})

    trj = TrajaDataFrame(trj)

    # Set meta properties of TrajaDataFrame
    metadata = dict(
        id=id,
        xlim=xlim,
        ylim=ylim,
        spatial_units=spatial_units,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        fps=fps,
    )
    trj.__dict__.update(**metadata)
    return trj
Exemplo n.º 15
0
def read_file(filepath, **kwargs):
    """Convenience method wrapping pandas `read_csv` and initializing metadata.

    The first 10 rows are read once to infer per-column converters and dtypes
    (float columns are downcast to ``float32``) and to find columns whose name
    contains "time". The full file is then read and wrapped in a
    ``TrajaDataFrame`` carrying the plotting metadata as attributes.

    Args:
      filepath: path to a csv file
      **kwargs: ``xlim``, ``ylim``, ``title``, ``spatial_units``, ``xlabel``,
        ``ylabel`` and ``fps`` are stored as metadata; ``date_parser``,
        ``converters``, ``dtype``, ``infer_datetime_format`` and
        ``parse_dates`` configure parsing; anything else is forwarded to
        :func:`pandas.read_csv`.

    Returns:
        The trajectory as a ``TrajaDataFrame``.

    Raises:
        NotImplementedError: if ``filepath`` does not contain "csv".

    """

    xlim = kwargs.pop('xlim', None)
    ylim = kwargs.pop('ylim', None)
    title = kwargs.pop('title', "Trajectory")
    spatial_units = kwargs.pop('spatial_units', 'm')
    xlabel = kwargs.pop('xlabel', f"x ({spatial_units})")
    ylabel = kwargs.pop('ylabel', f"y ({spatial_units})")
    fps = kwargs.pop('fps', None)
    # The key used to be misspelled 'data_parser', which left a caller's
    # 'date_parser' in kwargs and passed it to read_csv twice. Accept both.
    legacy_parser = kwargs.pop('data_parser', None)
    date_parser = kwargs.pop('date_parser', legacy_parser)
    infer_datetime_format = kwargs.pop('infer_datetime_format', True)
    parse_dates = kwargs.pop('parse_dates', True)

    # TODO: Set index to first column containing 'time'
    df_test = pd.read_csv(filepath, nrows=10, parse_dates=True, infer_datetime_format=True)

    # Strip whitespace
    whitespace_cols = [c for c in df_test if ' ' in df_test[c].name]
    stripped_cols = {c: lambda x: x.strip() for c in whitespace_cols}
    converters = {**stripped_cols, **kwargs.pop('converters', {})}

    # Downcast to float32 # TODO: Benchmark float32 vs float64 for very big datasets
    # A dtype object does not support ``in``; test its name instead.
    float_cols = [c for c in df_test if 'float' in str(df_test[c].dtype)]
    float32_cols = {c: np.float32 for c in float_cols}

    # Convert string columns to categories
    string_cols = [c for c in df_test if df_test[c].dtype == str]
    category_cols = {c: 'category' for c in string_cols}
    dtype = {**float32_cols, **category_cols, **kwargs.pop('dtype', {})}

    time_cols = [col for col in df_test.columns if 'time' in col.lower()]

    if 'csv' in filepath:
        trj = pd.read_csv(filepath,
                          date_parser=date_parser,
                          infer_datetime_format=infer_datetime_format,
                          parse_dates=parse_dates,
                          converters=converters,
                          dtype=dtype,
                          **kwargs)
        if time_cols:
            # ``rename`` returns a new frame; keep it.
            trj = trj.rename(columns={time_cols[0]: 'time'})
        elif fps is not None:
            # Without a frame rate there is nothing to derive time from, so
            # the index is left as frame numbers.
            trj['time'] = trj.index / fps
    else:
        # TODO: Implement for HDF5 and .npy files.
        raise NotImplementedError("Non-csv's not yet implemented")

    trj = TrajaDataFrame(trj)
    # Set meta properties of TrajaDataFrame
    trj.xlim = xlim
    trj.ylim = ylim
    trj.spatial_units = spatial_units
    trj.title = title
    trj.xlabel = xlabel
    trj.ylabel = ylabel
    trj.fps = fps
    return trj
Exemplo n.º 16
0
def _resample_time(trj: TrajaDataFrame, step_time: Union[float, int]):
    """Resample ``trj`` to ``step_time`` bins, averaging ``x`` and ``y``.

    Raises:
        Exception: if the index is neither datetime nor timedelta.
    """
    time_index = trj.index
    if is_datetime_or_timedelta_dtype(time_index):
        # Each coordinate column is reduced to its per-bin mean.
        reducers = {"x": np.mean, "y": np.mean}
        return trj.resample(step_time).agg(reducers)
    raise Exception(f"{trj.index.dtype} is not datetime or timedelta.")