Add path generation functionality.

2026-02-02 14:33:09 +01:00 · 2026-01-27 15:18:21 +01:00
parent afabff6e73
commit bb2f81fc20
6 changed files with 253 additions and 0 deletions
--- a/src/pg_rad/__init__.py
+++ b/src/pg_rad/__init__.py
--- a/src/pg_rad/configs/logging.yml
+++ b/src/pg_rad/configs/logging.yml
@ -0,0 +1,15 @@
+# Logging configuration in the logging.config.dictConfig schema.
+version: 1
+# Keep loggers that were created before this configuration is applied enabled.
+disable_existing_loggers: false
+formatters:
+  simple:
+    format: '%(asctime)s - %(levelname)s: %(message)s'
+handlers:
+  stdout:
+    class: logging.StreamHandler
+    formatter: simple
+    stream: ext://sys.stdout
+# The root logger is configured through the dedicated top-level `root` key of
+# the dictConfig schema instead of an entry named "root" under `loggers`,
+# which only resolves to the real root logger on Python 3.9 and later.
+root:
+  level: INFO
+  handlers:
+    - stdout
--- a/src/pg_rad/dataloader.py
+++ b/src/pg_rad/dataloader.py
@ -0,0 +1,26 @@
+import pandas as pd
+
+from pg_rad.logger import setup_logger
+from pg_rad.exceptions import DataLoadError, InvalidCSVError
+
+# Module-level logger, obtained through the package's logging helper.
+logger = setup_logger(__name__)
+
+def load_data(filename: str) -> pd.DataFrame:
+    logger.debug(f"Attempting to load data from {filename}")
+
+    try:
+        df = pd.read_csv(filename, delimiter=',')
+
+    except FileNotFoundError as e:
+        logger.error(f"File not found: {filename}")
+        raise DataLoadError(f"File does not exist: {filename}") from e
+
+    except pd.errors.ParserError as e:
+        logger.error(f"Invalid CSV format: {filename}")
+        raise InvalidCSVError(f"Invalid CSV file: {filename}") from e
+
+    except Exception as e:
+        logger.exception(f"Unexpected error while loading {filename}")
+        raise DataLoadError("Unexpected error while loading data") from e
+
+    return df
--- a/src/pg_rad/exceptions.py
+++ b/src/pg_rad/exceptions.py
@ -0,0 +1,8 @@
+class ConvergenceError(Exception):
+    """Raised when an algorithm fails to converge."""
+
+class DataLoadError(Exception):
+    """Base class for data loading errors."""
+
+class InvalidCSVError(DataLoadError):
+    """Raised when a file is not a valid CSV."""
--- a/src/pg_rad/logger.py
+++ b/src/pg_rad/logger.py
@ -0,0 +1,17 @@
+import logging
+import logging.config
+import pathlib
+
+import yaml
+
+def setup_logger(name):
+    logger = logging.getLogger(name)
+
+    base_dir = pathlib.Path(__file__).resolve().parent
+    config_file = base_dir / "configs" / "logging.yml"
+
+    with open(config_file) as f:
+        config = yaml.safe_load(f)
+    
+    logging.config.dictConfig(config)
+    return logger
--- a/src/pg_rad/path.py
+++ b/src/pg_rad/path.py
@ -0,0 +1,187 @@
+from collections.abc import Sequence
+import math
+
+from matplotlib import pyplot as plt
+import numpy as np
+import pandas as pd
+import piecewise_regression
+
+from pg_rad.exceptions import ConvergenceError
+from pg_rad.logger import setup_logger
+
+# Module-level logger, obtained through the package's logging helper.
+logger = setup_logger(__name__)
+
+class PathSegment:
+    def __init__(self, a: tuple[float, float], b: tuple[float, float]):
+        """_A straight Segment of a Path, from (x_a, y_a) to (x_b, y_b)._
+
+        Args:
+            a (tuple[float, float]): _The starting point (x_a, y_a)._
+            b (tuple[float, float]): _The final point (x_b, y_b)._
+        """        
+        self.a = a
+        self.b = b
+
+    def get_length(self) -> float:
+        return math.dist(self.a, self.b)
+    
+    length = property(get_length)
+
+    def __str__(self) -> str:
+        return str(f"({self.a}, {self.b})")
+    
+    def __getitem__(self, index) -> float:
+        if index == 0:
+            return self.a
+        elif index == 1:
+            return self.b
+        else:
+            raise IndexError
+
+class Path:
+    def __init__(
+            self,
+            coord_list: Sequence[tuple[float, float]],
+            z: float = 0,
+            simplify_path = False
+                 ):
+        """Construct a path of sequences based on a list of coordinates.
+
+        Args:
+            coord_list (Sequence[tuple[float, float]]): _description_
+            z (float, optional): _description_. Defaults to 0.
+
+        Raises:
+            ValueError: _description_
+        """        
+        
+        if len(coord_list) < 2:
+            raise ValueError("Must provide at least two coordinates as a list of tuples, e.g. [(x1, y1), (x2, y2)]")
+
+        x, y = tuple(zip(*coord_list))
+    
+        if simplify_path:
+            try:
+                x, y = piecewise_regression_on_path(list(x), list(y))
+            except ConvergenceError:
+                logger.warning("Continuing without simplifying path.")
+
+        self.x_list = list(x)
+        self.y_list = list(y)
+
+        coord_list = list(zip(x, y))
+
+        self.segments = [PathSegment(i, ip1) for i, ip1 in zip(coord_list, coord_list[1:])]
+
+        self.z = z
+
+    def get_length(self) -> float:
+        return sum([s.length for s in self.segments])
+    
+    length = property(get_length)
+
+    def __getitem__(self, index) -> PathSegment:
+        return self.segments[index]
+
+    def __str__(self) -> str:
+        return str([str(s) for s in self.segments])
+    
+    def plot(self, **kwargs):
+        """
+        Plot the path using matplotlib.
+        """  
+        plt.plot(self.x_list, self.y_list, **kwargs)
+
+def piecewise_regression_on_path(
+        x: Sequence[float],
+        y: Sequence[float],
+        keep_endpoints_equal: bool = False,
+        n_breakpoints: int = 3
+        ):
+    """_Take a Path object and return a piece-wise linear approximated Path._
+
+    This function uses the `piecewise_regression` package. From a full set of
+    coordinate pairs, the function fits linear sections, automatically finding
+    the number of breakpoints and their positions.
+
+    On why the default value of n_breakpoints is 3, from the `piecewise_regression`
+    docs:
+    "If you do not have (or do not want to use) initial guesses for the number
+    of breakpoints, you can set it to n_breakpoints=3, and the algorithm will
+    randomly generate start_values. With a 50% chance, the bootstrap restarting
+    algorithm will either use the best currently converged breakpoints or
+    randomly generate new start_values, escaping the local optima in two ways in
+    order to find better global optima."
+
+    Args:
+        x (Sequence[float]): _Full list of x coordinates._
+        y (Sequence[float]): _Full list of y coordinates._
+        keep_endpoints_equal (bool, optional): _Whether or not to force start
+        and end to be exactly equal to the original. This will worsen the linear
+        approximation at the beginning and end of path. Defaults to False._
+        n_breakpoints (int, optional): _Number of breakpoints. Defaults to 3._
+
+    Returns:
+        x (Sequence[float]): _Reduced list of x coordinates._
+        y (Sequence[float]): _Reduced list of y coordinates._
+
+    Reference:
+        Pilgrim, C., (2021). piecewise-regression (aka segmented regression) in Python. Journal of Open Source Software, 6(68), 3859, https://doi.org/10.21105/joss.03859.
+    """
+    
+    logger.debug(f"Attempting piecewise regression on path.")
+
+    pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=n_breakpoints)
+    pw_res = pw_fit.get_results()
+
+    if pw_res == None:
+        logger.error("Piecewise regression failed to converge.")
+        raise ConvergenceError("Piecewise regression failed to converge.") 
+    
+    est = pw_res['estimates']
+
+    # extract and sort breakpoints
+    breakpoints_x = sorted(
+        v['estimate'] for k, v in est.items() if k.startswith('breakpoint')
+    )
+ 
+    x_points = [x[0]] + breakpoints_x + [x[-1]]
+
+    y_points = pw_fit.predict(x_points)
+
+    if keep_endpoints_equal:
+        logger.debug("Forcing endpoint equality.")
+        y_points[0] = y[0]
+        y_points[-1] = y[-1]
+
+    logger.info(
+        f"Piecewise regression reduced path from {len(x)-1} to {len(x_points)-1} segments."
+    )
+
+    return x_points, y_points
+
+def path_from_RT90(
+        df: pd.DataFrame,
+        east_col: str = "East",
+        north_col: str = "North",
+        **kwargs
+        ) -> Path:
+    
+    """_Construct a path from East and North formatted coordinates (RT90) in a Pandas DataFrame._
+
+    Args:
+        df (pd.DataFrame): _DataFrame containing at least the two columns noted in the cols argument._
+        east_col (str): _The column name for the East coordinates._
+        north_col (str): _The column name for the North coordinates._
+
+    Returns:
+        Path: _A Path object built from the aquisition coordinates in the DataFrame._
+    """    
+    
+    east_arr = np.array(df[east_col]) - min(df[east_col])
+    north_arr = np.array(df[north_col]) - min(df[north_col])
+
+    coord_pairs = list(zip(east_arr, north_arr))
+
+    path = Path(coord_pairs, **kwargs)
+    return path