graphomotor.features.distance

Feature extraction module for distance-based metrics in spiral drawing data.

  1"""Feature extraction module for distance-based metrics in spiral drawing data."""
  2
  3import numpy as np
  4from scipy import stats
  5from scipy.spatial import distance
  6
  7from graphomotor.core import models
  8
  9
 10def _segment_data(data: np.ndarray, start_prop: float, end_prop: float) -> np.ndarray:
 11    """Extract segment of data based on given proportion range.
 12
 13    Args:
 14        data: Data to segment.
 15        start_prop: Start proportion, [0-1).
 16        end_prop: End proportion, (0-1].
 17
 18    Returns:
 19        Segmented data.
 20    """
 21    if not (0 <= start_prop < end_prop <= 1):
 22        raise ValueError(
 23            "Proportions must be between 0 and 1, "
 24            "and start_prop must be less than end_prop"
 25        )
 26    num_samples = len(data)
 27    start_idx = int(start_prop * num_samples)
 28    end_idx = int(end_prop * num_samples)
 29    return data[start_idx:end_idx]
 30
 31
 def calculate_hausdorff_metrics(
     spiral: models.Spiral, reference_spiral: np.ndarray
 ) -> dict[str, float]:
     """Derive Hausdorff-distance features comparing a drawn spiral to a reference.

     Following [1] (the implementation is based on the original R script provided
     with that publication), the drawn points and the reference points are
     compared with the directed Hausdorff distance in both directions. A directed
     Hausdorff distance is the largest distance from any point of one set to its
     nearest point in the other set. The same pair of distances is also computed
     on matching proportional segments of the two point sets.

     Calculated features include:

     - **hausdorff_distance_maximum**: The larger of the two directed Hausdorff
       distances between the drawn points and the reference points
     - **hausdorff_distance_sum**: The sum of the two directed distances
     - **hausdorff_distance_sum_per_second**: That sum divided by the total
       drawing duration
     - **hausdorff_distance_interquartile_range**: The interquartile range of the
       two directed distances
     - **hausdorff_distance_start_segment_maximum_normalized**: The larger directed
       distance between the start segment (0% to 25%) of the drawn points and the
       start segment of the reference points, divided by the number of drawn
       points in the start segment
     - **hausdorff_distance_end_segment_maximum_normalized**: The larger directed
       distance between the end segment (75% to 100%) of the drawn points and the
       end segment of the reference points, divided by the number of drawn points
       in the end segment
     - **hausdorff_distance_middle_segment_maximum**: The larger directed distance
       between the middle segment (15% to 85%) of the drawn points and the middle
       segment of the reference points (unlike the two previous features, this
       one is not divided by the number of points in the segment)
     - **hausdorff_distance_middle_segment_maximum_per_second**: The middle-segment
       maximum divided by the total drawing duration

     Args:
         spiral: Spiral object with drawing data.
         reference_spiral: Reference spiral data for comparison.

     Returns:
         Dictionary containing Hausdorff distance-based features.

     References:
         [1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in
             Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major
             Determinant of the Clinical Utility of the Digital Test.” Frontiers in
             medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682
     """

     def _both_directions(points_a: np.ndarray, points_b: np.ndarray) -> list[float]:
         """Return the directed Hausdorff distances a->b and b->a, in that order."""
         return [
             distance.directed_hausdorff(points_a, points_b)[0],
             distance.directed_hausdorff(points_b, points_a)[0],
         ]

     drawn_xy = np.column_stack((spiral.data["x"].values, spiral.data["y"].values))

     # The last entry of the "seconds" column is taken as the total duration.
     duration = spiral.data["seconds"].iloc[-1]

     # (start, end) proportions delimiting each segment.
     start_bounds, end_bounds, middle_bounds = (0.0, 0.25), (0.75, 1.0), (0.15, 0.85)

     drawn_start = _segment_data(drawn_xy, *start_bounds)
     drawn_end = _segment_data(drawn_xy, *end_bounds)
     drawn_middle = _segment_data(drawn_xy, *middle_bounds)

     reference_start = _segment_data(reference_spiral, *start_bounds)
     reference_end = _segment_data(reference_spiral, *end_bounds)
     reference_middle = _segment_data(reference_spiral, *middle_bounds)

     whole_distances = _both_directions(drawn_xy, reference_spiral)
     start_distances = _both_directions(drawn_start, reference_start)
     end_distances = _both_directions(drawn_end, reference_end)
     middle_distances = _both_directions(drawn_middle, reference_middle)

     whole_sum = np.sum(whole_distances)
     middle_maximum = np.max(middle_distances)

     features = {
         "hausdorff_distance_maximum": np.max(whole_distances),
         "hausdorff_distance_sum": whole_sum,
         "hausdorff_distance_sum_per_second": whole_sum / duration,
         "hausdorff_distance_interquartile_range": stats.iqr(whole_distances),
         # Start/end maxima are normalized by the drawn segment's point count.
         "hausdorff_distance_start_segment_maximum_normalized": (
             np.max(start_distances) / len(drawn_start)
         ),
         "hausdorff_distance_end_segment_maximum_normalized": (
             np.max(end_distances) / len(drawn_end)
         ),
         "hausdorff_distance_middle_segment_maximum": middle_maximum,
         "hausdorff_distance_middle_segment_maximum_per_second": (
             middle_maximum / duration
         ),
     }
     return features
def calculate_hausdorff_metrics( spiral: graphomotor.core.models.Spiral, reference_spiral: numpy.ndarray) -> dict[str, float]:
 def calculate_hausdorff_metrics(
     spiral: models.Spiral, reference_spiral: np.ndarray
 ) -> dict[str, float]:
     """Derive Hausdorff-distance features comparing a drawn spiral to a reference.

     Following [1] (the implementation is based on the original R script provided
     with that publication), the drawn points and the reference points are
     compared with the directed Hausdorff distance in both directions. A directed
     Hausdorff distance is the largest distance from any point of one set to its
     nearest point in the other set. The same pair of distances is also computed
     on matching proportional segments of the two point sets.

     Calculated features include:

     - **hausdorff_distance_maximum**: The larger of the two directed Hausdorff
       distances between the drawn points and the reference points
     - **hausdorff_distance_sum**: The sum of the two directed distances
     - **hausdorff_distance_sum_per_second**: That sum divided by the total
       drawing duration
     - **hausdorff_distance_interquartile_range**: The interquartile range of the
       two directed distances
     - **hausdorff_distance_start_segment_maximum_normalized**: The larger directed
       distance between the start segment (0% to 25%) of the drawn points and the
       start segment of the reference points, divided by the number of drawn
       points in the start segment
     - **hausdorff_distance_end_segment_maximum_normalized**: The larger directed
       distance between the end segment (75% to 100%) of the drawn points and the
       end segment of the reference points, divided by the number of drawn points
       in the end segment
     - **hausdorff_distance_middle_segment_maximum**: The larger directed distance
       between the middle segment (15% to 85%) of the drawn points and the middle
       segment of the reference points (unlike the two previous features, this
       one is not divided by the number of points in the segment)
     - **hausdorff_distance_middle_segment_maximum_per_second**: The middle-segment
       maximum divided by the total drawing duration

     Args:
         spiral: Spiral object with drawing data.
         reference_spiral: Reference spiral data for comparison.

     Returns:
         Dictionary containing Hausdorff distance-based features.

     References:
         [1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in
             Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major
             Determinant of the Clinical Utility of the Digital Test.” Frontiers in
             medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682
     """

     def _both_directions(points_a: np.ndarray, points_b: np.ndarray) -> list[float]:
         """Return the directed Hausdorff distances a->b and b->a, in that order."""
         return [
             distance.directed_hausdorff(points_a, points_b)[0],
             distance.directed_hausdorff(points_b, points_a)[0],
         ]

     drawn_xy = np.column_stack((spiral.data["x"].values, spiral.data["y"].values))

     # The last entry of the "seconds" column is taken as the total duration.
     duration = spiral.data["seconds"].iloc[-1]

     # (start, end) proportions delimiting each segment.
     start_bounds, end_bounds, middle_bounds = (0.0, 0.25), (0.75, 1.0), (0.15, 0.85)

     drawn_start = _segment_data(drawn_xy, *start_bounds)
     drawn_end = _segment_data(drawn_xy, *end_bounds)
     drawn_middle = _segment_data(drawn_xy, *middle_bounds)

     reference_start = _segment_data(reference_spiral, *start_bounds)
     reference_end = _segment_data(reference_spiral, *end_bounds)
     reference_middle = _segment_data(reference_spiral, *middle_bounds)

     whole_distances = _both_directions(drawn_xy, reference_spiral)
     start_distances = _both_directions(drawn_start, reference_start)
     end_distances = _both_directions(drawn_end, reference_end)
     middle_distances = _both_directions(drawn_middle, reference_middle)

     whole_sum = np.sum(whole_distances)
     middle_maximum = np.max(middle_distances)

     features = {
         "hausdorff_distance_maximum": np.max(whole_distances),
         "hausdorff_distance_sum": whole_sum,
         "hausdorff_distance_sum_per_second": whole_sum / duration,
         "hausdorff_distance_interquartile_range": stats.iqr(whole_distances),
         # Start/end maxima are normalized by the drawn segment's point count.
         "hausdorff_distance_start_segment_maximum_normalized": (
             np.max(start_distances) / len(drawn_start)
         ),
         "hausdorff_distance_end_segment_maximum_normalized": (
             np.max(end_distances) / len(drawn_end)
         ),
         "hausdorff_distance_middle_segment_maximum": middle_maximum,
         "hausdorff_distance_middle_segment_maximum_per_second": (
             middle_maximum / duration
         ),
     }
     return features

Calculate Hausdorff distance metrics for a spiral object.

This function computes multiple features based on the Hausdorff distance between a drawn spiral and a reference (ideal) spiral, as described in [1]. Implementation is based on the original R script provided with the publication. The Hausdorff distance measures the maximum distance of a set to the nearest point in the other set. This metric and its derivatives capture various aspects of the spatial relationship between the drawn and reference spirals.

Calculated features include:

  • hausdorff_distance_maximum: The maximum of the directed Hausdorff distances between the data points and the reference data points
  • hausdorff_distance_sum: The sum of the directed Hausdorff distances
  • hausdorff_distance_sum_per_second: The sum of the directed Hausdorff distances divided by the total drawing duration
  • hausdorff_distance_interquartile_range: The interquartile range of the directed Hausdorff distances
  • hausdorff_distance_start_segment_maximum_normalized: The maximum of the directed Hausdorff distances between the beginning segment (0% to 25%) of data points and the beginning segment of reference data points divided by the number of data points in the beginning segment
  • hausdorff_distance_end_segment_maximum_normalized: The maximum of the directed Hausdorff distances in the ending segment (75% to 100%) of data points and the ending segment of reference data points divided by the number of data points in the ending segment
  • hausdorff_distance_middle_segment_maximum: The maximum of the directed Hausdorff distances between the middle segment (15% to 85%) of data points and the middle segment of reference data points (unlike the previous two metrics, this one is not divided by the number of data points in the segment)
  • hausdorff_distance_middle_segment_maximum_per_second: The maximum of the directed Hausdorff distances in the middle segment divided by the total drawing duration
Arguments:
  • spiral: Spiral object with drawing data.
  • reference_spiral: Reference spiral data for comparison.
Returns:

Dictionary containing Hausdorff distance-based features.

References:

[1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major Determinant of the Clinical Utility of the Digital Test.” Frontiers in medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682