graphomotor.features.distance
Feature extraction module for distance-based metrics in spiral drawing data.
"""Feature extraction module for distance-based metrics in spiral drawing data."""

import numpy as np
from scipy import stats
from scipy.spatial import distance

from graphomotor.core import models


def _segment_data(data: np.ndarray, start_prop: float, end_prop: float) -> np.ndarray:
    """Extract segment of data based on given proportion range.

    The proportions are converted to indices by truncating
    ``proportion * len(data)`` towards zero, so short inputs can yield an empty
    segment.

    Args:
        data: Data to segment.
        start_prop: Start proportion, [0-1).
        end_prop: End proportion, (0-1].

    Returns:
        Segmented data.

    Raises:
        ValueError: If the proportions are outside [0, 1] or ``start_prop`` is not
            strictly less than ``end_prop``.
    """
    if not (0 <= start_prop < end_prop <= 1):
        raise ValueError(
            "Proportions must be between 0 and 1, "
            "and start_prop must be less than end_prop"
        )
    num_samples = len(data)
    start_idx = int(start_prop * num_samples)
    end_idx = int(end_prop * num_samples)
    return data[start_idx:end_idx]


def _hausdorff_pair(first: np.ndarray, second: np.ndarray) -> list[float]:
    """Return both directed Hausdorff distances between two point sets.

    Args:
        first: First point set.
        second: Second point set.

    Returns:
        Two-element list: the directed distance from ``first`` to ``second``,
        followed by the directed distance from ``second`` to ``first``.
    """
    return [
        distance.directed_hausdorff(first, second)[0],
        distance.directed_hausdorff(second, first)[0],
    ]


def calculate_hausdorff_metrics(
    spiral: models.Spiral, reference_spiral: np.ndarray
) -> dict[str, float]:
    """Calculate Hausdorff distance metrics for a spiral object.

    This function computes multiple features based on the Hausdorff distance between a
    drawn spiral and a reference (ideal) spiral, as described in [1]. Implementation
    is based on the original R script provided with the publication. The Hausdorff
    distance measures the maximum distance of a set to the nearest point in the other
    set. This metric and its derivatives capture various aspects of the spatial
    relationship between the drawn and reference spirals.

    Every feature is derived from a pair of directed Hausdorff distances (drawn to
    reference, and reference to drawn) computed on the full point sets or on
    matching segments of both.

    Calculated features include:

    - **hausdorff_distance_maximum**: The maximum of the directed Hausdorff distances
        between the data points and the reference data points
    - **hausdorff_distance_sum**: The sum of the directed Hausdorff distances
    - **hausdorff_distance_sum_per_second**: The sum of the directed Hausdorff distances
        divided by the total drawing duration
    - **hausdorff_distance_interquartile_range**: The interquartile range of the
        directed Hausdorff distances
    - **hausdorff_distance_start_segment_maximum_normalized**: The maximum of the
        directed Hausdorff distances between the beginning segment (0% to 25%) of
        data points and the beginning segment of reference data points divided by
        the number of data points in the beginning segment
    - **hausdorff_distance_end_segment_maximum_normalized**: The maximum of the directed
        Hausdorff distances between the ending segment (75% to 100%) of data points
        and the ending segment of reference data points divided by the number of
        data points in the ending segment
    - **hausdorff_distance_middle_segment_maximum**: The maximum of the directed
        Hausdorff distances between the middle segment (15% to 85%) of data points
        and the middle segment of reference data points (this metric is not divided
        by the number of data points in the middle segment unlike previous ones)
    - **hausdorff_distance_middle_segment_maximum_per_second**: The maximum of the
        directed Hausdorff distances in the middle segment divided by the total
        drawing duration

    Note:
        Segment bounds are truncated to whole indices, so an input with fewer than
        four rows produces an empty 0%-25% segment. This is not validated here.

    Args:
        spiral: Spiral object with drawing data.
        reference_spiral: Reference spiral data for comparison.

    Returns:
        Dictionary containing Hausdorff distance-based features.

    References:
        [1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in
            Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major
            Determinant of the Clinical Utility of the Digital Test.” Frontiers in
            medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682
    """
    spiral_data = np.column_stack((spiral.data["x"].values, spiral.data["y"].values))

    # The last timestamp is used as the drawing duration (presumably "seconds"
    # starts at zero -- confirm against the Spiral model).
    total_duration = spiral.data["seconds"].iloc[-1]

    start_segment_data = _segment_data(spiral_data, 0.0, 0.25)
    end_segment_data = _segment_data(spiral_data, 0.75, 1.0)
    mid_segment_data = _segment_data(spiral_data, 0.15, 0.85)

    start_segment_ref = _segment_data(reference_spiral, 0.0, 0.25)
    end_segment_ref = _segment_data(reference_spiral, 0.75, 1.0)
    mid_segment_ref = _segment_data(reference_spiral, 0.15, 0.85)

    haus_dist = _hausdorff_pair(spiral_data, reference_spiral)
    haus_dist_sum = np.sum(haus_dist)
    start_max = np.max(_hausdorff_pair(start_segment_data, start_segment_ref))
    end_max = np.max(_hausdorff_pair(end_segment_data, end_segment_ref))
    mid_max = np.max(_hausdorff_pair(mid_segment_data, mid_segment_ref))

    return {
        "hausdorff_distance_maximum": np.max(haus_dist),
        "hausdorff_distance_sum": haus_dist_sum,
        "hausdorff_distance_sum_per_second": haus_dist_sum / total_duration,
        "hausdorff_distance_interquartile_range": stats.iqr(haus_dist),
        # Start/end maxima are normalized by the number of drawn points in the
        # segment; the middle maximum is intentionally left unnormalized.
        "hausdorff_distance_start_segment_maximum_normalized": start_max
        / len(start_segment_data),
        "hausdorff_distance_end_segment_maximum_normalized": end_max
        / len(end_segment_data),
        "hausdorff_distance_middle_segment_maximum": mid_max,
        "hausdorff_distance_middle_segment_maximum_per_second": mid_max
        / total_duration,
    }
def _hausdorff_pair(first: np.ndarray, second: np.ndarray) -> list[float]:
    """Return both directed Hausdorff distances between two point sets.

    Args:
        first: First point set.
        second: Second point set.

    Returns:
        Two-element list: the directed distance from ``first`` to ``second``,
        followed by the directed distance from ``second`` to ``first``.
    """
    return [
        distance.directed_hausdorff(first, second)[0],
        distance.directed_hausdorff(second, first)[0],
    ]


def calculate_hausdorff_metrics(
    spiral: models.Spiral, reference_spiral: np.ndarray
) -> dict[str, float]:
    """Calculate Hausdorff distance metrics for a spiral object.

    This function computes multiple features based on the Hausdorff distance between a
    drawn spiral and a reference (ideal) spiral, as described in [1]. Implementation
    is based on the original R script provided with the publication. The Hausdorff
    distance measures the maximum distance of a set to the nearest point in the other
    set. This metric and its derivatives capture various aspects of the spatial
    relationship between the drawn and reference spirals.

    Every feature is derived from a pair of directed Hausdorff distances (drawn to
    reference, and reference to drawn) computed on the full point sets or on
    matching segments of both.

    Calculated features include:

    - **hausdorff_distance_maximum**: The maximum of the directed Hausdorff distances
        between the data points and the reference data points
    - **hausdorff_distance_sum**: The sum of the directed Hausdorff distances
    - **hausdorff_distance_sum_per_second**: The sum of the directed Hausdorff distances
        divided by the total drawing duration
    - **hausdorff_distance_interquartile_range**: The interquartile range of the
        directed Hausdorff distances
    - **hausdorff_distance_start_segment_maximum_normalized**: The maximum of the
        directed Hausdorff distances between the beginning segment (0% to 25%) of
        data points and the beginning segment of reference data points divided by
        the number of data points in the beginning segment
    - **hausdorff_distance_end_segment_maximum_normalized**: The maximum of the directed
        Hausdorff distances between the ending segment (75% to 100%) of data points
        and the ending segment of reference data points divided by the number of
        data points in the ending segment
    - **hausdorff_distance_middle_segment_maximum**: The maximum of the directed
        Hausdorff distances between the middle segment (15% to 85%) of data points
        and the middle segment of reference data points (this metric is not divided
        by the number of data points in the middle segment unlike previous ones)
    - **hausdorff_distance_middle_segment_maximum_per_second**: The maximum of the
        directed Hausdorff distances in the middle segment divided by the total
        drawing duration

    Note:
        Segment bounds are truncated to whole indices, so an input with fewer than
        four rows produces an empty 0%-25% segment. This is not validated here.

    Args:
        spiral: Spiral object with drawing data.
        reference_spiral: Reference spiral data for comparison.

    Returns:
        Dictionary containing Hausdorff distance-based features.

    References:
        [1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in
            Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major
            Determinant of the Clinical Utility of the Digital Test.” Frontiers in
            medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682
    """
    spiral_data = np.column_stack((spiral.data["x"].values, spiral.data["y"].values))

    # The last timestamp is used as the drawing duration (presumably "seconds"
    # starts at zero -- confirm against the Spiral model).
    total_duration = spiral.data["seconds"].iloc[-1]

    start_segment_data = _segment_data(spiral_data, 0.0, 0.25)
    end_segment_data = _segment_data(spiral_data, 0.75, 1.0)
    mid_segment_data = _segment_data(spiral_data, 0.15, 0.85)

    start_segment_ref = _segment_data(reference_spiral, 0.0, 0.25)
    end_segment_ref = _segment_data(reference_spiral, 0.75, 1.0)
    mid_segment_ref = _segment_data(reference_spiral, 0.15, 0.85)

    haus_dist = _hausdorff_pair(spiral_data, reference_spiral)
    haus_dist_sum = np.sum(haus_dist)
    start_max = np.max(_hausdorff_pair(start_segment_data, start_segment_ref))
    end_max = np.max(_hausdorff_pair(end_segment_data, end_segment_ref))
    mid_max = np.max(_hausdorff_pair(mid_segment_data, mid_segment_ref))

    return {
        "hausdorff_distance_maximum": np.max(haus_dist),
        "hausdorff_distance_sum": haus_dist_sum,
        "hausdorff_distance_sum_per_second": haus_dist_sum / total_duration,
        "hausdorff_distance_interquartile_range": stats.iqr(haus_dist),
        # Start/end maxima are normalized by the number of drawn points in the
        # segment; the middle maximum is intentionally left unnormalized.
        "hausdorff_distance_start_segment_maximum_normalized": start_max
        / len(start_segment_data),
        "hausdorff_distance_end_segment_maximum_normalized": end_max
        / len(end_segment_data),
        "hausdorff_distance_middle_segment_maximum": mid_max,
        "hausdorff_distance_middle_segment_maximum_per_second": mid_max
        / total_duration,
    }
Calculate Hausdorff distance metrics for a spiral object.
This function computes multiple features based on the Hausdorff distance between a drawn spiral and a reference (ideal) spiral, as described in [1]. Implementation is based on the original R script provided with the publication. The Hausdorff distance measures the maximum distance of a set to the nearest point in the other set. This metric and its derivatives capture various aspects of the spatial relationship between the drawn and reference spirals.
Calculated features include:
- hausdorff_distance_maximum: The maximum of the directed Hausdorff distances between the data points and the reference data points
- hausdorff_distance_sum: The sum of the directed Hausdorff distances
- hausdorff_distance_sum_per_second: The sum of the directed Hausdorff distances divided by the total drawing duration
- hausdorff_distance_interquartile_range: The interquartile range of the directed Hausdorff distances
- hausdorff_distance_start_segment_maximum_normalized: The maximum of the directed Hausdorff distances between the beginning segment (0% to 25%) of data points and the beginning segment of reference data points divided by the number of data points in the beginning segment
- hausdorff_distance_end_segment_maximum_normalized: The maximum of the directed Hausdorff distances in the ending segment (75% to 100%) of data points and the ending segment of reference data points divided by the number of data points in the ending segment
- hausdorff_distance_middle_segment_maximum: The maximum of the directed Hausdorff distances between the middle segment (15% to 85%) of data points and the middle segment of reference data points (this metric is not divided by the number of data points in the middle segment unlike previous ones)
- hausdorff_distance_middle_segment_maximum_per_second: The maximum of the directed Hausdorff distances in the middle segment divided by the total drawing duration
Arguments:
- spiral: Spiral object with drawing data.
- reference_spiral: Reference spiral data for comparison.
Returns:
Dictionary containing Hausdorff distance-based features.
References:
[1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major Determinant of the Clinical Utility of the Digital Test.” Frontiers in medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682