wristpy.core.orchestrator
Python based runner.
"""Python based runner."""

import itertools
import logging
import pathlib
from typing import Dict, Literal, Optional, Sequence, Tuple, Union

from wristpy.core import config, exceptions, models
from wristpy.io.readers import readers
from wristpy.io.writers import writers
from wristpy.processing import (
    analytics,
    calibration,
    idle_sleep_mode_imputation,
    metrics,
    nonwear_utils,
)

logger = config.get_logger()

VALID_FILE_TYPES = (".csv", ".parquet")


def run(
    input: Union[pathlib.Path, str],
    output: Optional[Union[pathlib.Path, str]] = None,
    thresholds: Optional[Tuple[float, float, float]] = None,
    calibrator: Union[
        None,
        Literal["ggir", "gradient"],
    ] = "gradient",
    epoch_length: float = 5,
    activity_metric: Literal["enmo", "mad", "ag_count", "mims"] = "enmo",
    nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
    verbosity: int = logging.WARNING,
    output_filetype: Literal[".csv", ".parquet"] = ".csv",
) -> Union[writers.OrchestratorResults, Dict[str, writers.OrchestratorResults]]:
    """Runs main processing steps for wristpy on single files, or directories.

    The run() function will execute the _run_file() function on individual files, or
    _run_directory() on entire directories. When the input path points to a file, the
    name of the save file will be taken from the given output path (if any). When the
    input path points to a directory the output path must be a valid directory as well.
    Output file names will be derived from original file names in the case of directory
    processing.

    Any input path that is not an existing file is treated as a directory.

    Args:
        input: Path to the input file or directory of files to be read. Currently,
            this supports .bin and .gt3x files.
        output: Path to directory data will be saved to. If processing a single file
            the path should end in the save file name in either .csv or .parquet
            formats.
        thresholds: The cut points for the light, moderate, and vigorous thresholds,
            given in that order. Values must be ascending, unique, and greater than
            or equal to 0. When omitted, defaults specific to the chosen
            activity_metric are used. Default values are optimized for subjects
            ages 7-11 [1][3].
        calibrator: The calibrator to be used on the input data.
        epoch_length: The temporal resolution, in seconds, to which the data will be
            downsampled. It must be > 0.0.
        activity_metric: The metric to be used for physical activity categorization.
        nonwear_algorithm: The algorithm to be used for nonwear detection.
        verbosity: The logging level for the logger.
        output_filetype: Specifies the data format for the save files. Only used when
            processing directories.

    Returns:
        All calculated data in a save ready format as an OrchestratorResults object
        (file input) or as a dictionary of OrchestratorResults objects (directory
        input).

    Raises:
        ValueError: If the physical activity thresholds are negative, not unique, or
            not in ascending order.

    References:
        [1] Hildebrand, M., et al. (2014). Age group comparability of raw accelerometer
        output from wrist- and hip-worn monitors. Medicine and Science in Sports and
        Exercise, 46(9), 1816-1824.
        [2] Treuth MS, Schmitz K, Catellier DJ, McMurray RG, Murray DM, Almeida MJ,
        Going S, Norman JE, Pate R. Defining accelerometer thresholds for activity
        intensities in adolescent girls. Med Sci Sports Exerc. 2004 Jul;36(7):1259-66.
        PMID: 15235335; PMCID: PMC2423321.
        [3] Karas M, Muschelli J, Leroux A, Urbanek J, Wanigatunga A, Bai J,
        Crainiceanu C, Schrack J Comparison of Accelerometry-Based Measures of Physical
        Activity: Retrospective Observational Data Analysis Study JMIR Mhealth Uhealth
        2022;10(7):e38077 URL: https://mhealth.jmir.org/2022/7/e38077 DOI: 10.2196/38077
    """
    logger.setLevel(verbosity)

    input = pathlib.Path(input)
    if output is not None:
        output = pathlib.Path(output)

    # Per-metric fallback cut points, applied only when the caller gave none.
    default_cut_points = (
        ("enmo", (0.0563, 0.1916, 0.6958)),
        ("mad", (0.029, 0.338, 0.604)),
        ("ag_count", (100, 3000, 5200)),
        ("mims", (10.558, 15.047, 19.614)),
    )
    for metric_name, cut_points in default_cut_points:
        if activity_metric == metric_name:
            thresholds = thresholds or cut_points
            break

    thresholds_valid = (
        0 <= thresholds[0] < thresholds[1] < thresholds[2]  # type: ignore
    )
    if not thresholds_valid:
        message = "Threshold values must be >=0, unique, and in ascending order."
        logger.error(message)
        raise ValueError(message)

    shared_kwargs = dict(
        input=input,
        output=output,
        thresholds=thresholds,
        calibrator=calibrator,
        epoch_length=epoch_length,
        activity_metric=activity_metric,
        verbosity=verbosity,
        nonwear_algorithm=nonwear_algorithm,
    )

    if input.is_file():
        return _run_file(**shared_kwargs)

    return _run_directory(output_filetype=output_filetype, **shared_kwargs)


def _run_directory(
    input: pathlib.Path,
    output: Optional[pathlib.Path] = None,
    thresholds: Tuple[float, float, float] = (0.0563, 0.1916, 0.6958),
    calibrator: Union[
        None,
        Literal["ggir", "gradient"],
    ] = "gradient",
    epoch_length: float = 5,
    nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
    verbosity: int = logging.WARNING,
    output_filetype: Literal[".csv", ".parquet"] = ".csv",
    activity_metric: Literal["enmo", "mad", "ag_count", "mims"] = "enmo",
) -> Dict[str, writers.OrchestratorResults]:
    """Runs main processing steps for wristpy on directories.

    The _run_directory() function will execute the _run_file() function on every
    .gt3x and .bin file directly inside the input directory. The input and output
    (if any) paths must be directories. Output file names will be derived from input
    file names.

    A file that raises during processing is logged and skipped, so the returned
    dictionary only contains the files that were processed without error.

    Args:
        input: Path to the input directory of files to be read. Currently,
            this supports .bin and .gt3x files.
        output: Path to directory data will be saved to.
        thresholds: The cut points for the light, moderate, and vigorous thresholds,
            given in that order. Values must be ascending, unique, and greater than
            or equal to 0. Default values are optimized for subjects ages 7-11 [1][2].
        calibrator: The calibrator to be used on the input data.
        epoch_length: The temporal resolution, in seconds, to which the data will be
            downsampled. It must be > 0.0.
        nonwear_algorithm: The algorithm to be used for nonwear detection.
        verbosity: The logging level for the logger.
        output_filetype: Specifies the data format for the save files.
        activity_metric: The metric to be used for physical activity categorization.

    Returns:
        All calculated data in a save ready format as a dictionary of
        OrchestratorResults objects, keyed by the input file path as a string.

    Raises:
        ValueError: The output given is a file rather than a directory.
        ValueError: The output_filetype is not a valid type.
        exceptions.EmptyDirectoryError: If the input directory contains no .gt3x or
            .bin files.

    References:
        [1] Hildebrand, M., et al. (2014). Age group comparability of raw accelerometer
        output from wrist- and hip-worn monitors. Medicine and Science in Sports and
        Exercise, 46(9), 1816-1824.
        [2] Karas M, Muschelli J, Leroux A, Urbanek J, Wanigatunga A, Bai J,
        Crainiceanu C, Schrack J Comparison of Accelerometry-Based Measures of Physical
        Activity: Retrospective Observational Data Analysis Study JMIR Mhealth Uhealth
        2022;10(7):e38077 URL: https://mhealth.jmir.org/2022/7/e38077 DOI: 10.2196/38077
    """
    if output is not None:
        if output.is_file():
            raise ValueError(
                "Output is a file, but must be a directory when input is a directory."
            )
        if output_filetype not in VALID_FILE_TYPES:
            raise ValueError(
                "Invalid output_filetype: "
                f"{output_filetype}. Valid options are: {VALID_FILE_TYPES}."
            )

    file_names = list(itertools.chain(input.glob("*.gt3x"), input.glob("*.bin")))

    if not file_names:
        raise exceptions.EmptyDirectoryError(
            f"Directory {input} contains no .gt3x or .bin files."
        )

    results_dict = {}
    for file in file_names:
        # Append the suffix to the full stem instead of using with_suffix(), which
        # would truncate stems containing dots ("a.run1" -> "a.csv") and make
        # distinct inputs collide on the same output file.
        output_file_path = (
            output / f"{file.stem}{output_filetype}" if output is not None else None
        )
        logger.debug(
            "Processing directory: %s, current file: %s, save path: %s",
            input,
            file,
            output_file_path,
        )
        try:
            # Path.glob() already yields paths prefixed with `input`; joining them
            # onto `input` again would duplicate a relative directory component.
            results_dict[str(file)] = _run_file(
                input=file,
                output=output_file_path,
                thresholds=thresholds,
                calibrator=calibrator,
                epoch_length=epoch_length,
                verbosity=verbosity,
                nonwear_algorithm=nonwear_algorithm,
                activity_metric=activity_metric,
            )
        except Exception as e:
            # Deliberate best effort: one bad file must not abort the whole batch.
            logger.error("Did not run file: %s, Error: %s", file, e)
    logger.info("Processing for directory %s completed successfully.", input)
    return results_dict


def _run_file(
    input: pathlib.Path,
    output: Optional[pathlib.Path] = None,
    thresholds: Tuple[float, float, float] = (0.0563, 0.1916, 0.6958),
    calibrator: Union[
        None,
        Literal["ggir", "gradient"],
    ] = "gradient",
    epoch_length: float = 5.0,
    activity_metric: Literal["enmo", "mad", "ag_count", "mims"] = "enmo",
    nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
    verbosity: int = logging.WARNING,
) -> writers.OrchestratorResults:
    """Runs main processing steps for wristpy and returns data for analysis.

    The _run_file() function will provide the user with the specified physical
    activity metric, anglez, physical activity levels, detected sleep periods, and
    nonwear data. All measures will be in the same temporal resolution.
    Users may choose from 'ggir' and 'gradient' calibration methods,
    or enter None to proceed without calibration.

    If saving fails with an invalid file type, a permission error, or an already
    existing file, the error is logged and the results are still returned so they
    can be saved manually.

    Args:
        input: Path to the input file to be read. Currently, this supports .bin and
            .gt3x files.
        output: Path to save data to. The path should end in the save file name in
            either .csv or .parquet formats.
        thresholds: The cut points for the light, moderate, and vigorous thresholds,
            given in that order. Values must be ascending, unique, and greater than
            or equal to 0. Default values are optimized for subjects ages 7-11
            [1] - [3].
        calibrator: The calibrator to be used on the input data.
        epoch_length: The temporal resolution, in seconds, to which the data will be
            downsampled. It must be > 0.0.
        activity_metric: The metric to be used for physical activity categorization.
        nonwear_algorithm: The algorithm to be used for nonwear detection. A sequence of
            algorithms can be provided. If so, a majority vote will be taken.
        verbosity: The logging level for the logger.

    Returns:
        All calculated data in a save ready format as an OrchestratorResults object.

    Raises:
        ValueError: If an invalid Calibrator is chosen.
        ValueError: If epoch_length is not greater than 0.

    References:
        [1] Hildebrand, M., et al. (2014). Age group comparability of raw accelerometer
        output from wrist- and hip-worn monitors. Medicine and Science in Sports and
        Exercise, 46(9), 1816-1824.
        [2] Aittasalo, M., Vähä-Ypyä, H., Vasankari, T. et al. Mean amplitude deviation
        calculated from raw acceleration data: a novel method for classifying the
        intensity of adolescents' physical activity irrespective of accelerometer brand.
        BMC Sports Sci Med Rehabil 7, 18 (2015). https://doi.org/10.1186/s13102-015-0010-0.
        [3] Karas M, Muschelli J, Leroux A, Urbanek J, Wanigatunga A, Bai J,
        Crainiceanu C, Schrack J Comparison of Accelerometry-Based Measures of Physical
        Activity: Retrospective Observational Data Analysis Study JMIR Mhealth Uhealth
        2022;10(7):e38077 URL: https://mhealth.jmir.org/2022/7/e38077 DOI: 10.2196/38077
    """
    logger.setLevel(verbosity)
    if output is not None:
        # Validate the save path up front, before any processing is done.
        writers.OrchestratorResults.validate_output(output=output)

    parameters_dictionary = {
        "thresholds": list(thresholds),
        "calibrator": calibrator,
        "epoch_length": epoch_length,
        "activity_metric": activity_metric,
        "nonwear_algorithm": list(nonwear_algorithm),
        "input_file": str(input),
    }

    if calibrator is not None and calibrator not in ["ggir", "gradient"]:
        msg = (
            f"Invalid calibrator: {calibrator}. Choose: 'ggir', 'gradient'. "
            "Enter None if no calibration is desired."
        )
        logger.error(msg)
        raise ValueError(msg)

    if epoch_length <= 0:
        msg = "Epoch_length must be greater than 0."
        logger.error(msg)
        raise ValueError(msg)

    watch_data = readers.read_watch_data(input)

    if calibrator is None:
        logger.debug("Running without calibration")
        calibrated_acceleration = watch_data.acceleration
    else:
        calibrated_acceleration = calibration.CalibrationDispatcher(calibrator).run(
            watch_data.acceleration, return_input_on_error=True
        )

    if watch_data.idle_sleep_mode_flag:
        logger.debug("Imputing idle sleep mode gaps.")
        calibrated_acceleration = (
            idle_sleep_mode_imputation.impute_idle_sleep_mode_gaps(
                calibrated_acceleration
            )
        )

    anglez = metrics.angle_relative_to_horizontal(
        calibrated_acceleration, epoch_length=epoch_length
    )
    activity_measurement = _compute_activity(
        calibrated_acceleration,
        activity_metric,
        epoch_length,
        dynamic_range=watch_data.dynamic_range,
    )

    sleep_detector = analytics.GgirSleepDetection(anglez)
    sleep_windows = sleep_detector.run_sleep_detection()

    nonwear_array = nonwear_utils.get_nonwear_measurements(
        calibrated_acceleration=calibrated_acceleration,
        temperature=watch_data.temperature,
        non_wear_algorithms=nonwear_algorithm,
    )

    nonwear_epoch = nonwear_utils.nonwear_array_cleanup(
        nonwear_array=nonwear_array,
        reference_measurement=activity_measurement,
        epoch_length=epoch_length,
    )

    physical_activity_levels = analytics.compute_physical_activty_categories(
        activity_measurement, thresholds
    )

    sleep_array = analytics.sleep_cleanup(
        sleep_windows=sleep_windows, nonwear_measurement=nonwear_epoch
    )
    results = writers.OrchestratorResults(
        physical_activity_metric=activity_measurement,
        anglez=anglez,
        physical_activity_levels=physical_activity_levels,
        sleep_status=sleep_array,
        nonwear_status=nonwear_epoch,
        processing_params=parameters_dictionary,
    )
    if output is not None:
        try:
            results.save_results(output=output)
        except (
            exceptions.InvalidFileTypeError,
            PermissionError,
            FileExistsError,
        ) as exc_info:
            # Allowed to pass to recover in Jupyter Notebook scenarios.
            # The format string and its argument are passed separately so that
            # logging interpolates %s; wrapping both in one tuple would log the
            # tuple's repr instead of the intended message.
            logger.error(
                "Could not save output due to: %s. Call save_results "
                "on the output object with a correct filename to save these "
                "results.",
                exc_info,
            )
    logger.info("Processing for %s completed successfully.", input.stem)
    return results


def _compute_activity(
    acceleration: models.Measurement,
    activity_metric: Literal["ag_count", "mad", "enmo", "mims"],
    epoch_length: float,
    dynamic_range: Optional[tuple[float, float]],
) -> models.Measurement:
    """This is a helper function to organize the computation of the activity metric.

    This function organizes the logic for computing the requested physical activity
    metric at the desired temporal resolution. Any activity_metric other than
    "ag_count", "mad", or "mims" falls through to ENMO.

    Args:
        acceleration: The acceleration data to compute the activity metric from.
        activity_metric: The metric to be used for physical activity categorization.
        epoch_length: The temporal resolution, in seconds, to which the data will be
            downsampled.
        dynamic_range: Tuple of the minimum and maximum accelerometer values. This
            argument is only relevant to the mims metric. Values are taken from watch
            metadata. When None, the argument is omitted from the call so the metric
            function's own default applies ((-8, 8) per the original documentation;
            the value itself is defined in the metrics module, not here).

    Returns:
        A Measurement object with the computed physical activity metric.
    """
    if activity_metric == "ag_count":
        return metrics.actigraph_activity_counts(
            acceleration,
            epoch_length=epoch_length,
        )
    if activity_metric == "mad":
        return metrics.mean_amplitude_deviation(acceleration, epoch_length=epoch_length)
    if activity_metric == "mims":
        if dynamic_range is None:
            return metrics.monitor_independent_movement_summary_units(
                acceleration,
                epoch=epoch_length,
            )
        return metrics.monitor_independent_movement_summary_units(
            acceleration, epoch=epoch_length, dynamic_range=dynamic_range
        )

    return metrics.euclidean_norm_minus_one(acceleration, epoch_length=epoch_length)
def run(
    input: Union[pathlib.Path, str],
    output: Optional[Union[pathlib.Path, str]] = None,
    thresholds: Optional[Tuple[float, float, float]] = None,
    calibrator: Union[
        None,
        Literal["ggir", "gradient"],
    ] = "gradient",
    epoch_length: float = 5,
    activity_metric: Literal["enmo", "mad", "ag_count", "mims"] = "enmo",
    nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
    verbosity: int = logging.WARNING,
    output_filetype: Literal[".csv", ".parquet"] = ".csv",
) -> Union[writers.OrchestratorResults, Dict[str, writers.OrchestratorResults]]:
    """Runs main processing steps for wristpy on single files, or directories.

    Dispatches to _run_file() when the input path is an existing file and to
    _run_directory() otherwise. For a single file the save file name is taken from
    the given output path (if any). For a directory the output path must be a
    directory as well, and output file names are derived from the input file names.

    Args:
        input: Path to the input file or directory of files to be read. Currently,
            this supports .bin and .gt3x files.
        output: Path to directory data will be saved to. If processing a single file
            the path should end in the save file name in either .csv or .parquet
            formats.
        thresholds: The cut points for the light, moderate, and vigorous thresholds,
            given in that order. Values must be ascending, unique, and greater than
            or equal to 0. When omitted, defaults specific to the chosen
            activity_metric are used. Default values are optimized for subjects
            ages 7-11 [1][3].
        calibrator: The calibrator to be used on the input data.
        epoch_length: The temporal resolution, in seconds, to which the data will be
            downsampled. It must be > 0.0.
        activity_metric: The metric to be used for physical activity categorization.
        nonwear_algorithm: The algorithm to be used for nonwear detection.
        verbosity: The logging level for the logger.
        output_filetype: Specifies the data format for the save files. Only used when
            processing directories.

    Returns:
        All calculated data in a save ready format as an OrchestratorResults object
        (file input) or as a dictionary of OrchestratorResults objects (directory
        input).

    Raises:
        ValueError: If the physical activity thresholds are negative, not unique, or
            not in ascending order.

    References:
        [1] Hildebrand, M., et al. (2014). Age group comparability of raw accelerometer
        output from wrist- and hip-worn monitors. Medicine and Science in Sports and
        Exercise, 46(9), 1816-1824.
        [2] Treuth MS, Schmitz K, Catellier DJ, McMurray RG, Murray DM, Almeida MJ,
        Going S, Norman JE, Pate R. Defining accelerometer thresholds for activity
        intensities in adolescent girls. Med Sci Sports Exerc. 2004 Jul;36(7):1259-66.
        PMID: 15235335; PMCID: PMC2423321.
        [3] Karas M, Muschelli J, Leroux A, Urbanek J, Wanigatunga A, Bai J,
        Crainiceanu C, Schrack J Comparison of Accelerometry-Based Measures of Physical
        Activity: Retrospective Observational Data Analysis Study JMIR Mhealth Uhealth
        2022;10(7):e38077 URL: https://mhealth.jmir.org/2022/7/e38077 DOI: 10.2196/38077
    """
    logger.setLevel(verbosity)

    input = pathlib.Path(input)
    if output is not None:
        output = pathlib.Path(output)

    # Per-metric fallback cut points, applied only when the caller gave none.
    default_cut_points = (
        ("enmo", (0.0563, 0.1916, 0.6958)),
        ("mad", (0.029, 0.338, 0.604)),
        ("ag_count", (100, 3000, 5200)),
        ("mims", (10.558, 15.047, 19.614)),
    )
    for metric_name, cut_points in default_cut_points:
        if activity_metric == metric_name:
            thresholds = thresholds or cut_points
            break

    thresholds_valid = (
        0 <= thresholds[0] < thresholds[1] < thresholds[2]  # type: ignore
    )
    if not thresholds_valid:
        message = "Threshold values must be >=0, unique, and in ascending order."
        logger.error(message)
        raise ValueError(message)

    # Arguments common to both the single-file and the directory code path.
    shared_kwargs = dict(
        input=input,
        output=output,
        thresholds=thresholds,
        calibrator=calibrator,
        epoch_length=epoch_length,
        activity_metric=activity_metric,
        verbosity=verbosity,
        nonwear_algorithm=nonwear_algorithm,
    )

    if input.is_file():
        return _run_file(**shared_kwargs)

    return _run_directory(output_filetype=output_filetype, **shared_kwargs)
Runs main processing steps for wristpy on single files, or directories.
The run() function will execute the run_file() function on individual files, or run_directory() on entire directories. When the input path points to a file, the name of the save file will be taken from the given output path (if any). When the input path points to a directory the output path must be a valid directory as well. Output file names will be derived from original file names in the case of directory processing.
Arguments:
- input: Path to the input file or directory of files to be read. Currently, this supports .bin and .gt3x
- output: Path to directory data will be saved to. If processing a single file the path should end in the save file name in either .csv or .parquet formats.
- thresholds: The cut points for the light, moderate, and vigorous thresholds, given in that order. Values must be ascending, unique, and greater than or equal to 0. Default values are optimized for subjects ages 7-11 [1][3].
- calibrator: The calibrator to be used on the input data.
- epoch_length: The temporal resolution, in seconds, to which the data will be downsampled. It must be > 0.0.
- activity_metric: The metric to be used for physical activity categorization.
- nonwear_algorithm: The algorithm to be used for nonwear detection.
- verbosity: The logging level for the logger.
- output_filetype: Specifies the data format for the save files. Only used when processing directories.
Returns:
All calculated data in a save ready format as an OrchestratorResults object (when the input is a single file) or as a dictionary of OrchestratorResults objects (when the input is a directory).
Raises:
- ValueError: If the physical activity thresholds are negative, not unique, or not in ascending order.
References:
[1] Hildebrand, M., et al. (2014). Age group comparability of raw accelerometer output from wrist- and hip-worn monitors. Medicine and Science in Sports and Exercise, 46(9), 1816-1824. [2] Treuth MS, Schmitz K, Catellier DJ, McMurray RG, Murray DM, Almeida MJ, Going S, Norman JE, Pate R. Defining accelerometer thresholds for activity intensities in adolescent girls. Med Sci Sports Exerc. 2004 Jul;36(7):1259-66. PMID: 15235335; PMCID: PMC2423321. [3] Karas M, Muschelli J, Leroux A, Urbanek J, Wanigatunga A, Bai J, Crainiceanu C, Schrack J Comparison of Accelerometry-Based Measures of Physical Activity: Retrospective Observational Data Analysis Study JMIR Mhealth Uhealth 2022;10(7):e38077 URL: https://mhealth.jmir.org/2022/7/e38077 DOI: 10.2196/38077