Classification

`sleepecg.evaluate(stages_true, stages_pred, stages_mode, show_undefined=False)`

Evaluate the performance of a sleep stage classifier.

Prints overall accuracy, Cohen's kappa, confusion matrix, per-class precision, recall, and F1 score.

Parameters:

stages_true (ndarray) –
The annotated (ground truth) sleep stages as a 2D array of shape (n_records, n_samples) containing integer class labels, or a 3D array of shape (n_records, n_samples, n_classes) containing one-hot encoded class labels.
stages_pred (ndarray) –
The predicted sleep stages as a 2D array of shape (n_records, n_samples) containing integer class labels, or a 3D array of shape (n_records, n_samples, n_classes) containing one-hot encoded class labels.
stages_mode (str) –
Identifier of the grouping mode. Can be any of 'wake-sleep', 'wake-rem-nrem', 'wake-rem-light-n3', 'wake-rem-n1-n2-n3'.
show_undefined (bool) –
If True, include SleepStage.UNDEFINED (i.e. 0) in the confusion matrix output. This can be helpful during debugging. By default False.

Returns:

conf_mat (ndarray) –
Confusion matrix.
stage_names (list[str]) –
Sleep stage names.

Source code in sleepecg/classification.py

def evaluate(
    stages_true: np.ndarray,
    stages_pred: np.ndarray,
    stages_mode: str,
    show_undefined: bool = False,
) -> tuple[np.ndarray, list[str]]:
    """
    Evaluate the performance of a sleep stage classifier.

    Prints the confusion matrix, overall accuracy, Cohen's kappa, and a per-class table of
    precision, recall, F1 score and support.

    Parameters
    ----------
    stages_true : np.ndarray
        The annotated (ground truth) sleep stages as a 2D array of shape
        `(n_records, n_samples)` containing integer class labels, or a 3D array of shape
        `(n_records, n_samples, n_classes)` containing one-hot encoded class labels.
    stages_pred : np.ndarray
        The predicted sleep stages as a 2D array of shape `(n_records, n_samples)`
        containing integer class labels, or a 3D array of shape
        `(n_records, n_samples, n_classes)` containing one-hot encoded class labels.
    stages_mode : str
        Identifier of the grouping mode. Can be any of `'wake-sleep'`, `'wake-rem-nrem'`,
        `'wake-rem-light-n3'`, `'wake-rem-n1-n2-n3'`.
    show_undefined : bool, optional
        If `True`, include `SleepStage.UNDEFINED` (i.e. `0`) in the printed confusion
        matrix. This can be helpful during debugging. By default `False`.

    Returns
    -------
    conf_mat : np.ndarray
        Confusion matrix. The returned matrix always contains the row and column for
        label `0`, independent of `show_undefined`.
    stage_names : list[str]
        Sleep stage names.
    """
    stage_names = _STAGE_NAMES[stages_mode]

    # One-hot input is collapsed to integer labels along the class axis.
    labels_true = stages_true.argmax(2) if stages_true.ndim == 3 else stages_true
    labels_pred = stages_pred.argmax(2) if stages_pred.ndim == 3 else stages_pred

    # One extra label slot for label 0, which has no entry in `stage_names`.
    n_labels = len(stage_names) + 1
    confmat_full = _confusion_matrix(labels_true.ravel(), labels_pred.ravel(), n_labels)
    # All metrics below are computed without the label-0 row and column.
    confmat = confmat_full[1:, 1:]

    print(f"Confusion matrix ({stages_mode.upper()}):")
    print(confmat_full if show_undefined else confmat)

    kappa = _cohen_kappa(confmat)
    acc = confmat.trace() / confmat.sum()

    # NOTE(review): assumes rows index the true label and columns the predicted label
    # (consistent with the row sums being reported as "support") -- confirm against
    # `_confusion_matrix`.
    correct = np.diag(confmat)
    support = confmat.sum(1)
    predicted_totals = confmat.sum(0)
    precision = correct / predicted_totals
    recall = correct / support
    f1 = 2 / (recall**-1 + precision**-1)  # harmonic mean of precision and recall

    print(f"Accuracy: {acc:.4f}")
    print(f"Cohen's kappa: {kappa:.4f}")
    print("       precision    recall  f1-score    support")
    for stage_name, prec, rec, f1_score, n_true in zip(
        stage_names, precision, recall, f1, support
    ):
        print(f"{stage_name:>5}{prec:11.2f}{rec:10.2f}{f1_score:10.2f}{n_true:11}")
    print(f"{support.sum():47}")

    return confmat_full, stage_names

`sleepecg.list_classifiers(classifiers_dir=None)`

List available classifiers.

Parameters:

classifiers_dir (str | Path) –
Directory in which to look for classifiers. If None (default), the value is taken from the configuration. If 'SleepECG', site-packages/sleepecg/classifiers is used.

`sleepecg.load_classifier(name, classifiers_dir=None, silence_tf_messages=True)`

Load a SleepClassifier from disk.

This function reads .zip files saved by save_classifier(). Pass 'SleepECG' as the second argument to load a classifier bundled with SleepECG.

Parameters:

name (str) –
The identifier of the classifier to load.
classifiers_dir (str | Path) –
Directory in which to look for <name>.zip. If None (default), the value is taken from the configuration. If 'SleepECG', load classifiers from site-packages/sleepecg/classifiers.
silence_tf_messages (bool) –
Whether or not to silence messages from TensorFlow when loading a model. By default True.

Returns:

SleepClassifier –
Contains the model and metadata required for feature extraction and preprocessing. Can be passed to stage().

`sleepecg.prepare_data_keras(features, stages, stages_mode, mask_value=-1)`

Mask and pad data and calculate sample weights for a Keras model.

The following steps are performed:

Merge sleep stages in stages according to stages_mode.
Set features corresponding to SleepStage.UNDEFINED to mask_value.
Replace np.nan and np.inf in features with mask_value.
Pad to a common length, where mask_value is used for features and SleepStage.UNDEFINED (i.e. 0) is used for stages.
One-hot encode stages.
Calculate sample weights with class weights taken as n_samples / np.bincount(y), where n_samples counts only the samples that are not SleepStage.UNDEFINED.

Parameters:

features (list[ndarray]) –
Each 2D array in this list is a feature matrix of shape (n_samples, n_features) corresponding to a single record as returned by extract_features().
stages (list[ndarray]) –
Each 1D array in this list contains the sleep stages of a single record as returned by extract_features().
stages_mode (str) –
Identifier of the grouping mode. Can be any of 'wake-sleep', 'wake-rem-nrem', 'wake-rem-light-n3', 'wake-rem-n1-n2-n3'.
mask_value (int) –
Value used to pad features and replace np.nan and np.inf, by default -1. Remember to pass the same value to layers.Masking in your model.

Returns:

features_padded (ndarray) –
A 3D array of shape (n_records, max_n_samples, n_features), where n_records is the length of features/stages and max_n_samples is the maximum number of rows of all feature matrices in features.
stages_padded_onehot (ndarray) –
A 3D array of shape (n_records, max_n_samples, n_classes+1), where n_classes is the number of classes remaining after merging sleep stages (excluding SleepStage.UNDEFINED).
sample_weight (ndarray) –
A 2D array of shape (n_records, max_n_samples).

Source code in sleepecg/classification.py

def prepare_data_keras(
    features: list[np.ndarray],
    stages: list[np.ndarray],
    stages_mode: str,
    mask_value: int = -1,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Mask and pad data and calculate sample weights for a Keras model.

    The following steps are performed:

    - Merge sleep stages in `stages` according to `stages_mode`.
    - Pad to a common length, where `mask_value` is used for `features` and
      `SleepStage.UNDEFINED` (i.e. `0`) is used for stages.
    - One-hot encode stages.
    - Set features corresponding to `SleepStage.UNDEFINED` to `mask_value`.
    - Replace non-finite values (`np.nan`, `np.inf`) in `features` with `mask_value`.
    - Calculate sample weights, where the weight of a class is the number of samples not
      labelled `SleepStage.UNDEFINED` divided by the number of samples of that class.

    Parameters
    ----------
    features : list[np.ndarray]
        Each 2D array in this list is a feature matrix of shape `(n_samples, n_features)`
        corresponding to a single record as returned by `extract_features()`.
    stages : list[np.ndarray]
        Each 1D array in this list contains the sleep stages of a single record as returned
        by `extract_features()`.
    stages_mode : str
        Identifier of the grouping mode. Can be any of `'wake-sleep'`, `'wake-rem-nrem'`,
        `'wake-rem-light-n3'`, `'wake-rem-n1-n2-n3'`.
    mask_value : int, optional
        Value used to pad features and replace `np.nan` and `np.inf`, by default `-1`.
        Remember to pass the same value to `layers.Masking` in your model.

    Returns
    -------
    features_padded : np.ndarray
        A 3D array of shape `(n_records, max_n_samples, n_features)`, where `n_records` is
        the length of `features`/`stages` and `max_n_samples` is the maximum number of rows
        of all feature matrices in `features`.
    stages_padded_onehot : np.ndarray
        A 3D array of shape `(n_records, max_n_samples, n_classes+1)`, where `n_classes` is
        the number of classes remaining after merging sleep stages (excluding
        `SleepStage.UNDEFINED`).
    sample_weight : np.ndarray
        A 2D array of shape `(n_records, max_n_samples)`.
    """
    # Imported lazily so TensorFlow is only needed when this function is called.
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    from tensorflow.keras.utils import to_categorical

    # Labels: merge -> pad with UNDEFINED -> one-hot.
    merged = _merge_sleep_stages(stages, stages_mode)
    stages_padded = pad_sequences(merged, value=SleepStage.UNDEFINED)
    stages_padded_onehot = to_categorical(stages_padded)

    # Features: pad, then overwrite every entry that has to be masked.
    features_padded = pad_sequences(features, dtype=float, value=mask_value)
    is_undefined = stages_padded == SleepStage.UNDEFINED
    features_padded[is_undefined, :] = mask_value
    is_nonfinite = ~np.isfinite(features_padded)
    features_padded[is_nonfinite] = mask_value

    # Collapse the record and sample axes to get one count per class.
    per_class_count = stages_padded_onehot.sum(axis=0).sum(axis=0)
    # Samples labelled SleepStage.UNDEFINED are ignored, so their count must not
    # influence the class weights -> the numerator skips index 0.
    n_defined = np.sum(per_class_count[1:])
    class_weight = n_defined / per_class_count
    # Look up each sample's weight by its integer label.
    sample_weight = class_weight[stages_padded]

    return features_padded, stages_padded_onehot, sample_weight

`sleepecg.print_class_balance(stages, stages_mode=None)`

Print the number of samples and percentages of each class in stages.

Parameters:

stages (ndarray) –
A 2D array of shape (n_records, n_samples) containing integer class labels or a 3D array of shape (n_records, n_samples, n_classes) containing one-hot encoded class labels.
stages_mode (str) –
Identifier of the grouping mode. Can be any of 'wake-sleep', 'wake-rem-nrem', 'wake-rem-light-n3', 'wake-rem-n1-n2-n3'. If None (default), no class labels are printed.

Source code in sleepecg/classification.py

def print_class_balance(stages: np.ndarray, stages_mode: Optional[str] = None) -> None:
    """
    Print the number of samples and percentages of each class in `stages`.

    Only classes that actually occur in `stages` are listed. If `stages` is empty, only
    the header line is printed.

    Parameters
    ----------
    stages : np.ndarray
        A 2D array of shape `(n_records, n_samples)` containing integer class labels or a
        3D array of shape `(n_records, n_samples, n_classes)` containing one-hot encoded
        class labels.
    stages_mode : str, optional
        Identifier of the grouping mode. Can be any of `'wake-sleep'`, `'wake-rem-nrem'`,
        `'wake-rem-light-n3'`, `'wake-rem-n1-n2-n3'`. If `None` (default), no class labels
        are printed and each class is identified by its integer value instead.
    """
    if stages.ndim == 3:
        stages = stages.argmax(2)

    # Resolve the names up front so an unknown `stages_mode` fails before any output.
    stage_names = None
    if stages_mode is not None:
        stage_names = ["UNDEFINED"] + _STAGE_NAMES[stages_mode]

    print("Class balance:")

    unique_stages, counts = np.unique(stages, return_counts=True)
    if counts.size == 0:
        # Nothing to report; avoids calling max() on an empty sequence.
        return

    if stage_names is None:
        # Derive the labels from the data instead of a fixed-size lookup table, so any
        # integer label (not only 0..5) is printed as itself.
        labels = [str(stage) for stage in unique_stages]
        max_len_stages = max(len(label) for label in labels)
    else:
        labels = [stage_names[stage] for stage in unique_stages]
        # Column width covers all names of the mode, including classes that are absent.
        max_len_stages = max(len(name) for name in stage_names)

    max_len_counts = len(str(counts.max()))
    total_count = counts.sum()
    for label, count in zip(labels, counts):
        fraction = count / total_count
        print(
            f"    {label:>{max_len_stages}}: {count:{max_len_counts}} "
            f"({fraction:3.0%})"
        )

`sleepecg.save_classifier(name, model, stages_mode, feature_extraction_params, mask_value=None, classifiers_dir=None)`

Save a trained classifier to disk.

The model itself and a .yml file containing classifier metadata are stored as <name>.zip in classifiers_dir. Model serialization is performed as suggested by the respective package documentation. Currently only Keras models are supported.

Parameters:

name (str) –
An identifier which is used as the filename.
model (Any) –
The classification model, should have fit() and predict() methods.
stages_mode (str) –
Identifier of the grouping mode. Can be any of 'wake-sleep', 'wake-rem-nrem', 'wake-rem-light-n3', or 'wake-rem-n1-n2-n3'.
feature_extraction_params (dict[str, Any]) –
The parameters passed to extract_features(), as a dictionary mapping string parameter names to values. Should not include records and n_jobs.
mask_value (int) –
Only required for Keras models, as passed to prepare_data_keras() and keras.layers.Masking, by default None.
classifiers_dir (str | Path) –
Directory in which the .zip file is stored. If None (default), the value is taken from the configuration.

`sleepecg.stage(clf, record, return_mode='int')`

Predict sleep stages for a single record.

Feature extraction and preprocessing are performed according to the information stored in clf.

Parameters:

clf (SleepClassifier) –
A classifier object as loaded with load_classifier().
record (SleepRecord) –
A single record (i.e. night).
return_mode (str) –
If 'int', return the predicted sleep stages as a 1D array of integers. If 'prob', return a 2D array of probabilities. If 'str', return a 1D array of strings.

Returns:

ndarray –
Array of sleep stages. Depending on return_mode, this takes different forms.

Warnings

Note that the returned labels depend on clf.stages_mode, so they do not necessarily follow the stage-to-integer mapping defined in SleepStage. See classification for details.

Source code in sleepecg/classification.py

def stage(
    clf: SleepClassifier,
    record: SleepRecord,
    return_mode: str = "int",
) -> np.ndarray:
    """
    Predict sleep stages for a single record.

    Feature extraction and preprocessing are performed according to the information stored
    in `clf`.

    Parameters
    ----------
    clf : SleepClassifier
        A classifier object as loaded with `load_classifier()`.
    record : SleepRecord
        A single record (i.e. night).
    return_mode : str, optional
        If `'int'`, return the predicted sleep stages as a 1D array of integers. If
        `'prob'`, return a 2D array of probabilities. If `'str'`, return a 1D array of
        strings.

    Returns
    -------
    np.ndarray
        Array of sleep stages. Depending on `return_mode`, this takes different forms.

    Raises
    ------
    ValueError
        If `return_mode` is not one of the supported options, or if `clf.model_type` is
        not `'keras'`.

    Warnings
    --------
    Note that the returned labels depend on `clf.stages_mode`, so they do not necessarily
    follow the stage-to-integer mapping defined in `SleepStage`. See
    [classification](../classification.md) for details.
    """
    return_modes = {"int", "prob", "str"}
    if return_mode not in return_modes:
        raise ValueError(
            f"Invalid return_mode: {return_mode!r}. Possible options: {return_modes}"
        )

    # Reject unsupported models before running feature extraction, and report the
    # offending model type (not the type of the wrapper object).
    if clf.model_type != "keras":
        raise ValueError(
            f"Staging with model of type {clf.model_type!r} is not supported"
        )

    # Index 0 is reserved for UNDEFINED; the mode-specific names follow.
    stage_names = ["UNDEFINED"] + _STAGE_NAMES[clf.stages_mode]

    features = extract_features(records=[record], **clf.feature_extraction_params)[0][0]
    # Non-finite feature values are replaced by the classifier's mask value.
    features[~np.isfinite(features)] = clf.mask_value
    # The model is fed a batch containing only this record; `[0]` removes the batch axis.
    stages_pred_proba: np.ndarray = clf.model.predict(features[np.newaxis, ...])[0]

    if return_mode == "prob":
        return stages_pred_proba

    stages_pred: np.ndarray = stages_pred_proba.argmax(-1)
    if return_mode == "str":
        return np.array([stage_names[s] for s in stages_pred])
    return stages_pred

`sleepecg.SleepClassifier` `dataclass`

Store a sleep classifier model and metadata.

Attributes:

model (_Model) –

The classification model, should have fit and predict methods.
stages_mode (str) –

Identifier of the grouping mode. Can be any of 'wake-sleep', 'wake-rem-nrem', 'wake-rem-light-n3', or 'wake-rem-n1-n2-n3'.
feature_extraction_params (dict[str, Any]) –

The parameters passed to extract_features(), as a dictionary mapping string parameter names to values. Does not include records and n_jobs.
model_type (str) –

A string identifying the model type, e.g. 'keras' or 'sklearn'. This is used by stage() to determine how to perform sleep stage predictions.
mask_value (int, optional) –

Only required for models of type 'keras', as passed to prepare_data_keras() and keras.layers.Masking, by default None.
source_file (Path, optional) –

The file from which the classifier was loaded using load_classifier(), by default None.

Source code in sleepecg/classification.py

@dataclass
class SleepClassifier:
    """
    Bundle a sleep classification model with the metadata needed to use it.

    Attributes
    ----------
    model : _Model
        The classification model, should have `fit` and `predict` methods.
    stages_mode : str
        Identifier of the grouping mode. Can be any of `'wake-sleep'`, `'wake-rem-nrem'`,
        `'wake-rem-light-n3'`, or `'wake-rem-n1-n2-n3'`.
    feature_extraction_params : dict[str, typing.Any]
        The parameters passed to `extract_features()`, as a dictionary mapping string
        parameter names to values. Does not include `records` and `n_jobs`. `str()` of a
        classifier reads the `'feature_selection'` entry of this dictionary.
    model_type : str
        A string identifying the model type, e.g. `'keras'` or `'sklearn'`. This is used by
        `stage()` to determine how to perform sleep stage predictions.
    mask_value : int, optional
        Only required for models of type `'keras'`, as passed to `prepare_data_keras()` and
        `keras.layers.Masking`, by default `None`.
    source_file : pathlib.Path, optional
        The file from which the classifier was loaded using `load_classifier()`, by default
        `None`.
    """

    model: _Model
    stages_mode: str
    feature_extraction_params: dict[str, Any]
    model_type: str
    mask_value: Optional[int] = None
    source_file: Optional[Path] = None

    def __repr__(self) -> str:
        # The file name is appended only when the classifier has a source file.
        suffix = "" if self.source_file is None else f", {self.source_file.name}"
        return f"<SleepClassifier | {self.stages_mode}, {self.model_type}{suffix}>"

    def __str__(self) -> str:
        selected = self.feature_extraction_params["feature_selection"]
        features = ", ".join(selected)
        lines = [
            f"SleepClassifier for {self.stages_mode.upper()}\n",
            f"    features: {features}\n",
            f"    model type: {self.model_type}\n",
            f"    source file: {self.source_file}\n",
        ]
        return "".join(lines)

Classification

sleepecg.evaluate(stages_true, stages_pred, stages_mode, show_undefined=False)

sleepecg.list_classifiers(classifiers_dir=None)

sleepecg.load_classifier(name, classifiers_dir=None, silence_tf_messages=True)

sleepecg.prepare_data_keras(features, stages, stages_mode, mask_value=-1)

sleepecg.print_class_balance(stages, stages_mode=None)

sleepecg.save_classifier(name, model, stages_mode, feature_extraction_params, mask_value=None, classifiers_dir=None)

sleepecg.stage(clf, record, return_mode='int')

sleepecg.SleepClassifier dataclass

`sleepecg.evaluate(stages_true, stages_pred, stages_mode, show_undefined=False)`

`sleepecg.list_classifiers(classifiers_dir=None)`

`sleepecg.load_classifier(name, classifiers_dir=None, silence_tf_messages=True)`

`sleepecg.prepare_data_keras(features, stages, stages_mode, mask_value=-1)`

`sleepecg.print_class_balance(stages, stages_mode=None)`

`sleepecg.save_classifier(name, model, stages_mode, feature_extraction_params, mask_value=None, classifiers_dir=None)`

`sleepecg.stage(clf, record, return_mode='int')`

`sleepecg.SleepClassifier` `dataclass`