Skip to content

Preprocessing

Classes

TakBuilder(base, max_days=None)

Tak builder constructor

Parameters:

Name Type Description Default
base DataFrame

event log as a dataframe

required
max_days int | None

patients' sequence max length in days

None
Source code in opentak/preprocessing.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def __init__(
    self,
    base: pd.DataFrame,
    max_days: int | None = None,
) -> None:
    """Initialise the builder from an event log.

    :param base: event log as a dataframe
    :param max_days: patients' sequence max length in days; when falsy
        (``None`` or ``0``), the largest ``TIMESTAMP`` found among events
        whose ``EVT`` is not ``"end"`` is used instead
    """
    _validate_args(base, max_days)
    self.base = checks.Checks(base).base
    self.timescale = 1
    # Work on a copy so the module-level base mapping is never mutated.
    self.dict_label_id = dict(_BASE_DICT)
    self._create_dict_label_id()

    if max_days:
        self.max_days = max_days
    else:
        # No explicit horizon given: derive it from the data, ignoring "end" events.
        is_not_end = self.base["EVT"].ne("end")
        self.max_days = self.base.loc[is_not_end, "TIMESTAMP"].max()

    self._add_start()
    self._add_end()
    self.base = stable_sort(self.base)
    self.base = add_evt_duration(self.base)
    self.index_patients = self.base["ID_PATIENT"].unique()
    # The array is not computed here; the flag makes build() create it on first use.
    self.array: npt.NDArray | None = None
    self._must_create_array = True
Functions
build(kind='hca')

Build Tak object from builder.

The kind parameter currently accepts a single value:

"hca" Classic Tak algorithm. Stands for Hierarchical Clustering Analysis.

Parameters:

Name Type Description Default
kind Literal['hca']

kind always set to "hca" for Tak minimal release; other types will be introduced in next releases

'hca'

Returns:

Type Description
Tak

Tak object

Source code in opentak/preprocessing.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def build(
    self,
    kind: Literal["hca"] = "hca",
) -> Tak:
    """Build Tak object from builder.

    The ``kind`` parameter currently accepts a single value:

    ``"hca"``
        Classic Tak algorithm. Stands for Hierarchical Clustering Analysis.

    :param kind: kind always set to "hca" for Tak minimal release; other types will be introduced in next releases
    :return: Tak object
    :raises ValueError: if ``kind`` is anything other than ``"hca"``
    """
    # Recompute the array only when it has been flagged as stale.
    if self._must_create_array:
        self._create_array_from_evt_log()
        self._must_create_array = False

    tak_arguments = dict(
        array=self.array,
        index_patients=self.index_patients,
        dict_label_id=self.dict_label_id,
        timescale=self.timescale,
        evt_log=self.base,
    )

    if kind != "hca":
        raise ValueError(
            "'kind' argument should be equal to 'hca' in this minimal release"
        )
    return TakHca(**tak_arguments)

Functions

reset_array(func)

Indicate that the output array should be recomputed regardless of the cache system.

Parameters:

Name Type Description Default
func

decorated preprocessing method

required

Returns:

Type Description

method with extra functionality

Source code in opentak/preprocessing.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def reset_array(func):
    """Indicate that the output array should be recomputed regardless of the cache system.

    The returned wrapper sets ``_must_create_array`` to ``True`` on the
    instance before delegating to ``func``, forwarding both positional and
    keyword arguments unchanged.

    :param func: decorated preprocessing method
    :return: method with extra functionality
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        """Set the _must_create_array flag before calling the original method.

        :param self: instance whose cached array must be invalidated
        :param args: positional arguments to pass to the decorated function
        :param kwargs: keyword arguments to pass to the decorated function
        :return: result of the decorated function
        """
        # ruff: noqa: SLF001
        self._must_create_array = True
        return func(self, *args, **kwargs)

    return wrapper