Skip to content

Checks

Classes

Checks(base, check_unique_in_for_everybody=True, check_unique_out_or_death_for_everybody=True, reorder=True, check_no_in_after_treatment=True, check_no_out_before_treatment=True, check_no_duplicated_rows=True, check_mutiple_delivrance_on_same_date=True)

Initialize the class that runs checks on the initial dataset.

Note: It orders the dataset by ID_PATIENT then TIMESTAMP, placing 'in' events first and 'out'/'death' events last.

Parameters:

Name Type Description Default
base

DataFrame — one row per event for a patient; columns: 'ID_PATIENT', 'EVT', 'TIMESTAMP'

required
check_unique_in_for_everybody bool

run check method check_in_for_everybody

True
check_unique_out_or_death_for_everybody bool

run check method check_out_for_everybody

True
reorder bool

reorder the dataset using the ordonne method (required by the checks that compare consecutive rows)

True
check_no_in_after_treatment bool

run check method check_no_in_after_treatment (only valid if check_unique_in_for_everybody=True and reorder=True)

True
check_no_out_before_treatment bool

run check method check_no_out_before_treatment (only valid if check_unique_out_or_death_for_everybody=True and reorder=True)

True
check_no_duplicated_rows bool

run check method check_no_duplicated_rows

True
check_mutiple_delivrance_on_same_date bool

run check method check_mutiple_delivrance_on_same_date (only valid if reorder=True)

True

Returns:

Type Description
None

None (raises an error if any check fails)

Source code in opentak/utils_events/checks.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    base,
    check_unique_in_for_everybody: bool = True,
    check_unique_out_or_death_for_everybody: bool = True,
    reorder: bool = True,
    check_no_in_after_treatment: bool = True,
    check_no_out_before_treatment: bool = True,
    check_no_duplicated_rows: bool = True,
    check_mutiple_delivrance_on_same_date: bool = True,
) -> None:
    """Store the dataset and run the requested consistency checks on it.

    Incompatible flag combinations are rejected up front, before any check is
    executed and before the dataset is reordered, so a configuration mistake
    is reported immediately instead of after the data checks have run.

    When ``reorder`` is true, ``self.base`` is replaced by the result of
    ``ordonne``.

    :param base: DataFrame — one row per event for a patient; columns: 'ID_PATIENT', 'EVT', 'TIMESTAMP'
    :param check_unique_in_for_everybody: run check method ``check_in_for_everybody``
    :param check_unique_out_or_death_for_everybody: run check method ``check_out_for_everybody``
    :param reorder: reorder dataset according to ``ordonne`` method
    :param check_no_in_after_treatment: run check method ``check_no_in_after_treatment``
        (only valid if ``check_unique_in_for_everybody=True`` and ``reorder=True``)
    :param check_no_out_before_treatment: run check method ``check_no_out_before_treatment``
        (only valid if ``check_unique_out_or_death_for_everybody=True`` and ``reorder=True``)
    :param check_no_duplicated_rows: run check method ``check_no_duplicated_rows``
    :param check_mutiple_delivrance_on_same_date: run check method ``check_mutiple_delivrance_on_same_date``
        (only valid if ``reorder=True``)
    :return: None (raises an error if any check fails)
    :raises ValueError: if the flags are inconsistent with each other, or if
        one of the enabled checks fails
    """
    # Fail fast on inconsistent flags. The order of these guards matches the
    # order in which the corresponding checks are run below.
    if check_no_in_after_treatment and not (
        check_unique_in_for_everybody and reorder
    ):
        raise ValueError(
            "If check_no_in_after_treatment = True, reorder and check_unique_in_for_everybody should be set to True"
        )
    if check_no_out_before_treatment and not (
        check_unique_out_or_death_for_everybody and reorder
    ):
        raise ValueError(
            "If check_no_out_before_treatment = True, reorder and check_unique_out_or_death_for_everybody should be set to True"
        )
    if check_mutiple_delivrance_on_same_date and not reorder:
        raise ValueError(
            "If check_mutiple_delivrance_on_same_date = True, reorder should be set to True"
        )

    self.base = base

    # Checks that do not depend on row order run on the raw dataset.
    if check_unique_in_for_everybody:
        self.check_in_for_everybody()
    if check_unique_out_or_death_for_everybody:
        self.check_out_for_everybody()

    if reorder:
        self.base = self.ordonne()

    # The remaining checks run on the (possibly reordered) dataset.
    if check_no_in_after_treatment:
        self.check_no_in_after_treatment()
    if check_no_out_before_treatment:
        self.check_no_out_before_treatment()
    if check_no_duplicated_rows:
        self.check_no_duplicated_rows()
    if check_mutiple_delivrance_on_same_date:
        self.check_mutiple_delivrance_on_same_date()
Functions
check_in_for_everybody()

Verify that every patient has exactly one 'in' event.

Raises:

Type Description
ValueError

if a patient is missing an 'in' or has more than one

Source code in opentak/utils_events/checks.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def check_in_for_everybody(
    self,
) -> None:
    """Check that each patient of the dataset has one and only one 'in' row.

    :raises ValueError: when at least one patient has no 'in' row, or when at
        least one patient has several of them
    """
    events = self.base
    is_entry = events["EVT"].eq("in")
    # One entry per 'in' row, so a patient with two 'in' rows appears twice.
    entry_ids = list(events[is_entry]["ID_PATIENT"].values)

    all_patients = set(events["ID_PATIENT"])
    missing_entry = all_patients - set(entry_ids)
    if missing_entry:
        raise ValueError(
            f"Attention : Patients {missing_entry} do not have 'in'"
        )

    entries_per_patient = Counter(entry_ids)
    repeated_entry = {
        patient for patient, count in entries_per_patient.items() if count != 1
    }
    if repeated_entry:
        raise ValueError(
            f"Attention : Patients {repeated_entry} have multiple 'in'"
        )
check_out_for_everybody()

Verify that every patient has exactly one 'out' or one 'death' event.

Raises:

Type Description
ValueError

if a patient is missing an 'out' or 'death' event or has more than one

Source code in opentak/utils_events/checks.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def check_out_for_everybody(
    self,
) -> None:
    """Check that each patient has one and only one exit row ('out' or 'death').

    'out' and 'death' rows are counted together: a patient with one of each
    is reported as having multiple exits.

    :raises ValueError: when at least one patient has neither an 'out' nor a
        'death' row, or when at least one patient has several exit rows
    """
    events = self.base
    event_kind = events["EVT"]

    # Patient ids of every 'out' row followed by those of every 'death' row;
    # a patient appears once per exit row.
    exit_ids = list(events[event_kind.eq("out")]["ID_PATIENT"].values) + list(
        events[event_kind.eq("death")]["ID_PATIENT"].values
    )

    all_patients = set(events["ID_PATIENT"])
    missing_exit = all_patients - set(exit_ids)
    if missing_exit:
        raise ValueError(
            f"Attention : Patients {missing_exit} do not have 'out'"
        )

    exits_per_patient = Counter(exit_ids)
    repeated_exit = {
        patient for patient, count in exits_per_patient.items() if count != 1
    }
    if repeated_exit:
        raise ValueError(
            f"Attention : Patients {repeated_exit} have multiple 'out'"
        )
ordonne()

Place 'in' events first and 'out'/'death' events last, then stably sort by ID_PATIENT then TIMESTAMP (mergesort).

Returns:

Type Description
DataFrame

Sorted eventlog

Source code in opentak/utils_events/checks.py
128
129
130
131
132
133
134
135
136
137
138
def ordonne(
    self,
) -> pd.DataFrame:
    """Return the event log with 'in' rows first and 'out'/'death' rows last, stably sorted.

    The rows are regrouped as: 'in' rows, then every row that is neither
    'in', 'out' nor 'death', then 'out'/'death' rows. The regrouped frame is
    handed to ``stable_sort``, so the grouping decides the relative order of
    rows that ``stable_sort`` considers equal.

    :return: Sorted eventlog
    """
    events = self.base
    event_kind = events["EVT"]

    entry_rows = events[event_kind.eq("in")]
    exit_rows = events[event_kind.isin(["out", "death"])]
    other_rows = events[~event_kind.isin(["in", "out", "death"])]

    regrouped = pd.concat([entry_rows, other_rows, exit_rows])
    return stable_sort(regrouped)
check_no_in_after_treatment()

Ensure each patient's 'in' occurs before their first treatment event.

If violations are found: logs the patient IDs and the first four related rows, then raises ValueError.

Raises:

Type Description
ValueError

if an 'in' appears after the first treatment

Source code in opentak/utils_events/checks.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def check_no_in_after_treatment(
    self,
) -> None:
    """Check that no 'in' row is preceded by another row of the same patient.

    An 'in' row whose previous row carries the same ID_PATIENT is treated as
    an 'in' occurring after the patient's first treatment. When such rows
    exist, the patient ids and the first four rows of each offending patient
    are logged at error level before the exception is raised.

    :raises ValueError: if an 'in' appears after the first treatment
    """
    events = self.base
    patient_ids = events["ID_PATIENT"]

    follows_same_patient = patient_ids.eq(patient_ids.shift(+1))
    is_entry = events["EVT"].eq("in")
    offenders = events[follows_same_patient & is_entry]["ID_PATIENT"].to_numpy()

    if not len(offenders):
        return

    logger.error(
        "Patients %s have an 'in' after their first treatment",
        offenders,
    )
    for patient in offenders:
        patient_rows = events[patient_ids.eq(patient)]
        logger.error(patient_rows.iloc[:4, :])

    raise ValueError("Some patients have 'in' after their first treatment")
check_no_out_before_treatment()

Ensure each patient's 'out' occurs after their last treatment event.

If violations are found: logs the patient IDs and the last four related rows, then raises ValueError.

Raises:

Type Description
ValueError

if an 'out' appears before the last treatment

Source code in opentak/utils_events/checks.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def check_no_out_before_treatment(
    self,
) -> None:
    """Check that no 'out' row is followed by another row of the same patient.

    An 'out' row whose next row carries the same ID_PATIENT is treated as an
    'out' occurring before the patient's last treatment. When such rows
    exist, the patient ids and the last four rows of each offending patient
    are logged at error level before the exception is raised.

    :raises ValueError: if an 'out' appears before the last treatment
    """
    events = self.base
    patient_ids = events["ID_PATIENT"]

    precedes_same_patient = patient_ids.eq(patient_ids.shift(-1))
    # NOTE(review): only 'out' rows are examined here; 'death' rows are not.
    # Confirm this is intended.
    is_exit = events["EVT"].eq("out")
    offenders = events[precedes_same_patient & is_exit]["ID_PATIENT"].to_numpy()

    if not len(offenders):
        return

    logger.error(
        "Patients %s have an 'out' before their last treatment",
        offenders,
    )
    for patient in offenders:
        patient_rows = events[patient_ids.eq(patient)]
        logger.error(patient_rows.iloc[-4:, :])

    raise ValueError("Some patients have 'out' before their last treatment")
check_no_duplicated_rows()

Ensure the dataset has no duplicated rows.

Raises:

Type Description
ValueError

if duplicate rows are found

Source code in opentak/utils_events/checks.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def check_no_duplicated_rows(
    self,
) -> None:
    """Ensure the dataset has no duplicated rows.

    The check uses an explicit condition rather than ``assert`` so that it is
    still enforced when Python runs with assertions disabled (``-O``).

    :raises ValueError: if duplicate rows are found; the message lists the
        ID_PATIENT values of the duplicated rows
    """
    # Computed once and reused both for the test and for the error message.
    duplicated_mask = self.base.duplicated()
    if duplicated_mask.any():
        pat_duplicated = self.base[duplicated_mask]["ID_PATIENT"].unique()
        raise ValueError(
            f"There are duplicate rows in the DataFrame (patient IDs: {pat_duplicated})"
        )
check_mutiple_delivrance_on_same_date()

Check whether any patients have multiple deliveries of different medications on the same day.

Returns:

Type Description
bool

True if at least one pair of same-day deliveries exists for a patient (excluding 'in' and 'out'), False otherwise

Raises:

Type Description
ValueError

if three or more deliveries occur the same day (excluding 'in' and 'out')

Source code in opentak/utils_events/checks.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def check_mutiple_delivrance_on_same_date(
    self,
) -> bool:
    """Check whether any patients have multiple deliveries of different medications on the same day.

    Rows are compared with the rows that follow them in ``self.base``, so
    the result is only meaningful when the rows of a patient are contiguous
    and ordered by TIMESTAMP.

    A row is counted when it is not an 'in' row and the row ``lag`` positions
    further down has the same ID_PATIENT, the same TIMESTAMP and is not an
    'out' row. ``lag=1`` detects two events on the same day; ``lag=2``
    detects three or more.

    :return: bool — True if multiple deliveries exist, False otherwise
    :raises ValueError: if three or more deliveries occur the same day (excluding 'in' and 'out')
    """
    base = self.base
    patient_ids = base["ID_PATIENT"]
    timestamps = base["TIMESTAMP"]
    events = base["EVT"]

    def same_patient_and_day(lag):
        # Boolean mask of rows sharing patient and timestamp with the row
        # ``lag`` positions below, ignoring 'in' rows and 'out' neighbours.
        return (
            patient_ids.eq(patient_ids.shift(-lag))
            & timestamps.eq(timestamps.shift(-lag))
            & (events.shift(-lag) != "out")
            & (events != "in")
        )

    double_rows = base[same_patient_and_day(1)]
    if not len(double_rows):
        return False

    logger.info(
        "There are %s deliveries of 2 different medications on the same day (excluding 'in' and 'out')",
        len(double_rows),
    )
    logger.debug(
        "It concerns the following patients %s", double_rows["ID_PATIENT"].unique()
    )

    triple_rows = base[same_patient_and_day(2)]
    if len(triple_rows):
        logger.error(
            "Patients %s have 3 or more deliveries on the same day (excluding 'in' and 'out')",
            triple_rows["ID_PATIENT"].unique(),
        )
        # Exceptions do not apply %-formatting to their arguments, so the
        # message is built explicitly.
        raise ValueError(
            f"There are {len(triple_rows)} deliveries of 3 different medications (or more) "
            "on the same day (excluding 'in' and 'out')"
        )
    return True

Functions