Skip to content

autora.experimentalist.falsification.utils

align_dataframe_to_ivs(dataframe, independent_variables)

Aligns a dataframe to a list of independent variables, ensuring that its columns are in the same order as those independent variables.

Parameters:

Name Type Description Default
dataframe DataFrame

a dataframe with columns to align

required
independent_variables List[IV]

a list of independent variables

required

Returns:

Type Description
DataFrame

a dataframe with columns in the same order as the independent variables in the metadata

Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def align_dataframe_to_ivs(
    dataframe: pd.DataFrame, independent_variables: List[IV]
) -> pd.DataFrame:
    """
    Select and reorder the columns of a dataframe so that they follow the order
    of the given independent variables.

    Args:
        dataframe: the dataframe whose columns are selected and reordered
        independent_variables: the independent variables; the ``name`` attribute
            of each one is used as a column label

    Returns:
        the dataframe indexed by the list of variable names, i.e. one column per
        independent variable, in the order the variables were given
    """
    ordered_columns = [iv.name for iv in independent_variables]
    return dataframe[ordered_columns]

class_to_onehot(y, n_classes=None)

Converts a class vector (integers) to binary class matrix.

E.g. for use with categorical_crossentropy.

Arguments

y: class vector to be converted into a matrix
    (integers from 0 to n_classes - 1).
n_classes: total number of classes.

Returns

A binary matrix representation of the input.
Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def class_to_onehot(y: np.ndarray, n_classes: Optional[int] = None) -> np.ndarray:
    """
    Converts a class vector (integers) to a binary class matrix.

    E.g. for use with categorical_crossentropy.

    Args:
        y: class vector to be converted into a matrix (integer labels from 0 to
            n_classes - 1). The input is cast to an integer array.
        n_classes: total number of classes. If omitted (or falsy), it is inferred
            as the largest label plus one; for an empty input it is inferred as 0.

    Returns:
        A binary matrix representation of the input: the (possibly squeezed)
        input shape with one extra trailing axis of length n_classes.
    """
    labels = np.array(y, dtype="int")
    input_shape = labels.shape
    # A trailing singleton axis is dropped so that a column vector of shape
    # (n, 1) yields (n, n_classes) rather than (n, 1, n_classes).
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    labels = labels.ravel()
    if not n_classes:
        # np.max raises on a zero-size array, so an empty input is treated as
        # having no classes instead of crashing.
        n_classes = int(np.max(labels)) + 1 if labels.size else 0
    n_samples = labels.shape[0]
    categorical = np.zeros((n_samples, n_classes))
    # Set exactly one entry per row: the column given by that row's label.
    categorical[np.arange(n_samples), labels] = 1
    return np.reshape(categorical, input_shape + (n_classes,))

get_iv_limits(reference_conditions, metadata)

Get the limits of the independent variables

Parameters:

Name Type Description Default
reference_conditions ndarray

data that the model was trained on

required
metadata VariableCollection

Meta-data about the dependent and independent variables

required

Returns: List of limits for each independent variable

Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def get_iv_limits(
    reference_conditions: np.ndarray,
    metadata: VariableCollection,
) -> list:
    """
    Get the limits of the independent variables

    Args:
        reference_conditions: data that the model was trained on; only used when
            ``metadata`` is None, in which case it is indexed as a 2-D array
            with one column per variable
        metadata: Meta-data about the dependent and independent variables. When
            given, limits are read from each independent variable's
            ``value_range``; variables without that attribute are skipped.

    Returns: List of ``[lower, upper]`` limits for each independent variable
    """
    if metadata is None:
        # No metadata available: derive the limits from the observed data,
        # one [min, max] pair per column.
        n_columns = reference_conditions.shape[1]
        return [
            [
                np.min(reference_conditions[:, col]),
                np.max(reference_conditions[:, col]),
            ]
            for col in range(n_columns)
        ]

    iv_limit_list = []
    for iv in metadata.independent_variables:
        if not hasattr(iv, "value_range"):
            continue
        # NOTE(review): the cast suggests value_range may be declared Optional;
        # a None value would still fail on indexing here — confirm with callers.
        value_range = cast(Tuple, iv.value_range)
        iv_limit_list.append([value_range[0], value_range[1]])
    return iv_limit_list