Skip to content

autora.experimentalist.falsification.utils

align_dataframe_to_ivs(dataframe, independent_variables)

Aligns a dataframe to a list of independent variables, ensuring that the columns are in the same order as the independent variables in that list.

Parameters:

Name Type Description Default
dataframe DataFrame

a dataframe with columns to align

required
independent_variables List[IV]

a list of independent variables

required

Returns:

Type Description
DataFrame

a dataframe with columns in the same order as the independent variables in the metadata

Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def align_dataframe_to_ivs(
    dataframe: pd.DataFrame, independent_variables: List[IV]
) -> pd.DataFrame:
    """
    Selects and reorders the columns of a dataframe so that they follow the order of the
    given independent variables.

    Args:
        dataframe: a dataframe with a column for each independent variable name
        independent_variables: a list of independent variables; the ``name`` of each one
            is used as a column label

    Returns:
        a dataframe holding only the columns named by the independent variables, in the
        same order as the list
    """
    # Indexing with a list of labels both selects and orders the columns.
    ordered_columns = [iv.name for iv in independent_variables]
    return dataframe[ordered_columns]

class_to_onehot(y, n_classes=None)

Converts a class vector (integers) to binary class matrix.

E.g. for use with categorical_crossentropy.

Arguments

y: class vector to be converted into a matrix
    (integers from 0 to n_classes - 1).
n_classes: total number of classes.

Returns

A binary matrix representation of the input.
Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def class_to_onehot(y: np.ndarray, n_classes: Optional[int] = None) -> np.ndarray:
    """
    Converts a class vector (integers) to a binary class matrix (one-hot encoding).

    E.g. for use with categorical_crossentropy.

    Args:
        y: class vector to be converted into a matrix (integers from 0 to
            n_classes - 1). The input is cast to an integer array. If it has more than
            one axis and the last axis has length 1, that axis is dropped before
            encoding, so inputs of shape (n,) and (n, 1) both yield (n, n_classes).
        n_classes: total number of classes. If omitted (or 0), it is inferred as the
            largest label in ``y`` plus one.

    Returns:
        A binary matrix representation of the input: a float array of zeros and ones
        whose shape is the (possibly squeezed) input shape followed by ``n_classes``.

    Raises:
        ValueError: if ``y`` contains a negative label.
    """
    labels = np.array(y, dtype="int")
    input_shape = labels.shape
    # Drop a trailing singleton axis so column vectors encode like flat vectors.
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    labels = labels.ravel()

    # Negative labels would otherwise be treated as "count from the end" indices and
    # silently mark the wrong class instead of failing.
    if labels.size and labels.min() < 0:
        raise ValueError("class labels must be non-negative integers")

    if not n_classes:
        n_classes = np.max(labels) + 1

    n = labels.shape[0]
    categorical = np.zeros((n, n_classes))
    # One assignment per row: row i gets a 1 in column labels[i].
    categorical[np.arange(n), labels] = 1
    output_shape = input_shape + (n_classes,)
    return np.reshape(categorical, output_shape)

get_iv_limits(reference_conditions, metadata)

Get the limits of the independent variables

Parameters:

Name Type Description Default
reference_conditions ndarray

data that the model was trained on

required
metadata VariableCollection

Meta-data about the dependent and independent variables

required

Returns: List of limits for each independent variable

Source code in temp_dir/falsification/src/autora/experimentalist/falsification/utils.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def get_iv_limits(
    reference_conditions: np.ndarray,
    metadata: Optional[VariableCollection],
) -> List[List]:
    """
    Get the limits of the independent variables

    When ``metadata`` is given, the limits are taken from the ``value_range`` of each
    independent variable. Variables that have no ``value_range`` attribute, or whose
    ``value_range`` is ``None``, are skipped, so the result may then be shorter than the
    number of independent variables. When ``metadata`` is ``None``, the limits are the
    minimum and maximum of each column of ``reference_conditions``.

    Args:
        reference_conditions: data that the model was trained on; only read when
            ``metadata`` is ``None``, in which case it is indexed as
            ``reference_conditions[:, col]`` for every ``col`` in ``shape[1]``
        metadata: Meta-data about the dependent and independent variables, or ``None``
            to derive the limits from ``reference_conditions``

    Returns: List of limits for each independent variable, each as ``[lower, upper]``
    """
    iv_limit_list: List[List] = []

    if metadata is None:
        # No declared ranges available: fall back to the observed range per column.
        for col in range(reference_conditions.shape[1]):
            column = reference_conditions[:, col]
            iv_limit_list.append([np.min(column), np.max(column)])
        return iv_limit_list

    for iv in metadata.independent_variables:
        # A present-but-None value_range cannot be subscripted; treat it the same as a
        # missing attribute instead of raising TypeError.
        value_range = getattr(iv, "value_range", None)
        if value_range is None:
            continue
        iv_limit_list.append([value_range[0], value_range[1]])

    return iv_limit_list