poppernet

PopperNet

Bases: nn.Module

Source code in autora/experimentalist/pooler/poppernet.py
class PopperNet(nn.Module):
    def __init__(self, n_input: int, n_output: int):
        # Perform initialization of the pytorch superclass
        super(PopperNet, self).__init__()

        # Define network layer dimensions
        D_in, H1, H2, H3, D_out = [n_input, 64, 64, 64, n_output]

        # Define layer types
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, H3)
        self.linear4 = nn.Linear(H3, D_out)

    def forward(self, x: torch.Tensor):
        """
        This method defines the network layering and activation functions
        """
        x = self.linear1(x)  # hidden layer
        x = torch.tanh(x)  # activation function

        x = self.linear2(x)  # hidden layer
        x = torch.tanh(x)  # activation function

        x = self.linear3(x)  # hidden layer
        x = torch.tanh(x)  # activation function

        x = self.linear4(x)  # output layer

        return x

    def freeze_weights(self):
        for param in self.parameters():
            param.requires_grad = False
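
As a minimal usage sketch (not taken from the package), the network can be instantiated with the number of independent and dependent variables and run on a batch of conditions; the sizes below are illustrative assumptions:

import torch

# assumed dimensions: two independent variables, one dependent variable
net = PopperNet(2, 1)

# forward pass on a random batch of five condition patterns
conditions = torch.rand(5, 2)
predicted_loss = net(conditions)  # shape: (5, 1)

# freeze the weights before inverting the network to optimize its inputs
net.freeze_weights()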

forward(x)

This method defines the network layering and activation functions

Source code in autora/experimentalist/pooler/poppernet.py
def forward(self, x: torch.Tensor):
    """
    This method defines the network layering and activation functions
    """
    x = self.linear1(x)  # hidden layer
    x = torch.tanh(x)  # activation function

    x = self.linear2(x)  # hidden layer
    x = torch.tanh(x)  # activation function

    x = self.linear3(x)  # hidden layer
    x = torch.tanh(x)  # activation function

    x = self.linear4(x)  # output layer

    return x

class_to_onehot(y, n_classes=None)

Converts a class vector (integers) to a binary class matrix.

E.g. for use with categorical_crossentropy.

Arguments

y: class vector to be converted into a matrix
    (integers from 0 to n_classes - 1).
n_classes: total number of classes.

Returns

A binary matrix representation of the input.
Source code in autora/experimentalist/pooler/poppernet.py
def class_to_onehot(y: np.array, n_classes: Optional[int] = None):
    """Converts a class vector (integers) to binary class matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to n_classes - 1).
        n_classes: total number of classes.

    # Returns
        A binary matrix representation of the input.
    """
    y = np.array(y, dtype="int")
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not n_classes:
        n_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, n_classes))
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (n_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical
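
For illustration, a small call and its result (assuming numpy is imported as np, as in the module):

import numpy as np

y = np.array([0, 2, 1, 2])
class_to_onehot(y, n_classes=3)
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.],
#        [0., 0., 1.]])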

poppernet_pool(model, x_train, y_train, metadata, n=100, training_epochs=1000, optimization_epochs=1000, training_lr=0.001, optimization_lr=0.001, mse_scale=1, limit_offset=0, limit_repulsion=0, plot=False)

A pooler that generates samples for independent variables with the objective of maximizing the (approximated) loss of the model. The samples are generated by first training a neural network to approximate the loss of a model for all patterns in the training data. Once trained, the network is then inverted to generate samples that maximize the approximated loss of the model.

Note: If the pooler returns samples that are close to the boundaries of the variable space, then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001).

Parameters:

    model: Scikit-learn model; can be either a classification or a regression model. (required)
    x_train (np.ndarray): data that the model was trained on. (required)
    y_train (np.ndarray): labels that the model was trained on. (required)
    metadata (VariableCollection): meta-data about the dependent and independent variables. (required)
    n (int): number of samples to return. Default: 100
    training_epochs (int): number of epochs to train the popper network for approximating the error of the model. Default: 1000
    optimization_epochs (int): number of epochs to optimize the samples based on the trained popper network. Default: 1000
    training_lr (float): learning rate for training the popper network. Default: 0.001
    optimization_lr (float): learning rate for optimizing the samples. Default: 0.001
    mse_scale (float): scale factor for the MSE loss. Default: 1
    limit_offset (float): an offset that keeps the samples away from the value boundaries. Default: 0
    limit_repulsion (float): a repulsion term that keeps the samples away from the allowed value boundaries. Default: 0
    plot (bool): plot the prediction of the popper network as well as its training loss. Default: False
Source code in autora/experimentalist/pooler/poppernet.py
def poppernet_pool(
    model,
    x_train: np.ndarray,
    y_train: np.ndarray,
    metadata: VariableCollection,
    n: int = 100,
    training_epochs: int = 1000,
    optimization_epochs: int = 1000,
    training_lr: float = 1e-3,
    optimization_lr: float = 1e-3,
    mse_scale: float = 1,
    limit_offset: float = 0,  # 10**-10,
    limit_repulsion: float = 0,
    plot: bool = False,
):
    """
    A pooler that generates samples for independent variables with the objective of maximizing the
    (approximated) loss of the model. The samples are generated by first training a neural network
    to approximate the loss of a model for all patterns in the training data. Once trained, the
    network is then inverted to generate samples that maximize the approximated loss of the model.

    Note: If the pooler returns samples that are close to the boundaries of the variable space,
    then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001).

    Args:
        model: Scikit-learn model, could be either a classification or regression model
        x_train: data that the model was trained on
        y_train: labels that the model was trained on
        metadata: Meta-data about the dependent and independent variables
        n: number of samples to return
        training_epochs: number of epochs to train the popper network for approximating the
            error of the model
        optimization_epochs: number of epochs to optimize the samples based on the trained
            popper network
        training_lr: learning rate for training the popper network
        optimization_lr: learning rate for optimizing the samples
        mse_scale: scale factor for the MSE loss
        limit_offset: an offset that keeps the samples away from the value boundaries
        limit_repulsion: a repulsion term that keeps the samples away from the allowed value
            boundaries
        plot: plot the prediction of the popper network as well as its training loss

    Returns: Sampled pool

    """

    # format input

    x_train = np.array(x_train)
    if len(x_train.shape) == 1:
        x_train = x_train.reshape(-1, 1)

    x = np.empty([n, x_train.shape[1]])

    y_train = np.array(y_train)
    if len(y_train.shape) == 1:
        y_train = y_train.reshape(-1, 1)

    if metadata.dependent_variables[0].type == ValueType.CLASS:
        # find all unique values in y_train
        num_classes = len(np.unique(y_train))
        y_train = class_to_onehot(y_train, n_classes=num_classes)

    x_train_tensor = torch.from_numpy(x_train).float()

    # create list of IV limits
    ivs = metadata.independent_variables
    iv_limit_list = list()
    for iv in ivs:
        if hasattr(iv, "value_range"):
            value_range = cast(Tuple, iv.value_range)
            lower_bound = value_range[0]
            upper_bound = value_range[1]
            iv_limit_list.append(([lower_bound, upper_bound]))

    # get dimensions of input and output
    n_input = len(metadata.independent_variables)
    n_output = len(metadata.dependent_variables)

    # get input pattern for popper net
    popper_input = Variable(torch.from_numpy(x_train), requires_grad=False).float()

    # get target pattern for popper net
    model_predict = getattr(model, "predict_proba", None)
    if callable(model_predict) is False:
        model_predict = getattr(model, "predict", None)

    if callable(model_predict) is False or model_predict is None:
        raise Exception("Model must have `predict` or `predict_proba` method.")

    model_prediction = model_predict(x_train)
    if isinstance(model_prediction, np.ndarray) is False:
        try:
            model_prediction = np.array(model_prediction)
        except Exception:
            raise Exception("Model prediction must be convertable to numpy array.")
    if model_prediction.ndim == 1:
        model_prediction = model_prediction.reshape(-1, 1)

    criterion = nn.MSELoss()
    model_loss = (model_prediction - y_train) ** 2 * mse_scale
    model_loss = np.mean(model_loss, axis=1)

    # standardize the loss
    scaler = StandardScaler()
    model_loss = scaler.fit_transform(model_loss.reshape(-1, 1)).flatten()

    model_loss = torch.from_numpy(model_loss).float()
    popper_target = Variable(model_loss, requires_grad=False)

    # create the network
    popper_net = PopperNet(n_input, n_output)

    # reformat input in case it is 1D
    if len(popper_input.shape) == 1:
        popper_input = popper_input.flatten()
        popper_input = popper_input.reshape(-1, 1)

    # define the optimizer
    popper_optimizer = torch.optim.Adam(popper_net.parameters(), lr=training_lr)

    # train the network
    losses = []
    for epoch in range(training_epochs):
        popper_prediction = popper_net(popper_input)
        loss = criterion(popper_prediction, popper_target.reshape(-1, 1))
        popper_optimizer.zero_grad()
        loss.backward()
        popper_optimizer.step()
        losses.append(loss.item())

    if plot:
        popper_input_full = np.linspace(
            iv_limit_list[0][0], iv_limit_list[0][1], 1000
        ).reshape(-1, 1)
        popper_input_full = Variable(
            torch.from_numpy(popper_input_full), requires_grad=False
        ).float()
        popper_prediction = popper_net(popper_input_full)
        plot_popper_diagnostics(
            losses,
            popper_input,
            popper_input_full,
            popper_prediction,
            popper_target,
            model_prediction,
            y_train,
        )

    # now that the popper network is trained we can sample new data points
    # to sample data points we need to provide the popper network with an initial condition
    # we will sample those initial conditions proportional to the loss of the current model

    # feed average model losses through softmax
    # model_loss_avg= torch.from_numpy(np.mean(model_loss.detach().numpy(), axis=1)).float()
    softmax_func = torch.nn.Softmax(dim=0)
    probabilities = softmax_func(model_loss)
    # sample data point in proportion to model loss
    transform_category = torch.distributions.categorical.Categorical(probabilities)

    popper_net.freeze_weights()

    for condition in range(n):

        index = transform_category.sample()
        input_sample = torch.flatten(x_train_tensor[index, :])
        popper_input = Variable(input_sample, requires_grad=True)

        # invert the popper network to determine optimal experiment conditions
        for optimization_epoch in range(optimization_epochs):
            # feedforward pass on popper network
            popper_prediction = popper_net(popper_input)
            # compute gradient that maximizes output of popper network
            # (i.e. predicted loss of original model)
            popper_loss_optim = -popper_prediction
            popper_loss_optim.backward()
            # compute new input
            # with torch.no_grad():
            #     delta = -optimization_lr * popper_input.grad
            #     popper_input += -optimization_lr * popper_input.grad
            #     print(delta)
            #     popper_input.grad.zero_()

            with torch.no_grad():

                # first add repulsion from variable limits
                for idx in range(len(input_sample)):
                    iv_value = popper_input[idx]
                    iv_limits = iv_limit_list[idx]
                    dist_to_min = np.abs(iv_value - np.min(iv_limits))
                    dist_to_max = np.abs(iv_value - np.max(iv_limits))
                    # deal with boundary case where distance is 0 or very small
                    dist_to_min = np.max([dist_to_min, 0.00000001])
                    dist_to_max = np.max([dist_to_max, 0.00000001])
                    repulsion_from_min = limit_repulsion / (dist_to_min**2)
                    repulsion_from_max = limit_repulsion / (dist_to_max**2)
                    iv_value_repulsed = (
                        iv_value + repulsion_from_min - repulsion_from_max
                    )
                    popper_input[idx] = iv_value_repulsed

                # now add gradient for theory loss maximization
                delta = -optimization_lr * popper_input.grad
                popper_input += delta

                # finally, clip input variable from its limits
                for idx in range(len(input_sample)):
                    iv_raw_value = input_sample[idx]
                    iv_limits = iv_limit_list[idx]
                    iv_clipped_value = np.min(
                        [iv_raw_value, np.max(iv_limits) - limit_offset]
                    )
                    iv_clipped_value = np.max(
                        [
                            iv_clipped_value,
                            np.min(iv_limits) + limit_offset,
                        ]
                    )
                    popper_input[idx] = iv_clipped_value
                popper_input.grad.zero_()

        # add condition to new experiment sequence
        for idx in range(len(input_sample)):
            iv_limits = iv_limit_list[idx]

            # first clip value
            iv_clipped_value = np.min([iv_raw_value, np.max(iv_limits) - limit_offset])
            iv_clipped_value = np.max(
                [iv_clipped_value, np.min(iv_limits) + limit_offset]
            )
            # make sure to convert variable to original scale
            iv_clipped_scaled_value = iv_clipped_value

            x[condition, idx] = iv_clipped_scaled_value

    return iter(x)
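
Below is a minimal end-to-end sketch of calling the pooler with a scikit-learn regressor. The toy data, the LinearRegression stand-in, and the import path for Variable and VariableCollection are illustrative assumptions, not part of this module:

import numpy as np
from sklearn.linear_model import LinearRegression

from autora.experimentalist.pooler.poppernet import poppernet_pool
from autora.variable import Variable, VariableCollection  # assumed import path

# toy training data: one independent variable on [0, 1]
x_train = np.linspace(0, 1, 50)
y_train = np.sin(2 * np.pi * x_train)

# any model exposing predict (or predict_proba) works; LinearRegression is a stand-in
model = LinearRegression().fit(x_train.reshape(-1, 1), y_train)

metadata = VariableCollection(
    independent_variables=[Variable(name="x", value_range=(0, 1))],
    dependent_variables=[Variable(name="y")],
)

# request 10 new conditions where the popper network predicts high model error
new_conditions = list(
    poppernet_pool(model, x_train, y_train, metadata, n=10, training_epochs=500)
)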