# poppernet

## PopperNet

Bases: nn.Module

Source code in autora/experimentalist/pooler/poppernet.py
 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 class PopperNet(nn.Module): def __init__(self, n_input: torch.Tensor, n_output: torch.Tensor): # Perform initialization of the pytorch superclass super(PopperNet, self).__init__() # Define network layer dimensions D_in, H1, H2, H3, D_out = [n_input, 64, 64, 64, n_output] # Define layer types self.linear1 = nn.Linear(D_in, H1) self.linear2 = nn.Linear(H1, H2) self.linear3 = nn.Linear(H2, H3) self.linear4 = nn.Linear(H3, D_out) def forward(self, x: torch.Tensor): """ This method defines the network layering and activation functions """ x = self.linear1(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear2(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear3(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear4(x) # output layer return x def freeze_weights(self): for param in self.parameters(): param.requires_grad = False 

### forward(x)

This method defines the network layering and activation functions

Source code in autora/experimentalist/pooler/poppernet.py
 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 def forward(self, x: torch.Tensor): """ This method defines the network layering and activation functions """ x = self.linear1(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear2(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear3(x) # hidden layer x = torch.tanh(x) # activation function x = self.linear4(x) # output layer return x 

## class_to_onehot(y, n_classes=None)

Converts a class vector (integers) to binary class matrix.

E.g. for use with categorical_crossentropy.

### Arguments

y: class vector to be converted into a matrix
(integers from 0 to n_classes - 1).
n_classes: total number of classes.


### Returns

A binary matrix representation of the input.

Source code in autora/experimentalist/pooler/poppernet.py
 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 def class_to_onehot(y: np.array, n_classes: Optional[int] = None): """Converts a class vector (integers) to binary class matrix. E.g. for use with categorical_crossentropy. # Arguments y: class vector to be converted into a matrix (integers from 0 to num_classes). n_classes: total number of classes. # Returns A binary matrix representation of the input. """ y = np.array(y, dtype="int") input_shape = y.shape if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: input_shape = tuple(input_shape[:-1]) y = y.ravel() if not n_classes: n_classes = np.max(y) + 1 n = y.shape[0] categorical = np.zeros((n, n_classes)) categorical[np.arange(n), y] = 1 output_shape = input_shape + (n_classes,) categorical = np.reshape(categorical, output_shape) return categorical 

## poppernet_pool(model, x_train, y_train, metadata, n=100, training_epochs=1000, optimization_epochs=1000, training_lr=0.001, optimization_lr=0.001, mse_scale=1, limit_offset=0, limit_repulsion=0, plot=False)

A pooler that generates samples for independent variables with the objective of maximizing the (approximated) loss of the model. The samples are generated by first training a neural network to approximate the loss of a model for all patterns in the training data. Once trained, the network is then inverted to generate samples that maximize the approximated loss of the model.

Note: If the pooler returns samples that are close to the boundaries of the variable space, then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001).

Parameters:

Name Type Description Default
model

Scikit-learn model, could be either a classification or regression model

required
x_train np.ndarray

data that the model was trained on

required
y_train np.ndarray

labels that the model was trained on

required
metadata VariableCollection

Meta-data about the dependent and independent variables

required
n int

number of samples to return

100
training_epochs int

number of epochs to train the popper network for approximating the error of the model

1000
optimization_epochs int

number of epochs to optimize the samples based on the trained popper network

1000
training_lr float

learning rate for training the popper network

0.001
optimization_lr float

learning rate for optimizing the samples

0.001
mse_scale float

scale factor for the MSE loss

1
limit_offset float

a limited offset to prevent the samples from being too close to the value boundaries

0
limit_repulsion float

a limited repulsion to prevent the samples from being too close to the allowed value boundaries

0
plot bool

print out the prediction of the popper network as well as its training loss

False
Source code in autora/experimentalist/pooler/poppernet.py
  12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 def poppernet_pool( model, x_train: np.ndarray, y_train: np.ndarray, metadata: VariableCollection, n: int = 100, training_epochs: int = 1000, optimization_epochs: int = 1000, training_lr: float = 1e-3, optimization_lr: float = 1e-3, mse_scale: float = 1, limit_offset: float = 0, # 10**-10, limit_repulsion: float = 0, plot: bool = False, ): """ A pooler that generates samples for independent variables with the objective of maximizing the (approximated) loss of the model. The samples are generated by first training a neural network to approximate the loss of a model for all patterns in the training data. Once trained, the network is then inverted to generate samples that maximize the approximated loss of the model. Note: If the pooler returns samples that are close to the boundaries of the variable space, then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001). 
Args: model: Scikit-learn model, could be either a classification or regression model x_train: data that the model was trained on y_train: labels that the model was trained on metadata: Meta-data about the dependent and independent variables n: number of samples to return training_epochs: number of epochs to train the popper network for approximating the error fo the model optimization_epochs: number of epochs to optimize the samples based on the trained popper network training_lr: learning rate for training the popper network optimization_lr: learning rate for optimizing the samples mse_scale: scale factor for the MSE loss limit_offset: a limited offset to prevent the samples from being too close to the value boundaries limit_repulsion: a limited repulsion to prevent the samples from being too close to the allowed value boundaries plot: print out the prediction of the popper network as well as its training loss Returns: Sampled pool """ # format input x_train = np.array(x_train) if len(x_train.shape) == 1: x_train = x_train.reshape(-1, 1) x = np.empty([n, x_train.shape[1]]) y_train = np.array(y_train) if len(y_train.shape) == 1: y_train = y_train.reshape(-1, 1) if metadata.dependent_variables[0].type == ValueType.CLASS: # find all unique values in y_train num_classes = len(np.unique(y_train)) y_train = class_to_onehot(y_train, n_classes=num_classes) x_train_tensor = torch.from_numpy(x_train).float() # create list of IV limits ivs = metadata.independent_variables iv_limit_list = list() for iv in ivs: if hasattr(iv, "value_range"): value_range = cast(Tuple, iv.value_range) lower_bound = value_range[0] upper_bound = value_range[1] iv_limit_list.append(([lower_bound, upper_bound])) # get dimensions of input and output n_input = len(metadata.independent_variables) n_output = len(metadata.dependent_variables) # get input pattern for popper net popper_input = Variable(torch.from_numpy(x_train), requires_grad=False).float() # get target pattern for popper net 
model_predict = getattr(model, "predict_proba", None) if callable(model_predict) is False: model_predict = getattr(model, "predict", None) if callable(model_predict) is False or model_predict is None: raise Exception("Model must have predict or predict_proba method.") model_prediction = model_predict(x_train) if isinstance(model_prediction, np.ndarray) is False: try: model_prediction = np.array(model_prediction) except Exception: raise Exception("Model prediction must be convertable to numpy array.") if model_prediction.ndim == 1: model_prediction = model_prediction.reshape(-1, 1) criterion = nn.MSELoss() model_loss = (model_prediction - y_train) ** 2 * mse_scale model_loss = np.mean(model_loss, axis=1) # standardize the loss scaler = StandardScaler() model_loss = scaler.fit_transform(model_loss.reshape(-1, 1)).flatten() model_loss = torch.from_numpy(model_loss).float() popper_target = Variable(model_loss, requires_grad=False) # create the network popper_net = PopperNet(n_input, n_output) # reformat input in case it is 1D if len(popper_input.shape) == 1: popper_input = popper_input.flatten() popper_input = popper_input.reshape(-1, 1) # define the optimizer popper_optimizer = torch.optim.Adam(popper_net.parameters(), lr=training_lr) # train the network losses = [] for epoch in range(training_epochs): popper_prediction = popper_net(popper_input) loss = criterion(popper_prediction, popper_target.reshape(-1, 1)) popper_optimizer.zero_grad() loss.backward() popper_optimizer.step() losses.append(loss.item()) if plot: popper_input_full = np.linspace( iv_limit_list[0][0], iv_limit_list[0][1], 1000 ).reshape(-1, 1) popper_input_full = Variable( torch.from_numpy(popper_input_full), requires_grad=False ).float() popper_prediction = popper_net(popper_input_full) plot_popper_diagnostics( losses, popper_input, popper_input_full, popper_prediction, popper_target, model_prediction, y_train, ) # now that the popper network is trained we can sample new data points # to sample data 
points we need to provide the popper network with an initial condition # we will sample those initial conditions proportional to the loss of the current model # feed average model losses through softmax # model_loss_avg= torch.from_numpy(np.mean(model_loss.detach().numpy(), axis=1)).float() softmax_func = torch.nn.Softmax(dim=0) probabilities = softmax_func(model_loss) # sample data point in proportion to model loss transform_category = torch.distributions.categorical.Categorical(probabilities) popper_net.freeze_weights() for condition in range(n): index = transform_category.sample() input_sample = torch.flatten(x_train_tensor[index, :]) popper_input = Variable(input_sample, requires_grad=True) # invert the popper network to determine optimal experiment conditions for optimization_epoch in range(optimization_epochs): # feedforward pass on popper network popper_prediction = popper_net(popper_input) # compute gradient that maximizes output of popper network # (i.e. predicted loss of original model) popper_loss_optim = -popper_prediction popper_loss_optim.backward() # compute new input # with torch.no_grad(): # delta = -optimization_lr * popper_input.grad # popper_input += -optimization_lr * popper_input.grad # print(delta) # popper_input.grad.zero_() with torch.no_grad(): # first add repulsion from variable limits for idx in range(len(input_sample)): iv_value = popper_input[idx] iv_limits = iv_limit_list[idx] dist_to_min = np.abs(iv_value - np.min(iv_limits)) dist_to_max = np.abs(iv_value - np.max(iv_limits)) # deal with boundary case where distance is 0 or very small dist_to_min = np.max([dist_to_min, 0.00000001]) dist_to_max = np.max([dist_to_max, 0.00000001]) repulsion_from_min = limit_repulsion / (dist_to_min**2) repulsion_from_max = limit_repulsion / (dist_to_max**2) iv_value_repulsed = ( iv_value + repulsion_from_min - repulsion_from_max ) popper_input[idx] = iv_value_repulsed # now add gradient for theory loss maximization delta = -optimization_lr * 
popper_input.grad popper_input += delta # finally, clip input variable from its limits for idx in range(len(input_sample)): iv_raw_value = input_sample[idx] iv_limits = iv_limit_list[idx] iv_clipped_value = np.min( [iv_raw_value, np.max(iv_limits) - limit_offset] ) iv_clipped_value = np.max( [ iv_clipped_value, np.min(iv_limits) + limit_offset, ] ) popper_input[idx] = iv_clipped_value popper_input.grad.zero_() # add condition to new experiment sequence for idx in range(len(input_sample)): iv_limits = iv_limit_list[idx] # first clip value iv_clipped_value = np.min([iv_raw_value, np.max(iv_limits) - limit_offset]) iv_clipped_value = np.max( [iv_clipped_value, np.min(iv_limits) + limit_offset] ) # make sure to convert variable to original scale iv_clipped_scaled_value = iv_clipped_value x[condition, idx] = iv_clipped_scaled_value return iter(x)