Q-Learning¶

In [1]:

Copied!

# Uncomment the following line when running on Google Colab
# !pip install "autora"
# Uncomment the following line when running on Google Colab
# !pip install "autora"

The q-learning experiment has to be initialized with a specific formula and effects.

In [2]:

Copied!

import numpy as np
from autora.experiment_runner.synthetic.psychology.q_learning import  q_learning

s = q_learning()
import numpy as np
from autora.experiment_runner.synthetic.psychology.q_learning import  q_learning

s = q_learning()

Check the docstring to get information about the model

In [3]:

Copied!

help(q_learning)
help(q_learning)

Help on function q_learning in module autora.experiment_runner.synthetic.psychology.q_learning:

q_learning(name='Q-Learning', learning_rate: float = 0.2, decision_noise: float = 3.0, n_actions: int = 2, forget_rate: float = 0.0, perseverance_bias: float = 0.0, correlated_reward: bool = False)
    An agent that runs simple Q-learning for an n-armed bandits tasks.
    
    Args:
        name: name of the experiment
        trials: number of trials
        learning_rate: learning rate for Q-learning
        decision_noise: softmax parameter for decision noise
        n_actions: number of actions
        forget_rate: rate of forgetting
        perseverance_bias: bias towards choosing the previously chosen action
        correlated_reward: whether rewards are correlated
    
    Examples:
        >>> experiment = q_learning()
    
        # The runner can accept numpy arrays or pandas DataFrames, but the return value will
        # always be a list of numpy arrays. Each array corresponds to the choices made by the agent
        # for each trial in the input. Thus, arrays have shape (n_trials, n_actions).
        >>> experiment.run(np.array([[0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0]]),
        ...                random_state=42)
        [array([[1., 0.],
               [0., 1.],
               [0., 1.],
               [0., 1.],
               [1., 0.],
               [1., 0.]])]
    
        # The runner can accept pandas DataFrames. Each cell of the DataFrame should contain a
        # numpy array with shape (n_trials, n_actions). The return value will be a list of numpy
        # arrays, each corresponding to the choices made by the agent for each trial in the input.
        >>> experiment.run(
        ...     pd.DataFrame(
        ...         {'reward array': [np.array([[0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0]])]}),
        ...     random_state = 42)
        [array([[1., 0.],
               [0., 1.],
               [0., 1.],
               [0., 1.],
               [1., 0.],
               [1., 0.]])]

... or use the describe function:

In [4]:

Copied!

from autora.experiment_runner.synthetic.utilities import describe

print(describe(s))
from autora.experiment_runner.synthetic.utilities import describe

print(describe(s))

    An agent that runs simple Q-learning for an n-armed bandits tasks.

    Args:
        name: name of the experiment
        trials: number of trials
        learning_rate: learning rate for Q-learning
        decision_noise: softmax parameter for decision noise
        n_actions: number of actions
        forget_rate: rate of forgetting
        perseverance_bias: bias towards choosing the previously chosen action
        correlated_reward: whether rewards are correlated

    Examples:
        >>> experiment = q_learning()

        # The runner can accept numpy arrays or pandas DataFrames, but the return value will
        # always be a list of numpy arrays. Each array corresponds to the choices made by the agent
        # for each trial in the input. Thus, arrays have shape (n_trials, n_actions).
        >>> experiment.run(np.array([[0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0]]),
        ...                random_state=42)
        [array([[1., 0.],
               [0., 1.],
               [0., 1.],
               [0., 1.],
               [1., 0.],
               [1., 0.]])]

        # The runner can accept pandas DataFrames. Each cell of the DataFrame should contain a
        # numpy array with shape (n_trials, n_actions). The return value will be a list of numpy
        # arrays, each corresponding to the choices made by the agent for each trial in the input.
        >>> experiment.run(
        ...     pd.DataFrame(
        ...         {'reward array': [np.array([[0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0]])]}),
        ...     random_state = 42)
        [array([[1., 0.],
               [0., 1.],
               [0., 1.],
               [0., 1.],
               [1., 0.],
               [1., 0.]])]

The synthetic experiement s has properties like the name of the experiment:

In [5]:

Copied!

s.name
s.name

Out[5]:

'Q-Learning'

... a valid variables description:

In [6]:

Copied!

s.variables
s.variables

Out[6]:

VariableCollection(independent_variables=[IV(name='reward array', value_range=None, allowed_values=None, units='reward', type=<ValueType.BOOLEAN: 'boolean'>, variable_label='Reward Sequence', rescale=1, is_covariate=False)], dependent_variables=[DV(name='choice array', value_range=None, allowed_values=None, units='actions', type=<ValueType.REAL: 'real'>, variable_label='Action Sequence', rescale=1, is_covariate=False)], covariates=[])

... the conditions for this experiment are reward sequences. This is a variable type not yet fully integrated in AutoRA. Therefore ther is no domain yet:

In [7]:

Copied!

x = s.domain()
x
x = s.domain()
x

... the plotter is not implemented yet:

In [8]:

Copied!

s.plotter()
s.plotter()

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[8], line 1
----> 1 s.plotter()

File ~/Documents/GitHub/AutoRA/autora-synthetic/src/autora/experiment_runner/synthetic/psychology/q_learning.py:257, in q_learning.<locals>.plotter()
    256 def plotter():
--> 257     raise NotImplementedError

NotImplementedError:

We can wrap this functions to use with the state logic of AutoRA: First, we create the state with the variables:

In [9]:

Copied!





from autora.state import StandardState, on_state, Delta, experiment_runner_on_state, estimator_on_state
# We can get the variables from the runner
variables = s.variables

# With the variables, we initialize a StandardState
state = StandardState(variables)
from autora.state import StandardState, on_state, Delta, experiment_runner_on_state, estimator_on_state
# We can get the variables from the runner
variables = s.variables

# With the variables, we initialize a StandardState
state = StandardState(variables)

Here, we use a special experimentalist that can generate random trial sequences and wrap it with the on_state function to use them on state:

In [10]:

Copied!

%%capture
!pip install autora-experimentalist-bandit-random
%%capture
!pip install autora-experimentalist-bandit-random

In [11]:

Copied!





from autora.experimentalist.bandit_random import bandit_random_pool
# Wrap the functions to use on state
# Experimentalists:

@on_state()
def pool_on_state(num_samples):
      return Delta(conditions=bandit_random_pool(num_rewards=2, sequence_length=20, num_samples=num_samples))


state = pool_on_state(state, num_samples=2)
print(state.conditions)
from autora.experimentalist.bandit_random import bandit_random_pool
# Wrap the functions to use on state
# Experimentalists:

@on_state()
def pool_on_state(num_samples):
      return Delta(conditions=bandit_random_pool(num_rewards=2, sequence_length=20, num_samples=num_samples))


state = pool_on_state(state, num_samples=2)
print(state.conditions)

                                        reward array
0  [[0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1...
1  [[0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1...

Wrap the runner with the experiment_runner_on_state wrapper to use it on state:

In [12]:

Copied!

# Runner:
run_on_state = experiment_runner_on_state(s.run)
state = run_on_state(state)

state.experiment_data
# Runner:
run_on_state = experiment_runner_on_state(s.run)
state = run_on_state(state)

state.experiment_data

Out[12]:

	reward array	choice array
0	[[0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1...	[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0...
1	[[0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1...	[[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0...

Wrap the regressor with the estimator_on_state wrapper:

In [17]:

Copied!





theorist = LinearRegression()
theorist_on_state = estimator_on_state(theorist)

state = theorist_on_state(state)
# Access the last model:
model = state.models[-1]


print(f"choose_A1 = "
      f"{model.coef_[0][0]:.2f}*similarity_category_A1 "
      f"{model.coef_[0][1]:.2f}*similarity_category_A2 "
      f"{model.coef_[0][2]:.2f}*similarity_category_B1 "
      f"{model.coef_[0][3]:.2f}*similarity_category_B2 "
      f"{model.intercept_[0]:+.2f} ")
theorist = LinearRegression()
theorist_on_state = estimator_on_state(theorist)

state = theorist_on_state(state)
# Access the last model:
model = state.models[-1]


print(f"choose_A1 = "
      f"{model.coef_[0][0]:.2f}*similarity_category_A1 "
      f"{model.coef_[0][1]:.2f}*similarity_category_A2 "
      f"{model.coef_[0][2]:.2f}*similarity_category_B1 "
      f"{model.coef_[0][3]:.2f}*similarity_category_B2 "
      f"{model.intercept_[0]:+.2f} ")

In [ ]:

Epoch 86/100 --- Loss: 0.5586882; Time: 0.0762s; Convergence value: 1.78e-01
Epoch 87/100 --- Loss: 0.7901477; Time: 0.0767s; Convergence value: 1.82e-01
Epoch 88/100 --- Loss: 0.5265486; Time: 0.0751s; Convergence value: 1.92e-01
Epoch 89/100 --- Loss: 0.4401408; Time: 0.0743s; Convergence value: 1.86e-01
Epoch 90/100 --- Loss: 0.3039415; Time: 0.0756s; Convergence value: 1.82e-01
Epoch 91/100 --- Loss: 0.3906522; Time: 0.0771s; Convergence value: 1.73e-01
Epoch 92/100 --- Loss: 0.5437022; Time: 0.0769s; Convergence value: 1.65e-01
Epoch 93/100 --- Loss: 0.4635772; Time: 0.0737s; Convergence value: 1.54e-01
Epoch 94/100 --- Loss: 0.4845441; Time: 0.0743s; Convergence value: 1.48e-01
Epoch 95/100 --- Loss: 0.2648371; Time: 0.0770s; Convergence value: 1.56e-01
Epoch 96/100 --- Loss: 0.3382604; Time: 0.0748s; Convergence value: 1.37e-01
Epoch 97/100 --- Loss: 0.2581106; Time: 0.0742s; Convergence value: 1.25e-01
Epoch 98/100 --- Loss: 0.6365235; Time: 0.0737s; Convergence value: 1.47e-01
Epoch 99/100 --- Loss: 0.2228255; Time: 0.0741s; Convergence value: 1.67e-01
Epoch 100/100 --- Loss: 0.3986339; Time: 0.0745s; Convergence value: 1.73e-01
Maximum number of training epochs reached.
Model did not converge yet.
Test the trained RNN on a test dataset...
Epoch 1/1 --- Loss: 0.4823526; Time: 0.0079s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
RNN training took 7.47 seconds.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[18], line 12
      9     y = np.stack(experiment_data[dv].tolist())
     10     return Delta(models=[theorist_a.fit(x, y)])
---> 12 state = theorist_on_state(state)
     13 # Access the last model:
     14 model = state.models[-1]

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/autora/state.py:939, in delta_to_state.<locals>._f(state_, **kwargs)
    937 @wraps(f)
    938 def _f(state_: S, **kwargs) -> S:
--> 939     delta = f(state_, **kwargs)
    940     assert isinstance(delta, Mapping), (
    941         "Output of %s must be a `Delta`, `UserDict`, " "or `dict`." % f
    942     )
    943     new_state = state_ + delta

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/autora/state.py:675, in inputs_from_state.<locals>._f(state_, **kwargs)
    673     arguments_from_state["state"] = state_
    674 arguments = dict(arguments_from_state, **kwargs)
--> 675 result = f(**arguments)
    676 return result

Cell In[18], line 10, in theorist_on_state(experiment_data)
      8 x = np.stack(experiment_data[iv].tolist())
      9 y = np.stack(experiment_data[dv].tolist())
---> 10 return Delta(models=[theorist_a.fit(x, y)])

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/autora/theorist/rnn_sindy_rl/__init__.py:159, in RNNSindy.fit(self, conditions, observations, epochs, **kwargs)
    156 if epochs is None:
    157     epochs = self.epochs
--> 159 self.rnn = rnn_main(
    160     xs=conditions,
    161     ys=observations,
    162     model=self.rnn,
    163     epochs=epochs,
    164     **kwargs,
    165 )
    167 self.sindy = sindy_main(
    168     conditions,
    169     observations,
   (...)
    180     **kwargs,
    181 )
    183 return self

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/autora/theorist/rnn_sindy_rl/rnn_main.py:103, in main(xs, ys, model, epochs, n_steps_per_call, batch_size, learning_rate, convergence_threshold, analysis, save_name, checkpoint, **kwargs)
     98 # save trained parameters  
     99 state_dict = {
    100   'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    101   'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
    102 }
--> 103 torch.save(state_dict, save_name)
    105 print(f'Saved RNN parameters to file {save_name}.')
    107 # Analysis

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/torch/serialization.py:651, in save(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization, _disable_byteorder_record)
    648 _check_save_filelike(f)
    650 if _use_new_zipfile_serialization:
--> 651     with _open_zipfile_writer(f) as opened_zipfile:
    652         _save(obj, opened_zipfile, pickle_module, pickle_protocol, _disable_byteorder_record)
    653         return

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/torch/serialization.py:525, in _open_zipfile_writer(name_or_buffer)
    523 else:
    524     container = _open_zipfile_writer_buffer
--> 525 return container(name_or_buffer)

File ~/Documents/GitHub/AutoRA/autora-synthetic/venv/lib/python3.11/site-packages/torch/serialization.py:496, in _open_zipfile_writer_file.__init__(self, name)
    494     super().__init__(torch._C.PyTorchFileWriter(self.file_stream))
    495 else:
--> 496     super().__init__(torch._C.PyTorchFileWriter(self.name))

RuntimeError: Parent directory trained_models does not exist.