Synthetic Data Examples¶

In [ ]:

Copied!

# Uncomment the following line when running on Google Colab
# !pip install "autora"
# Uncomment the following line when running on Google Colab
# !pip install "autora"

Load the Weber-Fechner Law:

In [ ]:

Copied!

import numpy as np
from autora.experiment_runner.synthetic.psychophysics.weber_fechner_law import weber_fechner_law
s = weber_fechner_law()
import numpy as np
from autora.experiment_runner.synthetic.psychophysics.weber_fechner_law import weber_fechner_law
s = weber_fechner_law()

Check the docstring to get information about the model

In [ ]:

Copied!

help(weber_fechner_law)
help(weber_fechner_law)

Help on function weber_fechner_law in module autora.experiment_runner.synthetic.psychophysics.weber_fechner_law:

weber_fechner_law(name='Weber-Fechner Law', resolution=100, constant=1.0, maximum_stimulus_intensity=5.0)
    Weber-Fechner Law
    
    Args:
        name: name of the experiment
        resolution: number of allowed values for stimulus 1 and 2
        constant: constant multiplier
        maximum_stimulus_intensity: maximum value for stimulus 1 and 2
    
    Examples:
        >>> experiment = weber_fechner_law()
    
        # We can run the runner with numpy arrays or DataFrames. Ther return value will
        # always be a pandas DataFrame.
        >>> experiment.run(np.array([[.1,.2]]), random_state=42)
            S1   S2  difference_detected
        0  0.1  0.2             0.696194
    
        >>> experiment.run(pd.DataFrame({'S1': [0.1], 'S2': [0.2]}), random_state=42)
            S1   S2  difference_detected
        0  0.1  0.2             0.696194

... or use the describe function:

In [ ]:

Copied!

from autora.experiment_runner.synthetic.utilities import describe

print(describe(s))
from autora.experiment_runner.synthetic.utilities import describe

print(describe(s))

    Weber-Fechner Law

    Args:
        name: name of the experiment
        resolution: number of allowed values for stimulus 1 and 2
        constant: constant multiplier
        maximum_stimulus_intensity: maximum value for stimulus 1 and 2

    Examples:
        >>> experiment = weber_fechner_law()

        # We can run the runner with numpy arrays or DataFrames. Ther return value will
        # always be a pandas DataFrame.
        >>> experiment.run(np.array([[.1,.2]]), random_state=42)
            S1   S2  difference_detected
        0  0.1  0.2             0.696194

        >>> experiment.run(pd.DataFrame({'S1': [0.1], 'S2': [0.2]}), random_state=42)
            S1   S2  difference_detected
        0  0.1  0.2             0.696194

The synthetic experiment s has properties like the name of the experiment:

In [ ]:

Copied!

s.name
s.name

Out[ ]:

'Weber-Fechner Law'

... a valid metadata description:

In [ ]:

Copied!

s.variables
s.variables

Out[ ]:

VariableCollection(independent_variables=[IV(name='S1', value_range=(0.01, 5.0), allowed_values=array([0.01      , 0.06040404, 0.11080808, 0.16121212, 0.21161616,
       0.2620202 , 0.31242424, 0.36282828, 0.41323232, 0.46363636,
       0.5140404 , 0.56444444, 0.61484848, 0.66525253, 0.71565657,
       0.76606061, 0.81646465, 0.86686869, 0.91727273, 0.96767677,
       1.01808081, 1.06848485, 1.11888889, 1.16929293, 1.21969697,
       1.27010101, 1.32050505, 1.37090909, 1.42131313, 1.47171717,
       1.52212121, 1.57252525, 1.62292929, 1.67333333, 1.72373737,
       1.77414141, 1.82454545, 1.87494949, 1.92535354, 1.97575758,
       2.02616162, 2.07656566, 2.1269697 , 2.17737374, 2.22777778,
       2.27818182, 2.32858586, 2.3789899 , 2.42939394, 2.47979798,
       2.53020202, 2.58060606, 2.6310101 , 2.68141414, 2.73181818,
       2.78222222, 2.83262626, 2.8830303 , 2.93343434, 2.98383838,
       3.03424242, 3.08464646, 3.13505051, 3.18545455, 3.23585859,
       3.28626263, 3.33666667, 3.38707071, 3.43747475, 3.48787879,
       3.53828283, 3.58868687, 3.63909091, 3.68949495, 3.73989899,
       3.79030303, 3.84070707, 3.89111111, 3.94151515, 3.99191919,
       4.04232323, 4.09272727, 4.14313131, 4.19353535, 4.24393939,
       4.29434343, 4.34474747, 4.39515152, 4.44555556, 4.4959596 ,
       4.54636364, 4.59676768, 4.64717172, 4.69757576, 4.7479798 ,
       4.79838384, 4.84878788, 4.89919192, 4.94959596, 5.        ]), units='intensity', type=<ValueType.REAL: 'real'>, variable_label='Stimulus 1 Intensity', rescale=1, is_covariate=False), IV(name='S2', value_range=(0.01, 5.0), allowed_values=array([0.01      , 0.06040404, 0.11080808, 0.16121212, 0.21161616,
       0.2620202 , 0.31242424, 0.36282828, 0.41323232, 0.46363636,
       0.5140404 , 0.56444444, 0.61484848, 0.66525253, 0.71565657,
       0.76606061, 0.81646465, 0.86686869, 0.91727273, 0.96767677,
       1.01808081, 1.06848485, 1.11888889, 1.16929293, 1.21969697,
       1.27010101, 1.32050505, 1.37090909, 1.42131313, 1.47171717,
       1.52212121, 1.57252525, 1.62292929, 1.67333333, 1.72373737,
       1.77414141, 1.82454545, 1.87494949, 1.92535354, 1.97575758,
       2.02616162, 2.07656566, 2.1269697 , 2.17737374, 2.22777778,
       2.27818182, 2.32858586, 2.3789899 , 2.42939394, 2.47979798,
       2.53020202, 2.58060606, 2.6310101 , 2.68141414, 2.73181818,
       2.78222222, 2.83262626, 2.8830303 , 2.93343434, 2.98383838,
       3.03424242, 3.08464646, 3.13505051, 3.18545455, 3.23585859,
       3.28626263, 3.33666667, 3.38707071, 3.43747475, 3.48787879,
       3.53828283, 3.58868687, 3.63909091, 3.68949495, 3.73989899,
       3.79030303, 3.84070707, 3.89111111, 3.94151515, 3.99191919,
       4.04232323, 4.09272727, 4.14313131, 4.19353535, 4.24393939,
       4.29434343, 4.34474747, 4.39515152, 4.44555556, 4.4959596 ,
       4.54636364, 4.59676768, 4.64717172, 4.69757576, 4.7479798 ,
       4.79838384, 4.84878788, 4.89919192, 4.94959596, 5.        ]), units='intensity', type=<ValueType.REAL: 'real'>, variable_label='Stimulus 2 Intensity', rescale=1, is_covariate=False)], dependent_variables=[DV(name='difference_detected', value_range=(0, 5.0), allowed_values=None, units='sensation', type=<ValueType.REAL: 'real'>, variable_label='Sensation', rescale=1, is_covariate=False)], covariates=[])

... a function to generate the full domain of the data (if possible)

In [ ]:

Copied!

x = s.domain()
x
x = s.domain()
x

Out[ ]:

array([[0.01      , 0.01      ],
       [0.01      , 0.06040404],
       [0.01      , 0.11080808],
       ...,
       [4.94959596, 4.94959596],
       [4.94959596, 5.        ],
       [5.        , 5.        ]])

... the experiment runner, which can be called to generate experimental results:

In [ ]:

Copied!

# Run the synthetic experiment on every condition in x. The displayed result
# is a table with the columns S1, S2 and difference_detected.
# numpy was already imported in the loading cell and is not used in this cell.
import numpy as np
y = s.run(x)  # doctest: +ELLIPSIS
y
# Run the synthetic experiment on every condition in x. The displayed result
# is a table with the columns S1, S2 and difference_detected.
# numpy was already imported in the loading cell and is not used in this cell.
import numpy as np
y = s.run(x)  # doctest: +ELLIPSIS
y

Out[ ]:

	S1	S2	difference_detected
0	0.010000	0.010000	-0.000829
1	0.010000	0.060404	1.806354
2	0.010000	0.110808	2.406270
3	0.010000	0.161212	2.774411
4	0.010000	0.211616	3.056933
...	...	...	...
5045	4.899192	4.949596	-0.000753
5046	4.899192	5.000000	0.037958
5047	4.949596	4.949596	-0.013647
5048	4.949596	5.000000	0.020839
5049	5.000000	5.000000	-0.021462

5050 rows × 3 columns

... a function to plot the ground truth:

In [ ]:

Copied!

s.plotter()
s.plotter()

No description has been provided for this image

... against a fitted model if it exists:

In [ ]:

Copied!

# Fit a linear model to the generated data and pass it to the plotter.
# NOTE(review): y, as displayed by the previous cell, holds the columns S1, S2
# and difference_detected, so this call fits all three columns as targets.
# Confirm whether only difference_detected was intended.
from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(x, y)
s.plotter(model)
# Fit a linear model to the generated data and pass it to the plotter.
# NOTE(review): y, as displayed by the previous cell, holds the columns S1, S2
# and difference_detected, so this call fits all three columns as targets.
# Confirm whether only difference_detected was intended.
from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(x, y)
s.plotter(model)

These can be used to run a full experimental cycle

In [ ]:

Copied!





# Assemble a Controller from an experimentalist pipeline, the synthetic
# experiment runner and a LinearRegression theorist, then run it and print the
# coefficients of the last fitted model.
from autora.workflow.protocol import ResultKind
from autora.experimentalist.pipeline import make_pipeline
from autora.experimentalist.pooler.grid import grid_pool
from autora.experimentalist.sampler.random_sampler import random_sample
from functools import partial
import random
variables = s.variables
# Experimentalist pipeline: grid_pool is bound to the independent variables and
# is followed by random_sample, which is bound to n=20.
pool = partial(grid_pool, ivs=variables.independent_variables)
random.seed(181) # set the seed for the random sampler
sampler = partial(random_sample, n=20)
experimentalist_pipeline = make_pipeline([pool, sampler])

from autora.workflow import Controller
# LinearRegression is not imported in this cell; it comes from the earlier
# cell that fits and plots a model.
theorist = LinearRegression()

# The monitor callback prints a progress line whenever the newest history entry
# is a MODEL result. Its parameter `s` is whatever the Controller passes in
# (presumably the cycle state -- confirm); it shadows the experiment `s` only
# inside the lambda.
# NOTE(review): the earlier cell generates data with s.run(x), while this cell
# passes s.experiment_runner -- confirm that attribute is still available.
cycle = Controller(
    variables=variables, experimentalist=experimentalist_pipeline,
    experiment_runner=s.experiment_runner, theorist=theorist,
    monitor=lambda s: (s.history[-1].kind == ResultKind.MODEL) and
                       print(f"finished cycle {len(s.models)}"))

# NOTE(review): run(10) is requested, but the recorded output shows only three
# "finished cycle" lines -- presumably the argument counts steps rather than
# full cycles; confirm against the Controller documentation.
c = cycle.run(10)
# Take the most recently fitted model from the resulting state.
best_model = c.state.models[-1]
# NOTE(review): the printed labels "S0"/"S1" do not match the variable names
# S1/S2 reported by s.variables; presumably coef_[0] belongs to S1 and
# coef_[1] to S2 -- confirm before relabelling.
print(f"I = "
      f"{best_model.coef_[0]:.2f} S0 "
      f"{best_model.coef_[1]:+.2f} S1 "
      f"{best_model.intercept_:+.2f}")
# Assemble a Controller from an experimentalist pipeline, the synthetic
# experiment runner and a LinearRegression theorist, then run it and print the
# coefficients of the last fitted model.
from autora.workflow.protocol import ResultKind
from autora.experimentalist.pipeline import make_pipeline
from autora.experimentalist.pooler.grid import grid_pool
from autora.experimentalist.sampler.random_sampler import random_sample
from functools import partial
import random
variables = s.variables
# Experimentalist pipeline: grid_pool is bound to the independent variables and
# is followed by random_sample, which is bound to n=20.
pool = partial(grid_pool, ivs=variables.independent_variables)
random.seed(181) # set the seed for the random sampler
sampler = partial(random_sample, n=20)
experimentalist_pipeline = make_pipeline([pool, sampler])

from autora.workflow import Controller
# LinearRegression is not imported in this cell; it comes from the earlier
# cell that fits and plots a model.
theorist = LinearRegression()

# The monitor callback prints a progress line whenever the newest history entry
# is a MODEL result. Its parameter `s` is whatever the Controller passes in
# (presumably the cycle state -- confirm); it shadows the experiment `s` only
# inside the lambda.
# NOTE(review): the earlier cell generates data with s.run(x), while this cell
# passes s.experiment_runner -- confirm that attribute is still available.
cycle = Controller(
    variables=variables, experimentalist=experimentalist_pipeline,
    experiment_runner=s.experiment_runner, theorist=theorist,
    monitor=lambda s: (s.history[-1].kind == ResultKind.MODEL) and
                       print(f"finished cycle {len(s.models)}"))

# NOTE(review): run(10) is requested, but the recorded output shows only three
# "finished cycle" lines -- presumably the argument counts steps rather than
# full cycles; confirm against the Controller documentation.
c = cycle.run(10)
# Take the most recently fitted model from the resulting state.
best_model = c.state.models[-1]
# NOTE(review): the printed labels "S0"/"S1" do not match the variable names
# S1/S2 reported by s.variables; presumably coef_[0] belongs to S1 and
# coef_[1] to S2 -- confirm before relabelling.
print(f"I = "
      f"{best_model.coef_[0]:.2f} S0 "
      f"{best_model.coef_[1]:+.2f} S1 "
      f"{best_model.intercept_:+.2f}")

finished cycle 1
finished cycle 2
finished cycle 3
I = -0.49 S0 +0.58 S1 -0.21

In [ ]: