autora.theorist.bms.fit_prior

`parse_options()`

Parse command-line arguments.

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py

def parse_options():
    """Build the command-line option parser for the prior-fitting script.

    Returns:
        An ``OptionParser`` with all options registered. The arguments are
        not parsed here; the caller is expected to invoke ``parse_args()``.
    """
    # One entry per option: (short flag, long flag, add_option keywords).
    # Kept in declaration order so the generated --help listing is unchanged.
    option_table = (
        (
            "-s",
            "--source",
            {
                "dest": "source",
                "default": "named_equations",
                "help": "formula dataset to use ('full' or 'named_equations' (default))",
            },
        ),
        (
            "-n",
            "--nvar",
            {
                "dest": "nvar",
                "type": "int",
                "default": 5,
                "help": "number of variables to include (default 5)",
            },
        ),
        (
            "-m",
            "--npar",
            {
                "dest": "npar",
                "type": "int",
                "default": None,
                "help": "number of parameters to include (default: 2*NVAR)",
            },
        ),
        (
            "-f",
            "--factor",
            {
                "dest": "fact",
                "type": "float",
                "default": 0.05,
                "help": "factor for the parameter adjustment (default 0.05)",
            },
        ),
        (
            "-r",
            "--repetitions",
            {
                "dest": "nrep",
                "type": "int",
                "default": 1000000,
                "help": "formulas to generate between parameter updates",
            },
        ),
        (
            "-M",
            "--maxsize",
            {
                "dest": "max_size",
                "type": "int",
                "default": 50,
                "help": "maximum tree (formula) size",
            },
        ),
        (
            "-c",
            "--continue",
            {
                "dest": "contfile",
                "default": None,
                "help": "continue from parameter values in CONTFILE (default: start from scratch)",
            },
        ),
        (
            "-q",
            "--quadratic",
            {
                "dest": "quadratic",
                "action": "store_true",
                "default": False,
                "help": "fit parameters for quadratic terms (default: False)",
            },
        ),
    )

    cli = OptionParser()
    for short_flag, long_flag, settings in option_table:
        cli.add_option(short_flag, long_flag, **settings)
    return cli

`read_target_values(source, quadratic=False)`

Read the target proportions for each type of operation.

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py

def read_target_values(source, quadratic=False):
    """Read the target proportions for each type of operation.

    Up to three whitespace-separated tables are read from ``./data``
    (relative to the current working directory), all named after ``source``:

    * ``<source>.wiki.parsed__num_operations.dat``: the second column is
      summed to obtain the number of formulas.
    * ``<source>.wiki.parsed__operation_type.dat``: the first column is the
      operation name, the second column its count.
    * ``<source>.wiki.parsed__operation_type_sq.dat``: same layout; only
      read when ``quadratic`` is true.

    Blank lines (for example a trailing empty line at the end of a file) are
    ignored.

    Args:
        source: Name of the formula dataset; used to build the file names.
        quadratic: If true, also read the squared-operation table.

    Returns:
        A tuple ``(target, nform)``. ``target`` maps ``"Nopi_<op>"`` (and,
        when ``quadratic`` is true, ``"Nopi2_<op>"``) to the second column of
        the corresponding table divided by ``nform``; ``nform`` is the total
        number of formulas.

    Raises:
        OSError: If one of the data files cannot be opened.
        IndexError: If a non-blank line has fewer than two columns.
        ValueError: If a second column cannot be parsed as a number.
        ZeroDivisionError: If the formula count is zero while an operation
            table contains entries.
    """

    def read_rows(path):
        # Split every non-blank line into its whitespace-separated columns.
        with open(path) as handle:
            return [line.split() for line in handle if line.strip()]

    # Number of formulas
    count_rows = read_rows("./data/%s.wiki.parsed__num_operations.dat" % source)
    nform = sum(int(row[1]) for row in count_rows)

    # Fraction of each of the operations
    op_rows = read_rows("./data/%s.wiki.parsed__operation_type.dat" % source)
    target = {"Nopi_%s" % row[0]: float(row[1]) / nform for row in op_rows}

    # Fraction of each of the operations squared
    if quadratic:
        sq_rows = read_rows("./data/%s.wiki.parsed__operation_type_sq.dat" % source)
        target.update(
            {"Nopi2_%s" % row[0]: float(row[1]) / nform for row in sq_rows}
        )

    return target, nform

`update_ppar(tree, current, target, terms=None, step=0.05)`

Update the prior parameters using a gradient descent of sorts.

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py

def update_ppar(tree, current, target, terms=None, step=0.05):
    """Nudge the prior parameters of ``tree`` towards the target values.

    For every selected term, ``tree.prior_par[term]`` is raised when the
    current value exceeds the target and lowered when it falls short. The
    size of the move is a random fraction of ``step`` times the relative
    deviation from the target, capped at 0.5 per call. Terms whose current
    value equals the target are left untouched. Afterwards, every selected
    term whose name starts with ``"Nopi2_"`` is clamped to be non-negative.

    Args:
        tree: Object exposing a mutable ``prior_par`` mapping; modified in
            place.
        current: Mapping from term name to its current value.
        target: Mapping from term name to its target value.
        terms: Terms to update; defaults to every key of ``current``.
        step: Scale factor applied to the random adjustment.

    Returns:
        None.
    """

    def random_shift(gap, goal):
        # Random step proportional to the relative deviation; never above
        # 0.5. The tiny offset avoids dividing by zero when the goal is 0.
        return min(0.5, random() * step * float(gap) / (goal + 1e-10))

    # Which terms should we update? (Default: all)
    if terms is None:
        terms = list(current.keys())

    # First pass: move each parameter against the sign of the deviation.
    for name in terms:
        observed = current[name]
        wanted = target[name]
        if observed > wanted:
            tree.prior_par[name] += random_shift(observed - wanted, wanted)
        elif observed < wanted:
            tree.prior_par[name] -= random_shift(wanted - observed, wanted)

    # Second pass: keep quadratic terms from dropping below the minimum.
    # A parameter-dependent bound was considered at some point and disabled:
    #     lint = t.replace('Nopi2_', 'Nopi_')
    #     op = t[6:]
    #     nopmax = float(tree.max_size) / tree.ops[op] - 1.
    #     minval = - tree.prior_par[lint] / nopmax
    # The bound actually applied is a fixed floor of zero.
    floor = 0.0
    for name in terms:
        if not name.startswith("Nopi2_"):
            continue
        if tree.prior_par[name] < floor:
            tree.prior_par[name] = floor