Skip to content

autora.theorist.bms.fit_prior

parse_options()

Build and return the command-line option parser (the caller is expected to invoke `parse_args()` on it).

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def parse_options():
    """Build the command-line option parser.

    The options are declared in a table and registered in order, so the
    generated ``--help`` output lists them in the sequence shown below.

    Returns:
        The configured ``OptionParser``. No arguments are parsed here; the
        caller is expected to invoke ``parse_args()`` on the result.
    """
    # Each entry: (option flags, keyword settings handed to add_option).
    option_specs = (
        (
            ("-s", "--source"),
            {
                "dest": "source",
                "default": "named_equations",
                "help": "formula dataset to use ('full' or 'named_equations' (default))",
            },
        ),
        (
            ("-n", "--nvar"),
            {
                "dest": "nvar",
                "type": "int",
                "default": 5,
                "help": "number of variables to include (default 5)",
            },
        ),
        (
            ("-m", "--npar"),
            {
                "dest": "npar",
                "type": "int",
                "default": None,
                "help": "number of parameters to include (default: 2*NVAR)",
            },
        ),
        (
            ("-f", "--factor"),
            {
                "dest": "fact",
                "type": "float",
                "default": 0.05,
                "help": "factor for the parameter adjustment (default 0.05)",
            },
        ),
        (
            ("-r", "--repetitions"),
            {
                "dest": "nrep",
                "type": "int",
                "default": 1000000,
                "help": "formulas to generate between parameter updates",
            },
        ),
        (
            ("-M", "--maxsize"),
            {
                "dest": "max_size",
                "type": "int",
                "default": 50,
                "help": "maximum tree (formula) size",
            },
        ),
        (
            ("-c", "--continue"),
            {
                "dest": "contfile",
                "default": None,
                "help": "continue from parameter values in CONTFILE (default: start from scratch)",
            },
        ),
        (
            ("-q", "--quadratic"),
            {
                "dest": "quadratic",
                "action": "store_true",
                "default": False,
                "help": "fit parameters for quadratic terms (default: False)",
            },
        ),
    )

    cli = OptionParser()
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)
    return cli

read_target_values(source, quadratic=False)

Read the target proportions for each type of operation.

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def read_target_values(source, quadratic=False):
    """Read the target proportions for each type of operation.

    The data files are whitespace-separated tables located under ``./data``
    (relative to the current working directory). Only the first two fields of
    each line are used. Blank or whitespace-only lines (for example a trailing
    empty line) are ignored instead of raising ``IndexError``.

    Args:
        source: Dataset name used to build the file names
            ``./data/<source>.wiki.parsed__*.dat``.
        quadratic: When true, additionally read the ``operation_type_sq``
            table and add its entries under ``Nopi2_<label>`` keys.

    Returns:
        A ``(target, nform)`` tuple. ``nform`` is the sum of the integer
        second column of the ``num_operations`` table. ``target`` maps
        ``Nopi_<label>`` (and, if requested, ``Nopi2_<label>``) to the second
        column of the corresponding table divided by ``nform``.

    Raises:
        OSError: If one of the data files cannot be opened.
        ValueError: If a second-column value is not numeric.
        ZeroDivisionError: If ``nform`` is zero and a table has entries.
    """

    def _rows(path):
        """Return the split fields of every non-blank line in *path*."""
        with open(path) as handle:
            return [fields for fields in map(str.split, handle) if fields]

    # Number of formulas (sum of the second column).
    nform = sum(
        int(fields[1])
        for fields in _rows("./data/%s.wiki.parsed__num_operations.dat" % source)
    )

    # Fraction of each of the operations.
    target = {
        "Nopi_%s" % fields[0]: float(fields[1]) / nform
        for fields in _rows("./data/%s.wiki.parsed__operation_type.dat" % source)
    }

    # Fraction of each of the operations squared.
    if quadratic:
        target.update(
            {
                "Nopi2_%s" % fields[0]: float(fields[1]) / nform
                for fields in _rows(
                    "./data/%s.wiki.parsed__operation_type_sq.dat" % (source)
                )
            }
        )

    return target, nform

update_ppar(tree, current, target, terms=None, step=0.05)

Update the prior parameters using a gradient descent of sorts.

Source code in temp_dir/bms/src/autora/theorist/bms/fit_prior.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def update_ppar(tree, current, target, terms=None, step=0.05):
    """Nudge the tree's prior parameters toward the target values.

    For every selected term whose current value differs from its target,
    ``tree.prior_par[term]`` is moved by a random amount proportional to the
    relative mismatch and capped at 0.5: upward when the current value
    exceeds the target, downward when it falls short. Afterwards, selected
    terms whose name starts with ``"Nopi2_"`` are clamped so that they are
    never negative.

    Args:
        tree: Object exposing a mutable ``prior_par`` mapping; modified in
            place.
        current: Mapping from term name to its currently observed value.
        target: Mapping from term name to its desired value.
        terms: Term names to update; defaults to every key of ``current``.
        step: Scale factor applied to the random adjustment.

    Returns:
        None.
    """

    def _capped_shift(gap, reference):
        # One random draw per adjusted term; relative mismatch, capped at 0.5.
        return min(0.5, random() * step * float(gap) / (reference + 1e-10))

    # Default selection: every term present in `current`.
    selected = list(current.keys()) if terms is None else terms

    for name in selected:
        observed = current[name]
        wanted = target[name]
        if observed > wanted:
            tree.prior_par[name] += _capped_shift(observed - wanted, wanted)
        elif observed < wanted:
            tree.prior_par[name] -= _capped_shift(wanted - observed, wanted)

    # Quadratic terms must not drop below the minimum allowed. The original
    # listing carries a disabled, operation-dependent bound
    # (-prior_par['Nopi_<op>'] / nopmax, with
    # nopmax = max_size / ops[op] - 1); the active floor is simply zero.
    floor = 0.0
    quadratic_terms = [name for name in selected if name.startswith("Nopi2_")]
    for name in quadratic_terms:
        if tree.prior_par[name] < floor:
            tree.prior_par[name] = floor