regressor

DARTSExecutionMonitor

A monitor of the execution of the DARTS algorithm.

Source code in src/autora/theorist/darts/regressor.py, lines 774-866:
class DARTSExecutionMonitor:
    """
    A monitor of the execution of the DARTS algorithm.
    """

    def __init__(self):
        """
        Initializes the execution monitor.
        """
        self.arch_weight_history = list()
        self.loss_history = list()
        self.epoch_history = list()
        self.primitives = list()

    def execution_monitor(
        self,
        network: Network,
        architect: Architect,
        epoch: int,
        **kwargs: Any,
    ):
        """
        A function to monitor the execution of the DARTS algorithm.

        Arguments:
            network: The DARTS network containing the weights of each operation
                in the mixture architecture.
            architect: The architect object used to construct the mixture architecture.
            epoch: The current epoch of the training.
            **kwargs: other parameters which may be passed from the DARTS optimizer
        """

        # collect data for visualization
        self.epoch_history.append(epoch)
        self.arch_weight_history.append(
            network.arch_parameters()[0].detach().numpy().copy()[np.newaxis, :]
        )
        self.loss_history.append(architect.current_loss)
        self.primitives = network.primitives

    def display(self):
        """
        A function to display the execution monitor. This function will generate two plots:
        (1) A plot of the training loss vs. epoch,
        (2) a plot of the architecture weights vs. epoch, divided into subplots by each edge
        in the mixture architecture.
        """

        loss_fig, loss_ax = plt.subplots(1, 1)
        loss_ax.plot(self.loss_history)

        loss_ax.set_ylabel("Loss", fontsize=14)
        loss_ax.set_xlabel("Epoch", fontsize=14)
        loss_ax.set_title("Training Loss")

        arch_weight_history_array = np.vstack(self.arch_weight_history)
        num_epochs, num_edges, num_primitives = arch_weight_history_array.shape

        subplots_per_side = int(np.ceil(np.sqrt(num_edges)))

        arch_fig, arch_axes = plt.subplots(
            subplots_per_side,
            subplots_per_side,
            sharex=True,
            sharey=True,
            figsize=(10, 10),
            squeeze=False,
        )

        arch_fig.suptitle("Architecture Weights", fontsize=10)

        for (edge_i, ax) in zip(range(num_edges), arch_axes.flat):
            for primitive_i in range(num_primitives):
                ax.plot(
                    arch_weight_history_array[:, edge_i, primitive_i],
                    label=f"{self.primitives[primitive_i]}",
                )

            ax.set_title("k{}".format(edge_i), fontsize=8)

            # there is no need to have the legend for each subplot
            if edge_i == 0:
                ax.legend(loc="upper center")
                ax.set_ylabel("Edge Weights", fontsize=8)
                ax.set_xlabel("Epoch", fontsize=8)

        return SimpleNamespace(
            loss_fig=loss_fig,
            loss_ax=loss_ax,
            arch_fig=arch_fig,
            arch_axes=arch_axes,
        )
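
A typical use is to pass the monitor's `execution_monitor` method to a `DARTSRegressor` (documented below), which calls it during training. A minimal sketch, assuming both classes are imported from this module and using data shapes like the `DARTSRegressor` example below:

>>> import numpy as np
>>> X = np.linspace(start=0, stop=1, num=100).reshape(-1, 1)
>>> y = 15. * np.ones(100)
>>> monitor = DARTSExecutionMonitor()
>>> estimator = DARTSRegressor(
...     num_graph_nodes=1,
...     execution_monitor=monitor.execution_monitor,
... )
>>> estimator = estimator.fit(X, y)  # doctest: +SKIP
>>> results = monitor.display()  # doctest: +SKIP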

__init__()

Initializes the execution monitor.

Source code in src/autora/theorist/darts/regressor.py, lines 779-786:
def __init__(self):
    """
    Initializes the execution monitor.
    """
    self.arch_weight_history = list()
    self.loss_history = list()
    self.epoch_history = list()
    self.primitives = list()

display()

A function to display the execution monitor. This function will generate two plots: (1) A plot of the training loss vs. epoch, (2) a plot of the architecture weights vs. epoch, divided into subplots by each edge in the mixture architecture.

Source code in src/autora/theorist/darts/regressor.py, lines 814-866:
def display(self):
    """
    A function to display the execution monitor. This function will generate two plots:
    (1) A plot of the training loss vs. epoch,
    (2) a plot of the architecture weights vs. epoch, divided into subplots by each edge
    in the mixture architecture.
    """

    loss_fig, loss_ax = plt.subplots(1, 1)
    loss_ax.plot(self.loss_history)

    loss_ax.set_ylabel("Loss", fontsize=14)
    loss_ax.set_xlabel("Epoch", fontsize=14)
    loss_ax.set_title("Training Loss")

    arch_weight_history_array = np.vstack(self.arch_weight_history)
    num_epochs, num_edges, num_primitives = arch_weight_history_array.shape

    subplots_per_side = int(np.ceil(np.sqrt(num_edges)))

    arch_fig, arch_axes = plt.subplots(
        subplots_per_side,
        subplots_per_side,
        sharex=True,
        sharey=True,
        figsize=(10, 10),
        squeeze=False,
    )

    arch_fig.suptitle("Architecture Weights", fontsize=10)

    for (edge_i, ax) in zip(range(num_edges), arch_axes.flat):
        for primitive_i in range(num_primitives):
            ax.plot(
                arch_weight_history_array[:, edge_i, primitive_i],
                label=f"{self.primitives[primitive_i]}",
            )

        ax.set_title("k{}".format(edge_i), fontsize=8)

        # there is no need to have the legend for each subplot
        if edge_i == 0:
            ax.legend(loc="upper center")
            ax.set_ylabel("Edge Weights", fontsize=8)
            ax.set_xlabel("Epoch", fontsize=8)

    return SimpleNamespace(
        loss_fig=loss_fig,
        loss_ax=loss_ax,
        arch_fig=arch_fig,
        arch_axes=arch_axes,
    )
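
`display` returns its figures and axes in a `SimpleNamespace`, so they can be adjusted or saved after the fact. A brief sketch, assuming `monitor` has already collected data during a `fit` call:

>>> results = monitor.display()  # doctest: +SKIP
>>> results.loss_fig.savefig("darts_loss.png")  # doctest: +SKIP
>>> results.arch_fig.savefig("darts_arch_weights.png")  # doctest: +SKIP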

execution_monitor(network, architect, epoch, **kwargs)

A function to monitor the execution of the DARTS algorithm.

Parameters:

    network (Network): The DARTS network containing the weights of each operation
        in the mixture architecture. Required.
    architect (Architect): The architect object used to construct the mixture
        architecture. Required.
    epoch (int): The current epoch of the training. Required.
    **kwargs (Any): Other parameters which may be passed from the DARTS optimizer.
        Default: {}.
Source code in src/autora/theorist/darts/regressor.py, lines 788-812:
def execution_monitor(
    self,
    network: Network,
    architect: Architect,
    epoch: int,
    **kwargs: Any,
):
    """
    A function to monitor the execution of the DARTS algorithm.

    Arguments:
        network: The DARTS network containing the weights of each operation
            in the mixture architecture.
        architect: The architect object used to construct the mixture architecture.
        epoch: The current epoch of the training.
        **kwargs: other parameters which may be passed from the DARTS optimizer
    """

    # collect data for visualization
    self.epoch_history.append(epoch)
    self.arch_weight_history.append(
        network.arch_parameters()[0].detach().numpy().copy()[np.newaxis, :]
    )
    self.loss_history.append(architect.current_loss)
    self.primitives = network.primitives
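
Any callable with this signature can act as a monitor; `DARTSExecutionMonitor.execution_monitor` is just one such callable. A sketch of a custom monitor that logs the loss each time it is called (it reads `architect.current_loss`, the same attribute used above):

>>> def log_loss(network, architect, epoch, **kwargs):
...     print(f"epoch {epoch}: loss {architect.current_loss}")
>>> estimator = DARTSRegressor(execution_monitor=log_loss)  # doctest: +SKIP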

DARTSRegressor

Bases: BaseEstimator, RegressorMixin

Differentiable ARchiTecture Search Regressor.

DARTS finds a composition of functions and coefficients to minimize a loss function suitable for the dependent variable.

This class is intended to be compatible with the Scikit-Learn Estimator API.

Examples:

>>> import numpy as np
>>> num_samples = 1000
>>> X = np.linspace(start=0, stop=1, num=num_samples).reshape(-1, 1)
>>> y = 15. * np.ones(num_samples)
>>> estimator = DARTSRegressor(num_graph_nodes=1)
>>> estimator = estimator.fit(X, y)
>>> estimator.predict([[0.5]])
array([[15.051043]], dtype=float32)

Attributes:

    network_ (Optional[Network]): represents the optimized network for the
        architecture search, without the output function.
    model_ (Optional[Network]): represents the best-fit model, including the output
        function, after sampling of the network to pick a single computation graph.
        By default, this is the computation graph with the maximum weights, but it
        can be set to a graph sampled from the edge weights by running the
        `resample_model(sample_strategy="sample")` method, and reset by running the
        `resample_model(sample_strategy="max")` method.

Source code in src/autora/theorist/darts/regressor.py, lines 433-771:
class DARTSRegressor(BaseEstimator, RegressorMixin):
    """
    Differentiable ARchiTecture Search Regressor.

    DARTS finds a composition of functions and coefficients to minimize a loss function suitable for
    the dependent variable.

    This class is intended to be compatible with the
    [Scikit-Learn Estimator API](https://scikit-learn.org/stable/developers/develop.html).

    Examples:

        >>> import numpy as np
        >>> num_samples = 1000
        >>> X = np.linspace(start=0, stop=1, num=num_samples).reshape(-1, 1)
        >>> y = 15. * np.ones(num_samples)
        >>> estimator = DARTSRegressor(num_graph_nodes=1)
        >>> estimator = estimator.fit(X, y)
        >>> estimator.predict([[0.5]])
        array([[15.051043]], dtype=float32)


    Attributes:
        network_: represents the optimized network for the architecture search, without the
            output function
        model_: represents the best-fit model including the output function
            after sampling of the network to pick a single computation graph.
            By default, this is the computation graph with the maximum weights,
            but can be set to a graph based on a sample on the edge weights
            by running the `resample_model(sample_strategy="sample")` method.
            It can be reset by running the `resample_model(sample_strategy="max")` method.



    """

    def __init__(
        self,
        batch_size: int = 64,
        num_graph_nodes: int = 2,
        output_type: IMPLEMENTED_OUTPUT_TYPES = "real",
        classifier_weight_decay: float = 1e-2,
        darts_type: IMPLEMENTED_DARTS_TYPES = "original",
        init_weights_function: Optional[Callable] = None,
        param_updates_per_epoch: int = 10,
        param_updates_for_sampled_model: int = 100,
        param_learning_rate_max: float = 2.5e-2,
        param_learning_rate_min: float = 0.01,
        param_momentum: float = 9e-1,
        param_weight_decay: float = 3e-4,
        arch_updates_per_epoch: int = 1,
        arch_learning_rate_max: float = 3e-3,
        arch_weight_decay: float = 1e-4,
        arch_weight_decay_df: float = 3e-4,
        arch_weight_decay_base: float = 0.0,
        arch_momentum: float = 9e-1,
        fair_darts_loss_weight: int = 1,
        max_epochs: int = 10,
        grad_clip: float = 5,
        primitives: Sequence[str] = PRIMITIVES,
        train_classifier_coefficients: bool = False,
        train_classifier_bias: bool = False,
        execution_monitor: Callable = (lambda *args, **kwargs: None),
        sampling_strategy: SAMPLING_STRATEGIES = "max",
    ) -> None:
        """
        Initializes the DARTSRegressor.

        Arguments:
            batch_size: Batch size for the data loader.
            num_graph_nodes: Number of nodes in the desired computation graph.
            output_type: Type of output function to use. This function is applied to transform
                the output of the mixture architecture.
            classifier_weight_decay: Weight decay for the classifier.
            darts_type: Type of DARTS to use ('original' or 'fair').
            init_weights_function: Function to initialize the parameters of each operation.
            param_updates_per_epoch: Number of updates to perform per epoch
                for the operation parameters.
            param_updates_for_sampled_model: Number of parameter updates to perform on
                the final sampled model.
            param_learning_rate_max: Initial (maximum) learning rate for the operation parameters.
            param_learning_rate_min: Final (minimum) learning rate for the operation parameters.
            param_momentum: Momentum for the operation parameters.
            param_weight_decay: Weight decay for the operation parameters.
            arch_updates_per_epoch: Number of architecture weight updates to perform per epoch.
            arch_learning_rate_max: Initial (maximum) learning rate for the architecture.
            arch_weight_decay: Weight decay for the architecture weights.
            arch_weight_decay_df: An additional weight decay that scales with the number of
                parameters (degrees of freedom) in the operation. The higher this weight decay,
                the more DARTS will prefer simple operations.
            arch_weight_decay_base: A base weight decay that is added to the scaled weight decay.
            arch_momentum: Momentum for the architecture weights.
            fair_darts_loss_weight: Weight of the loss in fair darts which forces architecture
                weights to become either 0 or 1.
            max_epochs: Maximum number of epochs to train for.
            grad_clip: Gradient clipping value for updating the parameters of the operations.
            primitives: List of primitive operations used in the DARTS network,
                e.g., 'add', 'subtract', 'none'. For details, see
                [`autora.theorist.darts.operations`][autora.theorist.darts.operations].
            train_classifier_coefficients: Whether to train the coefficients of the classifier.
            train_classifier_bias: Whether to train the bias of the classifier.
            execution_monitor: Function to monitor the execution of the model.
            sampling_strategy: Strategy used to sample the final model from the fitted
                network ("max" or "sample").
        """

        self.batch_size = batch_size

        self.num_graph_nodes = num_graph_nodes
        self.classifier_weight_decay = classifier_weight_decay
        self.darts_type = darts_type
        self.init_weights_function = init_weights_function

        self.param_updates_per_epoch = param_updates_per_epoch
        self.param_updates_for_sampled_model = param_updates_for_sampled_model

        self.param_learning_rate_max = param_learning_rate_max
        self.param_learning_rate_min = param_learning_rate_min
        self.param_momentum = param_momentum
        self.arch_momentum = arch_momentum
        self.param_weight_decay = param_weight_decay

        self.arch_updates_per_epoch = arch_updates_per_epoch
        self.arch_weight_decay = arch_weight_decay
        self.arch_weight_decay_df = arch_weight_decay_df
        self.arch_weight_decay_base = arch_weight_decay_base
        self.arch_learning_rate_max = arch_learning_rate_max
        self.fair_darts_loss_weight = fair_darts_loss_weight

        self.max_epochs = max_epochs
        self.grad_clip = grad_clip

        self.primitives = primitives

        self.output_type = output_type
        self.darts_type = darts_type

        self.X_: Optional[np.ndarray] = None
        self.y_: Optional[np.ndarray] = None
        self.network_: Optional[Network] = None
        self.model_: Optional[Network] = None

        self.train_classifier_coefficients = train_classifier_coefficients
        self.train_classifier_bias = train_classifier_bias

        self.execution_monitor = execution_monitor

        self.sampling_strategy = sampling_strategy

    def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Runs the optimization for a given set of `X`s and `y`s.

        Arguments:
            X: independent variables in an n-dimensional array
            y: dependent variables in an n-dimensional array

        Returns:
            self (DARTSRegressor): the fitted estimator
        """

        if self.output_type == "class":
            raise NotImplementedError(
                "Classification not implemented for DARTSRegressor."
            )

        params = self.get_params()

        fit_results = _general_darts(X=X, y=y, network=self.network_, **params)
        self.X_ = X
        self.y_ = y
        self.network_ = fit_results.network
        self.model_ = fit_results.model
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Applies the fitted model to a set of independent variables `X`,
        to give predictions for the dependent variable `y`.

        Arguments:
            X: independent variables in an n-dimensional array

        Returns:
            y: predicted dependent variable values
        """
        X_ = check_array(X)

        # First run the checks using the scikit-learn API, listing the key parameters
        check_is_fitted(self, attributes=["model_"])

        # Since self.model_ is initialized as None, mypy throws an error if we
        # just call self.model_(X) in the predict method, as it could still be none.
        # MyPy doesn't understand that the sklearn check_is_fitted function
        # ensures the self.model_ parameter is initialized and otherwise throws an error,
        # so we check that explicitly here and pass the model which can't be None.
        assert self.model_ is not None

        y_ = self.model_(torch.as_tensor(X_).float())
        y = y_.detach().numpy()

        return y

    def visualize_model(
        self,
        input_labels: Optional[Sequence[str]] = None,
    ):
        """
        Visualizes the model architecture as a graph.

        Arguments:
            input_labels: labels for the input nodes

        """

        check_is_fitted(self, attributes=["model_"])
        assert self.model_ is not None
        fitted_sampled_network = self.model_[0]

        genotype = Network.genotype(fitted_sampled_network).normal
        (
            _,
            _,
            param_list,
        ) = fitted_sampled_network.count_parameters()

        if input_labels is not None:
            input_labels_ = tuple(input_labels)
        else:
            input_labels_ = self._get_input_labels()

        assert self.y_ is not None
        out_dim = 1 if self.y_.ndim == 1 else self.y_.shape[1]

        out_func = get_output_str(ValueType(self.output_type))

        # call to plot function
        graph = darts_model_plot(
            genotype=genotype,
            input_labels=input_labels_,
            param_list=param_list,
            full_label=True,
            out_dim=out_dim,
            out_fnc=out_func,
        )

        return graph

    def _get_input_labels(self):
        """
        Returns the input labels for the model.

        Returns:
            input_labels: labels for the input nodes

        """
        return self._get_labels(self.X_, "x")

    def _get_output_labels(self):
        """
        Returns the output labels for the model.

        Returns:
            output_labels: labels for the output nodes

        """
        return self._get_labels(self.y_, "y")

    def _get_labels(
        self, data: Optional[np.ndarray], default_label: str
    ) -> Sequence[str]:
        """
        Returns the labels for the model.

        Arguments:
            data: data to get labels for
            default_label: default label to use if no labels are provided

        Returns:
            labels: labels for the model

        """
        assert data is not None

        if hasattr(data, "columns"):  # it's a dataframe with column names
            labels_ = tuple(data.columns)
        elif (
            hasattr(data, "name") and len(data.shape) == 1
        ):  # it's a single series with a single name
            labels_ = (data.name,)

        else:
            dim = 1 if data.ndim == 1 else data.shape[1]
            labels_ = tuple(f"{default_label}{i+1}" for i in range(dim))
        return labels_

    def model_repr(
        self,
        input_labels: Optional[Sequence[str]] = None,
        output_labels: Optional[Sequence[str]] = None,
        output_function_label: str = "",
        decimals_to_display: int = 2,
        output_format: Literal["latex", "console"] = "console",
    ) -> str:
        """
        Returns the equations of the model architecture as a string.

        Args:
            input_labels: which names to use for the independent variables (X)
            output_labels: which names to use for the dependent variables (y)
            output_function_label: name to use for the output transformation
            decimals_to_display: amount of rounding for the coefficient values
            output_format: whether the output should be formatted for
                the command line (`console`) or as equations in a latex file (`latex`)

        Returns:
            The equations of the model architecture

        """
        assert self.model_ is not None
        fitted_sampled_network: Network = self.model_[0]

        if input_labels is None:
            input_labels_ = self._get_input_labels()
        else:
            input_labels_ = input_labels

        if output_labels is None:
            output_labels_ = self._get_output_labels()
        else:
            output_labels_ = output_labels

        edge_list = fitted_sampled_network.architecture_to_str_list(
            input_labels=input_labels_,
            output_labels=output_labels_,
            output_function_label=output_function_label,
            decimals_to_display=decimals_to_display,
            output_format=output_format,
        )

        model_repr_ = "\n".join(["Model:"] + edge_list)
        return model_repr_
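
As described in the attributes above, the sampled `model_` can be re-drawn after fitting. A sketch, assuming `resample_model` accepts `sample_strategy` as documented there:

>>> estimator.resample_model(sample_strategy="sample")  # doctest: +SKIP
>>> estimator.resample_model(sample_strategy="max")  # reset to max weights  # doctest: +SKIP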

__init__(batch_size=64, num_graph_nodes=2, output_type='real', classifier_weight_decay=0.01, darts_type='original', init_weights_function=None, param_updates_per_epoch=10, param_updates_for_sampled_model=100, param_learning_rate_max=0.025, param_learning_rate_min=0.01, param_momentum=0.9, param_weight_decay=0.0003, arch_updates_per_epoch=1, arch_learning_rate_max=0.003, arch_weight_decay=0.0001, arch_weight_decay_df=0.0003, arch_weight_decay_base=0.0, arch_momentum=0.9, fair_darts_loss_weight=1, max_epochs=10, grad_clip=5, primitives=PRIMITIVES, train_classifier_coefficients=False, train_classifier_bias=False, execution_monitor=lambda *args, **kwargs: None, sampling_strategy='max')

Initializes the DARTSRegressor.

Parameters:

    batch_size (int): Batch size for the data loader. Default: 64.
    num_graph_nodes (int): Number of nodes in the desired computation graph. Default: 2.
    output_type (IMPLEMENTED_OUTPUT_TYPES): Type of output function to use. This
        function is applied to transform the output of the mixture architecture.
        Default: 'real'.
    classifier_weight_decay (float): Weight decay for the classifier. Default: 0.01.
    darts_type (IMPLEMENTED_DARTS_TYPES): Type of DARTS to use ('original' or 'fair').
        Default: 'original'.
    init_weights_function (Optional[Callable]): Function to initialize the parameters
        of each operation. Default: None.
    param_updates_per_epoch (int): Number of updates to perform per epoch for the
        operation parameters. Default: 10.
    param_updates_for_sampled_model (int): Number of parameter updates to perform on
        the final sampled model. Default: 100.
    param_learning_rate_max (float): Initial (maximum) learning rate for the operation
        parameters. Default: 0.025.
    param_learning_rate_min (float): Final (minimum) learning rate for the operation
        parameters. Default: 0.01.
    param_momentum (float): Momentum for the operation parameters. Default: 0.9.
    param_weight_decay (float): Weight decay for the operation parameters. Default: 0.0003.
    arch_updates_per_epoch (int): Number of architecture weight updates to perform per
        epoch. Default: 1.
    arch_learning_rate_max (float): Initial (maximum) learning rate for the
        architecture. Default: 0.003.
    arch_weight_decay (float): Weight decay for the architecture weights. Default: 0.0001.
    arch_weight_decay_df (float): An additional weight decay that scales with the
        number of parameters (degrees of freedom) in the operation. The higher this
        weight decay, the more DARTS will prefer simple operations. Default: 0.0003.
    arch_weight_decay_base (float): A base weight decay that is added to the scaled
        weight decay. Default: 0.0.
    arch_momentum (float): Momentum for the architecture weights. Default: 0.9.
    fair_darts_loss_weight (int): Weight of the loss in fair darts which forces
        architecture weights to become either 0 or 1. Default: 1.
    max_epochs (int): Maximum number of epochs to train for. Default: 10.
    grad_clip (float): Gradient clipping value for updating the parameters of the
        operations. Default: 5.
    primitives (Sequence[str]): List of primitive operations used in the DARTS
        network, e.g., 'add', 'subtract', 'none'. For details, see
        autora.theorist.darts.operations. Default: PRIMITIVES.
    train_classifier_coefficients (bool): Whether to train the coefficients of the
        classifier. Default: False.
    train_classifier_bias (bool): Whether to train the bias of the classifier.
        Default: False.
    execution_monitor (Callable): Function to monitor the execution of the model.
        Default: lambda *args, **kwargs: None.
    sampling_strategy (SAMPLING_STRATEGIES): Strategy used to sample the final model
        from the fitted network. Default: 'max'.
Source code in src/autora/theorist/darts/regressor.py, lines 469-577:
def __init__(
    self,
    batch_size: int = 64,
    num_graph_nodes: int = 2,
    output_type: IMPLEMENTED_OUTPUT_TYPES = "real",
    classifier_weight_decay: float = 1e-2,
    darts_type: IMPLEMENTED_DARTS_TYPES = "original",
    init_weights_function: Optional[Callable] = None,
    param_updates_per_epoch: int = 10,
    param_updates_for_sampled_model: int = 100,
    param_learning_rate_max: float = 2.5e-2,
    param_learning_rate_min: float = 0.01,
    param_momentum: float = 9e-1,
    param_weight_decay: float = 3e-4,
    arch_updates_per_epoch: int = 1,
    arch_learning_rate_max: float = 3e-3,
    arch_weight_decay: float = 1e-4,
    arch_weight_decay_df: float = 3e-4,
    arch_weight_decay_base: float = 0.0,
    arch_momentum: float = 9e-1,
    fair_darts_loss_weight: int = 1,
    max_epochs: int = 10,
    grad_clip: float = 5,
    primitives: Sequence[str] = PRIMITIVES,
    train_classifier_coefficients: bool = False,
    train_classifier_bias: bool = False,
    execution_monitor: Callable = (lambda *args, **kwargs: None),
    sampling_strategy: SAMPLING_STRATEGIES = "max",
) -> None:
    """
    Initializes the DARTSRegressor.

    Arguments:
        batch_size: Batch size for the data loader.
        num_graph_nodes: Number of nodes in the desired computation graph.
        output_type: Type of output function to use. This function is applied to transform
            the output of the mixture architecture.
        classifier_weight_decay: Weight decay for the classifier.
        darts_type: Type of DARTS to use ('original' or 'fair').
        init_weights_function: Function to initialize the parameters of each operation.
        param_updates_per_epoch: Number of updates to perform per epoch
            for the operation parameters.
        param_updates_for_sampled_model: Number of parameter updates to perform on
            the final sampled model.
        param_learning_rate_max: Initial (maximum) learning rate for the operation parameters.
        param_learning_rate_min: Final (minimum) learning rate for the operation parameters.
        param_momentum: Momentum for the operation parameters.
        param_weight_decay: Weight decay for the operation parameters.
        arch_updates_per_epoch: Number of architecture weight updates to perform per epoch.
        arch_learning_rate_max: Initial (maximum) learning rate for the architecture.
        arch_weight_decay: Weight decay for the architecture weights.
        arch_weight_decay_df: An additional weight decay that scales with the number of
            parameters (degrees of freedom) in the operation. The higher this weight decay,
            the more DARTS will prefer simple operations.
        arch_weight_decay_base: A base weight decay that is added to the scaled weight decay.
        arch_momentum: Momentum for the architecture weights.
        fair_darts_loss_weight: Weight of the loss in fair darts which forces architecture
            weights to become either 0 or 1.
        max_epochs: Maximum number of epochs to train for.
        grad_clip: Gradient clipping value for updating the parameters of the operations.
        primitives: List of primitive operations used in the DARTS network,
            e.g., 'add', 'subtract', 'none'. For details, see
            [`autora.theorist.darts.operations`][autora.theorist.darts.operations].
        train_classifier_coefficients: Whether to train the coefficients of the classifier.
        train_classifier_bias: Whether to train the bias of the classifier.
        execution_monitor: Function to monitor the execution of the model.
        sampling_strategy: Strategy used to sample the final model from the fitted
            network ("max" or "sample").
    """

    self.batch_size = batch_size

    self.num_graph_nodes = num_graph_nodes
    self.classifier_weight_decay = classifier_weight_decay
    self.darts_type = darts_type
    self.init_weights_function = init_weights_function

    self.param_updates_per_epoch = param_updates_per_epoch
    self.param_updates_for_sampled_model = param_updates_for_sampled_model

    self.param_learning_rate_max = param_learning_rate_max
    self.param_learning_rate_min = param_learning_rate_min
    self.param_momentum = param_momentum
    self.arch_momentum = arch_momentum
    self.param_weight_decay = param_weight_decay

    self.arch_updates_per_epoch = arch_updates_per_epoch
    self.arch_weight_decay = arch_weight_decay
    self.arch_weight_decay_df = arch_weight_decay_df
    self.arch_weight_decay_base = arch_weight_decay_base
    self.arch_learning_rate_max = arch_learning_rate_max
    self.fair_darts_loss_weight = fair_darts_loss_weight

    self.max_epochs = max_epochs
    self.grad_clip = grad_clip

    self.primitives = primitives

    self.output_type = output_type
    self.darts_type = darts_type

    self.X_: Optional[np.ndarray] = None
    self.y_: Optional[np.ndarray] = None
    self.network_: Optional[Network] = None
    self.model_: Optional[Network] = None

    self.train_classifier_coefficients = train_classifier_coefficients
    self.train_classifier_bias = train_classifier_bias

    self.execution_monitor = execution_monitor

    self.sampling_strategy = sampling_strategy
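
A sketch of a non-default configuration; the primitive names here are illustrative only (taken from the `primitives` description above; see `autora.theorist.darts.operations` for the actual options):

>>> estimator = DARTSRegressor(
...     num_graph_nodes=2,
...     max_epochs=50,
...     arch_updates_per_epoch=5,
...     primitives=["none", "add", "subtract"],
... )  # doctest: +SKIP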

fit(X, y)

Runs the optimization for a given set of Xs and ys.

Parameters:

    X (np.ndarray): independent variables in an n-dimensional array. Required.
    y (np.ndarray): dependent variables in an n-dimensional array. Required.

Returns:

    self (DARTSRegressor): the fitted estimator.

Source code in src/autora/theorist/darts/regressor.py, lines 579-603:
def fit(self, X: np.ndarray, y: np.ndarray):
    """
    Runs the optimization for a given set of `X`s and `y`s.

    Arguments:
        X: independent variables in an n-dimensional array
        y: dependent variables in an n-dimensional array

    Returns:
        self (DARTSRegressor): the fitted estimator
    """

    if self.output_type == "class":
        raise NotImplementedError(
            "Classification not implemented for DARTSRegressor."
        )

    params = self.get_params()

    fit_results = _general_darts(X=X, y=y, network=self.network_, **params)
    self.X_ = X
    self.y_ = y
    self.network_ = fit_results.network
    self.model_ = fit_results.model
    return self
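
Because `fit` passes the stored `network_` back into `_general_darts` (see `network=self.network_` above), a second call to `fit` appears to continue the search from the previously optimized network rather than starting from scratch; this is an inference from the code, not a documented guarantee:

>>> estimator = DARTSRegressor(num_graph_nodes=1, max_epochs=5)  # doctest: +SKIP
>>> estimator = estimator.fit(X, y)  # initial architecture search  # doctest: +SKIP
>>> estimator = estimator.fit(X, y)  # resumes from the stored network_  # doctest: +SKIP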

model_repr(input_labels=None, output_labels=None, output_function_label='', decimals_to_display=2, output_format='console')

Returns the equations of the model architecture as a string.

Parameters:

    input_labels (Optional[Sequence[str]]): which names to use for the independent
        variables (X). Default: None.
    output_labels (Optional[Sequence[str]]): which names to use for the dependent
        variables (y). Default: None.
    output_function_label (str): name to use for the output transformation. Default: ''.
    decimals_to_display (int): amount of rounding for the coefficient values. Default: 2.
    output_format (Literal['latex', 'console']): whether the output should be
        formatted for the command line ('console') or as equations in a LaTeX file
        ('latex'). Default: 'console'.

Returns:

    str: The equations of the model architecture.

Source code in src/autora/theorist/darts/regressor.py, lines 726-771:
def model_repr(
    self,
    input_labels: Optional[Sequence[str]] = None,
    output_labels: Optional[Sequence[str]] = None,
    output_function_label: str = "",
    decimals_to_display: int = 2,
    output_format: Literal["latex", "console"] = "console",
) -> str:
    """
    Returns the equations of the model architecture as a string.

    Args:
        input_labels: which names to use for the independent variables (X)
        output_labels: which names to use for the dependent variables (y)
        output_function_label: name to use for the output transformation
        decimals_to_display: amount of rounding for the coefficient values
        output_format: whether the output should be formatted for
            the command line (`console`) or as equations in a latex file (`latex`)

    Returns:
        The equations of the model architecture

    """
    assert self.model_ is not None
    fitted_sampled_network: Network = self.model_[0]

    if input_labels is None:
        input_labels_ = self._get_input_labels()
    else:
        input_labels_ = input_labels

    if output_labels is None:
        output_labels_ = self._get_output_labels()
    else:
        output_labels_ = output_labels

    edge_list = fitted_sampled_network.architecture_to_str_list(
        input_labels=input_labels_,
        output_labels=output_labels_,
        output_function_label=output_function_label,
        decimals_to_display=decimals_to_display,
        output_format=output_format,
    )

    model_repr_ = "\n".join(["Model:"] + edge_list)
    return model_repr_
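
A usage sketch; the exact equations depend on the fitted architecture, so the output shown here is illustrative only:

>>> print(estimator.model_repr())  # doctest: +SKIP
Model:
...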

predict(X)

Applies the fitted model to a set of independent variables X, to give predictions for the dependent variable y.

Parameters:

    X (np.ndarray): independent variables in an n-dimensional array. Required.

Returns:

    y (np.ndarray): predicted dependent variable values.

Source code in src/autora/theorist/darts/regressor.py, lines 605-631:
def predict(self, X: np.ndarray) -> np.ndarray:
    """
    Applies the fitted model to a set of independent variables `X`,
    to give predictions for the dependent variable `y`.

    Arguments:
        X: independent variables in an n-dimensional array

    Returns:
        y: predicted dependent variable values
    """
    X_ = check_array(X)

    # First run the checks using the scikit-learn API, listing the key parameters
    check_is_fitted(self, attributes=["model_"])

    # Since self.model_ is initialized as None, mypy throws an error if we
    # just call self.model_(X) in the predict method, as it could still be none.
    # MyPy doesn't understand that the sklearn check_is_fitted function
    # ensures the self.model_ parameter is initialized and otherwise throws an error,
    # so we check that explicitly here and pass the model which can't be None.
    assert self.model_ is not None

    y_ = self.model_(torch.as_tensor(X_).float())
    y = y_.detach().numpy()

    return y
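
Per the doctest in the class description, predictions are returned as a float32 numpy array with one row per input sample; the shape shown here assumes the 1000-sample `X` from that example:

>>> y_pred = estimator.predict(X)  # doctest: +SKIP
>>> y_pred.shape  # doctest: +SKIP
(1000, 1)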

visualize_model(input_labels=None)

Visualizes the model architecture as a graph.

Parameters:

    input_labels (Optional[Sequence[str]]): labels for the input nodes. Default: None.
Source code in src/autora/theorist/darts/regressor.py, lines 633-676:
def visualize_model(
    self,
    input_labels: Optional[Sequence[str]] = None,
):
    """
    Visualizes the model architecture as a graph.

    Arguments:
        input_labels: labels for the input nodes

    """

    check_is_fitted(self, attributes=["model_"])
    assert self.model_ is not None
    fitted_sampled_network = self.model_[0]

    genotype = Network.genotype(fitted_sampled_network).normal
    (
        _,
        _,
        param_list,
    ) = fitted_sampled_network.count_parameters()

    if input_labels is not None:
        input_labels_ = tuple(input_labels)
    else:
        input_labels_ = self._get_input_labels()

    assert self.y_ is not None
    out_dim = 1 if self.y_.ndim == 1 else self.y_.shape[1]

    out_func = get_output_str(ValueType(self.output_type))

    # call to plot function
    graph = darts_model_plot(
        genotype=genotype,
        input_labels=input_labels_,
        param_list=param_list,
        full_label=True,
        out_dim=out_dim,
        out_fnc=out_func,
    )

    return graph
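
A usage sketch; the returned `graph` is whatever `darts_model_plot` produces, which in a notebook can typically be displayed directly (an assumption about the plotting backend, not a documented guarantee):

>>> graph = estimator.visualize_model(input_labels=["x1"])  # doctest: +SKIP
>>> graph  # doctest: +SKIP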