Example #1
class TestOperatorMatrices:
    """Tests that get_operator_matrix returns the correct matrix."""

    @pytest.mark.parametrize("name,expected", [
        ("PauliX", np.array([[0, 1], [1, 0]])),
        ("PauliY", np.array([[0, -1j], [1j, 0]])),
        ("PauliZ", np.array([[1, 0], [0, -1]])),
        ("Hadamard", np.array([[1, 1], [1, -1]])/np.sqrt(2)),
        ("CNOT", np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]])),
        ("SWAP", np.array([[1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])),
        ("CSWAP",np.array([[1, 0, 0, 0, 0, 0, 0, 0],
                           [0, 1, 0, 0, 0, 0, 0, 0],
                           [0, 0, 1, 0, 0, 0, 0, 0],
                           [0, 0, 0, 1, 0, 0, 0, 0],
                           [0, 0, 0, 0, 1, 0, 0, 0],
                           [0, 0, 0, 0, 0, 0, 1, 0],
                           [0, 0, 0, 0, 0, 1, 0, 0],
                           [0, 0, 0, 0, 0, 0, 0, 1]])),
        ("CZ", np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, -1]])),
    ])
    def test_get_operator_matrix_no_parameters(self, qubit_device_3_wires, tol, name, expected):
        """Tests that get_operator_matrix returns the correct matrix."""

        res = qubit_device_3_wires._get_operator_matrix(name, ())

        assert np.allclose(res, expected, atol=tol, rtol=0)

    @pytest.mark.parametrize("name,expected,par", [
        ('PhaseShift', lambda phi: np.array([[1, 0], [0, np.exp(1j*phi)]]), [0.223]),
        ('RX', lambda phi: np.array([[math.cos(phi/2), -1j*math.sin(phi/2)], [-1j*math.sin(phi/2), math.cos(phi/2)]]), [0.223]),
        ('RY', lambda phi: np.array([[math.cos(phi/2), -math.sin(phi/2)], [math.sin(phi/2), math.cos(phi/2)]]), [0.223]),
        ('RZ', lambda phi: np.array([[cmath.exp(-1j*phi/2), 0], [0, cmath.exp(1j*phi/2)]]), [0.223]),
        ('Rot', lambda phi, theta, omega: np.array([[cmath.exp(-1j*(phi+omega)/2)*math.cos(theta/2), -cmath.exp(1j*(phi-omega)/2)*math.sin(theta/2)], [cmath.exp(-1j*(phi-omega)/2)*math.sin(theta/2), cmath.exp(1j*(phi+omega)/2)*math.cos(theta/2)]]), [0.223, 0.153, 1.212]),
        ('CRX', lambda phi: np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, math.cos(phi/2), -1j*math.sin(phi/2)], [0, 0, -1j*math.sin(phi/2), math.cos(phi/2)]]), [0.223]),
        ('CRY', lambda phi: np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, math.cos(phi/2), -math.sin(phi/2)], [0, 0, math.sin(phi/2), math.cos(phi/2)]]), [0.223]),
        ('CRZ', lambda phi: np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, cmath.exp(-1j*phi/2), 0], [0, 0, 0, cmath.exp(1j*phi/2)]]), [0.223]),
        ('CRot', lambda phi, theta, omega: np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, cmath.exp(-1j*(phi+omega)/2)*math.cos(theta/2), -cmath.exp(1j*(phi-omega)/2)*math.sin(theta/2)], [0, 0, cmath.exp(-1j*(phi-omega)/2)*math.sin(theta/2), cmath.exp(1j*(phi+omega)/2)*math.cos(theta/2)]]), [0.223, 0.153, 1.212]),
        ('QubitUnitary', lambda U: np.asarray(U), [np.array([[0.83645892 - 0.40533293j, -0.20215326 + 0.30850569j], [-0.23889780 - 0.28101519j, -0.88031770 - 0.29832709j]])]),
        ('Hermitian', lambda H: np.asarray(H), [np.array([[1.02789352, 1.61296440 - 0.3498192j], [1.61296440 + 0.3498192j, 1.23920938 + 0j]])]),
        # Identity will always return a 2x2 Identity, but is still parameterized
        ('Identity', lambda n: np.eye(2), [2])
    ])
    def test_get_operator_matrix_with_parameters(self, qubit_device_2_wires, tol, name, expected, par):
        """Tests that get_operator_matrix returns the correct matrix building functions."""

        res = qubit_device_2_wires._get_operator_matrix(name, par)

        assert np.allclose(res, expected(*par), atol=tol, rtol=0)

    @pytest.mark.parametrize("name", ["BasisState", "QubitStateVector"])
    def test_get_operator_matrix_none(self, qubit_device_2_wires, name):
        """Tests that get_operator_matrix returns none for direct state manipulations."""

        res = qubit_device_2_wires._get_operator_matrix(name, ())

        assert res is None
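
# The fixtures used by these tests are not shown in this fragment. A minimal
# sketch of what they might look like, assuming an (older) PennyLane version
# whose default.qubit device exposes _get_operator_matrix; the fixture bodies
# and the tolerance value are assumptions, not from the original source:
import pytest
import numpy as np
import pennylane as qml

@pytest.fixture
def qubit_device_2_wires():
    return qml.device("default.qubit", wires=2)

@pytest.fixture
def qubit_device_3_wires():
    return qml.device("default.qubit", wires=3)

@pytest.fixture
def tol():
    # absolute tolerance used by np.allclose in the assertions above
    return 1e-8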
Example #2
    def _braket_to_pl_result(self, braket_result, circuit):
        """Calculates the PennyLane results from a Braket task result. A PennyLane circuit
        also determines the output observables."""
        # Compute the required statistics
        results = self.statistics(braket_result, circuit.observables)

        # Ensure that a result combination involving Sample does not wrap
        # single-number results in superfluous arrays
        all_sampled = all(obs.return_type is Sample
                          for obs in circuit.observables)
        if circuit.is_sampled and not all_sampled:
            return np.asarray(results, dtype="object")

        return np.asarray(results)

    def _asarray(array, dtype=None):
        """Converts the input to an array; object-dtype (ragged) results are
        flattened into a single one-dimensional array."""
        res = np.asarray(array, dtype=dtype)

        if res.dtype is np.dtype("O"):
            # ragged results (e.g. samples mixed with scalars) come back as an
            # object array; stack and flatten them into one numeric array
            return np.hstack(array).flatten().astype(dtype)

        return res
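
    # A quick doctest-style illustration (toy values; an assumption, not from
    # the original source) of why the object-dtype branch exists: results with
    # mixed shapes come back as an object array, which _asarray flattens into
    # one numeric array:
    #
    #     ragged = np.asarray([np.array([1.0, 2.0, 3.0]), np.array([4.0])], dtype="object")
    #     _asarray(ragged)  # -> array([1., 2., 3., 4.])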
    def test_array_and_list_return_same_update(self, bunch, tol):
        """Tests that gradient descent optimizer has the same output for
         lists and arrays."""
        def hybrid_fun_mdarr(var):
            return quant_fun_mdarr(var) + var[0, 0]

        def hybrid_fun_mdlist(var):
            return quant_fun_mdlist(var) + var[0][0]

        array = bunch.sgd_opt.step(hybrid_fun_mdarr, multid_array)
        ls = bunch.sgd_opt.step(hybrid_fun_mdlist, multid_list)

        assert array == pytest.approx(np.asarray(ls), abs=tol)
Example #5
    def wn_cost_fcn(params, circuit, ang_array, actual):
        """Weighted-neuron cost: mean squared error between the labels and the
        sigmoid readout of the circuit expectations."""
        w = params[:, -1]        # per-qubit neuron weights (last column)
        theta = params[:, :-1]   # circuit rotation parameters
        predictions = np.asarray([
            2. * (1.0 / (1.0 + exp(np.dot(-w, circuit(theta, features=x))))) -
            1. for x in ang_array
        ])
        return mse(actual, predictions)
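
# The helpers `exp` and `mse` used above are not defined in this fragment. A
# minimal sketch of compatible definitions (an assumption; example #15 below
# imports `exp` from autograd the same way, and `np` is assumed to be the
# autograd-compatible pennylane.numpy):
from autograd.numpy import exp  # differentiable elementwise exponential

def mse(actual, predictions):
    # mean squared error between the target labels and the predictions
    return np.mean((np.asarray(actual) - np.asarray(predictions)) ** 2)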
Example #6
def generate_surface(cost_function):
    """Evaluates ``cost_function`` on a 2D grid of rotation angles and returns
    the resulting cost surface. Assumes a global ``wires`` giving the number
    of qubits."""
    Z = []
    Z_assembler = []

    X = np.arange(-np.pi, np.pi, 0.25)
    Y = np.arange(-np.pi, np.pi, 0.25)
    X, Y = np.meshgrid(X, Y)

    for x in X[0, :]:
        for y in Y[:, 0]:
            rotations = [[x for i in range(wires)], [y for i in range(wires)]]
            Z_assembler.append(cost_function(rotations))
        Z.append(Z_assembler)
        Z_assembler = []

    Z = np.asarray(Z)
    return Z
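
# A sketch of how the returned surface might be plotted (the matplotlib usage
# and `cost_function` are assumptions for illustration; generate_surface
# itself only builds Z):
import matplotlib.pyplot as plt
from matplotlib import cm

X = np.arange(-np.pi, np.pi, 0.25)
Y = np.arange(-np.pi, np.pi, 0.25)
X, Y = np.meshgrid(X, Y)
Z = generate_surface(cost_function)

fig = plt.figure()
ax = fig.add_subplot(projection="3d")
# depending on the loop order in generate_surface, Z.T may be the
# correct orientation for this meshgrid
ax.plot_surface(X, Y, Z, cmap=cm.viridis)
plt.show()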
Example #7
    def wn_cost_fcn(params, circuit, ang_array, actual):
        """Weighted-neuron cost: mean squared error between the labels and the
        sigmoid readout of the circuit expectations.

        Args:
          params: trainable parameters; the last column holds the neuron
            weights, the rest are circuit rotation angles
          circuit: QNode returning one expectation value per qubit
          ang_array: batch of input feature vectors
          actual: target labels for the batch

        Returns:
          The mean squared error over the batch.
        """
        w = params[:, -1]
        theta = params[:, :-1]
        predictions = np.asarray([
            2. * (1.0 / (1.0 + exp(np.dot(-w, circuit(theta, x))))) - 1.
            for x in ang_array
        ])
        return mse(actual, predictions)
Example #8
# Since we are not going to train the quantum convolution layer, it is more
# efficient to apply it as a "pre-processing" layer to all the images of our dataset.
# Later an entirely classical model will be directly trained and tested on the
# pre-processed dataset, avoiding unnecessary repetitions of quantum computations.
#
# The pre-processed images will be saved in the folder ``SAVE_PATH``.
# Once saved, they can be directly loaded by setting ``PREPROCESS = False``,
# otherwise the quantum convolution is evaluated at each run of the code.

if PREPROCESS:
    q_train_images = []
    print("Quantum pre-processing of train images:")
    for idx, img in enumerate(train_images):
        print("{}/{}        ".format(idx + 1, n_train), end="\r")
        q_train_images.append(quanv(img))
    q_train_images = np.asarray(q_train_images)

    q_test_images = []
    print("\nQuantum pre-processing of test images:")
    for idx, img in enumerate(test_images):
        print("{}/{}        ".format(idx + 1, n_test), end="\r")
        q_test_images.append(quanv(img))
    q_test_images = np.asarray(q_test_images)

    # Save pre-processed images
    np.save(SAVE_PATH + "q_train_images.npy", q_train_images)
    np.save(SAVE_PATH + "q_test_images.npy", q_test_images)

# Load pre-processed images
q_train_images = np.load(SAVE_PATH + "q_train_images.npy")
q_test_images = np.load(SAVE_PATH + "q_test_images.npy")
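
# ``quanv`` is defined earlier in the quanvolution demo this snippet comes
# from. A minimal sketch of a typical quanvolution layer, under the usual
# assumptions (28x28 single-channel input, a 2x2 stride-2 window, and a
# quantum ``circuit`` returning 4 expectation values; all assumptions, not
# taken from this fragment):
def quanv(image):
    """Convolve the input image with many applications of the same circuit."""
    out = np.zeros((14, 14, 4))
    # loop over the image coordinates in steps of two pixels
    for j in range(0, 28, 2):
        for k in range(0, 28, 2):
            # process a 2x2 region of the image with the quantum circuit
            q_results = circuit([
                image[j, k, 0],
                image[j, k + 1, 0],
                image[j + 1, k, 0],
                image[j + 1, k + 1, 0],
            ])
            # assign each expectation value to a different output channel
            for c in range(4):
                out[j // 2, k // 2, c] = q_results[c]
    return out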
Example #9
def train_circuit(circuit, parameter_shape, X_train, Y_train, batch_size,
                  learning_rate, **kwargs):
    """
    train a circuit classifier
    Args:
        circuit (qml.QNode): A circuit that you want to train
        parameter_shape: A tuple describing the shape of the parameters. The first entry is the number of qubits,
        the second one is the number of layers in the circuit architecture.
        X_train (np.ndarray): An array of floats of size (M, n) to be used as training data.
        Y_train (np.ndarray): An array of size (M,) which are the categorical labels
            associated to the training data.

        batch_size (int): Batch size for the circuit training.

        learning_rate (float): The learning rate/step size of the optimizer.

        kwargs: Hyperparameters for the training (passed as keyword arguments). There are the following hyperparameters:

            nsteps (int) : Number of training steps.

            optim (pennylane.optimize instance): Optimizer used during the training of the circuit.
                Pass as qml.OptimizerName.

            Tmax (list): Maximum point T as defined in https://arxiv.org/abs/2010.08512. (Definition 8)
                    The first element is the maximum number of parameters among all architectures,
                    the second is the maximum inference time among all architectures in terms of computing time,
                    the third one is the maximum inference time among all architectures in terms of the number of CNOTS
                    in the circuit

            rate_type (string): Determines the type of error rate in the W-coefficient.
                    If rate_type == 'accuracy', the inference time of the circuit
                    is equal to the time it takes to evaluate the accuracy of the trained circuit with
                    respect to a validation batch three times the size of the training batch size and
                    the error rate is equal to 1-accuracy (w.r.t. to a validation batch).

                    If rate_type == 'accuracy', the inference time of the circuit is equal to the time
                    it takes to train the circuit (for nsteps training steps) and compute the cost at
                    each step and the error rate is equal to the cost after nsteps training steps.






    Returns:
        (W_,weights): W-coefficient, trained weights
    """

    # fix the seed while debugging:
    # np.random.seed(1337)
    def ohe_cost_fcn(params, circuit, ang_array, actual):
        """One-hot-encoding cost: mean squared error between the labels and
        the rescaled circuit expectations."""
        predictions = (np.stack([circuit(params, x)
                                 for x in ang_array]) + 1) * 0.5
        return mse(actual, predictions)

    def wn_cost_fcn(params, circuit, ang_array, actual):
        """Weighted-neuron cost: mean squared error between the labels and the
        sigmoid readout of the circuit expectations."""
        w = params[:, -1]        # per-qubit neuron weights (last column)
        theta = params[:, :-1]   # circuit rotation parameters
        predictions = np.asarray([
            2. * (1.0 / (1.0 + exp(np.dot(-w, circuit(theta, features=x))))) -
            1. for x in ang_array
        ])
        return mse(actual, predictions)

    if kwargs['readout_layer'] == 'one_hot':
        var = np.zeros(parameter_shape)
    elif kwargs['readout_layer'] == "weighted_neuron":
        var = np.hstack(
            (np.zeros(parameter_shape), np.random.random(
                (kwargs['nqubits'], 1)) - 0.5))
    rate_type = kwargs['rate_type']
    inf_time = kwargs['inf_time']
    optim = kwargs['optim']
    numcnots = kwargs['numcnots']

    # Tmax[0]: maximum parameter count; Tmax[1]: maximum inference time (timeit);
    # Tmax[2]: maximum number of entangling gates
    Tmax = kwargs['Tmax']
    num_train = len(Y_train)
    validation_size = int(0.1 * num_train)
    # all optimizers in the autograd module accept a `stepsize` argument,
    # so this works for any of them
    opt = optim(stepsize=learning_rate)
    start = time.time()
    for _ in range(kwargs['nsteps']):
        batch_index = np.random.randint(0, num_train, (batch_size, ))
        X_train_batch = np.asarray(X_train[batch_index])
        Y_train_batch = np.asarray(Y_train[batch_index])
        if kwargs['readout_layer'] == 'one_hot':
            var, cost = opt.step_and_cost(
                lambda v: ohe_cost_fcn(v, circuit, X_train_batch, Y_train_batch
                                       ), var)
        elif kwargs['readout_layer'] == 'weighted_neuron':
            var, cost = opt.step_and_cost(
                lambda v: wn_cost_fcn(v, circuit, X_train_batch, Y_train_batch
                                      ), var)
    end = time.time()
    cost_time = (end - start)

    if kwargs['rate_type'] == 'accuracy':
        validation_batch = np.random.randint(0, num_train, (validation_size, ))
        X_validation_batch = np.asarray(X_train[validation_batch])
        Y_validation_batch = np.asarray(Y_train[validation_batch])
        start = time.time()  # add in timeit function from Wbranch
        if kwargs['readout_layer'] == 'one_hot':
            predictions = np.stack(
                [circuit(var, x) for x in X_validation_batch])
        elif kwargs['readout_layer'] == 'weighted_neuron':
            n = kwargs.get('nqubits')
            w = var[:, -1]
            theta = var[:, :-1]
            predictions = [
                int(
                    np.round(
                        2. *
                        (1.0 /
                         (1.0 + exp(np.dot(-w, circuit(theta, features=x))))) -
                        1., 1)) for x in X_validation_batch
            ]
        end = time.time()
        inftime = (end - start) / len(X_validation_batch)
        if kwargs['readout_layer'] == 'one_hot':
            # add a small epsilon to prevent divide-by-zero errors
            err_rate = (1.0 - ohe_accuracy(Y_validation_batch, predictions)) + 10**-7
        elif kwargs['readout_layer'] == 'weighted_neuron':
            # add a small epsilon to prevent divide-by-zero errors
            err_rate = (1.0 - wn_accuracy(Y_validation_batch, predictions)) + 10**-7
    elif kwargs['rate_type'] == 'batch_cost':
        # add a small epsilon to prevent divide-by-zero errors
        err_rate = cost + 10**-7
        inftime = cost_time

    if kwargs['inf_time'] == 'timeit':

        W_ = np.abs((Tmax[0] - len(var)) / (Tmax[0])) * np.abs(
            (Tmax[1] - inftime) / (Tmax[1])) * (1. / err_rate)

    elif kwargs['inf_time'] == 'numcnots':
        nc_ = numcnots
        W_ = np.abs((Tmax[0] - len(var)) / (Tmax[0])) * np.abs(
            (Tmax[2] - nc_) / (Tmax[2])) * (1. / err_rate)

    return W_, var
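
# A hedged usage sketch of train_circuit (the QNode, data arrays, and every
# hyperparameter value below are placeholders for illustration, not values
# from the original source):
W_, weights = train_circuit(
    circuit,                 # assumed: QNode returning one expval per qubit
    (4, 3),                  # assumed parameter shape: (nqubits, nlayers)
    X_train, Y_train,        # assumed: arrays of shape (M, n) and (M,)
    batch_size=16,
    learning_rate=0.1,
    nsteps=30,
    optim=qml.GradientDescentOptimizer,
    Tmax=[200, 1.0, 60],
    rate_type='batch_cost',
    inf_time='timeit',
    readout_layer='weighted_neuron',
    nqubits=4,
    numcnots=12,
)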
Example #10
# visual check
plt.scatter(one_samples[0], one_samples[1])
plt.xlim(0, 28)
plt.ylim(0, 28)
plt.show()

# split the two datasets, containing y=0 and y=1 respectively, into 5 datasets:
## distinct_zeros: samples unique to y=0
## distinct_ones: samples unique to y=1
## duplicates: samples that are both in y=0 and y=1
## not_ones: samples that are not in the "unique to y=0" set
## not_zeros: samples that are not in the "unique to y=1" set

zeros = np.unique(np.asarray(zero_samples)[:].T, axis=0)
ones = np.unique(np.asarray(one_samples)[:].T, axis=0)
distinct_zeros = []
distinct_ones = []
duplicates = []
# find unique zeros and duplicates
for sample in zeros:
    # np.where returns a tuple of index arrays, so inspect first_index[0]
    first_index = np.where(ones[:, 0] == sample[0])
    if len(first_index[0]) > 0:
        second_index = np.where(ones[first_index][:, 1] == sample[1])
        if len(second_index[0]) > 0:
            duplicates.append(sample)
        else:
            distinct_zeros.append(sample)
    else:
        distinct_zeros.append(sample)
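
# An equivalent, vectorized way to build the three sets (a sketch; the loop
# above does the same thing row by row):
zeros_set = {tuple(s) for s in zeros}
ones_set = {tuple(s) for s in ones}
duplicates = [np.asarray(s) for s in zeros_set & ones_set]
distinct_zeros = [np.asarray(s) for s in zeros_set - ones_set]
distinct_ones = [np.asarray(s) for s in ones_set - zeros_set]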
Example #11
def vqe_runner(
    backend,
    hamiltonian,
    x0,
    program_id,
    ansatz="EfficientSU2",
    ansatz_config=None,
    optimizer="SPSA",
    optimizer_config=None,
    shots=8192,
    use_measurement_mitigation=False,
    **kwargs,
):
    """Routine that executes a given VQE problem via the sample-vqe program on the target backend.

    Args:
        backend (ProgramBackend): Qiskit backend instance.
        hamiltonian (qml.Hamiltonian): Hamiltonian whose ground state we want to find.
        x0 (array_like): Initial vector of parameters.
        program_id (str): ID of the program, generated with the upload_vqe_runner
            function. Once the program is uploaded, you can find the ID in your online program list.
        ansatz (Quantum function or str): Optional, a PennyLane quantum function or the name of the Qiskit
            ansatz quantum circuit to use. Default='EfficientSU2'
        ansatz_config (dict): Optional, configuration parameters for the ansatz circuit if from Qiskit library.
        optimizer (str): Optional, string specifying classical optimizer. Default='SPSA'.
        optimizer_config (dict): Optional, configuration parameters for the optimizer.
        shots (int): Optional, number of shots to take per circuit. Default=8192.
        use_measurement_mitigation (bool): Optional, use measurement mitigation. Default=False.

    Returns:
        OptimizeResult: The result in SciPy optimization format.
    """
    # Initialize the default dictionaries
    if ansatz_config is None:
        ansatz_config = {}

    if optimizer_config is None:
        optimizer_config = {"maxiter": 100}

    if not isinstance(hamiltonian, qml.Hamiltonian):
        raise qml.QuantumFunctionError(
            "A PennyLane Hamiltonian object is required.")

    connect(kwargs)

    options = {"backend_name": backend}

    inputs = {}

    # Validate circuit ansatz and number of qubits
    if not isinstance(ansatz, str):
        inputs["x0"], inputs[
            "ansatz"], num_qubits, wires = _pennylane_to_qiskit_ansatz(
                ansatz, x0, hamiltonian)

    # The circuit will be taken from the Qiskit library as a str was passed.
    else:
        wires = hamiltonian.wires
        num_qubits = len(wires)

        ansatz_circ = getattr(lib_local, ansatz, None)
        if ansatz_circ is None:
            raise ValueError(
                f"Ansatz {ansatz} not in n_local circuit library.")

        inputs["ansatz"] = ansatz
        inputs["ansatz_config"] = ansatz_config

        # If given x0, validate its length against num_params in ansatz
        x0 = np.asarray(x0)
        ansatz_circ = ansatz_circ(num_qubits, **ansatz_config)
        num_params = ansatz_circ.num_parameters

        if x0.shape[0] != num_params:
            warnings.warn(
                "The shape of the parameters array is not correct; a random initialization has been applied."
            )
            x0 = 2 * np.pi * np.random.rand(num_params)

        inputs["x0"] = x0

    # Transform the PennyLane Hamiltonian to a suitable form
    hamiltonian = hamiltonian_to_list_string(hamiltonian, wires)

    inputs["hamiltonian"] = hamiltonian

    # Set the rest of the inputs
    inputs["optimizer"] = optimizer
    inputs["optimizer_config"] = optimizer_config
    inputs["shots"] = shots
    inputs["use_measurement_mitigation"] = use_measurement_mitigation

    # Specify a single hub, group and project
    hub = kwargs.get("hub", "ibm-q")
    group = kwargs.get("group", "open")
    project = kwargs.get("project", "main")

    provider = IBMQ.get_provider(hub=hub, group=group, project=project)

    rt_job = RuntimeJobWrapper()

    # Callbacks functions are different between optimizers.
    if optimizer in ["SPSA", "QNSPSA"]:
        job = provider.runtime.run(program_id,
                                   options=options,
                                   inputs=inputs,
                                   callback=rt_job._callback)
    else:
        job = provider.runtime.run(program_id,
                                   options=options,
                                   inputs=inputs,
                                   callback=rt_job._scipy_callback)
    rt_job._job = job

    return rt_job
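
# A hedged usage sketch (the backend name, Hamiltonian, and parameter vector
# are placeholders; the program id must come from upload_vqe_runner and is
# left elided):
H = qml.Hamiltonian([1.0, 0.5], [qml.PauliZ(0), qml.PauliX(0) @ qml.PauliX(1)])
job = vqe_runner(
    backend="ibmq_qasm_simulator",
    hamiltonian=H,
    x0=np.random.uniform(0, 2 * np.pi, 16),
    program_id="...",  # elided: the id returned by upload_vqe_runner
    ansatz="EfficientSU2",
    optimizer="SPSA",
    optimizer_config={"maxiter": 40},
    shots=8192,
)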
Example #12
    print(len(X), len(np.unique(X, axis=0)))
    assert False, "DATA NOT UNIQUE, DUPLICATES DETECTED!"

# make sure we have balanced data
X_neg = []
X_pos = []
Y_neg = []
Y_pos = []
for i in range(len(Y)):
    if Y[i] < 0:
        X_neg.append(X[i])
        Y_neg.append(Y[i])
    else:
        X_pos.append(X[i])
        Y_pos.append(Y[i])
X_neg = np.asarray(X_neg)
X_pos = np.asarray(X_pos)
Y_neg = np.asarray(Y_neg)
Y_pos = np.asarray(Y_pos)

# shuffle our data, positive and negative samples separately
randomize_neg = np.arange(len(X_neg))
np.random.shuffle(randomize_neg)
X_neg = X_neg[randomize_neg]
Y_neg = Y_neg[randomize_neg]
randomize_pos = np.arange(len(X_pos))
np.random.shuffle(randomize_pos)
X_pos = X_pos[randomize_pos]
Y_pos = Y_pos[randomize_pos]

# first the stitching and reshuffling of the train data (a hedged sketch follows)
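
# The stitching code itself is cut off in this fragment. A minimal sketch of
# what the comment above describes (balancing to the smaller class and a
# final reshuffle are assumptions about the intent):
n = min(len(X_neg), len(X_pos))
X_train = np.concatenate((X_neg[:n], X_pos[:n]))
Y_train = np.concatenate((Y_neg[:n], Y_pos[:n]))
randomize = np.arange(len(X_train))
np.random.shuffle(randomize)
X_train = X_train[randomize]
Y_train = Y_train[randomize]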
# The underscore-named functions below read like handlers registered with a
# singledispatch-style translator; the registering decorator is not shown in
# this fragment.
def _(_: qml.QubitChannel, parameters):
    K_list = [np.asarray(matrix) for matrix in parameters[0]]
    return noises.Kraus(K_list)
def _(qubit_unitary: qml.QubitUnitary, parameters):
    U = np.asarray(parameters[0])
    return gates.Unitary(U.conj().T) if qubit_unitary.inverse else gates.Unitary(U)
Example #15
def train_best(circuit, pre_trained_vals, X_train, Y_train, batch_size,
               learning_rate, **kwargs):
    """train a circuit classifier

    Args:
      circuit(qml.QNode): A circuit that you want to train
      parameter_shape: A tuple describing the shape of the parameters. The first entry is the number of qubits,
      parameter_shape: A tuple describing the shape of the parameters. The first entry is the number of qubits,
    the second one is the number of layers in the circuit architecture.
      X_train(np.ndarray): An array of floats of size (M, n) to be used as training data.
      Y_train(np.ndarray): An array of size (M,) which are the categorical labels
    associated to the training data.
      batch_size(int): Batch size for the circuit training.
      learning_rate(float): The learning rate/step size of the optimizer.
      kwargs: Hyperparameters for the training (passed as keyword arguments). There are the following hyperparameters:
    nsteps (int) : Number of training steps.
    optim (pennylane.optimize instance): Optimizer used during the training of the circuit.
    Pass as qml.OptimizerName.
    Tmax (list): Maximum point T as defined in https://arxiv.org/abs/2010.08512. (Definition 8)
    The first element is the maximum number of parameters among all architectures,
    the second is the maximum inference time among all architectures in terms of computing time,
    the third one is the maximum inference time among all architectures in terms of the number of CNOTS
    in the circuit
    rate_type (string): Determines the type of error rate in the W-coefficient.
    If rate_type == 'accuracy', the inference time of the circuit
    is equal to the time it takes to evaluate the accuracy of the trained circuit with
    respect to a validation batch three times the size of the training batch size and
    the error rate is equal to 1-accuracy (w.r.t. to a validation batch).
    If rate_type == 'accuracy', the inference time of the circuit is equal to the time
    it takes to train the circuit (for nsteps training steps) and compute the cost at
    each step and the error rate is equal to the cost after nsteps training steps.
      pre_trained_vals:
      **kwargs:

    Returns:
      Yprime: final predictions, final accuracy

    """
    from autograd.numpy import exp

    def ohe_cost_fcn(params, circuit, ang_array, actual):
        """One-hot-encoding cost: mean squared error between the labels and
        the rescaled circuit expectations.

        Args:
          params: trainable circuit parameters
          circuit: QNode returning one expectation value per qubit
          ang_array: batch of input feature vectors
          actual: target labels for the batch

        Returns:
          The mean squared error over the batch.
        """
        predictions = (np.stack([circuit(params, x)
                                 for x in ang_array]) + 1) * 0.5
        return mse(actual, predictions)

    def wn_cost_fcn(params, circuit, ang_array, actual):
        """Weighted-neuron cost: mean squared error between the labels and the
        sigmoid readout of the circuit expectations.

        Args:
          params: trainable parameters; the last column holds the neuron
            weights, the rest are circuit rotation angles
          circuit: QNode returning one expectation value per qubit
          ang_array: batch of input feature vectors
          actual: target labels for the batch

        Returns:
          The mean squared error over the batch.
        """
        w = params[:, -1]
        theta = params[:, :-1]
        predictions = np.asarray([
            2. * (1.0 / (1.0 + exp(np.dot(-w, circuit(theta, x))))) - 1.
            for x in ang_array
        ])
        return mse(actual, predictions)

    if kwargs['readout_layer'] == 'one_hot':
        var = pre_trained_vals
    elif kwargs['readout_layer'] == "weighted_neuron":
        var = pre_trained_vals
    rate_type = kwargs['rate_type']
    optim = kwargs['optim']
    num_train = len(Y_train)
    validation_size = int(0.1 * num_train)
    # all optimizers in the autograd module accept a `stepsize` argument,
    # so this works for any of them
    opt = optim(stepsize=learning_rate)

    for _ in range(kwargs['nsteps']):
        batch_index = np.random.randint(0, num_train, (batch_size, ))
        X_train_batch = np.asarray(X_train[batch_index])
        Y_train_batch = np.asarray(Y_train[batch_index])

        if kwargs['readout_layer'] == 'one_hot':
            var, cost = opt.step_and_cost(
                lambda v: ohe_cost_fcn(v, circuit, X_train_batch, Y_train_batch
                                       ), var)
        elif kwargs['readout_layer'] == 'weighted_neuron':
            var, cost = opt.step_and_cost(
                lambda v: wn_cost_fcn(v, circuit, X_train_batch, Y_train_batch
                                      ), var)
        print("step", _, "cost", cost)
        # check for early stopping
        if _ % 5 == 0:
            validation_batch = np.random.randint(0, num_train,
                                                 (validation_size, ))
            X_validation_batch = np.asarray(X_train[validation_batch])
            Y_validation_batch = np.asarray(Y_train[validation_batch])
            if kwargs['rate_type'] == 'accuracy':
                if kwargs['readout_layer'] == 'one_hot':
                    predictions = np.stack(
                        [circuit(var, x) for x in X_validation_batch])
                    acc = ohe_accuracy(Y_validation_batch, predictions)
                elif kwargs['readout_layer'] == 'weighted_neuron':
                    n = kwargs.get('nqubits')
                    w = var[:, -1]
                    theta = var[:, :-1].numpy()
                    predictions = [
                        int(
                            np.round(
                                2. *
                                (1.0 /
                                 (1.0 + exp(np.dot(-w, circuit(theta, x))))) -
                                1., 1)) for x in X_validation_batch
                    ]
                    acc = wn_accuracy(Y_validation_batch, predictions)
                if acc > 0.95:
                    break

            elif kwargs['rate_type'] == 'batch_cost':
                if cost < 0.001:
                    break
    # make final predictions
    if kwargs['readout_layer'] == 'one_hot':
        final_predictions = np.stack([circuit(var, x) for x in X_train])
    elif kwargs['readout_layer'] == 'weighted_neuron':
        n = kwargs.get('nqubits')
        w = var[:, -1]
        theta = var[:, :-1]
        final_predictions = [
            int(
                np.round(
                    2. * (1.0 / (1.0 + exp(np.dot(-w, circuit(theta, x))))) -
                    1., 1)) for x in X_train
        ]
    return var, final_predictions
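
# A hedged usage sketch of train_best, reusing weights produced by a previous
# training run (every value below is a placeholder for illustration, not from
# the original source):
var, final_predictions = train_best(
    circuit,                 # assumed: QNode returning one expval per qubit
    pre_trained_vals,        # assumed: weights from an earlier train_circuit call
    X_train, Y_train,
    batch_size=16,
    learning_rate=0.05,
    nsteps=30,
    optim=qml.GradientDescentOptimizer,
    rate_type='batch_cost',
    readout_layer='weighted_neuron',
    nqubits=4,
)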