def grad_weak(self):
    """Fill ``self.grad_F`` with the phase gradient of the "weak" cost.

    For every phase the stored value is
    ``4 * Re(sum((|u|^2 - |target|^2) * conj(u) * du))``, which is the
    derivative of ``sum((|u|^2 - |target|^2)^2)``; ``u`` is the matrix
    returned by ``self.forward()`` and ``du`` its derivative with respect to
    that phase.

    Side effects: overwrites ``self.A[0..N]``, ``self.B[0..N]`` and every
    entry of ``self.grad_F``. Returns ``None``.
    """
    size = self.N
    phase_layers = create_list_fl(self.F, size)
    unitaries = self.list_U

    # Products of the layers to the left of layer l:
    # A[N] = I, A[l] = A[l + 1] . fl[l + 1] . U[l].
    self.A[size] = np.eye(size, dtype=complex)
    for layer in reversed(range(size)):
        self.A[layer] = np.dot(
            self.A[layer + 1],
            np.dot(phase_layers[layer + 1], unitaries[layer]))

    # Products of the layers to the right of layer l:
    # B[0] = I, B[l] = U[l - 1] . fl[l - 1] . B[l - 1].
    self.B[0] = np.eye(size, dtype=complex)
    for layer in range(1, size + 1):
        self.B[layer] = np.dot(
            np.dot(unitaries[layer - 1], phase_layers[layer - 1]),
            self.B[layer - 1])

    u = self.forward()

    def phase_derivative(phase, left, selector, right):
        # Derivative of u with respect to one phase; ``selector`` is
        # self.D[k] (presumably the projector onto channel k - confirm).
        return 1j * np.exp(1j * phase) * np.dot(left, np.dot(selector, right))

    def cost_slope(grad_u):
        # Chain rule for sum((|u|^2 - |target|^2)^2).
        return 4 * np.sum(
            (np.abs(u)**2 - np.abs(self.target)**2) * u.conj() * grad_u).real

    # Phases of layers 0..N-1 live in columns 0..N-2 of self.F.
    for layer in range(size):
        for channel in range(size - 1):
            grad_u = phase_derivative(self.F[layer][channel], self.A[layer],
                                      self.D[channel], self.B[layer])
            self.grad_F[layer][channel] = cost_slope(grad_u)

    # Phases of the final layer (index N) live in the last column of self.F.
    for channel in range(size):
        grad_u = phase_derivative(self.F[channel][self.N - 1], self.A[self.N],
                                  self.D[channel], self.B[self.N])
        self.grad_F[channel][self.N - 1] = cost_slope(grad_u)
def grad_fidelity(self):
    """Fill ``self.grad_F`` with the phase gradient of the fidelity cost.

    With ``z = trace(target^H . u)`` and ``u = self.forward()``, the value
    stored for each phase is ``-(2 / N^2) * Re(conj(z) * trace(target^H . du))``,
    i.e. the derivative of ``-|z|^2 / N^2``; ``du`` is the derivative of
    ``u`` with respect to that phase.

    Side effects: overwrites ``self.A[0..N]``, ``self.B[0..N]`` and every
    entry of ``self.grad_F``. Returns ``None``.
    """
    size = self.N
    phase_layers = create_list_fl(self.F, size)
    unitaries = self.list_U

    # A[N] = I, A[l] = A[l + 1] . fl[l + 1] . U[l]
    self.A[size] = np.eye(size, dtype=complex)
    for layer in reversed(range(size)):
        self.A[layer] = np.dot(
            self.A[layer + 1],
            np.dot(phase_layers[layer + 1], unitaries[layer]))

    # B[0] = I, B[l] = U[l - 1] . fl[l - 1] . B[l - 1]
    self.B[0] = np.eye(size, dtype=complex)
    for layer in range(1, size + 1):
        self.B[layer] = np.dot(
            np.dot(unitaries[layer - 1], phase_layers[layer - 1]),
            self.B[layer - 1])

    u = self.forward()
    z = np.trace(np.dot(self.target.T.conj(), u))

    def phase_derivative(phase, left, selector, right):
        # Derivative of u with respect to one phase; ``selector`` is
        # self.D[k] (presumably the projector onto channel k - confirm).
        return 1j * np.exp(1j * phase) * np.dot(left, np.dot(selector, right))

    def cost_slope(grad_u):
        overlap_slope = np.trace(np.dot(self.target.T.conj(), grad_u))
        return - (2 / (self.N ** 2)) * (z.conjugate() * overlap_slope).real

    # Phases of layers 0..N-1 live in columns 0..N-2 of self.F.
    for layer in range(size):
        for channel in range(size - 1):
            grad_u = phase_derivative(self.F[layer][channel], self.A[layer],
                                      self.D[channel], self.B[layer])
            self.grad_F[layer][channel] = cost_slope(grad_u)

    # Phases of the final layer (index N) live in the last column of self.F.
    for channel in range(size):
        grad_u = phase_derivative(self.F[channel][self.N - 1], self.A[self.N],
                                  self.D[channel], self.B[self.N])
        self.grad_F[channel][self.N - 1] = cost_slope(grad_u)
def grad_frobenius(self, mini_batch_f, mini_batch_u):
    """Compute ``self.grad_U``: the mini-batch gradient of the Frobenius cost.

    For every sample ``k`` the method rebuilds the partial products
    ``self.list_A[k][l]`` (everything applied after basis matrix ``l``) and
    ``self.list_B[k][l]`` (everything applied before it), so that the
    derivative of the interferometer output with respect to basis matrix
    ``l`` is ``A . D . B``. The real part of each accumulated entry is the
    slope along the real direction and the imaginary part the slope along the
    imaginary direction of entry ``(p, t)`` of ``self.list_U[l]``.

    Args:
        mini_batch_f: indexable collection of phase sets; element ``k`` is
            passed to ``create_list_fl``.
        mini_batch_u: indexable collection of target matrices; element ``k``
            is compared with the interferometer output for sample ``k``.

    Side effects: overwrites ``self.grad_U``, ``self.list_A[k]`` and
    ``self.list_B[k]`` for ``k < self.mini_batch_size``. Returns ``None``.
    """
    # Start from zero so the per-sample contributions can be summed.
    for l in range(self.N):
        self.grad_U[l] = np.zeros((self.N, self.N), dtype=complex)

    for k in range(self.mini_batch_size):
        fl = create_list_fl(mini_batch_f[k], self.N)

        # list_A[k][l] = fl[N] . U[N-1] . fl[N-1] ... U[l+1] . fl[l+1]
        self.list_A[k][self.N - 1] = fl[self.N]
        for l in range(self.N - 2, -1, -1):
            self.list_A[k][l] = np.dot(self.list_A[k][l + 1],
                                       np.dot(self.list_U[l + 1], fl[l + 1]))

        # list_B[k][l] = fl[l] . U[l-1] . fl[l-1] ... U[0] . fl[0]
        self.list_B[k][0] = fl[0]
        for l in range(1, self.N, 1):
            self.list_B[k][l] = np.dot(np.dot(fl[l], self.list_U[l - 1]),
                                       self.list_B[k][l - 1])

        u_target = mini_batch_u[k]
        u_result = interferometer(fl, self.list_U, self.N)
        # The residual does not depend on (l, p, t): compute it once per sample.
        residual_conj = (u_result - u_target).conj()

        for l in range(self.N):
            # The partial products depend only on (k, l): fetch them once.
            a = self.list_A[k][l]
            b = self.list_B[k][l]
            for p in range(self.N):
                for t in range(self.N):
                    # self.D[p][t] is used as d(U_l)/d(U_l[p][t])
                    # (presumably the (p, t) matrix unit - confirm).
                    grad_u_x = np.dot(a, np.dot(self.D[p][t], b))
                    # The imaginary direction is the same product times 1j;
                    # reuse it instead of multiplying the matrices again.
                    grad_u_y = 1j * grad_u_x
                    self.grad_U[l][p][t] += (
                        (2 / self.N) * np.sum(residual_conj * grad_u_x).real
                        + 1j * (2 / self.N) * np.sum(residual_conj * grad_u_y).real)

    # Average the gradient over the mini-batch.
    for l in range(self.N):
        self.grad_U[l] = self.grad_U[l] / self.mini_batch_size
def func_sst(x, network, mini_batch_f, mini_batch_u, n):
    """Return the SST cost of the trial basis matrices, averaged over a mini-batch.

    Args:
        x: flat parameter vector; converted to a list of basis matrices with
            ``transform_to_matrix(x, n)``.
        network: not used by this function; kept so the signature matches the
            other cost functions handed to the optimizer.
        mini_batch_f: phase sets of the mini-batch, one per sample.
        mini_batch_u: target matrices of the mini-batch, one per sample.
        n: size parameter forwarded to the helper functions.

    Returns:
        The mean of ``sst(interferometer(...), target)`` over the samples.
    """
    sample_count = len(mini_batch_f)
    # Rebuild the list of trial basis matrices from the flat vector.
    trial_matrices = transform_to_matrix(x, n)

    total = 0.0
    for index in range(sample_count):
        phase_layers = create_list_fl(mini_batch_f[index], n)
        produced = interferometer(phase_layers, trial_matrices, n)
        total = total + sst(produced, mini_batch_u[index])

    return total / len(mini_batch_f)
def save_sample_unitary_matrices(n, m, file_name1, file_name2):
    """Generate a random training sample and write it to ``file_name2``.

    ``m`` random ``n`` x ``n`` phase matrices are drawn, ``n`` basis matrices
    are read from ``file_name1``, and each phase matrix is pushed through
    ``interferometer`` to obtain the matching output matrix.

    Input format (``file_name1``): one number per line, real part then
    imaginary part for each matrix element in row-major order.
    Output format (``file_name2``): first the ``m`` phase matrices (one real
    number per line), then the ``m`` output matrices (real and imaginary part
    on separate lines).

    Args:
        n: matrix size; also the number of basis matrices read.
        m: number of samples to generate.
        file_name1: path of the file holding the basis matrices.
        file_name2: path of the file to write the sample to (overwritten).

    Raises:
        ValueError: if a line of ``file_name1`` cannot be parsed as a float
            (including a premature end of file).
    """
    # Random phases for the sample (m different n-by-n matrices).
    # 2 * 3.141592 is the original truncated approximation of 2*pi and is
    # deliberately left unchanged.
    fm = [2 * 3.141592 * np.random.rand(n, n) for _ in range(m)]

    # Read the basis matrices U1, ..., UN. The context manager guarantees the
    # file is closed even when a line fails to parse.
    list_u = []
    with open(file_name1, 'r') as source:
        for _ in range(n):
            u = np.zeros((n, n), dtype=complex)
            for i in range(n):
                for j in range(n):
                    real = float(source.readline())
                    imag = float(source.readline())
                    u[i][j] = real + 1j * imag
            # NOTE(review): the separator line is assumed to follow each whole
            # matrix (nesting reconstructed) - confirm against the writer of
            # this file.
            source.readline()  # Skip the empty line
            list_u.append(u)

    # Resulting matrices of size n by n, one per phase matrix.
    um = [interferometer(create_list_fl(phases, n), list_u, n) for phases in fm]

    with open(file_name2, 'w') as target:
        # Phase matrices first.
        for k in range(m):
            for i in range(n):
                for j in range(n):
                    target.write(str(fm[k][i][j].real) + '\n')
            # NOTE(review): blank separator assumed to be written once per
            # matrix - confirm against the reader of this file.
            target.write('\n')
        # Then the resulting matrices, real and imaginary parts separately.
        for k in range(m):
            for i in range(n):
                for j in range(n):
                    target.write(str(um[k][i][j].real) + '\n')
                    target.write(str(um[k][i][j].imag) + '\n')
            target.write('\n')

    print('Training dataset successfully loaded to file')
def trainer(file_name1, file_name2, file_name3, n, m, mini_batch_size,
            counts_of_epochs, func, derivative_func, functional, coeff,
            noisy_f, noisy_u, network, method='L-BFGS-B'):
    """Train ``network.list_U`` on mini-batches and report the results.

    Args:
        file_name1: path passed to ``load_goal_matrices`` (the goal matrices).
        file_name2: path passed to ``load_data`` (the training sample).
        file_name3: not used by this function.
        n: size parameter forwarded to all helpers.
        m: sample size forwarded to ``load_data`` / ``create_mini_batch``.
        mini_batch_size: number of samples per mini-batch.
        counts_of_epochs: number of optimization steps to run.
        func: cost, called as ``func(x, network, mini_batch_f, mini_batch_u, n)``.
        derivative_func: gradient of ``func`` with the same call signature.
        functional: two-argument metric ``functional(result, goal)`` used for
            the test value recorded and printed at every epoch.
        coeff: when not ``None``, ``network.list_U`` is replaced by
            ``get_list_noisy(goal_matrices, coeff, n)`` before training.
        noisy_f: scale of the Gaussian noise added to the phase data.
        noisy_u: scale of the complex Gaussian noise added to the matrix data.
        network: object exposing ``list_U`` and ``polar_correct()``.
        method: ``'L-BFGS-B'`` (one ``scipy.optimize.minimize`` iteration per
            epoch) or ``'SGD'`` (fixed learning rate 0.2). Any other value
            skips training and only runs the final evaluation.

    Returns:
        ``(steps, results, cross_validation, norma, error)``: the first four
        are numpy arrays with one entry per epoch; ``error`` is the infidelity
        against the goal matrices averaged over 1000 random phase sets.
    """
    fm, um = load_data(n, m, file_name2)  # Got the whole sample
    # NOTE(review): the source nesting was lost; both noise updates are
    # assumed to sit inside the loop (one fresh noise draw per element of
    # ``um``, applied to the whole sample) - confirm the intended behaviour.
    for _ in um:
        fm = fm + noisy_f * np.random.randn(n, n)
        um = um + noisy_u * (np.random.randn(n, n) +
                             1j * np.random.randn(n, n))
    um = polar_correct(um)  # Unitarization of basis matrices

    if coeff is not None:
        # Start from a noisy copy of the correct matrices to make the search
        # easier for large values of n.
        list_goal_u = load_goal_matrices(n, file_name1)
        network.list_U = get_list_noisy(list_goal_u, coeff, n)

    steps = []
    results = []
    cross_validation = []
    norma = []
    x0 = transform_to_1d_list(network.list_U, n)  # Initial point
    list_goal_u = load_goal_matrices(n, file_name1)

    def transfer(list_u, phases):
        # Output matrix of the interferometer built from ``list_u``.
        return interferometer(create_list_fl(phases, n), list_u, n)

    def test_cost():
        # ``functional`` between network and goal on one fresh random phase set.
        f = get_random_phase(n)
        return functional(transfer(network.list_U, f),
                          transfer(list_goal_u, f))

    def record_epoch(epoch, x_current, batch_f, batch_u):
        # Bookkeeping shared by both optimizers, done before the update step.
        steps.append(epoch)
        results.append(func(x_current, network, batch_f, batch_u, n))
        cross_validation.append(test_cost())
        norma.append(norma_square(transfer(network.list_U, batch_f[0]), n))

    if method == 'L-BFGS-B':
        print('Turned on L-BFGS-B')
        for i in range(counts_of_epochs):
            # Form a mini-batch for one learning step.
            mini_batch_f, mini_batch_u = create_mini_batch(
                n, m, mini_batch_size, fm, um)
            record_epoch(i, x0, mini_batch_f, mini_batch_u)
            # One optimizer iteration per epoch.
            res = minimize(func,
                           x0,
                           args=(network, mini_batch_f, mini_batch_u, n),
                           method='L-BFGS-B',
                           jac=derivative_func,
                           options={
                               'disp': False,
                               'maxiter': 1
                           })
            network.list_U = transform_to_matrix(res.x, n)  # Update the network
            network.polar_correct()
            x0 = res.x
            print('Epoch: ', i + 1, ' Training set: ', results[i],
                  ' Test set: ', test_cost())

    if method == 'SGD':
        print('Turned on SGD')
        rate_learning = 0.2
        for i in range(counts_of_epochs):
            # Form a mini-batch for one learning step.
            mini_batch_f, mini_batch_u = create_mini_batch(
                n, m, mini_batch_size, fm, um)
            record_epoch(i, x0, mini_batch_f, mini_batch_u)
            # Plain gradient-descent step.
            x0 = x0 - rate_learning * derivative_func(
                x0, network, mini_batch_f, mini_batch_u, n)
            network.list_U = transform_to_matrix(x0, n)  # Update the network
            network.polar_correct()
            # Fix: the original passed only the network output to
            # ``functional`` here; like every other call site it needs the
            # goal output as the second argument.
            print('epoch: ', i + 1, ' Training set: ', results[i],
                  ' Test set: ', test_cost())

    # Cross validation: print all four metrics for 10 random phase sets.
    list_goal_u = load_goal_matrices(n, file_name1)
    for _ in range(10):
        f = get_random_phase(n)
        # Each metric receives freshly computed matrices, as in the original.
        print(
            frobenius_reduced(transfer(network.list_U, f),
                              transfer(list_goal_u, f)),
            infidelity(transfer(network.list_U, f),
                       transfer(list_goal_u, f)),
            weak_reduced(transfer(network.list_U, f),
                         transfer(list_goal_u, f)),
            sst(transfer(network.list_U, f), transfer(list_goal_u, f)))

    steps = np.array(steps)
    results = np.array(results)
    cross_validation = np.array(cross_validation)
    norma = np.array(norma)

    # Final score: infidelity averaged over 1000 random phase sets.
    error = 0.0
    for _ in range(1000):
        f = get_random_phase(n)
        error = error + infidelity(transfer(network.list_U, f),
                                   transfer(list_goal_u, f))
    error = error / 1000
    return steps, results, cross_validation, norma, error
def forward(self):
    """Return the interferometer output for the current phases ``self.F``.

    The phase layers are built with ``create_list_fl(self.F, self.N)`` and
    combined with the basis matrices ``self.list_U`` by ``interferometer``.
    """
    phase_layers = create_list_fl(self.F, self.N)
    return interferometer(phase_layers, self.list_U, self.N)
def grad_sst(self):
    """Fill ``self.grad_F`` with the phase gradient of the SST cost.

    The stored value for each phase is
    ``(2 / N) * Re(sum(conj(u_1 - target_1) * dv))`` where ``u_1`` and
    ``target_1`` are ``transform_sst`` of the current output and of
    ``self.target``, and ``dv`` is the product-rule derivative of
    ``r_l . u . r_r`` (presumably what ``transform_sst`` computes - confirm),
    with ``r_l, r_r = r_r_r_l(u)``.

    Side effects: overwrites ``self.A[0..N]``, ``self.B[0..N]`` and every
    entry of ``self.grad_F``. Returns ``None``.
    """
    size = self.N
    phase_layers = create_list_fl(self.F, size)
    unitaries = self.list_U

    # A[N] = I, A[l] = A[l + 1] . fl[l + 1] . U[l]
    self.A[size] = np.eye(size, dtype=complex)
    for layer in reversed(range(size)):
        self.A[layer] = np.dot(
            self.A[layer + 1],
            np.dot(phase_layers[layer + 1], unitaries[layer]))

    # B[0] = I, B[l] = U[l - 1] . fl[l - 1] . B[l - 1]
    self.B[0] = np.eye(size, dtype=complex)
    for layer in range(1, size + 1):
        self.B[layer] = np.dot(
            np.dot(unitaries[layer - 1], phase_layers[layer - 1]),
            self.B[layer - 1])

    u = self.forward()
    r_l, r_r = r_r_r_l(u)
    u_1 = transform_sst(u)
    target_1 = transform_sst(self.target)

    def phase_derivative(phase, left, selector, right):
        # Derivative of u with respect to one phase.
        return 1j * np.exp(1j * phase) * np.dot(left, np.dot(selector, right))

    def diagonal_slope(entry, entry_slope):
        # Derivative of one diagonal element of r_l / r_r, driven by the
        # matrix element ``entry`` of u and its derivative ``entry_slope``.
        return (1j * ((entry.conjugate() / abs(entry)) * entry_slope).imag
                / entry.conjugate()).conjugate()

    def cost_slope(grad_u):
        grad_r_l = np.eye(size, dtype=complex)
        grad_r_r = np.eye(size, dtype=complex)
        for i in range(size):
            # r_l follows the first column of u, r_r the first row.
            grad_r_l[i][i] = diagonal_slope(u[i][0], grad_u[i][0])
            if i == 0:
                grad_r_r[i][i] = 0.0
            else:
                grad_r_r[i][i] = diagonal_slope(u[0][i], grad_u[0][i])
        # Product rule for r_l . u . r_r.
        grad_v = (np.dot(grad_r_l, np.dot(u, r_r))
                  + np.dot(r_l, np.dot(grad_u, r_r))
                  + np.dot(r_l, np.dot(u, grad_r_r)))
        return (2 / self.N) * np.sum((u_1 - target_1).conj() * grad_v).real

    # Phases of layers 0..N-1 live in columns 0..N-2 of self.F.
    for layer in range(size):
        for channel in range(size - 1):
            grad_u = phase_derivative(self.F[layer][channel], self.A[layer],
                                      self.D[channel], self.B[layer])
            self.grad_F[layer][channel] = cost_slope(grad_u)

    # Phases of the final layer (index N) live in the last column of self.F.
    for channel in range(size):
        grad_u = phase_derivative(self.F[channel][self.N - 1], self.A[self.N],
                                  self.D[channel], self.B[self.N])
        self.grad_F[channel][self.N - 1] = cost_slope(grad_u)
def grad_sst(self, mini_batch_f, mini_batch_u):
    """Compute ``self.grad_U``: the mini-batch gradient of the SST cost.

    For every sample the output ``u_result`` and the target are passed
    through ``transform_sst``; the derivative of the transformed output is
    assembled with the product rule for ``r_l . u . r_r`` (presumably what
    ``transform_sst`` computes - confirm), where ``r_l, r_r =
    r_r_r_l(u_result)``. The real part of each accumulated entry is the slope
    along the real direction and the imaginary part the slope along the
    imaginary direction of entry ``(p, t)`` of ``self.list_U[l]``.

    Args:
        mini_batch_f: indexable collection of phase sets; element ``k`` is
            passed to ``create_list_fl``.
        mini_batch_u: indexable collection of target matrices, one per sample.

    Side effects: overwrites ``self.grad_U``, ``self.list_A[k]`` and
    ``self.list_B[k]`` for ``k < self.mini_batch_size``. Returns ``None``.
    """
    size = self.N

    def diagonal_slope(entry, entry_slope):
        # Derivative of one diagonal element of r_l / r_r, driven by the
        # matrix element ``entry`` of u_result and its derivative.
        return (1j * ((entry.conjugate() / abs(entry)) * entry_slope).imag
                / entry.conjugate()).conjugate()

    def transformed_slope(u, r_l, r_r, u_rr, grad_u):
        # Product-rule derivative of r_l . u . r_r for one direction grad_u;
        # ``u_rr`` is the precomputed product u . r_r.
        grad_r_l = np.eye(size, dtype=complex)
        grad_r_r = np.eye(size, dtype=complex)
        for i in range(size):
            # r_l follows the first column of u, r_r the first row.
            grad_r_l[i][i] = diagonal_slope(u[i][0], grad_u[i][0])
            if i == 0:
                grad_r_r[i][i] = 0.0
            else:
                grad_r_r[i][i] = diagonal_slope(u[0][i], grad_u[0][i])
        return (np.dot(grad_r_l, u_rr)
                + np.dot(r_l, np.dot(grad_u, r_r))
                + np.dot(r_l, np.dot(u, grad_r_r)))

    # Start from zero so the per-sample contributions can be summed.
    for l in range(self.N):
        self.grad_U[l] = np.zeros((self.N, self.N), dtype=complex)

    for k in range(self.mini_batch_size):
        fl = create_list_fl(mini_batch_f[k], self.N)

        # list_A[k][l] = fl[N] . U[N-1] . fl[N-1] ... U[l+1] . fl[l+1]
        self.list_A[k][self.N - 1] = fl[self.N]
        for l in range(self.N - 2, -1, -1):
            self.list_A[k][l] = np.dot(self.list_A[k][l + 1],
                                       np.dot(self.list_U[l + 1], fl[l + 1]))

        # list_B[k][l] = fl[l] . U[l-1] . fl[l-1] ... U[0] . fl[0]
        self.list_B[k][0] = fl[0]
        for l in range(1, self.N, 1):
            self.list_B[k][l] = np.dot(np.dot(fl[l], self.list_U[l - 1]),
                                       self.list_B[k][l - 1])

        u_target = mini_batch_u[k]
        u_result = interferometer(fl, self.list_U, self.N)
        r_l, r_r = r_r_r_l(u_result)
        u_1 = transform_sst(u_result)
        target_1 = transform_sst(u_target)

        # Per-sample invariants, hoisted out of the (l, p, t) loops.
        residual_conj = (u_1 - target_1).conj()
        u_rr = np.dot(u_result, r_r)

        for l in range(self.N):
            # The partial products depend only on (k, l): fetch them once.
            a = self.list_A[k][l]
            b = self.list_B[k][l]
            for p in range(self.N):
                for t in range(self.N):
                    # self.D[p][t] is used as d(U_l)/d(U_l[p][t])
                    # (presumably the (p, t) matrix unit - confirm).
                    grad_u_x = np.dot(a, np.dot(self.D[p][t], b))
                    # The imaginary direction is the same product times 1j;
                    # reuse it instead of multiplying the matrices again.
                    grad_u_y = 1j * grad_u_x
                    grad_v_x = transformed_slope(u_result, r_l, r_r, u_rr,
                                                 grad_u_x)
                    grad_v_y = transformed_slope(u_result, r_l, r_r, u_rr,
                                                 grad_u_y)
                    self.grad_U[l][p][t] += (
                        (2 / self.N) * np.sum(residual_conj * grad_v_x).real
                        + 1j * (2 / self.N) * np.sum(residual_conj * grad_v_y).real)

    # Average the gradient over the mini-batch.
    for l in range(self.N):
        self.grad_U[l] = self.grad_U[l] / self.mini_batch_size