def setup():
    """
    Set up the run
        - Get input arguments
        - Create necessary directories
        - Generate individual transcript FASTA files
        - Create output files
    """
    proteomefile = sys.argv[1]
    directory = f'{sys.argv[2]}/'

    # Deletes output directory if it exists
    if os.path.isdir(directory):
        shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory)

    generate_data(proteomefile, directory)

    main_output = f"{directory}/index_hopping_output.txt"
    Path(f"{directory}/hopper.txt").touch()
    Path(f"{directory}/final.txt").touch()
    Path(f"{directory}/never.txt").touch()
    Path(f"{directory}/maybehopper.txt").touch()
    Path(main_output).touch()
    with open(main_output, "a") as fh:
        fh.write(
            'filename uniquely multi totalReads uniquelyAGAINST multiAGAINST totalreadsAGAINST percRatio\n'
        )
    return directory
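# --- Hedged example (not part of the original script) ---
# The header written by setup() suggests that downstream steps append one
# space-separated row per processed file. A minimal sketch of such an append,
# with hypothetical values and a hypothetical `row` dict, might look like this:
directory = setup()
row = {
    "filename": "sample_01.fastq", "uniquely": 120, "multi": 30, "totalReads": 150,
    "uniquelyAGAINST": 5, "multiAGAINST": 2, "totalreadsAGAINST": 7, "percRatio": 0.047,
}
with open(f"{directory}/index_hopping_output.txt", "a") as fh:
    fh.write(" ".join(str(row[k]) for k in (
        "filename", "uniquely", "multi", "totalReads",
        "uniquelyAGAINST", "multiAGAINST", "totalreadsAGAINST", "percRatio")) + "\n")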
def get_data(config_data):
    """
    Get training and test data given parameters in the config file.
    Used for constructing the images of "randomized" shapes.
    """
    type = str(config_data["type"])
    n_samples_train = int(config_data["n_samples_train"])
    n_samples_test = int(config_data["n_samples_test"])
    size = int(config_data["size"])

    if type == "2D":
        if config_data["width"] == "random":
            width = "random"
        else:
            width = int(config_data["width"])
        if config_data["noise"] == "False":
            noise_strength = False
        else:
            noise_strength = float(config_data["noise"])
        random_size = bool(config_data["random_size"])
        regular_polygons = bool(config_data["regular_polygons"])
        # note: flatten is parsed here but the calls below force flatten=True
        flatten = bool(config_data["flatten"])
        data_train, labels_train = generate_data(
            n_samples=n_samples_train, size=size, width=width,
            noise_strength=noise_strength, random_size=random_size,
            regular_polygons=regular_polygons, flatten=True)
        data_test, labels_test = generate_data(
            n_samples=n_samples_test, size=size, width=width,
            noise_strength=noise_strength, random_size=random_size,
            regular_polygons=regular_polygons, flatten=True)
    elif type == "1D":
        data_train, labels_train = generate_1D_test(size, n_samples_train)
        # use the test sample count for the test split
        data_test, labels_test = generate_1D_test(size, n_samples_test)

    return data_train, labels_train, data_test, labels_test, size, type
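# --- Hedged example (not part of the original function) ---
# A hypothetical config for the "2D" branch above; every key below is one that
# get_data() reads, but the values (and the fact that they arrive as strings)
# are purely illustrative.
config_data = {
    "type": "2D",
    "n_samples_train": "1000",
    "n_samples_test": "200",
    "size": "28",
    "width": "random",   # or an integer width, e.g. "3"
    "noise": "0.1",      # the literal string "False" disables noise
    "random_size": "",   # note: bool("False") is True, so only an empty value reads as False here
    "regular_polygons": "",
    "flatten": "",
}
data_train, labels_train, data_test, labels_test, size, shape_type = get_data(config_data)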
def generate_data(self):
    Path(self.data_dir).mkdir(parents=True, exist_ok=True)

    # Remove any existing sample files that do not have the expected length
    ldir = os.listdir(self.data_dir)
    for l in ldir:
        with open(os.path.join(self.data_dir, l), 'r') as f:
            if len(f.read().split('\n')) < (self.data_size + 2):
                os.remove(os.path.join(self.data_dir, l))

    n_existing_sample = len(os.listdir(self.data_dir))

    Tstruct = self.load_struct()
    ndag = otagr.NamedDAG(Tstruct)
    for i in range(n_existing_sample, self.data_number):
        sample = dg.generate_data(ndag, self.data_size,
                                  self.data_distribution, **self.data_parameters)
        data_file_name = "sample" + str(i + 1).zfill(2)
        sample.exportToCSVFile(
            os.path.join(self.data_dir, data_file_name) + ".csv", ',')
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    assert FLAGS.min_frequency < 100, "--min_frequency must be lower than 100."
    assert FLAGS.max_frequency > 15250, "--max_frequency must be higher than 15250."
    save_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  FLAGS.save_directory)
    if not os.path.exists(save_directory):
        os.mkdir(save_directory)

    # TODO(lauraruis): define this elsewhere
    critical_bands = [
        FLAGS.min_frequency, 100, 200, 300, 400, 505, 630, 770, 915, 1080, 1265,
        1475, 1720, 1990, 2310, 2690, 3125, 3675, 4350, 5250, 6350, 7650, 9400,
        11750, 15250, FLAGS.max_frequency
    ]

    # Generate the data.
    data = data_generation.generate_data(
        num_examples_per_cb=FLAGS.examples_per_critical_band,
        desired_mean=FLAGS.desired_mean,
        desired_variance=FLAGS.desired_variance,
        min_tones=FLAGS.min_tones,
        max_tones=FLAGS.max_tones,
        clip_db=FLAGS.clip_db,
        desired_skewness=FLAGS.skewness_parameter,
        min_frequency=FLAGS.min_frequency,
        max_frequency=FLAGS.max_frequency,
        critical_bands=critical_bands,
        min_phons=FLAGS.min_phons,
        max_phons=FLAGS.max_phons)

    # Run and save some analysis on the generated data.
    (covered_num_tones_per_cb, total_unique_examples,
     total_num_examples_listeners) = data_analysis.save_data(
         data, save_directory, critical_bands)
def data_load(batch_num, size, shots, num_qubits, depth, max_operands,
              prob_one, prob_two, clifford, device=0, train=1):
    loader_ideal = []
    loader_noisy = []
    noise_model, basis_gates = dg.noisy_model(prob_one, prob_two)
    cbasis_gates = ['cx', 'id', 'rz', 'sx', 'x']
    length = []
    for group in range(batch_num):
        if clifford:
            if device:
                cg.generate_data(size, shots, num_qubits, cbasis_gates,
                                 device, group, batch_num, train)
            else:
                data_ideal, data_noisy, sizes = cg.generate_data(
                    size, shots, num_qubits, cbasis_gates, device, group,
                    batch_num, train)
                loader_ideal.append(data_ideal)
                loader_noisy.append(data_noisy)
                length.append(sizes)
        else:
            data_ideal, data_noisy, sizes = dg.generate_data(
                size, shots, num_qubits, depth, max_operands, noise_model,
                basis_gates)
            loader_ideal.append(data_ideal)
            loader_noisy.append(data_noisy)
            length.append(sizes)
    return (loader_ideal, loader_noisy, length)
config, dir_name = util.load_configuration(cfg_fn)
params = util.create_param_dict(config)
df = pd.DataFrame(util.parseParams(params))

all_predY = None
all_error = None
mean_errors = []
std_errors = []

for iter_number in range(params['exp_details__num_iterations_per_setting']):
    # generate mobile points, base stations, and angles
    mobiles, bases, angles = data_generation.generate_data(
        params['data__num_pts'], params['data__num_stations'],
        params['data__ndims'], pts_r=3., bs_r=4,
        bs_type=params['data__bs_type'],
        points_type=params['data__data_dist'])

    # IMPORTANT: remember to add noise before replicating data (e.g., for snbp-mlp)
    if params['noise__addnoise_train']:
        angles, mobiles = noise_models.add_noise_dispatcher(
            angles, mobiles, params['noise__noise_model'],
            params['data__ndims'],
            base_idxs=params['noise__bases_to_noise'],
            noise_params=params['noise__noise_params'])

    if params['NN__type'] == 'snbp-mlp' or params['NN__type'] == 'smlp':
        rep_idxs = [comb for comb in
                    itertools.combinations(range(params['data__num_stations']), 2)]
        angles = data_generation.replicate_data(angles, params['data__ndims'], rep_idxs)
"_data_geom_classification_"+ \ "_r_min_%.4f"%(r_min) + \ "_r_max_%.4f"%(r_max) + \ "_L_%.4f"%(L) + \ "_n_samples_" + str(n_samples) + \ "_n_samples_per_point_" + str(n_point_samples) print("Using data in %s" % (dataFile)) # if the file doesn't exist we create it if not path.exists(dataFile): # TODO: encapsulate all this in a function print("Data file does not exist, we create a new one") labels, \ points = generate_data(n_samples, n_point_samples, L, r_min, r_max) hf = h5py.File(dataFile, 'w') hf.create_dataset('labels', data=labels) hf.create_dataset('points', data=points) hf.close() # extracting the data hf = h5py.File(dataFile, 'r') labels_array = np.array(hf['labels'][:], dtype=np.int32) points_array = np.array(hf['points'][:], dtype=np.float32) # Defining the Keras model
def main():

    # extras dictionary for importing to functions
    extras = {}

    ###########################################
    #
    #  S P I C E  C O D E
    #
    ###########################################

    # basic .bsp filename (generic, such as de430, etc)
    extras['basic_bsp'] = 'de430.bsp'
    # .bsp filename for mission
    extras['mission_bsp'] = 'DINO_kernel.bsp'
    # .tls filename
    extras['tls'] = 'naif0011.tls'

    # prep pyswice for the extraction of initial data
    # is the only reason that we do this is for lines 165 and 166?
    pyswice.furnsh_c(bskSpicePath + 'de430.bsp')
    pyswice.furnsh_c(dinoSpicePath + 'naif0011.tls')
    pyswice.furnsh_c(dinoSpicePath + 'DINO_kernel.bsp')

    DINO_kernel = dinoSpicePath + 'DINO_kernel.bsp'
    body_int = -100  # SP.spkobj(DINO_kernel)
    body_id_str = str(body_int)

    # search_window = pyswice.new_doubleArray(2)
    # pyswice.spkcov_c(DINO_kernel, body_int, search_window)
    # list_of_events = pyswice.wnfetd_c(search_window, 0)
    # tBSP_Start = list_of_events[0]
    # tBSP_End = list_of_events[1]

    ###########################################
    # Initial condition for spacecraft
    # data = io.loadmat('saves/obsData.mat')
    # trueEphemeris = {}
    # reference of sun to sc
    # trueEphemeris['spacecraft'] = np.copy(data['stateS'])
    # # reference of sun to Earth
    # trueEphemeris['S2E'] = np.copy(data['stateE'])
    # # reference of sun to Mars
    # trueEphemeris['S2M'] = np.copy(data['stateM'])

    # time span
    # timeSpan = data['etT'].flatten()

    # Filtering End Epochs
    start_et = pyswice.new_doubleArray(1)
    end_et = pyswice.new_doubleArray(1)
    pyswice.utc2et_c('23 JUL 2020 17:00:00', start_et)
    pyswice.utc2et_c('30 JUL 2020 17:00:00', end_et)
    start_et = pyswice.doubleArray_getitem(start_et, 0)
    end_et = pyswice.doubleArray_getitem(end_et, 0)

    # body vector for SUN, EARTH, MARS
    # CODE RELIES ON SUN BEING INDEXED AS 0
    extras['bodies'] = ['SUN', '3', '399']

    # specify primary and secondary
    extras['primary'] = 0
    extras['secondary'] = [1, 2]

    # respective GP vector
    extras['mu'] = [1.32712428 * 10**11, 3.986004415 * 10**5, 4.305 * 10**4]

    # abcorr for spkzer
    extras['abcorr'] = 'NONE'

    # reference frame
    extras['ref_frame'] = 'J2000'

    # SRP parameter
    # A/M ratio multiplied by solar pressure constant at 1 AU with adjustments
    extras['SRP'] = 0.3**2 / 14. * 149597870.**2 * 1358. / 299792458. / 1000.  # turboprop document Eq (64)

    # coefficient of reflectivity
    extras['cR'] = 1.

    # number of observations per beacon until moving to the next
    extras['repeat_obs'] = 1

    # SNC coefficient
    extras['SNC'] = (2 * 10**(-4))**3

    # Number of batch iterations
    extras['iterations'] = 3

    # Initializing the error
    extras['x_hat_0'] = 0

    # rng seed for debugging purposes
    extras['seed'] = 5

    ##################################################################################
    #
    # Camera/P&L Parameters
    #
    ##################################################################################

    # Focal Length (mm)
    extras['FoL'] = 100.
    angles = []
    extras['DCM_BI'] = np.eye(3)
    extras['DCM_TVB'] = np.eye(3)

    # Camera resolution (pixels)
    extras['resolution'] = [1024., 1024.]

    # width and height of pixels in camera
    extras['pixel_width'] = 5.
    extras['pixel_height'] = 5.

    # direction coefficient of pixel and line axes
    extras['pixel_direction'] = 1.
    extras['line_direction'] = 1.

    # Are we using the real dynamics for the ref or the trueData
    extras['realData'] = 'OFF'

    # Add anomaly detection parameters
    extras['anomaly'] = False
    extras['anomaly_num'] = 0
    extras['anomaly_threshold'] = 4

    ##################################################################################
    # Get Observation Times and Ephemerides. This outputs a full data set that is not
    # parsed in any way. Ephemerides for all objects at all times are given.
    trueEphemeris, timeSpan = dg.generate_data(
        sc_ephem_file=DINO_kernel,
        planet_beacons=['earth', 'mars barycenter'],
        beaconIDs=[],
        n_observations=24,
        start_et=start_et,
        end_et=end_et,
        extras=extras,
        realData=extras['realData'])

    tt_switch = 5

    print '------------------'
    print 'Filter Image Span : ', (timeSpan[-1] - timeSpan[0]) / (60 * 60 * 24), 'days'
    print '------------------'

    # number and keys of beacons. note that the true ephem is going to have one spot for the
    # sun, which is NOT a beacon. These are used in beaconBinSPICE.
    beacon_names = trueEphemeris.keys()
    beacon_names.remove('spacecraft')
    extras['unique_beacon_IDs'] = beacon_names
    extras['n_unique_beacons'] = len(beacon_names)

    ##################################################################################
    #
    # BLOCK A page 196
    #
    ##################################################################################

    # copy the initial conditions as the first sun-to-SC referenceStates from the SPICE file
    IC = np.copy(trueEphemeris['spacecraft'][:, 0])

    print 'IC', IC

    # spice_derived_state is only referenced here. Should these be axed?
    spice_derived_state = pyswice.new_doubleArray(6)
    lt = pyswice.new_doubleArray(1)
    pyswice.spkezr_c(body_id_str, timeSpan[0], 'J2000', 'None', 'Sun',
                     spice_derived_state, lt)

    # a priori uncertainty for the referenceStates
    covBar = np.zeros((IC.shape[0], IC.shape[0]))
    covBar[0, 0] = 10000**2
    covBar[1, 1] = 10000**2
    covBar[2, 2] = 10000**2
    covBar[3, 3] = .1**2
    covBar[4, 4] = .1**2
    covBar[5, 5] = .1**2

    # add uncertainty to the IC
    initialPositionError = 1000 * np.divide(IC[0:3], np.linalg.norm(IC[0:3]))
    initialVelocityError = 0.01 * np.divide(IC[3:6], np.linalg.norm(IC[3:6]))
    IC[0:6] += np.append(initialPositionError, initialVelocityError)

    # uncertainty to be added in the form of noise to the measurables.
    # Takes the form of variance. Currently, the same value is used in both
    # the creation of the measurements as well as the weighting of the filter (W)
    observationUncertainty = np.identity(2)
    observationUncertainty[0, 0] = 0.2**2
    observationUncertainty[1, 1] = 0.2**2

    # the initial STM is an identity matrix
    phi0 = np.identity(IC.shape[0])

    # initiate a priori deviation
    stateDevBar = np.zeros(IC.shape)

    # initiate a filter output dictionary
    filterOutputs = {}

    ##################################################################################
    #
    # Get the noisy observations
    #
    ##################################################################################

    # observation inputs
    observationInputs = (trueEphemeris, observationUncertainty, angles, extras)

    # Get the observation data (dataObservations). This dictionary contains the SPICE data
    # from which values are calculated (key = 'SPICE'), the true observations before
    # uncertainty is added (key = 'truth') and the measured observations (key = 'measurements').
    # These are the 'measurements' values that are now simulating an actual observation,
    # and they are to be processed by the filter.
    # The dictionary also contains the list of beacons by name and order of processing.
    # This list of strings (key = 'beacons') is needed for
    # the filter's own beacon position generator
    dataObservations = getObs(observationInputs)

    # create dictionary for observation data to be inputs in filter. This is a more limited
    # dictionary than dataObservations and serves as the most "real" input
    filterObservations = {}
    filterObservations['measurements'] = dataObservations['measurements']
    filterObservations['beaconIDs'] = dataObservations['beacons']

    ##################################################################################
    #
    # Run the Filter
    #
    ##################################################################################

    # alter the coefficient of reflectivity to be zero. This negates any contribution of
    # modeling SRP
    extras['cR'] = 0.0

    # run the filter and output the referenceStates (including STMs), est states and extra data
    for itr in xrange(extras['iterations']):

        if itr > 0:
            IC = estimatedState[0, :]
            stateDevBar -= extraData['stateDevHatArray'][0, :]

        if itr == 0:
            extras['oldPost'] = np.zeros([len(timeSpan), 2])

        # the arguments for the filter: the IC, the first STM, the time span, the observables
        # data dictionary, a priori uncertainty, and the measurables' uncertainty,
        # as well as any extras
        filterInputs = (IC, phi0, timeSpan, filterObservations,
                        covBar, observationUncertainty, stateDevBar, angles, extras)
        # run filter function
        referenceState, estimatedState, extraData = run_batch(filterInputs)

        extras['oldPost'] = extraData['postfit residuals']

        # Check for anomaly:
        [anomaly_bool, anomaly_num] = extraData['anomaly_detected']
        if anomaly_bool == True:
            print '**********************************************************'
            print 'Anomaly Detected - Estimates are not to be trusted'
            print '**********************************************************'
            print anomaly_num, 'Residuals out of bounds'
            return

        # save all outputs into the dictionary with a name associated with the iteration
        filterOutputs[str(itr)] = {}
        filterOutputs[str(itr)]['referenceState'] = referenceState
        filterOutputs[str(itr)]['estimatedState'] = estimatedState
        filterOutputs[str(itr)]['extraData'] = extraData

        ##################################################################################
        #
        # \ BLOCK A page 196
        #
        ##################################################################################

        # Iteration Directory
        dirIt = 'Batch_Iteration' + str(itr + 1)

        # Make directory for the iterations
        if not os.path.exists(dirIt):
            os.makedirs(dirIt)

        # File to write data
        writingText(itr + 1, referenceState, estimatedState, trueEphemeris, extraData,
                    initialPositionError, initialVelocityError)

        # calculate the difference between the perturbed reference and
        # true trajectories: reference state errors
        stateError = referenceState[:, 0:6] - trueEphemeris['spacecraft'].T

        # compare the estimated and true trajectories: estimated state errors
        stateErrorHat = estimatedState[:, 0:6] - trueEphemeris['spacecraft'].T

        plotData = extraData
        plotData['postfit delta'] = extraData['postfit changes']
        plotData['states'] = estimatedState
        plotData['truth'] = dataObservations['truth']
        plotData['beacon_list'] = dataObservations['beacons']
        plotData['timeSpan'] = timeSpan
        plotData['dirIt'] = dirIt
        plotData['err'] = stateError
        plotData['stateErrorHat'] = stateErrorHat
        plotData['obs_uncertainty'] = observationUncertainty
        plotData['referenceState'] = referenceState
        plotData['trueEphemeris'] = trueEphemeris
        plotData['extras'] = extras
        plotData['acc_est'] = 'OFF'
        PF(plotData)

        # Write the output to the pickle file
        fileTag = 'SRP_test'
        file = dirIt + '/' + fileTag + '_data.pkl'
        pklFile = open(file, 'wb')
        pickle.dump(plotData, pklFile, -1)
        pklFile.flush()
        pklFile.close()
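# --- Hedged example (not part of the original script) ---
# Each iteration above pickles plotData to '<dirIt>/SRP_test_data.pkl'. A minimal
# sketch for loading one of those files back for later inspection (the path is
# assumed from the code above) could be:
import pickle

with open('Batch_Iteration1/SRP_test_data.pkl', 'rb') as pklFile:
    plotData = pickle.load(pklFile)

# a few of the keys populated before pickling
print(plotData['dirIt'])
print(plotData['err'].shape, plotData['stateErrorHat'].shape)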
    ],
)

parsl.set_stream_logger()
parsl.load(config)

from data_generation import generate_data

proteomefile = sys.argv[1]
directory = f'/home/users/ellenrichards/{sys.argv[2]}/'
threshold = 1000

if not os.path.isdir(directory):
    os.makedirs(directory)

generate_data(proteomefile, directory)

os.system(f"touch {directory}/hopper.txt")
os.system(f"touch {directory}/final.txt")
os.system(f"touch {directory}/never.txt")
os.system(f"touch {directory}/maybehopper.txt")
os.system(f"touch {directory}/index_hopping_output.txt")

#csv_filename = "index_hopping_output.csv"
#csv_fh = open(csv_filename, "write")
#csv_writer = csv.DictWriter(csv_fh, fieldnames=["filename", "uniquely", "multi", "totalReads", "uniquelyAGAINST", "multiAGAINST", "totalReadsAGAINST", "percratio"])
#csv_row = {"filename": "filename ", "uniquely": "uniquely ", "multi": "multi ", "totalReads": "totalreads ", "uniquelyAGAINST": "uniquelyC ", "multiAGAINST": "multiC ", "totalReadsAGAINST": "totalreadsC ", "percratio": "percRatio "}
#csv_writer.writerow(csv_row)
os.system(f"echo 'filename uniquely multi totalReads uniquelyAGAINST multiAGAINST totalreadsAGAINST percRatio' >> {directory}/index_hopping_output.txt")

#@python_app(executors=["login"])
def run_experiment(configurations):
    X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(
        data_fn=data_generation.sine_2,
        nb_samples=nb_samples,
        seq_len=seq_len,
        signal_freq=60.,
        add_noise=False)

    rnn = models.lstm_rnn_gru(input_size=input_size,
                              hidden_size=hidden_size,
                              cell_type="lstm").cuda()
    for module in rnn.modules():
        print module

    loss_fn = nn.MSELoss()
    optimizer = optim.RMSprop(rnn.parameters(), lr=0.00001, momentum=0.9)

    """ Training with ground truth -- The input is the ground truth """
    try:
        val_loss_list = []
        for epoch in range(nb_epochs_mainTraining):
            training_loss = 0
            val_loss = 0
            rnn.train(True)
            for batch, i in enumerate(range(0, X_train.size(0) - 1, batch_size)):
                data, targets = data_generation.get_batch(X_train, y_train, i,
                                                          batch_size=batch_size)
                output = rnn(data)  # This is the original, training with ground truth only
                # output = rnn(data[:, :100], future=400)
                # Here, trying to use the model output as part of the input
                optimizer.zero_grad()
                loss = loss_fn(output, targets)
                loss.backward()
                optimizer.step()
                training_loss += loss.data[0]
            training_loss /= batch

            rnn.train(False)
            for batch, i in enumerate(range(0, X_val.size(0) - 1, batch_size)):
                data, targets = data_generation.get_batch(X_val, y_val, i,
                                                          batch_size=batch_size)
                output = rnn(data)
                loss = loss_fn(output, targets)
                val_loss += loss.data[0]
            val_loss /= batch
            val_loss_list.append(val_loss)

            print "Ground truth - Epoch " + str(epoch) + " -- train loss = " + str(training_loss) + " -- val loss = " + str(val_loss)

            # Early stopping condition -- triggered when each of the last 4 epochs
            # has a validation error < 0.015
            cond_true = False
            if len(val_loss_list) >= 4:
                cond_true = True
                for i in val_loss_list[-4:]:
                    if i > 0.015:
                        cond_true = False
                        break
            if cond_true == True:
                print "Triggering early stopping criteria - Out of training"
                break
    except KeyboardInterrupt:
        print "Early stopping for the training"

    """ Measuring the test score -> running the test data on the model """
    rnn.train(False)
    test_loss = 0
    list1 = []
    list2 = []
    for batch, i in enumerate(range(0, X_test.size(0) - 1, batch_size)):
        data, targets = data_generation.get_batch(X_test, y_test, i, batch_size=batch_size)
        output = rnn(data)
        loss = loss_fn(output, targets)
        test_loss += loss.data[0]
        target_last_point = torch.squeeze(targets[:, -1]).data.cpu().numpy().tolist()
        pred_last_point = torch.squeeze(output[:, -1]).data.cpu().numpy().tolist()
        list1 += target_last_point
        list2 += pred_last_point

    plt.figure()
    plt.plot(list1, "b")
    plt.plot(list2, "r")
    plt.legend(["Original data", "Generated data"])
    plt.show()

    test_loss /= batch
    print "Test loss = ", test_loss

    """ Generating sequences - attempt 1 --> Exactly like "time sequence prediction" example """
    data = X_test[0, :].view(1, -1)
    # output = rnn(data[:, :100], future=future_steps)
    output = rnn(data, future=future_steps)
    output = torch.squeeze(output).data.cpu().numpy()
    plt.figure()
    plt.plot(output)
    plt.xlabel("Time step")
    plt.ylabel("Signal amplitude")
    plt.show()
def generate_data(self):
    self.input, self.target = data_generation.generate_data(
        self.dataset_type, self.no_of_samples, self.no_of_classes)
    target_posterior_y = rslds.smooth(target_posterior_x, target)

    return {
        'training_elbos': q_elbos_lem,
        'input_xhat': xhat_lem,
        'input_zhat': zhat_lem,
        'target_elbos': target_elbos,
        'target_posterior': target_posterior_y
    }


if __name__ == "__main__":
    # sample from the Lorenz system
    batch_size = 10
    t_steps = 1000
    data = generate_data(batch_size, t_steps)
    inputs, targets = input_and_target(data)

    # fit an SLDS model
    res = fit_slds(inputs[0], targets[0])

    fig, axs = plt.subplots(nrows=3, ncols=1)
    axs[0].plot(res['training_elbos'], label='Training ELBO')
    axs[0].set_xlabel('iteration')
    axs[0].set_ylabel('ELBO')
    axs[1].plot(res['target_elbos'], label='Prediction ELBO')
    axs[1].set_xlabel('iteration')
    axs[1].set_ylabel('ELBO')
    axs[2].plot(targets[0][:, 0], c='b', label='true target')
ds_size = 10000
distribution = 'student'
restarts = 20
S = list(range(1000, 10100, 100))

names = ['X', 'Y']
dag = gum.DAG()
dag.addNodes(2)
# dag.addArc(0,1)
ndag = otagrum.NamedDAG(dag, names)

D = [dg.generate_data(ndag, ds_size, distribution, r=0.8) for _ in range(restarts)]

I = []
for size in S:
    print("Size: ", size)
    info = 0
    for i, data in enumerate(D):
        print("Restart: ", i + 1)
        cmi = otagrum.CorrectedMutualInformation(data[:size])
        cmi.setKMode(otagrum.CorrectedMutualInformation.KModeTypes_NoCorr)
        info += cmi.compute2PtCorrectedInformation(0, 1)
    I.append(info / restarts)

plt.plot(S, I)
plt.show()
def test_generation(args):
    global random_data
    random_data = generate_data(args)
parser.add_argument('--sigmoid', type=str, default=None)
parser.add_argument('--tanh', type=str, default=None)
parser.add_argument('--warmup', type=int, default=10)
parser.add_argument('--optim', type=str, default='Adam_HD')
parser.add_argument('--seed', type=int, default=None)
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()
print(args)

if args.seed is not None:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(
    data_fn=args.data_fn,
    batch_size=args.batch_size,
    length=args.length,
    add_noise=args.add_noise)

rnn = models.Model(input_size=X_train.size(-1),
                   layers=args.layers,
                   output_size=y_train.size(-1),
                   sigmoid=args.sigmoid,
                   tanh=args.tanh)
print(rnn)
print(sum([p.numel() for p in rnn.parameters() if p.requires_grad]),
      "trainable parameters")

loss_fn = nn.MSELoss()
if hasattr(custom_optim, args.optim):
    optimizer = getattr(custom_optim, args.optim)(rnn.parameters())
placeholder_input = tf.placeholder(tf.complex64,
                                   shape=(BATCH_SIZE, WINDOW_PIXEL_NUM))
onn_measurement = mmm.inference_testing(placeholder_input, save_mask_phase,
                                        save_mask_amp, save_mask_holes)

# tf.gfile.MakeDirs(SENSOR_SAVING_PATH)

sess = tf.InteractiveSession()

count = 0
for step in range(MAX_STEPS):
    testing_input, testing_gt, testing_gt_1 = dtg.generate_data('testing')
    onn_measurement_value_test = sess.run(
        onn_measurement, feed_dict={placeholder_input: testing_input})

    save_measurement = np.reshape(onn_measurement_value_test, (M, N))
    if (OBJECT_AMPLITUDE_INPUT):
        save_input = np.reshape(np.real(testing_input), (M, N))
    else:
        save_input = (np.reshape(np.angle(testing_input), (M, N)) + np.pi) / 2 / np.pi
    save_input = save_input[M//2-OBJECT_ROW//2:M//2-OBJECT_ROW//2+OBJECT_ROW,
                            N//2-OBJECT_COL//2:N//2-OBJECT_COL//2+OBJECT_COL]
"FN", "TP", "R2", "DEMEAN" ]) record_i = 0 # %% settings n = 1600 split_ratio = 1000 / 1600 sample_size = 10 # 10 sample size ~= 1 HR test_cases = [(2, 20, 0.2)] # exp_no, d, noise_sigma # %% for exp_no, d, noise_sigma in test_cases: for sample_no in range(sample_size): # sample size for demean in (True, False): # %% create data data = generate_data(n, d, exp_no, noise_sigma) prediction = gp_predict(data, split_ratio, demean=demean) # %% calculate scores tn, fp, fn, tp = confusion_matrix(prediction["t"], prediction["t_hat"]).ravel() acc = (tn + tp) / (tn + fp + fn + tp) r = np.corrcoef(prediction["te"], prediction["te_hat"])[0, 1] r2 = r**2 # %% add records record.loc[record_i] = [ n, d, exp_no, noise_sigma, sample_no, acc, tn, fp, fn, tp, r2, demean ]
def load_create_data(data_type, data_out, is_logging_enabled=True,
                     fn_csv=None, label_nm=None):

    df_train, df_test, dset = None, None, None
    features = None

    if data_type in data_loader_mlab.get_available_datasets() + ['show'] \
            or fn_csv is not None:
        if fn_csv is not None:
            rval, dset = data_loader_mlab.load_dataset_from_csv(
                logger, fn_csv, label_nm)
        else:
            rval, dset = data_loader_mlab.get_dataset(data_type)
        assert rval == 0
        data_loader_mlab.dataset_log_properties(logger, dset)
        if is_logging_enabled:
            logger.info('warning no seed')

        df = dset['df']
        features = dset['features']
        labels = dset['targets']

        nsample = len(df)
        train_ratio = 0.8
        idx = np.random.permutation(nsample)
        ntrain = int(nsample * train_ratio)
        df_train = df.iloc[idx[:ntrain]]
        df_test = df.iloc[idx[ntrain:]]

        col_drop = utilmlab.col_with_nan(df)
        if is_logging_enabled and len(col_drop):
            print('warning: dropping features {}'
                  ', contains nan'.format(col_drop))
            time.sleep(2)

        features = [el for el in features if el not in col_drop]

        x_train = df_train[features].values
        y_train = df_train[labels].values
        x_test = df_test[features].values
        y_test = df_test[labels].values

        g_train, g_test = None, None

        y_train = one_hot_encoder(np.ravel(y_train))
        y_test = one_hot_encoder(np.ravel(y_test))
        if is_logging_enabled:
            logger.info('y: train:{} test:{}'.format(set(np.ravel(y_train)),
                                                     set(np.ravel(y_test))))
    else:
        x_train, y_train, g_train = generate_data(n=train_N,
                                                  data_type=data_type,
                                                  seed=train_seed,
                                                  out=data_out)
        x_test, y_test, g_test = generate_data(n=test_N,
                                               data_type=data_type,
                                               seed=test_seed,
                                               out=data_out)

    if is_logging_enabled:
        logger.info('{} {} {} {}'.format(x_train.shape, y_train.shape,
                                         x_test.shape, y_test.shape))
    return x_train, y_train, g_train, x_test, y_test, \
        g_test, df_train, df_test, dset, features
def main():

    # extras dictionary for importing to functions
    extras = {}

    ###########################################
    #
    #  S P I C E  C O D E
    #
    ###########################################

    # basic .bsp filename (generic, such as de430, etc)
    extras['basic_bsp'] = 'de430.bsp'
    # .bsp filename for mission
    extras['mission_bsp'] = 'DINO_kernel.bsp'
    # .tls filename
    extras['tls'] = 'naif0011.tls'

    # prep pyswice for the extraction of initial data
    # is the only reason that we do this is for lines 165 and 166?
    pyswice.furnsh_c(bskSpicePath + 'de430.bsp')
    pyswice.furnsh_c(dinoSpicePath + 'naif0011.tls')
    pyswice.furnsh_c(dinoSpicePath + 'DINO_kernel.bsp')

    DINO_kernel = dinoSpicePath + 'DINO_kernel.bsp'
    body_int = -100  # SP.spkobj(DINO_kernel)
    body_id_str = str(body_int)

    # search_window = pyswice.new_doubleArray(2)
    # pyswice.spkcov_c(DINO_kernel, body_int, search_window)
    # list_of_events = pyswice.wnfetd_c(search_window, 0)
    # tBSP_Start = list_of_events[0]
    # tBSP_End = list_of_events[1]

    ###########################################
    # Initial condition for spacecraft
    # data = io.loadmat('saves/obsData.mat')
    # trueEphemeris = {}
    # reference of sun to sc
    # trueEphemeris['spacecraft'] = np.copy(data['stateS'])
    # # reference of sun to Earth
    # trueEphemeris['S2E'] = np.copy(data['stateE'])
    # # reference of sun to Mars
    # trueEphemeris['S2M'] = np.copy(data['stateM'])

    # time span
    # timeSpan = data['etT'].flatten()

    # Filtering End Epochs
    start_et = pyswice.new_doubleArray(1)
    end_et = pyswice.new_doubleArray(1)
    pyswice.utc2et_c('23 JUL 2020 17:00:00', start_et)
    pyswice.utc2et_c('30 JUL 2020 17:00:00', end_et)
    start_et = pyswice.doubleArray_getitem(start_et, 0)
    end_et = pyswice.doubleArray_getitem(end_et, 0)

    # body vector for SUN, EARTH, MARS
    # CODE RELIES ON SUN BEING INDEXED AS 0
    extras['bodies'] = ['SUN', '3', '399']

    # specify primary and secondary
    extras['primary'] = 0
    extras['secondary'] = [1, 2]

    # respective GP vector
    extras['mu'] = [1.32712428 * 10**11, 3.986004415 * 10**5, 4.305 * 10**4]

    # abcorr for spkzer
    extras['abcorr'] = 'NONE'

    # reference frame
    extras['ref_frame'] = 'J2000'

    # SRP parameter
    # A/M ratio multiplied by solar pressure constant at 1 AU with adjustments
    extras['SRP'] = 0.3**2 / 14. * 149597870.**2 * 1358. / 299792458. / 1000.  # turboprop document Eq (64)

    # coefficient of reflectivity
    extras['cR'] = 1.

    # number of observations per beacon until moving to the next
    extras['repeat_obs'] = 1

    # SNC coefficient
    extras['SNC'] = (2 * 10**(-4))**3

    # Number of batch iterations
    extras['iterations'] = 3

    # Initializing the error
    extras['x_hat_0'] = 0

    # rng seed for debugging purposes
    extras['seed'] = 5

    ##################################################################################
    #
    # Camera/P&L Parameters
    #
    ##################################################################################

    # Focal Length (mm)
    extras['FoL'] = 100.
    angles = []
    extras['DCM_BI'] = np.eye(3)
    extras['DCM_TVB'] = np.eye(3)

    # Camera resolution (pixels)
    extras['resolution'] = [1024., 1024.]

    # width and height of pixels in camera
    extras['pixel_width'] = 5.
    extras['pixel_height'] = 5.

    # direction coefficient of pixel and line axes
    extras['pixel_direction'] = 1.
    extras['line_direction'] = 1.

    # Are we using the real dynamics for the ref or the trueData
    extras['realData'] = 'OFF'

    # Add anomaly detection parameters
    extras['anomaly'] = False
    extras['anomaly_num'] = 0
    extras['anomaly_threshold'] = 4

    ##################################################################################
    # Get Observation Times and Ephemerides. This outputs a full data set that is not
    # parsed in any way. Ephemerides for all objects at all times are given.
    trueEphemeris, timeSpan = dg.generate_data(sc_ephem_file=DINO_kernel,
                                               planet_beacons=['earth', 'mars barycenter'],
                                               beaconIDs=[],
                                               n_observations=24,
                                               start_et=start_et,
                                               end_et=end_et,
                                               extras=extras,
                                               realData=extras['realData'])

    tt_switch = 5

    print '------------------'
    print 'Filter Image Span : ', (timeSpan[-1] - timeSpan[0]) / (60 * 60 * 24), 'days'
    print '------------------'

    # number and keys of beacons. note that the true ephem is going to have one spot for the
    # sun, which is NOT a beacon. These are used in beaconBinSPICE.
    beacon_names = trueEphemeris.keys()
    beacon_names.remove('spacecraft')
    extras['unique_beacon_IDs'] = beacon_names
    extras['n_unique_beacons'] = len(beacon_names)

    ##################################################################################
    #
    # BLOCK A page 196
    #
    ##################################################################################

    # copy the initial conditions as the first sun-to-SC referenceStates from the SPICE file
    IC = np.copy(trueEphemeris['spacecraft'][:, 0])

    ######################################
    # UNMODELED ACCELERATION TERMS
    # ALPHA IMPLEMENTATION
    # CURRENTLY TOO MUCH HARD CODING
    ######################################
    IC = np.append(IC, np.array([0, 0, 0]))

    print 'IC', IC

    # spice_derived_state is only referenced here. Should these be axed?
    spice_derived_state = pyswice.new_doubleArray(6)
    lt = pyswice.new_doubleArray(1)
    pyswice.spkezr_c(body_id_str, timeSpan[0], 'J2000', 'None', 'Sun',
                     spice_derived_state, lt)

    # a priori uncertainty for the referenceStates
    covBar = np.zeros((IC.shape[0], IC.shape[0]))
    covBar[0, 0] = 10000**2
    covBar[1, 1] = 10000**2
    covBar[2, 2] = 10000**2
    covBar[3, 3] = .1**2
    covBar[4, 4] = .1**2
    covBar[5, 5] = .1**2
    covBar[6, 6] = (10**(-8))**2
    covBar[7, 7] = (10**(-8))**2
    covBar[8, 8] = (10**(-8))**2

    # add uncertainty to the IC
    initialPositionError = 1000 * np.divide(IC[0:3], np.linalg.norm(IC[0:3]))
    initialVelocityError = 0.01 * np.divide(IC[3:6], np.linalg.norm(IC[3:6]))
    IC[0:6] += np.append(initialPositionError, initialVelocityError)

    # uncertainty to be added in the form of noise to the measurables.
    # Takes the form of variance. Currently, the same value is used in both
    # the creation of the measurements as well as the weighting of the filter (W)
    observationUncertainty = np.identity(2)
    observationUncertainty[0, 0] = 0.2**2
    observationUncertainty[1, 1] = 0.2**2

    # the initial STM is an identity matrix
    phi0 = np.identity(IC.shape[0])

    # initiate a priori deviation
    stateDevBar = np.zeros(IC.shape)

    # initiate a filter output dictionary
    filterOutputs = {}

    ##################################################################################
    #
    # Get the noisy observations
    #
    ##################################################################################

    # observation inputs
    observationInputs = (trueEphemeris, observationUncertainty, angles, extras)

    # Get the observation data (dataObservations). This dictionary contains the SPICE data
    # from which values are calculated (key = 'SPICE'), the true observations before
    # uncertainty is added (key = 'truth') and the measured observations (key = 'measurements').
    # These are the 'measurements' values that are now simulating an actual observation,
    # and they are to be processed by the filter.
    # The dictionary also contains the list of beacons by name and order of processing.
    # This list of strings (key = 'beacons') is needed for
    # the filter's own beacon position generator
    dataObservations = getObs(observationInputs)

    # create dictionary for observation data to be inputs in filter. This is a more limited
    # dictionary than dataObservations and serves as the most "real" input
    filterObservations = {}
    filterObservations['measurements'] = dataObservations['measurements']
    filterObservations['beaconIDs'] = dataObservations['beacons']

    ##################################################################################
    #
    # Run the Filter
    #
    ##################################################################################

    # alter the coefficient of reflectivity to be zero. This negates any contribution of
    # modeling SRP
    extras['cR'] = 0.0

    # run the filter and output the referenceStates (including STMs), est states and extra data
    for itr in xrange(extras['iterations']):

        if itr > 0:
            # IC = est_state[0, :]
            IC += extraData['stateDevHatArray'][0, :]
            stateDevBar -= extraData['stateDevHatArray'][0, :]

        # the arguments for the filter are the IC, the first STM, the time span, the observables
        # data dictionary, a priori uncertainty, and the measurables' uncertainty,
        # as well as any extras
        if itr == 0:
            extras['oldPost'] = np.zeros([len(timeSpan), 2])

        filterInputs = (IC, phi0, timeSpan, filterObservations,
                        covBar, observationUncertainty, stateDevBar, angles, extras)
        # run filter function
        referenceState, estimatedState, extraData = run_batch(filterInputs)

        extras['oldPost'] = extraData['postfit residuals']

        # save all outputs into the dictionary with a name associated with the iteration
        filterOutputs[str(itr)] = {}
        filterOutputs[str(itr)]['referenceState'] = referenceState
        filterOutputs[str(itr)]['estimatedState'] = estimatedState
        filterOutputs[str(itr)]['extraData'] = extraData

        ##################################################################################
        #
        # \ BLOCK A page 196
        #
        ##################################################################################

        # Iteration Directory
        dirIt = 'Batch_Iteration' + str(itr + 1)

        # Make directory for the iterations
        if not os.path.exists(dirIt):
            os.makedirs(dirIt)

        # File to write data
        writingText(itr + 1, referenceState, estimatedState, trueEphemeris, extraData,
                    initialPositionError, initialVelocityError)

        # calculate the difference between the perturbed reference and true trajectories:
        # reference state errors
        stateError = referenceState[:, 0:6] - trueEphemeris['spacecraft'].T

        # compare the estimated and true trajectories: estimated state errors
        stateErrorHat = estimatedState[:, 0:6] - trueEphemeris['spacecraft'].T

        plotData = extraData
        plotData['postfit delta'] = extraData['postfit changes']
        plotData['states'] = estimatedState
        plotData['truth'] = dataObservations['truth']
        plotData['beacon_list'] = dataObservations['beacons']
        plotData['timeSpan'] = timeSpan
        plotData['dirIt'] = dirIt
        plotData['err'] = stateError
        plotData['stateErrorHat'] = stateErrorHat
        plotData['obs_uncertainty'] = observationUncertainty
        plotData['referenceState'] = referenceState
        plotData['trueEphemeris'] = trueEphemeris
        plotData['extras'] = extras
        plotData['acc_est'] = 'ON'
        PF(plotData)

        # Write the output to the pickle file
        fileTag = 'SRP_test'
        file = dirIt + '/' + fileTag + '_data.pkl'
        pklFile = open(file, 'wb')
        pickle.dump(plotData, pklFile, -1)
        pklFile.flush()
        pklFile.close()

        [anomaly_bool, anomaly_num] = extraData['anomaly_detected']
        if anomaly_bool == True:
            print '**********************************************************'
            print 'Anomaly Detected - Estimates are not to be trusted'
            print '**********************************************************'
            print anomaly_num, 'Residuals out of bounds'
            return
params = util.create_param_dict(config)
df = pd.DataFrame(util.parseParams(params))
all_predY = None
all_error = None
mean_errors = []
std_errors = []
for iter_number in range(
        params['exp_details__num_iterations_per_setting']):
    # generate mobile points, base stations, and angles
    mobiles, bases, angles = data_generation.generate_data(
        params['data__num_pts'],
        params['data__num_stations'],
        params['data__ndims'],
        pts_r=3.,
        bs_r=4,
        bs_type=params['data__bs_type'],
        points_type=params['data__data_dist'])
    # IMPORTANT: remember to add noise before replicating data (e.g., for snbp-mlp)
    if params['noise__addnoise_train']:
        angles, mobiles = noise_models.add_noise_dispatcher(
            angles, mobiles,
            params['noise__noise_model'],
            params['data__ndims'],
            base_idxs=params['noise__bases_to_noise'],
            noise_params=params['noise__noise_params'])
    if params['NN__type'] == 'snbp-mlp' or params['NN__type'] == 'smlp':
import numpy as np
import matplotlib.pyplot as plt
# torch imports needed for nn.MSELoss / optim.RMSprop below
import torch
import torch.nn as nn
import torch.optim as optim

import data_generation
import models

batch_size = 64
seq_len = 100  # This is equivalent to time steps of the sequence in keras
input_size = 1
hidden_size = 51
target_size = 1
nb_samples = 1000
nb_epochs_mainTraining = 2000
nb_epochs_fineTuning = 200

X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(
    data_fn=data_generation.sine_2, nb_samples=nb_samples, seq_len=seq_len)

rnn = models.lstm_rnn_gru(input_size=input_size,
                          hidden_size=hidden_size,
                          cell_type="lstm").cuda()
for module in rnn.modules():
    print module

loss_fn = nn.MSELoss()
optimizer = optim.RMSprop(rnn.parameters(), lr=0.00001, momentum=0.9)
# optimizer = optim.SGD(rnn.parameters(), lr=0.000003, momentum=0.95)
# optimizer = optim.LBFGS(rnn.parameters())
# optimizer = optim.Adam(rnn.parameters(), lr=0.00001)
# optimizer = optim.Adagrad(rnn.parameters(), lr=0.0001)

""" Training with ground truth -- The input is the ground truth """
try:
# Synthetic or leukemia dataset
dataset = "leukemia"

if dataset == "synthetic":
    # Generate data set
    n_samples = 100
    n_features = 200
    sigma = 1.
    sparsity = 0.9
    corr = 0.5
    random_state = np.random.randint(0, 100)
    X, y, true_beta, true_sigma = generate_data(n_samples, n_features, sigma,
                                                sparsity, corr,
                                                random_state=random_state)

if dataset == "leukemia":
    data = fetch_mldata('leukemia')
    X = data.data
    y = data.target
    X = X.astype(float)
    y = y.astype(float)

n_samples, n_features = X.shape

NO_SCREENING = 0
GAPSAFE = 1
WSTRT_SIGMA_0 = 2
BOUND = 3
Tstruct_file = structure + ".txt"
struct_directory = "../../data/structures/"

data_directory = path.join(data_directory, structure)
if not path.isdir(data_directory):
    os.mkdir(data_directory)

if args.distribution == "gaussian" or args.distribution == "student":
    r_subdir = 'r' + str(args.correlation).replace('.', '')
    data_directory = path.join(data_directory, r_subdir)
    if not path.isdir(data_directory):
        os.mkdir(data_directory)

# If the existing samples do not have the expected length, remove them all
ldir = os.listdir(data_directory)
if ldir:
    with open(path.join(data_directory, ldir[0]), 'r') as f:
        if len(f.read().split('\n')) != (sample_size + 2):
            for l in ldir:
                os.remove(path.join(data_directory, l))

n_existing_sample = len(os.listdir(data_directory))

Tstruct = load.load_struct(path.join(struct_directory, Tstruct_file))
ndag = otagr.NamedDAG(Tstruct)

for i in range(n_existing_sample, n_sample):
    sample = dg.generate_data(ndag, sample_size, args.distribution, correlation)
    sample.exportToCSVFile(path.join(data_directory, data_file_name) +
                           '_' + str(i + 1).zfill(2) + ".csv", ',')