def setUp(self):
  # Use os.path.join so the export dir is created inside the temp dir,
  # not as a sibling with a fused name.
  self._export_dir_base = os.path.join(tempfile.mkdtemp(), "export")
  gfile.MkDir(self._export_dir_base)
def testScalarsRealistically(self):
  """Test accumulator by writing values and then reading them."""

  def FakeScalarSummary(tag, value):
    value = summary_pb2.Summary.Value(tag=tag, simple_value=value)
    summary = summary_pb2.Summary(value=[value])
    return summary

  directory = os.path.join(self.get_temp_dir(), 'values_dir')
  if gfile.IsDirectory(directory):
    gfile.DeleteRecursively(directory)
  gfile.MkDir(directory)

  writer = writer_lib.FileWriter(directory, max_queue=100)

  with ops.Graph().as_default() as graph:
    _ = constant_op.constant([2.0, 1.0])
    # Add a graph to the summary writer.
    writer.add_graph(graph)

    meta_graph_def = saver.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True))
    writer.add_meta_graph(meta_graph_def)

  run_metadata = config_pb2.RunMetadata()
  device_stats = run_metadata.step_stats.dev_stats.add()
  device_stats.device = 'test device'
  writer.add_run_metadata(run_metadata, 'test run')

  # Write a bunch of events using the writer.
  for i in xrange(30):
    summ_id = FakeScalarSummary('id', i)
    summ_sq = FakeScalarSummary('sq', i * i)
    writer.add_summary(summ_id, i * 5)
    writer.add_summary(summ_sq, i * 5)
  writer.flush()

  # Verify that we can load those events properly.
  acc = ea.EventAccumulator(directory)
  acc.Reload()
  self.assertTagsEqual(acc.Tags(), {
      ea.IMAGES: [],
      ea.AUDIO: [],
      ea.SCALARS: ['id', 'sq'],
      ea.HISTOGRAMS: [],
      ea.COMPRESSED_HISTOGRAMS: [],
      ea.GRAPH: True,
      ea.META_GRAPH: True,
      ea.RUN_METADATA: ['test run']
  })
  id_events = acc.Scalars('id')
  sq_events = acc.Scalars('sq')
  self.assertEqual(30, len(id_events))
  self.assertEqual(30, len(sq_events))
  for i in xrange(30):
    self.assertEqual(i * 5, id_events[i].step)
    self.assertEqual(i * 5, sq_events[i].step)
    self.assertEqual(i, id_events[i].value)
    self.assertEqual(i * i, sq_events[i].value)

  # Write a few more events to test incremental reloading.
  for i in xrange(30, 40):
    summ_id = FakeScalarSummary('id', i)
    summ_sq = FakeScalarSummary('sq', i * i)
    writer.add_summary(summ_id, i * 5)
    writer.add_summary(summ_sq, i * 5)
  writer.flush()

  # Verify we can now see all of the data.
  acc.Reload()
  id_events = acc.Scalars('id')
  sq_events = acc.Scalars('sq')
  self.assertEqual(40, len(id_events))
  self.assertEqual(40, len(sq_events))
  for i in xrange(40):
    self.assertEqual(i * 5, id_events[i].step)
    self.assertEqual(i * 5, sq_events[i].step)
    self.assertEqual(i, id_events[i].value)
    self.assertEqual(i * i, sq_events[i].value)
  self.assertProtoEquals(graph.as_graph_def(add_shapes=True), acc.Graph())
  self.assertProtoEquals(meta_graph_def, acc.MetaGraph())
def get_stimulus_response(src_dir, src_dataset, stim_id,
                          boundary=0, if_get_stim=True):
  """Get stimulus-response data for all datasets.

  Args:
    src_dir: Location of all joint embedding datasets.
    src_dataset: Dataset corresponding to a specific stimulus.
    stim_id: String ID of the stimulus.
    boundary: Remove cells within this distance of the edges.
    if_get_stim: If False, do not load the stimulus.

  Returns:
    stimulus: Stimulus matrix (Time x dimx x dimy).
    responses: Discretized cell responses (Time x n_cells).
    dimx: X dimension of the stimulus.
    dimy: Y dimension of the stimulus.
    num_cell_types: Number of cell types.
  """
  # Copy data locally.
  # Since gfile does not support reading large files directly from CNS,
  # we need to copy the data locally first.
  src = os.path.join(src_dir, src_dataset)
  if not gfile.IsDirectory(FLAGS.tmp_dir):
    gfile.MkDir(FLAGS.tmp_dir)
  dst = os.path.join(FLAGS.tmp_dir, src_dataset)
  print('Source %s' % src)
  print('Destination %s' % dst)
  copy_locally(src, dst)

  # Load stimulus-response data.
  if if_get_stim:
    data = h5py.File(os.path.join(dst, 'stimulus.mat'), 'r')
    stimulus = np.array(data.get('stimulus'))

    # Scale the dynamic range of the stimulus to [-0.5, 0.5].
    stim_min = np.min(stimulus)
    stim_max = np.max(stimulus)
    stimulus -= stim_min
    stimulus /= (stim_max - stim_min)
    stimulus -= 0.5

    # Make the stimulus zero mean.
    stimulus -= np.mean(stimulus)
  else:
    stimulus = None

  # Load responses from multiple retinas.
  datasets_list = os.path.join(dst, 'datasets.txt')
  datasets = open(datasets_list, 'r').read()
  training_datasets = [line for line in datasets.splitlines()]

  num_cell_types = 2
  dimx_desired = 80
  dimy_desired = 40

  if stimulus is not None:
    dimx_actual = stimulus.shape[1]
    dimy_actual = stimulus.shape[2]
  else:
    stix_sz = np.int(src_dataset.split('-')[1])
    dimx_actual = np.int(640 / stix_sz)
    dimy_actual = np.int(320 / stix_sz)

  responses = []
  for idata in training_datasets:
    print(idata)
    data_file = os.path.join(dst, idata)
    data = sio.loadmat(data_file)
    data.update({'stimulus_key': stim_id})
    process_dataset(data, dimx_desired, dimy_desired, dimx_actual, dimy_actual,
                    num_cell_types, boundary=boundary)
    data.update({'piece': idata})
    responses += [data]

  if FLAGS.minimize_disk_usage:
    gfile.DeleteRecursively(dst)

  return stimulus, responses, dimx_desired, dimy_desired, num_cell_types
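# Hedged usage sketch (not part of the original module): a plausible call to
# get_stimulus_response. The paths and dataset name are hypothetical, and the
# sketch assumes FLAGS.tmp_dir / FLAGS.minimize_disk_usage are defined above.
#
#   stimulus, responses, dimx, dimy, n_types = get_stimulus_response(
#       src_dir='/path/to/datasets',      # hypothetical
#       src_dataset='retina-8',           # hypothetical; '-8' is the stixel size
#       stim_id='wn', boundary=2, if_get_stim=True)
#   print(stimulus.shape)   # (Time, dimx, dimy)
#   print(len(responses))   # one dict per retina listed in datasets.txt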
def _create_test_export_dir(export_dir_base):
  export_dir = _get_timestamped_export_dir(export_dir_base)
  gfile.MkDir(export_dir)
  # Sleep so that the next timestamped export directory is guaranteed to be
  # distinct (timestamps have one-second resolution).
  time.sleep(2)
  return export_dir
def _CreateCleanDirectory(path):
  if gfile.IsDirectory(path):
    gfile.DeleteRecursively(path)
  gfile.MkDir(path)
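# Hedged usage sketch (not in the original source): ensure an empty scratch
# directory before a test writes into it. The path is hypothetical.
#
#   _CreateCleanDirectory('/tmp/my_test_dir')  # directory now exists and is empty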
def _create_test_export_dir(export_dir_base):
  export_dir = saved_model_export_utils.get_timestamped_export_dir(
      export_dir_base)
  gfile.MkDir(export_dir)
  # Sleep so that consecutive calls get distinct second-resolution timestamps.
  time.sleep(1)
  return export_dir
def main(argv):
  cell_idx = FLAGS.taskid
  file_list = gfile.ListDirectory(FLAGS.src_dir)
  cell_file = file_list[cell_idx]
  print('Cell file %s' % cell_file)

  # Copy data locally.
  dst = os.path.join(FLAGS.tmp_dir, cell_file)
  if not gfile.Exists(dst):
    print('Started Copy')
    src = os.path.join(FLAGS.src_dir, cell_file)
    if not gfile.IsDirectory(FLAGS.tmp_dir):
      gfile.MkDir(FLAGS.tmp_dir)
    gfile.Copy(src, dst)
    print('File copied to destination')
  else:
    print('File exists')

  # Load stimulus-response data.
  data = sio.loadmat(dst)
  trainMov_filterNSEM = data['trainMov_filterNSEM']
  testMov_filterNSEM = data['testMov_filterNSEM']
  trainSpksNSEM = data['trainSpksNSEM']
  testSpksNSEM = data['testSpksNSEM']
  mask = data['mask']

  trainMov_filterWN = data['trainMov_filterWN']
  testMov_filterWN = data['testMov_filterWN']
  trainSpksWN = data['trainSpksWN']
  testSpksWN = data['testSpksWN']

  # Get WN and NSEM stimulus and response.
  stimulus_WN = np.array(trainMov_filterWN.transpose(), dtype='float32')
  response_WN = np.array(np.squeeze(trainSpksWN), dtype='float32')
  stimulus_NSEM = np.array(trainMov_filterNSEM.transpose(), dtype='float32')
  response_NSEM = np.array(np.squeeze(trainSpksNSEM), dtype='float32')
  print('Prepared data')

  # Set random seed.
  np.random.seed(23)

  # Make train/test partitions.
  # WN data.
  ifrac = 0.8
  tms_train_WN = np.arange(0, np.floor(stimulus_WN.shape[0] *
                                       ifrac)).astype(np.int)
  tms_test_WN = np.arange(np.floor(stimulus_WN.shape[0] * ifrac),
                          np.floor(stimulus_WN.shape[0])).astype(np.int)

  # NSEM data.
  ifrac = 0.8
  tms_train_NSEM = np.arange(0, np.floor(stimulus_NSEM.shape[0] *
                                         ifrac)).astype(np.int)
  tms_test_NSEM = np.arange(np.floor(stimulus_NSEM.shape[0] * ifrac),
                            np.floor(stimulus_NSEM.shape[0])).astype(np.int)
  print('Made partitions')

  # Earlier experiment variants, kept disabled below.
  '''
  eps = 1e-7
  for Nsub in [1, 2, 3, 4, 5, 7, 10]:
    print('Fitting started')

    # WN fit.
    op = jnt_model.Flat_clustering(stimulus_WN, response_WN, Nsub,
                                   tms_train_WN, tms_test_WN,
                                   steps_max=10000, eps=eps)
    K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params = op
    WN_fit = {'K': K, 'b': b,
              'lam_log': lam_log, 'lam_log_test': lam_log_test}
    print('WN fit done')

    # NSEM fit: first fit just the scales (NL + b + Kscale).
    K, b, params, loss_log, loss_log_test = jnt_model.fit_scales(
        stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
        stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
        Ns=Nsub, K=WN_fit['K'], b=WN_fit['b'], params=[1.0, 0.0],
        lr=0.001, eps=eps)
    NSEM_fit_scales = {'K': K, 'b': b, 'nl_params': params,
                       'lam_log': loss_log, 'lam_log_test': loss_log_test}
    print('NSEM scales fit')

    # Fit all parameters.
    K, b, params, loss_log, loss_log_test = jnt_model.fit_all(
        stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
        stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
        Ns=Nsub, K=NSEM_fit_scales['K'], b=NSEM_fit_scales['b'],
        train_phase=3, params=NSEM_fit_scales['nl_params'],
        lr=0.001, eps=eps)
    NSEM_fit_full = {'K': K, 'b': b, 'nl_params': params,
                     'lam_log': loss_log, 'lam_log_test': loss_log_test}
    print('NSEM all fit')

    save_dict = {'WN_fit': WN_fit,
                 'NSEM_fit_scales': NSEM_fit_scales,
                 'NSEM_fit_full': NSEM_fit_full}
    pickle.dump(save_dict,
                gfile.Open(os.path.join(
                    FLAGS.save_path,
                    'Cell_%s_nsub_%d_suff_%d_jnt.pkl' %
                    (cell_file, Nsub, 1)), 'w'))
    print('Saved results')
  '''

  '''
  eps = 1e-7
  for Nsub in [1, 2, 3, 4, 5, 7, 10]:
    print('Fitting started')

    # Fit all parameters from a random initialization.
    K = 2 * rng.rand(stimulus_NSEM.shape[1], Nsub) - 0.5
    b = 2 * rng.rand(Nsub) - 0.5
    K, b, params, loss_log, loss_log_test = jnt_model.fit_all(
        stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
        stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
        Ns=Nsub, K=K.astype(np.float32), b=b.astype(np.float32),
        train_phase=3, params=[1.0, 0.0], lr=0.001, eps=eps)
    NSEM_fit_full = {'K': K, 'b': b, 'nl_params': params,
                     'lam_log': loss_log, 'lam_log_test': loss_log_test}
    print('NSEM all (random) fit')

    save_dict = {'NSEM_fit_full_random': NSEM_fit_full}
    pickle.dump(save_dict,
                gfile.Open(os.path.join(
                    FLAGS.save_path,
                    'Cell_%s_nsub_%d_suff_%d_randomly_init.pkl' %
                    (cell_file, Nsub, 1)), 'w'))
    print('Saved results')
  '''

  eps = 1e-7
  for Nsub in [1, 2, 3, 4, 5, 7, 10]:
    print('Fitting started')

    # NSEM clustering fit.
    op = jnt_model.Flat_clustering(stimulus_NSEM, response_NSEM, Nsub,
                                   tms_train_NSEM, tms_test_NSEM,
                                   steps_max=10000, eps=eps)
    K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params = op
    NSEM_clustering = {'K': K, 'b': b,
                       'lam_log': lam_log, 'lam_log_test': lam_log_test}
    print('NSEM clustering fit')

    # NSEM fit: first fit just the scales (NL + b + Kscale).
    K, b, params, loss_log, loss_log_test = jnt_model.fit_scales(
        stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
        stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
        Ns=Nsub, K=NSEM_clustering['K'], b=NSEM_clustering['b'],
        params=[1.0, 0.0], lr=0.001, eps=eps)
    NSEM_fit_scales = {'K': K, 'b': b, 'nl_params': params,
                       'lam_log': loss_log, 'lam_log_test': loss_log_test}
    print('NSEM scales fit')

    # Fit all parameters.
    K, b, params, loss_log, loss_log_test = jnt_model.fit_all(
        stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
        stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
        Ns=Nsub, K=NSEM_fit_scales['K'], b=NSEM_fit_scales['b'],
        train_phase=3, params=NSEM_fit_scales['nl_params'],
        lr=0.001, eps=eps)
    NSEM_fit_full = {'K': K, 'b': b, 'nl_params': params,
                     'lam_log': loss_log, 'lam_log_test': loss_log_test}
    print('NSEM all fit')

    save_dict = {'NSEM_clustering': NSEM_clustering,
                 'NSEM_fit_scales': NSEM_fit_scales,
                 'NSEM_fit_full': NSEM_fit_full}
    pickle.dump(save_dict,
                gfile.Open(os.path.join(
                    FLAGS.save_path,
                    'Cell_%s_nsub_%d_suff_%d_NSEM_3_steps.pkl' %
                    (cell_file, Nsub, 1)), 'w'))
    print('Saved results')
def initialize(sess):
  """Initialize data and model."""
  if FLAGS.jobid >= 0:
    data.log_filename = os.path.join(FLAGS.train_dir, "log%d" % FLAGS.jobid)
  data.print_out("NN ", newline=False)

  # Set random seed.
  seed = FLAGS.random_seed + max(0, FLAGS.jobid)
  tf.set_random_seed(seed)
  random.seed(seed)
  np.random.seed(seed)

  # Check data sizes.
  assert data.bins
  min_length = 3
  max_length = min(FLAGS.max_length, data.bins[-1])
  assert max_length + 1 > min_length
  while len(data.bins) > 1 and data.bins[-2] > max_length + EXTRA_EVAL:
    data.bins = data.bins[:-1]
  assert data.bins[0] > FLAGS.rx_step
  data.forward_max = max(FLAGS.forward_max, data.bins[-1])
  nclass = min(FLAGS.niclass, FLAGS.noclass)
  data_size = FLAGS.train_data_size if FLAGS.mode == 0 else 1000

  # Initialize data for each task.
  tasks = FLAGS.task.split("-")
  for t in tasks:
    for l in xrange(max_length + EXTRA_EVAL - 1):
      data.init_data(t, l, data_size, nclass)
    data.init_data(t, data.bins[-2], data_size, nclass)
    data.init_data(t, data.bins[-1], data_size, nclass)
    end_size = 4 * 1024 if FLAGS.mode > 0 else 1024
    data.init_data(t, data.forward_max, end_size, nclass)

  # Print out parameters.
  curriculum = FLAGS.curriculum_bound
  msg1 = ("layers %d kw %d h %d kh %d relax %d batch %d noise %.2f task %s"
          % (FLAGS.nconvs, FLAGS.kw, FLAGS.height, FLAGS.kh, FLAGS.rx_step,
             FLAGS.batch_size, FLAGS.grad_noise_scale, FLAGS.task))
  msg2 = "data %d %s" % (FLAGS.train_data_size, msg1)
  msg3 = ("cut %.2f pull %.3f lr %.2f iw %.2f cr %.2f nm %d d%.4f gn %.2f %s"
          % (FLAGS.cutoff, FLAGS.pull_incr, FLAGS.lr, FLAGS.init_weight,
             curriculum, FLAGS.nmaps, FLAGS.dropout, FLAGS.max_grad_norm,
             msg2))
  data.print_out(msg3)

  # Create checkpoint directory if it does not exist.
  checkpoint_dir = os.path.join(
      FLAGS.train_dir,
      "neural_gpu%s" % ("" if FLAGS.jobid < 0 else str(FLAGS.jobid)))
  if not gfile.IsDirectory(checkpoint_dir):
    data.print_out("Creating checkpoint directory %s." % checkpoint_dir)
    gfile.MkDir(checkpoint_dir)

  # Create model and initialize it.
  tf.get_variable_scope().set_initializer(
      tf.uniform_unit_scaling_initializer(factor=1.8 * FLAGS.init_weight))
  model = neural_gpu.NeuralGPU(
      FLAGS.nmaps, FLAGS.nmaps, FLAGS.niclass, FLAGS.noclass, FLAGS.dropout,
      FLAGS.rx_step, FLAGS.max_grad_norm, FLAGS.cutoff, FLAGS.nconvs,
      FLAGS.kw, FLAGS.kh, FLAGS.height, FLAGS.mode, FLAGS.lr,
      FLAGS.pull, FLAGS.pull_incr, min_length + 3)
  data.print_out("Created model.")
  sess.run(tf.initialize_all_variables())
  data.print_out("Initialized variables.")

  # Load model from parameters if a checkpoint exists.
  ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
  if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
    data.print_out("Reading model parameters from %s"
                   % ckpt.model_checkpoint_path)
    model.saver.restore(sess, ckpt.model_checkpoint_path)

  # Check if there are ensemble models and get their checkpoints.
  ensemble = []
  ensemble_dir_list = [d for d in FLAGS.ensemble.split(",") if d]
  for ensemble_dir in ensemble_dir_list:
    ckpt = tf.train.get_checkpoint_state(ensemble_dir)
    if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
      data.print_out("Found ensemble model %s" % ckpt.model_checkpoint_path)
      ensemble.append(ckpt.model_checkpoint_path)

  # Return the model and needed variables.
  return (model, min_length, max_length, checkpoint_dir, curriculum, ensemble)
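# Hedged usage sketch (not in the original file): the typical call pattern,
# assuming the surrounding Neural GPU training script defines FLAGS and the
# imports used above.
#
#   with tf.Session() as sess:
#     (model, min_length, max_length, checkpoint_dir,
#      curriculum, ensemble) = initialize(sess)
#     # ... run training steps with `model` here ...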
@classmethod
def setUpClass(cls):  # pylint: disable=invalid-name
  global bucket, get_oss_path
  bucket = os.getenv("OSS_FS_TEST_BUCKET")
  get_oss_path = lambda p: os.path.join("oss://" + bucket, "oss_fs_test", p)
  gfile.MkDir(get_oss_path(""))
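# Hedged usage sketch (not part of the original test): exercising gfile
# against the OSS bucket configured above. The file name is hypothetical and
# this assumes OSS_FS_TEST_BUCKET points at a reachable bucket.
#
#   path = get_oss_path("hello.txt")
#   with gfile.GFile(path, "w") as f:
#     f.write("hello oss")
#   print(gfile.Exists(path))  # True if the write succeeded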
def testScalarsRealistically(self):
  """Test accumulator by writing values and then reading them."""

  def FakeScalarSummary(tag, value):
    value = tf.Summary.Value(tag=tag, simple_value=value)
    summary = tf.Summary(value=[value])
    return summary

  directory = os.path.join(self.get_temp_dir(), 'values_dir')
  if gfile.IsDirectory(directory):
    gfile.DeleteRecursively(directory)
  gfile.MkDir(directory)

  writer = tf.train.SummaryWriter(directory, max_queue=100)
  graph_def = tf.GraphDef(node=[tf.NodeDef(name='A', op='Mul')])
  # Add a graph to the summary writer.
  writer.add_graph(graph_def)

  # Write a bunch of events using the writer.
  for i in xrange(30):
    summ_id = FakeScalarSummary('id', i)
    summ_sq = FakeScalarSummary('sq', i * i)
    writer.add_summary(summ_id, i * 5)
    writer.add_summary(summ_sq, i * 5)
  writer.flush()

  # Verify that we can load those events properly.
  acc = ea.EventAccumulator(directory)
  acc.Reload()
  self.assertTagsEqual(acc.Tags(), {
      ea.IMAGES: [],
      ea.SCALARS: ['id', 'sq'],
      ea.HISTOGRAMS: [],
      ea.COMPRESSED_HISTOGRAMS: [],
      ea.GRAPH: True
  })
  id_events = acc.Scalars('id')
  sq_events = acc.Scalars('sq')
  self.assertEqual(30, len(id_events))
  self.assertEqual(30, len(sq_events))
  for i in xrange(30):
    self.assertEqual(i * 5, id_events[i].step)
    self.assertEqual(i * 5, sq_events[i].step)
    self.assertEqual(i, id_events[i].value)
    self.assertEqual(i * i, sq_events[i].value)

  # Write a few more events to test incremental reloading.
  for i in xrange(30, 40):
    summ_id = FakeScalarSummary('id', i)
    summ_sq = FakeScalarSummary('sq', i * i)
    writer.add_summary(summ_id, i * 5)
    writer.add_summary(summ_sq, i * 5)
  writer.flush()

  # Verify we can now see all of the data.
  acc.Reload()
  id_events = acc.Scalars('id')  # Re-fetch after reloading.
  sq_events = acc.Scalars('sq')
  self.assertEqual(40, len(id_events))
  self.assertEqual(40, len(sq_events))
  for i in xrange(40):
    self.assertEqual(i * 5, id_events[i].step)
    self.assertEqual(i * 5, sq_events[i].step)
    self.assertEqual(i, id_events[i].value)
    self.assertEqual(i * i, sq_events[i].value)
  self.assertProtoEquals(graph_def, acc.Graph())
def main(argv):
  # Copy data locally.
  dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')
  if not gfile.Exists(dst):
    print('Started Copy')
    src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
    if not gfile.IsDirectory(FLAGS.tmp_dir):
      gfile.MkDir(FLAGS.tmp_dir)
    gfile.Copy(src, dst)
    print('File copied to destination')
  else:
    print('File exists')

  # Load stimulus.
  file = h5py.File(dst, 'r')

  # Load masked movie.
  data = file.get('maskedMovdd')
  stimulus = np.array(data)

  # Load cell response.
  cells = file.get('cells')
  ttf_log = file.get('ttf_log')
  ttf_avg = file.get('ttf_avg')

  # Load spike response of cells.
  data = file.get('Y')
  responses = np.array(data)

  # Get mask.
  total_mask_log = file.get('totalMaskAccept_log')
  print('Got data')

  # Get cell and mask from the task id.
  nsub_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  if FLAGS.taskid < 107 * len(nsub_list):
    cell_idx = [np.int(np.floor(FLAGS.taskid / len(nsub_list)))]
    cellid = cells[np.int(np.floor(FLAGS.taskid / len(nsub_list)))]
    Nsub = nsub_list[FLAGS.taskid % len(nsub_list)]
    partition_list = np.arange(10)

  elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10:
    cell_idx = [39, 42, 44, 45]
    cellid = cells[cell_idx]
    cellid = np.squeeze(cellid)
    task_id_effective = FLAGS.taskid - 107 * len(nsub_list)
    partition_list = [task_id_effective % 10]
    nsub_list_pop = np.arange(4, 41)
    Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

  elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10 + 19 * 10:
    cell_idx = [39, 42]
    cellid = cells[cell_idx]
    cellid = np.squeeze(cellid)
    task_id_effective = FLAGS.taskid - 107 * len(nsub_list) - 37 * 10
    partition_list = [task_id_effective % 10]
    nsub_list_pop = np.arange(2, 21)
    Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

  elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10 + 19 * 10 + 19 * 10:
    cell_idx = [44, 45]
    cellid = cells[cell_idx]
    cellid = np.squeeze(cellid)
    task_id_effective = FLAGS.taskid - 107 * len(nsub_list) - 37 * 10 - 19 * 10
    partition_list = [task_id_effective % 10]
    nsub_list_pop = np.arange(2, 21)
    Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

  print(cell_idx)
  print(Nsub)

  mask = (total_mask_log[cell_idx, :].sum(0) != 0)
  mask_matrix = np.reshape(mask != 0, [40, 80])

  # Make the mask bigger: add one row on each side.
  r, c = np.where(mask_matrix)
  mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
  mask = np.ndarray.flatten(mask_matrix)

  stim_use = stimulus[:, mask]
  resp_use = responses[:, cell_idx]
  print('Prepared data')

  # Keep the last 10% as test data.
  np.random.seed(23)
  frac_test = 0.1
  tms_test = np.arange(np.floor(stim_use.shape[0] * (1 - frac_test)),
                       np.floor(stim_use.shape[0])).astype(np.int)

  # Random partitions of the remaining data.
  n_partitions = 10
  tms_train_validate = np.arange(
      0, np.floor(stim_use.shape[0] * (1 - frac_test))).astype(np.int)

  frac_validate = 0.1
  partitions = []
  for _ in range(n_partitions):
    perm = np.random.permutation(tms_train_validate)
    # Cast the split point to int so it is a valid slice index.
    split = np.int(np.floor((1 - frac_validate) * perm.shape[0]))
    tms_train = perm[:split]
    tms_validate = perm[split:]
    partitions += [{'tms_train': tms_train,
                    'tms_validate': tms_validate,
                    'tms_test': tms_test}]
  print('Made partitions')

  # Do fitting.
  for ipartition in partition_list:
    print(cell_idx, cellid, Nsub)
    ss = '_'.join([str(ic) for ic in cellid])

    save_filename = os.path.join(
        FLAGS.save_path,
        'Cell_%s_nsub_%d_part_%d_jnt.pkl' % (ss, Nsub, ipartition))

    if not gfile.Exists(save_filename):
      print('Fitting started')
      op = jnt_model.Flat_clustering_jnt(
          stim_use, resp_use, Nsub,
          partitions[ipartition]['tms_train'],
          partitions[ipartition]['tms_validate'],
          steps_max=10000, eps=1e-9)
      K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params = op
      print('Fitting done')

      save_dict = {'K': K, 'b': b,
                   'lam_log': lam_log, 'lam_log_test': lam_log_test,
                   'fitting_phase': fitting_phase, 'fit_params': fit_params}
      pickle.dump(save_dict, gfile.Open(save_filename, 'w'))
      print('Saved results')
def _base_export_fn(unused_estimator, export_dir_base,
                    unused_checkpoint_path=None):
  base_path = os.path.join(export_dir_base, "e1")
  gfile.MkDir(base_path)
  return base_path
def _post_export_fn(orig_path, new_path):
  assert orig_path.endswith("/e1")
  post_export_path = os.path.join(new_path, "rewrite")
  gfile.MkDir(post_export_path)
  return post_export_path
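# Hedged usage sketch (not in the original test): how the two hooks above
# compose when chained by the export machinery. Paths are hypothetical and
# the parent directories are assumed to exist; treat this as illustrative.
#
#   base = _base_export_fn(estimator, "/tmp/exports")    # -> /tmp/exports/e1
#   final = _post_export_fn(base, "/tmp/exports/post")   # -> /tmp/exports/post/rewrite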
def main(argv):
  # Parse task params: read the line of the params file for this task.
  with gfile.Open(FLAGS.task_params_file, 'r') as f:
    for _ in range(FLAGS.taskid + 1):
      line = f.readline()
  print(line)

  # Get task parameters by parsing the line.
  line_split = line.split(';')
  cells = gfile.ListDirectory(FLAGS.src_dir)
  cell_idx = line_split[0]
  cell_idx = cell_idx[1:-1].split(',')
  nsub = int(line_split[1])
  projection_type = line_split[2]
  lam_proj = float(line_split[3])
  ipartition = int(line_split[4][:-1])

  # Copy data for all the cells.
  cell_str_final = ''
  dst_log = []
  for icell in cell_idx:
    icell = int(icell)
    cell_string = cells[icell]
    cell_str_final += cell_string

    # Copy data for the corresponding task.
    dst = os.path.join(FLAGS.tmp_dir, cell_string)
    dst_log += [dst]
    if not gfile.Exists(dst):
      print('Started Copy')
      src = os.path.join(FLAGS.src_dir, cell_string)
      if not gfile.IsDirectory(FLAGS.tmp_dir):
        gfile.MkDir(FLAGS.tmp_dir)
      gfile.Copy(src, dst)
      print('File %s copied to destination' % cell_string)
    else:
      print('File %s exists' % cell_string)

  # Load data for the different cells.
  stim_log = []
  resp_log = []
  mask_matrix_log = []
  for dst in dst_log:
    print('Loading %s' % dst)
    data = h5py.File(dst, 'r')
    stimulus = np.array(data.get('stimulus'))
    stimulus = stimulus[:-1, :]  # Drop the last frame so the movie is the
                                 # same length as the binned spike train.
    response = np.squeeze(np.array(data.get('response')))
    response = np.expand_dims(response, 1)
    mask_matrix = np.array(data.get('mask'))
    stim_log += [stimulus]
    resp_log += [response]
    mask_matrix_log += [mask_matrix]

  # Prepare for fitting across multiple cells.
  # Get the union of the individual masks.
  mask_matrix_pop = np.array(mask_matrix_log).sum(0) > 0

  # Get the population response, truncated to the shortest recording.
  resp_len = np.min([resp.shape[0] for resp in resp_log])
  response_pop = np.zeros((resp_len, len(resp_log)))
  for icell in range(len(resp_log)):
    response_pop[:, icell] = resp_log[icell][:resp_len, 0]

  # Get the population stimulus.
  stimulus_pop = np.zeros((resp_len, mask_matrix_pop.sum()))

  # Find non-zero locations for each mask element.
  nnz_log = [np.where(imask > 0) for imask in mask_matrix_log]
  nnz_pop = np.where(mask_matrix_pop > 0)
  for ipix in range(mask_matrix_pop.sum()):
    print(ipix)
    r = nnz_pop[0][ipix]
    c = nnz_pop[1][ipix]
    # Average the stimulus at this pixel over all cells whose mask covers it.
    stim_pix = np.zeros(resp_len)
    nc = 0
    for icell in range(len(nnz_log)):
      pix_cell_bool = np.logical_and(nnz_log[icell][0] == r,
                                     nnz_log[icell][1] == c)
      if pix_cell_bool.sum() > 0:
        pix_cell = np.where(pix_cell_bool > 0)[0][0]
        stim_pix += stim_log[icell][:resp_len, pix_cell]
        nc += 1
    if nc == 0:
      print('Error')
    stim_pix = stim_pix / nc
    stimulus_pop[:, ipix] = stim_pix

  # Fit with a given number of subunits.
  print('Starting fitting')
  get_su_nsub(stimulus_pop, response_pop, mask_matrix_pop, cell_str_final,
              nsub, projection_type, lam_proj, ipartition)
def main(argv):
  # Copy WN data locally.
  dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')
  if not gfile.Exists(dst):
    print('Started Copy')
    src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
    if not gfile.IsDirectory(FLAGS.tmp_dir):
      gfile.MkDir(FLAGS.tmp_dir)
    gfile.Copy(src, dst)
    print('File copied to destination')
  else:
    print('File exists')

  # Load stimulus.
  file = h5py.File(dst, 'r')

  # Load masked movie.
  data = file.get('maskedMovdd')
  stimulus = np.array(data)

  # Load cell response.
  cells = file.get('cells')
  cells = np.array(cells)
  cells = np.squeeze(cells)
  ttf_log = file.get('ttf_log')
  ttf_avg = file.get('ttf_avg')

  # Load spike response of cells.
  data = file.get('Y')
  responses = np.array(data)

  # Get mask.
  total_mask_log = np.array(file.get('totalMaskAccept_log'))
  print('Got WN data')

  # Get NSEM data.
  dat_nsem_mov = sio.loadmat(
      gfile.Open('/home/bhaishahster/nsem_data/'
                 'pc2015_10_29_2/NSinterval_30_025.mat', 'r'))
  stimulus_nsem = dat_nsem_mov['mov']
  stimulus_nsem = np.transpose(stimulus_nsem, [2, 1, 0])
  stimulus_nsem = np.reshape(stimulus_nsem, [stimulus_nsem.shape[0], -1])

  dat_nsem_resp = sio.loadmat(
      gfile.Open('/home/bhaishahster/nsem_data/'
                 'pc2015_10_29_2/OFF_parasol_trial_resp'
                 '_data_NSEM_data039.mat', 'r'))
  responses_nsem = dat_nsem_resp['resp_cell_log']
  print('Got NSEM data')

  # Read the line of the params file corresponding to this task.
  with gfile.Open(FLAGS.task_params_file, 'r') as f:
    for _ in range(FLAGS.taskid + 1):
      line = f.readline()
  line = line[:-1]  # Remove \n from the end.
  print(line)

  # Get task parameters by parsing the line.
  line_split = line.split(';')
  cell_idx = line_split[0]
  cell_idx = cell_idx[1:-1].split(',')
  cell_idx = [int(i) for i in cell_idx]
  Nsub = int(line_split[1])
  projection_type = line_split[2]
  lam_proj = float(line_split[3])
  ipartition = int(line_split[4])
  cell_idx_mask = cell_idx

  print(cell_idx)
  print(Nsub)
  print(cell_idx_mask)

  mask = (total_mask_log[cell_idx_mask, :].sum(0) != 0)
  mask_matrix = np.reshape(mask != 0, [40, 80])

  # Make the mask bigger: add one row on each side.
  r, c = np.where(mask_matrix)
  mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
  neighbor_mat = su_model.get_neighbormat(mask_matrix, nbd=1)
  mask = np.ndarray.flatten(mask_matrix)

  ## WN preprocessing.
  stim_use_wn = stimulus[:, mask]
  resp_use_wn = responses[:, cell_idx]

  # Keep the last 10% as test data.
  np.random.seed(23)
  frac_test = 0.1
  tms_test = np.arange(np.floor(stim_use_wn.shape[0] * (1 - frac_test)),
                       np.floor(stim_use_wn.shape[0])).astype(np.int)

  # Random partitions of the remaining data.
  n_partitions = 10
  tms_train_validate = np.arange(
      0, np.floor(stim_use_wn.shape[0] * (1 - frac_test))).astype(np.int)

  frac_validate = 0.1
  partitions_wn = []
  for _ in range(n_partitions):
    perm = np.random.permutation(tms_train_validate)
    # Cast the split point to int so it is a valid slice index.
    split = np.int(np.floor((1 - frac_validate) * perm.shape[0]))
    tms_train = perm[:split]
    tms_validate = perm[split:]
    partitions_wn += [{'tms_train': tms_train,
                       'tms_validate': tms_validate,
                       'tms_test': tms_test}]
  print('Made partitions')
  print('WN data preprocessed')

  ## NSEM preprocessing.
  stim_use_nsem = stimulus_nsem[:, mask]
  ttf_use = np.array(ttf_log[cell_idx, :]).astype(np.float32).squeeze()
  stim_use_nsem = filterMov_time(stim_use_nsem, ttf_use)
  resp_use_nsem = np.array(responses_nsem[cell_idx][0, 0]).astype(np.float32).T

  # Remove the first 30 frames due to convolution artifact.
  stim_use_nsem = stim_use_nsem[30:, :]
  resp_use_nsem = resp_use_nsem[30:, :]

  n_trials = resp_use_nsem.shape[1]
  t_nsem = resp_use_nsem.shape[0]
  tms_train_1tr_nsem = np.arange(np.floor(t_nsem / 2))
  tms_test_1tr_nsem = np.arange(np.ceil(t_nsem / 2), t_nsem)

  # Repeat in the time dimension, divide into training and testing.
  stim_use_nsem = np.tile(stim_use_nsem.T, n_trials).T
  resp_use_nsem = np.ndarray.flatten(resp_use_nsem.T)
  resp_use_nsem = np.expand_dims(resp_use_nsem, 1)
  tms_train_nsem = np.array([])
  tms_test_nsem = np.array([])
  for itrial in range(n_trials):
    tms_train_nsem = np.append(tms_train_nsem,
                               tms_train_1tr_nsem + itrial * t_nsem)
    tms_test_nsem = np.append(tms_test_nsem,
                              tms_test_1tr_nsem + itrial * t_nsem)
  tms_train_nsem = tms_train_nsem.astype(np.int)
  tms_test_nsem = tms_test_nsem.astype(np.int)
  print('NSEM data preprocessed')

  ss = '_'.join([str(cells[ic]) for ic in cell_idx])
  save_filename = os.path.join(
      FLAGS.save_path,
      'Cell_%s_nsub_%d_%s_%.3f_part_%d_jnt.pkl' %
      (ss, Nsub, projection_type, lam_proj, ipartition))
  save_filename_partial = os.path.join(
      FLAGS.save_path_partial,
      'Cell_%s_nsub_%d_%s_%.3f_part_%d_jnt.pkl' %
      (ss, Nsub, projection_type, lam_proj, ipartition))

  ## Do fitting.
  # Fit subunits on WN.
  print('Fitting started on WN')
  op = su_model.Flat_clustering_jnt(
      stim_use_wn, resp_use_wn, Nsub,
      partitions_wn[ipartition]['tms_train'],
      partitions_wn[ipartition]['tms_validate'],
      steps_max=10000, eps=1e-9,
      projection_type=projection_type,
      neighbor_mat=neighbor_mat,
      lam_proj=lam_proj, eps_proj=0.01,
      save_filename_partial=save_filename_partial,
      fitting_phases=[1])
  _, _, alpha, lam_log_wn, lam_log_test_wn, fitting_phase, fit_params_wn = op
  print('Fitting done on WN')

  # Fit on NSEM.
  op = su_model.fit_scales(
      stim_use_nsem[tms_train_nsem, :], resp_use_nsem[tms_train_nsem, :],
      stim_use_nsem[tms_test_nsem, :], resp_use_nsem[tms_test_nsem, :],
      Ns=Nsub, K=fit_params_wn[0][0], b=fit_params_wn[0][1],
      params=fit_params_wn[0][2], lr=0.1, eps=1e-9)
  K_nsem, b_nsem, nl_params_nsem, lam_log_nsem, lam_log_test_nsem = op

  # Collect results and save.
  fit_params = fit_params_wn + [[K_nsem, b_nsem, nl_params_nsem]]
  lam_log = [lam_log_wn, np.array(lam_log_nsem)]
  lam_log_test = [lam_log_test_wn, np.array(lam_log_test_nsem)]
  save_dict = {'lam_log': lam_log,
               'lam_log_test': lam_log_test,
               'fit_params': fit_params}
  pickle.dump(save_dict, gfile.Open(save_filename, 'w'))
  print('Saved results')