Example #1
    def setUp(self):
        self._export_dir_base = tempfile.mkdtemp() + "export/"
        gfile.MkDir(self._export_dir_base)
    def testScalarsRealistically(self):
        """Test accumulator by writing values and then reading them."""
        def FakeScalarSummary(tag, value):
            value = summary_pb2.Summary.Value(tag=tag, simple_value=value)
            summary = summary_pb2.Summary(value=[value])
            return summary

        directory = os.path.join(self.get_temp_dir(), 'values_dir')
        if gfile.IsDirectory(directory):
            gfile.DeleteRecursively(directory)
        gfile.MkDir(directory)

        writer = writer_lib.FileWriter(directory, max_queue=100)

        with ops.Graph().as_default() as graph:
            _ = constant_op.constant([2.0, 1.0])
        # Add a graph to the summary writer.
        writer.add_graph(graph)
        meta_graph_def = saver.export_meta_graph(graph_def=graph.as_graph_def(
            add_shapes=True))
        writer.add_meta_graph(meta_graph_def)

        run_metadata = config_pb2.RunMetadata()
        device_stats = run_metadata.step_stats.dev_stats.add()
        device_stats.device = 'test device'
        writer.add_run_metadata(run_metadata, 'test run')

        # Write a bunch of events using the writer.
        for i in xrange(30):
            summ_id = FakeScalarSummary('id', i)
            summ_sq = FakeScalarSummary('sq', i * i)
            writer.add_summary(summ_id, i * 5)
            writer.add_summary(summ_sq, i * 5)
        writer.flush()

        # Verify that we can load those events properly
        acc = ea.EventAccumulator(directory)
        acc.Reload()
        self.assertTagsEqual(
            acc.Tags(), {
                ea.IMAGES: [],
                ea.AUDIO: [],
                ea.SCALARS: ['id', 'sq'],
                ea.HISTOGRAMS: [],
                ea.COMPRESSED_HISTOGRAMS: [],
                ea.GRAPH: True,
                ea.META_GRAPH: True,
                ea.RUN_METADATA: ['test run']
            })
        id_events = acc.Scalars('id')
        sq_events = acc.Scalars('sq')
        self.assertEqual(30, len(id_events))
        self.assertEqual(30, len(sq_events))
        for i in xrange(30):
            self.assertEqual(i * 5, id_events[i].step)
            self.assertEqual(i * 5, sq_events[i].step)
            self.assertEqual(i, id_events[i].value)
            self.assertEqual(i * i, sq_events[i].value)

        # Write a few more events to test incremental reloading
        for i in xrange(30, 40):
            summ_id = FakeScalarSummary('id', i)
            summ_sq = FakeScalarSummary('sq', i * i)
            writer.add_summary(summ_id, i * 5)
            writer.add_summary(summ_sq, i * 5)
        writer.flush()

        # Verify we can now see all of the data
        acc.Reload()
        id_events = acc.Scalars('id')
        sq_events = acc.Scalars('sq')
        self.assertEqual(40, len(id_events))
        self.assertEqual(40, len(sq_events))
        for i in xrange(40):
            self.assertEqual(i * 5, id_events[i].step)
            self.assertEqual(i * 5, sq_events[i].step)
            self.assertEqual(i, id_events[i].value)
            self.assertEqual(i * i, sq_events[i].value)
        self.assertProtoEquals(graph.as_graph_def(add_shapes=True),
                               acc.Graph())
        self.assertProtoEquals(meta_graph_def, acc.MetaGraph())
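For quick reference, here is a minimal standalone sketch of the same write-then-reload flow. It assumes a current TensorFlow install where the TF 1.x summary writer lives under tf.compat.v1 and the EventAccumulator ships with TensorBoard; the tag name and log directory are illustrative, not taken from the test above.

import tempfile

import tensorflow as tf
from tensorboard.backend.event_processing import event_accumulator

# FileWriter is a graph-mode (TF 1.x) API, so disable eager execution first.
tf.compat.v1.disable_eager_execution()

logdir = tempfile.mkdtemp()
writer = tf.compat.v1.summary.FileWriter(logdir)
for step in range(10):
    summary = tf.compat.v1.Summary(value=[
        tf.compat.v1.Summary.Value(tag='loss', simple_value=1.0 / (step + 1))
    ])
    writer.add_summary(summary, global_step=step)
writer.flush()

# Read the events back, as the test does with ea.EventAccumulator.
acc = event_accumulator.EventAccumulator(logdir)
acc.Reload()
print([(e.step, e.value) for e in acc.Scalars('loss')])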
Example #3
def get_stimulus_response(src_dir,
                          src_dataset,
                          stim_id,
                          boundary=0,
                          if_get_stim=True):
    """Get stimulus-response data for all datasets.

  Args:
    src_dir: Location of all joint embedding datasets.
    src_dataset: Dataset corresponding to a specific stimulus.
    stim_id: String ID of the stimulus.
    boundary: Remove cells within this boundary of the edges.
    if_get_stim: If False, do not load the stimulus.

  Returns:
    stimulus: Stimulus matrix (Time x dimx x dimy).
    responses: Discretized cell responses (Time x n_cells).
    dimx: X dimension of the stimulus.
    dimy: Y dimension of the stimulus.
    num_cell_types: Number of cell types.
  """

    # Copy data locally.
    # Since gfile does not support reading of large files directly from CNS,
    # we need to copy the data locally first.
    src = os.path.join(src_dir, src_dataset)
    if not gfile.IsDirectory(FLAGS.tmp_dir):
        gfile.MkDir(FLAGS.tmp_dir)
    dst = os.path.join(FLAGS.tmp_dir, src_dataset)
    print('Source %s' % src)
    print('Destination %s' % dst)
    copy_locally(src, dst)

    # Load stimulus-response data.
    if if_get_stim:
        data = h5py.File(os.path.join(dst, 'stimulus.mat'))
        stimulus = np.array(data.get('stimulus'))

        # Make dynamic range of stimuli from -0.5 to 0.5
        stim_min = np.min(stimulus)
        stim_max = np.max(stimulus)
        stimulus -= stim_min
        stimulus /= (stim_max - stim_min)
        stimulus -= 0.5

        # Make the stimuli mean 0
        stimulus -= np.mean(stimulus)

    else:
        stimulus = None

    # Load responses from multiple retinas.
    datasets_list = os.path.join(dst, 'datasets.txt')
    datasets = open(datasets_list, 'r').read()
    training_datasets = [line for line in datasets.splitlines()]

    num_cell_types = 2
    dimx_desired = 80
    dimy_desired = 40
    if stimulus is not None:
        dimx_actual = stimulus.shape[1]
        dimy_actual = stimulus.shape[2]
    else:
        stix_sz = np.int(src_dataset.split('-')[1])
        dimx_actual = np.int(640 / stix_sz)
        dimy_actual = np.int(320 / stix_sz)

    responses = []
    for idata in training_datasets:
        print(idata)
        data_file = os.path.join(dst, idata)
        data = sio.loadmat(data_file)
        data.update({'stimulus_key': stim_id})
        process_dataset(data,
                        dimx_desired,
                        dimy_desired,
                        dimx_actual,
                        dimy_actual,
                        num_cell_types,
                        boundary=boundary)
        data.update({'piece': idata})
        responses += [data]

    if FLAGS.minimize_disk_usage:
        gfile.DeleteRecursively(dst)

    return stimulus, responses, dimx_desired, dimy_desired, num_cell_types
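The function above relies on the copy-data-locally idiom before reading with h5py/scipy. Below is a small sketch of that idiom with the public tf.io.gfile API; the paths and the single-file copy_locally helper are assumptions for illustration, not the project's own helper.

import os

import tensorflow as tf

def copy_locally(src, dst):
    # Copy a single remote file to local scratch space if it is not already there.
    if not tf.io.gfile.exists(dst):
        tf.io.gfile.copy(src, dst, overwrite=False)

tmp_dir = '/tmp/joint_embedding'                    # hypothetical scratch dir
src = 'gs://example-bucket/dataset-8/stimulus.mat'  # hypothetical remote file

if not tf.io.gfile.isdir(tmp_dir):
    tf.io.gfile.mkdir(tmp_dir)
dst = os.path.join(tmp_dir, os.path.basename(src))
copy_locally(src, dst)
# ... load and process dst with h5py / scipy.io ...
tf.io.gfile.rmtree(tmp_dir)  # mirrors the FLAGS.minimize_disk_usage cleanup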
Example #4
def _create_test_export_dir(export_dir_base):
    export_dir = _get_timestamped_export_dir(export_dir_base)
    gfile.MkDir(export_dir)
    time.sleep(2)
    return export_dir
def _CreateCleanDirectory(path):
    if gfile.IsDirectory(path):
        gfile.DeleteRecursively(path)
    gfile.MkDir(path)
def _create_test_export_dir(export_dir_base):
    export_dir = saved_model_export_utils.get_timestamped_export_dir(
        export_dir_base)
    gfile.MkDir(export_dir)
    time.sleep(1)
    return export_dir
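The timestamped-export helpers called above (_get_timestamped_export_dir, saved_model_export_utils.get_timestamped_export_dir) are library internals; a rough standalone equivalent, under the assumption that any unique second-resolution name is acceptable, could look like the sketch below. The time.sleep() calls in the tests simply keep two consecutive exports from landing in the same directory.

import os
import time

import tensorflow as tf

def create_timestamped_export_dir(export_dir_base):
    # Use the current UNIX time as the directory name; makedirs also creates
    # export_dir_base itself if it does not exist yet.
    export_dir = os.path.join(export_dir_base, str(int(time.time())))
    tf.io.gfile.makedirs(export_dir)
    return export_dir

print(create_timestamped_export_dir('/tmp/export_demo'))  # hypothetical root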
Example #7
def main(argv):

    cell_idx = FLAGS.taskid
    file_list = gfile.ListDirectory(FLAGS.src_dir)
    cell_file = file_list[cell_idx]
    print('Cell file %s' % cell_file)

    # copy data
    dst = os.path.join(FLAGS.tmp_dir, cell_file)

    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, cell_file)
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)

        gfile.Copy(src, dst)
        print('File copied to destination')

    else:
        print('File exists')

    # load stimulus, response data
    data = sio.loadmat(dst)
    trainMov_filterNSEM = data['trainMov_filterNSEM']
    testMov_filterNSEM = data['testMov_filterNSEM']
    trainSpksNSEM = data['trainSpksNSEM']
    testSpksNSEM = data['testSpksNSEM']
    mask = data['mask']

    trainMov_filterWN = data['trainMov_filterWN']
    testMov_filterWN = data['testMov_filterWN']
    trainSpksWN = data['trainSpksWN']
    testSpksWN = data['testSpksWN']

    # Get WN and NSEM stimulus and response
    stimulus_WN = np.array(trainMov_filterWN.transpose(), dtype='float32')
    response_WN = np.array(np.squeeze(trainSpksWN), dtype='float32')

    stimulus_NSEM = np.array(trainMov_filterNSEM.transpose(), dtype='float32')
    response_NSEM = np.array(np.squeeze(trainSpksNSEM), dtype='float32')
    print('Prepared data')
    # Do fitting

    # set random seed.
    np.random.seed(23)

    print('Made partitions')

    # Do fitting
    # WN data
    ifrac = 0.8
    tms_train_WN = np.arange(0, np.floor(stimulus_WN.shape[0] * ifrac)).astype(
        np.int)
    tms_test_WN = np.arange(np.floor(stimulus_WN.shape[0] * ifrac), 1 *
                            np.floor(stimulus_WN.shape[0] * 1)).astype(np.int)

    # NSEM data
    ifrac = 0.8

    tms_train_NSEM = np.arange(0, np.floor(stimulus_NSEM.shape[0] *
                                           ifrac)).astype(np.int)
    tms_test_NSEM = np.arange(np.floor(stimulus_NSEM.shape[0] * ifrac),
                              1 * np.floor(stimulus_NSEM.shape[0] * 1)).astype(
                                  np.int)
    '''
  eps = 1e-7
  for Nsub in [1, 2, 3, 4, 5, 7, 10]:
      print('Fitting started')

      # WN fit
      op = jnt_model.Flat_clustering(stimulus_WN, response_WN, Nsub, tms_train_WN, tms_test_WN,
                           steps_max=10000, eps=eps)
      K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params  = op
      WN_fit = {'K': K, 'b': b,
                'lam_log': lam_log, 'lam_log_test': lam_log_test}
      print('WN fit done')

      # NSEM fit
      # Just fit the scales
      # fit NL + b + Kscale
      K, b, params, loss_log, loss_log_test  = jnt_model.fit_scales(stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
                                                          stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
                                                          Ns=Nsub, K=WN_fit['K'], b=WN_fit['b'], params=[1.0, 0.0],
                                                          lr=0.001, eps=eps)
      NSEM_fit_scales = {'K': K, 'b': b, 'nl_params': params,
                         'lam_log': loss_log, 'lam_log_test': loss_log_test}
      print('NSEM scales fit')

      # Fit all params
      K, b, params, loss_log, loss_log_test  = jnt_model.fit_all(stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
                                                       stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
                                                       Ns=Nsub,
                                                       K=NSEM_fit_scales['K'], b=NSEM_fit_scales['b'],
                                                       train_phase=3,
                                                       params=NSEM_fit_scales['nl_params'],
                                                       lr=0.001, eps=eps)
      NSEM_fit_full = {'K': K, 'b': b, 'nl_params': params,
                       'lam_log': loss_log, 'lam_log_test': loss_log_test}
      print('NSEM all fit')

      save_dict = {'WN_fit': WN_fit,
                   'NSEM_fit_scales': NSEM_fit_scales,
                   'NSEM_fit_full': NSEM_fit_full}

      pickle.dump(save_dict,
                  gfile.Open(os.path.join(FLAGS.save_path,
                                    'Cell_%s_nsub_%d_suff_%d_jnt.pkl' %
                                    (cell_file, Nsub, 1)), 'w' ))
      print('Saved results')
  '''
    '''
  eps = 1e-7
  for Nsub in [1, 2, 3, 4, 5, 7, 10]:
      print('Fitting started')

      # Fit all params
      K = 2*rng.rand(stimulus_NSEM.shape[1], Nsub)-0.5
      b = 2*rng.rand(Nsub)-0.5

      K, b, params, loss_log, loss_log_test  = jnt_model.fit_all(stimulus_NSEM[tms_train_NSEM, :], response_NSEM[tms_train_NSEM],
                                                       stimulus_NSEM[tms_test_NSEM, :], response_NSEM[tms_test_NSEM],
                                                       Ns=Nsub,
                                                       K=K.astype(np.float32), b=b.astype(np.float32),
                                                       train_phase=3,
                                                       params=[1.0, 0.0],
                                                       lr=0.001, eps=eps)
      NSEM_fit_full = {'K': K, 'b': b, 'nl_params': params,
                       'lam_log': loss_log, 'lam_log_test': loss_log_test}
      print('NSEM all (random) fit')

      save_dict = {'NSEM_fit_full_random': NSEM_fit_full}

      pickle.dump(save_dict,
                  gfile.Open(os.path.join(FLAGS.save_path,
                                    'Cell_%s_nsub_%d_suff_%d_randomly_init.pkl' %
                                    (cell_file, Nsub, 1)), 'w' ))
      print('Saved results')
  '''

    eps = 1e-7
    for Nsub in [1, 2, 3, 4, 5, 7, 10]:
        print('Fitting started')

        # NSEM clustering fit
        op = jnt_model.Flat_clustering(stimulus_NSEM,
                                       response_NSEM,
                                       Nsub,
                                       tms_train_NSEM,
                                       tms_test_NSEM,
                                       steps_max=10000,
                                       eps=eps)
        K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params = op
        NSEM_clustering = {
            'K': K,
            'b': b,
            'lam_log': lam_log,
            'lam_log_test': lam_log_test
        }
        print('NSEM clustering fit')

        # NSEM fit
        # Just fit the scales
        # fit NL + b + Kscale
        K, b, params, loss_log, loss_log_test = jnt_model.fit_scales(
            stimulus_NSEM[tms_train_NSEM, :],
            response_NSEM[tms_train_NSEM],
            stimulus_NSEM[tms_test_NSEM, :],
            response_NSEM[tms_test_NSEM],
            Ns=Nsub,
            K=NSEM_clustering['K'],
            b=NSEM_clustering['b'],
            params=[1.0, 0.0],
            lr=0.001,
            eps=eps)
        NSEM_fit_scales = {
            'K': K,
            'b': b,
            'nl_params': params,
            'lam_log': loss_log,
            'lam_log_test': loss_log_test
        }
        print('NSEM scales fit')

        # Fit all params
        K, b, params, loss_log, loss_log_test = jnt_model.fit_all(
            stimulus_NSEM[tms_train_NSEM, :],
            response_NSEM[tms_train_NSEM],
            stimulus_NSEM[tms_test_NSEM, :],
            response_NSEM[tms_test_NSEM],
            Ns=Nsub,
            K=NSEM_fit_scales['K'],
            b=NSEM_fit_scales['b'],
            train_phase=3,
            params=NSEM_fit_scales['nl_params'],
            lr=0.001,
            eps=eps)
        NSEM_fit_full = {
            'K': K,
            'b': b,
            'nl_params': params,
            'lam_log': loss_log,
            'lam_log_test': loss_log_test
        }
        print('NSEM all fit')

        save_dict = {
            'NSEM_clustering': NSEM_clustering,
            'NSEM_fit_scales': NSEM_fit_scales,
            'NSEM_fit_full': NSEM_fit_full
        }

        pickle.dump(
            save_dict,
            gfile.Open(
                os.path.join(
                    FLAGS.save_path,
                    'Cell_%s_nsub_%d_suff_%d_NSEM_3_steps.pkl' %
                    (cell_file, Nsub, 1)), 'w'))
        print('Saved results')
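The index bookkeeping for tms_train_WN / tms_test_WN and their NSEM counterparts above is just a fractional train/test split over time; a compact sketch with an illustrative array length:

import numpy as np

n_timepoints = 1000  # illustrative number of stimulus/response frames
ifrac = 0.8          # fraction of timepoints used for training
split = int(np.floor(n_timepoints * ifrac))
tms_train = np.arange(0, split)
tms_test = np.arange(split, n_timepoints)
assert len(tms_train) + len(tms_test) == n_timepoints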
Example #8
def initialize(sess):
    """Initialize data and model."""
    if FLAGS.jobid >= 0:
        data.log_filename = os.path.join(FLAGS.train_dir,
                                         "log%d" % FLAGS.jobid)
    data.print_out("NN ", newline=False)

    # Set random seed.
    seed = FLAGS.random_seed + max(0, FLAGS.jobid)
    tf.set_random_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    # Check data sizes.
    assert data.bins
    min_length = 3
    max_length = min(FLAGS.max_length, data.bins[-1])
    assert max_length + 1 > min_length
    while len(data.bins) > 1 and data.bins[-2] > max_length + EXTRA_EVAL:
        data.bins = data.bins[:-1]
    assert data.bins[0] > FLAGS.rx_step
    data.forward_max = max(FLAGS.forward_max, data.bins[-1])
    nclass = min(FLAGS.niclass, FLAGS.noclass)
    data_size = FLAGS.train_data_size if FLAGS.mode == 0 else 1000

    # Initialize data for each task.
    tasks = FLAGS.task.split("-")
    for t in tasks:
        for l in xrange(max_length + EXTRA_EVAL - 1):
            data.init_data(t, l, data_size, nclass)
        data.init_data(t, data.bins[-2], data_size, nclass)
        data.init_data(t, data.bins[-1], data_size, nclass)
        end_size = 4 * 1024 if FLAGS.mode > 0 else 1024
        data.init_data(t, data.forward_max, end_size, nclass)

    # Print out parameters.
    curriculum = FLAGS.curriculum_bound
    msg1 = ("layers %d kw %d h %d kh %d relax %d batch %d noise %.2f task %s" %
            (FLAGS.nconvs, FLAGS.kw, FLAGS.height, FLAGS.kh, FLAGS.rx_step,
             FLAGS.batch_size, FLAGS.grad_noise_scale, FLAGS.task))
    msg2 = "data %d %s" % (FLAGS.train_data_size, msg1)
    msg3 = (
        "cut %.2f pull %.3f lr %.2f iw %.2f cr %.2f nm %d d%.4f gn %.2f %s" %
        (FLAGS.cutoff, FLAGS.pull_incr, FLAGS.lr, FLAGS.init_weight,
         curriculum, FLAGS.nmaps, FLAGS.dropout, FLAGS.max_grad_norm, msg2))
    data.print_out(msg3)

    # Create checkpoint directory if it does not exist.
    checkpoint_dir = os.path.join(
        FLAGS.train_dir,
        "neural_gpu%s" % ("" if FLAGS.jobid < 0 else str(FLAGS.jobid)))
    if not gfile.IsDirectory(checkpoint_dir):
        data.print_out("Creating checkpoint directory %s." % checkpoint_dir)
        gfile.MkDir(checkpoint_dir)

    # Create model and initialize it.
    tf.get_variable_scope().set_initializer(
        tf.uniform_unit_scaling_initializer(factor=1.8 * FLAGS.init_weight))
    model = neural_gpu.NeuralGPU(FLAGS.nmaps, FLAGS.nmaps, FLAGS.niclass,
                                 FLAGS.noclass, FLAGS.dropout, FLAGS.rx_step,
                                 FLAGS.max_grad_norm, FLAGS.cutoff,
                                 FLAGS.nconvs, FLAGS.kw, FLAGS.kh,
                                 FLAGS.height, FLAGS.mode, FLAGS.lr,
                                 FLAGS.pull, FLAGS.pull_incr, min_length + 3)
    data.print_out("Created model.")
    sess.run(tf.initialize_all_variables())
    data.print_out("Initialized variables.")

    # Load model from parameters if a checkpoint exists.
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
        data.print_out("Reading model parameters from %s" %
                       ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

    # Check if there are ensemble models and get their checkpoints.
    ensemble = []
    ensemble_dir_list = [d for d in FLAGS.ensemble.split(",") if d]
    for ensemble_dir in ensemble_dir_list:
        ckpt = tf.train.get_checkpoint_state(ensemble_dir)
        if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
            data.print_out("Found ensemble model %s" %
                           ckpt.model_checkpoint_path)
            ensemble.append(ckpt.model_checkpoint_path)

    # Return the model and needed variables.
    return (model, min_length, max_length, checkpoint_dir, curriculum,
            ensemble)
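The checkpoint-directory handling in initialize() follows a common TF 1.x pattern: create the directory with gfile if needed, then restore from the latest checkpoint when one exists. Below is a minimal sketch via tf.compat.v1; the directory and the single variable are placeholders, not the Neural GPU model.

import os

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # Saver/Session is a TF 1.x-style workflow

checkpoint_dir = '/tmp/neural_gpu_demo'  # hypothetical checkpoint directory
if not tf.io.gfile.isdir(checkpoint_dir):
    tf.io.gfile.mkdir(checkpoint_dir)

w = tf.compat.v1.get_variable('w', shape=[4],
                              initializer=tf.compat.v1.zeros_initializer())
saver = tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))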
Example #9
    def setUpClass(cls):  # pylint: disable=invalid-name
        global bucket, get_oss_path
        bucket = os.getenv("OSS_FS_TEST_BUCKET")
        get_oss_path = lambda p: os.path.join("oss://" + bucket, "oss_fs_test",
                                              p)
        gfile.MkDir(get_oss_path(""))
Example #10
    def testScalarsRealistically(self):
        """Test accumulator by writing values and then reading them."""
        def FakeScalarSummary(tag, value):
            value = tf.Summary.Value(tag=tag, simple_value=value)
            summary = tf.Summary(value=[value])
            return summary

        directory = os.path.join(self.get_temp_dir(), 'values_dir')
        if gfile.IsDirectory(directory):
            gfile.DeleteRecursively(directory)
        gfile.MkDir(directory)

        writer = tf.train.SummaryWriter(directory, max_queue=100)
        graph_def = tf.GraphDef(node=[tf.NodeDef(name='A', op='Mul')])
        # Add a graph to the summary writer.
        writer.add_graph(graph_def)

        # Write a bunch of events using the writer
        for i in xrange(30):
            summ_id = FakeScalarSummary('id', i)
            summ_sq = FakeScalarSummary('sq', i * i)
            writer.add_summary(summ_id, i * 5)
            writer.add_summary(summ_sq, i * 5)
        writer.flush()

        # Verify that we can load those events properly
        acc = ea.EventAccumulator(directory)
        acc.Reload()
        self.assertTagsEqual(
            acc.Tags(), {
                ea.IMAGES: [],
                ea.SCALARS: ['id', 'sq'],
                ea.HISTOGRAMS: [],
                ea.COMPRESSED_HISTOGRAMS: [],
                ea.GRAPH: True
            })
        id_events = acc.Scalars('id')
        sq_events = acc.Scalars('sq')
        self.assertEqual(30, len(id_events))
        self.assertEqual(30, len(sq_events))
        for i in xrange(30):
            self.assertEqual(i * 5, id_events[i].step)
            self.assertEqual(i * 5, sq_events[i].step)
            self.assertEqual(i, id_events[i].value)
            self.assertEqual(i * i, sq_events[i].value)

        # Write a few more events to test incremental reloading
        for i in xrange(30, 40):
            summ_id = FakeScalarSummary('id', i)
            summ_sq = FakeScalarSummary('sq', i * i)
            writer.add_summary(summ_id, i * 5)
            writer.add_summary(summ_sq, i * 5)
        writer.flush()

        # Verify we can now see all of the data
        acc.Reload()
        id_events = acc.Scalars('id')
        sq_events = acc.Scalars('sq')
        self.assertEqual(40, len(id_events))
        self.assertEqual(40, len(sq_events))
        for i in xrange(40):
            self.assertEqual(i * 5, id_events[i].step)
            self.assertEqual(i * 5, sq_events[i].step)
            self.assertEqual(i, id_events[i].value)
            self.assertEqual(i * i, sq_events[i].value)
        self.assertProtoEquals(graph_def, acc.Graph())
Example #11
def main(argv):

    # copy data
    dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')

    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)

        gfile.Copy(src, dst)
        print('File copied to destination')

    else:
        print('File exists')

    # load stimulus
    file = h5py.File(dst, 'r')

    # Load Masked movie
    data = file.get('maskedMovdd')
    stimulus = np.array(data)
    # load cell response
    cells = file.get('cells')

    ttf_log = file.get('ttf_log')
    ttf_avg = file.get('ttf_avg')

    # Load spike Response of cells
    data = file.get('Y')
    responses = np.array(data)

    # get mask
    total_mask_log = file.get('totalMaskAccept_log')

    print('Got data')

    # get cell and mask
    nsub_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    if FLAGS.taskid < 107 * len(nsub_list):
        cell_idx = [np.int(np.floor(FLAGS.taskid / len(nsub_list)))]
        cellid = cells[np.int(np.floor(FLAGS.taskid / len(nsub_list)))]
        Nsub = nsub_list[FLAGS.taskid % len(nsub_list)]
        partition_list = np.arange(10)

    elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10:
        cell_idx = [39, 42, 44, 45]  #[np.int(FLAGS.taskid)]
        cellid = cells[cell_idx]
        cellid = np.squeeze(cellid)
        task_id_effective = FLAGS.taskid - 107 * len(nsub_list)
        partition_list = [task_id_effective % 10]
        nsub_list_pop = np.arange(4, 41)
        Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

    elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10 + 19 * 10:
        cell_idx = [39, 42]  #[np.int(FLAGS.taskid)]
        cellid = cells[cell_idx]
        cellid = np.squeeze(cellid)
        task_id_effective = FLAGS.taskid - 107 * len(nsub_list) - 37 * 10
        partition_list = [task_id_effective % 10]
        nsub_list_pop = np.arange(2, 21)
        Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

    elif FLAGS.taskid < 107 * len(nsub_list) + 37 * 10 + 19 * 10 + 19 * 10:
        cell_idx = [44, 45]  #[np.int(FLAGS.taskid)]
        cellid = cells[cell_idx]
        cellid = np.squeeze(cellid)
        task_id_effective = FLAGS.taskid - 107 * len(
            nsub_list) - 37 * 10 - 19 * 10
        partition_list = [task_id_effective % 10]
        nsub_list_pop = np.arange(2, 21)
        Nsub = nsub_list_pop[np.int(np.floor(task_id_effective / 10))]

    print(cell_idx)
    print(Nsub)

    mask = (total_mask_log[cell_idx, :].sum(0) != 0)
    mask_matrix = np.reshape(mask != 0, [40, 80])

    # Enlarge the mask: fill its bounding box, extended by one extra
    # row/column at the low edges.
    r, c = np.where(mask_matrix)
    mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
    mask = np.ndarray.flatten(mask_matrix)

    stim_use = stimulus[:, mask]
    resp_use = responses[:, cell_idx]

    print('Prepared data')

    # get last 10% as test data
    np.random.seed(23)

    frac_test = 0.1
    tms_test = np.arange(np.floor(stim_use.shape[0] * (1 - frac_test)),
                         1 * np.floor(stim_use.shape[0])).astype(np.int)

    # Random partitions
    n_partitions = 10
    tms_train_validate = np.arange(
        0, np.floor(stim_use.shape[0] * (1 - frac_test))).astype(np.int)

    frac_validate = 0.1

    partitions = []
    for ipartition in range(n_partitions):
        perm = np.random.permutation(tms_train_validate)
        tms_train = perm[0:np.floor((1 - frac_validate) * perm.shape[0])]
        tms_validate = perm[np.floor((1 - frac_validate) *
                                     perm.shape[0]):perm.shape[0]]

        partitions += [{
            'tms_train': tms_train,
            'tms_validate': tms_validate,
            'tms_test': tms_test
        }]

    print('Made partitions')

    # Do fitting
    # tms_train = np.arange(0, np.floor(stim_use.shape[0] * 0.8)).astype(np.int)
    # tms_test = np.arange(np.floor(stim_use.shape[0] * 0.8),
    #                       1 * np.floor(stim_use.shape[0] * 0.9)).astype(np.int)

    for ipartition in partition_list:
        print(cell_idx, cellid, Nsub)

        ss = '_'.join([str(ic) for ic in cellid])

        save_filename = os.path.join(
            FLAGS.save_path,
            'Cell_%s_nsub_%d_part_%d_jnt.pkl' % (ss, Nsub, ipartition))
        if not gfile.Exists(save_filename):
            print('Fitting started')
            op = jnt_model.Flat_clustering_jnt(
                stim_use,
                resp_use,
                Nsub,
                partitions[ipartition]['tms_train'],
                partitions[ipartition]['tms_validate'],
                steps_max=10000,
                eps=1e-9)

            # op = jnt_model.Flat_clustering_jnt(stim_use, resp_use, Nsub,
            #                                   tms_train,
            #                                   tms_test,
            #                                   steps_max=10000, eps=1e-9)

            K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params = op

            print('Fitting done')
            save_dict = {
                'K': K,
                'b': b,
                'lam_log': lam_log,
                'lam_log_test': lam_log_test,
                'fitting_phase': fitting_phase,
                'fit_params': fit_params
            }
            pickle.dump(save_dict, gfile.Open(save_filename, 'w'))
            print('Saved results')
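This example also guards against redoing completed work: it checks gfile.Exists(save_filename) before starting an expensive fit. A stripped-down sketch of that guard follows, with hypothetical paths and the fit itself elided.

import os

import tensorflow as tf

save_path = '/tmp/su_fits'  # hypothetical output directory
tf.io.gfile.makedirs(save_path)
save_filename = os.path.join(save_path, 'Cell_demo_nsub_3_part_0_jnt.pkl')

if not tf.io.gfile.exists(save_filename):
    print('Fitting started')
    # ... run the fit and dump the results to save_filename ...
else:
    print('Results already exist; skipping fit')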
Example #12
    def _base_export_fn(unused_estimator,
                        export_dir_base,
                        unused_checkpoint_path=None):
        base_path = os.path.join(export_dir_base, "e1")
        gfile.MkDir(base_path)
        return base_path
Example #13
    def _post_export_fn(orig_path, new_path):
        assert orig_path.endswith("/e1")
        post_export_path = os.path.join(new_path, "rewrite")
        gfile.MkDir(post_export_path)
        return post_export_path
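Taken together, Examples #12 and #13 suggest a two-stage export: a base export fn creates a directory and a post-export fn rewrites into a subdirectory of the new location. The sketch below shows how the two might compose, using tf.io.gfile and a hypothetical export root; it illustrates the shape of the hooks only, not the tf.contrib.learn export API itself.

import os

import tensorflow as tf

def base_export_fn(export_dir_base):
    base_path = os.path.join(export_dir_base, 'e1')
    if not tf.io.gfile.isdir(base_path):
        tf.io.gfile.mkdir(base_path)
    return base_path

def post_export_fn(orig_path, new_path):
    assert orig_path.endswith('/e1')
    post_export_path = os.path.join(new_path, 'rewrite')
    if not tf.io.gfile.isdir(post_export_path):
        tf.io.gfile.mkdir(post_export_path)
    return post_export_path

export_dir_base = '/tmp/export_demo'  # hypothetical export root
tf.io.gfile.makedirs(export_dir_base)
final_path = post_export_fn(base_export_fn(export_dir_base), export_dir_base)
print(final_path)  # /tmp/export_demo/rewrite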
Example #14
def main(argv):

    # parse task params
    # read line corresponding to task
    with gfile.Open(FLAGS.task_params_file, 'r') as f:
        for _ in range(FLAGS.taskid + 1):
            line = f.readline()

    print(line)

    # get task parameters by parsing the line.
    line_split = line.split(';')
    cells = gfile.ListDirectory(FLAGS.src_dir)

    cell_idx = line_split[0]
    cell_idx = cell_idx[1:-1].split(',')

    nsub = int(line_split[1])
    projection_type = line_split[2]
    lam_proj = float(line_split[3])
    ipartition = int(line_split[4][:-1])

    # Copy data for all the data
    cell_str_final = ''
    dst_log = []
    for icell in cell_idx:
        icell = int(icell)
        cell_string = cells[icell]
        cell_str_final += cell_string

        # copy data for the corresponding task
        dst = os.path.join(FLAGS.tmp_dir, cell_string)

        dst_log += [dst]
        if not gfile.Exists(dst):
            print('Started Copy')
            src = os.path.join(FLAGS.src_dir, cell_string)
            if not gfile.IsDirectory(FLAGS.tmp_dir):
                gfile.MkDir(FLAGS.tmp_dir)

            gfile.Copy(src, dst)
            print('File %s copied to destination' % cell_string)

        else:
            print('File %s exists' % cell_string)

    # Load data for different cells
    stim_log = []
    resp_log = []
    mask_matrix_log = []
    for dst in dst_log:
        print('Loading %s' % dst)
        data = h5py.File(dst)
        stimulus = np.array(data.get('stimulus'))
        stimulus = stimulus[:-1, :]  # drop the last frame so that it's
        # the same size as the binned spike train

        response = np.squeeze(np.array(data.get('response')))
        response = np.expand_dims(response, 1)
        mask_matrix = np.array(data.get('mask'))

        stim_log += [stimulus]
        resp_log += [response]
        mask_matrix_log += [mask_matrix]

    # Prepare for fitting across multiple cells
    # Get total mask
    mask_matrix_pop = np.array(mask_matrix_log).sum(0) > 0

    # Get total response.
    resp_len = np.min(
        [resp_log[icell].shape[0] for icell in range(len(resp_log))])
    response_pop = np.zeros((resp_len, len(resp_log)))
    for icell in range(len(resp_log)):
        response_pop[:, icell] = resp_log[icell][:resp_len, 0]

    # Get total stimulus.
    stimulus_pop = np.zeros((resp_len, mask_matrix_pop.sum()))
    # Find non-zero locations for each mask element
    nnz_log = [np.where(imask > 0) for imask in mask_matrix_log]
    nnz_pop = np.where(mask_matrix_pop > 0)

    for ipix in range(mask_matrix_pop.sum()):
        print(ipix)
        r = nnz_pop[0][ipix]
        c = nnz_pop[1][ipix]

        stim_pix = np.zeros(resp_len)
        nc = 0
        for icell in range(len(nnz_log)):
            pix_cell_bool = np.logical_and(nnz_log[icell][0] == r,
                                           nnz_log[icell][1] == c)
            if pix_cell_bool.sum() > 0:
                pix_cell = np.where(pix_cell_bool > 0)[0][0]
                stim_pix += stim_log[icell][:resp_len, pix_cell]
                nc += 1

        if nc == 0:
            print('Error')

        stim_pix = stim_pix / nc
        stimulus_pop[:, ipix] = stim_pix

    # Fit with a given number of subunits
    print('Starting fitting')
    get_su_nsub(stimulus_pop, response_pop, mask_matrix_pop, cell_str_final,
                nsub, projection_type, lam_proj, ipartition)
def main(argv):

    # copy WN data
    dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')

    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)

        gfile.Copy(src, dst)
        print('File copied to destination')

    else:
        print('File exists')

    # load stimulus
    file = h5py.File(dst, 'r')

    # Load Masked movie
    data = file.get('maskedMovdd')
    stimulus = np.array(data)

    # load cell response
    cells = file.get('cells')
    cells = np.array(cells)
    cells = np.squeeze(cells)

    ttf_log = file.get('ttf_log')
    ttf_avg = file.get('ttf_avg')

    # Load spike Response of cells
    data = file.get('Y')
    responses = np.array(data)

    # get mask
    total_mask_log = np.array(file.get('totalMaskAccept_log'))

    print('Got WN data')

    # Get NSEM data
    dat_nsem_mov = sio.loadmat(
        gfile.Open(
            '/home/bhaishahster/nsem_data/'
            'pc2015_10_29_2/NSinterval_30_025.mat', 'r'))
    stimulus_nsem = dat_nsem_mov['mov']

    stimulus_nsem = np.transpose(stimulus_nsem, [2, 1, 0])
    stimulus_nsem = np.reshape(stimulus_nsem, [stimulus_nsem.shape[0], -1])

    dat_nsem_resp = sio.loadmat(
        gfile.Open(
            '/home/bhaishahster/nsem_data/'
            'pc2015_10_29_2/OFF_parasol_trial_resp'
            '_data_NSEM_data039.mat', 'r'))
    responses_nsem = dat_nsem_resp['resp_cell_log']
    print('Got NSEM data')

    # read line corresponding to task
    with gfile.Open(FLAGS.task_params_file, 'r') as f:
        for itask in range(FLAGS.taskid + 1):
            line = f.readline()
    line = line[:-1]  # Remove \n from end.
    print(line)

    # get task parameters by parsing the lines
    line_split = line.split(';')
    cell_idx = line_split[0]
    cell_idx = cell_idx[1:-1].split(',')
    cell_idx = [int(i) for i in cell_idx]

    Nsub = int(line_split[1])
    projection_type = line_split[2]
    lam_proj = float(line_split[3])
    ipartition = int(line_split[4])

    cell_idx_mask = cell_idx

    ##

    print(cell_idx)
    print(Nsub)
    print(cell_idx_mask)

    mask = (total_mask_log[cell_idx_mask, :].sum(0) != 0)
    mask_matrix = np.reshape(mask != 0, [40, 80])

    # Enlarge the mask: fill its bounding box, extended by one extra
    # row/column at the low edges.
    r, c = np.where(mask_matrix)
    mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
    neighbor_mat = su_model.get_neighbormat(mask_matrix, nbd=1)
    mask = np.ndarray.flatten(mask_matrix)

    ## WN preprocess
    stim_use_wn = stimulus[:, mask]
    resp_use_wn = responses[:, cell_idx]

    # get last 10% as test data
    np.random.seed(23)

    frac_test = 0.1
    tms_test = np.arange(np.floor(stim_use_wn.shape[0] * (1 - frac_test)),
                         1 * np.floor(stim_use_wn.shape[0])).astype(np.int)

    # Random partitions
    n_partitions = 10
    tms_train_validate = np.arange(
        0, np.floor(stim_use_wn.shape[0] * (1 - frac_test))).astype(np.int)

    frac_validate = 0.1

    partitions_wn = []
    for _ in range(n_partitions):
        perm = np.random.permutation(tms_train_validate)
        tms_train = perm[0:np.floor((1 - frac_validate) * perm.shape[0])]
        tms_validate = perm[np.floor((1 - frac_validate) *
                                     perm.shape[0]):perm.shape[0]]

        partitions_wn += [{
            'tms_train': tms_train,
            'tms_validate': tms_validate,
            'tms_test': tms_test
        }]

    print('Made partitions')
    print('WN data preprocessed')

    ## NSEM preprocess
    stim_use_nsem = stimulus_nsem[:, mask]
    ttf_use = np.array(ttf_log[cell_idx, :]).astype(np.float32).squeeze()
    stim_use_nsem = filterMov_time(stim_use_nsem, ttf_use)
    resp_use_nsem = np.array(responses_nsem[cell_idx][0,
                                                      0]).astype(np.float32).T

    # Remove first 30 frames due to convolution artifact.
    stim_use_nsem = stim_use_nsem[30:, :]
    resp_use_nsem = resp_use_nsem[30:, :]

    n_trials = resp_use_nsem.shape[1]
    t_nsem = resp_use_nsem.shape[0]
    tms_train_1tr_nsem = np.arange(np.floor(t_nsem / 2))
    tms_test_1tr_nsem = np.arange(np.ceil(t_nsem / 2), t_nsem)

    # repeat in time dimension, divide into training and testing.
    stim_use_nsem = np.tile(stim_use_nsem.T, n_trials).T
    resp_use_nsem = np.ndarray.flatten(resp_use_nsem.T)
    resp_use_nsem = np.expand_dims(resp_use_nsem, 1)

    tms_train_nsem = np.array([])
    tms_test_nsem = np.array([])
    for itrial in range(n_trials):
        tms_train_nsem = np.append(tms_train_nsem,
                                   tms_train_1tr_nsem + itrial * t_nsem)
        tms_test_nsem = np.append(tms_test_nsem,
                                  tms_test_1tr_nsem + itrial * t_nsem)
    tms_train_nsem = tms_train_nsem.astype(np.int)
    tms_test_nsem = tms_test_nsem.astype(np.int)

    print('NSEM data preprocessed')

    ss = '_'.join([str(cells[ic]) for ic in cell_idx])

    save_filename = os.path.join(
        FLAGS.save_path, 'Cell_%s_nsub_%d_%s_%.3f_part_%d_jnt.pkl' %
        (ss, Nsub, projection_type, lam_proj, ipartition))

    save_filename_partial = os.path.join(
        FLAGS.save_path_partial, 'Cell_%s_nsub_%d_%s_%.3f_part_%d_jnt.pkl' %
        (ss, Nsub, projection_type, lam_proj, ipartition))

    ## Do fitting
    # Fit SU on WN
    print('Fitting started on WN')
    op = su_model.Flat_clustering_jnt(
        stim_use_wn,
        resp_use_wn,
        Nsub,
        partitions_wn[ipartition]['tms_train'],
        partitions_wn[ipartition]['tms_validate'],
        steps_max=10000,
        eps=1e-9,
        projection_type=projection_type,
        neighbor_mat=neighbor_mat,
        lam_proj=lam_proj,
        eps_proj=0.01,
        save_filename_partial=save_filename_partial,
        fitting_phases=[1])

    _, _, alpha, lam_log_wn, lam_log_test_wn, fitting_phase, fit_params_wn = op
    print('Fitting done on WN')

    # Fit on NSEM
    op = su_model.fit_scales(stim_use_nsem[tms_train_nsem, :],
                             resp_use_nsem[tms_train_nsem, :],
                             stim_use_nsem[tms_test_nsem, :],
                             resp_use_nsem[tms_test_nsem, :],
                             Ns=Nsub,
                             K=fit_params_wn[0][0],
                             b=fit_params_wn[0][1],
                             params=fit_params_wn[0][2],
                             lr=0.1,
                             eps=1e-9)

    K_nsem, b_nsem, nl_params_nsem, lam_log_nsem, lam_log_test_nsem = op

    # Collect results and save
    fit_params = fit_params_wn + [[K_nsem, b_nsem, nl_params_nsem]]
    lam_log = [lam_log_wn, np.array(lam_log_nsem)]
    lam_log_test = [lam_log_test_wn, np.array(lam_log_test_nsem)]

    save_dict = {
        'lam_log': lam_log,
        'lam_log_test': lam_log_test,
        'fit_params': fit_params
    }
    pickle.dump(save_dict, gfile.Open(save_filename, 'w'))
    print('Saved results')
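A practical note on the save step used throughout these examples: pickle reads and writes raw bytes, so it is safest to give it a binary-mode handle ('wb'/'rb') rather than the text-mode 'w' used above, especially under Python 3. A minimal sketch with an illustrative filename:

import pickle

import tensorflow as tf

save_dict = {'lam_log': [], 'lam_log_test': [], 'fit_params': []}
save_filename = '/tmp/Cell_demo_nsub_4_jnt.pkl'  # hypothetical path

with tf.io.gfile.GFile(save_filename, 'wb') as f:  # binary mode for pickle
    pickle.dump(save_dict, f)

with tf.io.gfile.GFile(save_filename, 'rb') as f:
    restored = pickle.load(f)
print(sorted(restored.keys()))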