Example #1
def run_norm_sift(dirname):
  sift_dir = path(dirname)
  pbar = ProgressBar(len(sift_dir.listdir()))
  for ii, sift_file in enumerate(sift_dir.listdir()):
    pbar.animate(ii)
    (kp, desc) = load_from_disk(sift_file)
    normalize_sift(desc, inplace=True)
    save_to_disk(kp, desc, sift_file)
Example #2
def compute_dff(data, percentile=8., window_size=1., step_size=.025, subtract_minimum=True, pad_mode='edge'):
    """Compute delta-f-over-f

    Computes the percentile-based delta-f-over-f along the 0th axis of the supplied data.

    Parameters
    ----------
    data : np.ndarray
        n-dimensional data (DFF is taken over axis 0)
    percentile : float 
        percentile of data window to be taken as F0
    window_size : float
        size of window to determine F0, in seconds
    step_size : float
        size of steps used to determine F0, in seconds
    subtract_minimum : bool
        subtract minimum value from data before computing
    pad_mode : str 
        mode argument for np.pad, used to specify F0 determination at start of data

    Returns
    -------
    Data of the same shape as input, transformed to DFF
    """
    data = data.copy()

    window_size = int(window_size*data.fs)
    step_size = int(step_size*data.fs)

    if step_size<1:
        warnings.warn('Requested a step size smaller than sampling interval. Using sampling interval.')
        step_size = 1

    if subtract_minimum:
        data -= data.min()
     
    pad_size = window_size - 1
    pad = ((pad_size,0),) + tuple([(0,0) for _ in xrange(data.ndim-1)])
    padded = np.pad(data, pad, mode=pad_mode)

    out_size = ((len(padded) - window_size) // step_size) + 1
    pbar = ProgressBar(maxval=out_size).start()
    f0 = []
    for idx,win in enumerate(sw(padded, ws=window_size, ss=step_size)):
        f0.append(np.percentile(win, percentile, axis=0))
        pbar.update(idx)
    f0 = np.repeat(f0, step_size, axis=0)[:len(data)]
    pbar.finish()

    return (data-f0)/f0 
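A minimal usage sketch for compute_dff (hypothetical, not from the source project): it assumes a Python 2 environment (the function uses xrange), that numpy, warnings, ProgressBar and the sliding-window helper sw are already imported in the module, and that the input behaves like a pyfluo movie, i.e. an ndarray subclass carrying the sampling rate in an fs attribute.

import numpy as np

class FakeMovie(np.ndarray):
    # Hypothetical stand-in for a pyfluo-style movie: an ndarray with `fs`.
    def __new__(cls, arr, fs):
        obj = np.asarray(arr, dtype=np.float32).view(cls)
        obj.fs = fs
        return obj
    def __array_finalize__(self, obj):
        # keep the sampling rate across copies/views so data.copy() still has .fs
        self.fs = getattr(obj, 'fs', None)

mov = FakeMovie(np.random.rand(400, 32, 32) + 1.0, fs=20.0)  # 400 frames at 20 Hz
dff = compute_dff(mov, window_size=1., step_size=.05, subtract_minimum=False)
print(dff.shape)  # same shape as the input movie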
Example #3
 def consumer(queue, rep, total_files, print_progress):
     if print_progress:
         bar = ProgressBar(total_files, width=50)
             
     counter = 0
     try:
         while counter < total_files: # run until all images are fetched
             thread = queue.get(True)
             thread.join()
             if thread.has_result:
                 rep.add_site(thread.tag, thread.id, thread.page)
                 rep.add_image(thread.tag, thread.id, thread.image)
                 counter += 1
                 if print_progress:
                     bar.add()
             else:
                 total_files -= 1
                 if print_progress:
                     bar = ProgressBar(total_files, width=50)
                     bar.add(counter)
 
             if print_progress:
                 sys.stdout.write("%i%% %r fetched %i of %i \r" %( counter*100/total_files, bar, counter, total_files))
                 sys.stdout.flush()
     except KeyboardInterrupt:
         raise
Example #4
def load_bow(data_annos, config):
  features = np.empty(shape=[len(data_annos), 
                             config.SIFT.BoW.num_clusters * 
                             len(config.SIFT.pool_boxes)])
  progress = ProgressBar(len(data_annos))
  for ii in range(len(data_annos)):
    img_name = data_annos.iloc[ii]['basename']
    img_name = os.path.splitext(img_name)[0]
    hist_filename = os.path.join(config.SIFT.BoW.hist_dir,
                                 img_name) + '_hist.dat'
    hist = load(hist_filename)
    features[ii, :] = hist
    progress.animate(ii)
  print('')

  return features
Example #5
    def validation(self, img_query, img_database, R=100):
        C_tmp = self.sess.run(self.C)
        print("%s #validation# start validation")
        query_batch = int(ceil(img_query.n_samples / self.batch_size))
        print("%s #validation# totally %d query in %d batches" %
              (datetime.now(), img_query.n_samples, query_batch))
        if self.console_log:
            bar = ProgressBar(total=query_batch)
        for i in xrange(query_batch):
            images, labels, codes = img_query.next_batch(self.batch_size)

            output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                         feed_dict={
                                             self.img: images,
                                             self.img_label: labels
                                         })
            img_query.feed_batch_output(self.batch_size, output)
            if self.console_log:
                bar.move('Cosine Loss: %s' % loss)
            else:
                print('Cosine Loss: %s' % loss)

        database_batch = int(ceil(img_database.n_samples / self.batch_size))
        print("%s #validation# totally %d database in %d batches" %
              (datetime.now(), img_database.n_samples, database_batch))
        if self.console_log:
            bar = ProgressBar(total=database_batch)
        for i in xrange(database_batch):
            images, labels, codes = img_database.next_batch(self.batch_size)

            output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                         feed_dict={
                                             self.img: images,
                                             self.img_label: labels
                                         })
            img_database.feed_batch_output(self.batch_size, output)
            #print output[:10, :10]
            if self.console_log:
                bar.move('Cosine Loss: %s' % loss)
            else:
                print('Cosine Loss: %s' % loss)

        self.update_codes_batch(img_query, self.code_batch_size)
        self.update_codes_batch(img_database, self.code_batch_size)

        print("%s #validation# calculating MAP@%d" % (datetime.now(), R))
        C_tmp = self.sess.run(self.C)
        ## save features and codes
        self.save_codes(img_database, img_query, C_tmp)

        mAPs = MAPs_CQ(C_tmp, self.subspace_num, self.subcenter_num, R)
        return {
            'i2i_nocq': mAPs.get_mAPs_by_feature(img_database, img_query),
            'i2i_AQD': mAPs.get_mAPs_AQD(img_database, img_query),
            'i2i_SQD': mAPs.get_mAPs_SQD(img_database, img_query),
        }
Example #6
def create_flipped_images(train_annos, config):
  flipped_annos = train_annos.copy()
  
  # Create new ids for the flipped images
  flipped_annos.index = 1e5 + flipped_annos.index
  
  cache_dir = config.cache_dir
  fp_suffix = config.flip_suffix
  rel_to_cache = path(config.dataset.main_path).relpathto(config.cache_dir)
  
  n_imgs = train_annos.shape[0]
  pbar = ProgressBar(n_imgs)
  print('Creating flipped copies of train images')
  for ii in range(n_imgs):
    parts = cache_dir.joinpath(train_annos.basename.iloc[ii]).splitext()
    flipped_file = parts[0] + fp_suffix + parts[1]
#     print "flipped_file: ", flipped_file
    
    img_file = train_annos.img_path.iloc[ii] 
    img = Image.open(img_file)
    (width, height) = img.size
    
    # We might need to create the flipped image if it is not in 
    # cache already.
    if not flipped_file.isfile():
      f_img = img.transpose(Image.FLIP_LEFT_RIGHT)
      f_img.save(flipped_file)
      
    # Modify the annotations for it
    flipped_annos.img_path.iloc[ii] = flipped_file
    flipped_annos.basename.iloc[ii] = flipped_file.basename()
    box = (train_annos.iloc[ii].xmin, train_annos.iloc[ii].ymin,
           train_annos.iloc[ii].xmax, train_annos.iloc[ii].ymax)
    (xmin, ymin, xmax, ymax) = flip_box_LR(box, width)
    flipped_annos.xmin.iloc[ii] = xmin
    flipped_annos.xmax.iloc[ii] = xmax
    flipped_annos.ymin.iloc[ii] = ymin
    flipped_annos.ymax.iloc[ii] = ymax
    
    pbar.animate(ii)
    
  print('')
    
  return pd.concat([train_annos, flipped_annos], axis=0)
Example #7
class SharedProgressBar(object):
    def __init__(self, number_of_items):
        self.bar = ProgressBar(0, number_of_items, 77)
        self.number_of_items = number_of_items
        self.counter = 0
        self.old_bar_string = ""

    def update(self):
        self.counter += 1
        if self.old_bar_string != str(self.bar):
            sys.stdout.write(str(self.bar) + '\r')
            sys.stdout.flush()
            self.old_bar_string = str(self.bar)
        self.bar.updateAmount(self.counter)

    def flush(self):
        self.bar.updateAmount(self.number_of_items)
        sys.stdout.write(str(self.bar) + '\r')
        sys.stdout.flush()
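A hypothetical driver loop for SharedProgressBar (assuming the same ProgressBar(min, max, width) class used above is importable): call update() once per finished item and flush() at the end.

import sys

work = list(range(500))
pbar = SharedProgressBar(len(work))
for item in work:
    _ = item * item     # stand-in for the real per-item work
    pbar.update()       # redraws only when the rendered bar string changes
pbar.flush()            # force the bar to its final state
sys.stdout.write('\n')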
Example #8
 def create_attrib_res_on_images(self, data_annos, features=None):
   '''
   Calculates the prediction of all attribute classifiers on training images.
   This table can be used to calculate all the Conditional Probability tables
   for the Bayes Net.
   '''
   if self.use_gt:
     return None, None
   
   
   # Define some convenience pointers
   config = self.config
   attrib_classifiers = self.attrib_clfs
   
   if features is None:
     print "Load image Bow histograms from disk"
     features = Bow.load_bow(data_annos, config)
 
   print "Apply attribute classifiers on images"
   res = {}
   res_descrete = {}
   pbar = ProgressBar(len(attrib_classifiers))
   for ii in range(len(attrib_classifiers)):
     attrib_clf = attrib_classifiers[ii]
     curr_res = attrib_clf.decision_function(features,
                                             use_prob=config.attribute.use_prob)
     curr_res_d = attrib_clf.predict(features)
     
     res[attrib_clf.name] = curr_res.reshape(len(curr_res))
     res_descrete[attrib_clf.name] = curr_res_d.reshape(len(curr_res_d))
     pbar.animate(ii)
   print ''
 
   res = pd.DataFrame(data=res, index=data_annos.index)
   res_descrete = pd.DataFrame(data=res_descrete, index=data_annos.index)
 
 
   res = pd.concat([res, data_annos.ix[:, ['class_index']]], axis=1)
   res_descrete = pd.concat([res_descrete, data_annos.ix[:, ['class_index']]], axis=1)
   
   
   return res, res_descrete
Example #9
    def stitchAllBlobs(slidelist, quiet=True, debug=False):
        t_start_stitching = time.time()
        printl('')
        for slide_num, slide in enumerate(slidelist[:-1]):
            # Skipping the last slide, because pairings go from a lower slide to the upper slide, so it's already processed with the second-to-last slide
            # IE blob2ds in the last slide are partners to the previous slide's blob2ds, and have no direct possible partners of their own
            t_start_stitching_this_slide = time.time()
            printl('Stitching %s blob2ds from slide #%s/%s to %s blob2ds from slide #%s/%s' % (len(slide.blob2dlist), slide_num + 1,
                len(slidelist), len(slidelist[slide_num+1].blob2dlist), str(slide_num + 2), len(slidelist)), end=' ')

            progress = ProgressBar(max_val=len(slide.blob2dlist), increments=20,
                                   symbol='.')  # Note actually more responsive to do based on blob than # of pixels, due to using only a subset to stitch
            for b_num, blob1 in enumerate(slide.blob2dlist):
                blob1 = Blob2d.get(blob1)
                if len(blob1.possible_partners) > 0:
                    if debug:
                        printl('  Starting on a new blob from bloblist:' + str(blob1) + ' which has:' + str(
                            len(blob1.possible_partners)) + ' possible partners')
                for b2_num, blob2 in enumerate(blob1.possible_partners):
                    blob2 = Blob2d.get(blob2)
                    if debug:
                        printl('   Comparing to blob2:' + str(blob2))
                    new_stitch = Pairing(blob1.id, blob2.id, 1.1, 36, quiet=quiet) # TODO use this to assign ids to pairings
                progress.update(b_num, set_val=True)

            if quiet and not debug:
                progress.finish()
                print_elapsed_time(t_start_stitching_this_slide, time.time(), prefix='took')
        print_elapsed_time(t_start_stitching, time.time(), prefix='Stitching all slides took', endline=False)
        printl(' total')
Example #10
    def fit(self, x, y, steps=0, batch_size=32):
        num_batches = x.shape[0] // batch_size

        for i, p in enumerate(self.particles):
            local_score = p.get_score(x, y)

            if local_score < self.global_best_score:
                self.global_best_score = local_score
                self.global_best_weights = p.get_best_weights()

        print "PSO -- Initial best score {:0.4f}".format(self.global_best_score)

        bar = ProgressBar(steps, updates=20)

        for i in range(steps):
            for j in range(num_batches):
                x_ = x[j*batch_size:(j+1)*batch_size,:]
                y_ = y[j*batch_size:(j+1)*batch_size]

                for p in self.particles:
                    local_score = p.step(x_, y_, self.global_best_weights)

                    if local_score < self.global_best_score:
                        self.global_best_score = local_score
                        self.global_best_weights = p.get_best_weights()

            bar.update(i)

        bar.done()
Example #11
    def loadFromFile(self, file_path):
        if self.log is not None:
            self.log.log("Loading DFT data")
            self.log.indent()

            self.log.log("File = %s" % (file_path))

        with open(file_path, 'r') as file:
            text = file.read()

        text = text.rstrip()
        lines = text.split("\n")

        progress = ProgressBar("Poscar Files ",
                               22,
                               len(lines),
                               update_every=50)
        progress.estimate = False
        # This code originally had validation checks for all values.
        # For now, they have been removed. Experience using the program
        # for quite a while has led me to believe that they are unnecessary.

        start_line = 0
        while start_line < len(lines):
            # We need to know the number of atoms in the file
            # before we can send the proper string of text to
            # the parsing function.

            atoms_in_struct = int(lines[start_line + 5])

            base = start_line
            stride = base + 8 + atoms_in_struct
            structure_lines = lines[base:stride]

            struct = PoscarStructure(structure_lines, self.e_shift)
            self.n_atoms += struct.n_atoms
            self.structures.append(struct)

            if struct.comment not in self.all_comments:
                self.all_comments.append(struct.comment)

            start_line += 8 + atoms_in_struct
            progress.update(start_line)

            self.n_structures = len(self.structures)

        progress.finish()
        self.loaded = True

        if self.log is not None:
            self.log.log("Atoms      Loaded = %i" % self.n_atoms)
            self.log.log("Structures Loaded = %i" % self.n_structures)
            self.log.unindent()

        return self
Example #12
    def writeToFile(self, file_path):
        if self.log is not None:
            self.log.log("Writing Training Set to File")
            self.log.indent()
            self.log.log("File = %s" % (file_path))

        # 50 Kb buffer because these files are always large. This should
        # make the write a little faster.
        with open(file_path, 'w', 1024 * 50) as file:
            file.write(self.config.toFileString(prepend_comment=True))
            file.write(' # %i - Potential Type\n' % (1))
            file.write(' # %i - Number of Structures\n' % (self.n_structures))
            file.write(' # %i - Number of Atoms\n' % (self.n_atoms))
            file.write(' # ATOM-ID GROUP-NAME GROUP_ID STRUCTURE_ID ')
            file.write('STRUCTURE_Natom STRUCTURE_E_DFT STRUCTURE_Vol\n')

            progress = ProgressBar("Writing LSParams ",
                                   22,
                                   self.n_atoms,
                                   update_every=50)
            progress.estimate = False

            atom_idx = 0
            for struct in self.structures:
                for training_input in struct:

                    file.write(
                        'ATOM-%i %s %i %i %i %.6E %.6E\n' %
                        (atom_idx, training_input.group_name,
                         training_input.group_id, training_input.structure_id,
                         training_input.structure_n_atoms,
                         training_input.structure_energy,
                         training_input.structure_volume))

                    current_params = training_input.structure_params
                    params_strs = ['%.6E' % g for g in current_params]
                    params_strs = ' '.join(params_strs)
                    file.write('Gi  %s\n' % (params_strs))

                    atom_idx += 1

                progress.update(atom_idx)

            progress.finish()
            file.write('\n')

        if self.log is not None:
            self.log.log("Time Elapsed = %ss" % progress.ttc)
            self.log.unindent()
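Several of these snippets (e.g. Examples #11, #12, #14, #16 and #21) rely on a project-local ProgressBar constructed as ProgressBar(prefix, width, total, update_every=...), with update(), finish(), an estimate flag, and a ttc ("time to complete") attribute read after finishing. The real class is not shown in this collection; the following is only a minimal, hypothetical stand-in compatible with those calls.

import sys
import time

class ProgressBar(object):
    # Hypothetical minimal stand-in; the actual project class is not shown here.
    def __init__(self, prefix, width, total, update_every=1):
        self.prefix = prefix              # label printed before the bar
        self.width = width                # character width of the bar body
        self.total = max(total, 1)        # total units of work
        self.update_every = update_every  # redraw at most every N update() calls
        self.estimate = True              # some examples toggle this flag
        self.ttc = 0.0                    # elapsed seconds, read after finish()
        self._start = time.time()
        self._calls = 0

    def update(self, value):
        self._calls += 1
        if self._calls % self.update_every and value < self.total:
            return
        frac = min(float(value) / self.total, 1.0)
        filled = int(round(frac * self.width))
        bar = '=' * filled + '-' * (self.width - filled)
        sys.stdout.write('\r%s[%s] %3d%%' % (self.prefix, bar, int(frac * 100)))
        sys.stdout.flush()

    def finish(self):
        self.update(self.total)
        self.ttc = round(time.time() - self._start, 2)
        sys.stdout.write('\n')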
Example #13
def eval(model,
         train_data,
         train_label,
         train_candi,
         origin_labels,
         origin_candi,
         batch_size,
         USE_CUDA=True):
    model.eval()
    eval_tool = Rouge()
    if USE_CUDA:
        print("using GPU")
        model = model.cuda()
        train_data = train_data.cuda()
        train_label = train_label.cuda()
        train_candi = train_candi.cuda()
    dataset = torch.utils.data.TensorDataset(train_data, train_candi,
                                             train_label)
    train_iter = torch.utils.data.DataLoader(dataset,
                                             batch_size,
                                             shuffle=False)
    pbar = ProgressBar(n_total=len(train_iter), desc='Evaling')
    pos = 0
    rouge1 = 0
    rouge2 = 0
    rougeL = 0
    for x, y, z in train_iter:
        output = model(x, y, z)
        score = output["score"].detach().cpu().numpy().tolist()[0]
        i = score.index(max(score))
        test = random.randint(0, 200)
        if pos == test:
            print('\n', pos, output)
        #   print(len(score))
        #    print("choose",i)
        summary = origin_candi[pos][i]
        label = origin_labels[pos]
        rouge_score = eval_tool.get_scores(summary, label)
        rouge1 += rouge_score[0]["rouge-1"]['r']
        rouge2 += rouge_score[0]["rouge-2"]['r']
        rougeL += rouge_score[0]["rouge-l"]['r']
        #pbar(pos, {'index':pos })
        pos += 1
    print("\nROUGE1 Recall ", rouge1 / pos)
    print("ROUGE2 Recall ", rouge2 / pos)
    print("ROUGEL Recall ", rougeL / pos)
Example #14
def GenerateLocalStructureParams(neighbor_list, potential_config, log=None):

    if log is not None:
        log.log("Generating Local Structure Parameters")
        log.indent()

    # Here we compute the number of operations that will need
    # to take place in order to calculate the structural
    # parameters. This is somewhat of an estimate, but the
    # operation should scale roughly by a factor of n^2.
    # In practice, this has generally been an excellent estimate.
    n_total = 0
    for struct in neighbor_list:
        for atom in struct:
            n_total += (len(atom)**2 - len(atom)) / 2
    n_processed = 0

    progress = ProgressBar("Structural Parameters ",
                           22,
                           n_total,
                           update_every=8)

    structural_parameters = []

    parameters_per_atom = potential_config.n_legendre_polynomials
    parameters_per_atom *= potential_config.n_r0
    # Here we iterate over every structure. And then
    # over every atom. We export the structural parameter
    # calculation for each individual atom to another function.
    for struct in neighbor_list:
        processed = 0
        # Iterate over all structures.
        parameters_for_structure = np.zeros((len(struct), parameters_per_atom))
        for idx, atom_neighbors in enumerate(struct):
            processed += (len(atom_neighbors)**2 - len(atom_neighbors)) / 2
            # Iterate over each atom in the structure and compute the
            # parameters for it.
            parameters_for_structure[idx, :] = computeParameters(
                atom_neighbors, potential_config)

        n_processed += processed
        progress.update(n_processed)

        structural_parameters.append(parameters_for_structure)

    progress.finish()

    if log is not None:
        log.log("Time Elapsed = %ss" % progress.ttc)
        log.unindent()

    return structural_parameters
Example #15
 def set_all_shape_contexts(slidelist):
     # Note Use the shape contexts approach from here: http://www.cs.berkeley.edu/~malik/papers/mori-belongie-malik-pami05.pdf
     # Note The paper uses 'Representative Shape Contexts' to do initial matching; I will do away with this in favor of checking bounds for possible overlaps
     t0 = time.time()
     pb = ProgressBar(max_val=sum(
         len(Blob2d.get(b2d).edge_pixels) for slide in slidelist
         for b2d in slide.blob2dlist))
     for slide in slidelist:
         for blob in slide.blob2dlist:
             Blob2d.get(blob).set_shape_contexts(36)
             pb.update(len(Blob2d.get(blob).edge_pixels), set_val=False)
     pb.finish()
     print_elapsed_time(t0, time.time(), prefix='took')
Example #16
    def loadFromText(self, text):
        lines = text.rstrip().split('\n')

        self.config = PotentialConfig().loadFromText('\n'.join(lines[:8]))

        self.potential_type = int(self._getCellsFromLine(lines[8])[0])
        self.n_structures = int(self._getCellsFromLine(lines[9])[0])
        self.n_atoms = int(self._getCellsFromLine(lines[10])[0])

        parameters_per_atom = self.config.n_legendre_polynomials
        parameters_per_atom *= self.config.n_r0

        progress = ProgressBar("Loading Training Set",
                               22,
                               self.n_structures,
                               update_every=10)

        # Every set of two lines from 13 onwards should correspond to a single
        # atom. Line 12 doesn't contain useful information.

        # This code will convert the file into a list of structures. Each
        # element in this list is a list of training inputs, each one
        # corresponding to an atom in the structure.
        self.structures = []
        idx = 12
        current_struct = []
        current_id = 0
        while idx < len(lines):

            atom = TrainingInput().fromLines(lines[idx], lines[idx + 1],
                                             parameters_per_atom)

            if atom.structure_id != current_id:
                self.structures.append(current_struct)
                current_struct = []
                current_id = atom.structure_id
                progress.update(current_id + 1)

            current_struct.append(atom)
            idx += 2

        progress.finish()
        self.structures.append(current_struct)

        if self.log is not None:
            self.log.log("Atoms      Loaded = %i" % self.n_atoms)
            self.log.log("Structures Loaded = %i" % self.n_structures)
            self.log.log("Time Elapsed = %ss" % progress.ttc)
            self.log.unindent()

        return self
Example #17
def test(ctx):
    '''test code
    '''
    # pbar = click.progressbar(length=10, label='update nav')
    # iterable=iterable, length=length, show_eta=show_eta,
    #                    show_percent=show_percent, show_pos=show_pos,
    #                    item_show_func=item_show_func, fill_char=fill_char,
    #                    empty_char=empty_char, bar_template=bar_template,
    #                    info_sep=info_sep, file=file, label=label,
    #                    width=width, color=color)

    with ProgressBar.ProgressBar(
            list(range(0, 10)),
            label='update nav',
            item_show_func=lambda x: "subtask %d" % x if x is not None else '',
            fill_char='#',
            empty_char='-',
            bar_template='%(label)-25s  [%(bar)s]  %(info)s',
            width=0) as bar:
        for i in bar:
            time.sleep(0.3)
Example #18
    def test(self, data, label='Test'):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in xrange(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from util import ProgressBar
            bar = ProgressBar(label, max=N)

        m = self.mem_size
        for idx in xrange(N):
            if self.show: bar.next()
            target.fill(0)
            for b in xrange(self.batch_size):
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run([self.loss], feed_dict={self.input: x,
                                                         self.time: time,
                                                         self.target: target,
                                                         self.context: context})
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost / N / self.batch_size
Example #19
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in xrange(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from util import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in xrange(N):
            if self.show: bar.next()
            target.fill(0)
            for b in xrange(self.batch_size):
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x,
                                                    self.time: time,
                                                    self.target: target,
                                                    self.context: context})
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost / N / self.batch_size
Example #20
	def generateLSP(self, neighbors, max_chunk=500):
		chunk_start  = 0
		chunk_stride = chunk_start + max_chunk

		lsp = None

		progress = ProgressBar(
			"Structural Parameters ", 
			22, int(len(neighbors) / max_chunk),
			update_every = 5
		)

		idx = 0

		while chunk_start < len(neighbors):
			self.loadNeighbors(neighbors[chunk_start:chunk_stride])

			tmp = self._computeLSP()
			self.cleanupNeighbors()

			if lsp is None:
				lsp = tmp
			else:
				lsp = torch.cat((lsp, tmp), 0)

			chunk_start  += max_chunk
			chunk_stride += max_chunk

			chunk_stride = min(chunk_stride, len(neighbors))

			idx += 1
			progress.update(idx)

		progress.finish()

		return lsp
Example #21
    def _train_loop(self):
        progress = ProgressBar("Training ",
                               22,
                               self.iterations + int(self.iterations == 0),
                               update_every=1)

        while self.iteration <= self.iterations:
            progress.update(self.iteration)

            self.training_losses[self.iteration] = self.last_loss

            if self.restart_error != 0.0:
                if self.last_loss > self.restart_error:
                    if self.restarts == 3:
                        if self.log is not None:
                            msg = "Maximum number of restarts exceeded."
                            self.log.log(msg)
                        break
                    else:
                        if self.log is not None:
                            msg = "Error threshold exceeded, restarting."
                            self.log.log(msg)
                        self.need_to_restart = True
                        self.restarts += 1
                        break

            # The following lines figure out if we have reached an iteration
            # where validation information or volume vs. energy information
            # needs to be stored.
            if self.val_interval != 0:
                if self.iteration % self.val_interval == 0:
                    idx = (self.iteration // self.val_interval)
                    self.validation_losses[idx] = self.validation_loss()

            if self.energy_interval != 0:
                if self.iteration % self.energy_interval == 0:
                    idx = (self.iteration // self.energy_interval)
                    self.energies[idx, :] = self.get_structure_energies()

            if self.backup_interval != 0:
                if self.iteration % self.backup_interval == 0:
                    idx = self.iteration
                    path = self.backup_dir + 'nn_bk_%05i.nn.dat' % idx
                    layers = self.nn.getNetworkValues()
                    self.potential.layers = layers
                    self.potential.writeNetwork(path)

            if self.smi_log != '':
                if self.iteration % 50 == 0:
                    try:
                        smi_stdout = subprocess.getoutput("nvidia-smi")
                        self.smi_outputs.append(smi_stdout)
                    except:
                        self.smi_outputs.append("nvidia-smi call failed")

            # Perform an evaluate and correct step, while storing
            # the resulting loss in self.training_losses.
            self.optimizer.step(self.training_closure)

            self.iteration += 1

        progress.finish()
Example #22
			layer_time_dict[func_name]['number'] += 1

	print '================================================'
	for name, record in layer_time_dict.items():
		print '[{}]:'.format(name)
		print 'total time: {} ms'.format(record['time'] * 1000)
		print 'average time: {} ms\n'.format(float(record['time']) * 1000/ record['number'])
	print '================================================'

timer_hook = TimerHook()
layer_timer_hook = TimerHook()
layer_timer_hook.name = 'Layer-by-layer timer hook'

# configuration for inference
chainer.config.train = False
progress_bar = ProgressBar(estimate_load_time)
progress_bar.start()

print 'loading caffe model...'

start_time = time.time()
func = CaffeFunction(model_path)
end_time = time.time()

progress_bar.end()
time.sleep(1)
print '\nsuccessfully load caffe model, it costs %s seconds' % (end_time - start_time)

max_iter = 1000 if 50000 / N >= 1000 else 50000 / N
total_time = 0
average_time = 0
Example #23
    def GenerateNeighborList(self, structures):
        if self.log is not None:
            self.log.log("Generating Neighbor List")
            self.log.indent()

        # For each atom within each structure, we need to generate a list
        # of atoms within the cutoff distance. Periodic images need to be
        # accounted for during this process. Neighbors in this list are
        # specified as coordinates, rather than indices.

        # The final return value of this function in a 3 dimensional list,
        # with the following access structure:
        #     neighbor = list[structure][atom][neighbor_index]

        # First we will compute the total number of atoms that need to be
        # processed in order to get an estimate of the time this will take
        # to complete.
        n_total = sum([struct.n_atoms**2 for struct in structures])

        progress = ProgressBar("Neighbor List ", 22, n_total, update_every=25)
        progress.estimate = False

        # IMPORTANT NOTE: This needs to be multiplied by 1.5 when PINN
        #                 gets implemented.
        cutoff = self.config.cutoff_distance * 1.0

        n_processed = 0

        structure_start = 0
        structure_stride = 0
        for structure in structures:

            # Normalize the translation vectors.
            a1_n = np.linalg.norm(structure.a1)
            a2_n = np.linalg.norm(structure.a2)
            a3_n = np.linalg.norm(structure.a3)

            # Numpy will automatically convert these to arrays when they are
            # passed to numpy functions, but it will do that each time we call
            # a function. Converting them beforehand will save some time.
            a1 = structure.a1
            a2 = structure.a2
            a3 = structure.a3

            # Determine the number of times to repeat the
            # crystal structure in each direction.

            x_repeat = int(np.ceil(cutoff / a1_n))
            y_repeat = int(np.ceil(cutoff / a2_n))
            z_repeat = int(np.ceil(cutoff / a3_n))

            # Now we construct an array of atoms that contains all
            # of the repeated atoms that are necessary. We need to
            # repeat the crystal structure from -repeat*A_n to
            # positive repeat*A_n.

            # This is the full periodic structure that we generate.
            # It is a list of vectors, each vector being a length 3
            # list of floating points.
            n_periodic_atoms = (2 * x_repeat + 1)
            n_periodic_atoms *= (2 * y_repeat + 1)
            n_periodic_atoms *= (2 * z_repeat + 1)
            n_periodic_atoms *= structure.n_atoms
            periodic_structure = np.zeros((n_periodic_atoms, 3))
            atom_idx = 0
            for i in range(-x_repeat, x_repeat + 1):
                for j in range(-y_repeat, y_repeat + 1):
                    for k in range(-z_repeat, z_repeat + 1):
                        # This is the new location to use as the center
                        # of the crystal lattice.
                        center_location = a1 * i + a2 * j + a3 * k

                        # Now we add each atom + new center location
                        # into the periodic structure.
                        for atom in structure.atoms:
                            val = atom + center_location
                            periodic_structure[atom_idx] = val
                            atom_idx += 1

            # Here we actually iterate over every atom and then for each atom
            # determine which atoms are within the cutoff distance.
            for atom in structure.atoms:
                # This statement will subtract the current atom position from
                # the position of each potential neighbor, element wise. It
                # will then calculate the magnitude of each of these vectors
                # element  wise.
                distances = np.linalg.norm(periodic_structure - atom, axis=1)

                # This is special numpy syntax for selecting all items in an
                # array  that meet a condition. The boolean operators in the
                # square  brackets actually convert the 'distances' array into
                # two arrays  of boolean values and then computes their
                # boolean 'and' operation element wise. It then selects all
                # items in the array  'periodic_structure' that correspond to
                # a value of true in the  array of boolean values.
                mask = (distances > 1e-8) & (distances < cutoff)
                neighbors = periodic_structure[mask]

                # This line just takes all of the neighbor vectors that we now
                # have (as absolute vectors) and changes them into vectors
                # relative to the atom that we are currently finding neighbors
                # for.
                neighbor_vecs = neighbors - atom

                self.atom_neighbors.append(neighbor_vecs)

                structure_stride += 1

            self.structure_strides.append((structure_start, structure_stride))
            structure_start = structure_stride

            # Update the performance information so we can report
            # progress to the user.
            n_processed += structure.n_atoms**2
            progress.update(n_processed)

        progress.update(n_total)
        progress.finish()

        if self.log is not None:
            self.log.log("Time Elapsed = %ss" % progress.ttc)
            self.log.unindent()
Example #24
  def predict(self, test_annos):
    n_imgs = test_annos.shape[0]
    use_gt = self.use_gt
    class_inds = self.class_inds
    attrib_names = self.attrib_names
    
    print "Load image Bow histograms from disk"
    features = Bow.load_bow(test_annos, self.config)
    
    
    if not use_gt:
      clf_res, clf_res_discrete = self.create_attrib_res_on_images(test_annos,
                                                                   features)
        
    # using ground truth    
    if use_gt:
      attrib_meta = self.attrib_selector.create_attrib_meta(attrib_names)
        
        
    # apply multi class classifier on test annos
    
    # (if boolean)
#     m = self.multi_class_clf.predict(features=features)
#     m_clf_values = pd.DataFrame(data=np.zeros([m.shape[0], len(class_inds)]), 
#                            index=test_annos.index, 
#                            columns=class_inds, dtype=bool)
#     for ii in range(m.shape[0]):
#           m_clf_values.iloc[ii][m[ii]] = True

    # (if using score values)
    m = self.multi_class_clf.decision_function(features=features)
    mm = np.zeros_like(m, dtype=str)

    mm[m <= -1] = 'nn'
    mm[np.logical_and(m > -1, m  <= -0.2)] = 'n'
    mm[np.logical_and(m > -0.2, m  <= 0.2)] = 'u'
    mm[np.logical_and(m > 0.2, m  <= 1)] = 'p'
    mm[m > 1] = 'pp'
    m_clf_values = pd.DataFrame(data=mm, index=test_annos.index, 
                                columns=class_inds, dtype=str)
    
    c = np.array(clf_res[attrib_names])
    tmp = clf_res['class_index']
    cc = np.zeros_like(c, dtype=str)
    cc[c <= -1] = 'nn'
    cc[np.logical_and(c > -1, c  <= -0.2)] = 'n'
    cc[np.logical_and(c > -0.2, c  <= 0.2)] = 'u'
    cc[np.logical_and(c > 0.2, c  <= 1)] = 'p'
    cc[c > 1] = 'pp'
    clf_res = pd.DataFrame(data=cc, index=test_annos.index, 
                                columns=attrib_names, dtype=str)
    clf_res['class_index'] = tmp
    
    

    class_prob = pd.DataFrame(np.zeros([n_imgs, 
                                        len(class_inds)]),
                              index=test_annos.index, 
                              columns=class_inds)
    
    attrib_prob = pd.DataFrame(np.zeros([n_imgs, 
                                         len(attrib_names)]),
                              index=test_annos.index, 
                              columns=attrib_names)
    
    pbar = ProgressBar(n_imgs)
    print 'Predicting class probabilities on images'
    for ii in range(n_imgs):
#       print "=================={}/{}========================".format(ii+1, n_imgs)
#       print "Image: {}, class_id: {}, class_name: {}".format(test_annos.iloc[ii]['basename'],
#                                                             test_annos.iloc[ii]['class_index'], 
#                                                             test_annos.iloc[ii]['class_name'])
      if use_gt:
        attrib_res = attrib_meta.loc[test_annos.iloc[ii]['class_index']]
      else:
#         attrib_res = clf_res_discrete.iloc[ii]
        attrib_res = clf_res.iloc[ii]
      
      m_clf_values_one = m_clf_values.iloc[ii]
      (class_prob_ii, attrib_prob_ii) = self.predict_one(attrib_res, m_clf_values_one)
      class_prob.iloc[ii] = class_prob_ii
      attrib_prob.iloc[ii] = attrib_prob_ii
      pbar.animate(ii)
      
    print ' '
    return (class_prob, attrib_prob)
Example #25
def bloom_b3ds(blob3dlist, stitch=False):
    allb2ds = [Blob2d.get(b2d) for b3d in blob3dlist for b2d in b3d.blob2ds]
    printl('\nProcessing internals of ' + str(len(allb2ds)) +
           ' 2d blobs via \'blooming\' ',
           end='')
    t_start_bloom = time.time()
    num_unbloomed = len(allb2ds)
    pb = ProgressBar(max_val=sum(len(b2d.pixels) for b2d in allb2ds),
                     increments=50)
    for bnum, blob2d in enumerate(allb2ds):
        blob2d.gen_internal_blob2ds(
        )  # NOTE will have len 0 if no blooming can be done
        pb.update(len(blob2d.pixels), set_val=False
                  )  # set is false so that we add to an internal counter
    pb.finish()

    print_elapsed_time(t_start_bloom, time.time(), prefix='took')
    printl('Before blooming there were: ' + str(num_unbloomed) +
           ' b2ds contained within b3ds, there are now ' +
           str(len(Blob2d.all)))

    # Setting possible_partners
    printl(
        'Pairing all new blob2ds with their potential partners in adjacent slides'
    )
    max_avail_depth = max(b2d.recursive_depth for b2d in Blob2d.all.values())
    for cur_depth in range(max_avail_depth)[1:]:  # Skip those at depth 0
        depth = [
            b2d.id for b2d in Blob2d.all.values()
            if b2d.recursive_depth == cur_depth
        ]
        max_h_d = max(Blob2d.all[b2d].height for b2d in depth)
        min_h_d = min(Blob2d.all[b2d].height for b2d in depth)
        ids_by_height = [[] for _ in range(max_h_d - min_h_d + 1)]
        for b2d in depth:
            ids_by_height[Blob2d.get(b2d).height - min_h_d].append(b2d)
        for height_val, h in enumerate(
                ids_by_height[:-1]):  # All but the last one
            for b2d in h:
                b2d = Blob2d.all[b2d]
                b2d.set_possible_partners(ids_by_height[height_val + 1])

    # Creating b3ds
    printl('Creating 3d blobs from the generated 2d blobs')
    all_new_b3ds = []
    for depth_offset in range(
            max_avail_depth + 1
    )[1:]:  # Skip offset of zero, which refers to the b3ds which have already been stitched
        printd('Depth_offset: ' + str(depth_offset), Config.debug_blooming)
        new_b3ds = []
        for b3d in blob3dlist:
            all_d1_with_pp_in_this_b3d = []
            for b2d in b3d.blob2ds:
                # Note this is the alternative to storing b3dID with b2ds
                b2d = Blob2d.get(b2d)
                d_1 = [
                    blob for blob in b2d.getdescendants()
                    if blob.recursive_depth == b2d.recursive_depth +
                    depth_offset
                ]
                if len(d_1):
                    for desc in d_1:
                        if len(desc.possible_partners):
                            all_d1_with_pp_in_this_b3d.append(desc.id)
            all_d1_with_pp_in_this_b3d = set(all_d1_with_pp_in_this_b3d)
            if len(all_d1_with_pp_in_this_b3d) != 0:
                printd(' Working on b3d: ' + str(b3d), Config.debug_blooming)
                printd(
                    '  Len of all_d1_with_pp: ' +
                    str(len(all_d1_with_pp_in_this_b3d)),
                    Config.debug_blooming)
                printd('  They are: ' + str(all_d1_with_pp_in_this_b3d),
                       Config.debug_blooming)
                printd(
                    '   = ' + str(
                        list(
                            Blob2d.get(b2d)
                            for b2d in all_d1_with_pp_in_this_b3d)),
                    Config.debug_blooming)
            for b2d in all_d1_with_pp_in_this_b3d:
                b2d = Blob2d.get(b2d)
                printd(
                    '    Working on b2d: ' + str(b2d) + ' with pp: ' +
                    str(b2d.possible_partners), Config.debug_blooming)
                if b2d.b3did == -1:  # unset
                    cur_matches = [
                        b2d
                    ]  # NOTE THIS WAS CHANGED BY REMOVED .getdescendants() #HACK
                    for pp in b2d.possible_partners:
                        printd(
                            "     *Checking if pp:" + str(pp) +
                            ' is in all_d1: ' +
                            str(all_d1_with_pp_in_this_b3d),
                            Config.debug_blooming)
                        if pp in all_d1_with_pp_in_this_b3d:  # HACK REMOVED
                            printd("     Added partner: " + str(pp),
                                   Config.debug_blooming)
                            cur_matches += [
                                Blob2d.get(b)
                                for b in Blob2d.get(pp).getpartnerschain()
                            ]
                    if len(cur_matches) > 1:
                        printd("**LEN OF CUR_MATCHES MORE THAN 1",
                               Config.debug_blooming)
                        new_b3d_list = [
                            blob.id for blob in set(cur_matches)
                            if blob.recursive_depth == b2d.recursive_depth
                            and blob.b3did == -1
                        ]
                        if len(new_b3d_list):
                            new_b3ds.append(
                                Blob3d(new_b3d_list,
                                       r_depth=b2d.recursive_depth))
        all_new_b3ds += new_b3ds
    printl(' Made a total of ' + str(len(all_new_b3ds)) + ' new b3ds')

    if stitch:
        # Set up shape contexts
        printl('Setting shape contexts for stitching')
        for b2d in [
                Blob2d.all[b2d] for b3d in all_new_b3ds for b2d in b3d.blob2ds
        ]:
            b2d.set_shape_contexts(36)

        # Stitching
        printl('Stitching the newly generated 2d blobs')
        for b3d_num, b3d in enumerate(all_new_b3ds):
            printl(' Working on b3d: ' + str(b3d_num) + ' / ' +
                   str(len(all_new_b3ds)))
            Pairing.stitch_blob2ds(b3d.blob2ds, debug=False)
    return all_new_b3ds
Example #26
        args.z = z_center
        if args.span_dir != '':
            args.z = z_center
        elif args.sweep_dir != '':
            delta = args.z_max - args.z_min
            args.z_min = z_center - (delta / 2)
            args.z_max = args.z_min + delta

    if args.sweep_dir != '':
        if not os.path.isdir(args.sweep_dir):
            os.mkdir(args.sweep_dir)

        if args.sweep_dir[-1] != '/':
            args.sweep_dir += '/'

        progress = ProgressBar("Rendering ", 22, args.sweep_n, update_every=1)

        sweep = np.linspace(args.z_min, args.z_max, args.sweep_n)
        for idx, z in enumerate(sweep):
            fname = args.sweep_dir + '%05i.png' % idx
            render_heatmap(structure,
                           potential,
                           nn,
                           res,
                           width,
                           z,
                           args,
                           save=fname)

            progress.update(idx + 1)
Example #27
File: motion.py Project: agiovann/pyfluo
def compute_motion_correction(mov, max_shift=5, sub_pixel=True, template_func=np.median, n_iters=5):
    """Computes motion correction shifts by template matching
    
    Parameters
    ----------
    (described in correct_motion doc)
    
    This can be used on its own to attain only the shifts without correcting the movie
    """
    def _run_iter(mov, base_shape, ms, sub_pixel):
        mov = mov.astype(np.float32)
        h_i,w_i = base_shape
        template=template_func(mov,axis=0)
        template=template[ms:h_i-ms,ms:w_i-ms].astype(np.float32)
        h,w = template.shape

        shifts=[]   # store the amount of shift in each frame
        
        for i,frame in enumerate(mov):
             pbar.update(it_i*len(mov) + i)
             res = cv2.matchTemplate(frame,template,cv2.TM_CCORR_NORMED)
             avg_corr=np.mean(res);
             top_left = cv2.minMaxLoc(res)[3]
             sh_y,sh_x = top_left
             bottom_right = (top_left[0] + w, top_left[1] + h)

             if sub_pixel:
                 if (0 < top_left[1] < 2 * ms-1) & (0 < top_left[0] < 2 * ms-1):
                     # if max is internal, check for subpixel shift using gaussian
                     # peak registration
                     log_xm1_y = np.log(res[sh_x-1,sh_y])
                     log_xp1_y = np.log(res[sh_x+1,sh_y])             
                     log_x_ym1 = np.log(res[sh_x,sh_y-1])             
                     log_x_yp1 = np.log(res[sh_x,sh_y+1])             
                     four_log_xy = 4*np.log(res[sh_x,sh_y])

                     sh_x_n = -(sh_x - ms + (log_xm1_y - log_xp1_y) / (2 * log_xm1_y - four_log_xy + 2 * log_xp1_y))
                     sh_y_n = -(sh_y - ms + (log_x_ym1 - log_x_yp1) / (2 * log_x_ym1 - four_log_xy + 2 * log_x_yp1))
                 else:
                     sh_x_n = -(sh_x - ms)
                     sh_y_n = -(sh_y - ms)
                         
                 M = np.float32([[1,0,sh_y_n],[0,1,sh_x_n]])
                 mov[i] = cv2.warpAffine(frame,M,(w_i,h_i),flags=cv2.INTER_CUBIC)
             else:
                 sh_x_n = -(top_left[1] - ms)
                 sh_y_n = -(top_left[0] - ms)
                 M = np.float32([[1,0,sh_y_n],[0,1,sh_x_n]])
                 mov[i] = cv2.warpAffine(frame,M,(w_i,h_i))
             shifts.append([sh_x_n,sh_y_n,avg_corr]) 
                 
        return (template,np.array(shifts),mov)

    mov_orig = mov.copy()
    h_i,w_i = mov.shape[1:]
    templates = []
    values = []
    n_steps = n_iters*len(mov_orig) #for progress bar
    pbar = ProgressBar(maxval=n_steps).start() 
    for it_i in xrange(n_iters):
        pbar.update(it_i*len(mov_orig))
        ti,vi,mov = _run_iter(mov, (h_i,w_i), max_shift, sub_pixel)
        templates.append(ti)
        values.append(vi)
    pbar.finish()
    return np.array(templates), np.array(values)
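A hypothetical call to compute_motion_correction (Python 2, since the function uses xrange; numpy, cv2 and the progressbar package are assumed to be imported in motion.py):

mov = (np.random.rand(100, 64, 64) * 255).astype(np.float32)   # 100 synthetic frames
templates, shift_values = compute_motion_correction(mov, max_shift=5, n_iters=2)
print(templates.shape)      # expect (2, 54, 54): one trimmed template per iteration
print(shift_values.shape)   # expect (2, 100, 3): x shift, y shift, mean correlation per frame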
Example #28
File: motion.py Project: agiovann/pyfluo
def compute_motion_AG(mov, max_shift_hw=(5,5), show_movie=False,template=np.median,interpolation=cv2.INTER_LINEAR,in_place=False):
        """                
        Performs motion correction using the opencv matchTemplate function. At every iteration a template is built by taking the median of all frames and then used to align the other frames.
         
        Parameters
        ----------
        max_shift: maximum pixel shifts allowed when correcting
        show_movie : display the movie while correcting it
        in_place: if True the input vector is overwritten
        
        Returns
        -------
        movCorr: motion corrected movie
        shifts : tuple, contains shifts in x and y and correlation with template
        template: the templates created at each iteration
        """
        
        if not in_place:
            mov=mov.copy()
           
        mov=mov.astype(np.float32)    
        n_frames_,h_i, w_i = mov.shape
        
        ms_h,ms_w=max_shift_hw
        
        if callable(template):
            template=template(mov,axis=0)            
        elif not type(template) == np.ndarray:
            raise Exception('Only matrices or function accepted')
        
            
        template=template[ms_h:h_i-ms_h,ms_w:w_i-ms_w].astype(np.float32)    
        h,w = template.shape      # template width and height
        
        #if show_movie:
        #    cv2.imshow('template',template/255)
        #    cv2.waitKey(2000) 
        #    cv2.destroyAllWindows()
        
        #% run algorithm, press q to stop it 
        shifts=[];   # store the amount of shift in each frame
        pbar = ProgressBar(maxval=n_frames_).start()
        for i,frame in enumerate(mov):
             pbar.update(i)             
             res = cv2.matchTemplate(frame,template,cv2.TM_CCORR_NORMED)
             avg_corr=np.mean(res);
             top_left = cv2.minMaxLoc(res)[3]
             sh_y,sh_x = top_left
             bottom_right = (top_left[0] + w, top_left[1] + h)
        
             if (0 < top_left[1] < 2 * ms_h-1) & (0 < top_left[0] < 2 * ms_w-1):
                 # if max is internal, check for subpixel shift using gaussian
                 # peak registration
                 log_xm1_y = np.log(res[sh_x-1,sh_y]);             
                 log_xp1_y = np.log(res[sh_x+1,sh_y]);             
                 log_x_ym1 = np.log(res[sh_x,sh_y-1]);             
                 log_x_yp1 = np.log(res[sh_x,sh_y+1]);             
                 four_log_xy = 4*np.log(res[sh_x,sh_y]);
    
                 sh_x_n = -(sh_x - ms_h + (log_xm1_y - log_xp1_y) / (2 * log_xm1_y - four_log_xy + 2 * log_xp1_y))
                 sh_y_n = -(sh_y - ms_w + (log_x_ym1 - log_x_yp1) / (2 * log_x_ym1 - four_log_xy + 2 * log_x_yp1))
             else:
                 sh_x_n = -(sh_x - ms_h)
                 sh_y_n = -(sh_y - ms_w)
                     
             M = np.float32([[1,0,sh_y_n],[0,1,sh_x_n]])
             mov[i] = cv2.warpAffine(frame,M,(w_i,h_i),flags=interpolation)

             shifts.append([sh_x_n,sh_y_n,avg_corr]) 
                 
             if show_movie:        
                 fr = cv2.resize(mov[i],None,fx=2, fy=2, interpolation = cv2.INTER_CUBIC)
                 cv2.imshow('frame',fr/255.0)
                 if cv2.waitKey(1) & 0xFF == ord('q'):
                     cv2.destroyAllWindows()
                     break 
        pbar.finish()         
        cv2.destroyAllWindows()
        return (mov,template,shifts)
Example #29
def Train(USE_CUDA=True, num_epochs=5, batch_size=1):
    loader = Loader("bert")
    train_data, train_label, train_candi, origin_labels, origin_candi = loader.read_data(
        base_path + document_path,
        base_path + label_path,
        "candi.txt",
        pairs_num=300,
        max_len=256,
        init_flag=False)
    print("\n")
    print(train_data.size())
    print(train_label.size())
    print(train_candi.size())

    Model = MatchSum(candidate_num=10)
    eval(Model, train_data, train_label, train_candi, origin_labels,
         origin_candi, batch_size)

    Model.train()

    if USE_CUDA:
        print("using GPU")
        Model = Model.cuda()
        train_data = train_data.cuda()
        train_label = train_label.cuda()
        train_candi = train_candi.cuda()
    dataset = torch.utils.data.TensorDataset(train_data, train_candi,
                                             train_label)
    train_iter = torch.utils.data.DataLoader(dataset,
                                             batch_size,
                                             shuffle=False)
    optimizer = Adam(filter(lambda p: p.requires_grad, Model.parameters()),
                     lr=2e-5)
    loss_func = Loss_func(0.01)
    pbar = ProgressBar(n_total=len(train_iter), desc='Training')
    for epoch in range(num_epochs):
        index = 0
        total_loss = 0
        print("----Start", epoch, "----")
        test = random.randint(0, 250)
        for x, y, z in train_iter:
            #print(x)
            #print("\n")
            #print(y)
            #print("\n")
            #print(z)
            optimizer.zero_grad()
            output = Model(x, y, z)
            if index == test:
                print('\n', test, output)
            #print(output)
            loss = loss_func.get_loss(output['score'], output['summary_score'])
            loss.backward()
            optimizer.step()
            total_loss += loss.mean().data
            #pbar(index, {'Loss': total_loss/index})
            index += 1
        print("Epoch: ", epoch, " Loss: ", total_loss / index)
        eval(Model, train_data, train_label, train_candi, origin_labels,
             origin_candi, batch_size)
    eval(Model, train_data, train_label, train_candi, origin_labels,
         origin_candi, batch_size)
Example #30
        port = 8091
    else:
        hostname = node[:node.find(":")]
        port = node[node.find(":") + 1:]
    server = {"ip": hostname,
              "port": port,
              "rest_username": options.username,
              "rest_password": options.password,
              "username": options.username,
              "password": options.password}
    print server
    v = None
    try:
        v = VBucketAwareMembaseClient(server, options.bucket)
        number_of_items = int(options.items)
        bar = ProgressBar(0, number_of_items, 77)
        old_bar_string = ""
        value = StringUtil.create_value("*", options.value_size)
        for i in range(0, number_of_items):
            key = "{0}-{1}".format(options.key_prefix, str(uuid.uuid4())[:5])
            if options.load_json:
                document = "\"name\":\"pymc-{0}\"".format(key, key)
                document = document + ",\"age\":{0}".format(random.randint(0, 1000))
                document = "{" + document + "}"
                a, b, c = v.set(key, 0, 0, document)
            else:
                a, b, c = v.set(key, 0, 0, value)
            a, b, c = v.get(key)

            bar.updateAmount(i)
            if old_bar_string != str(bar):
Example #31
 def __init__(self, number_of_items):
     self.bar = ProgressBar(0, number_of_items, 77)
     self.number_of_items = number_of_items
     self.counter = 0
     self.old_bar_string = ""
Example #32
    def read_data(self,
                  path1,
                  path2,
                  path3,
                  pairs_num,
                  max_len=128,
                  init_flag=True):
        print("----start Read train data----")
        fo = open(path1, "r", encoding='gb18030', errors='ignore')
        fl = open(path2, "r", encoding='gb18030', errors='ignore')

        candi_list = []
        pbar = ProgressBar(n_total=pairs_num, desc='Loading')
        if init_flag:
            self.gen_data(path1, path3, pairs_num)
        self.check_data(path3, pairs_num)
        fc = open(path3, "r", encoding='gb18030', errors='ignore')
        origin_labels = []
        origin_candi = []
        for i in range(pairs_num):
            pbar(i, {'current': i})
            line1 = fo.readline()
            line2 = fl.readline()
            if line1 == None or line2 == None:
                continue
            #line1="A ##SENT## B ##SENT## C ##SENT## D ##SENT## E ##SENT## F"
            do = self.get_document(line1)
            la = self.get_labels(line2)

            document = " ".join(do)
            la = " ".join(la)
            origin_labels.append(la)
            candidata_data = []
            temp_candi = []
            for j in range(10):
                temp = fc.readline()
                temp = temp.replace("\n", "")
                temp_candi.append(temp)
                if len(temp) == 0:
                    print("Hit bad Trap at", i * 10 + j)
                candidata_data.append(
                    tokenizer.encode(temp, add_special_tokens=False))
            #print(len(candidata_data))
            #print(candidata_data[0])
            origin_candi.append(temp_candi)
            self.train_data['text'].append(
                tokenizer.encode(document, add_special_tokens=False))
            self.train_data['label'].append(
                tokenizer.encode(la, add_special_tokens=False))
            self.train_data['candi'].append(candidata_data)

        data_list = self.pad_and_add_token(self.train_data['text'], max_len)
        label_list = self.pad_and_add_token(self.train_data['label'], max_len)

        pos = 0
        for i in self.train_data['candi']:
            pos += 1
            temp = self.pad_and_add_token(i, max_len)
            candi_list.append(temp)

        train_data = torch.tensor(data_list)
        train_label = torch.tensor(label_list)
        train_candi = torch.tensor(candi_list)
        return train_data, train_label, train_candi, origin_labels, origin_candi