Example #1
def raw_analysis(file_bin, analysed_parts, sign_min_size, test_dir, subdiv, manual, sleep, replacing_value=0):
    # Each part to be analysed will be divided 'subdiv' times if each subdivision is higher than sign_min_size
    range_list, minimal_range_set = generate_ranges(analysed_parts, subdiv, sign_min_size)

    if minimal_range_set:
        # each range is equal or smaller than the minimal signature size option, abort
        return range_list

    new_range_list = []

    range_file_dict = {}

    print("[i] Creating %d test files..." % len(range_list), end="")

    for i, r in enumerate(range_list):
        filepath = os.path.join(test_dir, "test-%s.bin" % i)
        range_file_dict[filepath] = r

        with open(filepath, "wb") as f:
            f.write(file_bin[0:r[0]])
            f.write(bytes([replacing_value]) * (r[1] + 1 - r[0]))
            f.write(file_bin[r[1] + 1:])

    print("Done")

    if not manual:
        time.sleep(sleep)
    else:
        _ = input("Press any key to continue...")

    found_sign = False
    for filepath, r in range_file_dict.items():
        if os.path.exists(filepath):
            print("[i] Located signature between bytes %d and %d" % (r[0], r[1]))
            new_range_list.append(r)
            found_sign = True
        try:
            os.remove(filepath)
        except FileNotFoundError:
            pass

    if len(new_range_list) == 0:
        print("[i] Unable to get a more precise location of the signature, probable a payload containing multiple signatures")
        return new_range_list
    elif union(new_range_list) == union(analysed_parts):
        print("[i] Unable to get a more precise location of the signature")
        return new_range_list
    else:
        return raw_analysis(file_bin, new_range_list, sign_min_size, test_dir, subdiv, manual, sleep)
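
The union helper used above is not shown; judging from how it is called (comparing the merged coverage of two range lists), it most likely merges overlapping or adjacent [start, end] byte ranges. A minimal sketch under that assumption:

def union(ranges):
    # Merge overlapping or adjacent [start, end] ranges so that two
    # range lists covering the same bytes compare equal.
    merged = []
    for start, end in sorted(ranges):
        if merged and start <= merged[-1][1] + 1:
            merged[-1][1] = max(merged[-1][1], end)  # extend the previous range
        else:
            merged.append([start, end])
    return merged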
Example #2
def crawl_web(seed):  # returns index, graph of inlinks
    tocrawl = [seed]
    crawled = []
    graph = {}  # <url>, [list of pages it links to]
    index = {} 
    while tocrawl: 
        page = tocrawl.pop()
        if page not in crawled:
            content = get_page(page)
            add_page_to_index(index, page, content)
            outlinks = get_all_links(content)
            graph[page] = outlinks
            utils.union(tocrawl, outlinks)
            crawled.append(page)
    return index, graph
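
utils.union(tocrawl, outlinks) is presumably the classic in-place list union: it appends to the work list only the links it has not seen yet. A sketch consistent with that call:

def union(a, b):
    # Extend list a in place with the elements of b it does not already
    # contain, so the crawler's to-do list stays duplicate-free.
    for e in b:
        if e not in a:
            a.append(e)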
Example #3
def holes_filling(input, _):

    struct_1 = np.array([
        [1], [1], [1]
    ])

    struct_2 = np.array([
        [1, 1, 1]
    ])

    structure = np.array([
        [0, 1, 0],
        [1, 1, 1],
        [0, 1, 0]
    ])

    mask = complementary(input)

    #iter = 0
    X0 = np.zeros(input.shape)
    while True:
        d1 = ndimage.binary_dilation(X0, struct_1, border_value=1, mask=mask)
        d2 = ndimage.binary_dilation(X0, struct_2, border_value=1, mask=mask)
        X1 = union(d1, d2)

        if np.array_equal(X0, X1): break
        X0 = X1
        #iter += 1

    return complementary(X0)
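
Here union and complementary are presumably element-wise operations on binary images: logical OR and logical NOT respectively. Hedged sketches:

import numpy as np

def union(a, b):
    # Element-wise OR of two binary masks.
    return np.logical_or(a, b)

def complementary(img):
    # Swap foreground and background of a binary image.
    return np.logical_not(img)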
Example #4
def line_categoric(datasets, x, y, title, path, yscale='linear', col_map=None):
    print("Drawing line chart %s" % title)

    if col_map is None:
        col_map = make_col_map(datasets)

    xdatas = []
    for name in datasets:
        xdata = list(map(lambda i: i[x], datasets[name]))
        xdatas.append(xdata)

    xdata = utils.union(*xdatas)
    ypos = np.arange(len(xdata))

    for i, name in enumerate(datasets):
        data = datasets[name]

        ydata = []
        for j in range(len(xdata)):
            xitem = xdata[j]
            items = list(filter(lambda i: i[x] == xitem, data))
            if len(items) > 0:
                ydata.append(items[0][y])
            else:
                ydata.append(0)

        plt.plot(xdata, ydata, '-o', label=name, color=col_map[name])

    plt.ylabel(display.axis_case(y))
    plt.yscale(yscale)
    plt.xticks(ypos, xdata, rotation=90)
    plt.title(title)
    plt.legend(loc='upper left')
    savefig(path)
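
utils.union(*xdatas) has to return a deterministic sequence, since its result is used as the x-axis tick labels; a plausible sketch is an order-preserving union over any number of lists:

def union(*lists):
    # Order-preserving union: every category appears once, in the order
    # of its first occurrence across all datasets.
    seen = []
    for lst in lists:
        for item in lst:
            if item not in seen:
                seen.append(item)
    return seen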
Example #5
def computeWordOvelap(imgc, word_gt, words, wordsOk, wordsFp):
    
    best_match = 0
    best_match2 = 0
    for det_word in words:
        try:
            cv2.rectangle(imgc, (det_word[0], det_word[1]), (det_word[2], det_word[3]), (0, 0, 255))
            for gt_box in word_gt:
                rect_int =  utils.intersect( det_word, gt_box )
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(det_word, gt_box))
                
                ratio = int_area / float(union_area)
                ratio2 = int_area / utils.area(gt_box)
                if ratio > best_match:
                    best_match = ratio
                    w = det_word
                    best_match2 = ratio2
                    
            if best_match2 > 0.3:
                wordsOk.append(det_word)
            elif best_match == 0:
                wordsFp.append(det_word)
        except:
            pass
            
    return (best_match, best_match2)
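
The rectangle helpers this example assumes are not shown. With boxes as (x1, y1, x2, y2) tuples, plausible sketches are: intersect as the overlap rectangle, union as the bounding box of both inputs (so the "union area" here is a bounding-box area, not a true IoU denominator), and area as the enclosed area, zero when the rectangle is empty:

def union(a, b):
    # Bounding box covering both rectangles.
    return (min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3]))

def intersect(a, b):
    # Overlap rectangle; may be empty (x2 <= x1 or y2 <= y1).
    return (max(a[0], b[0]), max(a[1], b[1]), min(a[2], b[2]), min(a[3], b[3]))

def area(r):
    # Area of a rectangle, 0 for an empty intersection.
    w, h = r[2] - r[0], r[3] - r[1]
    return w * h if w > 0 and h > 0 else 0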
Example #6
    def join(self, p1, p2, f, i, j):
        buckets = defaultdict(set)
        for ai, aj in self.get_tuples_from_function(f, i, j):
            buckets[aj].add(ai)
        image2 = set(frozenset(union(buckets[aj] for aj in s)) for s in p2)
        image2 = make_partition(image2)

        # print map(lambda x:map(str, x), image2)

        # image2 = [[o for o in elems if o not in conflict] for elems in image2]
        result = []
        for s1 in p1:
            # print " old set:", map(str, s1)
            if True or not any(o in conflict for o in s1):
                all = set()
                for s2 in image2:
                    new = [o for o in s1 if o in s2]
                    if new:
                        # print " new set:", map(str, new)
                        result.append(new)
                        all.update(new)
                # if len(all) < len(s1):
                #     print " remaining:", map(str, [o for o in s1 if o not in all])
                #     result.append([o for o in s1 if o not in all])

        return result
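
In this example union takes a single iterable of sets and flattens it into one set (it is called on a generator expression). A minimal sketch:

def union(sets):
    # Union of an iterable of sets, e.g. union(buckets[aj] for aj in s).
    out = set()
    for s in sets:
        out.update(s)
    return out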
Example #7
def QXGen(C, Bd, B, d, l):
    global genhash
    if l < lmax:
        if f(d) > 0:
            u = utils.union(B, Bd)
            if (genhash == ""):
                hash = utils.getHash(u, len(modelCNF.clauses))
            else:
                hash = genhash
                genhash = ""

            if hash not in cache:  # avoid spawning multiple consistency checks if one is already running
                future = pool.apply_async(callConsistencyCheck, args=([u]))
                cache.update({hash: future})
                #print("Generated: " + str(hash))
        if f(C) == 1 and f(Bd) > 0:
            QXGen(Bd, [], B + [C[0]], [C[0]], l + 1)

        elif f(C) > 1:
            if (len(C) > 1):
                k = int(len(C) / 2)
                Ca = C[0:k]
                Cb = C[k:len(C)]
            else:
                k = int(len(C[0]) / 2)
                Ca = [C[0][0:k]]
                Cb = [C[0][k:len(C[0])]]
            QXGen(Ca, Cb + Bd, B, Cb, l + 1)
        if f(Bd) > 0 and f(d) > 0:
            QXGen([Bd[0]], utils.Diff(Bd, [Bd[0]]), B, [], l + 1)
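
utils.union and utils.Diff are not shown; from the call sites they appear to be order-preserving list union and list difference. Sketches under that assumption:

def union(a, b):
    # a followed by the elements of b not already in a.
    return a + [x for x in b if x not in a]

def Diff(a, b):
    # Elements of a that do not occur in b.
    return [x for x in a if x not in b]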
Example #9
def depth_first_pebble(P, v, S):
    if (P.is_source(v)):
        P.pebble(v)
    for u in P.get_parents(v):
        if (not pebbled(u)):
            depth_first_pebble(P, u, utils.union(S, P.get_parents(v)))
    P.pebble(v)
    P.remove_pebbles(utils.complement1(P.size(), S))
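
The helpers here are only guesses from the call sites: utils.union looks like plain set union, and utils.complement1(n, S) plausibly returns the vertex ids 0..n-1 outside S. Hedged sketches:

def union(a, b):
    # Set union of two vertex collections.
    return set(a) | set(b)

def complement1(n, S):
    # Assumed semantics: all vertex ids 0..n-1 not contained in S.
    return set(range(n)) - set(S)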
Example #10
def computeSegmOverlap(gt_rects, segmentations, MIN_SEGM_OVRLAP=0.6):

    segm2chars = 0

    for k in range(len(gt_rects)):
        gt_rect = gt_rects[k]
        best_match = 0
        best_match_line = 0
        if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\''
                or gt_rect[4] == ':'
                or gt_rect[4] == '-') and not evalPunctuation:
            continue

        best_match2 = 0
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]
            rect_int = utils.intersect(rectn, gt_rect)
            int_area = utils.area(rect_int)
            union_area = utils.area(utils.union(rectn, gt_rect))

            ratio = int_area / float(union_area)

            if ratio > best_match:
                best_match = ratio

            if ratio > best_match_line and rectn[7] == 1.0:
                best_match_line = ratio

            gt_rect[5] = best_match
            if best_match < MIN_SEGM_OVRLAP:
                if k < len(gt_rects) - 1:
                    gt_rect2 = gt_rects[k + 1]
                    chars2Rect = utils.union(gt_rect2, gt_rect)
                    rect_int = utils.intersect(rectn, chars2Rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, chars2Rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match2:
                        if ratio > MIN_SEGM_OVRLAP:
                            segm2chars += 1
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
Example #11
    def narrow_docids(self, idx):
        m0 = [decode_array(idx[w]) for w in self.r0 if idx.has_key(w)]
        if self.r0 and not m0:
            return []
        m2 = [decode_array(idx[w]) for w in self.r2 if idx.has_key(w)]
        if self.r2 and not m2:
            return []
        if self.r1:
            try:
                refs = intersect(decode_array(idx[w]) for w in self.r1)
            except KeyError:
                return []
            refs = union(refs, [m for m in (m0, m2) if m])
        elif not self.r2:
            refs = merge(m0)
        else:
            refs = union(merge(m0), [m2])
        # Now: refs = [ docid1,sentid1, docid2,sentid2, ... ]
        locs = [(refs[i], refs[i + 1]) for i in xrange(0, len(refs), 2)]
        return locs
Example #14
    def create_optimistic_partitions(self):
        self.best_function_partitions = {}

        def get_partition(t):
            if t not in self.best_partitions:
                assert isinstance(t, pddl.types.CompositeType)
                pnew = sum((get_partition(subt) for subt in t.types), [])
                self.best_partitions[t] = pnew
            return self.best_partitions[t]

        logger.debug("Functions: %s", map(str, self.functions))
        for f in self.functions:
            logger.debug("Function: %s, args: %s, type: %s", f,
                         map(str, f.args), f.type)
            self.best_function_partitions[f] = {}
            logger.debug("best function Partiiton: %s",
                         map(str, self.best_function_partitions))
            for i, t in enumerate(chain((a.type for a in f.args), [f.type])):
                if t == pddl.t_boolean:
                    continue
                p1 = [set(self.problem.get_all_objects(t))]
                original = set(p1[0])
                logger.debug("")
                logger.debug("start %s %s %s", f, i, t)
                for j, t2 in enumerate(
                        chain((a.type for a in f.args), [f.type])):
                    if i == j or t2 == pddl.t_boolean:
                        continue
                    logger.debug("inner %s %s", j, t2)
                    p2 = get_partition(t2)
                    logger.debug("%s %s %s", t2, map(lambda x: map(str, x),
                                                     p1),
                                 map(lambda x: map(str, x), p2))

                    p1 = self.join(p1, p2, f, i, j)

                used = union(p1)
                if used < original:
                    p1.append(list(original - used))
                self.best_function_partitions[f][i] = p1
                logger.debug("%s %s", f, i)
                for p in p1:
                    logger.debug(map(str, p))
Example #15
def bar_categoric(datasets, x, y, path, yscale='linear'):
    title = display.header_case(y)
    print("Drawing bar chart %s" % title)

    xdatas = []
    for name in datasets:
        xdata = list(map(lambda i: i[x], datasets[name]))
        xdatas.append(xdata)

    xdata = utils.union(*xdatas)
    ypos = np.arange(len(xdata))

    cols = len(datasets)
    width = (1 - 2 * bar_padding) / cols

    for i, name in enumerate(datasets):
        data = datasets[name]

        ydata = []
        for j in range(len(xdata)):
            xitem = xdata[j]
            items = list(filter(lambda i: i[x] == xitem, data))
            if len(items) > 0:
                ydata.append(items[0][y])
            else:
                ydata.append(0)

        offset = i - (cols - 1) / 2
        plt.bar(ypos + offset * width,
                ydata,
                width=width,
                align='center',
                label=name)

    plt.ylabel(display.axis_case(y))
    plt.yscale(yscale)
    plt.xticks(ypos, xdata, rotation=90)
    plt.title(title)
    plt.legend(loc='upper left')
    savefig(path)
Example #16
def stripPunc(word, w_type="any"):
    n_spaces = word.count(" ")
    if word.isalpha() and len(word) <= 2:
        return False
    chars = set(word)
    for p in chars.intersection(PUNC):
        if p != "/":
            word = word.replace(p, "")
        else:
            splt = p.split(-1)
            if splt in union(uniq, gqa_answers) and splt.isalpha():
                return splt
            else:
                word = word.replace(p, "")
    if all(map(lambda x: x.isdigit() or x == " ",
               word)) and w_type is not "number":
        return False
    if not word:
        return False
    if not chars.intersection(VOWELS):
        return False
    return word
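
union(uniq, gqa_answers) here, like union(uniq_attributes, uniq_relations, uniq_objects) in Example #19 below, behaves as a variadic set union over the loaded vocabularies; a minimal sketch:

def union(*sets):
    # Variadic set union; accepts any iterables of hashable items.
    out = set()
    for s in sets:
        out |= set(s)
    return out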
Example #17
        def complete_partition(t):
            if t not in partitions:
                if isinstance(t, pddl.types.CompositeType):
                    pnew = sum((complete_partition(subt) for subt in t.types),
                               [])
                    partitions[t] = pnew
                else:
                    # print "Composite"
                    # print "subtypes: ", map(str, get_direct_subypes(t))
                    pnew = sum((complete_partition(t2)
                                for t2 in get_direct_subypes(t)), [])
                    # print t, map(lambda x:map(str, x), pnew)
                    all = set(self.problem.get_all_objects(t))
                    used = union(pnew)
                    assert used <= all
                    if used < all:
                        pnew.append(list(all - used))
                    # print (pnew)
                    # tmp = []
                    # for p in pnew:
                    #    print type(p)
                    #    if len(p)>1:
                    #
                    #        for el in p:
                    #            print el
                    #            tmp.append(el)
                    #    else:
                    #        #el = p.pop()
                    #        print "else ", p
                    #        pass
                    #        #tmp.append(el)
                    # print "tmp ", (tmp), "\n"
                    # print "pnew: ",t, map(lambda x:map(str, x), pnew)
                    partitions[t] = pnew

            return partitions[t]
Example #18
def run_batches(model,
                opt,
                lr_scheduler,
                loader,
                args,
                timer,
                training,
                epoch=None,
                epoch_fraction=None,
                logger=None,
                writer=None):
    if not training and epoch_fraction != 1:
        raise ValueError("Must do full epochs for val")
    if epoch_fraction > 1 or epoch_fraction <= 0:
        msg = "Invalid epoch_fraction {}.".format(epoch_fraction)
        msg += " Should satisfy 0 < epoch_fraction <= 1"
        raise ValueError(msg)

    model.train(training)
    client_download = torch.zeros(loader.dataset.num_clients)
    client_upload = torch.zeros(loader.dataset.num_clients)
    spe = steps_per_epoch(args.local_batch_size, loader.dataset,
                          args.num_workers)

    if training:
        epoch_idxs = epoch * spe
        losses = []
        for batch_idx, batch in enumerate(loader):
            if batch_idx > 2 and args.do_test and batch_idx < spe - 10:
                print("skipping ", batch_idx)
                continue
            # only carry out an epoch_fraction portion of the epoch
            if batch_idx > spe * epoch_fraction:
                break
            lr_scheduler.step()
            if lr_scheduler.get_lr() == 0:
                # hack to get the starting LR right for fedavg
                opt.step()

            if args.local_batch_size == -1:
                expected_num_clients = args.num_workers
                if torch.unique(batch[0]).numel() < expected_num_clients:
                    # skip if there weren't enough clients left
                    print("SKIPPING BATCH: NOT ENOUGH CLIENTS")
                    continue
            else:
                expected_numel = args.num_workers * args.local_batch_size
                if batch[0].numel() < expected_numel:
                    # skip incomplete batches
                    print("SKIPPING BATCH: NOT ENOUGH DATA")
                    continue

            loss, download, upload = model(batch)

            client_download += download
            client_upload += upload

            opt.step()
            loss = np.mean(loss)
            losses.append(loss)
            train_time = timer()
            download_mb = download.sum().item() / (1024 * 1024)
            upload_mb = upload.sum().item() / (1024 * 1024)
            batch_stats = {
                'train_time': train_time,
                'train_loss': loss,
                'total_time': timer.total_time,
                'down (MiB)': round(download_mb),
                'up (MiB)': round(upload_mb),
            }
            lr = lr_scheduler.get_lr()[0]

            writer.add_scalar('training/loss', loss, batch_idx + epoch_idxs)
            writer.add_scalar('Lr', lr, batch_idx + epoch_idxs)
            writer.add_scalar('Time/train', train_time, batch_idx + epoch_idxs)
            summary = union({
                'batch_idx': batch_idx + 1 + epoch_idxs,
                'lr': lr
            }, batch_stats)
            logger.append(summary)
        return np.mean(losses), client_download, client_upload

    else:
        nlls, accs, ppls = [], [], []
        for batch_idx, batch in enumerate(loader):
            if batch_idx > 5 and args.do_test and batch_idx < spe - 5:
                print("skipping ", batch_idx)
                continue
            nll, acc = model(batch)
            nll = np.mean(nll)
            acc = np.mean(acc)
            nlls.append(nll)
            accs.append(acc)
        return np.mean(nlls), np.mean(accs), np.exp(np.mean(nlls))
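
union here merges two dicts into the logging summary; a minimal sketch that leaves both inputs untouched:

def union(a, b):
    # New dict containing a's entries updated with b's.
    merged = dict(a)
    merged.update(b)
    return merged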
Example #19
qid2keep = {}

#data to write
vqa_number_subset_qids = []
vqa_train_qid2ans = []
vqa_val_qid2ans = []

vqa_trashed_qids = set()
vqa_gqa_ans_overlap = set()
uniq_ans = set()

#data to load
uniq_attributes = loadTxt("phase_1/uniq_attrs")
uniq_relations = loadTxt("phase_1/uniq_rels")
uniq_objects = loadTxt("phase_1/uniq_objs")
uniq = union(uniq_attributes, uniq_relations, uniq_objects)
gqa_answers = loadTxt("phase_1/gqa_answers")

#funcs


def stripPunc(word, w_type="any"):
    n_spaces = word.count(" ")
    if word.isalpha() and len(word) <= 2:
        return False
    chars = set(word)
    for p in chars.intersection(PUNC):
        if p != "/":
            word = word.replace(p, "")
        else:
            splt = word.split("/")[-1]  # likely intent: keep the last "/"-separated token
Example #20
def process_batch(nets, optim, optim2, image_size, args):
    global it, mean_loss, mean_rec
    it += 1  # increment the iteration counter

    net, net_ctc = nets

    net = net.net
    net_ctc = net_ctc.net

    net.blobs['data'].reshape(args.batch_size, 1, image_size[1],
                              image_size[0])  # reshape the data blob for one batch of input images
    net.reshape()

    optim2.step(1)

    im = net.blobs['data'].data[...]  # shape [batch_size,1,416,416]
    draw = np.swapaxes(im, 2, 3)
    draw = np.swapaxes(draw, 1, 3)
    im_ctc = np.copy(draw)
    draw += 1
    draw *= 128
    draw = np.array(draw, dtype="uint8").copy()

    if args.debug:
        grid_step = 16
        line = 0
        while line < image_size[0]:
            cv2.line(draw[0], (0, line), (image_size[1], line),
                     (128, 128, 128))
            line += grid_step

    boxes = net.blobs['boxes'].data[...]  # shape (4, 1, 500, 15)

    word_gtob = net.blobs['gt_boxes'].data[...]  # shape  (4, 6, 1, 6)
    word_txt = net.blobs['gt_labels'].data[...]  # shape (4, 6, 1, 14)

    lines_gtob = net.blobs['line_boxes'].data[...]  # shape (4, 1, 1, 5)
    lines_txt = net.blobs['line_labels'].data[...]  # shape (4, 1, 1, 7)

    #nms = boxeso[:, 0, 0, 8] == 0
    #boxes = boxes[:, :, nms, :]

    boxes[:, 0, :, 0] *= image_size[0]
    boxes[:, 0, :, 1] *= image_size[1]
    normFactor = math.sqrt(image_size[1] * image_size[1] +
                           image_size[0] * image_size[0])
    boxes[:, 0, :, 2] *= normFactor
    boxes[:, 0, :, 3] *= normFactor

    sum_cost = 0
    count = 0

    labels_gt = []
    labels_det = []

    gt_to_detection = {}
    net_ctc.clear_param_diffs()

    batch_buckets = []
    dummy = {}

    matched_detections = 0
    for bid in range(im.shape[0]):  # iterate over every sample in the batch

        o_image = net.layers[0].get_image_file_name(bid)
        o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
        cx = net.layers[0].get_crop(bid, 0)
        cy = net.layers[0].get_crop(bid, 1)
        cmx = net.layers[0].get_crop(bid, 2)
        cmy = net.layers[0].get_crop(bid, 3)
        o_image = o_image[cy:cmy, cx:cmx]

        boxes_count = 0
        for i in range(0, boxes.shape[2]):
            det_word = boxes[bid, 0, i]
            if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
                break
            boxes_count += 1

        x = [i for i in range(boxes_count)]
        #random.shuffle(x)

        bucket_images = {}
        batch_buckets.append(bucket_images)

        word_gto = word_gtob[bid]
        word_gto_txt = word_txt[bid]
        gt_count = 0
        for gt_no in range(word_gto.shape[0]):
            gt = word_gto[gt_no, :]
            gt = gt.reshape(6)
            gtnum = 1000 * bid + gt_no

            if gt[5] == -1:
                #print("ignore gt!")
                continue

            gt_count += 1

            txt = word_gto_txt[gt_no, :]
            gtbox = ((gt[0] * image_size[0], gt[1] * image_size[1]),
                     (gt[2] * normFactor,
                      gt[3] * normFactor), gt[4] * 180 / 3.14)
            gtbox = cv2.boxPoints(gtbox)

            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)

            if rect_gt[0] == 0 or rect_gt[
                    1] == 0 or rect_gt[0] + rect_gt[2] >= image_size[
                        0] or rect_gt[1] + rect_gt[3] >= image_size[1]:
                continue

            if gt[3] * normFactor < 3:
                if args.debug:
                    print('too small gt!')
                continue

            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]
            rect_gt[3] += rect_gt[1]

            for i in range(0, min(100, boxes_count)):
                det_word = boxes[bid, 0, x[i], :]

                # skip detections whose angle differs too much from the gt box
                if math.fabs(gt[4] - det_word[4]) > math.pi / 16:
                    continue

                if (det_word[0] == 0
                        and det_word[1] == 0) or det_word[5] < 0.01:
                    break

                box = ((det_word[0], det_word[1]), (det_word[2], det_word[3]),
                       det_word[4] * 180 / 3.14)
                box = cv2.boxPoints(box)

                if args.debug:
                    boxp = np.array(box, dtype="int")
                    vis.draw_box_points(draw[bid], boxp, color=(0, 255, 0))

                box = np.array(box, dtype="int")
                bbox = cv2.boundingRect(box)
                bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]

                #rectangle intersection ...
                inter = intersect(bbox, rect_gt)
                uni = union(bbox, rect_gt)
                ratio = area(inter) / float(area(uni))

                ratio_gt = area(inter) / float(area(rect_gt))
                if ratio_gt < 0.95:
                    continue

                if ratio < 0.5:
                    continue

                if not gt_to_detection.has_key(gtnum):
                    gt_to_detection[gtnum] = [0, 0, 0]
                tupl = gt_to_detection[gtnum]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = x[i]
                    tupl[2] = ratio_gt

                det_word = boxes[bid, 0, x[i], :]
                box = ([det_word[0],
                        det_word[1]], [det_word[2],
                                       det_word[3]], det_word[4] * 180 / 3.14)

                boxO = get_obox(im_ctc[bid], o_image, box)
                boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]),
                        boxO[2])
                norm2, rot_mat = get_normalized_image(o_image, boxO)
                #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
                if norm2 is None:
                    continue
                #if norm3 is None:
                #  continue
                #continue
                #cv2.imshow('ts', norm2)
                #cv2.imshow('ts3', norm3)
                #cv2.waitKey(1)
                width_scale = 32.0 / norm2.shape[0]
                width = norm2.shape[1] * width_scale
                best_diff = width
                bestb = 0
                for b in range(0, len(buckets)):
                    if best_diff > abs(width * 1.3 - buckets[b]):
                        best_diff = abs(width * 1.3 - buckets[b])
                        bestb = b

                scaled = cv2.resize(norm2, (buckets[bestb], 32))
                scaled = np.asarray(scaled, dtype=np.float)
                delta = scaled.max() - scaled.min()
                scaled = (scaled) / (delta / 2)
                scaled -= scaled.mean()

                if not bucket_images.has_key(bestb):
                    bucket_images[bestb] = {}
                    bucket_images[bestb]['img'] = []
                    bucket_images[bestb]['sizes'] = []
                    bucket_images[bestb]['txt'] = []
                    bucket_images[bestb]['gt_enc'] = []
                    dummy[bestb] = 1
                else:
                    if args.debug and len(bucket_images[bestb]) > 4:
                        continue
                    elif len(bucket_images[bestb]) > 32:
                        continue

                gt_labels = []
                txt_enc = ''
                for k in range(txt.shape[1]):
                    if txt[0, k] > 0:
                        if codec_rev.has_key(txt[0, k]):
                            gt_labels.append(codec_rev[txt[0, k]])
                        else:
                            gt_labels.append(3)

                        txt_enc += unichr(txt[0, k])
                    else:
                        gt_labels.append(0)

                if scaled.ndim == 3:
                    scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
                if args.debug:
                    cv2.imshow('scaled', scaled)
                bucket_images[bestb]['sizes'].append(len(gt_labels))
                bucket_images[bestb]['gt_enc'].append(gt_labels)
                bucket_images[bestb]['txt'].append(txt_enc)
                bucket_images[bestb]['img'].append(scaled)
                matched_detections += 1

    #and learn OCR
    for bucket in bucket_images.keys():

        imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
        imtf = np.reshape(imtf,
                          (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))
        #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
        #imtf = np.swapaxes(imtf,1,3)

        net_ctc.blobs['data'].reshape(imtf.shape[0], imtf.shape[1],
                                      imtf.shape[2], imtf.shape[3])
        net_ctc.blobs['data'].data[...] = imtf

        labels = bucket_images[bucket]['gt_enc']
        txt = bucket_images[bucket]['txt']

        max_len = 0
        for l in range(0, len(labels)):
            max_len = max(max_len, len(labels[l]))
        for l in range(0, len(labels)):
            while len(labels[l]) < max_len:
                labels[l].append(0)

        labels = np.asarray(labels, np.float)

        net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])

        net_ctc.blobs['label'].data[...] = labels

        if args.debug:
            vis.vis_square(imtf[0])
            cv2.imshow('draw', draw[0])
            cv2.waitKey(5)

        #optim.step(1)
        sum_cost += net_ctc.blobs['loss'].data[...]
        if net_ctc.blobs['loss'].data[...] > 10:
            #vis.vis_square(imtf[0])
            #cv2.imshow('draw', draw[0])
            sf = net_ctc.blobs['transpose'].data[...]
            labels2 = sf.argmax(3)
            out = utils.print_seq(labels2[:, 0, :])
            print(u'{0} --- {1}'.format(out, txt[0]))
            #cv2.waitKey(5)

        count += imtf.shape[0]

    correct_cout = 0
    for i in range(len(labels_gt)):
        det_text = labels_det[i]
        gt_text = labels_gt[i]

        if it % 100 == 0:
            pass
            #print( u"{0} -- {1}".format(det_text, gt_text).encode('utf8') )
        if det_text == gt_text:
            correct_cout += 1

    count = max(count, 1)
    mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
    mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(
        max(1, len(labels_gt)))

    #count detection ratio

    tp = 0
    for bid in range(im.shape[0]):
        word_gto = word_gtob[bid]
        for gt_no in range(len(word_gto)):
            gt = word_gto[gt_no]
            gtnum = 1000 * bid + gt_no
            if gt_to_detection.has_key(gtnum):
                tupl = gt_to_detection[gtnum]
                if tupl[0] > 0.5:
                    tp += 1

    loc_recall = tp / float(max(1, gt_count))

    if it % 10 == 0:
        print(
            '{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'
            .format(it, 0.0001, sum_cost / count, mean_loss,
                    correct_cout / float(max(1, len(labels_gt))), mean_rec,
                    loc_recall, matched_detections))

    if it % snapshot_interval == 0:
        #optim.snapshot()
        optim2.snapshot()
Example #21
def process_batch(nets, optim, optim2, image_size, args):
  global it, mean_loss, mean_rec
  
  net, net_ctc = nets
  
  net = net.net
  net_ctc = net_ctc.net
  
  
  net.blobs['data'].reshape(args.batch_size,1,image_size[1],image_size[0])
  net.reshape()
      
  it += 1 
  
  optim2.step(1)
  
  im = net.blobs['data'].data[...]
  draw = np.swapaxes(im,2,3)
  draw = np.swapaxes(draw,1,3)
  im_ctc = np.copy(draw)
  draw += 1
  draw *= 128
  draw = np.array(draw, dtype="uint8").copy() 
  
  
  if args.debug:
    grid_step = 16
    line = 0
    while line < image_size[0]:
      cv2.line(draw[0], (0, line), (image_size[1], line), (128, 128, 128))
      line += grid_step
  
  boxes  =  net.blobs['boxes'].data[...]
                 
  word_gtob = net.blobs['gt_boxes'].data[...]
  word_txt = net.blobs['gt_labels'].data[...]
  
  lines_gtob = net.blobs['line_boxes'].data[...]
  lines_txt = net.blobs['line_labels'].data[...]
  
  #nms = boxeso[:, 0, 0, 8] == 0
  #boxes = boxes[:, :, nms, :]
  
  boxes[:, 0, :, 0] *= image_size[0]
  boxes[:, 0, :, 1] *= image_size[1]
  normFactor = math.sqrt(image_size[1] * image_size[1] + image_size[0] * image_size[0])
  boxes[:, 0, :, 2] *= normFactor
  boxes[:, 0, :, 3] *= normFactor
  
  sum_cost = 0
  count = 0
  
  labels_gt = []
  labels_det = []
  
  gt_to_detection = {}
  net_ctc.clear_param_diffs()
  
  
  batch_buckets = []    
  dummy = {} 
  
  matched_detections = 0
  for bid in range(im.shape[0]):
    
    o_image = net.layers[0].get_image_file_name(bid)
    o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
    cx = net.layers[0].get_crop(bid, 0)
    cy = net.layers[0].get_crop(bid, 1)
    cmx = net.layers[0].get_crop(bid, 2)
    cmy = net.layers[0].get_crop(bid, 3)
    o_image = o_image[cy:cmy, cx:cmx]
    
    boxes_count = 0
    for i in range(0, boxes.shape[2]):
      det_word = boxes[bid, 0, i]
      if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
          break
      boxes_count += 1
        
    x = [i for i in range(boxes_count)]
    #random.shuffle(x)
    
    bucket_images = {}
    batch_buckets.append(bucket_images)
    
    word_gto = word_gtob[bid]
    word_gto_txt = word_txt[bid]
    gt_count = 0 
    for gt_no in range(word_gto.shape[0]):
      gt = word_gto[gt_no, :]
      gt = gt.reshape(6)
      gtnum = 1000 * bid +  gt_no
      
      if gt[5] == -1:
        #print("ignore gt!")
        continue
      
      gt_count += 1
                  
      txt = word_gto_txt[gt_no, :]
      gtbox  = ((gt[0] * image_size[0], gt[1] * image_size[1]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14)
      gtbox = cv2.boxPoints(gtbox)
      
      gtbox = np.array(gtbox, dtype="int")
      rect_gt = cv2.boundingRect(gtbox)

      if rect_gt[0] == 0 or rect_gt[1] == 0 or  rect_gt[0] + rect_gt[2]  >= image_size[0] or rect_gt[1] + rect_gt[3]  >= image_size[1]:
        continue
      
      if gt[3] * normFactor <  3:
        if args.debug:
          #print('too small gt!')
          vis.draw_box_points(draw[bid], gtbox, color = (255, 255, 0))
          cv2.imshow('draw', draw[bid])
        continue
        
      if args.debug:
        vis.draw_box_points(draw[bid], gtbox, color = (0, 0, 0), thickness=2)
      
      #vis.draw_box_points(draw[bid], gtbox, color = (255, 255, 255))
      #cv2.imshow('draw', draw[bid])
      
      rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
      rect_gt[2] += rect_gt[0]
      rect_gt[3] += rect_gt[1]

      for i in range(0, min(100, boxes_count)):
        det_word = boxes[bid, 0, x[i], :]

        # skip detections whose angle differs too much from the gt box
        if math.fabs(gt[4] - det_word[4]) > math.pi / 16:
          continue

        if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
          break
        
        box  = ((det_word[0], det_word[1]), (det_word[2], det_word[3]), det_word[4] * 180 / 3.14)
        box = cv2.boxPoints(box)
        
        if args.debug:
          boxp = np.array(box, dtype="int")
          vis.draw_box_points(draw[bid], boxp, color = (0, 255, 0))
        
        box = np.array(box, dtype="int")
        bbox = cv2.boundingRect(box)
        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]
        bbox[3] += bbox[1]
   
        #rectangle intersection ... 
        inter = intersect(bbox, rect_gt)
        uni = union(bbox, rect_gt)
        ratio = area(inter) / float(area(uni))
        
        ratio_gt = area(inter) / float(area(rect_gt))
        if ratio_gt < 0.95:
          continue 
        
        if ratio < 0.5:
          continue
        
        if not gt_to_detection.has_key(gtnum):
            gt_to_detection[gtnum] = [0, 0, 0]
        tupl = gt_to_detection[gtnum] 
        if tupl[0] < ratio:
          tupl[0] = ratio 
          tupl[1] = x[i]  
          tupl[2] = ratio_gt       
        
        det_word = boxes[bid, 0, x[i], :]
        box  = ([det_word[0], det_word[1]], [det_word[2], det_word[3]], det_word[4] * 180 / 3.14)
        
        boxO = get_obox(im_ctc[bid], o_image, box)
        boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]), boxO[2])
        norm2, rot_mat = get_normalized_image(o_image, boxO)
        #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
        if norm2 is None:
          continue
        #if norm3 is None:
        #  continue
        #continue
        #cv2.imshow('ts', norm2)
        #cv2.imshow('ts3', norm3)
        #cv2.waitKey(1)
        width_scale = 32.0 / norm2.shape[0]
        width = norm2.shape[1] * width_scale
        best_diff = width
        bestb = 0
        for b in range(0, len(buckets)):
          if best_diff > abs(width * 1.3 - buckets[b]):
            best_diff = abs(width * 1.3 - buckets[b])
            bestb = b
        
        scaled = cv2.resize(norm2, (buckets[bestb], 32))  
        scaled = np.asarray(scaled, dtype=np.float)
        delta = scaled.max() - scaled.min()
        scaled = (scaled) / (delta / 2)
        scaled -= scaled.mean()
                
        if not bucket_images.has_key(bestb):
          bucket_images[bestb] = {}
          bucket_images[bestb]['img'] = []  
          bucket_images[bestb]['sizes'] = []    
          bucket_images[bestb]['txt'] = []
          bucket_images[bestb]['gt_enc'] = []
          dummy[bestb] = 1
        else:
          if args.debug and len(bucket_images[bestb]) > 4:
            continue    
          elif  len(bucket_images[bestb]) > 32:
            continue
        
        gt_labels = []
        txt_enc = ''
        for k in range(txt.shape[1]):
          if txt[0, k] > 0:
            if codec_rev.has_key(txt[0, k]):                
              gt_labels.append( codec_rev[txt[0, k]] )
            else:
              gt_labels.append( 3 )
                              
            txt_enc += unichr(txt[0, k])
          else:
            gt_labels.append( 0 )
        
        if scaled.ndim == 3:
          scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
        if args.debug:
          cv2.imshow('scaled', scaled)
        bucket_images[bestb]['sizes'].append(len(gt_labels))
        bucket_images[bestb]['gt_enc'].append(gt_labels)
        bucket_images[bestb]['txt'].append(txt_enc)
        bucket_images[bestb]['img'].append(scaled)
        matched_detections += 1   
      
  #and learn OCR
  for bucket in bucket_images.keys():
      
    imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
    imtf = np.reshape(imtf, (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))    
    #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
    #imtf = np.swapaxes(imtf,1,3)
    
    
    net_ctc.blobs['data'].reshape(imtf.shape[0],imtf.shape[1],imtf.shape[2], imtf.shape[3]) 
    net_ctc.blobs['data'].data[...] = imtf
    
    labels = bucket_images[bucket]['gt_enc']
    txt = bucket_images[bucket]['txt']
    
    max_len = 0
    for l in range(0, len(labels)):
      max_len = max(max_len, len(labels[l]))
    for l in range(0, len(labels)):
      while len(labels[l]) <  max_len:
        labels[l].append(0)
      
    
    labels = np.asarray(labels, np.float)
    
    net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])
    
    net_ctc.blobs['label'].data[...] = labels
    
    if args.debug:
        vis.vis_square(imtf[0])
        cv2.imshow('draw', draw[0])
        cv2.waitKey(5)
         
     
    optim.step(1)  
    sum_cost += net_ctc.blobs['loss'].data[...]
    if net_ctc.blobs['loss'].data[...] > 10:
      vis.vis_square(imtf[0])
      cv2.imshow('draw', draw[0])
      sf = net_ctc.blobs['transpose'].data[...]
      labels2 = sf.argmax(3)
      out = utils.print_seq(labels2[:, 0, :])
      print(u'{0} - {1}'.format(out, txt[0])  )
      cv2.waitKey(5)
          
          
    count += imtf.shape[0]
              
  correct_cout = 0    
  for i in range(len(labels_gt)):
    det_text = labels_det[i]
    gt_text = labels_gt[i]
    
    if it % 100 == 0:
      print( u"{0} - {1}".format(det_text, gt_text).encode('utf8') )
    if det_text == gt_text:
      correct_cout += 1
      
  count = max(count, 1)    
  mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
  mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(max(1, len(labels_gt)))
  
  #count detection ratio

  tp = 0
  for bid in range(im.shape[0]):
    word_gto = word_gtob[bid]
    for gt_no in range(len(word_gto)):
      gt = word_gto[gt_no]
      gtnum = 1000 * bid +  gt_no
      if gt_to_detection.has_key(gtnum):
        tupl = gt_to_detection[gtnum] 
        if tupl[0] > 0.5:
          tp += 1
          
                      
  loc_recall = tp / float(max(1, gt_count))             
  if args.debug:
    cv2.imshow('draw', draw[0])
    if im.shape[0] > 1:
        cv2.imshow('draw2', draw[1])
        
    cv2.waitKey(10)
  
  if it % 10 == 0:
    print('{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'.format(it, 0.0001, sum_cost / count, mean_loss, correct_cout / float(max(1, len(labels_gt))), mean_rec, loc_recall, matched_detections))
  
  if it % 1000 == 0:
    optim.snapshot()
    optim2.snapshot()
Example #22
def draw_missed_letters_tile(
        input_dir='/datagrid/personal/TextSpotter/FastTextEval/ICDAR-Train',
        color=0,
        edgeThreshold=13,
        inter=True,
        scalingFactor=1.6,
        segmList=[]):

    ft = FASTex(process_color=color, edgeThreshold=edgeThreshold)

    d = input_dir
    subdirs = [
        os.path.join(d, o) for o in os.listdir(d)
        if os.path.isdir(os.path.join(d, o))
    ]
    subdirs = np.sort(subdirs)
    lastDir = ''
    for dir_name in subdirs:
        file_name = '{0}/evaluation.npz'.format(dir_name)
        if not os.path.exists(file_name):
            continue
        vars_dict = np.load(file_name)
        inputDir = vars_dict['inputDir']
        lastDir = dir_name
        if 'letterKeypointHistogram' in vars_dict.keys():
            letterKeypointHistogram = vars_dict['letterKeypointHistogram']
            letterKeypointHistogram = dict(letterKeypointHistogram.tolist())

    print(lastDir)

    missing_letters = vars_dict['missing_letters']
    missing_letters = dict(missing_letters.tolist())

    segmDir = '{0}/segmentations'.format(inputDir)
    segmDir = '/datagrid/personal/TextSpotter/evaluation-sets/icdar2013-Test/segmentations'

    keys = []
    ticks = []
    values = []
    values.append([])
    values.append([])
    values.append([])
    values.append([])
    ticks.append([])
    ticks.append([])
    ticks.append([])
    ticks.append([])
    listlen = 0
    for letter in letterKeypointHistogram.keys():
        keys.append(letter)
        values[0].append(0)
        ticks[0].append(listlen)
        values[1].append(0)
        ticks[1].append(listlen + 0.2)
        values[2].append(0)
        ticks[2].append(listlen + 0.4)
        values[3].append(0)
        ticks[3].append(listlen + 0.6)
        for num in letterKeypointHistogram[letter].keys():
            values[num][listlen] = letterKeypointHistogram[letter][num]

        listlen += 1

    indices = sorted(range(len(values[0])), key=lambda x: values[0][x])
    indices.reverse()

    border = 15

    missLetter = []
    imagesMiss = {}
    for letter in np.asarray(keys)[np.asarray(indices)]:
        if not missing_letters.has_key(letter):
            continue
        arr = missing_letters[letter]
        for i in range(len(arr)):
            miss = arr[i]

            if len(segmList) > 0:
                base = os.path.basename(miss[0])
                if not base in segmList:
                    continue

            missLetter.append(miss)

            if imagesMiss.has_key(miss[0]):
                imagesMiss[miss[0]].append(miss[1])
            else:
                imagesMiss[miss[0]] = []
                imagesMiss[miss[0]].append(miss[1])

    rowSize = len(imagesMiss.keys())
    f, axes = plt.subplots(2, len(imagesMiss.keys()))
    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=None,
                        hspace=None)

    figNo = 0

    for image in imagesMiss.keys():
        if len(imagesMiss.keys()) > 1:
            ax0 = axes[0][figNo]
            ax = axes[1][figNo]
        else:
            ax0 = axes[figNo]
            ax = axes[figNo]

        figNo += 1
        if color == 1:
            img = cv2.imread(image)
        else:
            img = cv2.imread(image, 0)

        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
        segmImg = cv2.imread(segmImg)

        segmentations = ft.getCharSegmentations(img)
        keypoints = ft.getLastDetectionKeypoints()

        if color == 1:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        for i in range(len(imagesMiss[image])):
            if i == 0:
                orBox = imagesMiss[image][0]
            else:
                orBox = utils.union(orBox, imagesMiss[image][i])

        gt0 = orBox
        gt = [
            gt0[0] - border, gt0[1] - border, gt0[2] + border, gt0[3] + border
        ]
        gt[0] = max(0, gt[0])
        gt[1] = max(0, gt[1])
        gt[2] = min(img.shape[1], gt[2])
        gt[3] = min(img.shape[0], gt[3])
        zoom = img[gt[1]:gt[3], gt[0]:gt[2]]
        ax.imshow(zoom, cmap=pylab.gray(), interpolation='nearest')
        ax0.imshow(zoom, cmap=pylab.gray(), interpolation='nearest')

        centers = segmImg[keypoints[:, 1].astype(int),
                          keypoints[:, 0].astype(int)]
        keypointsInsideMask = centers == (255, 255, 255)
        keypointsInsideMask = np.invert(
            np.bitwise_and(
                np.bitwise_and(keypointsInsideMask[:, 0],
                               keypointsInsideMask[:, 1]),
                keypointsInsideMask[:, 2]))
        keypointsInside = keypoints[keypointsInsideMask, :]

        mask = (keypoints[:, 0] > gt[0]) * (keypoints[:, 0] < gt[2]) * (
            keypoints[:, 1] > gt[1]) * (keypoints[:, 1] < gt[3])

        kpMask = keypoints[mask]
        kpMask[:, 0] = kpMask[:, 0] - gt[0]
        kpMask[:, 1] = kpMask[:, 1] - gt[1]
        kpMask[:, 7] = kpMask[:, 7] - gt[0]
        kpMask[:, 8] = kpMask[:, 8] - gt[1]

        ax.plot(kpMask[:, 0], kpMask[:, 1], 'ro')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax0.xaxis.set_ticklabels([])
        ax0.yaxis.set_ticklabels([])

        for k in range(kpMask.shape[0]):
            ax.plot([kpMask[k, 0], kpMask[k, 7]], [kpMask[k, 1], kpMask[k, 8]],
                    'r-')

        style = 'rx'
        if kpMask.shape[1] > 9:
            for k in range(3):
                maski = kpMask[:, 9] == k + 1
                if k == 1:
                    style = "rv"
                if k == 2:
                    style = "rs"
                if k == 4:
                    style = "bo"
                if k == 5:
                    style = "yo"

                ax.plot([kpMask[maski, 7]], [kpMask[maski, 8]], style)

        for i in range(len(imagesMiss[image])):

            gt0 = imagesMiss[image][i]

            mask = (keypointsInside[:, 0] >
                    gt[0]) * (keypointsInside[:, 0] < gt[2]) * (
                        keypointsInside[:, 1] > gt[1]) * (keypointsInside[:, 1]
                                                          < gt[3])
            kpMask = keypointsInside[mask]
            keypointsInside[:, 0] = keypointsInside[:, 0] - gt[0]
            keypointsInside[:, 1] = keypointsInside[:, 1] - gt[1]
            keypointsInside[:, 7] = keypointsInside[:, 7] - gt[0]
            keypointsInside[:, 8] = keypointsInside[:, 8] - gt[1]

            ax.plot(keypointsInside[:, 0], keypointsInside[:, 1], 'go')
            for k in range(keypointsInside.shape[0]):
                ax.plot([keypointsInside[k, 0], keypointsInside[k, 7]],
                        [keypointsInside[k, 1], keypointsInside[k, 8]], 'g-')

            ax.set_xlim(0, gt[2] - max(0, gt[0]))
            ax.set_ylim((gt[3] - max(0, gt[1]), 0))

            line = mlines.Line2D(np.array([
                gt0[0] - gt[0], gt0[2] - gt[0], gt0[2] - gt[0], gt0[0] - gt[0],
                gt0[0] - gt[0]
            ]),
                                 np.array([
                                     gt0[1] - gt[1], gt0[1] - gt[1],
                                     gt0[3] - gt[1], gt0[3] - gt[1],
                                     gt0[1] - gt[1]
                                 ]),
                                 lw=5.,
                                 alpha=0.6,
                                 color='r')
            ax0.add_line(line)

    plt.show()
Example #23
def compare_missed_segm(
        input_dir='/datagrid/personal/TextSpotter/FastTextEval/experiments/segmentation',
        input_dir2='/datagrid/personal/TextSpotter/FastTextEval/experiments/segmentationg',
        showPictures=False):

    ft = FASTex()

    (ms, dirs) = read_segm_data(input_dir)
    (ms2, dirs2) = read_segm_data(input_dir2, 'g')

    ms.extend(ms2)
    dirs.extend(dirs2)

    sumHash = {}
    for j in np.arange(0, len(ms)):
        missing_segm = ms[j]
        for image in missing_segm.keys():
            arr = missing_segm[image]
            if not sumHash.has_key(image):
                sumHash[image] = arr
                continue
            for i in range(len(arr)):
                miss_gt = arr[i]
                check = sumHash[image]
                hasGt = False
                for k in range(len(check)):
                    miss_gt2 = check[k]
                    if miss_gt == miss_gt2:
                        hasGt = True

                if not hasGt:
                    sumHash[image].append(miss_gt)

    missing_segm = ms[0]

    data = []
    dataf = []
    gt_id = 0
    columns = ['Img', 'GT Id']
    for image in sumHash.keys():
        arr = sumHash[image]
        f = None
        for i in range(len(arr)):
            orValue = False
            miss_gt = arr[i]
            row = []
            row.append(os.path.basename(image))
            row.append(gt_id)
            gt_id += 1
            rowf = []

            for j in np.arange(0, len(ms)):
                if gt_id == 1:
                    columns.append(dirs[j])
                msj = ms[j]
                hasSegmj = True
                val = 1
                if msj.has_key(image):
                    arrj = msj[image]
                    for k in range(len(arrj)):
                        miss_gtj = arrj[k]
                        if miss_gtj == miss_gt:
                            hasSegmj = False
                            val = 0
                            break

                row.append(hasSegmj)
                rowf.append(val)

                orValue = orValue or hasSegmj
            if orValue:
                rowf.append(1)

            else:
                rowf.append(0)
                if showPictures:
                    img = cv2.imread(image)
                    imgg = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
                    if f is None:

                        f, axes = plt.subplots(1, 2, figsize=(16, 3))
                        f.suptitle('Missing segmentation: {0}'.format(image))
                        ax = axes[0]
                        ax.imshow(img,
                                  cmap=pylab.gray(),
                                  interpolation='nearest')
                        ax = axes[1]
                        ax.imshow(imgg,
                                  cmap=pylab.gray(),
                                  interpolation='nearest')
                        orBox = miss_gt

                        segmentations = ft.getCharSegmentations(imgg)
                        keypoints = ft.getLastDetectionKeypoints()

                        style = 'rx'
                        for k in range(5):
                            maski = keypoints[:, 9] == k + 1
                            if k == 1:
                                style = "rv"
                            if k == 2:
                                style = "ro"
                            if k == 4:
                                style = "bo"

                            ax.plot(keypoints[maski, 0], keypoints[maski, 1],
                                    style)

                        for k in range(keypoints.shape[0]):
                            ax.plot([keypoints[k, 0], keypoints[k, 7]],
                                    [keypoints[k, 1], keypoints[k, 8]], 'r-')
                        ax = axes[0]

                    else:
                        orBox = utils.union(orBox, miss_gt)

                    line = mlines.Line2D(np.array([
                        miss_gt[0], miss_gt[2], miss_gt[2], miss_gt[0],
                        miss_gt[0]
                    ]),
                                         np.array([
                                             miss_gt[1], miss_gt[1],
                                             miss_gt[3], miss_gt[3], miss_gt[1]
                                         ]),
                                         lw=5.,
                                         alpha=0.6,
                                         color='r')
                    ax.add_line(line)

            row.append(orValue)

            data.append(row)
            dataf.append(rowf)

        if f is not None:
            ax = axes[0]
            ax.set_xlim(orBox[0] - 20, orBox[2] + 20)
            ax.set_ylim(orBox[3] + 20, orBox[1] - 20)
            ax = axes[1]
            ax.set_xlim(orBox[0] - 20, orBox[2] + 20)
            ax.set_ylim(orBox[3] + 20, orBox[1] - 20)
            plt.show()

    columns.append("OR")
    data = np.array(data)
    dataf = np.array(dataf)

    df = pandas.DataFrame(data=data, columns=columns)
    #print(df)
    sumCols = dataf.sum(0)
    sumCols = dataf.shape[0] - sumCols
    print("Missing Segmentations:")
    print(sumCols)

    indices = np.argsort(sumCols)

    bestFactor = indices[1]
    missing_segm = ms[bestFactor]
    print("Best factor: {0}".format(dirs[bestFactor]))
    maskBest = dataf[:, bestFactor] == 0
    datafSec = dataf[maskBest, :]
    sumCols = datafSec.sum(0)
    sumCols = datafSec.shape[0] - sumCols

    print("Missing Segmentations 2 best:")
    print(sumCols)

    indices = np.argsort(sumCols)
    bestFactor2 = indices[1]
    print("Best factor 2: {0}, missing segmentations: {1} -> {2}".format(
        dirs[bestFactor2], datafSec.shape[0], sumCols[indices[1]]))

    maskBest = datafSec[:, bestFactor2] == 0
    dataf3 = datafSec[maskBest, :]
    sumCols = dataf3.sum(0)
    sumCols = dataf3.shape[0] - sumCols

    indices = np.argsort(sumCols)
    bestFactor2 = indices[1]
    print("Best factor 3: {0}, missing segmentations: {1} -> {2}".format(
        dirs[bestFactor2], dataf3.shape[0], sumCols[indices[1]]))
Example #24
0
def compare_missed_segm(input_dir='/datagrid/personal/TextSpotter/FastTextEval/experiments/segmentation', input_dir2='/datagrid/personal/TextSpotter/FastTextEval/experiments/segmentationg', showPictures=False):
    
    ft = FASTex()
    
    (ms, dirs) = read_segm_data(input_dir)
    (ms2, dirs2) = read_segm_data(input_dir2, 'g')
    
    ms.extend(ms2)
    dirs.extend(dirs2)
    
    sumHash = {}
    for j in np.arange(0, len(ms)):
        missing_segm = ms[j]
        for image in  missing_segm.keys():
            arr =  missing_segm[image]
            if image not in sumHash:
                sumHash[image] = arr
                continue
            for i in range(len(arr)):
                miss_gt = arr[i]
                check = sumHash[image]
                hasGt = False
                for k in range(len(check)):
                    miss_gt2 = check[k]
                    if miss_gt == miss_gt2:
                        hasGt = True 
                    
                if not hasGt:
                    sumHash[image].append(miss_gt)
                        
        
    missing_segm = ms[0]    
    
    data = []
    dataf = []
    gt_id = 0
    columns = ['Img', 'GT Id']
    for image in  sumHash.keys():
        arr =  sumHash[image]
        f = None
        for i in range(len(arr)):
            orValue = False
            miss_gt = arr[i]
            row = []
            row.append(os.path.basename(image))
            row.append(gt_id)
            gt_id += 1
            rowf = []
            
            for j in np.arange(0, len(ms)):
                if gt_id == 1:
                    columns.append(dirs[j])
                msj =  ms[j]
                hasSegmj = True
                val = 1
                if image in msj:
                    arrj =  msj[image]
                    for k in range(len(arrj)):
                        miss_gtj = arrj[k]
                        if miss_gtj == miss_gt:
                            hasSegmj = False
                            val = 0
                            break
                        
                row.append(hasSegmj)
                rowf.append(val)
                
                orValue = orValue or hasSegmj
            if orValue:
                rowf.append(1)
                    
            else:
                rowf.append(0)
                if showPictures:
                    img = cv2.imread(image)
                    imgg = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
                    if f is None:
                        
                        f, axes = plt.subplots(1, 2, figsize=(16, 3))
                        f.suptitle('Missing segmentation: {0}'.format(image))
                        ax = axes[0]
                        ax.imshow(img, cmap=pylab.gray(), interpolation='nearest')
                        ax = axes[1]
                        ax.imshow(imgg, cmap=pylab.gray(), interpolation='nearest')
                        orBox = miss_gt
                        
                        segmentations = ft.getCharSegmentations(imgg)
                        keypoints = ft.getLastDetectionKeypoints()
                        
                        style = 'rx'
                        for k in range(5):
                            maski = keypoints[:, 9] == k + 1
                            if k == 1:
                                style = "rv"
                            if k == 2:
                                style = "ro"
                            if k == 4:
                                style = "bo" 
                        
                            ax.plot(keypoints[maski, 0], keypoints[maski, 1], style)
            
                        for k in range(keypoints.shape[0]):
                            ax.plot([keypoints[k,0], keypoints[k,7]], [keypoints[k,1], keypoints[k,8]], 'r-')
                        ax = axes[0]
                        
                    else:
                        orBox = utils.union(orBox, miss_gt)    
            
                    line = mlines.Line2D(np.array([miss_gt[0], miss_gt[2], miss_gt[2], miss_gt[0], miss_gt[0]]), np.array([miss_gt[1], miss_gt[1], miss_gt[3], miss_gt[3], miss_gt[1]]), lw=5., alpha=0.6, color='r')
                    ax.add_line(line)
            
                    
            row.append(orValue)
                
            data.append(row)
            dataf.append(rowf)
        
        if f is not None:
            ax = axes[0]
            ax.set_xlim(orBox[0] - 20, orBox[2] + 20)
            ax.set_ylim(orBox[3] + 20, orBox[1] - 20)
            ax = axes[1]
            ax.set_xlim(orBox[0] - 20, orBox[2] + 20)
            ax.set_ylim(orBox[3] + 20, orBox[1] - 20)
            plt.show()
                               
            
    columns.append("OR")
    data = np.array(data)
    dataf = np.array(dataf)        
            
    df = pandas.DataFrame(data = data, columns=columns)
    #print(df)
    sumCols = dataf.sum(0)
    sumCols = dataf.shape[0] - sumCols
    print("Missing Segmentations:")
    print(sumCols)
    
    indices = np.argsort(sumCols)
    
    bestFactor = indices[1]
    missing_segm = ms[bestFactor]
    print( "Best factor: {0}".format(dirs[bestFactor])  )
    maskBest = dataf[:, bestFactor] == 0
    datafSec = dataf[maskBest, :]
    sumCols = datafSec.sum(0)
    sumCols = datafSec.shape[0] - sumCols      
        
    print("Missing Segmentations 2 best:")
    print(sumCols)
    
    indices = np.argsort(sumCols)
    bestFactor2 = indices[1]
    print( "Best factor 2: {0}, missing segmentations: {1} -> {2}".format(dirs[bestFactor2], datafSec.shape[0], sumCols[indices[1]])  )
    
    maskBest = datafSec[:, bestFactor2] == 0
    dataf3 = datafSec[maskBest, :]
    sumCols = dataf3.sum(0)
    sumCols = dataf3.shape[0] - sumCols      
    
    indices = np.argsort(sumCols)
    bestFactor2 = indices[1]
    print( "Best factor 3: {0}, missing segmentations: {1} -> {2}".format(dirs[bestFactor2], dataf3.shape[0], sumCols[indices[1]])  )
Example #25
0
    def preprocess(self, data):
        print('preprocessing data...')

        # modify date format
        data['Date'] = data['Date'].apply(lambda x : datetime.datetime.strptime(x, '%d/%m/%y').strftime('%Y-%m-%d'))

        # average out betting odds
        data['Hodds'] = np.mean(data[['B365H','BWH','GBH','IWH','LBH','SBH','WHH','SJH','VCH','BSH']],axis=1)
        data['Dodds'] = np.mean(data[['B365D','BWD','GBD','IWD','LBD','SBD','WHD','SJD','VCD','BSD']],axis=1)
        data['Aodds'] = np.mean(data[['B365A','BWA','GBA','IWA','LBA','SBA','WHA','SJA','VCA','BSA']],axis=1)

        # filter columns - meta data @ http://www.football-data.co.uk/notes.txt
        use_col = ['Date','HomeTeam','AwayTeam','FTHG','FTAG','FTR','HTHG','HTAG','HTR','Referee','HS','AS','HST','AST',
                   'HC','AC','HF','AF','HY','AY','HR','AR','Hodds','Dodds','Aodds']
        data = data[use_col]

        # accumulate histories
        # : referenced http://andrew.carterlunn.co.uk/programming/2018/02/20/beating-the-bookmakers-with-tensorflow.html
        acc_hist = {'home_wins' : [], 'home_draws' : [], 'home_losses' : [], 'home_goals' : [], 'home_oppos_goals' : [],
                    'home_shots' : [], 'home_oppos_shots' : [], 'home_shotontarget' : [], 'home_oppos_shotontarget' : [],
                    'away_wins' : [], 'away_draws' : [], 'away_losses' : [], 'away_goals' : [], 'away_oppos_goals' : [],
                    'away_shots' : [], 'away_oppos_shots' : [], 'away_shotontarget' : [], 'away_oppos_shotontarget' : [],
                    'home_oppos_wins' : [], 'home_oppos_draws' : [], 'home_oppos_losses' : [],
                    'home_fouls' : [], 'home_yellowcards' : [], 'home_redcards' : [], 'home_cornerkicks' : [],
                    'home_oppos_cornerkicks' : [], 'home_oppos_fouls' : [], 'home_oppos_yellowcards' : [], 'home_oppos_redcards' : [],
                    'away_fouls' : [], 'away_yellowcards' : [], 'away_redcards' : [], 'away_cornerkicks' : [],
                    'away_oppos_cornerkicks' : [],'away_oppos_fouls' : [], 'away_oppos_yellowcards' : [], 'away_oppos_redcards' : []}
        d = 0
        for row in data.iterrows() :
            hometeam = row[1]['HomeTeam']
            awayteam = row[1]['AwayTeam']
            date = row[1]['Date']

            # filter matches with same playing teams
            temp1 = data[conjunction(data['HomeTeam']==hometeam, data['AwayTeam']==awayteam)]
            temp2 = data[conjunction(data['HomeTeam']==awayteam, data['AwayTeam']==hometeam)]
            temp = pd.concat([temp1, temp2], axis=0)
            history = temp[temp['Date']<date].sort_values(by='Date').tail(self.config.lookback_opp_matches)
            # if opponent history is too short, continue
            if len(history) < self.config.lookback_opp_matches :
                for key in list(acc_hist.keys()) :
                    acc_hist[key].append(np.nan)
                continue

            # compute average number of goals scored against opponent in the past N matches with the opponent
            home = history[history['HomeTeam'] == hometeam]
            away = history[history['AwayTeam'] == hometeam]
            home_sum = np.sum(home[['FTHG','FTAG','HS','AS','HST','AST','HC','AC','HF','AF','HY','AY','HR','AR']])
            away_sum = np.sum(away[['FTHG','FTAG','HS','AS','HST','AST','HC','AC','HF','AF','HY','AY','HR','AR']])


            # filter recent N matches of both home and away
            home = data[union(data['HomeTeam']==hometeam, data['AwayTeam']==hometeam)]
            home = home[home['Date']<date].sort_values(by='Date').tail(self.config.lookback_matches)
            away = data[union(data['HomeTeam']==awayteam, data['AwayTeam']==awayteam)]
            away = away[away['Date']<date].sort_values(by='Date').tail(self.config.lookback_matches)

            # if match history is too short, continue
            if len(home) < self.config.lookback_matches or len(away) < self.config.lookback_matches :
                for key in list(acc_hist.keys()) :
                    acc_hist[key].append(np.nan)
                continue

            home_home_sum = np.sum(home[home['HomeTeam']==hometeam][['FTHG','HS','HST','HC','HF','HY','HR']])
            home_away_sum = np.sum(home[home['AwayTeam']==hometeam][['FTAG','AS','AST','AC','AF','AY','AR']])
            away_home_sum = np.sum(away[away['HomeTeam']==awayteam][['FTHG','HS','HST','HC','HF','HY','HR']])
            away_away_sum = np.sum(away[away['AwayTeam']==awayteam][['FTAG','AS','AST','AC','AF','AY','AR']])

            # append computation results to dictionary
            acc_hist['home_oppos_goals'].append((home_sum['FTHG'] + away_sum['FTAG']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_goals'].append((home_sum['FTAG'] + away_sum['FTHG']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_shots'].append((home_sum['HS'] + away_sum['AS']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_shots'].append((home_sum['AS'] + away_sum['HS']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_shotontarget'].append((home_sum['HST'] + away_sum['AST']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_shotontarget'].append((home_sum['AST'] + away_sum['HST']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_cornerkicks'].append((home_sum['HC'] + away_sum['AC']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_cornerkicks'].append((home_sum['AC'] + away_sum['HC']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_fouls'].append((home_sum['HF'] + away_sum['AF']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_fouls'].append((home_sum['AF'] + away_sum['HF']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_yellowcards'].append((home_sum['HY'] + away_sum['AY']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_yellowcards'].append((home_sum['AY'] + away_sum['HY']) / self.config.lookback_opp_matches)
            acc_hist['home_oppos_redcards'].append((home_sum['HR'] + away_sum['AR']) / self.config.lookback_opp_matches)
            acc_hist['away_oppos_redcards'].append((home_sum['AR'] + away_sum['HR']) / self.config.lookback_opp_matches)

            acc_hist['home_goals'].append((home_home_sum['FTHG'] + home_away_sum['FTAG']) / self.config.lookback_matches)
            acc_hist['away_goals'].append((away_home_sum['FTHG'] + away_away_sum['FTAG']) / self.config.lookback_matches)
            acc_hist['home_shots'].append((home_home_sum['HS'] + home_away_sum['AS']) / self.config.lookback_matches)
            acc_hist['away_shots'].append((away_home_sum['HS'] + away_away_sum['AS']) / self.config.lookback_matches)
            acc_hist['home_shotontarget'].append((home_home_sum['HST'] + home_away_sum['AST']) / self.config.lookback_matches)
            acc_hist['away_shotontarget'].append((away_home_sum['HST'] + away_away_sum['AST']) / self.config.lookback_matches)
            acc_hist['home_cornerkicks'].append((home_home_sum['HC'] + home_away_sum['AC']) / self.config.lookback_matches)
            acc_hist['away_cornerkicks'].append((away_home_sum['HC'] + away_away_sum['AC']) / self.config.lookback_matches)
            acc_hist['home_fouls'].append((home_home_sum['HF'] + home_away_sum['AF']) / self.config.lookback_matches)
            acc_hist['away_fouls'].append((away_home_sum['HF'] + away_away_sum['AF']) / self.config.lookback_matches)
            acc_hist['home_yellowcards'].append((home_home_sum['HY'] + home_away_sum['AY']) / self.config.lookback_matches)
            acc_hist['away_yellowcards'].append((away_home_sum['HY'] + away_away_sum['AY']) / self.config.lookback_matches)
            acc_hist['home_redcards'].append((home_home_sum['HR'] + home_away_sum['AR']) / self.config.lookback_matches)
            acc_hist['away_redcards'].append((away_home_sum['HR'] + away_away_sum['AR']) / self.config.lookback_matches)


            # count ratio of wins / draws / losses in the past N matches of Home vs Away
            res = []
            for r in history.iterrows() :
                if r[1]['HomeTeam'] == hometeam :
                    res.append(r[1]['FTR'])
                else :
                    if r[1]['FTR'] == 'A' :
                        res.append('H')
                    elif r[1]['FTR'] == 'H' :
                        res.append('A')
                    else :
                        res.append('D')
            acc_hist['home_oppos_wins'].append(res.count('H') / self.config.lookback_opp_matches)
            acc_hist['home_oppos_draws'].append(res.count('D') / self.config.lookback_opp_matches)
            acc_hist['home_oppos_losses'].append(res.count('A') / self.config.lookback_opp_matches)


            # count ratio of wins / draws / losses in the past N matches
            res = []
            for r in home.iterrows() :
                if r[1]['HomeTeam'] == hometeam :
                    res.append(r[1]['FTR'])
                else :
                    if r[1]['FTR'] == 'A' :
                        res.append('H')
                    elif r[1]['FTR'] == 'H' :
                        res.append('A')
                    else :
                        res.append('D')
            acc_hist['home_wins'].append(res.count('H') / self.config.lookback_matches)
            acc_hist['home_draws'].append(res.count('D') / self.config.lookback_matches)
            acc_hist['home_losses'].append(res.count('A') / self.config.lookback_matches)

            res = []
            for r in away.iterrows() :
                if r[1]['HomeTeam'] == awayteam :
                    res.append(r[1]['FTR'])
                else :
                    if r[1]['FTR'] == 'A' :
                        res.append('H')
                    elif r[1]['FTR'] == 'H' :
                        res.append('A')
                    else :
                        res.append('D')
            acc_hist['away_wins'].append(res.count('H') / self.config.lookback_matches)
            acc_hist['away_draws'].append(res.count('D') / self.config.lookback_matches)
            acc_hist['away_losses'].append(res.count('A') / self.config.lookback_matches)

        acc_hist = pd.DataFrame(acc_hist)
        data = pd.concat([data, acc_hist], axis=1)
        data = data.dropna()

        return data
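In this example conjunction and union are helpers imported from elsewhere in the project; judging by how they are applied to boolean Series when filtering matches, they behave as elementwise AND and OR. A minimal sketch under that assumption (the implementations below are guesses, not the project's actual code):

import numpy as np

def conjunction(a, b):
    # elementwise AND of two boolean Series/arrays
    return np.logical_and(a, b)

def union(a, b):
    # elementwise OR of two boolean Series/arrays
    return np.logical_or(a, b)

# usage as above: data[union(data['HomeTeam'] == team, data['AwayTeam'] == team)]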
Example #26
0
def evaluate_image(batch,
                   detections,
                   word_gto,
                   iou_th=0.3,
                   iou_th_vis=0.5,
                   iou_th_eval=0.4):
    '''
  Summary : Returns end-to-end true-positives, detection true-positives, number of GT to be considered for eval (len > 2).
  Description : For each predicted bounding-box, a comparison is made with each GT entry. Values of number of end-to-end true
                positives, number of detection true positives, number of GT entries to be considered for evaluation are computed.
  
  Parameters
  ----------
  iou_th_eval : float
      Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes
  iou_th_vis : float
      Threshold value of intersection-over-union used for visualization when the transcription is correct but the IoU is lower.
  iou_th : float
      Threshold value of intersection-over-union between GT and prediction.
  word_gto : list of lists
      List of ground-truth bounding boxes along with transcription.
  batch : list of lists
      List containing data (input image, image file name, ground truth).
  detections : tuple of tuples
      Tuple of predicted bounding boxes along with transcriptions and text/no-text score.
  
  Returns
  -------
  tp : int
      Number of predicted bounding-boxes having IoU with GT greater than iou_th_eval.
  tp_e2e : int
      Number of predicted bounding-boxes having the same transcription as GT and len > 2.
  gt_e2e : int
      Number of GT entries for which transcription len > 2.
  '''

    gt_to_detection = {}
    tp = 0
    tp_e2e = 0
    gt_e2e = 0

    draw = batch[4][0]
    normFactor = math.sqrt(
        draw.shape[1] * draw.shape[1] +
        draw.shape[0] * draw.shape[0])  # Normalization factor
    for i in range(0, len(detections)):

        det = detections[i]
        boxr = det[0]
        box = cv2.boxPoints(boxr)  # Predicted bounding-box parameters
        box = np.array(
            box, dtype="int")  # Convert predicted bounding-box to numpy array
        bbox = cv2.boundingRect(box)

        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]  # Convert width to right-coordinate
        bbox[3] += bbox[1]  # Convert height to bottom-coordinate

        vis.draw_box_points(draw, box, color=(255, 0, 0))

        det_text = det[1][0]  # Predicted transcription for bounding-box
        #print(det_text)

        for gt_no in range(len(word_gto)):

            gt = word_gto[gt_no]
            txt = gt[5]  # GT transcription for given GT bounding-box
            gtbox = ((gt[0] * draw.shape[1], gt[1] * draw.shape[0]),
                     (gt[2] * normFactor, gt[3] * normFactor),
                     gt[4] * 180 / 3.14)  # Re-scaling GT values
            gtbox = cv2.boxPoints(gtbox)
            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)

            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]  # Convert GT width to right-coordinate
            rect_gt[3] += rect_gt[1]  # Convert GT height to bottom-coordinate

            inter = intersect(
                bbox,
                rect_gt)  # Intersection of predicted and GT bounding-boxes
            uni = union(bbox,
                        rect_gt)  # Union of predicted and GT bounding-boxes
            ratio = area(inter) / float(area(
                uni))  # IoU measure between predicted and GT bounding-boxes

            # 1). Visualize the predicted-bounding box if IoU with GT is higher than IoU threshold (iou_th) (Always required)
            # 2). Visualize the predicted-bounding box if transcription matches the GT and condition 1. holds
            # 3). Visualize the predicted-bounding box if transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
            if ratio > iou_th:
                vis.draw_box_points(draw, box, color=(0, 128, 0))
                if gt_no not in gt_to_detection:
                    gt_to_detection[gt_no] = [0, 0]

                if txt.lower() == det_text.lower():
                    to_cls_x.append(
                        [len(det_text), det[1][1], det[1][2], det[1][3]])
                    to_cls_y.append(1)
                    vis.draw_box_points(draw,
                                        box,
                                        color=(0, 255, 0),
                                        thickness=2)
                    gt[7] = 1  # Change this parameter to 1 when predicted transcription is correct.

                    if ratio < iou_th_vis:
                        vis.draw_box_points(draw,
                                            box,
                                            color=(255, 255, 255),
                                            thickness=2)
                        cv2.imshow('draw', draw)
                        #cv2.waitKey(0)

                else:
                    to_cls_x.append(
                        [len(det_text), det[1][1], det[1][2], det[1][3]])
                    to_cls_y.append(0)

                tupl = gt_to_detection[gt_no]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = i

    # Count the number of end-to-end and detection true-positives
    for gt_no in range(len(word_gto)):
        gt = word_gto[gt_no]
        txt = gt[5]
        if len(txt) > 2:
            gt_e2e += 1
            if gt[7] == 1:
                tp_e2e += 1

        if gt_no in gt_to_detection:
            tupl = gt_to_detection[gt_no]
            if tupl[0] > iou_th_eval:  # Increment detection true-positive, if IoU is greater than iou_th_eval
                tp += 1

    cv2.imshow('draw', draw)
    return tp, tp_e2e, gt_e2e
Example #27
0
def draw_missed_letters_tile(input_dir='/datagrid/personal/TextSpotter/FastTextEval/ICDAR-Train', color=0, edgeThreshold=13, inter=True, scalingFactor=1.6, segmList=[]):
    
    ft = FASTex(process_color=color, edgeThreshold=edgeThreshold)
    
    d=input_dir
    subdirs = [os.path.join(d,o) for o in os.listdir(d) if os.path.isdir(os.path.join(d,o))]
    subdirs = np.sort(subdirs)
    lastDir = ''
    for dir_name in subdirs:
        file_name = '{0}/evaluation.npz'.format(dir_name)
        if not os.path.exists(file_name):
            continue
        vars_dict = np.load(file_name, allow_pickle=True)
        inputDir = vars_dict['inputDir']
        lastDir = dir_name
        if 'letterKeypointHistogram' in vars_dict.keys():
            letterKeypointHistogram = vars_dict['letterKeypointHistogram']
            letterKeypointHistogram = dict(letterKeypointHistogram.tolist())
        
    print(lastDir)
    
    missing_letters = vars_dict['missing_letters']
    missing_letters = dict(missing_letters.tolist())
    
    segmDir = '{0}/segmentations'.format(inputDir)
    segmDir = '/datagrid/personal/TextSpotter/evaluation-sets/icdar2013-Test/segmentations'
    
    keys = []
    ticks = []
    values = []
    values.append([])
    values.append([])
    values.append([])
    values.append([])
    ticks.append([])
    ticks.append([])
    ticks.append([])
    ticks.append([])
    listlen = 0
    for letter in letterKeypointHistogram.keys():
        keys.append(letter)
        values[0].append(0)
        ticks[0].append(listlen)
        values[1].append(0)
        ticks[1].append(listlen + 0.2)
        values[2].append(0)
        ticks[2].append(listlen + 0.4)
        values[3].append(0)
        ticks[3].append(listlen + 0.6)
        for num in letterKeypointHistogram[letter].keys():
            values[num][listlen] = letterKeypointHistogram[letter][num]
            
        listlen += 1
        
    indices = sorted(range(len(values[0])),key=lambda x:values[0][x])
    indices.reverse()
    
    border = 15
    
    missLetter = []
    imagesMiss = {}
    for letter in  np.asarray(keys)[np.asarray(indices)]:
        if letter not in missing_letters:
            continue
        arr =  missing_letters[letter]
        for i in range(len(arr)):
            miss = arr[i]
            
            if len(segmList) > 0:
                base = os.path.basename(miss[0])
                if not base in segmList:
                    continue
            
            missLetter.append(miss) 
            
            if miss[0] in imagesMiss:
                imagesMiss[miss[0]].append(miss[1])
            else:
                imagesMiss[miss[0]] = [miss[1]]
    
    rowSize = len(imagesMiss.keys())    
    f, axes = plt.subplots(2, len(imagesMiss.keys()))
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
    
    figNo = 0
    
    
    for image in imagesMiss.keys():
        if len(imagesMiss.keys()) > 1:
            ax0 = axes[0][figNo]
            ax = axes[1][figNo]
        else:
            ax0 = axes[figNo]
            ax = axes[figNo]
            
        figNo += 1
        if color == 1:
            img = cv2.imread(image)
        else:
            img = cv2.imread(image, 0)
        
        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
        segmImg = cv2.imread(segmImg)
            
        segmentations = ft.getCharSegmentations(img)
        keypoints = ft.getLastDetectionKeypoints()
        
        if color == 1:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
        for i in range(len(imagesMiss[image])):
            if i == 0:
                orBox = imagesMiss[image][0]
            else:
                orBox = utils.union(orBox, imagesMiss[image][i])
            
        gt0 = orBox
        gt = [gt0[0] - border, gt0[1] - border, gt0[2] + border, gt0[3] + border ]
        gt[0] = max(0, gt[0])
        gt[1] = max(0, gt[1])
        gt[2] = min(img.shape[1], gt[2])
        gt[3] = min(img.shape[0], gt[3])
        zoom = img[gt[1]:gt[3], gt[0]:gt[2]]
        ax.imshow(zoom, cmap=pylab.gray(), interpolation='nearest')
        ax0.imshow(zoom, cmap=pylab.gray(), interpolation='nearest')
        
        centers = segmImg[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)]
        keypointsInsideMask = centers == (255, 255, 255)
        keypointsInsideMask = np.invert(np.bitwise_and(np.bitwise_and(keypointsInsideMask[:, 0], keypointsInsideMask[:, 1]), keypointsInsideMask[:, 2]))
        keypointsInside = keypoints[keypointsInsideMask, :]
        
        mask = (keypoints[:, 0] > gt[0]) * (keypoints[:, 0] < gt[2]) * (keypoints[:, 1] > gt[1]) * (keypoints[:, 1] <  gt[3])        
        
        kpMask = keypoints[mask]
        kpMask[:, 0] = kpMask[:, 0] - gt[0]
        kpMask[:, 1] = kpMask[:, 1] - gt[1]
        kpMask[:, 7] = kpMask[:, 7] - gt[0]
        kpMask[:, 8] = kpMask[:, 8] - gt[1]
        
        ax.plot(kpMask[:, 0], kpMask[:, 1], 'ro')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax0.xaxis.set_ticklabels([])
        ax0.yaxis.set_ticklabels([])
        
        for k in range(kpMask.shape[0]):
            ax.plot([kpMask[k,0], kpMask[k,7]], [kpMask[k,1], kpMask[k,8]], 'r-')
        
        style = 'rx'
        if kpMask.shape[1] > 9:
            for k in range(3):
                maski = kpMask[:, 9] == k + 1
                if k == 1:
                    style = "rv"
                if k == 2:
                    style = "rs"
                if k == 4:
                    style = "bo"
                if k == 5:
                    style = "yo"
                
                ax.plot([kpMask[maski,7]], [kpMask[maski,8]], style)
        
        
        
        for i in range(len(imagesMiss[image])):
            
            gt0 = imagesMiss[image][i]
                        
            mask = (keypointsInside[:, 0] > gt[0]) * (keypointsInside[:, 0] < gt[2]) * (keypointsInside[:, 1] > gt[1]) * (keypointsInside[:, 1] <  gt[3])
            kpMask = keypointsInside[mask]
            keypointsInside[:, 0] = keypointsInside[:, 0] - gt[0]
            keypointsInside[:, 1] = keypointsInside[:, 1] - gt[1]
            keypointsInside[:, 7] = keypointsInside[:, 7] - gt[0]
            keypointsInside[:, 8] = keypointsInside[:, 8] - gt[1]
            
            ax.plot(keypointsInside[:, 0], keypointsInside[:, 1], 'go')
            for k in range(keypointsInside.shape[0]):
                ax.plot([keypointsInside[k,0], keypointsInside[k,7]], [keypointsInside[k,1], keypointsInside[k,8]], 'g-')
                
            
            ax.set_xlim(0, gt[2] - max(0, gt[0]))
            ax.set_ylim((gt[3] - max(0, gt[1]), 0))
            
            line = mlines.Line2D(np.array([gt0[0] - gt[0], gt0[2] - gt[0], gt0[2] - gt[0], gt0[0] - gt[0], gt0[0] - gt[0]]), np.array([gt0[1] - gt[1], gt0[1] - gt[1], gt0[3] - gt[1], gt0[3] - gt[1], gt0[1] - gt[1]]), lw=5., alpha=0.6, color='r')
            ax0.add_line(line)
            
    plt.show()    
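The tile view above crops each missed letter with a fixed border clamped to the image bounds. The same cropping step in isolation, assuming an OpenCV-style image array and a [x1, y1, x2, y2] box (crop_with_border is an illustrative name, not a project function):

def crop_with_border(img, box, border=15):
    # expand the box by `border` pixels and clamp to the image
    x1 = max(0, box[0] - border)
    y1 = max(0, box[1] - border)
    x2 = min(img.shape[1], box[2] + border)
    y2 = min(img.shape[0], box[3] + border)
    return img[y1:y2, x1:x2]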
Example #28
0
            try:
                graph.add_edge((names[i], names[j], dist))

            except ValueError:
                graph.update_edge(names[i], names[j], dist)

            print(names[i], names[j])
    graph.create_adjacency_matrix()

    return graph


imdb, afi, rt, mc, hw = scrape_movies.get_movie_lists()
movie_lists = [imdb, afi, rt, mc, hw]

union = utils.union(movie_lists)
intersection = utils.intersect(movie_lists)

ranks = {}
counts = {}
for movie in union:
    rank, count = average_rank(movie, movie_lists)
    ranks[movie], counts[movie] = rank, count

top_movies = sorted(ranks, key=ranks.get)
with open("movie_lists/aggregate_top_100.txt", "w") as file:
    for rank, movie in enumerate(top_movies[:100]):
        file.write(str(rank + 1) + ". " + movie + "\n")

union = sorted(union)
imdb_ranks = np.zeros((len(union), 1))
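Here utils.union and utils.intersect take a list of movie lists and return the titles that appear on at least one list and on every list, respectively. A minimal set-based sketch under that reading (assuming hashable titles and a non-empty list of lists):

def union(lists):
    # titles that appear on at least one list
    return set().union(*map(set, lists))

def intersect(lists):
    # titles common to all lists; requires at least one list
    return set.intersection(*map(set, lists))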
Example #29
0
def run_words(inputDir, outputDir, invert=False):

    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    #images = glob.glob('{0}/*.png'.format('/datagrid/personal/TextSpotter/evaluation-sets/MS-text_database'))
    #images = glob.glob('{0}/*.jpg'.format('/datagrid/personal/TextSpotter/evaluation-sets/neocr_dataset'))
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    images.extend(glob.glob('{0}/*.JPG'.format(inputDir)))
    images.extend(glob.glob('{0}/*.png'.format(inputDir)))

    matched_words = 0
    word_count = 0

    for image in sorted(images):
        print('Processing {0}'.format(image))

        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img

        imgKp = np.copy(img)
        imgKp.fill(0)

        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(
            imgproc)  #, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack([
            segmentations,
            np.zeros((segmentations.shape[0], 2), dtype=float)
        ])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        keypoints = ftext.getLastDetectionKeypoints()
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]

        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if os.path.exists(lineGt):
            try:
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                try:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
        else:
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')

        cw = 0
        for detId in range(segmentations.shape[0]):
            best_match = 0

            for gt_box in word_gt:
                if len(gt_box[4]) == 1:
                    continue
                if gt_box[4][0] == "#":
                    continue
                cw += 1

                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))

                ratio = int_area / float(union_area)
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                if ratio > 0.7:

                    #print( "Word Match!" )
                    #cv2.rectangle(imgc, (rectn[0], rectn[1]), (rectn[2], rectn[3]), (0, 255, 0))
                    #cv2.imshow("ts", imgc)
                    #cv2.waitKey(0)
                    ftext.acummulateCharFeatures(2, detId)
                    if gt_box[5] != -1:
                        matched_words += 1
                    gt_box[5] = -1

            if best_match == 0:
                ftext.acummulateCharFeatures(0, detId)

        word_count += cw
        print("word recall: {0}".format(matched_words / float(word_count)))
Example #30
0
def run_evaluation(inputDir, outputDir, invert=False, isFp=False):

    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    images = glob.glob('{0}/*.jpg'.format(inputDir))
    images.extend(glob.glob('{0}/*.JPG'.format(inputDir)))
    images.extend(glob.glob('{0}/*.png'.format(inputDir)))
    segmDir = '{0}/segmentations'.format(inputDir)

    for image in images:
        print('Processing {0}'.format(image))

        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img

        imgKp = np.copy(img)
        imgKp.fill(0)

        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(
            imgproc)  #, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack([
            segmentations,
            np.zeros((segmentations.shape[0], 2), dtype=float)
        ])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        keypoints = ftext.getLastDetectionKeypoints()
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        segmLine = segmentations[segmentations[:, 7] == 1.0, :]
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]

        if isFp:
            for detId in range(0, segmentations.shape[0]):
                ftext.acummulateCharFeatures(0, detId)

            continue

        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if os.path.exists(lineGt):
            try:
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                try:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
        else:
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')

        rWcurrent = 0.0
        for gt_box in word_gt:
            if len(gt_box[4]) == 1:
                continue
            best_match = 0
            cv2.rectangle(imgc, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]),
                          (0, 255, 0))
            for det_word in words:
                rect_int = utils.intersect(det_word, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(det_word, gt_box))

                if union_area == 0:
                    continue

                ratio = int_area / float(union_area)
                det_word[11] = max(det_word[11], ratio)

                if ratio > best_match:
                    best_match = ratio
            rWcurrent += best_match

            best_match = 0
            for detId in range(segmentations.shape[0]):
                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))

                ratio = int_area / float(union_area)
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                if ratio > 0.7:

                    #print( "Word Match!" )
                    #tmp = ftext.getSegmentationMask(detId)
                    #cv2.imshow("ts", tmp)
                    #cv2.waitKey(0)

                    ftext.acummulateCharFeatures(2, detId)

        segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/{1}.png'.format(segmDir, baseName)
        segmImg = cv2.imread(segmImg, 0)
        if invert and segmImg is not None:
            segmImg = ~segmImg

        gt_rects = []
        miss_rects = []
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        if os.path.exists(segmGt) and False:
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
        else:
            contours = cv2.findContours(np.copy(segmImg),
                                        mode=cv2.RETR_EXTERNAL,
                                        method=cv2.CHAIN_APPROX_SIMPLE)[1]
            for cont in contours:
                rect = cv2.boundingRect(cont)
                rect = [
                    rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3],
                    '?', 0, 0
                ]
                gt_rects.append(rect)

        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]

            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                if (gt_rect[4] == ',' or gt_rect[4] == '.'
                        or gt_rect[4] == '\'' or gt_rect[4] == ':'
                        or gt_rect[4] == '-') and not evalPunctuation:
                    continue

                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5

                rect_int = utils.intersect(rectn, gt_rect)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_rect))
                ratio = int_area / float(union_area)
                rectn[10] = max(ratio, rectn[10])

                if rectn[9] > workPoint:
                    gt_rect[6] = max(ratio, gt_rect[6])

                if ratio > best_match:
                    best_match = ratio

                if ratio > best_match_line and rectn[7] == 1.0:
                    best_match_line = ratio
                if ratio > minSingleOverlap:
                    ftext.acummulateCharFeatures(1, detId)

                if ratio < minSingleOverlap:
                    if k < len(gt_rects) - 1:
                        gt_rect2 = gt_rects[k + 1]
                        chars2Rect = utils.union(gt_rect2, gt_rect)
                        rect_int = utils.intersect(rectn, chars2Rect)
                        int_area = utils.area(rect_int)
                        union_area = utils.area(utils.union(rectn, chars2Rect))
                        ratio = int_area / float(union_area)
                        rectn[10] = max(ratio, rectn[10])

                        if ratio > 0.8:
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
                            ftext.acummulateCharFeatures(2, detId)

                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]),
                              (gt_rect[2], gt_rect[3]), color, thickness)

            if rectn[10] == 0 and rectn[11] == 0:
                ftext.acummulateCharFeatures(0, detId)
        '''
Example #31
0
def evaluate_image(batch, detections, word_gto, iou_th=0.3, iou_th_vis=0.5, iou_th_eval=0.4):
    
  '''
  Summary : Returns end-to-end true-positives, detection true-positives, number of GT to be considered for eval (len > 2).
  Description : For each predicted bounding-box, a comparison is made with each GT entry. Values of number of end-to-end true
                positives, number of detection true positives, number of GT entries to be considered for evaluation are computed.
  
  Parameters
  ----------
  iou_th_eval : float
      Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes
  iou_th_vis : float
      Threshold value of intersection-over-union used for visualization when the transcription is correct but the IoU is lower.
  iou_th : float
      Threshold value of intersection-over-union between GT and prediction.
  word_gto : list of lists
      List of ground-truth bounding boxes along with transcription.
  batch : list of lists
      List containing data (input image, image file name, ground truth).
  detections : tuple of tuples
      Tuple of predicted bounding boxes along with transcriptions and text/no-text score.
  
  Returns
  -------
  tp : int
      Number of predicted bounding-boxes having IoU with GT greater than iou_th_eval.
  tp_e2e : int
      Number of predicted bounding-boxes having the same transcription as GT and len > 2.
  gt_e2e : int
      Number of GT entries for which transcription len > 2.
  '''
  
  gt_to_detection = {}
  tp = 0
  tp_e2e = 0
  gt_e2e = 0
  
  draw = batch[4][0]    
  normFactor = math.sqrt(draw.shape[1] * draw.shape[1] + draw.shape[0] * draw.shape[0]) # Normalization factor
  for i in range(0, len(detections)):
      
    det = detections[i]
    boxr = det[0]
    box = cv2.boxPoints(boxr) # Predicted bounding-box parameters
    box = np.array(box, dtype="int") # Convert predicted bounding-box to numpy array
    bbox = cv2.boundingRect(box)
    
    bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
    bbox[2] += bbox[0] # Convert width to right-coordinate
    bbox[3] += bbox[1] # Convert height to bottom-coordinate
    
    vis.draw_box_points(draw, box, color = (255, 0, 0))
    
    det_text = det[1][0] # Predicted transcription for bounding-box
    #print(det_text)
    
    for gt_no in range(len(word_gto)):
        
      gt = word_gto[gt_no]
      txt = gt[5] # GT transcription for given GT bounding-box
      gtbox  = ((gt[0] * draw.shape[1], gt[1] * draw.shape[0]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14) # Re-scaling GT values
      gtbox = cv2.boxPoints(gtbox)
      gtbox = np.array(gtbox, dtype="int")
      rect_gt = cv2.boundingRect(gtbox)
      
      
      rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
      rect_gt[2] += rect_gt[0] # Convert GT width to right-coordinate
      rect_gt[3] += rect_gt[1] # Convert GT height to bottom-coordinate 

      inter = intersect(bbox, rect_gt) # Intersection of predicted and GT bounding-boxes
      uni = union(bbox, rect_gt) # Union of predicted and GT bounding-boxes
      ratio = area(inter) / float(area(uni)) # IoU measure between predicted and GT bounding-boxes
      
      # 1). Visualize the predicted-bounding box if IoU with GT is higher than IoU threshold (iou_th) (Always required)
      # 2). Visualize the predicted-bounding box if transcription matches the GT and condition 1. holds
      # 3). Visualize the predicted-bounding box if transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
      if ratio > iou_th:
        vis.draw_box_points(draw, box, color = (0, 128, 0))
        if gt_no not in gt_to_detection:
          gt_to_detection[gt_no] = [0, 0]
            
        if txt.lower() == det_text.lower():
          to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
          to_cls_y.append(1)
          vis.draw_box_points(draw, box, color = (0, 255, 0), thickness=2)
          gt[7] = 1 # Change this parameter to 1 when predicted transcription is correct.
          
          if ratio < iou_th_vis:
              vis.draw_box_points(draw, box, color = (255, 255, 255), thickness=2)
              cv2.imshow('draw', draw) 
              #cv2.waitKey(0)
                
        else:
          to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
          to_cls_y.append(0)
          
        tupl = gt_to_detection[gt_no] 
        if tupl[0] < ratio:
          tupl[0] = ratio 
          tupl[1] = i   
                  
  # Count the number of end-to-end and detection true-positives
  for gt_no in range(len(word_gto)):
    gt = word_gto[gt_no]
    txt = gt[5]
    if len(txt) > 2:
      gt_e2e += 1
      if gt[7] == 1:
        tp_e2e += 1
            
    if gt_no in gt_to_detection:
      tupl = gt_to_detection[gt_no] 
      if tupl[0] > iou_th_eval: # Increment detection true-positive, if IoU is greater than iou_th_eval
        tp += 1             
          
  cv2.imshow('draw', draw)             
  return tp, tp_e2e, gt_e2e 
Example #32
0
def train(model,
          opt,
          lr_scheduler,
          train_loader,
          test_loader,
          args,
          writer,
          loggers=(),
          timer=None):
    timer = timer or Timer()

    total_download = 0
    total_upload = 0
    if args.eval_before_start:
        # val
        test_loss, test_acc, _, _ = run_batches(model, None, None, test_loader,
                                                False, 1, args)
        test_time = timer()
        print("Test acc at epoch 0: {:0.4f}".format(test_acc))
    # ceil in case num_epochs in case we want to do a
    # fractional number of epochs
    for epoch in range(math.ceil(args.num_epochs)):
        if epoch == math.ceil(args.num_epochs) - 1:
            epoch_fraction = args.num_epochs - epoch
        else:
            epoch_fraction = 1
        # train
        train_loss, train_acc, download, upload = run_batches(
            model, opt, lr_scheduler, train_loader, True, epoch_fraction, args)
        if train_loss is np.nan:
            print("TERMINATING TRAINING DUE TO NAN LOSS")
            return

        train_time = timer()
        download_mb = download.sum().item() / (1024 * 1024)
        upload_mb = upload.sum().item() / (1024 * 1024)
        total_download += download_mb
        total_upload += upload_mb

        # val
        test_loss, test_acc, _, _ = run_batches(model, None, None, test_loader,
                                                False, 1, args)
        test_time = timer()
        # report epoch results
        try:
            rounded_down = round(download_mb)
        except Exception:
            rounded_down = np.nan
        try:
            rounded_up = round(upload_mb)
        except Exception:
            rounded_up = np.nan
        epoch_stats = {
            'train_time': train_time,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'down (MiB)': rounded_down,
            'up (MiB)': rounded_up,
            'total_time': timer.total_time,
        }
        lr = lr_scheduler.get_last_lr()[0]
        summary = union({'epoch': epoch + 1, 'lr': lr}, epoch_stats)
        for logger in loggers:
            logger.append(summary)
        if args.use_tensorboard:
            writer.add_scalar('Loss/train', train_loss, epoch)
            writer.add_scalar('Loss/test', test_loss, epoch)
            writer.add_scalar('Acc/train', train_acc, epoch)
            writer.add_scalar('Acc/test', test_acc, epoch)
            writer.add_scalar('Time/train', train_time, epoch)
            writer.add_scalar('Time/test', test_time, epoch)
            writer.add_scalar('Time/total', timer.total_time, epoch)
            writer.add_scalar('Lr', lr, epoch)

    print("Total Download (MiB): {:0.2f}".format(total_download))
    print("Total Upload (MiB): {:0.2f}".format(total_upload))
    print("Avg Download Per Client: {:0.2f}".format(
        total_download / train_loader.dataset.num_clients))
    print("Avg Upload Per Client: {:0.2f}".format(
        total_upload / train_loader.dataset.num_clients))
    return summary
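In this example union merges two dicts into a single epoch summary. A minimal sketch, assuming keys from later arguments overwrite earlier ones:

def union(*dicts):
    # left-to-right dict merge; later keys win
    merged = {}
    for d in dicts:
        merged.update(d)
    return merged

# usage as above: summary = union({'epoch': epoch + 1, 'lr': lr}, epoch_stats)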
Example #33
0
def run_evaluation(inputDir, outputDir, process_color=0, processTest=0):
    
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    edgeThreshold = 14
    fastex = FASTex(edgeThreshold=edgeThreshold)
    
    modelFile = '/home/busta/outModel.boost'
    model = cv2.Boost()
    model.load(modelFile)
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    
    segmDir = '{0}/segmentations'.format(inputDir)
    
    precision = 0
    precisionDen = 0
    recall = 0
    recall05 = 0
    recallNonMax = 0
    recallDen = 0
    wordRecall = 0
    wordRecallDen = 0
    segm2chars = 0 
    
    regionsCount = 0
    regionsCountNonMax = 0
    missing_segmNonMaxCount = 0
    
    letterKeypointHistogram = defaultdict(lambda : defaultdict(float))
    octaveLetterKeypointHistogram = defaultdict(lambda : defaultdict(float))
    missing_letters = {}
    letterHistogram = defaultdict(int)
    missing_segm = {}
    missing_segm2 = {}
    missing_segmNonMax = {}
    diffMaxOctavesMap = {}
    diffScoreOctavesMap = {}
    segmHistogram = []
    segmWordHistogram = []
    
    results = []  
    hist = None
    histFp = None
    histDist = None
    histDistFp = None
    histDistMax = None
    histDistMaxWhite = None
    histDistMaxFp = None
    hist2dDist = None
    hist2dDistFp = None
    hist2dDistScore = None
    hist2dDistScoreFp = None
    histDistMaxWhiteFp = None
    
    histSegm = np.zeros((256), dtype=float)
    histSegmCount = np.zeros((256), dtype=int)
    stat = np.asarray([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float)
    times = []
    gtSegmCount = 0
    wordsOk = []
    wordsFp = []
    
    keypointsTotal = 0
    keypointsTotalInside = 0
    orbTime = 0
    
    lineNo = 0
    perfectWords = 0
    perfectWordsNS = 0
    
    hasSegm = False
    
    for image in images:
        print('Processing {0}'.format(image))
        
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgcO = cv2.imread(image)
        if process_color == 1:
            imgproc = imgc
        else:
            imgproc = img
        
        baseName = os.path.basename(image)
        
        
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = fastex.getCharSegmentations(imgproc, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
    
    segmentations = np.column_stack([segmentations, np.zeros((segmentations.shape[0], 2), dtype=float)])
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        if segmentations.shape[0] > 0:
            print('Dupl ratio: {0} - {1}/ {2} - {3}'.format(segmentationsDuplicates.shape[0] / float(segmentations.shape[0]), segmentationsDuplicates.shape[0], segmentations.shape[0], segmentationsNoNei.shape[0]))
        keypoints = fastex.getLastDetectionKeypoints()
        keypointsTotal += keypoints.shape[0]
        statc = fastex.getDetectionStat()
    
        times.append([ statc[1], statc[2], statc[3], statc[4], statc[5], statc[6], statc[7], statc[8], statc[9], statc[10]])
        stat += statc
        values = img[ keypoints[:, 1].astype(int), keypoints[:, 0].astype(int) ]
        valuesMax = img[keypoints[:, 6].astype(int), keypoints[:, 5].astype(int)]
        diffValMax = np.abs(values - valuesMax)
        
        
        regionsCount += segmentations.shape[0]
        regionsCountNonMax += segmentationsNoNei.shape[0]
       
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]
        
        keypointsOrb = fastex.getLastDetectionOrbKeypoints()
        orbTime += keypointsOrb[0][9]
            
            
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        pden = 0
        rden = 0
        if os.path.exists(segmGt):
            hasSegm = True
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
            
            try:
                (hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp, keypointsInside) = collect_histograms(img, segmImg, keypoints, values, diffValMax, keypointsTotalInside, diffMaxOctavesMap, diffScoreOctavesMap, hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp)
            except Exception:
                pass
                    
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                if gt_rect[4] in (',', '.', "'", ':', '-') and not evalPunctuation:
                    continue
                
                gtSegmCount += 1
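                # select the keypoints whose (x, y) falls inside this character's box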
                
                rectMask = np.bitwise_and(np.bitwise_and( keypointsInside[:, 0] >= gt_rect[0], keypointsInside[:, 0] <= gt_rect[2]), np.bitwise_and(keypointsInside[:, 1] >= gt_rect[1], keypointsInside[:, 1] <= gt_rect[3]))
                letterInside =  keypointsInside[rectMask, :]
                
                # histogram of keypoints per character, bucketed by detector octave
                if letterInside.shape[0] > 0:
                    octaves = np.unique( letterInside[:, 2])
                    maxOctave = np.max(octaves)
                    maxOctavePoints = 0
                    
                    for i in range(int(maxOctave) + 1):
                        octavePoints = letterInside[letterInside[:, 2] == i, :]
                        maxOctavePoints = max(maxOctavePoints, octavePoints.shape[0])
                    if maxOctavePoints > 0:
                        octaveLetterKeypointHistogram[gt_rect[4]][0] += 1
                    if maxOctavePoints > 1:
                        octaveLetterKeypointHistogram[gt_rect[4]][1] += 1
                    if maxOctavePoints > 2:
                        octaveLetterKeypointHistogram[gt_rect[4]][2] += 1
                    if maxOctavePoints > 3:
                        octaveLetterKeypointHistogram[gt_rect[4]][3] += 1

                if letterInside.shape[0] == 0:
                    if gt_rect[4] not in missing_letters:
                        missing_letters[gt_rect[4]] = []
                    missing_letters[gt_rect[4]].append( (image, gt_rect) )  
                if letterInside.shape[0] > 0:
                    letterKeypointHistogram[gt_rect[4]][0] += 1
                if letterInside.shape[0] > 1:
                    letterKeypointHistogram[gt_rect[4]][1] += 1
                if letterInside.shape[0] > 2:
                    letterKeypointHistogram[gt_rect[4]][2] += 1
                if letterInside.shape[0] > 3:
                    letterKeypointHistogram[gt_rect[4]][3] += 1
                     
                letterHistogram[gt_rect[4]] += 1
                
                best_match2 = 0
                # narrow glyphs ('i', '!') rarely reach the standard overlap,
                # so accept a lower threshold for them
                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5
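                # score every detection against this character by intersection-over-
                # union (IoU): area(det ∩ gt) / area(det ∪ gt)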
                 
                for detId in range(segmentations.shape[0]):
                    rectn = segmentations[detId, :]
                    rect_int =  utils.intersect( rectn, gt_rect )
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                
                    ratio = int_area / float(union_area)
                    rectn[10] = max(ratio, rectn[10])
                    
                    if rectn[9] > workPoint:
                        gt_rect[6] =  max(ratio, gt_rect[6])
                    
                    if ratio > best_match:
                        best_match = ratio
                        best_segm = segmentations[detId, :]
                        
                    if ratio > best_match_line and rectn[7] == 1.0 :
                        best_match_line = ratio
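                    # if no single region matches well enough, also try the union of
                    # this character and the next one: a single detection may cover
                    # two touching characters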
                        
                    if best_match < minSingleOverlap: 
                        if k < len(gt_rects) - 1:
                            gt_rect2 = gt_rects[k + 1]
                            chars2Rect = utils.union(gt_rect2, gt_rect)
                            rect_int = utils.intersect( rectn, chars2Rect )
                            int_area = utils.area(rect_int)
                            union_area = utils.area(utils.union(rectn, chars2Rect))
                            ratio = int_area / float(union_area)
                            rectn[10] = max(ratio, rectn[10]) 
                            if ratio > best_match2:
                                if ratio > MIN_SEGM_OVRLAP:
                                    segm2chars += 1
                                    best_match2 = ratio
                                    gt_rect[5] = ratio
                                    gt_rect2[5] = ratio
                       
                # draw the GT box: green when matched above the threshold, magenta
                # otherwise; thicker outline for strong (> 0.7) matches
                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]), (gt_rect[2], gt_rect[3]), color, thickness)
                        
                recall += best_match
                recallNonMax += gt_rect[6]
                if best_match >= minSingleOverlap:
                    recall05 += best_match
                    rcurrent05 += best_match
                else:
                    if image not in missing_segm:
                        missing_segm[image] = []
                    missing_segm[image].append(gt_rect)
                    
                    if gt_rect[5] < MIN_SEGM_OVRLAP:
                        if image not in missing_segm2:
                            missing_segm2[image] = []
                        missing_segm2[image].append(gt_rect)
                        segm2chars += 1
                
                if gt_rect[6] < minSingleOverlap:
                    if image not in missing_segmNonMax:
                        missing_segmNonMax[image] = []
                    missing_segmNonMax[image].append(gt_rect)
                    missing_segmNonMaxCount += 1
                        
                    
                rcurrent += best_match
                rcurrentNonMax += gt_rect[6]
                recallDen +=  1   
                rden += 1
                
                if best_match > 0 and process_color != 1:
                    # index with ints: best_segm holds float coordinates
                    val = img[int(best_segm[5]), int(best_segm[4])]
                    histSegm[val] += best_match
                    histSegmCount[val] += 1
                
            pcurrent = 0
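            # precision pass: score every detected region by its best IoU
            # against any ground-truth character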
            for detId in range(segmentations.shape[0]):
                best_match = 0
                rectn = segmentations[detId, :]
                
                for gt_rect in gt_rects:
                    rect_int =  utils.intersect( rectn, gt_rect )
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                    
                    ratio = int_area / float(union_area)
                    
                    if ratio > best_match:
                        best_match = ratio
                
                precision += best_match
                pcurrent += best_match
                precisionDen +=  1   
                pden += 1
                
        
        if pden == 0:
            pcurrent = 0
        else:
            pcurrent = pcurrent / pden
            
        if rden == 0:
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
        else:
            rcurrent = rcurrent / rden
            rcurrent05 = rcurrent05 / rden
            rcurrentNonMax = rcurrentNonMax / rden
        
        
        # per-image counts of regions exceeding increasing IoU thresholds
        segmHistogram.append([
            segmentations.shape[0],
            segmentations[segmentations[:, 10] > 0.4].shape[0],
            segmentations[segmentations[:, 10] > 0.5].shape[0],
            segmentations[segmentations[:, 10] > 0.6].shape[0],
            segmentations[segmentations[:, 10] > 0.7].shape[0]])
        
        segmWordHistogram.append([
            segmentations.shape[0],
            segmentations[np.bitwise_or(segmentations[:, 10] > 0.5, segmentations[:, 11] > 0.5)].shape[0]])
        
        results.append((baseName, rcurrent, pcurrent, rcurrent05))

    
    # guard against division by zero when no regions / GT characters were seen
    if precisionDen == 0:
        precision = 0
    else:
        precision = precision / precisionDen

    if recallDen == 0:
        recall = 0
        recall05 = 0
        recallNonMax = 0
    else:
        recall = recall / recallDen
        recall05 = recall05 / recallDen
        recallNonMax = recallNonMax / recallDen
        
    wordRecall = wordRecall / max(1, wordRecallDen)
            
    # per-bin average; np.maximum avoids division by zero elementwise
    histSegm = histSegm / np.maximum(1, histSegmCount)
    
    print('Evaluation Results:')
    print( 'recall: {0}, precision: {1}, recall 0.5: {2}, recall NonMax: {3}'.format(recall, precision, recall05, recallNonMax) )
    
    kpTimes = np.histogram(np.asarray(times)[:, 0], bins=20)
    print('Keypoint Time Histogram: {0}'.format(kpTimes))
    
    
    print('Detection statistics:')    
    print(stat)
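    # normalise the per-letter keypoint histograms by how often each letter
    # occurs in the ground truth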
    
    for letter in letterKeypointHistogram.keys():
        for num in letterKeypointHistogram[letter].keys():
            letterKeypointHistogram[letter][num] = letterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        for num in octaveLetterKeypointHistogram[letter].keys():
            octaveLetterKeypointHistogram[letter][num] = octaveLetterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        letterKeypointHistogram[letter] = dict(letterKeypointHistogram[letter])
        octaveLetterKeypointHistogram[letter] = dict(octaveLetterKeypointHistogram[letter])
    
    print('Perfect words: {0}'.format(perfectWords))
        
    eval_date = datetime.date.today()
    np.savez('{0}/evaluation'.format(outputDir),
             recall=recall, recall05=recall05, recallNonMax=recallNonMax,
             precision=precision, eval_date=eval_date, regionsCount=regionsCount,
             inputDir=inputDir, hist=hist, histSegm=histSegm, stat=stat,
             letterKeypointHistogram=dict(letterKeypointHistogram),
             missing_letters=missing_letters,
             octaveLetterKeypointHistogram=dict(octaveLetterKeypointHistogram),
             missing_segm=missing_segm, times=np.asarray(times), histFp=histFp,
             gtSegmCount=gtSegmCount, wordRecall=wordRecall, histDist=histDist,
             histDistFp=histDistFp, histDistMax=histDistMax, histDistMaxFp=histDistMaxFp,
             hist2dDist=hist2dDist, hist2dDistFp=hist2dDistFp,
             hist2dDistScore=hist2dDistScore, hist2dDistScoreFp=hist2dDistScoreFp,
             histDistMaxWhite=histDistMaxWhite, histDistMaxWhiteFp=histDistMaxWhiteFp,
             wordsOk=wordsOk, wordsFp=wordsFp,
             diffMaxOctavesMap=diffMaxOctavesMap, diffScoreOctavesMap=diffScoreOctavesMap,
             missing_segm2=missing_segm2, segmHistogram=segmHistogram,
             segmWordHistogram=segmWordHistogram, regionsCountNonMax=regionsCountNonMax,
             missing_segmNonMax=missing_segmNonMax)
    
    print( "GT segmentations count {0}".format(gtSegmCount) )
    print('FasText inside {0}/{1} ({2})'.format(keypointsTotalInside, keypointsTotal, keypointsTotalInside / float(keypointsTotal)))
    print('FasText time: {0}, Orb time: {1} '.format( np.sum(times, 0)[0], orbTime))
    print('2 Chars Segmentation: {0}'.format(segm2chars) )
    print('NonMax Regions Count: {0}/{1}'.format(regionsCountNonMax, missing_segmNonMaxCount))