Example #1
    def generate_region_gt(self, region_box, gt_bboxes, labels):
        chip_list = []
        for box in region_box:
            chip_list.append(np.array(box))

        # chip gt
        chip_gt_list = []
        chip_label_list = []
        chip_neglect_list = []
        if gt_bboxes is not None:
            for chip in chip_list:
                chip_gt = []
                chip_label = []
                neglect_gt = []
                for i, box in enumerate(gt_bboxes):
                    if utils.overlap(chip, box, 0.75):
                        box = [max(box[0], chip[0]), max(box[1], chip[1]),
                               min(box[2], chip[2]), min(box[3], chip[3])]
                        new_box = [box[0] - chip[0], box[1] - chip[1],
                                   box[2] - chip[0], box[3] - chip[1]]
                        chip_gt.append(np.array(new_box))
                        chip_label.append(labels[i])
                    elif utils.overlap(chip, box, 0.1):
                        box = [max(box[0], chip[0]), max(box[1], chip[1]),
                               min(box[2], chip[2]), min(box[3], chip[3])]
                        new_box = [box[0] - chip[0], box[1] - chip[1],
                                   box[2] - chip[0], box[3] - chip[1]]
                        neglect_gt.append(np.array(new_box, dtype=int))  # np.int was removed from NumPy; use the builtin

                chip_gt_list.append(chip_gt)
                chip_label_list.append(chip_label)
                chip_neglect_list.append(neglect_gt)

        return chip_gt_list, chip_label_list, chip_neglect_list
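None of the examples in this collection include `utils.overlap` itself. For the chip-generation code above (and in Example #2 below), a minimal sketch consistent with calls such as `utils.overlap(chip, box, 0.75)` is shown here; the coverage-ratio semantics and the epsilon guard are assumptions, not the projects' actual implementation:

def overlap(chip, box, thresh=0.5):
    # Assumed semantics: the gt box counts as inside the chip when the
    # intersection covers at least `thresh` of the box's own area.
    ix1, iy1 = max(chip[0], box[0]), max(chip[1], box[1])
    ix2, iy2 = min(chip[2], box[2]), min(chip[3], box[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    box_area = max(1e-9, (box[2] - box[0]) * (box[3] - box[1]))
    return inter / box_area >= thresh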
Example #2
def generate_region_gt(img_size, region_box, gt_boxes, labels):
    chip_list = []
    for box in region_box:
        chip_list.append(np.array(box))

    # chip gt
    chip_gt_list = []
    chip_label_list = []
    for chip in chip_list:
        chip_gt = []
        chip_label = []

        for i, box in enumerate(gt_boxes):
            if labels[i] == 0 or labels[i] == 11:
                continue
            if utils.overlap(chip, box, 0.75):
                box = [
                    max(box[0], chip[0]),
                    max(box[1], chip[1]),
                    min(box[2], chip[2]),
                    min(box[3], chip[3])
                ]
                new_box = [
                    box[0] - chip[0], box[1] - chip[1], box[2] - chip[0],
                    box[3] - chip[1]
                ]

                chip_gt.append(np.array(new_box))
                chip_label.append(labels[i])

        chip_gt_list.append(chip_gt)
        chip_label_list.append(chip_label)

    return chip_list, chip_gt_list, chip_label_list
Example #3
    def overlap(self, face):
        ol = utils.overlap(self.last_face.get_myRect().get_rect(),
                           face.get_myRect().get_rect())
        lastImgFrame = self.last_face.get_inImage().get_frameIx()
        faceFrame = face.get_inImage().get_frameIx()
        frameDist = np.abs(lastImgFrame - faceFrame) < 20
        return ol and frameDist
Example #4
    def make_voids(self):
        """
    List of void is built with the information given by ZOBOV.
    :return:
    2 arrays, zones_IDs and nzones
    """
        af = self.voids_zobov
        apf = []
        for i in range(len(af)):
            apf.append(af[i].split())
        vovp = []
        for i in range(1, len(apf) - 1):
            vovp.append(utils.overlap(apf[i]))
        zone_IDs = []
        nzones = []
        for i in range(len(vovp)):
            if vovp[i] != -1:
                zone_IDs.append(vovp[i])
                nzones.append(len(vovp[i]))
            else:
                zone_IDs.append(i)
                nzones.append(0)
        cond = np.array([elem != 0 for elem in nzones])
        zones_IDs = np.array(zone_IDs, dtype=object)[cond]
        Nzones = np.array(nzones)[cond]

        return zones_IDs, Nzones
Example #5
File: dense.py Project: uhh-lt/kb2vec
    def link(self, context, phrases):
        linked_phrases = []
        context_vector = self._vectorizer.transform([context])

        for phrase in phrases:
            try:
                dphrase = self._default_phrase(phrase)
                if dphrase in self._phrase2candidates:
                    # get the candidates
                    candidates = list(
                        self._phrase2candidates[dphrase])  # to remove
                    indices = []
                    for candidate in candidates:
                        if candidate in self._candidate2index:
                            indices.append(self._candidate2index[candidate])
                        else:
                            print("Warning: candidate '{}' is not indexed".
                                  format(candidate))
                            indices.append(
                                0)  # just to make sure lengths are equal

                    dense_candidate_vectors = self._dense_vectors[indices]
                    # check if candidates are correct
                    print("Retrieved {} candidates for '{}'".format(
                        len(indices), phrase.text))

                    dense_context_vector = self._get_dense_vector(
                        context_vector, dphrase.text)

                    # rank the candidates
                    sims = dot(dense_candidate_vectors, dense_context_vector.T)

                    if self._params["use_overlap"]:
                        overlap_scores = zeros(sims.shape)
                        for i, candidate in enumerate(candidates):
                            overlap_scores[i] = overlap(
                                candidate.name, phrase.text)
                    else:
                        overlap_scores = ones(sims.shape)

                    scores = multiply(sims, overlap_scores)
                    best_index = argmax(scores)
                    best_candidate = candidates[best_index]
                    best_candidate.score = scores[best_index]
                    best_candidate.link = self._get_dbpedia_uri(
                        best_candidate.wiki, best_candidate.uris)
                    linked_phrases.append((phrase, best_candidate))
                else:
                    print(
                        "Warning: phrase '{}' is not found in the vocabulary of the model"
                        .format(phrase))

                    linked_phrases.append((phrase, Candidate()))
            except Exception:
                print("Error while processing phrase '{}':".format(phrase))
                print(format_exc())
                linked_phrases.append((phrase, Candidate()))
        return linked_phrases
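Here `overlap(candidate.name, phrase.text)` is a lexical score used to rerank candidates (Example #9 passes an extra lowercasing flag). A hedged sketch of such a scorer, assuming a Jaccard-style token overlap in [0, 1]; the real scoring function in kb2vec may differ:

def overlap(name, target, lower=True):
    # Token-level Jaccard similarity between candidate name and phrase.
    if lower:
        name, target = name.lower(), target.lower()
    a, b = set(name.split()), set(target.split())
    if not a or not b:
        return 0.0
    return len(a & b) / len(a | b)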
Example #6
    def _f1s(self):
        f1s = np.zeros([self.batch_size(), self.max_node_size()])
        for (batch_idx, apid) in enumerate(self.apids):
            tree = self.trees[self.batch_idx_to_tree_idx(batch_idx)]
            f1s[batch_idx, :tree.num_nodes()] = [
                overlap(tree.span(i), self.answers[batch_idx])[-1]
                for i in range(tree.num_nodes())
            ]

        return f1s
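In this example and in Example #10, `overlap(span, answers)` returns a 4-tuple (exact match, recall, precision, F1), and the `[-1]` above picks out the F1. A self-contained sketch, under the assumption that spans are inclusive (start, end) token-index pairs and the best-matching gold span wins:

def overlap(span, answers):
    # (em, recall, precision, f1) of a predicted span against gold spans;
    # the tuple order matches the unpacking in Example #10.
    best = (0.0, 0.0, 0.0, 0.0)
    pred = set(range(span[0], span[1] + 1))
    for ans in answers:
        gold = set(range(ans[0], ans[1] + 1))
        common = len(pred & gold)
        if common == 0:
            continue
        precision = common / len(pred)
        recall = common / len(gold)
        f1 = 2 * precision * recall / (precision + recall)
        em = 1.0 if pred == gold else 0.0
        if f1 > best[-1]:
            best = (em, recall, precision, f1)
    return best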
Example #7
def sample_ev(frame, data):
    neg_spans = [
        f.evidence for f in data['frames']
        if not utils.frame_overlap(frame, f)
    ]
    if not neg_spans:
        neg_spans = [
            s.s for s in data['sents']
            if not utils.overlap(s.i, s.f, frame.ev_i, frame.ev_f)
        ]
    return frame._replace(evidence=random.sample(neg_spans, 1)[0], label=-1)
Example #8
def get_roads_in_bounded_box(box, map_box_to_road):

	road_boxes = np.array(list(map_box_to_road.keys()))

	overlap_index = overlap(box, road_boxes)
	overlapped = road_boxes[overlap_index]

	results = []
	for i in range(overlapped.shape[0]):
		t = tuple(overlapped[i])
		results.append(map_box_to_road[t])

	return results
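Unlike the scalar predicates above, this `overlap(box, road_boxes)` must return a boolean index over an N x 4 array so that `road_boxes[overlap_index]` works. A vectorized sketch (the open-interval intersection test is an assumption):

import numpy as np

def overlap(box, boxes):
    # Boolean mask of which rows of `boxes` ([x1, y1, x2, y2] each)
    # intersect `box`, computed via broadcasting.
    boxes = np.asarray(boxes)
    return ((boxes[:, 0] < box[2]) & (boxes[:, 2] > box[0]) &
            (boxes[:, 1] < box[3]) & (boxes[:, 3] > box[1]))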
Example #9
    def _link_db_query(self, target, diffbot_query_response):
        candidates = []
        if "data" not in diffbot_query_response:
            return candidates
        else:
            data = diffbot_query_response["data"]

        for hit in data:
            if "allUris" not in hit: continue
            uris = set(hit["allUris"])
            if "origin" in hit: uris.add(hit["origin"])
            if "origins" in hit: uris.union(set(hit["origins"]))
            if "wikipediaUri" in hit:
                uris.add(hit["wikipediaUri"])

            if "importance" in hit:
                name = hit["name"]
                importance = float(hit["importance"])
                if self._use_overlap and self._use_importance:
                    score = truncated_log(importance) * overlap(
                        name, target, self._lower)
                elif self._use_overlap:
                    score = overlap(name, target, self._lower)
                elif self._use_importance:
                    score = importance
                else:
                    score = random()

                wiki_uri = self._find_wiki_uri(uris)
                dbpedia_uri = self._get_dbpedia_uri(wiki_uri, uris)

                c = Candidate(score, name, dbpedia_uri, wiki_uri, hit["types"],
                              hit["allNames"], uris)
                candidates.append(c)
            else:
                print("Warning: Skipping a hit without importance value.")

        return sorted(candidates, reverse=True)
Example #10
    def precision_recall(self, predicted_idxs):
        exact_matches, recalls, precisions, f1s, allf1s = [], [], [], [], []
        for (batch_idx, predicted_idx) in enumerate(predicted_idxs):
            tree = self.trees[self.batch_idx_to_tree_idx(batch_idx)]
            span = tree.span(predicted_idx)
            em, recall, precision, f1 = overlap(span, self.answers[batch_idx])

            allf1s.append(f1)

            if self.has_answer[batch_idx]:
                exact_matches.append(em)
                recalls.append(recall)
                precisions.append(precision)
                f1s.append(f1)

        return exact_matches, recalls, precisions, f1s, allf1s
Example #11
def main():
    label_df = pd.read_csv(annos_path)

    label_object = []
    detect_object = []
    mask_object = []
    undetected_img = []
    pixel_num = []
    for raw_file in tqdm(glob(mask_path + '/*.png')):
        img_name = os.path.basename(raw_file)
        imgid = os.path.splitext(img_name)[0]
        label_file = os.path.join(label_path, img_name)
        image_file = os.path.join(image_path, imgid + '.jpg')

        mask_img = cv.imread(raw_file, cv.IMREAD_GRAYSCALE)
        height, width = mask_img.shape[:2]

        pixel_num.append(np.sum(mask_img))

        label_box = utils.get_label_box(label_df, imgid + '.jpg')
        mask_box = utils.generate_box_from_mask(mask_img)
        mask_box = list(
            map(utils.resize_box, mask_box, [(width, height)] * len(mask_box),
                [(3200, 1800)] * len(mask_box)))
        # mask_box = utils.enlarge_box(mask_box, (3200, 1800), ratio=1)
        # _boxvis(mask_img, mask_box)

        count = 0
        for box1 in label_box:
            for box2 in mask_box:
                if utils.overlap(box2, box1):
                    count += 1
                    break

        label_object.append(len(label_box))
        detect_object.append(count)
        mask_object.append(len(mask_box))
        if len(label_box) != count:
            undetected_img.append(imgid)

    print('recall: %f' % (np.sum(detect_object) / np.sum(label_object)))
    print('cost avg: %f, std: %f' % (np.mean(pixel_num), np.std(pixel_num)))
    print('detect box avg: %f, std: %f' %
          (np.mean(mask_object), np.std(mask_object)))
Example #12
def main():
    annos = json.loads(open(annos_path).read())

    label_object = []
    detect_object = []
    mask_object = []
    undetected_img = []
    for raw_file in tqdm(glob(mask_path + '/*.png')):
        img_name = os.path.basename(raw_file)
        imgid = os.path.splitext(img_name)[0]
        label_file = os.path.join(label_path, img_name)
        image_file = os.path.join(image_path, imgid + '.jpg')

        mask_img = cv.imread(raw_file, cv.IMREAD_GRAYSCALE)
        # mask_img = cv.resize(mask_img, (2048, 2048), interpolation=cv.INTER_LINEAR)

        height, width = mask_img.shape[:2]

        label_box = get_box(annos, imgid)
        mask_box = utils.region_box_generation(mask_img, (2048, 2048))
        # mask_box = utils.generate_box_from_mask(mask_img)
        # mask_box = list(map(utils.resize_box, mask_box,
        #                 [width]*len(mask_box), [2048]*len(mask_box)))
        # mask_box = utils.enlarge_box(mask_box, (2048, 2048), ratio=2)
        # _boxvis(mask_img, mask_box)
        # break

        count = 0
        for box1 in label_box:
            for box2 in mask_box:
                if utils.overlap(box2, box1):
                    count += 1
                    break

        label_object.append(len(label_box))
        detect_object.append(count)
        mask_object.append(len(mask_box))
        if len(label_box) != count:
            undetected_img.append(imgid)

    print('recall: %f' % (np.sum(detect_object) / np.sum(label_object)))
    print('detect box avg: %f, std: %f' %
          (np.mean(mask_object), np.std(mask_object)))
Example #13
    def overlap_matrix(self, fiber_instance):
        """
        This method returns the overlap matrix for calculating the
        MIMO channel matrix of a complete system.
        """
        element_arrays = []
        for device in self._device_list:
            Er1 = device.get_mode_pattern(0, 0)
            fiber_modes = fiber_instance.get_admissible_modes()
            M = len(fiber_modes)
            mode_vector = numpy.zeros(2 * M)
            for i in range(M):
                current_mode = fiber_modes[i]
                Er2 = fiber_instance.modes.get_mode_pattern(
                    current_mode[0], current_mode[1])
                mode_vector[i] = mode_vector[M + i] = utils.overlap(Er1, Er2)
            element_arrays.append(mode_vector)
        element_arrays = numpy.array(element_arrays)
        return element_arrays
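For mode patterns, `utils.overlap(Er1, Er2)` is presumably the standard normalized overlap integral between two sampled field distributions. A sketch under that assumption; real implementations may return the amplitude rather than the power:

import numpy

def overlap(Er1, Er2):
    # |<E1|E2>|^2 / (<E1|E1> <E2|E2>) over the sampled grid.
    num = numpy.abs(numpy.sum(numpy.conj(Er1) * Er2)) ** 2
    den = numpy.sum(numpy.abs(Er1) ** 2) * numpy.sum(numpy.abs(Er2) ** 2)
    return num / den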
Example #14
def result_eval(detections, label_df):
    pred_num = []
    label_num = []
    tp_num = []
    for img_id, preds in detections.items():
        gt_boxes, gt_types = get_box_label(label_df, img_id+'.jpg')
        pred_num.append(len(preds))
        label_num.append(len(gt_boxes))
        correct = 0
        for pred in preds:
            pred_box = pred[:4]
            pred_type = pred[4]
            for gt_box, gt_type in zip(gt_boxes, gt_types):
                if gt_type == pred_type and utils.overlap(pred_box, gt_box, 0.5):
                    correct += 1
                    break
        tp_num.append(correct)

    recall = 1.0 * sum(tp_num) / sum(label_num)
    precision = 1.0 * sum(tp_num) / sum(pred_num)
    return recall, precision
Example #15
def optimize_single_use(func):
    domains = {}
    for reg in func.assigned_registers + func.free_registers:
        if int(reg[1:]) <= len(func.arguments):
            continue

        domains[reg] = func.get_domain_for(reg)

    replacements = {}

    # Find the Replacements here
    for r in domains:
        for r2 in domains:
            if r == r2:
                continue

            if r2 in replacements:
                continue

            if r in replacements:
                continue

            if not utils.overlap(domains[r], domains[r2]):
                replacements[r2] = r
                domains[r] += domains[r2]

    for line in func.lines.values():
        new_arguments = []

        for arg in line.arguments:
            if arg in replacements:
                new_arguments.append(replacements[arg])
            else:
                new_arguments.append(arg)

        line.arguments = new_arguments

    return func
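Since `domains[r] += domains[r2]` concatenates, a domain here is evidently a list of program points, and `utils.overlap` decides whether two live ranges collide. A minimal sketch of that predicate (an assumption, not the project's code):

def overlap(domain_a, domain_b):
    # True when two register live ranges share any program point.
    return bool(set(domain_a) & set(domain_b))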
Example #16
def chip_v2(image, gt_boxes, labels):
    """generate chips from a image
    method: random crop around gt_box

    Args:
        image: np.array
        gt_boxes: list of [xmin, ymin, xmax, ymax]
        labels: list of
    Returns:
        chip list, size 300x300
        new gt_box list
    """
    size = image.shape
    # chip
    chip_list = []
    for box in gt_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        # different chip size for different gt size
        if box_w < 50 and box_h < 50:
            chip_size_list = [80, 100, 180, 250]
        elif box_w < 100 and box_h < 100:
            chip_size_list = [150, 250, 350, 400]
        elif box_w < 200 and box_h < 200:
            chip_size_list = [300, 600, 800]
        else:
            chip_size_list = [600, 800, 1000]

        for chip_size in chip_size_list:
            # region to random crop around gt
            region = np.clip(
                [box[0] - chip_size, box[1] - chip_size,
                box[0] + chip_size, box[1] + chip_size],
                0, 2047)

            # random crop; raise the lower bound every retry so the
            # sampled chip eventually covers the whole gt box
            # (in the original, start_point was reset inside the loop,
            # which made the += 10 below dead code)
            start_point = 0
            while True:
                new_x, new_y = region[0], region[1]
                if region[2] - region[0] - chip_size > 0:
                    new_x = region[0] + randint(start_point, region[2] - region[0] - chip_size)
                if region[3] - region[1] - chip_size > 0:
                    new_y = region[1] + randint(start_point, region[3] - region[1] - chip_size)
                chip = [new_x, new_y, new_x + chip_size, new_y + chip_size]
                # abandon chips that only partially cover the gt box
                if chip[2] >= box[2] and chip[3] >= box[3]:
                    break
                start_point += 10
            chip_list.append(np.array(chip))

    # chip gt
    chip_gt_list = []
    chip_label_list = []
    for chip in chip_list:
        chip_gt = []
        chip_label = []

        for i, box in enumerate(gt_boxes):
            if utils.overlap(chip, box, 0.5):
                box = [max(box[0], chip[0]), max(box[1], chip[1]),
                       min(box[2], chip[2]), min(box[3], chip[3])]
                new_box = [box[0] - chip[0], box[1] - chip[1],
                           box[2] - chip[0], box[3] - chip[1]]

                chip_gt.append(np.array(new_box))
                chip_label.append(labels[i])

        chip_gt_list.append(chip_gt)
        chip_label_list.append(chip_label)

    return chip_list, chip_gt_list, chip_label_list
Example #17
unigram_counts = {}
trigram_counts = {}
pos_counts = {}  # referenced below; missing from the snippet as scraped

unique_trigrams = set()

q_words = ['who', 'why', 'when', 'how', 'which', 'whose', 'where', 'whom', 'what']

for idx, d in enumerate(data):
    tree = d['context_squad_tree']
    answers = d['answers']
    j = idx - 1
    while tree is None:
        tree = data[j]['context_squad_tree']
        j -= 1

    f1s = np.array([overlap(tree.span(i), answers)[-1] for i in range(tree.num_nodes())])
    best_idx = np.argmax(f1s)
    best_node = tree.nodes[best_idx]
    best_pos = best_node.label

    if best_pos not in pos_counts:
        pos_counts[best_pos] = 1
    else:
        pos_counts[best_pos] += 1

    q = d['question_tokens']
    q_tree = d['question_squad_tree']
    question_pos = [q_tree.nodes[idx].label for idx in q_tree.leaf_order()]

    for qi in range(len(q)):
        t = q[qi].lower()
Example #18
        img = cv2.imread(img_path)
        height, width = img.shape[:2]
        mask_img = cv2.imread(raw_file, cv2.IMREAD_GRAYSCALE)
        mask_h, mask_w = mask_img.shape[:2]
        
        pixel_num.append(np.sum(mask_img))

        label_box, _ = dataset.get_gtbox(img_path)
        region_box, contours = utils.generate_box_from_mask(mask_img)
        region_box = utils.region_postprocess(region_box, contours, (mask_w, mask_h))
        region_box = utils.resize_box(region_box, (mask_w, mask_h), (width, height))
        region_box = utils.generate_crop_region(region_box, (width, height))

        count = 0
        for box1 in label_box:
            for box2 in region_box:
                if utils.overlap(box2, box1):
                    count += 1
                    break

        label_object.append(len(label_box))
        detect_object.append(count)
        mask_object.append(len(region_box))
        if len(label_box) != count:
            undetected_img.append(img_name)

    print('recall: %f' % (np.sum(detect_object) / np.sum(label_object)))
    # print('cost avg: %f, std: %f' % (np.mean(pixel_num), np.std(pixel_num)))
    print('detect box avg: %f' %(np.mean(mask_object)))
    # print(sorted(undetected_img))
Example #19
def is_false_positive(label_box, window, threshold=0.6):
    s_window = cpt_area(window)
    s_overlap = cpt_area(overlap(label_box, window))
    r = s_overlap/float(s_window)
    return r < threshold
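In this example (and in Example #25) `overlap` returns the intersection rectangle itself, which is then measured with `cpt_area`. A sketch of the pair, assuming [x1, y1, x2, y2] boxes:

def overlap(box_a, box_b):
    # Intersection rectangle; may be degenerate when the boxes are disjoint.
    return [max(box_a[0], box_b[0]), max(box_a[1], box_b[1]),
            min(box_a[2], box_b[2]), min(box_a[3], box_b[3])]

def cpt_area(box):
    # Box area, clamped to zero for empty intersections.
    return max(0, box[2] - box[0]) * max(0, box[3] - box[1])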
Example #20
def combine(outputname, Part_Fastq_Filename, step, length_bin, filter=40):
    #print(Part_Fastq_Filename)
    cache_length = 3
    result = {}
    file_order = 0
    for name in Part_Fastq_Filename:
        command = 'samtools view ' + name + '.bam >' + name + '.sam'
        SamFileMaker = Pshell(command)
        SamFileMaker.process()
        with open(name + '.sam') as f:
            #print(partsamlines[-1])
            for line in f:
                #print(line)
                s = line.strip().split('\t')
                mismatch = int(s[11][s[11].rfind(':') + 1:])
                if not (s[0] in result) or (len(result[s[0]]) == 0):
                    result[s[0]] = [
                        utils.reads(s[2], int(s[3]), file_order, mismatch)
                    ]
                else:
                    temp = utils.reads(s[2], int(s[3]), file_order, mismatch)
                    #reads from cliped mapped bam
                    join_or_not = False
                    read_length = len(s[9])
                    tail_length = ((read_length - 1) % step) + 1
                    refseq = s[12][-2 - tail_length:-2]
                    readsseq = s[9][-tail_length:]
                    strand = s[13][-2:]
                    for reads in result[s[0]]:  #Try to join existing seeds
                        if reads.canjoin(temp, step, read_length, length_bin):
                            mis = 0
                            for ppp in range(tail_length):
                                if (refseq[ppp] != readsseq[ppp]):
                                    #Here ++/+-/-+/-- should be considered. C/T or A/G match should be identified.
                                    if strand[0] == '+':
                                        if (refseq[ppp] == 'C'
                                                and readsseq[ppp] == 'T'):
                                            continue
                                    else:
                                        if (refseq[ppp] == 'G'
                                                and readsseq[ppp] == 'A'):
                                            continue
                                    mis += 1
                            reads.join(temp, mis)
                            join_or_not = True
                            break

                    frac_list = result[s[0]]
                    if not join_or_not:  #temp reads haven't join any exist reads
                        frac_list.append(
                            temp)  #add temp reads to array as new seed
                        #if file_order>2 and len(frac_list)>=s:
                        #    for i in range(len(frac_list)-1,-1,-1):
                        #        read = frac_list[i]
                        #        if file_order-read.order>2:
                        #            if read.getSum()==0 and read.getMismatch()>1:
                        #                frac_list.pop(i)

                        #print(len(result[s[0]]))
        file_order += 1
    #join done
    #filter results: filter1
    for name in result:
        nonjoin_num = 0
        reads_list = result[name]
        for i in range(len(reads_list) - 1, -1, -1):
            if reads_list[i].getSum() == 0 and reads_list[i].getMismatch() > 1:
                reads_list.pop(
                    i
                )  #Remove all reads which have more than 1 mistake and never be joined
    #filter results: filter2
    for name in result:
        reads_list = result[name]
        num = len(reads_list)
        del_mark = [0 for i in range(num)]
        for i in range(num):
            for j in range(i + 1, num):
                if overlap(result[name][i], result[name][j], step, length_bin):
                    sss = result[name][i].getSum() - result[name][j].getSum()
                    if sss > 0: del_mark[j] = 1
                    elif sss < 0: del_mark[i] = 1
                    else:
                        mis = result[name][i].getMismatch(
                        ) - result[name][j].getMismatch()
                        if mis > 0: del_mark[i] = 1
                        else: del_mark[j] = 1
        #Only keep the best read which has the most extends and the least mismatches.
        for i in range(num - 1, -1, -1):
            if del_mark[i] == 1:
                reads_list.pop(i)

    fastq_dic = GetFastqList(result, Part_Fastq_Filename, step, length_bin)
    with open(outputname + '_finalfastq.fastq', 'w') as f:
        for name in fastq_dic:
            num = 1
            for read, quality in fastq_dic[name]:
                #read,quality = fastq_dic[name][num]
                if (len(read) < filter): continue
                f.write('@' + name + '_' + str(num) + '\n')
                f.write(read + '\n')
                f.write('+\n')
                f.write(quality + '\n')
                num += 1
Example #21
            candidate_joint_prob = [
                candidate_span_prob[j] * candidate_pos_prob[j]
                for j in range(len(candidate_idxs))
            ]
            reranked_best_idx = candidate_idxs[np.argmax(
                np.array(candidate_joint_prob))]
            candidate_tree_pos = np.array(tree_pos)[candidate_idxs]

            if best_idx != reranked_best_idx:
                num_changed += 1
                old_pos, new_pos = predicted_pos, tree.nodes[
                    reranked_best_idx].label
                old_span, new_span = tree.span(best_idx), tree.span(
                    reranked_best_idx)

                _, old_recall, old_precision, old_f1 = overlap(
                    old_span, batch.answers[batch_idx])
                _, new_recall, new_precision, new_f1 = overlap(
                    new_span, batch.answers[batch_idx])

                old_metrics = np.add(
                    old_metrics, np.array([old_f1, old_recall, old_precision]))
                new_metrics = np.add(
                    new_metrics, np.array([new_f1, new_recall, new_precision]))

                if new_f1 == old_f1:
                    print('No material change')
                elif new_f1 > old_f1:
                    print('Improvement from %.3f to %.3f' % (old_f1, new_f1))
                else:
                    print('Worsened from %.3f to %.3f' % (old_f1, new_f1))
                print('\t' + ' '.join(question))
Example #22
def get_data():
    node_information = pd.read_csv(
        'node_information.csv',
        header=None,
        names=['ID', 'Year', 'Title', 'Authors', 'Journal', 'Abstract'])
    training_set = pd.read_csv('training_set.txt',
                               header=None,
                               names=['Target', 'Source', 'Edge'],
                               delim_whitespace=True)
    #testing_set = pd.read_csv('testing_set.txt', header=None, names=['Target', 'Source'], delim_whitespace=True)

    print("Get valid IDs")
    valid_ids = set()
    for element in training_set.values:
        valid_ids.add(element[0])
        valid_ids.add(element[1])

    print("Select valid indices from valid IDs")
    index_valid = [
        i for i, element in enumerate(node_information.values)
        if element[0] in valid_ids
    ]
    node_info = node_information.iloc[index_valid]

    print("Get index for nodes")
    IDs = []
    ID_pos = {}
    for element in node_info.values:
        ID_pos[element[0]] = len(IDs)
        IDs.append(element[0])

    print("Add ID column for merging")
    training_set['Target_ID'] = training_set.apply(lambda row: ID_pos[row[0]],
                                                   axis=1)
    training_set['Source_ID'] = training_set.apply(lambda row: ID_pos[row[1]],
                                                   axis=1)

    print("Merge")
    train = pd.merge(training_set,
                     node_information,
                     how='left',
                     left_on='Target_ID',
                     right_index=True)
    train = pd.merge(train,
                     node_information,
                     how='left',
                     left_on='Source_ID',
                     right_index=True,
                     suffixes=['_target', '_source'])
    #train.to_csv('train_blank.csv', index=False)

    #train = pd.read_csv('train_blank.csv')
    #train.to_csv('train.csv', index=False)

    t = time()
    print("Add overlapping titles")
    train['Overlap_title'] = train.apply(lambda row: overlap(row, 'Title'),
                                         axis=1)
    print("Add common_authors")
    train['Common_authors'] = train.apply(lambda row: common(row, 'Authors'),
                                          axis=1)
    print("Add overlapping abstract")
    train['Overlap_abstract'] = train.apply(
        lambda row: overlap(row, 'Abstract'), axis=1)
    print("Date difference")
    train['Date_diff'] = (train['Year_source'] - train['Year_target']).abs()
    print(time() - t)

    #train.to_csv('train_basic.csv', index=False)
    #print("Loading set")
    #train = pd.read_csv('train_basic.csv')

    #print("Loaded")
    t = time()
    print("Tfidf")
    tfidf_vect = TfidfVectorizer(stop_words="english")
    abstracts_source = train['Abstract_source'].values
    abstracts_target = train['Abstract_target'].values
    all_abstracts = np.concatenate((abstracts_source, abstracts_target))
    tfidf_vect.fit(all_abstracts)
    print("tf_idf fitted")
    vect_source = tfidf_vect.transform(abstracts_source)
    print("source transformed")
    vect_target = tfidf_vect.transform(abstracts_target)
    print("target transformed")
    train['Tfidf_cosine_abstracts_nolim'] = tfidf(vect_source, vect_target)
    print(time() - t)

    #train.to_csv('train_basic_tfidf.csv', index=False)
    #train = pd.read_csv('train_basic_tfidf.csv')

    t = time()
    print("Tfidf")
    tfidf_vect = TfidfVectorizer(stop_words="english")
    titles_source = train['Title_source'].values
    titles_target = train['Title_target'].values
    all_abstracts = np.concatenate((titles_source, titles_target))
    tfidf_vect.fit(all_abstracts)
    print("tf_idf fitted")
    vect_source = tfidf_vect.transform(titles_source)
    print("source transformed")
    vect_target = tfidf_vect.transform(titles_target)
    print("target transformed")
    train['Tfidf_cosine_titles'] = tfidf(vect_source, vect_target)
    print(time() - t)

    #train.to_csv('train_basic_tfidf_title.csv', index=False)
    #train = pd.read_csv('train_basic_tfidf_title.csv')

    return train
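The `overlap(row, 'Title')` feature above compares the '_source' and '_target' versions of a text column on each merged row. A plausible sketch counting shared lowercase words (the exact metric used by the project is not shown):

def overlap(row, column):
    # Number of distinct words shared by the source and target texts.
    source = set(str(row[column + '_source']).lower().split())
    target = set(str(row[column + '_target']).lower().split())
    return len(source & target)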
Example #23
print(len(offset_x))
for K in range(20):
    test_fiber = fiber.GHModes(w, XX, YY, theta=0.0)
    test_fiber1 = fiber.GHModes(w,
                                XX,
                                YY,
                                theta=theta_vals[K],
                                offset_x=offset_x[K],
                                offset_y=offset_y[K])
    E = []
    R = []
    for i in range(M):
        p, q = admissible_modes[i][0], admissible_modes[i][1]
        for j in range(M):
            m, n = admissible_modes[j][0], admissible_modes[j][1]
            mode_pattern_1 = test_fiber.get_mode_pattern(p, q)
            mode_pattern_2 = test_fiber1.get_mode_pattern(m, n)
            overlap = utils.overlap(mode_pattern_1, mode_pattern_2)
            E.append(overlap)
    A = numpy.array(E)
    O = int(numpy.sqrt(len(A)))  # reshape needs integer dimensions
    A1 = A.reshape(O, O)
    A1 = numpy.mat(A1)
    psi_matrix = numpy.kron(numpy.eye(2), A1)
    print(K)
    filename1 = 'small core lossy projectiom matrix constant offset 05' + str(
        K)
    f1 = open(filename1, 'wb')  # pickle requires a binary-mode file
    pickle.dump(psi_matrix, f1)
    f1.close()
Example #24
PREFIX = sys.argv[2]

MODEL = sys.argv[3]

lat = sys.argv[4]  ##concat or avg

CONFIG_NAME = 'ght_all'

# Load GHT level
ght_500 = np.load(GHT_FILE)[LEVELS[LEVELS_STR.index('500')]]
ght_700 = np.load(GHT_FILE)[LEVELS[LEVELS_STR.index('700')]]
ght_900 = np.load(GHT_FILE)[LEVELS[LEVELS_STR.index('900')]]

# Make 2 days (overlap 1 day)
ght_500, _, _ = utils.overlap(ght_500, win=4, t=8)
ght_700, _, _ = utils.overlap(ght_700, win=4, t=8)
ght_900, _, _ = utils.overlap(ght_900, win=4, t=8)

if MODEL == '':
    print('insert model..')
    exit()

print('Data Shape...', ght_500.shape)
print('Data Shape...', ght_700.shape)
print('Data Shape...', ght_900.shape)

##predict data...
import demo

model = demo.lstm_ght_all(8, 4096, lat)
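Here `utils.overlap(data, win=4, t=8)` slices a time series into overlapping windows ("2 days, overlap 1 day" per the comment above) and is unpacked as three values. A sketch under those assumptions; the two extra return values (window start and end indices) are a guess based on the `data, _, _ = ...` call sites:

import numpy as np

def overlap(data, win=4, t=8):
    # Windows of length t with stride win, so consecutive windows
    # share t - win timesteps.
    starts = list(range(0, len(data) - t + 1, win))
    windows = np.stack([data[s:s + t] for s in starts])
    ends = [s + t for s in starts]
    return windows, starts, ends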
Example #25
def cpt_accuracy(label_box, pred_box):
    s1 = cpt_area(pred_box)
    s2 = cpt_area(label_box)
    s_overlap = cpt_area(overlap(label_box, pred_box))
    return s_overlap/float(s1 + s2 - s_overlap)
Example #26
LEVELS = ['G5', 'G7', 'G9']
LEVEL = sys.argv[2]

PREFIX = sys.argv[3]

MODEL = sys.argv[4]

CONFIG_NAME = 'raw_kmeans'

_LEVEL = LEVELS[LEVELS_STR.index(LEVEL)]

# Load GHT level
data = np.load(GHT_FILE)[_LEVEL]

# Make 2 days (overlap 1 day)
data, _, _ = utils.overlap(data, win=4, t=8)

if MODEL == '':
    print('insert model..')
    exit()

print('Data Shape...', data.shape)
##predict data...
import demo
model = demo.stacked_lstm_ae(8, 4096, 'relu', 32, 'sgd', 0.2, 0.1)
model.load_weights(MODEL_FILE)
from tensorflow.python.keras.models import Model
model = Model(inputs=model.inputs, outputs=model.get_layer("encoder").output)
data = model.predict(data)

# Reshape
Example #27
def getBoundedBoxes(routes, map_box_to_road):
    """
		iterate over routes find all bounded boxes
		objective: minimize area in bounded boxes subject to all route data points
		are contained in bounded boxes

		create mapping from each bounding box to the roads contained in that box and
		the routes contained in that box
	"""
    boundedBoxes = None  #np.array([[],[],[],[]]).T
    boxToRoutes = {}
    for route in iter(routes):
        box = getBoundedBox(route['coordinates'])

        if box in boxToRoutes: boxToRoutes[box].append(route)
        else: boxToRoutes[box] = [route]

        if boundedBoxes is not None and boundedBoxes.shape[0] > 0:
            """ get all already discovered bounded boxes that overlap with route bb """
            overlappedIndex = overlap(box, boundedBoxes)
            overlapped = boundedBoxes[overlappedIndex]
            """
				if overlap then remove overlapped boxes from boundedBoxes, merge
				over lapped bounded boxes, place new bounded box back in boundedBoxes
			"""
            if overlapped.shape[0] > 0:

                boundedBoxes = boundedBoxes[~overlappedIndex]

                overlapped = np.vstack((box, overlapped))

                update = []
                for i in range(overlapped.shape[0]):
                    update += boxToRoutes[tuple(overlapped[i])]
                    boxToRoutes.pop(tuple(overlapped[i]), None)

                overlapped = mergeBoundedBoxes(overlapped)

                boxToRoutes[overlapped] = update

                boundedBoxes = np.vstack((boundedBoxes, overlapped))
            else:
                boundedBoxes = np.vstack((boundedBoxes, box))
        else:
            boundedBoxes = np.array([[box[0]], [box[1]], [box[2]], [box[3]]]).T

    box_to_roads = {}
    keep = [True] * boundedBoxes.shape[0]
    keep = np.array(keep)
    for i in range(boundedBoxes.shape[0]):
        bb = tuple(boundedBoxes[i])
        fips = get_roads_in_bounded_box(bb, map_box_to_road)
        # don't have road data for this bounded box
        if len(fips) == 0:
            keep[i] = False
            boxToRoutes.pop(bb)
            continue
        fips_filter = {'state_fips':[f['state_fips'] for f in fips], \
         'county_fips':[f['county_fips'] for f in fips]}
        box_to_roads[bb] = union_roads(fips_filter=fips_filter)
    boundedBoxes = boundedBoxes[keep]
    return boundedBoxes, box_to_roads, boxToRoutes