def process_NM(mes, config):
    new_anchor = Anchor(mes, 0)
    master = Anchor(mes, 1)
    if len(config.anchors) > 0:
        # anchors already registered: only the new anchor needs to be added
        config.anchors.append(new_anchor)
    else:
        # first anchor for this config: register the master anchor before the new one
        config.anchors.append(master)
        config.anchors.append(new_anchor)
Example #2
    def anchors(self):
        if self.redirect or self.error:
            actual_anchors = []
        else:
            actual_anchors = [
                Anchor(i, self.reqresp)
                for i in self.internal.getAnchors()
                if validanchor(self.internal.url.toString(),
                               i.getHrefAttribute().strip())
            ]
        if not self.redirect:
            actual_anchors.append(Anchor(self.fake_anchor, self.reqresp))
        
        if not (self.redirect or self.error):
            links = self.internal.getElementsByTagName("link")
            for link in links:
                if "href" in link.getAttributesMap().keySet():
                    href = link.getAttribute("href")
                    if validanchor(self.internal.url.toString(), href.strip()):
                        actual_anchors.append(Anchor(
                            FakeHtmlUnitAnchor(
                                self.internal.getFullyQualifiedUrl(href).toString(),
                                self.webclient,
                                link.getCanonicalXPath()),
                            self.reqresp))

        return actual_anchors
Example #3
def predict(image_file, model_path, draw_image=True):
    """
    Introduction
    ------------
        对图片进行预测
    Parameters
    ----------
        image_file: 图片路径
        model_path: model路径
    """
    image = cv2.imread(image_file)
    image_resized = cv2.resize(image, (config.image_size, config.image_size))
    image_tensor = transforms.ToTensor()(image_resized).unsqueeze(0).float()
    ckpt = torch.load(model_path, map_location='cpu')
    model = RetinaNet(config.num_classes)
    model.load_state_dict(ckpt)
    model.eval()
    anchors = Anchor(config.anchor_areas, config.aspect_ratio,
                     config.scale_ratios)
    anchor_boxes = anchors(input_size=config.image_size)
    detector = Detect(config.num_classes, config.top_k, config.conf_thresh,
                      config.nms_thresh)
    predictions = model(image_tensor)
    detections = detector(predictions, anchor_boxes)
    for j in range(detections.shape[1]):
        dets = detections[0, j, :]
        mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
        dets = torch.masked_select(dets, mask).view(-1, 5)

        if dets.shape[0] == 0:
            continue
        if draw_image:
            draw_bbox(image_resized, dets, j)
    if draw_image:
        image_resized = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
        plt.imshow(image_resized)
        plt.show()
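A minimal invocation sketch for the predict() helper above; the image and checkpoint paths are hypothetical, and wrapping the call in torch.no_grad() is an extra precaution so no autograd graph is built during inference:

import torch

if __name__ == '__main__':
    with torch.no_grad():  # inference only, no gradients needed
        predict('./data/test.jpg', './checkpoints/retinanet.pth', draw_image=True)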
Example #4
    def __compute_iou(self, anchor: Anchor, scale, bbox: BBox):

        abox = anchor.to_box()
        abox = abox * scale

        # intersection
        x0 = max(abox.x0, bbox.x0)
        x1 = min(abox.x1, bbox.x1)

        y0 = max(abox.y0, bbox.y0)
        y1 = min(abox.y1, bbox.y1)

        xi = x1 - x0
        yi = y1 - y0

        iou = 0
        if xi > 0 and yi > 0:
            ai = xi * yi
            au = ((abox.x1 - abox.x0) *
                  (abox.y1 - abox.y0)) + ((bbox.x1 - bbox.x0) *
                                          (bbox.y1 - bbox.y0)) - ai

            iou = ai / au

        return iou
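The IoU above is intersection area over union area. A quick standalone sanity check of that formula on plain (x0, y0, x1, y1) tuples, independent of the Anchor and BBox classes used in this example, might look like this:

def iou_xyxy(a, b):
    # IoU of two boxes given as (x0, y0, x1, y1) tuples
    xi = min(a[2], b[2]) - max(a[0], b[0])  # intersection width
    yi = min(a[3], b[3]) - max(a[1], b[1])  # intersection height
    if xi <= 0 or yi <= 0:
        return 0.0
    inter = xi * yi
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union

# Two unit squares overlapping by half: intersection 0.5, union 1.5, IoU = 1/3.
assert abs(iou_xyxy((0, 0, 1, 1), (0.5, 0, 1.5, 1)) - 1 / 3) < 1e-9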
Example #5
    def Activated(self):
        r"""The Add Anchor Command was activated"""
        # selection = Gui.Selection.getSelection()
        selection_ex = Gui.Selection.getSelectionEx()

        debug("len selection_ex = %i" % len(selection_ex))

        if len(selection_ex) != 1:
            msg = "Anchors : " \
                  "Select feature(s) on only 1 solid to add anchors to"
            error(msg)
            return
        else:
            # https://forum.freecadweb.org/viewtopic.php?t=7249
            unique_selection = selection_ex[0]
            selected_object = unique_selection.Object
            debug("  Selection : %s || %s" %
                  (selected_object, selected_object.Shape.ShapeType))

            subselected_objects = unique_selection.SubObjects

            for i, subselected_object in enumerate(subselected_objects):
                debug("SubSelection : %s || %s" %
                      (subselected_object, type(subselected_object)))

                p, u, v = puv(subselected_object)

                # make_anchor_feature(p, u, v)

                print("SubElementName : %s" %
                      unique_selection.SubElementNames[0])

                a = App.ActiveDocument.addObject("App::FeaturePython",
                                                 "Anchor")
                Anchor(a,
                       p,
                       u,
                       v,
                       topo_element=(unique_selection.Object,
                                     unique_selection.SubElementNames[i]))
                ViewProviderAnchor(a.ViewObject)

                # -- Add the anchor to the App::PropertyLinkList
                #    of the selected AnchorableObject --
                try:
                    l = selected_object.Anchors
                    l.append(a)
                    selected_object.Anchors = l
                except AttributeError:
                    msg = "Are you adding anchors to an AnchorableObject?"
                    error(msg)
Example #6
    def create_world(self):
        """Creates the 3D world in which entities will evolve"""
        self.anchors = []
        with open('anchors.tab') as anchors_file:
            for line in anchors_file:
                data = line.strip().split()
                if len(data) != 5:
                    d_print('did not understand line "{}"'.format(
                        line.strip()))
                    continue
                (x, y, z) = (float(data[0]), float(data[1]), float(data[2]))
                name = data[3]

                color = data[4]

                self.anchors.append(Anchor(x, y, z, name, color))

        # creating the graphic world in the 3D engine. First 2 args define the size
        self.world = World(8, 8, self.anchors)
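create_world() expects each line of anchors.tab to hold five whitespace-separated fields: x, y, z, a name and a color; lines with any other field count are skipped. A small sketch that writes such a file (the coordinates, names and colors below are purely illustrative, not taken from the project):

sample_lines = [
    '0.0 0.0 2.5 anchor_A red',
    '4.0 0.0 2.5 anchor_B green',
    '2.0 3.0 2.5 anchor_C blue',
]
with open('anchors.tab', 'w') as anchors_file:
    anchors_file.write('\n'.join(sample_lines) + '\n')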
def isWithin(annotation, from_chapter, from_para, to_chapter, to_para):
  anchor = Anchor(annotation['anchor'])
  if anchor.isValid():
    return anchor.isWithin(from_chapter, from_para, to_chapter, to_para)
  else:
    return False

with open('./output/annotations_original.jsonl', 'r') as infile, \
  open(f'./output/annotations.{PART_ID}.jsonl', 'w') as outfile:

  all_annotations = infile.readlines()
  print(f'Read {len(all_annotations)} annotations total')
  as_json = [json.loads(line) for line in all_annotations]

  # Keep only annotations on the relevant part
  relevant_annotations = list(filter(lambda a: isWithin(a, FROM_CHAPTER, FROM_PARA, TO_CHAPTER, TO_PARA), as_json))

  for annotation in relevant_annotations:
    # Assign new annotation- and version-UUID
    annotation['annotation_id'] = str(uuid.uuid4())
    annotation['version_id'] = str(uuid.uuid4())
    annotation['anchor'] = Anchor(annotation['anchor']).offset_by(OFFSET_CHAPTER, 0)

    # Replace doc & part ID
    annotation['annotates']['document_id'] = DOC_ID
    annotation['annotates']['filepart_id'] = PART_ID

    outfile.write(f'{json.dumps(annotation)}\n')

  print(f'Keeping {len(relevant_annotations)} annotations')
Example #9
    def test_generate(self):
        anchor = Anchor()
        base_anchors = anchor.generate()
        self.assertEqual(base_anchors.shape[0], 9)
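The expected 9 base anchors match the common RetinaNet convention of 3 aspect ratios times 3 scales per location. The generate() method itself is not shown here, so the sketch below only illustrates how such a base-anchor set is typically built; the base size, ratios and scales are assumptions, not the project's actual values:

import numpy as np

def make_base_anchors(base_size=32, aspect_ratios=(0.5, 1.0, 2.0),
                      scales=(2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3))):
    # Returns a (len(aspect_ratios) * len(scales), 4) array of (x0, y0, x1, y1)
    # anchors centred on the origin, each with the requested area and ratio.
    anchors = []
    for ratio in aspect_ratios:
        for scale in scales:
            area = (base_size * scale) ** 2
            w = np.sqrt(area / ratio)  # so that w * h == area and h / w == ratio
            h = w * ratio
            anchors.append([-w / 2, -h / 2, w / 2, h / 2])
    return np.array(anchors)

assert make_base_anchors().shape == (9, 4)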
def train(batch_size=2, learning_rate=1e-2, train_epoch=100):
    # Normalizer(), Augmenter() and Resizer() are applied in this order
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    data_loader = Data.DataLoader(dataset, batch_size, num_workers=2, shuffle=True, \
                                  collate_fn=collater, pin_memory=True)
    dataset_size = len(dataset)
    print('sample number:', dataset_size)
    print('epoch size:', dataset_size / batch_size)

    retinanet = RetinaNet()
    anchor = Anchor()
    focal_loss = FocalLoss()

    if cuda:
        retinanet = torch.nn.DataParallel(retinanet).cuda()
        anchor = anchor.cuda()
        focal_loss = focal_loss.cuda()
    retinanet.module.freeze_bn()

    optimizer = torch.optim.SGD(retinanet.parameters(),
                                lr=learning_rate,
                                momentum=0.9,
                                weight_decay=1e-4)
    '''
    class torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
        factor=0.1, patience=10, verbose=False, threshold=0.0001,
        threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
    :param optimizer: the optimizer of the network
    :param mode: (str), 'min' or 'max'. With 'min' the learning rate is reduced when the
                 monitored quantity stops decreasing; with 'max', when it stops increasing.
                 Default: 'min'
    :param factor: how much the learning rate is reduced each time, new_lr = old_lr * factor
    :param patience=10: number of steps with no improvement tolerated before the learning
                 rate is reduced
    :param verbose: (bool), if True, print a message to stdout on every update. Default: False
    :param threshold: (float), threshold for measuring a new best value, focusing only on
                 significant changes. Default: 1e-4
    :param cooldown: number of epochs to wait before resuming normal operation after the
                 learning rate has been reduced. Default: 0
    :param min_lr: lower bound on the learning rate
    :param eps: minimal decay applied to the lr. If the difference between the new and old
                 lr is smaller than eps, the update is ignored. Default: 1e-8
    Adapted from: https://blog.csdn.net/weixin_40100431/article/details/84311430
    '''
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    for epoch_num in range(train_epoch):
        epoch_loss = []

        for iter_num, data in enumerate(data_loader):
            iter_time = time.time()
            images, annots, scales = data
            if cuda:
                images = images.cuda()
                annots = annots.cuda()
                scales = scales.cuda()

            total_anchors = anchor(images)
            classification, localization = retinanet(images)

            cls_loss, loc_loss = \
                focal_loss(classification, localization, total_anchors, annots)
            loss = cls_loss + loc_loss
            epoch_loss.append(float(loss))

            optimizer.zero_grad()
            loss.backward()
            '''
            About torch.nn.utils.clip_grad_norm_():
            In some cases you may find that each layer of your net amplifies the
            gradient it receives. This causes a problem because the lower layers of
            the net then get huge gradients and their updates will be far too large
            to allow the model to learn anything.

            This function 'clips' the norm of the gradients by scaling the gradients
            down by the same amount in order to reduce the norm to an acceptable
            level. In practice this places a limit on the size of the parameter
            updates.

            The hope is that this will ensure that your model gets reasonably
            sized gradients and that the corresponding updates will allow the
            model to learn.
            Quoted from https://discuss.pytorch.org/t/about-torch-nn-utils-clip-grad-norm/13873
            (a glimpse of the lively back-and-forth on the PyTorch forum).
            '''
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            print('|', 'Epoch:', epoch_num + 1, '|', 'Iter:', iter_num + 1,
                  '|', 'cls loss:', float(cls_loss), '|', 'loc loss:',
                  float(loc_loss), '|', 'loss:', float(loss), '|', 'lr:',
                  float(optimizer.param_groups[0]['lr']), '|', 'time:',
                  time.time() - iter_time)

        scheduler.step(np.mean(epoch_loss))

        print('Saving parameters in model on epoch', epoch_num + 1)
        torch.save(
            retinanet.state_dict(),
            './param/param_epoch' + str(epoch_num + 1).zfill(3) + '.pkl')
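A minimal way to launch the training loop above, assuming the cuda flag, the COCO data path and the ./param/ output directory referenced inside train() already exist:

if __name__ == '__main__':
    train(batch_size=2, learning_rate=1e-2, train_epoch=100)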
Example #11
    def mergeFilesByName(cls, from_, to_):
        common_files, file2merged = [], []  # list of successfully merged files
        processed = 0
        for dir_path, dir_names, file_names in os.walk(from_):
            # build a table describing each file
            file_descriptors = pd.DataFrame(columns=('filename', 'common_name',
                                                     'anchor_name',
                                                     'anchor_cols'))
            for idx, filename in enumerate(file_names):
                file_fullname = os.path.join(dir_path, filename)
                processed += 1
                if processed % 100 == 0:
                    G.log.info('Merging %d files. %s', processed,
                               file_fullname)
                # compute the timestamp anchor; skip files that have none
                try:
                    anchor = Anchor(file_fullname)
                except UserWarning as err:
                    G.log.debug('Failed to process\t%s, ignored.\t%s',
                                file_fullname, str(err))
                    continue

                common_name = G.fileMergePattern.sub(
                    '',
                    os.path.splitext(filename)[0])  # common name used for splitting the log
                if not common_name:  # the file name was all digits
                    common_name = 'digital1'
                file_descriptors.loc[
                    idx] = file_fullname, common_name, anchor.name, anchor.colRange  # add to the descriptors

            if not len(file_descriptors):  # none of the files had an anchor
                continue

            path_to = dir_path.replace(from_, to_)
            os.makedirs(path_to, exist_ok=True)  # create the target directory if needed

            # group files in the same directory whose names are similar
            for k_, v_ in file_descriptors.groupby(
                ['anchor_name', 'common_name']):
                v_ = v_.sort_values('filename')  # sort by file name so files are merged in order
                # merge the files of one group into a single file
                common_name = '-'.join(k_)
                common_file_fullname = os.path.join(path_to, common_name)
                common_files.append(common_file_fullname)
                with open(common_file_fullname, 'a', encoding='utf-8') as fp:
                    for file_fullname, anchor_name, anchor_colRange in zip(
                            v_['filename'], v_['anchor_name'],
                            v_['anchor_cols']):
                        try:
                            for line in open(file_fullname,
                                             'r',
                                             encoding='utf-8'):
                                fp.write(line)

                            file2merged.append([
                                file_fullname, anchor_name, anchor_colRange,
                                common_file_fullname
                            ])
                        except Exception as err:
                            G.log.warning('Failed to merge %s, ignored. %s',
                                          file_fullname, str(err))
                            continue
        G.log.info('Merged %d files from %s into %s', processed, from_, to_)
        return common_files, file2merged


if __name__ == '__main__':
    data = np.load('./data/data_try.npy', allow_pickle=True).item() 
    print('image:', data['img'].size())
    print('annot:', data['annot'].size())
    print('scale:', data['scale'])

    retinanet = RetinaNet()
    classification, localization = retinanet(data['img'])
    print('classification:', classification.size())
    print('localization:', localization.size())

    anchor = Anchor() 
    total_anchors = anchor(data['img'])
    print('Anchors:', total_anchors.shape)

    focal_loss = FocalLoss()
    cls_loss, loc_loss = focal_loss(classification, localization, total_anchors, data['annot'])
    print('classification loss:', cls_loss)
    print('localization loss:', loc_loss)

    '''
    # Visualize the positions of the positive anchors used in the FocalLoss computation
    '''
    # imshow_postive_anchors(data['img'], total_anchors[:, :], data['annot'])

    '''
    # The sample data saved at ./data/data_try.npy can be inspected by running the code below
Example #13
    def label_anchors(self, scale, iou_true, iou_false):
        self.anchor_scale = scale
        num_anchor_sizes = len(self.anchor_sizes)

        # for each image
        for i in range(len(self.images)):

            img_height = self.images[i].shape[0]
            img_width = self.images[i].shape[1]

            label_height = int(img_height / scale)
            label_width = int(img_width / scale)

            aclass = np.zeros((label_height, label_width, num_anchor_sizes))
            areg = np.zeros((label_height, label_width, num_anchor_sizes, 4))

            label_tp = 1
            label_dc = 255

            #label_tp = 255
            #label_dc = 100

            # for each bounding box
            for bb in self.bb_labels[i]:

                bbox_gt = BBox(bb[1], bb[2], bb[3], bb[4])

                max_iou = 0.0
                idx_anchor_max = 0

                # for each anchor
                for idx_anchor in range(num_anchor_sizes):
                    #for idx_anchor in range(1, 2):
                    w_a = self.anchor_sizes[idx_anchor][0]  # anchor width
                    h_a = self.anchor_sizes[idx_anchor][1]  # anchor height

                    # for each anchor point
                    for y_a in range(label_height):
                        for x_a in range(label_width):

                            anchor = Anchor(x_a, y_a, w_a, h_a)

                            iou = self.__compute_iou(anchor=anchor,
                                                     scale=scale,
                                                     bbox=bbox_gt)

                            # positive
                            if iou >= iou_true:
                                aclass[y_a, x_a, idx_anchor] = label_tp
                                areg[y_a, x_a,
                                     idx_anchor] = self.__compute_reg(
                                         anchor=anchor * scale, bb_gt=bbox_gt)

                            # don't care
                            elif iou > iou_false and aclass[y_a, x_a,
                                                            idx_anchor] == 0:
                                aclass[y_a, x_a, idx_anchor] = label_dc
                                areg[y_a, x_a,
                                     idx_anchor, :] = self.__compute_reg(
                                         anchor=anchor * scale, bb_gt=bbox_gt)

                            # find the anchor with the maximum IoU.
                            if iou >= max_iou:
                                max_iou = iou
                                idx_anchor_max = idx_anchor
                                x_max, y_max = x_a, y_a

                #print("anchor: {}, max_iou: {}".format(idx_anchor_max, max_iou))

                if max_iou > iou_false:
                    aclass[y_max, x_max, idx_anchor_max] = label_tp

            self.anchor_cls.append(aclass)
            self.anchor_reg.append(areg)

            if False:
                cv2.namedWindow("img")
                cv2.imshow("img", self.images[i])

                cv2.namedWindow("anchor0")
                cv2.imshow("anchor0", self.anchor_cls[i][:, :, 0] / 255)

                cv2.namedWindow("anchor1")
                cv2.imshow("anchor1", self.anchor_cls[i][:, :, 1] / 255)

                cv2.waitKey(0)

        return self.anchor_cls
Example #14
class Categorizer(object):
    """
    新建对象或者调用trainModel方法,可以生成Categorizer模型
    调用predict方法,可以预测新记录的类型及其置信度
    $DATA/models/l2file_info.csv:记录(\t分割)每个样本文件名称、定界符位置
    $DATA/l2cache/: 存储各样本文件。每个样本文件由同类日志原始样本合并而成
    1. 日志文件中记录的识别。每条记录大都会在第一行特定位置输出日期和时间,因此以特定位置的时间戳(hh:mm:ss)判断上一记录
       的结束和下一记录的开始
    2. 记录聚类的首要目标是把程序执行到某处输出的记录汇聚成一类。程序在某点输出日志,大致会包含几种信息:
    2.1 日期、时间等几乎所有记录都包含的共有信息:这些对聚类结果没有影响,不必单独考虑
    2.2 常数字符串和枚举型数据(如INFO、ERROR等): 这是这一类记录最典型的独有特征,应尽量突出其在聚类中的比重
    2.3 URL、IP、Java包、path等变量:应尽量识别出来并归一化成常数字符串,改善聚类效果
    2.4 字符串变量(应用系统的数据)和数字变量:上两类有效特征在每个记录中不会超过10个,字符串/数字变量可能会很多,这会严重
         干扰聚类效果、影响运算速度,应尽量减少。数字可简单滤除。字符串变量可考虑去掉dfs字典中低频词
    3. 后续处理可能有价值的目标包括:
    3.1 数字变量中可能包含错误码、代码等高值信息,可考虑提取和利用的手段
    3.2 对于记录数特别多的类别(太粗),可根据枚举型、IP、URL、错误码等进一步细化分类
    3.4 数字变量中可能包含时长、数量等指标数据,可考虑提取和利用的手段
    """
    __RuleSets = []  # replacement, stop-word and k-shingle rules for text processing
    for rule_section in sorted([
            section for section in G.cfg.sections()
            if section.split('-')[0] == 'RuleSet'
    ]):
        replace_rules, stop_words, k_list = [], [], []
        for key, value in G.cfg.items(rule_section):
            if key == 'stopwords':
                stop_words = value.split(',')
            elif key == 'k-shingles':
                k_list = eval(value)
            else:
                replace_from, replace_to = value.split('TO')
                replace_rules.append((re.compile(replace_from.strip(),
                                                 re.I), replace_to.strip()))
        __RuleSets.append((rule_section, replace_rules, stop_words, k_list))

    __LeastDocuments = G.cfg.getint('RecordCluster',
                                    'LeastRecords')  # below this sample count clustering is pointless
    __LeastTokens = G.cfg.getint('RecordCluster',
                                 'LeastTokens')  # minimum dictionary size; below it clustering is pointless
    __KeepN = G.cfg.getint('RecordCluster', 'KeepN')  # maximum dictionary size, to limit computation
    __NoBelow = G.cfg.getfloat('RecordCluster',
                               'NoBelow')  # words appearing in fewer documents than this ratio are dropped from the dictionary to reduce noise and computation

    __Top5Ratio = G.cfg.getfloat('RecordCluster',
                                 'Top5Ratio')  # share of samples in the Top5 classes; above it the clustering result is unacceptable
    __MaxCategory = G.cfg.getint('RecordCluster',
                                 'MaxCategory')  # maximum number of clusters to try, to limit computation
    __Quantile = G.cfg.getfloat('RecordCluster',
                                'Quantile')  # class boundary: the point of the class at this quantile serves as its border

    def __init__(self, dataset=None, anchor=None, model_file=None):
        self.__anchor = anchor  # timestamp anchor (Anchor)
        self.__ruleSet = None  # regular expressions used to preprocess the file
        self.__d = None  # Dictionary
        self.__k = None  # clustering model (KMeans)
        self.__a = None  # cluster names
        self.__p = None  # sample share of each class
        self.__b = None  # squared distance from the farthest point to the (0.8) quantile point
        self.__q = None  # squared distance from the (0.8) quantile point to the centre

        if model_file is not None:  # load the model from a model file
            if not os.path.exists(model_file):
                model_file += '.mdl'
                if not os.path.exists(model_file):
                    model_file = os.path.join(G.projectModelPath, model_file)
            model = joblib.load(model_file)
            self.__anchor = model[0]  # timestamp anchor (Anchor)
            self.__ruleSet = model[1]  # regular expressions used to preprocess the file
            self.__d = model[2]  # Dictionary
            self.__k = model[3]  # clustering model (KMeans)
            self.__a = model[4]  # cluster names
            self.__p = model[5]  # sample share of each class
            self.__b = model[6]  # squared distance from the farthest point to the (0.8) quantile point
            self.__q = model[7]  # squared distance from the (0.8) quantile point to the centre
        elif dataset is not None:  # train the model from samples
            self.trainModel(dataset, anchor)

    # Train a model and save it to $models/xxx.mdl. dataset: absolute/relative path of a sample file, or an iterable stream of sample text
    def trainModel(self, dataset, anchor=None):
        """

        :param dataset:
        :param anchor:
        """
        samples_file, temple_fp = self.__buildSamplesFile(
            dataset)  # obtain a valid sample file name
        if anchor is None:  # extract the timestamp anchor from the sample file
            self.__anchor = Anchor(samples_file)

        for self.__ruleSet in self.__RuleSets:
            # preprocess the log file into record vectors and build the dictionary; vectors is a sparse matrix (rows: records, columns: words)
            self.__d, vectors = self.__buildVectors(samples_file)
            if self.__d is None:  # dictionary too small, re-sample with the next rule set
                continue
            start_k = self.__findStartK(vectors)  # quickly locate a starting K with a reasonably balanced distribution
            if start_k is None:  # clustering unbalanced, re-sample with the next rule set
                continue
            preferred_k = self.__pilotClustering(vectors,
                                                 start_k)  # pilot-cluster over several K values and return the best K
            if preferred_k == 0:  # no suitable K, re-sample with the next rule set
                continue

            # re-cluster to obtain the model (vector counts, centres and distances) and the classification (vector -> class)
            self.__k, self.__a, self.__p, self.__b, self.__q = self.__buildClusterModel(
                preferred_k, vectors)

            joblib.dump((self.__anchor, self.__ruleSet, self.__d, self.__k,
                         self.__a, self.__p, self.__b, self.__q),
                        os.path.join(G.projectModelPath,
                                     samples_file + '.mdl'))  # save the model for later use

            self.__d.save_as_text(
                os.path.join(G.logsPath,
                             samples_file + '.dic.csv'))  # save the text dictionary for manual review

            df = DataFrame({
                'type': self.__a,
                'sample_ratio': self.__p,
                'quantile_distance': self.__q,
                'border_quantile_distance': self.__b
            })
            df.to_csv(os.path.join(G.logsPath, samples_file + '.mdl.csv'),
                      sep='\t')  # save the clustering model for manual review

            a = self.predict(samples_file)  # save the clustering result for manual review
            date_time = [
                strftime('%Y-%m-%d %H:%M:%S', localtime(seconds))
                for seconds in a[3]
            ]
            df = DataFrame({
                'type_name': a[1],
                'confidence': a[2],
                'time': date_time,
                'record': a[4],
                'words': a[5]
            })
            df.to_csv(os.path.join(G.logsPath,
                                   os.path.split(samples_file)[1] +
                                   '.out.csv'),
                      index=False,
                      sep='\t')

            G.log.info('Model saved to %s successfully.' %
                       os.path.join(G.projectModelPath, samples_file + '.mdl'))
            break
        else:
            raise UserWarning(
                'Cannot generate qualified corpus by all RuleSets')

        if temple_fp: os.remove(samples_file)

    # Build a sample file from dataset (utf-8; an absolute/relative path to a sample file, or an iterable stream of sample text)
    @staticmethod
    def __buildSamplesFile(dataset):
        samples_file, temple_fp = None, False
        if type(dataset) is str:  # the sample is a file name
            if os.path.exists(dataset):  # absolute path
                samples_file = dataset
            elif os.path.exists(os.path.join(G.l2_cache,
                                             dataset)):  # relative path (under $l2_cache)
                samples_file = os.path.join(G.l2_cache, dataset)

        if samples_file is None:  # the input is not a file name
            temple_fp = NamedTemporaryFile(mode='w+t',
                                           delete=False,
                                           encoding='utf-8')
            size = 0
            for data in dataset:
                temple_fp.write(data)
                size += len(data)
                if size > 102400:  # 100K
                    break
            else:
                raise UserWarning(
                    'Failed to build Model: samples(%dK) is too small, or wrong filename'
                    % (size / 1024))
            samples_file = temple_fp.name
            temple_fp.close()

        return samples_file, temple_fp

    # Vectorize the documents. dataset-[document:M]-[[word]]-[[token]]-[BoW:M]-corpus-tfidf-dictionary:N, [vector:M*N]
    def __buildVectors(self, dataset_file):
        lines = 0
        dct = Dictionary()
        tmp_file = TemporaryFile(mode='w+t', encoding='utf-8')
        for doc_idx, (document,
                      lines) in enumerate(self.__buildDocument(dataset_file)):
            dct.add_documents([document])
            tmp_file.write(' '.join(document) + '\n')
            if doc_idx % 500 == 499:
                G.log.debug('%d', doc_idx)
        if dct.num_docs < self.__LeastDocuments:  # dictionary or document count too small; clustering is pointless
            tmp_file.close()
            raise UserWarning('Too few records[%d]' % dct.num_docs)

        # drop low-frequency words to compact the dictionary
        num_token = len(dct)
        no_below = int(min(self.__NoBelow, int(dct.num_docs / 50)))
        dct.filter_extremes(no_below=no_below,
                            no_above=0.999,
                            keep_n=self.__KeepN)
        dct.compactify()
        G.log.info(
            'Dictionary[%d tokens, reduced from %d] built with [%s]. '
            '[%d]records(%d lines, %d words) in %s', len(dct), num_token,
            self.__ruleSet[0], dct.num_docs, lines, dct.num_pos, dataset_file)
        if len(dct) < self.__LeastTokens:  # dictionary too small, re-sample
            G.log.info('Too few tokens[%d], re-sample with the next RuleSet.' %
                       (len(dct)))
            tmp_file.close()
            return None, None

        # build the tf-idf bag-of-words and the document vectors
        tfidf_model = TfidfModel(dictionary=dct, normalize=False)
        vectors = np.zeros((dct.num_docs, len(dct)))
        tmp_file.seek(0)
        for doc_idx, new_line in enumerate(tmp_file):
            for (word_idx, tf_idf_value) in tfidf_model[dct.doc2bow(
                    new_line.split())]:  # [(id, tf-idf)...], ids in ascending order
                vectors[doc_idx, word_idx] = tf_idf_value
        G.log.info('[%d*%d]Vectors built, %.2f%% non-zeros.' %
                   (dct.num_docs, len(dct),
                    dct.num_nnz * 100 / len(dct) / dct.num_docs))
        tmp_file.close()
        return dct, vectors

    # Preprocess the file, yielding the word list of each record.
    def __buildDocument(self, dataset_file):
        line_idx, record = 0, ''
        with open(dataset_file, 'r', encoding='utf-8') as fp:
            for line_idx, next_line in enumerate(fp):
                try:
                    # check whether the delimiter position holds a timestamp, which would complete a record
                    absent = self.__anchor.getAnchorTimeStamp(
                        next_line) is None
                    if absent or (record == ''):
                        if absent ^ (record
                                     == ''):  # the first line is a delimiter, or the current line is not one: the next record has not started yet
                            record += next_line
                        continue

                    document = G.getWords(
                        record, rule_set=self.__ruleSet
                    )  # complete record (variable substitution / stop words / tokenisation / k-shingles) -> [word]

                    # word list obtained; prepare for the next iteration
                    record = next_line  # the current line starts the next record
                    yield document, line_idx
                except (UnicodeError, UnicodeDecodeError, UnicodeEncodeError):
                    G.log.exception(
                        'Record [%s] ignored due to the following error:',
                        record)
                    record = ''  # clear and discard the current record
                    continue
        if record != '':  # handle the last record
            try:
                # complete record (variable substitution / stop words / tokenisation / k-shingles) -> [word]
                yield G.getWords(record, rule_set=self.__ruleSet), line_idx
            except (UnicodeError, UnicodeDecodeError, UnicodeEncodeError):
                G.log.exception(
                    'Record [%s] ignored due to the following error:', record)
        return  # the generator simply ends here (raising StopIteration would be a RuntimeError under PEP 479)

    # Starting from k=64, use bisection to find a K whose Top5 classes hold less than the configured share of samples
    def __findStartK(self, vectors):
        k_from, k_, k_to = 1, 64, 0
        while k_ < min(self.__MaxCategory, len(vectors)):
            kmeans = KMeans(n_clusters=k_).fit(vectors)  # cluster
            n = min(5, int(k_ * 0.1) + 1)
            top5_ratio = sum([
                v for (k, v) in Counter(kmeans.labels_).most_common(n)
            ]) / vectors.shape[0]
            G.log.debug(
                'locating the starter. k=%d, SSE= %f, Top%d labels=%d%%', k_,
                kmeans.inertia_, n, top5_ratio * 100)

            if top5_ratio < self.__Top5Ratio:  # search downwards (towards smaller k)
                if k_ - k_from < 4:  # close to the lower bound; rough starting point found
                    G.log.info('start k=%d', k_from)
                    return k_from
                k_to = k_ - 1
                k_ = k_from + int((k_ - k_from) / 2)
            else:  # search upwards (towards larger k)
                if k_ < k_to < k_ + 4:  # close to the upper bound; rough starting point found
                    G.log.info('start k=%d', k_)
                    return k_
                k_from = k_ + 1
                k_ = k_to - int((k_to - k_) / 2) if k_to > 0 else k_ * 2

            if kmeans.inertia_ < 1e-5:  # already fully separated, yet still unbalanced
                break

        G.log.info('No starter found')
        return None  # not found: re-sample

    # Pilot clustering: compute each cluster's SSE (sum of squared errors) as the basis for choosing k with the elbow method
    def __pilotClustering(self, vectors, k_from=1):
        cell_norm_factor = vectors.shape[1] * vectors.shape[
            0]  # normalise by rows (samples) and columns (dictionary width)

        k_, sse_set = 0, []
        for k_ in range(k_from, k_from + 3):
            kmeans = KMeans(n_clusters=k_).fit(vectors)  # pilot clustering
            sse = kmeans.inertia_ / cell_norm_factor
            G.log.debug('pilot clustering. k=%d, normSSE= %f', k_, sse)
            sse_set.append(sse)
        last_indicator = (sse_set[0] + sse_set[2]) / sse_set[1]  # relative value of the second-order difference
        last_k = k_from + 2

        maxima = None  # (k, kmeans, sse, indicator)
        prefer = (0, None, 0, 0, 0
                  )  # (k_, kmeans, sse, indicator, ratio of top5 labels)
        last_top5_value, last_top5_idx = 100, 1
        for k_ in range(k_from + 3, self.__MaxCategory):
            kmeans = KMeans(n_clusters=k_).fit(vectors)  # pilot clustering
            sse = kmeans.inertia_ / cell_norm_factor
            G.log.debug('pilot clustering. k=%d, normSSE= %f', k_, sse)
            if sse >= sse_set[-1]:  # the SSE increased: treat it as noise and skip
                continue

            sse_step = (sse - sse_set[-1]) / (k_ - last_k
                                              )  # use SSE_step instead of SSE to tolerate noisy points
            sse_set.pop(0)
            sse_set.append(sse_set[-1] + sse_step)
            indicator = (sse_set[-3] + sse_set[-1]) / sse_set[-2]

            if indicator > last_indicator:  # started increasing
                maxima = [k_, kmeans, sse, indicator]
            elif maxima is not None and indicator < last_indicator:  # decreasing after an increase: the previous point is a local maximum
                n = min(5, int(k_ * 0.1) + 1)
                top5_ratio = sum([
                    v for (k, v) in Counter(maxima[1].labels_).most_common(n)
                ]) / vectors.shape[0]
                if prefer[3] < maxima[
                        3] and top5_ratio < self.__Top5Ratio:  # the Top5 label share is acceptable (not unbalanced)
                    prefer = maxima + [top5_ratio]
                G.log.info(
                    'Maxima point. k=(%d,%.2f) normSSE=%.2f, Top%d labels=%.1f%%. Preferred (%d,%.2f)',
                    maxima[0], maxima[3], maxima[2], n, top5_ratio * 100,
                    prefer[0], prefer[3])
                maxima = None  # reset, ready for the next local maximum
                if top5_ratio < last_top5_value - 0.001:
                    last_top5_value = top5_ratio
                    last_top5_idx = k_
                else:
                    if k_ - last_top5_idx > 50:  # the ratio has not dropped for 50 consecutive values of k_
                        break

            if sse < 1:  # already converged to a small value and a candidate was found; no need to keep increasing k
                break

            sse_set[-1] = sse  # with no noisy points these are just redundant assignments; otherwise update the current value for the next iteration
            sse_set[-2] = sse_set[-1] - sse_step
            last_indicator = indicator
            last_k = k_

        G.log.info(
            'pilot-clustering[k:1-%d] finished. preferred k=(%d, %.2f),normSSE=%.2f, TopN labels=%.1f%%'
            % (k_, prefer[0], prefer[3], prefer[2], prefer[4] * 100))
        return prefer[0]

    # Re-cluster to obtain each cluster's centre, quantile distance, boundary distance and sample share
    def __buildClusterModel(self, k_, vectors):
        # cluster again and group the results. KMeans does not support cosine distance
        kmeans = KMeans(n_clusters=k_, n_init=20, max_iter=500).fit(vectors)
        norm_factor = -vectors.shape[1]  # normalise by dictionary width so different models are comparable
        groups = DataFrame({
            'C':
            kmeans.labels_,
            'S': [kmeans.score([v]) / norm_factor for v in vectors]
        }).groupby('C')
        alias = ['Type' + str(i) for i in range(k_)]  # cluster aliases, Type<i> by default, may be renamed manually
        proportions = groups.size() / len(vectors)  # share of this cluster's vectors among all clustered vectors
        quantiles = np.array([
            groups.get_group(i)['S'].quantile(self.__Quantile,
                                              interpolation='higher')
            for i in range(k_)
        ])
        boundaries = groups['S'].agg('max').values - quantiles  # distance from the farthest point of the cluster to its quantile point
        for i in range(k_):
            if boundaries[i] > quantiles[i]:  # clamp boundaries that are too far out
                boundaries[i] = quantiles[i]
            elif boundaries[i] == 0:  # avoid 0/0
                boundaries[i] = 1e-100

        G.log.info(
            'Model(k=%d) built. inertia=%.3f, max proportion=%.2f%%, max quantile=%.3f, max border=%.3f',
            k_, kmeans.inertia_,
            max(proportions) * 100, max(quantiles), max(boundaries))
        return kmeans, alias, proportions, boundaries, quantiles

    # Predict from a file or a list of lines; returns lists of class id, class name, record confidence, timestamp, raw text and word list
    # Confidence is capped at 99.99; a value > 1 means the distance to the centre is below the 0.8 quantile point (very reliable);
    # the floor is -99.99, and a value < 0 means the distance exceeds the farthest point, i.e. the record does not belong to this class
    def predict(self, dataset):
        """

        :param dataset:
        :return:
        """
        if self.__k is None:
            raise UserWarning('Failed to predict: the model does not exist!')

        if type(dataset) is str:  # if a file name was passed, read its content into dataset
            if not os.path.exists(dataset):  # not an existing path: fall back to a path under $l1_cache
                dataset = os.path.join(G.l1_cache, dataset)
            dataset = [line for line in open(dataset, encoding='utf-8')]

        timestamps, records, documents = self.__getCorpus(dataset)
        vectors = self.__getVectors(
            documents)  # compute the vectors [vector: Record * dictionary]
        type_ids, type_names, confidences = self.__getResult(
            vectors)  # predict the classes and compute the confidence
        return type_ids, type_names, confidences, timestamps, records, documents

    # Preprocess the raw dataset; returns the lists of timestamp anchors, records and documents: [timestamps] [records] [documents]
    def __getCorpus(self, dataset):
        timestamps, records, documents = [], [], []  # initialise the return values
        record = ''
        timestamp = None
        for new_line in dataset:
            try:
                # get the record's anchor timestamp; None if there is none
                next_timestamp = self.__anchor.getAnchorTimeStamp(new_line)

                # check whether the delimiter position holds a timestamp, which would complete a record
                absent = next_timestamp is None
                if absent or (record == ''):
                    if absent ^ (record == ''):  # the first line is a delimiter, or the current line is not one: the next record has not started yet
                        if not absent:
                            timestamp = next_timestamp  # needs to be fixed!!!!
                        record += new_line
                    continue
                words = G.getWords(
                    record, rule_set=self.__ruleSet
                )  # complete record (variable substitution / stop words / tokenisation / k-shingles) -> [word]

                timestamps.append(timestamp)  # keep the timestamp
                records.append(record)  # keep the record
                documents.append(words)  # keep the document
                timestamp = next_timestamp  # keep the next timestamp
                record = new_line  # the current line starts the next record, ready for the next iteration
            except (UnicodeError, UnicodeDecodeError, UnicodeEncodeError):
                G.log.exception(
                    'Record [%s] ignored due to the following error:', record)
                record = ''  # clear and discard the current record
                continue

        if record != '':  # handle the last record
            try:
                words = G.getWords(
                    record, rule_set=self.__ruleSet
                )  # complete record (variable substitution / stop words / tokenisation / k-shingles) -> [word]
                timestamps.append(timestamp)  # keep the timestamp
                records.append(record)  # keep the record
                documents.append(words)  # keep the document
            except (UnicodeError, UnicodeDecodeError, UnicodeEncodeError):
                G.log.exception(
                    'Record [%s] ignored due to the following error:', record)

        return timestamps, records, documents

    # Build the unnormalised TF-IDF bag-of-words and document vectors
    def __getVectors(self, corpus):
        tfidf_model = TfidfModel(dictionary=self.__d, normalize=False)
        vectors = np.zeros((len(corpus), len(self.__d)))
        for doc_idx, document in enumerate(corpus):
            for (word_idx, tf_idf_value) in tfidf_model[self.__d.doc2bow(
                    document)]:  # [(id, tf-idf)...], ids in ascending order
                vectors[doc_idx, word_idx] = tf_idf_value
        return vectors

    # Predict the classes and compute the confidence. < 0 means beyond the boundary (definitely wrong); > 1 means closer than the quantile point (very reliable)
    def __getResult(self, vectors):
        type_ids = self.__k.predict(vectors)  # predict the record classes with the clustering model
        type_names = [self.__a[label] for label in type_ids]
        norm_factor = -vectors.shape[1]  # normalise by dictionary width so different models are comparable
        confidences = [
            1 + round(
                (self.__k.score([v]) / norm_factor + self.__q[type_ids[i]]) /
                (self.__q[type_ids[i]] + self.__b[type_ids[i]]), 2)
            for i, v in enumerate(vectors)
        ]  # #### the formula is wrong and should be corrected
        confidences = np.array(confidences, copy=False)
        confidences[confidences > 99.99] = 99.99  # values that are too large or too small carry little meaning
        confidences[confidences < -99.99] = -99.99
        #        labels[confidences <= 0] = len(vectors) + 1  # put unreliable records into a new class

        return type_ids, type_names, confidences
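A hedged usage sketch for the Categorizer class above; the sample file name is a hypothetical placeholder, and whether it lives under $l2_cache or is passed as an absolute path follows the lookup rules in __buildSamplesFile() and predict():

if __name__ == '__main__':
    # Train a model from a merged sample file (hypothetical name).
    categorizer = Categorizer(dataset='merged_samples.log')

    # Alternatively, reload a previously saved model:
    # categorizer = Categorizer(model_file='merged_samples.log.mdl')

    type_ids, type_names, confidences, timestamps, records, documents = \
        categorizer.predict('merged_samples.log')
    for name, confidence in list(zip(type_names, confidences))[:5]:
        print(name, round(float(confidence), 2))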
Example #16
def predict_one_image(retinanet, image):
    # scale the image to [0, 1]
    image = image.astype(np.float32) / 255.0
    print('source image shape:', image.shape)

    # resize the image to a size suitable for the network; see 'class Resizer(object)' in 'utils.py' for details
    min_side, max_side = 400, 800
    rows, cols, cns = image.shape
    smallest_side = min(rows, cols)
    scale = min_side / smallest_side
    largest_side = max(rows, cols)
    if largest_side * scale > max_side:
        scale = max_side / largest_side
    image = cv2.resize(image,
                       (int(round(cols * scale)), int(round(rows * scale))))
    print('resize image shape:', image.shape)
    rows, cols, cns = image.shape
    pad_w = 32 - rows % 32
    pad_h = 32 - cols % 32
    net_input = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
    net_input[:rows, :cols, :] = image.astype(np.float32)

    # reshape net_input into the format expected by RetinaNet
    net_input = torch.Tensor(net_input)
    net_input = net_input.unsqueeze(dim=0)
    net_input = net_input.permute(0, 3, 1, 2)
    print('RetinaNet input size:', net_input.size())

    anchor = Anchor()
    decoder = Decoder()

    if cuda:
        net_input = net_input.cuda()
        anchor = anchor.cuda()
        decoder = decoder.cuda()

    total_anchors = anchor(net_input)
    print('create anchor number:', total_anchors.size()[0])
    classification, localization = retinanet(net_input)

    pred_boxes = decoder(total_anchors, localization)

    # boxes in pred_boxes may fall outside the image boundary and need to be clamped back
    height, width, _ = image.shape
    pred_boxes[:, 0] = torch.clamp(pred_boxes[:, 0], min=0)
    pred_boxes[:, 1] = torch.clamp(pred_boxes[:, 1], min=0)
    pred_boxes[:, 2] = torch.clamp(pred_boxes[:, 2], max=width)
    pred_boxes[:, 3] = torch.clamp(pred_boxes[:, 3], max=height)

    # classification: [1, -1, 80]
    # torch.max(classification, dim=2, keepdim=True): [(1, -1, 1), (1, -1, 1)]
    # scores: [1, -1, 1], the maximum class confidence of each anchor (ss holds the corresponding class ids)
    scores, ss = torch.max(classification, dim=2, keepdim=True)

    scores_over_thresh = (scores > 0.05)[0, :, 0]  # [True or False]
    if scores_over_thresh.sum() == 0:
        # no boxes to NMS, just return
        nms_scores = torch.zeros(0)
        nms_cls = torch.zeros(0)
        nms_boxes = torch.zeros(0, 4)
    else:
        # keep the classification entries of anchors whose max confidence exceeds the threshold
        classification = classification[:, scores_over_thresh, :]
        # keep the pred_boxes of anchors whose max confidence exceeds the threshold
        pred_boxes = pred_boxes[scores_over_thresh, :]
        # keep the scores of anchors whose max confidence exceeds the threshold
        scores = scores[:, scores_over_thresh, :]

        nms_ind = nms(pred_boxes[:, :], scores[0, :, 0], 0.5)

        nms_scores, nms_cls = classification[0, nms_ind, :].max(dim=1)
        nms_boxes = pred_boxes[nms_ind, :]

    print('Predict bounding boxes number:', nms_scores.size()[0])
    bounding_boxes = [nms_scores, nms_cls, nms_boxes]

    imshow_result(image, bounding_boxes)
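A sketch of how predict_one_image() could be driven end to end; the checkpoint and image paths are hypothetical, and the state dict is assumed to be compatible with the plain (non-DataParallel) RetinaNet module:

if __name__ == '__main__':
    retinanet = RetinaNet()
    state = torch.load('./param/param_epoch100.pkl', map_location='cpu')  # hypothetical checkpoint
    retinanet.load_state_dict(state)
    retinanet.eval()
    if cuda:
        retinanet = retinanet.cuda()

    image = cv2.imread('./data/test.jpg')  # hypothetical test image
    with torch.no_grad():
        predict_one_image(retinanet, image)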
Example #17
def train():
    """
    Introduction
    ------------
        训练Retinanet模型
    """
    train_transform = Augmentation(size=config.image_size)
    # train_dataset = COCODataset(config.coco_train_dir, config.coco_train_annaFile, config.coco_label_file, training = True, transform = train_transform)
    from VOCDataset import build_vocDataset
    train_dataset = build_vocDataset(config.voc_root)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.train_batch,
                                  shuffle=True,
                                  num_workers=2,
                                  collate_fn=train_dataset.collate_fn)
    print("training on {} samples".format(train_dataset.__len__()))
    net = RetinaNet(config.num_classes, pre_train_path=config.resnet50_path)
    net.cuda()
    optimizer = optim.SGD(net.parameters(),
                          lr=config.learning_rate,
                          momentum=0.9,
                          weight_decay=1e-4)
    criterion = MultiBoxLoss(alpha=config.focal_alpha,
                             gamma=config.focal_gamma,
                             num_classes=config.num_classes)
    anchors = Anchor(config.anchor_areas, config.aspect_ratio,
                     config.scale_ratios)
    anchor_boxes = anchors(input_size=config.image_size)
    for epoch in range(config.Epochs):
        batch_time, loc_losses, conf_losses = AverageTracker(), AverageTracker(
        ), AverageTracker()
        net.train()
        net.freeze_bn()
        end = time.time()
        for index, (image, gt_boxes, labels) in enumerate(train_dataloader):
            loc_targets, cls_targets = [], []
            image = image.cuda()
            loc_preds, cls_preds = net(image)
            batch_num = image.shape[0]
            for idx in range(batch_num):
                gt_box = gt_boxes[idx]
                label = labels[idx]
                loc_target, cls_target = encode(anchor_boxes, gt_box, label)
                loc_targets.append(loc_target)
                cls_targets.append(cls_target)
            loc_targets = torch.stack(loc_targets).cuda()
            cls_targets = torch.stack(cls_targets).cuda()
            loc_loss, cls_loss = criterion(loc_preds, loc_targets, cls_preds,
                                           cls_targets)
            loss = loc_loss + cls_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loc_losses.update(loc_loss.item(), image.size(0))
            conf_losses.update(cls_loss.item(), image.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if index % config.print_freq == 0:
                print(
                    'Epoch: {}/{} Batch: {}/{} loc Loss: {:.4f} {:.4f} conf loss: {:.4f} {:.4f} Time: {:.4f} {:.4f}'
                    .format(epoch, config.Epochs, index, len(train_dataloader),
                            loc_losses.val, loc_losses.avg, conf_losses.val,
                            conf_losses.avg, batch_time.val, batch_time.avg))
        if epoch % config.save_freq == 0:
            print('save model')
            torch.save(
                net.state_dict(),
                config.model_dir + 'train_model_epoch{}.pth'.format(epoch + 1))
Example #18
    def __init__(self, parent):
        # Initialize the parent class
        QGLWidget.__init__(self, parent)
        self._zoom_factor = 1.0
        self._pan_x = 0
        self._pan_y = 0

        self.scene = Scene()

        self._drawHidden = False

        customer = Entity("Customer")
        self.scene.add_entity(customer)

        order = Entity("Order")
        order.set_drawable(True)
        self.scene.add_entity(order)

        age = Attribute("Age")
        age.setPrimaryKey(True)
        order.add_attribute(age)

        anchor = Anchor()
        anchor.anchor_to_rectangle(order)

        bezier = Bezier()
        #self.scene.add_entity(bezier)

        link1 = Link(anchor, age)
        order.addLink(link1)

        s0 = AutomataState("S0")
        s1 = AutomataState("S1")
        arc = AutomataArc(s0, s1)
        arc2 = AutomataArc(s0, s0)
        arc3 = AutomataArc(s1, s1)

        self.scene.add_entity(arc)
        self.scene.add_entity(arc2)
        self.scene.add_entity(arc3)
        self.scene.add_entity(s0)
        self.scene.add_entity(s1)

        self._mid_down_x = None
        self._mid_down_y = None

        self._mid_up_x = None
        self._mid_up_y = None

        self._left_down_x = None
        self._left_down_y = None

        self._left_up_x = None
        self._left_up_y = None

        self._last_mid_down_x = None
        self._last_mid_down_y = None

        self._tab_down = False

        self._selected_object = None