def __init__(self):
        self.images = []
        for fmt in self.formats:
            for fl in glob.glob(f'{self.DIR}\\*.{fmt}'):
                self.images.append(fl)

        self.images.sort()
        self.workonid = self.images[0]

        self.objDict = {}
        for fl in self.images:
            self.objDict[fl] = []

        if self.old:
            data = CONFIG.readOld(self.old)
            for el in data:
                bitz = CONFIG.splitbyfour(el[0][2:])

                for bit in bitz:
                    II = [int(I) for I in bit]
                    print(II)
                    self.objDict[el[0][0]].append(II)

        cv2.imshow('tool_1', self.grabImg())
        cv2.setMouseCallback('tool_1', self.onmouse)
Example #2
def test():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_path)
    config.vocab_size = len(word2id)
    print('Loading test corpus =========================')
    x, y = load_corpus(config.test_path, word2id, max_sen_len=config.max_sen_len)
    # x, y = x[:10], y[:10]
    model = TextCNN(config)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(config.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        yhat = model.predict(sess, x)

    cat, cat2id = cat_to_id()
    y_cls = np.argmax(y, 1)
    # Evaluation
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_cls, yhat, target_names=cat))
    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_cls, yhat)
    print(cm)
Example #3
def train():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_file)
    config.vocab_size = len(word2id)
    print('Loading word2vec ==========================')
    word2vec = load_corpus_word2vec(config.corpus_w2v_file)
    print('Loading train corpus ========================')
    train = load_corpus(config.train_file,
                        word2id,
                        max_sen_len=config.max_sen_len)
    x_tr = train[:-1]
    y_tr = train[-1]
    print('Loading test corpus ==========================')
    test = load_corpus(config.test_file,
                       word2id,
                       max_sen_len=config.max_sen_len)
    x_te = test[:-1]
    y_te = test[-1]
    print('Training model ===============================')
    lstm = LSTM(config, embeddings=word2vec)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        lstm.fit(sess, x_tr, y_tr, x_te, y_te, config.save_dir,
                 config.print_per_batch)
Example #4
 def rm(self, tup):
     x, y = tup
     # iterate over a copy: removing items while iterating the same list skips elements
     for obj in self.objDict[self.workonid][:]:
         if CONFIG.isInside(x, y, obj):
             self.objDict[self.workonid].remove(obj)
Example #5
 def __init__(self):
     super(Attacker, self).__init__()  # call the parent class initializer
     self.attack = False
     self.conv1 = GCNConv(CONFIG.FeatureLen(), 180)
     self.conv2 = GCNConv(180, 120)
     self.lin1 = torch.nn.Linear(120, CONFIG.ClassNum())
     # this time the parameters that need adjusting are the inputs
     self.X = torch.tensor([1], dtype=torch.float)
     self.edges = torch.tensor([[], []], dtype=torch.long)
Example #6
def predict(x, label=False, prob=False):
    """
    :param x: 语句列表
    :param label: 是否以分类标签的形式:pos或neg输出。默认为:0/1
    :param prob: 是否以概率的形式输出。
    :return: 情感预测结果
    """
    if label and prob:
        raise ValueError("label and prob cannot both be True!")

    x = sent_to_id(x)
    config = CONFIG()
    model = TextCNN(config)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(config.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        y = model.predict(sess, x, prob=prob)

    if label:
        cat, _ = cat_to_id()
        y = [cat[w] for w in y.tolist()]
    return y
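
A minimal usage sketch (hedged: the sentences are made-up examples, and a trained checkpoint must already exist under config.save_dir):

sentences = ['这个电影太好看了', '剧情很差']   # hypothetical inputs
print(predict(sentences))               # default output: 0/1 class ids
print(predict(sentences, label=True))   # class labels, e.g. 'pos'/'neg'
print(predict(sentences, prob=True))    # class probabilities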
Example #7
def Cluster():
    # get the features of the samples to be clustered
    features = OpData.GetFeatures()
    features = features[CONFIG.TargetBegin():CONFIG.TargetEnd()]
    # print(features.shape)

    # build a clustering model
    cluster = KMeans(n_clusters=CONFIG.ClassNum())
    # cluster.fit(features)
    result = cluster.fit_predict(features)

    # write out the clustering results
    np.save("cluster_result.npy", result)
    np.savetxt("cluster_result.csv", result, delimiter=',', fmt="%d")

    class_num = np.zeros((CONFIG.ClassNum()))
    for r in result:
        class_num[r] += 1
    np.savetxt("class_num.csv", class_num, delimiter=',', fmt="%d")
Example #8
def init_add_edges() -> torch.tensor:
    """
    这是一个最基础的初始化方式,1节点连接1~100测试节点,2连接201~400测试节点,直到50000为止
    :return: 用这个函数来初始化加入的邻接矩阵
    """
    x = []
    y = []
    data = []
    # the three lists above record the source x, destination y and weight data
    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + i * 100 + j
            assert CONFIG.TargetEnd() <= src < CONFIG.TargetEnd() + 500
            assert CONFIG.TargetBegin() <= dst < CONFIG.TargetEnd()
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)
            data.append(1)
            data.append(1)
    assert len(x) == len(y) == len(data)
    # convert to a tensor
    edges = [x, y]
    return torch.tensor(edges, dtype=torch.long).to(device)
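
A quick sanity check, as a sketch: the loops above append 500 * 100 node pairs, each stored in both directions, so the returned tensor should have shape (2, 100000).

edges = init_add_edges()
assert edges.shape == (2, 500 * 100 * 2)  # 100000 directed edges
assert edges.dtype == torch.long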
Example #9
def train():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_path)
    print('Loading word2vec ==========================')
    word2vec = load_corpus_word2vec(config.corpus_word2vec_path)
    print('Loading train corpus ========================')
    x_tr, y_tr = load_corpus(config.train_path, word2id, max_sen_len=config.max_sen_len)
    print('Loading dev corpus ==========================')
    x_val, y_val = load_corpus(config.dev_path, word2id, max_sen_len=config.max_sen_len)
    print('Training model ===============================')
    tc = TextCNN(config, embeddings=word2vec)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        tc.fit(sess, x_tr, y_tr, x_val, y_val, config.save_dir, config.print_per_batch)
Example #10
def sent_to_id(inputs):
    """
    将语句进行分词,然后将词语转换为word_to_id中的id编码
    """
    sentences = []
    cut_sents = [jb.cut(w) for w in inputs]
    config = CONFIG()
    word2id = load_word2id(config.word2id_path)

    for cut_sent in cut_sents:
        sentence = [word2id.get(w, 0) for w in cut_sent]
        sentence = sentence[:config.max_sen_len]
        if len(sentence) < config.max_sen_len:
            sentence += [word2id['_PAD_']] * (config.max_sen_len - len(sentence))

        sentences.append(sentence)

    return np.asarray(sentences)
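
A usage sketch (hedged: assumes the word2id file at config.word2id_path exists; the inputs are made-up examples):

ids = sent_to_id(['这个电影很好看', '太差了'])  # hypothetical inputs
print(ids.shape)  # (2, config.max_sen_len): each row truncated or padded with _PAD_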
Example #11
def cut_cluster_by_100() -> list:
    """
    :return: a 500*100 list that marks how the edges are constructed
    """
    cut = [[]]
    class_divide = []
    for i in range(0, CONFIG.ClassNum()):
        class_divide.append([])
    cluster = np.load("cluster_result.npy")

    # print(cluster)
    cluster = cluster.tolist()
    # print(cluster)
    for c in range(0, len(cluster)):
        class_divide[cluster[c]].append(c)
    # print(class_divide)

    remain_data = []  # records the leftover samples for a second round of partitioning
    cut_i = 0

    for i in range(0, CONFIG.ClassNum()):
        # iterate over this cluster's members
        for j in range(0, len(class_divide[i])):
            if j < int(len(class_divide[i]) / 100) * 100:
                cut[cut_i].append(class_divide[i][j])
                if len(cut[cut_i]) == 100:
                    cut_i += 1
                    cut.append([])
            else:
                # print(j)
                remain_data.append(class_divide[i][j])
    for r in remain_data:
        cut[cut_i].append(r)
        # print(len(cut))
        if (len(cut[cut_i]) == 100) and (len(cut) != 500):
            cut_i += 1
            cut.append([])

    # print(cut)
    # for cut_ in cut:
    #     print(len(cut_))

    return cut
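
A sketch of the intended invariant (assumes the cluster_result.npy written by Cluster() exists): 500 buckets of 100 indices each, with any shortfall only in the trailing bucket.

cut = cut_cluster_by_100()
print(len(cut))                   # expect 500
print([len(b) for b in cut[:5]])  # expect 100 per bucket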
Example #12
def add_edges_by_cluster():
    cut = ClusteringTarget.cut_cluster_by_100()
    x = []
    y = []

    coo_x = []
    coo_y = []
    data = []

    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + cut[i][j]
            assert CONFIG.TargetEnd() <= src < CONFIG.TargetEnd() + 500
            assert CONFIG.TargetBegin() <= dst < CONFIG.TargetEnd()
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)

            coo_x.append(src - CONFIG.TargetEnd())
            coo_y.append(dst)
            data.append(1)

    # save in adjacency-matrix form:
    adj = scipy.sparse.coo_matrix((data, (coo_x, coo_y)), shape=(500, 593986))
    adj = adj.tocsr()
    with open("adj.pkl", 'wb') as f:  # 将数据写入pkl文件
        pickle.dump(adj, f)

    # convert to a tensor
    edges = [x, y]
    return torch.tensor(edges, dtype=torch.long).to(device)
Example #13
def GetFeatures() -> np.matrix:
    features_ndarray = OpFile.ReadFeatures()
    features_matrix = features_ndarray.reshape(
        (len(features_ndarray), CONFIG.FeatureLen()))
    return features_matrix
Example #14
 def __init__(self):
     super(GCNClassifier, self).__init__()  # call the parent class initializer
     self.conv1 = GCNConv(CONFIG.FeatureLen(), 180)
     self.conv2 = GCNConv(180, 120)
     # self.conv3 = GCNConv(100, CONFIG.ClassNum())
     self.lin1 = torch.nn.Linear(120, CONFIG.ClassNum())
Example #15
 def __init__(self, drop=0.5, conv1_hide=100, leaky=0.01):
     super(RPGGCN, self).__init__()  # call the parent class initializer
     self.drop = drop    # probability that an element is zeroed in dropout
     self.leaky = leaky
     self.conv1 = GCNConv(CONFIG.FeatureLen(), conv1_hide)
     self.conv2 = GCNConv(conv1_hide, CONFIG.ClassNum())
Example #16
    attack_model = Attacker().to(device)
    attack_model.load_state_dict(gcn_model.state_dict())
    # attack_dict = attack_model.state_dict()
    # state_dict = {k: v for k, v in gcn_model.items() if k in attack_model.keys()}
    # print(state_dict.keys())
    # attack_dict.update(state_dict)
    # attack_model.load_state_dict(attack_dict)
    print(">>>>> Load Model Finish")

    # freeze the parameters in the Attack model
    attack_model.freeze()
    print(">>>>> Freeze all parameters in Attacker")

    # get the labels of all the targets here
    _, pred = attack_model(data).max(dim=1)
    target_labels = pred[CONFIG.TargetBegin():CONFIG.TargetEnd()]
    print(target_labels)

    # load the features to be added into the model
    add_features = torch.zeros((500, 100), requires_grad=False).to(device)
    attack_model.loadX(add_features)
    # load the adjacency-matrix edges to be added into the model
    add_edges = init_add_edges()
    # add_edges = add_edges_by_cluster()
    # add_edges = add_edges_by_class(target_labels)
    attack_model.loadEdges(add_edges)

    # the attack process follows
    while True:
        attack_model.zero_grad()
        attack_model.loadX(add_features)
Example #17
class Image():
    DIR = CONFIG.fetch(r"dir")[0]
    formats = CONFIG.fetch(r'formats')
    chunks = []
    lowerBorder = int(CONFIG.fetch('Lborder')[0])
    OutDir = CONFIG.fetch(r"outdir")[0]
    prefix = CONFIG.fetch(r"prefix")[0]
    try:
        old = CONFIG.fetch('old_data')[0]
    except Exception:
        old = False

    def __init__(self):
        self.images = []
        for fmt in self.formats:
            for fl in glob.glob(f'{self.DIR}\\*.{fmt}'):
                self.images.append(fl)

        self.images.sort()
        self.workonid = self.images[0]

        self.objDict = {}
        for fl in self.images:
            self.objDict[fl] = []

        if self.old:
            data = CONFIG.readOld(self.old)
            for el in data:
                bitz = CONFIG.splitbyfour(el[0][2:])

                for bit in bitz:
                    II = [int(I) for I in bit]
                    print(II)
                    self.objDict[el[0][0]].append(II)

        cv2.imshow('tool_1', self.grabImg())
        cv2.setMouseCallback('tool_1', self.onmouse)

    def grabImg(self):
        canvas = cv2.imread(self.workonid)
        for obj in self.objDict[self.workonid]:
            x, y, w, h = obj
            if w < self.lowerBorder or h < self.lowerBorder:
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 0, 255), 4)
            else:
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), 4)
        return canvas.copy()

    def nextImage(self):
        cur = self.images.index(self.workonid)
        if cur + 1 > len(self.images) - 1:
            self.workonid = self.images[0]
        else:
            self.workonid = self.images[cur + 1]

    def prevImage(self):
        print('called')
        cur = self.images.index(self.workonid)
        if cur - 1 < 0:
            self.workonid = self.images[len(self.images) - 1]
        else:
            self.workonid = self.images[cur - 1]
        print(self.workonid)

    def grabRoi(self):
        roi = cv2.selectROI('tool_1', self.grabImg(), True)
        cv2.setMouseCallback('tool_1', self.onmouse)
        x, y, w, h = roi
        if h * w != 0:
            self.objDict[self.workonid].append([x, y, w, h])

    def onmouse(self, k, x, y, s, p):
        if k == cv2.EVENT_LBUTTONDBLCLK:
            print(x, y)
            self.rm((x, y))

    def rm(self, tup):
        x, y = tup
        # iterate over a copy: removing items while iterating the same list skips elements
        for obj in self.objDict[self.workonid][:]:
            if CONFIG.isInside(x, y, obj):
                self.objDict[self.workonid].remove(obj)

    def writeUp(self):
        out = f'{self.OutDir}\\{self.prefix}{CONFIG.getfreename(self.OutDir, self.prefix)}.txt'
        with open(out, 'a') as fl:
            for k, v in self.objDict.items():
                print(len(v))
                if len(v) > 0:
                    line = np.concatenate(v.copy())
                    print(f'{k} {int(len(line)/4)} {CONFIG.prettyfy(line)}',
                          file=fl)

    def run(self):
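        # Key bindings handled below: Esc (27) writes annotations and exits,
        # 'q' -> next image, 'e' -> previous image, 'w' -> draw a new ROI,
        # 's' -> write annotations without exiting.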
        while 1:
            try:
                cv2.imshow('tool_1', self.grabImg())

                key = cv2.waitKey(1)
                if key == 27:
                    self.writeUp()
                    break

                if key == ord('q'):
                    self.nextImage()

                if key == ord('e'):
                    self.prevImage()

                if key == ord('w'):
                    self.grabRoi()

                if key == ord('s'):
                    self.writeUp()

            except Exception as e:
                with open(f'{self.OutDir}\\logz.txt', 'a') as logz:
                    print(f'exception boiz, its-a {e} \n', file=logz)
                    quit()
Example #18
def add_edges_by_class(labels: torch.tensor) -> torch.tensor:
    labels_cpu = labels.cpu()
    labels_list = labels_cpu.numpy().tolist()
    cut = [[]]

    # class_divide buckets the indices by class
    class_divide = []
    for i in range(0, CONFIG.ClassNum()):
        class_divide.append([])

    for i in range(0, len(labels_list)):
        class_divide[labels_list[i]].append(i)

    remain_data = []
    cut_i = 0

    for i in range(0, CONFIG.ClassNum()):
        # iterate over this class's members
        for j in range(0, len(class_divide[i])):
            if j < int(len(class_divide[i]) / 100) * 100:
                cut[cut_i].append(class_divide[i][j])
                if len(cut[cut_i]) == 100:
                    cut_i += 1
                    cut.append([])
            else:
                # print(j)
                remain_data.append(class_divide[i][j])
    for r in remain_data:
        cut[cut_i].append(r)
        # print(len(cut))
        if (len(cut[cut_i]) == 100) and (len(cut) != 500):
            cut_i += 1
            cut.append([])
    # at this point, cut holds the finished partition mapping

    x = []
    y = []

    coo_x = []
    coo_y = []
    data = []

    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + cut[i][j]
            assert CONFIG.TargetEnd() <= src < CONFIG.TargetEnd() + 500
            assert CONFIG.TargetBegin() <= dst < CONFIG.TargetEnd()
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)

            coo_x.append(src - CONFIG.TargetEnd())
            coo_y.append(dst)
            data.append(1)

    # for i in range(CONFIG.TargetBegin(), CONFIG.TargetEnd()):
    #     if i in coo_y:
    #         continue
    #     else:
    #         print("ERROR!!!", i)
    # print("OK!")

    # save in adjacency-matrix form:
    adj = scipy.sparse.coo_matrix((data, (coo_x, coo_y)), shape=(500, 593986))
    adj = adj.tocsr()
    # print(adj)
    with open("adj.pkl", 'wb') as f:  # 将数据写入pkl文件
        pickle.dump(adj, f)

    # convert to a tensor
    edges = [x, y]
    # print(edges)
    return torch.tensor(edges, dtype=torch.long).to(device)