Example No. 1
 def __init__(self):
     self.config = Config()
     self.reader = Reader()
     self.layer = self.setup_layer()
     self.loss = self.setup_loss()
     self.metrics = self.setup_metrics()
     self.merged = tf.summary.merge_all()
     self.train_writer = tf.summary.FileWriter(self.config.root + '/event_detect/summary/cnn')
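This constructor, like the next two examples, wires up the TensorFlow 1.x summary pipeline: register summary ops, merge them, and stream them to a FileWriter. A minimal self-contained sketch of that pattern (assuming TensorFlow 1.x; the names and log directory below are illustrative, not from the source):

import tensorflow as tf

x = tf.placeholder(tf.float32, name='x')
loss = tf.square(x, name='loss')
tf.summary.scalar('loss', loss)      # register a scalar summary
merged = tf.summary.merge_all()      # merge every registered summary op
writer = tf.summary.FileWriter('summary/demo')

with tf.Session() as sess:
    summary, loss_val = sess.run([merged, loss], feed_dict={x: 3.0})
    writer.add_summary(summary, global_step=0)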
Example No. 2
 def __init__(self, g, saveFile, init_emb_file=None):
     self.config = Config()
     self.g = g
     self.init_emb_file = init_emb_file
     self.saveFile = saveFile
     self.reader = Reader(self.g)
     self.layer = self.setup_layer()
     self.train_op = self.setup_train_op()
     self.test_metrics = self.get_test_metrics()
     self.loss_train_merged = tf.summary.merge(tf.get_collection('loss_train_summary'))
     self.test_merged = tf.summary.merge(tf.get_collection('multi_label_classification'))
     self.train_writer = tf.summary.FileWriter('model/summary/{}'.format(self.saveFile))
Example No. 3
 def __init__(self):
     self.config = Config()
     self.step_size = self.config.cblstm_step_size
     self.reader = Reader()
     self.layer = self.setup_layer()
     self.loss = self.setup_loss()
     self.train_op = self.setup_train_op()
     self.train_metrics = self.setup_metrics(True)
     self.test_metrics = self.setup_metrics(False)
     self.train_merged = tf.summary.merge(
         tf.get_collection('train_summary'))
     self.train_metrics_merged = tf.summary.merge(
         tf.get_collection('train_metrics_summary'))
     self.test_metrics_merged = tf.summary.merge(
         tf.get_collection('test_metrics_summary'))
     self.train_writer = tf.summary.FileWriter('summary/cblstm')
Example No. 4
def load_model():
    # cropping component
    cropper = Cropper()
    # detection model: config, weights and label paths come from config
    detector = Detector(config.DETECTOR_CFG, config.DETECTOR_WEIGHT, config.DETECTOR_LABELS)
    # text reader, built from its own config file
    reader_config = Cfg.load_config_from_file(config.READER_CFG)
    reader_config['weights'] = config.READER_WEIGHT
    reader_config['device'] = config.DEVICE
    reader = Reader(reader_config)
    # return the three pipeline stages
    return cropper, detector, reader
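A hypothetical usage line (not from the source), showing only how the three components come back from the call; their downstream APIs are not shown in this snippet:

cropper, detector, reader = load_model()  # load once, reuse for every request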
Example No. 5
def test():
    path = DEFAULT_PATH
    logging.basicConfig(filename='run.log', filemode='w', level=logging.DEBUG)
    logger = logging.getLogger('packet_extractor')

    parser = argparse.ArgumentParser()

    parser.add_argument('-v',
                        '--verbose',
                        help='increase output verbosity',
                        action='store_true')

    parser.add_argument('-p', '--port', help='port on which esp is running')

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)
        logger.debug('Verbose mode activated')

    if args.port:
        path = args.port

    reader = Reader(path=path)
    reader.open_reader()

    logger.info("Starting read")
    with open("trace.txt", 'w') as tracer:
        while True:
            try:
                data = reader.get_data()
                print(data.decode("utf-8"))
                tracer.write(data.decode("utf-8"))
            except KeyboardInterrupt:
                logger.error("Keyboard interrupt, stopping read")
                break

            except IndexError:
                tracer.flush()
                logger.error("Index error while reading data")

            except serial.serialutil.SerialException as err:
                tracer.flush()
                logger.error("Serial exception")
                logger.error(err)

    reader.close_reader()
    logger.info('Closing read')
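An invocation sketch; the script filename and serial port below are placeholders, not taken from the source:

# Run with verbose logging against a specific serial port, e.g.:
#   python packet_extractor.py --verbose --port /dev/ttyUSB0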
Example No. 6
def main(model_name, new_scan=False, preprocess=True):
    reader = Reader()
    config = Config()

    reader.aftername.sort()

    if new_scan:
        print('start new scan!')
        file_list = reader.aftername

        start_point = 0
    else:
        with open('detect_result/' + model_name + '/checkpoint') as file:
            start_point = int(file.readline())
            file_list = reader.aftername[start_point:]
            print('restart from {}'.format(file_list[0]))

    if model_name == 'cnn':
        from event_detect.cnn import CNN
        import tensorflow as tf
        from tflib.models import Model

        model = CNN()
        sess = tf.Session()
        saver, global_step = Model.continue_previous_session(
            sess, model_file='cnn', ckpt_file='saver/cnn/checkpoint')

    if model_name == 'cldnn':
        from event_detect.cldnn import CLDNN
        import tensorflow as tf
        from tflib.models import Model

        model = CLDNN()
        sess = tf.Session()
        saver, global_step = Model.continue_previous_session(
            sess, model_file='cldnn', ckpt_file='saver/cldnn/checkpoint')

    for file in file_list:
        begin = datetime.datetime.now()
        traces = obspy.read(file[0])
        traces = traces + obspy.read(file[1])
        traces = traces + obspy.read(file[2])

        if not (traces[0].stats.starttime == traces[1].stats.starttime
                and traces[0].stats.starttime == traces[2].stats.starttime):
            starttime = max([
                traces[0].stats.starttime, traces[1].stats.starttime,
                traces[2].stats.starttime
            ])
            for j in range(3):
                traces[j] = traces[j].slice(starttime=starttime)

        if not (traces[0].stats.endtime == traces[1].stats.endtime
                and traces[0].stats.endtime == traces[2].stats.endtime):
            endtime = min([
                traces[0].stats.endtime, traces[1].stats.endtime,
                traces[2].stats.endtime
            ])
            for j in range(3):
                traces[j] = traces[j].slice(endtime=endtime)

        start_flag = -1
        end_flag = -1
        event_list = []

        for windowed_st in traces.slide(window_length=(config.winsize - 1) /
                                        100.0,
                                        step=config.winlag / 100.0):
            data_input = []
            for j in range(3):
                data_input.append(windowed_st[j].data)

            if model_name == 'cnn':
                # raw_data = [data_preprocess(d, 'bandpass', False) for d in data_input]
                data_input = np.array(data_input).T
                if preprocess:
                    # data_input = sklearn.preprocessing.minmax_scale(data_input)

                    data_mean = np.mean(data_input, axis=0)
                    data_input = np.absolute(data_input - data_mean)
                    data_input = data_input / (np.max(data_input, axis=0) +
                                               np.array([1, 1, 1]))
                data_input = np.array([np.array([data_input])])
            elif model_name == 'cldnn':
                # raw_data = [data_preprocess(d, 'bandpass', False) for d in data_input]

                data_input = [data_preprocess(d) for d in data_input]
                data_input = np.array(data_input).T
                data_input = np.array([data_input])

            class_pred, confidence = model.classify(sess=sess,
                                                    input_=data_input)
            if class_pred == 1:

                # plt.subplot(3, 1, 1)
                # plt.plot(raw_data[0])
                # plt.subplot(3, 1, 2)
                # plt.plot(raw_data[1])
                # plt.subplot(3, 1, 3)
                # plt.plot(raw_data[2])
                # plt.show()

                if start_flag == -1:
                    start_flag = windowed_st[0].stats.starttime
                    end_flag = windowed_st[0].stats.endtime
                else:
                    end_flag = windowed_st[0].stats.endtime

            if class_pred == 0 and start_flag != -1 and end_flag < windowed_st[
                    0].stats.starttime:
                event = [
                    file[0].split('\\')[-1][:-4], start_flag, end_flag,
                    confidence
                ]

                # print(event)

                event_list.append(event)
                start_flag = -1
                end_flag = -1

        if len(event_list) != 0:
            with open('detect_result/' + model_name + '/events_test.csv',
                      mode='a',
                      newline='') as f:
                csvwriter = csv.writer(f)
                for event in event_list:
                    csvwriter.writerow(event)

        start_point += 1
        with open('detect_result/' + model_name + '/checkpoint',
                  mode='w') as f:
            f.write(str(start_point))
            end = datetime.datetime.now()
            print('{} scanned, num {}, time {}.'.format(
                file[0].split('\\')[-1][:-4], start_point, end - begin))
            print('checkpoint saved.')
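A minimal driver sketch (not from the source) for the function above; 'cnn' and 'cldnn' are the two model names accepted by the branches:

if __name__ == '__main__':
    # Start a fresh scan with the CNN detector; pass 'cldnn' for the CLDNN branch.
    main('cnn', new_scan=True, preprocess=True)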
Example No. 7
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader()
    reader.read_and_gen_vectors_glove(config.embed_path)
    reader.read_all_data("./data/ace2005/", "ace2005.train", "ace2005.dev",
                         "ace2005.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(
        config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()
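A short sketch, using only the standard library, of how the pickled batches written above could be read back; the path reuses the same config attribute:

import pickle

# Reload the serialized training batches for later use.
with open(config.train_data_path, 'rb') as f:
    train_batches = pickle.load(f)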
Example No. 8
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader()
    reader.read_all_data("./data/ace2005/", "ace2005.train", "ace2005.dev",
                         "ace2005.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(
        config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
Example No. 9
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader()
    #reader.read_and_gen_vectors_pubmed_word2vec(config.embed_path)
    reader.read_all_data("./data/genia/", "genia.train", "genia.dev", "genia.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
    pickle.dump(test_batches, f)
Example No. 10
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    tokenizer_dir = "tokenization/polish-roberta-large/"
    tokenizer = SentencePieceBPETokenizer(f"{tokenizer_dir}/vocab.json",
                                          f"{tokenizer_dir}/merges.txt")
    getattr(tokenizer,
            "_tokenizer").post_processor = RobertaProcessing(sep=("</s>", 2),
                                                             cls=("<s>", 0))
    reader = Reader("polish", tokenizer, cls="<s>", sep="</s>", threshold=8)
    reader.read_all_data("./data/poleval/", "poleval.train", "poleval.dev",
                         "poleval.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(
        config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
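For reference, a hedged sketch of exercising the tokenizer configured above on its own; it assumes the Hugging Face tokenizers package, where encode returns an Encoding exposing .tokens and .ids:

# Illustrative only: inspect the subword pieces produced for a sample Polish sentence.
encoding = tokenizer.encode("To jest przykład")
print(encoding.tokens)
print(encoding.ids)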
Example No. 11
def main(model_name, new_scan=False, preprocess=True):
    reader = Reader()
    config = Config()
    plot = config.plot
    bandpass = config.bandpass
    resample = config.resample

    confidence0=[]
    plot_num = 0
    reader.aftername.sort()

    if new_scan:
        print('start new scan!')
        file_list = reader.aftername

        start_point = 0
    else:
        with open(config.root + '/event_detect/detect_result/' + model_name + '/checkpoint') as file:
            start_point = int(file.readline())
            file_list = reader.aftername[start_point:]
            print('restart from {}'.format(file_list[0]))

    if model_name == 'cnn':
        from cnn import CNN
        import tensorflow as tf
        from tflib.models import Model

        model = CNN()
        sess = tf.Session(config=tf.ConfigProto(device_count={"CPU":20},inter_op_parallelism_threads=0,intra_op_parallelism_threads=0))
        saver, global_step = Model.continue_previous_session(sess,
                                                             model_file='cnn',
                                                             ckpt_file=config.root + '/event_detect/saver/cnn/checkpoint')
    file_list_len = len(file_list)
    # print(file_list_len)

    try:
        os.system('rm -rf %s/event_detect/detect_result/png/*'%config.root)
        os.system('rm -rf %s/event_detect/detect_result/cnn/*.csv'%config.root)
    except:
        pass

    for file in file_list:
        file=np.array(file)
        #print(file)
        #file=file.T
        #np.random.shuffle(file)  #random
        #file=file.T

        #print(file,'\n')
        begin = datetime.datetime.now()
        if plot:
            plot_traces = obspy.read(file[2][0]) #Z component
        sta_num = len(file[0])
        trace_len = []

        for i in range(3):
            for j in range(sta_num):
                trace_len.append(obspy.read(file[i][j])[0].stats.npts)
        max_len = max(trace_len)

        for i in range(3):
            for j in range(sta_num):        # station number
                each_tr = obspy.read(file[i][j])
                if each_tr[0].stats.npts < max_len:
                    zero = np.zeros(max_len-each_tr[0].stats.npts)
                    each_tr[0].data = np.concatenate([each_tr[0].data,zero])
                if i==j==0:
                    traces = each_tr
                else:
                    traces=traces + each_tr
                if i == 2:
                    if j == 0:
                        pass
                    else:
                        plot_traces = plot_traces + each_tr

        if plot:
            if resample:
                plot_traces = plot_traces.resample(sampling_rate=resample)
            plot_traces = plot_traces.filter('bandpass',freqmin=bandpass[0],freqmax=bandpass[1],corners=4,zerophase=True)
        
        if resample:
            traces = traces.resample(sampling_rate=resample)
        traces = traces.filter('bandpass',freqmin=bandpass[0],freqmax=bandpass[1],corners=4,zerophase=True)
        starttime = traces[0].stats.starttime
        endtime = traces[0].stats.endtime
        #print(traces)

        start_flag = -1
        end_flag = -1
        event_list = []
        confidence_total=[]
        start_total=[]
        end_total=[]
        samples_trace = 1.0 / traces[0].stats.delta
        npts = traces[0].stats.npts


        for windowed_st in traces.slide(window_length=(config.winsize-1)/samples_trace,
                                        step=config.winlag / samples_trace):
            data_input = [[],[],[]]

            for j in range(sta_num):
                if len(windowed_st[j].data) < config.winsize:
                    windowed_st[j].data = np.concatenate([windowed_st[j].data,np.zeros(config.winsize-len(windowed_st[j].data))])
                data_input[0].append(windowed_st[j].data[:config.winsize])
            for j in range(sta_num,2*sta_num):
                if len(windowed_st[j].data) < config.winsize:
                    windowed_st[j].data = np.concatenate([windowed_st[j].data,np.zeros(config.winsize-len(windowed_st[j].data))])
                data_input[1].append(windowed_st[j].data[:config.winsize])
            for j in range(2*sta_num,3*sta_num):
                if len(windowed_st[j].data) < config.winsize:
                    windowed_st[j].data = np.concatenate([windowed_st[j].data,np.zeros(config.winsize-len(windowed_st[j].data))])
                data_input[2].append(windowed_st[j].data[:config.winsize])

            if model_name == 'cnn':

                if preprocess:
                    for i in range(3):
                        for j in range(sta_num):
                            data_input[i][j] = data_preprocess(data_input[i][j])

                data_input=np.array(data_input)

                if len(data_input[0][0])<config.winsize:
                    concat = np.zeros([3, sta_num, config.winsize - len(data_input[0][0])])
                    data_input=np.concatenate([data_input,concat],axis=2)

                if len(data_input[0][0])>config.winsize:
                    data_input=data_input[:, :, :config.winsize]

                data_input=data_input.transpose((1,2,0))
                data_input = np.array([data_input])
                #print(event_list)

            class_pred, confidence = model.classify(sess=sess, input_=data_input)
            confidence0.append(confidence)

            print(class_pred,confidence)
            if class_pred == 1:
                confidence_total.append(confidence)
                start_total.append(windowed_st[0].stats.starttime)
                end_total.append(windowed_st[0].stats.endtime)

                if start_flag == -1:
                    start_flag = windowed_st[0].stats.starttime
                    end_flag = windowed_st[0].stats.endtime
                else:
                    end_flag = windowed_st[0].stats.endtime
            print(class_pred,start_flag,end_flag,windowed_st[0].stats.starttime)

            if class_pred == 0 and start_flag != -1:  #end_flag < windowed_st[0].stats.starttime:

                confidence = np.max(confidence_total)
                for j in range(len(confidence_total)):
                    if confidence == confidence_total[j]:
                        break
                start_local = start_total[j]
                end_local = end_total[j]


                # event = [file[0][0].split('/')[-2], start_flag, end_flag,
                #          confidence, start_local, end_local]
                event = [file[0][0].split('/')[-2], start_flag, end_flag, confidence]

                confidence_total=[]
                start_total = []
                end_total = []

                if plot:
                    plot_num = int(plot_num + 1)
                    name = config.root + '/event_detect/detect_result/png/' \
                           + str(plot_num) + '_' + str(confidence) + '.png'
                    plot_traces.plot(starttime=start_flag, endtime=end_flag, size=(800, 800),
                                    automerge=False, equal_scale=False, linewidth=0.8, outfile=name)

                # print(event)

                event_list.append(event)
                #print(event_list)

                start_flag = -1
                end_flag = -1

            if class_pred == 1 and end_flag+config.winlag / samples_trace>=endtime:
                confidence = np.max(confidence_total)
                for j in range(len(confidence_total)):
                    if confidence == confidence_total[j]:
                        break
                start_local = start_total[j]
                end_local = end_total[j]

                if plot:
                    plot_num = int(plot_num + 1)
                    name = config.root + '/event_detect/detect_result/png/' \
                           + str(plot_num) + '_' + str(confidence) + '.png'
                    plot_traces.plot(starttime=start_flag, endtime=endtime, size=(800, 800),
                                     automerge=False, equal_scale=False, linewidth=0.8, outfile=name)

                # event = [file[0][0].split('/')[-2], start_flag, endtime,
                #          confidence, start_total, end_total]
                event = [file[0][0].split('/')[-2], start_flag, endtime, confidence]

                event_list.append(event)
                start_flag = -1
                end_flag = -1

        if len(event_list) != 0:
            with open(config.root + '/event_detect/detect_result/' + model_name + '/events_list.csv', mode='a', newline='') as f:
                csvwriter = csv.writer(f)
                for event in event_list:
                    csvwriter.writerow(event)

        start_point += 1
        with open(config.root + '/event_detect/detect_result/' + model_name + '/checkpoint', mode='w') as f:
            f.write(str(start_point))
            end = datetime.datetime.now()
            print('{} scanned, num {}, time {}.'.format(file[0][0].split('/')[-2], start_point, end - begin))
            print('checkpoint saved.')
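A resume-style driver sketch (not from the source): with new_scan=False the code above reads the last saved index from the checkpoint file and continues from there.

if __name__ == '__main__':
    # Resume scanning from the saved file index instead of starting over.
    main('cnn', new_scan=False, preprocess=True)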
Example No. 12
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader()
    reader.read_and_gen_vectors_pubmed_word2vec(config.embed_path)
    reader.read_all_data("./data/genia_sample/", "train.data", "dev.data", "test.data")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
    pickle.dump(test_batches, f)
Example No. 13
class CNN(object):
    def __init__(self):
        self.config = Config()
        self.reader = Reader()
        self.layer = self.setup_layer()
        self.loss = self.setup_loss()
        self.metrics = self.setup_metrics()
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(self.config.root +
                                                  '/event_detect/summary/cnn')

    def setup_layer(self):
        layer = dict()
        layer['target'] = tf.placeholder(tf.int32, shape=[None], name='target')
        layer['input'] = tf.placeholder(
            tf.float32,
            shape=[None, None, self.config.winsize, 3],
            name='input')
        layer['conv1'] = layers.conv(
            layer['input'],
            filter=[3, 3, 3, 8],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv1')
        layer['pooling1'] = layers.pool(layer['conv1'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling1')
        layer['conv2'] = layers.conv(
            layer['pooling1'],
            filter=[3, 3, 8, 16],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv2')
        layer['pooling2'] = layers.pool(layer['conv2'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling2')
        layer['conv3'] = layers.conv(
            layer['pooling2'],
            filter=[3, 3, 16, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            wd=0.001,
            bias=0.0,
            name='conv3')
        layer['pooling3'] = layers.pool(layer['conv3'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling3')
        layer['conv4'] = layers.conv(
            layer['pooling3'],
            filter=[1, 3, 32, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv4')
        layer['pooling4'] = layers.pool(layer['conv4'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling4')
        layer['conv5'] = layers.conv(
            layer['pooling4'],
            filter=[1, 3, 32, 64],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv5')
        layer['pooling5'] = layers.pool(layer['conv5'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling5')
        layer['conv6'] = layers.conv(
            layer['pooling5'],
            filter=[1, 9, 64, 128],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            wd=0.001,
            bias=0.0,
            name='conv6')
        layer['conv7'] = layers.conv(
            layer['conv6'],
            filter=[1, 1, 128, 128],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv7')
        layer['conv8'] = layers.conv(
            layer['conv7'],
            filter=[1, 1, 128, 2],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=0.001,
            bias=0.0,
            name='conv8')

        layer['unfold'] = layers.unfold(layer['conv8'], name='unfold')
        #layer['logits'] = tf.reduce_mean(layer['unfold'], 1, name='logits')
        layer['class_prob'] = tf.nn.softmax(layer['unfold'], name='class_prob')
        layer['class_prediction'] = tf.argmax(layer['class_prob'],
                                              1,
                                              name='class_pred')

        return layer

    def setup_loss(self):
        with tf.name_scope('loss'):
            raw_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.layer['unfold'], labels=self.layer['target']))

            tf.summary.scalar('raw_loss', raw_loss)  # visualize in TensorBoard
            tf.add_to_collection('losses', raw_loss)  # add raw_loss to the 'losses' collection
            loss = tf.add_n(tf.get_collection('losses'),
                            name='total_loss')  # sum every collected loss term
            tf.summary.scalar('total_loss', loss)  # visualize in TensorBoard

        return loss

    def setup_metrics(self):
        metrics = dict()
        with tf.variable_scope('metrics'):
            metrics['accuracy'] = tf.metrics.accuracy(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='accuracy')[1]
            tf.summary.scalar('accuracy', metrics['accuracy'])
            metrics['recall'] = tf.metrics.recall(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='recall')[1]
            tf.summary.scalar('recall', metrics['recall'])
            metrics['precision'] = tf.metrics.precision(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='precision')[1]
            tf.summary.scalar('precision', metrics['precision'])
        return metrics

    def train(self, passes, new_training=True):
        with tf.Session() as sess:
            global_step = tf.Variable(0, trainable=False)
            #learning_rate = tf.train.exponential_decay(0.001, global_step, 200, 0.8, staircase=True)
            training = tf.train.AdamOptimizer(
                self.config.learning_rate).minimize(self.loss)

            if new_training:
                saver, global_step = Model.start_new_session(sess)
            else:
                saver, global_step = Model.continue_previous_session(
                    sess,
                    model_file='cnn',
                    ckpt_file=self.config.root +
                    '/event_detect/saver/cnn/checkpoint')

            sess.run(tf.local_variables_initializer())
            self.train_writer.add_graph(sess.graph, global_step=global_step)

            test_result = []

            for step in range(1 + global_step, 1 + passes + global_step):
                input, target = self.reader.get_cnn_batch_data('train')
                #print(input.shape)
                summary, _, acc = sess.run(
                    [self.merged, training, self.metrics['accuracy']],
                    feed_dict={
                        self.layer['input']: input,
                        self.layer['target']: target
                    })
                self.train_writer.add_summary(summary, step)

                if step % 10 == 0:
                    loss = sess.run(self.loss,
                                    feed_dict={
                                        self.layer['input']: input,
                                        self.layer['target']: target
                                    })
                    test_result.append(loss)

                    print(
                        "gobal_step {}, training_loss {}, accuracy {}".format(
                            step, loss, acc))

                if step % 100 == 0:
                    test_x, test_y = self.reader.get_cnn_batch_data('test')
                    acc, recall, precision = sess.run(
                        [
                            self.metrics['accuracy'], self.metrics['recall'],
                            self.metrics['precision']
                        ],
                        feed_dict={
                            self.layer['input']: test_x,
                            self.layer['target']: test_y
                        })

                    print("test: accuracy {}, recall {}, precision {}".format(
                        acc, recall, precision))
                    saver.save(sess,
                               self.config.root +
                               '/event_detect/saver/cnn/cnn',
                               global_step=step)
                    print('checkpoint saved')
                    #print(sess.run([self.layer['class_prob']], feed_dict={self.layer['input']: input}))

            print(test_result)

    def classify(self, sess, input_):
        class_prediction, confidence = sess.run(
            [self.layer['class_prediction'], self.layer['class_prob']],
            feed_dict={self.layer['input']: input_})
        confidence = confidence[:, 1]
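        # Note (added): the scalar comparison below assumes one window per call,
        # i.e. confidence is a length-1 array; a larger batch would need element-wise handling.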
        if confidence > self.config.prob:
            class_prediction = 1
        else:
            class_prediction = 0
        return class_prediction, confidence
Example No. 14
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader(config.bert_model)
    reader.read_all_data("./data/ace2005/", "ace2005.train", "ace2005.dev",
                         "ace2005.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(
        config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
Example No. 15
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader(config)
    reader.read_all_data()

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch()
    f = open(config.data_path + "_train.pkl", 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.data_path + "_dev.pkl", 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.data_path + "_test.pkl", 'wb')
    pickle.dump(test_batches, f)
    f.close()
Example No. 16
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader(config.bert_model)
    reader.read_all_data("./data/genia_sample/", "train.data", "dev.data", "test.data")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.test_data_path, 'wb')
    pickle.dump(test_batches, f)
    f.close()
Example No. 17
class CNN(object):
    def __init__(self):
        self.config = Config()
        self.reader = Reader()
        self.layer = self.setup_layer()
        self.loss = self.setup_loss()
        self.metrics = self.setup_metrics()
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter('summary/cnn')

    def setup_layer(self):
        layer = dict()
        layer['target'] = tf.placeholder(tf.int32, shape=[None], name='target')
        layer['input'] = tf.placeholder(
            tf.float32, shape=[None, 1, self.config.winsize, 3], name='input')
        layer['conv1'] = layers.conv(
            layer['input'],
            filter=[1, 6, 3, 8],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv1')
        layer['pooling1'] = layers.pool(layer['conv1'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling1')
        layer['conv2'] = layers.conv(
            layer['pooling1'],
            filter=[1, 6, 8, 16],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv2')
        layer['pooling2'] = layers.pool(layer['conv2'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling2')
        layer['conv3'] = layers.conv(
            layer['pooling2'],
            filter=[1, 6, 16, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv3')
        layer['pooling3'] = layers.pool(layer['conv3'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling3')
        layer['conv4'] = layers.conv(
            layer['pooling3'],
            filter=[1, 6, 32, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv4')
        layer['pooling4'] = layers.pool(layer['conv4'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling4')
        layer['conv5'] = layers.conv(
            layer['pooling4'],
            filter=[1, 6, 32, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv5')
        layer['pooling5'] = layers.pool(layer['conv5'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling5')
        layer['conv6'] = layers.conv(
            layer['pooling5'],
            filter=[1, 6, 32, 32],
            # strides=[1, 1, 4, 1],
            strides=[1, 1, 1, 1],
            padding='SAME',
            wd=1e-3,
            bias=0.0,
            name='conv6')
        layer['pooling6'] = layers.pool(layer['conv6'],
                                        ksize=[1, 1, 3, 1],
                                        strides=[1, 1, 3, 1],
                                        padding='SAME',
                                        pool_func=tf.nn.max_pool,
                                        name='pooling6')
        layer['unfold'] = layers.Unfold(layer['pooling6'], name='unfold')
        layer['logits'] = layers.fc(layer['unfold'], 2, wd=1e-3, name='logits')
        layer['class_prob'] = tf.nn.softmax(layer['logits'], name='class_prob')
        layer['class_prediction'] = tf.argmax(layer['class_prob'],
                                              1,
                                              name='class_pred')

        return layer

    def setup_loss(self):
        with tf.name_scope('loss'):
            raw_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.layer['logits'], labels=self.layer['target']))
            tf.summary.scalar('raw_loss', raw_loss)
            tf.add_to_collection('losses', raw_loss)
            loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
            tf.summary.scalar('total_loss', loss)

        return loss

    def setup_metrics(self):
        metrics = dict()
        with tf.variable_scope('metrics'):
            metrics['accuracy'] = tf.metrics.accuracy(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='accuracy')[1]
            tf.summary.scalar('accuracy', metrics['accuracy'])
            metrics['recall'] = tf.metrics.recall(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='recall')[1]
            tf.summary.scalar('recall', metrics['recall'])
            metrics['precision'] = tf.metrics.precision(
                labels=self.layer['target'],
                predictions=self.layer['class_prediction'],
                name='precision')[1]
            tf.summary.scalar('precision', metrics['precision'])
        return metrics

    def train(self, passes, new_training=True):
        with tf.Session() as sess:
            training = tf.train.AdamOptimizer(1e-3).minimize(self.loss)
            if new_training:
                saver, global_step = Model.start_new_session(sess)
            else:
                saver, global_step = Model.continue_previous_session(
                    sess, model_file='cnn', ckpt_file='saver/cnn/checkpoint')
            sess.run(tf.local_variables_initializer())
            self.train_writer.add_graph(sess.graph, global_step=global_step)

            for step in range(1 + global_step, 1 + passes + global_step):
                input, target = self.reader.get_cnn_batch_data('train')

                summary, _, acc = sess.run(
                    [self.merged, training, self.metrics['accuracy']],
                    feed_dict={
                        self.layer['input']: input,
                        self.layer['target']: target
                    })
                self.train_writer.add_summary(summary, step)

                if step % 10 == 0:
                    loss = sess.run(self.loss,
                                    feed_dict={
                                        self.layer['input']: input,
                                        self.layer['target']: target
                                    })
                    print(
                        "gobal_step {}, training_loss {}, accuracy {}".format(
                            step, loss, acc))

                if step % 100 == 0:
                    test_x, test_y = self.reader.get_cnn_batch_data('test')
                    acc, recall, precision = sess.run(
                        [
                            self.metrics['accuracy'], self.metrics['recall'],
                            self.metrics['precision']
                        ],
                        feed_dict={
                            self.layer['input']: test_x,
                            self.layer['target']: test_y
                        })
                    print("test: accuracy {}, recall {}, precision {}".format(
                        acc, recall, precision))
                    saver.save(sess, 'saver/cnn/cnn', global_step=step)
                    print('checkpoint saved')

    def classify(self, sess, input_):
        # with tf.Session() as sess:
        #     saver, global_step = Model.continue_previous_session(sess,
        #                                                          model_file='cnn',
        #                                                          ckpt_file='saver/cnn/checkpoint')
        class_prediction, confidence = sess.run(
            [self.layer['class_prediction'], self.layer['class_prob']],
            feed_dict={self.layer['input']: input_})
        confidence = confidence[:, 1]
        return class_prediction, confidence
Example No. 18
from reader.reader import Reader

reader = Reader("../data/cifar_100_caffe_hdf5")
X, y = reader.getTrainData()
print(X)
print(y)
Example No. 19
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader(config)
    reader.read_all_data()

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch()
    f = open(config.data_path + "_train.pkl", 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.data_path + "_dev.pkl", 'wb')
    pickle.dump(dev_batches, f)
    f.close()

    f = open(config.data_path + "_test.pkl", 'wb')
    pickle.dump(test_batches, f)
    f.close()
Example No. 20
class CBLSTM(object):
    def __init__(self):
        self.config = Config()
        self.step_size = self.config.cblstm_step_size
        self.reader = Reader()
        self.layer = self.setup_layer()
        self.loss = self.setup_loss()
        self.train_op = self.setup_train_op()
        self.train_metrics = self.setup_metrics(True)
        self.test_metrics = self.setup_metrics(False)
        self.train_merged = tf.summary.merge(
            tf.get_collection('train_summary'))
        self.train_metrics_merged = tf.summary.merge(
            tf.get_collection('train_metrics_summary'))
        self.test_metrics_merged = tf.summary.merge(
            tf.get_collection('test_metrics_summary'))
        self.train_writer = tf.summary.FileWriter('summary/cblstm')

    def data_padding_preprocess(self, data, data_type):
        step_size = self.step_size
        sequence_len = list(map(len, data))
        sequence_len = [math.ceil(i / float(step_size)) for i in sequence_len]
        max_len = max(sequence_len)
        if max_len % step_size != 0:
            max_len = math.ceil(max_len)

        if data_type == 'input':
            result = np.zeros([len(data), max_len * step_size, 3],
                              dtype=np.float32)
            for i, example in enumerate(data):
                for j, row in enumerate(example):
                    for k, val in enumerate(row):
                        result[i][j][k] = val

        elif data_type == 'targets':
            result = np.zeros([len(data), max_len], dtype=np.int32)
            for i, example in enumerate(data):
                for step, val in enumerate(example[::step_size]):
                    result[i][step] = np.max(val)

        return result, sequence_len
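        # Worked example (illustrative): with step_size = 4 and two traces of length 5 and 9,
        # sequence_len becomes [2, 3] and max_len is 3, so the padded 'input' array has shape
        # [2, 12, 3] and the corresponding 'targets' array has shape [2, 3].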

    def setup_layer(self):
        # LSTM_units_num = self.config.cblstm_l
        lstm_layers_num = self.config.cblstm_lstm_layer_num
        class_num = self.config.cblstm_class_num
        # batch_size = self.config.cblstm_batch_size

        layer = dict()
        input_ = tf.placeholder(tf.float32,
                                shape=[None, None, 3],
                                name='input')
        sequence_length = tf.placeholder(tf.int32,
                                         shape=[None],
                                         name='seq_len')
        targets = tf.placeholder(tf.int32, shape=[None, None], name='targets')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        batch_size = tf.shape(input_)[0]

        layer['input'] = input_
        layer['seq_len'] = sequence_length
        layer['targets'] = targets
        layer['keep_prob'] = keep_prob

        layer['conv1'] = layers.conv1d(layer['input'],
                                       filter=[4, 3, 8],
                                       strides=1,
                                       padding='SAME',
                                       wd=5e-5,
                                       bias=0.0,
                                       name='conv1')
        layer['pooling1'] = layers.pool1d(layer['conv1'],
                                          ksize=[2],
                                          strides=[2],
                                          padding='SAME',
                                          name='pooling1')
        layer['conv2'] = layers.conv1d(layer['pooling1'],
                                       filter=[4, 8, 16],
                                       strides=1,
                                       padding='SAME',
                                       wd=5e-5,
                                       bias=0.0,
                                       name='conv2')
        layer['pooling2'] = layers.pool1d(layer['conv2'],
                                          ksize=[2],
                                          strides=[2],
                                          padding='SAME',
                                          name='pooling2')
        layer['unfold'] = tf.reshape(layer['pooling2'], [batch_size, -1, 400])
        layer['unfold'] = tf.reshape(layer['unfold'], [-1, 400])

        layer['unfold'] = tf.nn.dropout(layer['unfold'], keep_prob)

        layer['dim_red'] = layers.fc(layer['unfold'],
                                     output_dim=100,
                                     wd=5e-5,
                                     name='dim_red')

        layer['dim_red'] = tf.reshape(layer['dim_red'], [batch_size, -1, 100])

        lstm_cell_fw1 = tf.nn.rnn_cell.LSTMCell(
            num_units=100,
            forget_bias=1.0,
            state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_fw2 = tf.nn.rnn_cell.LSTMCell(
            num_units=100,
            num_proj=50,
            forget_bias=1.0,
            state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_bw1 = tf.nn.rnn_cell.LSTMCell(
            num_units=100,
            forget_bias=1.0,
            state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_bw2 = tf.nn.rnn_cell.LSTMCell(
            num_units=100,
            num_proj=50,
            forget_bias=1.0,
            state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
        lstm_cell_fw1 = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_fw1, output_keep_prob=keep_prob)
        lstm_cell_fw2 = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_fw2, output_keep_prob=keep_prob)
        lstm_cell_bw1 = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_bw1, output_keep_prob=keep_prob)
        lstm_cell_bw2 = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_bw2, output_keep_prob=keep_prob)

        cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw1, lstm_cell_fw2],
                                              state_is_tuple=True)
        cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw1, lstm_cell_bw2],
                                              state_is_tuple=True)

        layer['dim_red'] = tf.nn.dropout(layer['dim_red'], keep_prob)

        with tf.variable_scope('bi_rnn'):
            (outputs, _) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=layer['dim_red'],
                sequence_length=sequence_length,
                dtype=tf.float32)
            output = tf.concat(outputs, 2)
            layer['birnn'] = tf.reshape(output, [-1, 50 * 2])

        layer['birnn'] = tf.nn.dropout(layer['birnn'], keep_prob)
        layer['fc'] = layers.fc(layer['birnn'],
                                output_dim=50,
                                wd=5e-5,
                                name='fc')

        with tf.variable_scope('softmax'):
            softmax_w = tf.get_variable(
                name='softmax_w',
                shape=[50, class_num],
                initializer=tf.truncated_normal_initializer(stddev=0.05),
                dtype=tf.float32)
            weight_decay = tf.multiply(tf.nn.l2_loss(softmax_w),
                                       5e-5,
                                       name='weight_loss')
            tf.add_to_collection('losses', weight_decay)
            softmax_b = tf.get_variable(
                name='softmax_b',
                shape=[class_num],
                initializer=tf.constant_initializer(value=0),
                dtype=tf.float32)
            xw_plus_b = tf.nn.xw_plus_b(layer['fc'], softmax_w, softmax_b)
            logits = tf.reshape(xw_plus_b, [batch_size, -1, class_num])
            layer['logits'] = logits
            class_prob = tf.nn.softmax(logits)
            layer['class_prob'] = class_prob

        layer['pred_seq'] = tf.cast(tf.argmax(class_prob, axis=2), tf.int32)
        return layer

    def setup_loss(self):
        with tf.name_scope('loss'):
            # fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.training_layer['logits'],
            #                                                            labels=self.training_layer['targets'])
            # mask = tf.cast(tf.sign(self.training_layer['targets']), dtype=tf.float32)
            mask = tf.sequence_mask(self.layer['seq_len'], dtype=tf.float32)
            # loss_per_example_per_step = tf.multiply(fake_loss, mask)
            # loss_per_example_sum = tf.reduce_sum(loss_per_example_per_step, reduction_indices=[1])
            # loss_per_example_average = tf.div(x=loss_per_example_sum,
            #                                   y=tf.cast(self.training_layer['seq_len'], tf.float32))
            raw_loss = tf.contrib.seq2seq.sequence_loss(
                self.layer['logits'],
                self.layer['targets'],
                mask,
                average_across_timesteps=True,
                average_across_batch=True)
            # loss = tf.reduce_mean(loss_per_example_average, name='loss')
            # loss_per_example_per_step = tf.multiply(raw_loss, mask)
            # mask = tf.reduce_sum(mask, axis=0)
            # loss_per_example_per_step = tf.div(x=loss_per_example_per_step,
            #                                    y=mask)
            raw_loss_summ = tf.summary.scalar('raw_loss', raw_loss)
            tf.add_to_collection('losses', raw_loss)
            loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
            loss_summ = tf.summary.scalar('total_loss', loss)
            tf.add_to_collection('train_summary', raw_loss_summ)
            tf.add_to_collection('train_summary', loss_summ)
        return loss

    def setup_train_op(self):
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          self.config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(5e-4)
        return optimizer.apply_gradients(zip(grads, tvars))
        # return optimizer.minimize(self.loss)

    def setup_metrics(self, is_training):
        if is_training:
            name_scope = 'training'
            summary_name = 'train_metrics_summary'
        else:
            name_scope = 'test'
            summary_name = 'test_metrics_summary'

        metrics = dict()
        with tf.name_scope(name_scope):
            with tf.variable_scope(name_scope):
                metrics['acc'] = tf.placeholder(dtype=tf.float32, name='acc')
                acc_summ = tf.summary.scalar('acc', metrics['acc'])
                metrics['p_error'] = tf.placeholder(dtype=tf.float32,
                                                    name='p_error')
                p_err_summ = tf.summary.scalar('p_error', metrics['p_error'])
                metrics['p_error_max'] = tf.placeholder(dtype=tf.float32,
                                                        name='p_error_max')
                p_err_max_summ = tf.summary.scalar('p_error_max',
                                                   metrics['p_error_max'])
                metrics['s_error'] = tf.placeholder(dtype=tf.float32,
                                                    name='s_error')
                s_err_summ = tf.summary.scalar('s_error', metrics['s_error'])
                metrics['s_error_max'] = tf.placeholder(dtype=tf.float32,
                                                        name='s_error_max')
                s_err_max_summ = tf.summary.scalar('s_error_max',
                                                   metrics['s_error_max'])
                tf.add_to_collection(summary_name, acc_summ)
                tf.add_to_collection(summary_name, p_err_summ)
                tf.add_to_collection(summary_name, p_err_max_summ)
                tf.add_to_collection(summary_name, s_err_summ)
                tf.add_to_collection(summary_name, s_err_max_summ)
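                # These metrics are plain placeholders: accuracy and the P/S
                # picking errors are computed outside the graph (get_acc /
                # get_p_s_error) and fed in here only so they can be written
                # to TensorBoard.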

        return metrics

    def train(self, passes, new_training=True):
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            if new_training:
                saver, global_step = Model.start_new_session(sess)
            else:
                saver, global_step = Model.continue_previous_session(
                    sess,
                    model_file='cblstm',
                    ckpt_file='saver/cblstm/checkpoint')

            self.train_writer.add_graph(sess.graph, global_step=global_step)

            for step in range(1 + global_step, 1 + passes + global_step):
                with tf.variable_scope('Train'):
                    input_, targets = self.reader.get_birnn_batch_data('train')
                    input_, seq_len = self.data_padding_preprocess(
                        input_, 'input')
                    targets, _ = self.data_padding_preprocess(
                        targets, 'targets')
                    _, train_summary, loss, pred_seq = sess.run(
                        [
                            self.train_op, self.train_merged, self.loss,
                            self.layer['pred_seq']
                        ],
                        feed_dict={
                            self.layer['input']: input_,
                            self.layer['targets']: targets,
                            self.layer['seq_len']: seq_len,
                            self.layer['keep_prob']: self.config.keep_prob
                        })
                    self.train_writer.add_summary(train_summary, step)

                    train_p_err, train_p_err_max, train_s_err, train_s_err_max = get_p_s_error(
                        pred_seq, targets, seq_len)
                    train_acc = get_acc(pred_seq, targets, seq_len)

                    [train_metrics_summary] = sess.run(
                        [self.train_metrics_merged],
                        feed_dict={
                            self.train_metrics['acc']: train_acc,
                            self.train_metrics['p_error']: train_p_err,
                            self.train_metrics['p_error_max']: train_p_err_max,
                            self.train_metrics['s_error']: train_s_err,
                            self.train_metrics['s_error_max']: train_s_err_max
                        })
                    self.train_writer.add_summary(train_metrics_summary, step)
                    print("gobal_step {},"
                          " training_loss {},"
                          " accuracy {},"
                          " p_error {},"
                          " p_err_max {},"
                          " s_error {},"
                          " s_err_max {}.".format(step, loss, train_acc,
                                                  train_p_err, train_p_err_max,
                                                  train_s_err,
                                                  train_s_err_max))

                if step % 5 == 0:
                    with tf.variable_scope('Test', reuse=True):
                        test_input, test_targets = self.reader.get_birnn_batch_data(
                            'test')
                        test_input, test_seq_len = self.data_padding_preprocess(
                            test_input, 'input')
                        test_targets, _ = self.data_padding_preprocess(
                            test_targets, 'targets')
                        [test_pred_seq] = sess.run(
                            [self.layer['pred_seq']],
                            feed_dict={
                                self.layer['input']: test_input,
                                self.layer['seq_len']: test_seq_len,
                                self.layer['keep_prob']: 1.0
                            })
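                        # keep_prob is fed as 1.0 so dropout is disabled during
                        # evaluation.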
                        test_p_err, test_p_err_max, test_s_err, test_s_err_max = get_p_s_error(
                            test_pred_seq, test_targets, test_seq_len)
                        test_acc = get_acc(test_pred_seq, test_targets,
                                           test_seq_len)
                        [test_metrics_summary] = sess.run(
                            [self.test_metrics_merged],
                            feed_dict={
                                self.test_metrics['acc']: test_acc,
                                self.test_metrics['p_error']: test_p_err,
                                self.test_metrics['p_error_max']:
                                test_p_err_max,
                                self.test_metrics['s_error']: test_s_err,
                                self.test_metrics['s_error_max']:
                                test_s_err_max
                            })
                        self.train_writer.add_summary(test_metrics_summary,
                                                      step)
                        print("test_acc {}, "
                              "test_p_err {},"
                              "test_p_err_max {},"
                              "test_s_err {},"
                              "test_s_err_max {}.".format(
                                  test_acc, test_p_err, test_p_err_max,
                                  test_s_err, test_s_err_max))

                if step % 100 == 0:
                    saver.save(sess, 'saver/cblstm/cblstm', global_step=step)
                    print('checkpoint saved')

    def pickup_p_s(self, sess, input_, get_pred_seq=False):
        # with tf.Session() as sess:
        #     saver, global_step = Model.continue_previous_session(sess,
        #                                                          model_file='bi_rnn',
        #                                                          ckpt_file='saver/bi_rnn/checkpoint')
        with tf.variable_scope('model', reuse=True):
            input_, seq_len = self.data_padding_preprocess(input_, 'input')
            pred_seq, class_prob = sess.run(
                [self.layer['pred_seq'], self.layer['class_prob']],
                feed_dict={
                    self.layer['input']: input_,
                    self.layer['seq_len']: seq_len,
                    self.layer['keep_prob']: 1.0
                })
            p_index = [
                get_arrival_index(pred_seq[i], seq_len[i], 'P')
                for i in range(len(input_))
            ]
            s_index = [
                get_arrival_index(pred_seq[i], seq_len[i], 'S')
                for i in range(len(input_))
            ]
        if get_pred_seq:
            return p_index, s_index, class_prob, pred_seq
        else:
            return p_index, s_index, class_prob
            for ent in labels:
                start_dic[(ent[0], ent[2])].append(ent)
                end_dic[(ent[1], ent[2])].append(ent)
                all_num += 1
            for k, v in start_dic.items():
                if len(v) > 1:
                    start_num += len(v)
            for k, v in end_dic.items():
                if len(v) > 1:
                    end_num += len(v)

    print("All {}, start {}, end {}".format(all_num, start_num, end_num))


if __name__ == "__main__":
    reader = Reader()
    reader.read_and_gen_vectors_pubmed_word2vec(config.embed_path)
    reader.read_all_data("./data/genia/", "genia.train", "genia.dev",
                         "genia.test")

    # print reader.train_sents[0]
    train_batches, dev_batches, test_batches = reader.to_batch(
        config.batch_size)
    f = open(config.train_data_path, 'wb')
    pickle.dump(train_batches, f)
    f.close()

    f = open(config.dev_data_path, 'wb')
    pickle.dump(dev_batches, f)
    f.close()
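
    # A minimal sketch, assuming the batch files written above are later loaded
    # back with pickle before training:
    #
    #     with open(config.train_data_path, 'rb') as f:
    #         train_batches = pickle.load(f)
    #     with open(config.dev_data_path, 'rb') as f:
    #         dev_batches = pickle.load(f)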
Exemplo n.º 22
0
class MPRSNE(object):
    def __init__(self, g, saveFile, init_emb_file=None):
        self.config = Config()
        self.g = g
        self.init_emb_file = init_emb_file
        self.saveFile = saveFile
        self.reader = Reader(self.g)
        self.layer = self.setup_layer()
        self.train_op = self.setup_train_op()
        self.test_metrics = self.get_test_metrics()
        self.loss_train_merged = tf.summary.merge(tf.get_collection('loss_train_summary'))
        self.test_merged = tf.summary.merge(tf.get_collection('multi_label_classification'))
        self.train_writer = tf.summary.FileWriter('model/summary/{}'.format(self.saveFile))

    def setup_layer(self):
        layer = dict()
        batch_num = self.config.nodes_seq_batch_num
        emb_dim = self.config.emb_dim
        neg_sample_num = self.config.loss1_neg_sample_num
        walk_len = self.config.walk_length
        gru_layer_num = self.config.gru_layer_num
        keep_prob = self.config.keep_prob
        labels_num = self.g.labels_num
        context_size = self.config.context_size
        label_l2_loss_wd = self.config.label_l2_loss_wd
        emb_l2_loss_wd = self.config.emb_l2_loss_wd
        pos_weight = 1.0 / self.config.pos_weight


        walk_nodes = tf.placeholder(tf.int32, shape=[batch_num, walk_len], name='walk_nodes')
        walk_nodes_labels = tf.placeholder(tf.float32, shape=[batch_num, walk_len, labels_num],
                                           name='walk_nodes_labels')
        neg_walk_nodes = tf.placeholder(tf.int32, shape=[batch_num, walk_len, neg_sample_num], name='neg_walk_node')

        layer['walk_nodes'] = walk_nodes
        layer['walk_nodes_labels'] = walk_nodes_labels
        layer['neg_walk_nodes'] = neg_walk_nodes

        if self.init_emb_file is not None:
            emb = list()
            with open(self.init_emb_file) as file:
                csv_reader = csv.reader(file)
                for row in csv_reader:
                    emb.append(row)
            emb = np.array(emb, dtype=np.float32)
            emb_initializer = tf.constant_initializer(emb)
        else:
            emb_initializer = tf.glorot_uniform_initializer(dtype=tf.float32)

        with tf.variable_scope('emb'):
            emb = tf.get_variable(name='emb',
                                  shape=[self.g.nodes_num, emb_dim],
                                  initializer=emb_initializer)

            layer['emb'] = emb
            zeros_vec = tf.constant(0, dtype=tf.float32, shape=[1, emb_dim])
            emb = tf.concat([emb, zeros_vec], 0)
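            # An extra all-zero row is appended after the embedding table
            # (index == nodes_num), presumably so that a padding id looks up a
            # zero vector; the trainable variable layer['emb'] excludes it.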

        with tf.variable_scope('sup_emb'):
            sup_emb = tf.get_variable(name='sup_emb',
                                      shape=[self.g.nodes_num, emb_dim],
                                      initializer=tf.glorot_normal_initializer(dtype=tf.float32))
            layer['sup_emb'] = sup_emb

        with tf.variable_scope('labels_emb'):
            labels_emb = tf.get_variable(name='labels_emb',
                                         shape=[self.g.labels_num, emb_dim],
                                         initializer=tf.glorot_normal_initializer(dtype=tf.float32))
            labels_l2_loss = label_l2_loss_wd * tf.nn.l2_loss(labels_emb)
            tf.add_to_collection('label_loss_weight_decay', labels_l2_loss)
            layer['labels_emb'] = labels_emb

        with tf.variable_scope('lookup'):
            walk_nodes_emb = tf.nn.embedding_lookup(emb, walk_nodes, name='context_nodes_emb')
            walk_nodes_emb = layer_norm(walk_nodes_emb)
            true_sup_emb = tf.nn.embedding_lookup(sup_emb, walk_nodes, name='true_sup_emb')
            neg_sup_emb = tf.nn.embedding_lookup(sup_emb, neg_walk_nodes, name='neg_sup_emb')


        fw_context_gru_cell_list = [tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.DeviceWrapper(
            MRUCell(num_units=emb_dim,
                    forget_bias=0.0,
                    state_is_tuple=True,
                    activation=tf.nn.tanh,
                    reuse=tf.get_variable_scope().reuse,
                    kernel_initializer=tf.glorot_normal_initializer(dtype=tf.float32)),
            "/gpu:%d" % (i % 2)),
            input_keep_prob=keep_prob, variational_recurrent=True, input_size=emb_dim, dtype=tf.float32)
            for i in range(gru_layer_num)]

        fw_context_gru_cell = tf.nn.rnn_cell.MultiRNNCell(
            fw_context_gru_cell_list, state_is_tuple=True)
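        # Each layer is an MRUCell wrapped with variational input dropout (the
        # same dropout mask is reused at every timestep) and placed on
        # alternating GPUs (/gpu:0, /gpu:1) via DeviceWrapper.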

        fw_context_Residual_gru_cell = fw_context_gru_cell

        bw_context_gru_cell_list = [tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.DeviceWrapper(
            MRUCell(num_units=emb_dim,
                    forget_bias=0.0,
                    state_is_tuple=True,
                    activation=tf.nn.tanh,
                    reuse=tf.get_variable_scope().reuse,
                    kernel_initializer=tf.glorot_normal_initializer(dtype=tf.float32)),
            "/gpu:%d" % (i % 2)),
            input_keep_prob=keep_prob, variational_recurrent=True, input_size=emb_dim, dtype=tf.float32)
            for i in range(gru_layer_num)]

        bw_context_gru_cell = tf.nn.rnn_cell.MultiRNNCell(
            bw_context_gru_cell_list, state_is_tuple=True)


        bw_context_Residual_gru_cell = bw_context_gru_cell

        with tf.variable_scope('context'):
            (context_outputs, _) = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_context_Residual_gru_cell,
                                                                   cell_bw=bw_context_Residual_gru_cell,
                                                                   inputs=walk_nodes_emb,
                                                                   sequence_length=[walk_len] * batch_num,
                                                                   parallel_iterations=batch_num * 2,
                                                                   dtype=tf.float32)
            context_outputs = tf.concat(context_outputs, -1)



        with tf.variable_scope('reduce_layer'):
            emb_weight = tf.get_variable(name='emb_weight',
                                         shape=[emb_dim * 2, emb_dim],
                                         initializer=tf.glorot_normal_initializer(dtype=tf.float32))
            emb_bias = tf.get_variable(name='emb_bias',
                                       shape=[emb_dim],
                                       initializer=tf.constant_initializer(0.0))
            context_outputs = tf.reshape(context_outputs, [-1, emb_dim * 2])
            context_outputs = tf.matmul(context_outputs, emb_weight) + emb_bias
            context_outputs = tf.reshape(context_outputs, [batch_num, walk_len, emb_dim])


            emb_l2_loss = emb_l2_loss_wd * tf.nn.l2_loss(emb_weight)
            tf.add_to_collection('emb_loss_weight_decay', emb_l2_loss)
            context_outputs = tf.nn.sigmoid(context_outputs)

        with tf.variable_scope('output_gates'):
            o_weight = tf.get_variable(name='o_weight',
                                       shape=[emb_dim, emb_dim],
                                       initializer=tf.glorot_normal_initializer(dtype=tf.float32))
            o_diag = tf.get_variable(name='o_diag',
                                     shape=[emb_dim])
            o_bias = tf.get_variable(name='o_bias',
                                     shape=[emb_dim],
                                     initializer=tf.constant_initializer(0.0))
            o_emb = tf.reshape(walk_nodes_emb, [-1, emb_dim])
            o_outputs = tf.reshape(context_outputs, [-1, emb_dim])
            o_gates = tf.matmul(o_emb, o_weight) + o_diag * o_outputs + o_bias
            o_gates = tf.layers.batch_normalization(o_gates, axis=-1)
            o_gates = tf.sigmoid(o_gates)
            o_gates = tf.reshape(o_gates, [batch_num, walk_len, emb_dim])
            label_context = o_gates * context_outputs



        with tf.variable_scope('walk_context'):
            walk_context = list()

            for i in range(walk_len):
                tmp_context = tf.concat([context_outputs[:, :i, :], context_outputs[:, i + 1:, :]], axis=1)
                tmp_nodes_emb = tf.concat([walk_nodes_emb[:, :i, :], walk_nodes_emb[:, i + 1:, :]], axis=1)
                tmp_context = tmp_context + tmp_nodes_emb
                # [batch, walk_len - 1, emb]

                walk_context.append(tmp_context)

            context_vec = tf.stack(walk_context, axis=1)
            # [batch, walk_len, walk_len - 1, emb]


            context_mask = np.zeros([walk_len, walk_len - 1], dtype=np.float32)
            for i in range(walk_len):
                mask_min = np.max([0, i - context_size])
                mask_max = np.min([walk_len - 1, i + context_size])
                context_mask[i, mask_min: mask_max] = 1.0
            context_mask = tf.constant(context_mask, dtype=tf.float32)
            context_mask = tf.stack([context_mask] * batch_num, axis=0)
            context_mask = tf.stack([context_mask] * emb_dim, axis=3)
            # [batch, walk_len, walk_len - 1, emb]
            context_vec = context_mask * context_vec
            # [batch, walk_len, walk_len - 1, emb]
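            # Positions farther than context_size from the centre node are
            # zeroed out, so each node's context vector only aggregates the
            # nearby nodes in the walk.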

        with tf.variable_scope('loss'):
            label_context = tf.reshape(label_context, [-1, emb_dim])
            label_score = tf.matmul(label_context, labels_emb, transpose_b=True)
            label_bias = tf.get_variable(name='label_bias',
                                         shape=[labels_num],
                                         dtype=tf.float32,
                                         initializer=tf.constant_initializer(0.0))
            label_score = label_score + label_bias
            label_score = tf.reshape(label_score, [batch_num, walk_len, labels_num])
            label_loss = tf.nn.weighted_cross_entropy_with_logits(targets=walk_nodes_labels, logits=label_score,
                                                                  pos_weight=pos_weight)
            label_loss = label_loss / pos_weight
            # [batch, walk_len, label]
            label_loss = tf.reduce_sum(label_loss, 2)
            # [batch, walk_len]
            label_loss = tf.reduce_sum(label_loss, 1)
            # [batch]
            label_loss = tf.reduce_mean(label_loss, 0)
            label_loss_wd = tf.add_n(tf.get_collection('label_loss_weight_decay'))
            label_loss = label_loss + label_loss_wd
            loss_summ = tf.summary.scalar('label_loss', label_loss)
            tf.add_to_collection('loss_train_summary', loss_summ)
            layer['label_loss'] = label_loss * 1.0

            context_vec = tf.stack([context_vec] * (neg_sample_num + 1), axis=2)
            # [batch, walk_len, node, walk_len - 1, emb]
            true_emb = tf.expand_dims(true_sup_emb, axis=2)
            # [batch, walk_len, 1, emb]
            true_neg_emb = tf.concat([true_emb, tf.negative(neg_sup_emb)], 2)
            # [batch, walk_len, node, emb]
            true_neg_emb = tf.stack([true_neg_emb] * (walk_len - 1), axis=3)
            # [batch, walk_len, node, walk_len - 1, emb]


            sig = tf.sigmoid(tf.reduce_sum(tf.multiply(context_vec, true_neg_emb), 4))
            # [batch, walk_len, node, walk_len - 1]
            sig_log = tf.log(sig)
            sig_log = tf.reduce_sum(sig_log, 2)
            # [batch, walk_len, walk_len - 1]
            sig_log = tf.reduce_sum(sig_log, 2)
            # [batch, walk_len]

            sig_log_batch = tf.reduce_sum(sig_log, 1)
            # [batch]
            emb_loss = tf.reduce_mean(sig_log_batch, 0, name='emb_loss')
            emb_loss = tf.negative(emb_loss)
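            # Negated log-likelihood of the negative-sampling objective:
            # log sigmoid(context . true_emb) for the true node plus
            # log sigmoid(-context . neg_emb) for each sampled negative,
            # summed over the masked context positions.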
            emb_loss_wd = tf.add_n(tf.get_collection('emb_loss_weight_decay'))
            emb_loss = emb_loss + emb_loss_wd
            layer['emb_loss'] = emb_loss
            loss_summ = tf.summary.scalar('emb_loss', emb_loss)
            tf.add_to_collection('loss_train_summary', loss_summ)

            layer['loss'] = layer['label_loss'] + layer['emb_loss']
            loss_summ = tf.summary.scalar('loss', layer['loss'])
            tf.add_to_collection('loss_train_summary', loss_summ)

        return layer

    def setup_train_op(self):
        with tf.variable_scope('train_op'):
            loss_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.0005
            decay_steps = 10000
            learning_rate = tf.train.exponential_decay(starter_learning_rate, loss_step, decay_steps, 0.5,
                                                       staircase=True)
            learning_rate = tf.maximum(0.0001, learning_rate)
            loss_summ = tf.summary.scalar('learning_rate', learning_rate)
            tf.add_to_collection('loss_train_summary', loss_summ)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads_pre, tvars = zip(*optimizer.compute_gradients(self.layer['loss']))
            grads, _ = tf.clip_by_global_norm(grads_pre, self.config.max_grad_norm)
            train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=loss_step)


            pre_optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            pre_tvars = [var for var in tf.trainable_variables() if var.name != 'emb/emb:0']
            pre_grads_pre, _ = zip(*pre_optimizer.compute_gradients(self.layer['loss'], var_list=pre_tvars))
            pre_grads_pre, _ = tf.clip_by_global_norm(pre_grads_pre, self.config.max_grad_norm)
            pre_train_op = pre_optimizer.apply_gradients(zip(pre_grads_pre, pre_tvars))
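            # Two update ops are returned: train_op uses the decayed learning
            # rate and updates every trainable variable, while pre_train_op
            # uses a fixed 1e-3 learning rate and leaves the pre-initialised
            # embedding matrix 'emb/emb:0' untouched (used for the first 200
            # steps when init_emb_file is given).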
            return [train_op, pre_train_op]



    def train(self, passes, new_training=True):
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True
        with tf.Session(config=sess_config) as sess:
            if new_training:
                saver, global_step = Model.start_new_session(sess)
            else:
                saver, global_step = Model.continue_previous_session(sess,
                                                                     model_file='model/saver/{}'.format(self.saveFile),
                                                                     ckpt_file='model/saver/{}/checkpoint'.format(
                                                                         self.saveFile))

            self.train_writer.add_graph(sess.graph, global_step=global_step)

            walk_times = 1
            for step in range(1 + global_step, 1 + passes + global_step):
                with tf.variable_scope('Train'):
                    walk_nodes = self.reader.nodes_walk_reader()
                    neg_walk_nodes = [self.g.negative_sample(walk_nodes[i],
                                                             self.config.loss1_neg_sample_num,
                                                             self.g.nodes_degree_table)
                                      for i in range(len(walk_nodes))]
                    neg_walk_nodes = np.array(neg_walk_nodes)
                    walk_nodes_labels = list()
                    for node_list in walk_nodes:
                        nodes_label_tmp = self.g.get_train_node_label(node_list)
                        walk_nodes_labels.append(nodes_label_tmp)
                    walk_nodes_labels = np.array(walk_nodes_labels)

                    # if (step - 1) % int(self.g.nodes_num / self.config.nodes_seq_batch_num) == 0:
                    #     print(walk_times)
                    #     walk_times += 1

                    if step < 200 and self.init_emb_file is not None:
                        train_op = self.train_op[1]
                    else:
                        train_op = self.train_op[0]
                    _, train_summary, loss = sess.run(
                        [train_op,
                         self.loss_train_merged,
                         self.layer['loss']],
                        feed_dict={self.layer['walk_nodes']: walk_nodes,
                                   self.layer['walk_nodes_labels']: walk_nodes_labels,
                                   self.layer['neg_walk_nodes']: neg_walk_nodes})

                    self.train_writer.add_summary(train_summary, step)

                    if step % 500 == 0 or step == 1:
                        [node_emb, sup_emb] = sess.run([self.layer['emb'],
                                                        self.layer['sup_emb']])
                        node_emb = np.concatenate((node_emb, sup_emb), axis=1)
                        print("gobal_step {},loss {}".format(step, loss))

                    if step % 1000 == 0 or step == 1:
                        micro_f1, macro_f1 = self.multi_label_node_classification(node_emb)
                        [test_summary] = sess.run([self.test_merged],
                                                  feed_dict={self.test_metrics['micro_f1']: micro_f1,
                                                             self.test_metrics['macro_f1']: macro_f1})
                        print("micro_f1 {},macro_f1 {}".format(micro_f1, macro_f1))
                        self.train_writer.add_summary(test_summary, step)
                        saver.save(sess, 'model/saver/{}/MPRSNE'.format(self.saveFile), global_step=step)
                        print('checkpoint saved')

    def get_emb(self):
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            saver, global_step = Model.continue_previous_session(sess,
                                                                 model_file='model/saver/{}'.format(self.saveFile),
                                                                 ckpt_file='model/saver/{}/checkpoint'.format(self.saveFile))
            ids_set = np.array(range(self.g.nodes_num))
            emb_set = tf.nn.embedding_lookup(self.layer['emb'], ids_set)
            sup_emb_set = tf.nn.embedding_lookup(self.layer['sup_emb'], ids_set)
            [emb, sup_emb] = sess.run([emb_set, sup_emb_set])
            emb = np.concatenate([emb, sup_emb], axis=1)
        return emb

    def multi_label_node_classification(self, emb):
        g = self.g
        emb = emb[:, :self.config.emb_dim]
        classes = self.g.validate_labels_set
        train_nodes_id = list()
        train_y = list()
        train_x = list()
        for node in g.train_nodes_labels.keys():
            train_nodes_id.append(node)
            train_y.append(g.train_nodes_labels[node])
            train_x.append(emb[g.nodes_ids[node], :])

        train_x = np.array(train_x)
        preprocess_y = sklearn.preprocessing.MultiLabelBinarizer(classes=classes)
        train_y = preprocess_y.fit_transform(train_y)

        test_nodes_id = list()
        test_y = list()
        test_x = list()
        for node in g.test_nodes_labels.keys():
            test_nodes_id.append(node)
            test_y.append(g.test_nodes_labels[node])
            test_x.append(emb[g.nodes_ids[node], :])

        test_x = np.array(test_x)
        # Reuse the binarizer fitted on the training labels for the test set.
        test_y = preprocess_y.transform(test_y)

        multi_label_classifier = sklearn.multiclass.OneVsRestClassifier(sklearn.linear_model.LogisticRegression(),
                                                                        n_jobs=1)
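        # Standard multi-label evaluation: one binary logistic regression per
        # label (one-vs-rest) is trained on the learned node embeddings and
        # scored with micro/macro F1.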
        multi_label_classifier.fit(train_x, train_y)
        pred_y = multi_label_classifier.predict(test_x)
        micro_f1 = sklearn.metrics.f1_score(test_y, pred_y, average='micro')
        macro_f1 = sklearn.metrics.f1_score(test_y, pred_y, average='macro')
        return micro_f1, macro_f1

    def get_test_metrics(self):
        micro_f1 = tf.placeholder(dtype=tf.float32, shape=None, name='micro_f1')
        macro_f1 = tf.placeholder(dtype=tf.float32, shape=None, name='macro_f1')
        loss_summ = tf.summary.scalar('micro_f1', micro_f1)
        tf.add_to_collection('multi_label_classification', loss_summ)
        loss_summ = tf.summary.scalar('macro_f1', macro_f1)
        tf.add_to_collection('multi_label_classification', loss_summ)
        test_metrics = dict()
        test_metrics['micro_f1'] = micro_f1
        test_metrics['macro_f1'] = macro_f1
        return test_metrics
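
# A minimal usage sketch, assuming a graph object `g` compatible with Reader and
# existing model/saver/<saveFile> and model/summary/<saveFile> directories:
#
#     model = MPRSNE(g, saveFile='my_run', init_emb_file=None)
#     model.train(passes=10000, new_training=True)
#     emb = model.get_emb()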