Пример #1
0
def SentimentAnalysis(param, text):
    """Run sentiment analysis on *text* with the model named in *param*.

    The 'cnn' model has a dedicated prediction path; any other model
    name is delegated to an Instructor built for that model.
    """
    model_name = param['model']
    if model_name == 'cnn':
        return cnn_predict(text)
    return Instructor(model_name=model_name).run(text)
Пример #2
0
def main(args):
    """Entry point: run train or test for the selected task category.

    Dispatches on (args.category, args.test); unknown categories are a
    silent no-op, matching the original if/elif chains.
    """
    instr = Instructor(args)
    actions = {
        ('abae', False): instr.abae_train,
        ('absc', False): instr.train,
        ('abae', True): instr.abae_test,
        ('absc', True): instr.test,
    }
    action = actions.get((args.category, bool(args.test)))
    if action is not None:
        action()
Пример #3
0
        'adadelta': optim.Adadelta,
        'adagrad': optim.Adagrad,
        'adam': optim.Adam,
        'adamax': optim.Adamax,
        'asgd': optim.ASGD,
        'sgd': optim.SGD
    }

    # Initialize the remaining options: resolve configured names into
    # concrete classes / optimizer constructors.
    opt.model_class = model_classes[opt.model_name]
    opt.optimizer = optimizers[opt.optim]
    # Default to CUDA when available unless a device was set explicitly.
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') \
        if opt.device is None else torch.device(opt.device)

    # Initialize modelNet parameters from the resolved options.
    modelNet.initParameter(opt)

    instructor = Instructor(opt)

    # Show results before training (baseline).
    instructor.beforeTrain()

    # Start training the model.
    instructor.beginTrain()

    # Show post-training results on the validation set.
    instructor.verifyModel()

    # Run the test set and save the model.
    instructor.testModel()
Пример #4
0
    # Query per-GPU utilization via nvidia-smi and pick the least-loaded GPU.
    os.system('nvidia-smi -q -d Utilization | grep Gpu > log/gpu')
    util_gpu = [int(line.strip().split()[2]) for line in open('log/gpu', 'r')]

    gpu_count = torch.cuda.device_count()
    device_choose = [i for i in range(gpu_count)]
    device = util_gpu.index(min(util_gpu))  # index of the least-utilized GPU
    # device = 0
else:
    # No CUDA available: no GPU candidates; -1 stands for the CPU.
    device_choose = []
    device = -1
device = device_dict[device]  # NOTE(review): device_dict is defined above this chunk — verify mapping
device_choose.append(-1)  # CPU (-1) is always a valid fallback choice
'''train type choices'''
from train import Instructor

# Map each train-type name to the bound Instructor method that runs it.
inst = Instructor()
TRAIN_TYPE_CHO = {
    'weak_train': inst.weakly_train,
    'clas_train': inst.classification_train,
    'clas_train_fix': inst.classification_train_fix,
    'align_clas_train': inst.align_classification_train,
    'clas_train_crf': inst.classification_train_crf,
    'align_clas_train_crf': inst.align_classification_train_crf,
    'aspect_train': inst.aspect_train,
}


def init_config(opt):
    """Copy run options into module-level config globals.

    NOTE(review): the function body continues beyond this chunk.
    """
    global clas_lr, weak_lr, epoch, batch_size, save_mode, train_type, train_phase, \
        clas_model, lambda_, pretrained_model
    global d_input, n_layers, embed_dim, hidden_dim, dropout, need_pos
Пример #5
0
    def __init__(self):
        """Build a fixed configuration and load (or train) the sentiment model.

        All hyper-parameters are hard-coded on an ad-hoc ``Option``
        namespace. The configured model class is instantiated from a
        saved state dict when one exists; otherwise an Instructor is run
        to train a model, which is then saved for future runs.
        """
        class Option(object):
            pass

        opt = Option()

        # --- fixed hyper-parameters ---
        opt.model_name = "bert_spc"
        opt.dataset = "twitter"
        opt.datasets = ["twitter", "restaurant", "laptop"]
        opt.optimizer = "adam"
        opt.initializer = "xavier_uniform_"
        opt.learning_rate = 2e-5
        opt.dropout = 0.1
        opt.l2reg = 0.1
        opt.num_epoch = 5
        opt.batch_size = 24
        opt.log_step = 5
        opt.logdir = 'log'
        opt.embed_dim = 300
        opt.hidden_dim = 300
        opt.bert_dim = 768
        opt.pretrained_bert_name = 'bert-base-uncased'
        opt.max_seq_len = 80
        opt.polarities_dim = 3
        opt.hops = 3
        opt.device = None

        # --- lookup tables mapping config strings to implementations ---
        model_classes = {
            'lstm': LSTM,
            'td_lstm': TD_LSTM,
            'atae_lstm': ATAE_LSTM,
            'ian': IAN,
            'memnet': MemNet,
            'ram': RAM,
            'cabasc': Cabasc,
            'tnet_lf': TNet_LF,
            'aoa': AOA,
            'mgan': MGAN,
            'bert_spc': BERT_SPC,
            'aen': AEN,
            'aen_bert': AEN_BERT,
        }
        dataset_files = {
            'twitter': {
                'train': './datasets/acl-14-short-data/train.raw',
                'test': './datasets/acl-14-short-data/test.raw'
            },
            'restaurant': {
                'train': './datasets/semeval14/Restaurants_Train.xml.seg',
                'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'
            },
            'laptop': {
                'train': './datasets/semeval14/Laptops_Train.xml.seg',
                'test': './datasets/semeval14/Laptops_Test_Gold.xml.seg'
            }
        }
        # Per-model list of input tensor names fed to the forward pass.
        input_colses = {
            'lstm': ['text_raw_indices'],
            'td_lstm': [
                'text_left_with_aspect_indices',
                'text_right_with_aspect_indices'
            ],
            'atae_lstm': ['text_raw_indices', 'aspect_indices'],
            'ian': ['text_raw_indices', 'aspect_indices'],
            'memnet': ['text_raw_without_aspect_indices', 'aspect_indices'],
            'ram': ['text_raw_indices', 'aspect_indices', 'text_left_indices'],
            'cabasc': [
                'text_raw_indices', 'aspect_indices',
                'text_left_with_aspect_indices',
                'text_right_with_aspect_indices'
            ],
            'tnet_lf':
            ['text_raw_indices', 'aspect_indices', 'aspect_in_text'],
            'aoa': ['text_raw_indices', 'aspect_indices'],
            'mgan':
            ['text_raw_indices', 'aspect_indices', 'text_left_indices'],
            'bert_spc': ['text_bert_indices', 'bert_segments_ids'],
            'aen': ['text_raw_indices', 'aspect_indices'],
            'aen_bert': ['text_raw_bert_indices', 'aspect_bert_indices'],
        }
        initializers = {
            'xavier_uniform_': torch.nn.init.xavier_uniform_,
            # FIX: was torch.nn.init.xavier_normal (deprecated alias); use the
            # in-place variant so the value matches its key and its siblings.
            'xavier_normal_': torch.nn.init.xavier_normal_,
            'orthogonal_': torch.nn.init.orthogonal_,
        }
        optimizers = {
            'adadelta': torch.optim.Adadelta,  # default lr=1.0
            'adagrad': torch.optim.Adagrad,  # default lr=0.01
            'adam': torch.optim.Adam,  # default lr=0.001
            'adamax': torch.optim.Adamax,  # default lr=0.002
            'asgd': torch.optim.ASGD,  # default lr=0.01
            'rmsprop': torch.optim.RMSprop,  # default lr=0.01
            'sgd': torch.optim.SGD,
        }

        # Resolve the string settings into concrete objects (opt.initializer
        # and opt.optimizer are rebound from names to callables here).
        opt.model_class = model_classes[opt.model_name]
        opt.dataset_file = dataset_files[opt.dataset]
        opt.dataset_files = dataset_files
        opt.inputs_cols = input_colses[opt.model_name]
        opt.initializer = initializers[opt.initializer]
        opt.optimizer = optimizers[opt.optimizer]
        opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') \
            if opt.device is None else torch.device(opt.device)
        self.opt = opt
        self.tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                        opt.pretrained_bert_name)
        opt.state_dict_path = 'state_dict/trained.torch'
        try:
            # Fast path: load previously trained weights from disk.
            bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                             state_dict=torch.load(
                                                 opt.state_dict_path))
            self.model = opt.model_class(bert, opt).to(opt.device)
        except FileNotFoundError:
            # No saved weights yet: train from scratch and cache the result.
            # NOTE(review): this `bert` is not passed on; kept for its
            # download/caching side effect — confirm it is still needed.
            bert = BertModel.from_pretrained(opt.pretrained_bert_name)
            ins = Instructor(opt)

            # FIX: the original assigned ins.run()'s return to self.model and
            # immediately overwrote it with ins.model; run for its side
            # effects only.
            ins.run()

            self.model = ins.model
            torch.save(self.model.state_dict(), opt.state_dict_path)
Пример #6
0
        # two categories model
        # Each entry: [train_file, test_file, num_categories, flag]
        # NOTE(review): meaning of the trailing 0 is not visible in this chunk.
        ['all_train_2.tsv', 'all_test_2.tsv', 2, 0],
        ['all_train_2.tsv', 'lap_test_2.tsv', 2, 0],
        ['all_train_2.tsv', 'rest_test_2.tsv', 2, 0],
        ['lap_train_2.tsv', 'lap_test_2.tsv', 2, 0],
        ['rest_train_2.tsv', 'rest_test_2.tsv', 2, 0],
    ]

    # for param in program_run_param:
    #     print('=' * 100)
    #     print('>>> Current program param: {}'.format(param))

    # init_program(param)
    pre_dir = create_path()
    if not config.pretrain:
        # Run several train/test rounds, then rename the save directory with
        # the average accuracy over the saved models.
        for i in range(config.save_model_num):
            print('=' * 100)
            print('>>> Current run times {} of {}'.format(
                i + 1, config.save_model_num))
            instructor = Instructor(pre_dir)
            instructor.begin_train()
            instructor.test_model()
        # NOTE(review): if config.save_model_num == 0 this raises NameError —
        # `instructor` is only bound inside the loop.
        avg_ac = instructor.load_model_and_test(pre_dir)
        os.rename(pre_dir, pre_dir + '_{:6f}'.format(avg_ac))
    else:
        # Pretraining mode: single training run, no test pass.
        instructor = Instructor(pre_dir)
        instructor.begin_train()
        # (The string below reads "test the model".)
        '''测试模型'''
        # instructor.test_model()
Пример #7
0
 def __init__(self):
     """Build the data pipeline and the training instructor eagerly."""
     # Construction order preserved: data preparation, loader, trainer.
     prepare = DataPrepare()
     loader = CornerData()
     trainer = Instructor()
     self.data_prepare = prepare
     self.my_loader = loader
     self.instructor = trainer
Пример #8
0
class MainInstructor:
    """Top-level driver: prepares data and dispatches one training phase."""

    def __init__(self):
        """Build the data-preparation helper, loader, and instructor."""
        self.data_prepare = DataPrepare()
        self.my_loader = CornerData()
        self.instructor = Instructor()

    def start_(self):
        """Dispatch to the routine selected by config.train_phase.

        Unknown phase values are a silent no-op.
        """
        if config.train_phase == 'weakly':
            self.create_path()  # create save model path
            self.weak_train()
        elif config.train_phase == 'classify':
            self.clas_train()
        elif config.train_phase == 'aspect':
            self.aspect_train()
        elif config.train_phase == 'ae_apriori':
            self.asp_extra_apriori()

    def create_path(self):
        """Create a timestamped save-model directory and record it in config.

        NOTE(review): config.save_model_path is only updated when the
        directory did not already exist — confirm that is intended.
        """
        current = time.strftime('%m-%d_%H:%M', time.localtime())
        path = '/media/sysu2018/4TBDisk/william/corner_weakly_model/{}_retain{}_step{}_thres{}/'.format(
            current, '1' if config.if_retain else '0', config.valid_step, config.valid_thres)
        folder = os.path.exists(path)
        if not folder:
            os.makedirs(path)
            print('>>> New save model path:', path)
            # set config save_model_path
            config.save_model_path = path
        else:
            print('>>> Folder {} already exists!'.format(path))

    def weak_train(self):
        """Run weakly-supervised training and collect its accuracy."""
        '''obtain test, valid and test data and dataloader'''
        all_data, final_embedding, asp_list, test_pos, test_neg = self.data_prepare.weakly_data_process
        embedding, train_dataloader = self.my_loader.pp_dataloader_weak(all_data, final_embedding)

        '''calculate accuracy'''
        print('=' * 100)
        print('Begin train...')
        compare_acc = []
        acc = self.instructor.weakly_train(train_dataloader, test_pos, test_neg, embedding, asp_list)
        compare_acc.append(acc)

    def clas_train(self):
        """Run classification training; optionally re-run without pretraining
        and plot both accuracy curves when config.plot is set."""
        '''obtain test, valid and test data and dataloader'''
        clas_train, clas_valid, classify_test_data, classify_final_embedding \
            = self.data_prepare.clas_data_process
        embedding, train_dataloader, valid_dataloader, test_dataloader \
            = self.my_loader.pp_dataloader_clas(
            (clas_train,
             clas_valid,
             classify_test_data,
             classify_final_embedding))

        '''calculate accuracy'''

        print('=' * 100)
        print('Begin train...')
        compare_acc = []
        acc = self.instructor.classification_train(train_dataloader,
                                                   valid_dataloader,
                                                   test_dataloader,
                                                   embedding)
        # acc = Train.classification_train_fix(config.clas_model,
        #                                      train_dataloader,
        #                                      test_dataloader,
        #                                      embedding)
        compare_acc.append(acc)

        '''if use plot'''
        if config.plot:
            # Second run with pretrain=False to compare against the first.
            import matplotlib.pyplot as plt

            acc = self.instructor.classification_train(train_dataloader,
                                                       valid_dataloader,
                                                       test_dataloader,
                                                       embedding,
                                                       pretrain=False)
            compare_acc.append(acc)
            plt.figure()
            plt.plot(compare_acc[0], 'r', marker='o')
            plt.plot(compare_acc[1], 'b', marker='*')
            plt.ylim((0.7, 0.9))
            plt.show()

    def aspect_train(self):
        """Run aspect-extraction training."""
        '''obtain test, valid and test data and dataloader'''
        train_data, final_embedding, sentence = self.data_prepare.aspect_extract_data_process
        embedding, train_data_loader = self.my_loader.pp_dataloader_aspect((train_data, final_embedding))

        '''start training'''
        print('=' * 100)
        print('Begin train...')
        self.instructor.aspect_train(train_data_loader, embedding, sentence)

    def asp_extra_apriori(self):
        """Aspect extraction based on Apriori — currently disabled (no-op)."""
        # all_data = self.data_prepare.apriori_data

        # file_name = 'transaction_file1.txt'
        # with open(file_name, mode='w') as file:
        #     for idx, sent in enumerate(all_data):
        #         file.write(sent + '\n')
        #         file.write('\t'.join([' '.join(NP) for NP in cleaned_sent[idx]]) + '\n')
        pass