def run(self):
    start = datetime.now()
    thread_id = start.strftime('%Y%m%d%H%M%S')
    logging.info("Thread %s - %s started" % (thread_id, self.file_path))

    # Load the raw file from SFTP into an in-memory buffer.
    sftp_reader = SFTPReader(self.host, self.port, self.username, self.password,
                             self.ssh_key_path, self.sftp_max_retry)
    byte_io = sftp_reader.load_file(self.file_path)
    sftp_reader.close()
    step = datetime.now()
    logging.info("Thread %s - %s loaded data - Time: %d"
                 % (thread_id, self.file_path, (step - start).seconds))

    if self.try_send_data:
        # Parse the raw bytes into a dataframe.
        data_loader = DataLoader()
        processed_df = data_loader.load(byte_io, self.columns_seletion,
                                        fill_na_dict=self.fill_na_dict,
                                        concat_dict=self.concat_dict,
                                        rename_dict=self.rename_dict)
        step = datetime.now()
        logging.info("Thread %s - %s parsed data - Time: %d"
                     % (thread_id, self.file_path, (step - start).seconds))

        # Publish the parsed rows to Event Hubs in batches.
        event_sender = EventSender(self.connection_string, self.eventhub_name,
                                   self.max_event_per_batch, self.eventhub_max_retry,
                                   self.metadata, self.zvelo_helper)
        event_sender.send(processed_df)
        event_sender.close()
        step = datetime.now()
        logging.info("Thread %s - %s sent data - Time: %d"
                     % (thread_id, self.file_path, (step - start).seconds))

    # Copy raw data to ADLS; both the account name and key must be set.
    if self.blob_name and self.blob_key:
        blob_helper = BlobHelper(self.blob_name, self.blob_key)
        file_name = self.file_path[self.file_path.rindex("/") + 1:]
        blob_path = "%s/%s" % (self.blob_path, file_name)
        byte_io.seek(0)  # rewind the buffer before re-uploading it
        blob_helper.upload_data(byte_io, self.blob_container, blob_path, overwrite=True)

    step = datetime.now()
    logging.info("Thread %s - %s stopped - Time: %d"
                 % (thread_id, self.file_path, (step - start).seconds))
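# Usage sketch (an assumption, not part of the source): run() above follows the
# threading.Thread convention, so the enclosing class presumably subclasses
# Thread. FileTransferThread and its constructor arguments are hypothetical
# stand-ins for that class.
def process_files(file_paths, config):
    # One worker per remote file; Thread.start() invokes run() concurrently.
    threads = [FileTransferThread(path, **config) for path in file_paths]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # block until every file is loaded, sent, and archived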
parser.add_argument('--file_path', type=str, default='./data/source.pt')
parser.add_argument('--train_epochs', type=int, default=10000)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--momentum', type=float, default=0.9)
args = parser.parse_args()

logging.basicConfig(level='INFO',
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.info('{}-way, {}-shot'.format(args.n_way, args.k_shot))

# prepare data
data_loader = DataLoader()
data_loader.load(args.file_path)
embedding = data_loader.embedded(vocab_size=args.vocab_size,
                                 embedding_size=args.embedding_size)

# model
model = EncoderInductionRelation(vocab_size=args.vocab_size,
                                 embedding_size=args.embedding_size,
                                 class_num=args.n_way,
                                 hidden_size=args.hidden_size,
                                 embedding=embedding)

# optimize
loss_f = nn.MSELoss()
optimize = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

# train
training_loss = 0
for epoch in range(args.train_epochs):
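    # The loop body is not shown in the source; what follows is a minimal
    # sketch of one episodic training step under stated assumptions: that
    # data_loader exposes a (hypothetical) next_batch(n_way, k_shot) method
    # yielding a support set, a query set, and query labels; that the model
    # maps (support, query) to per-class relation scores; and that torch is
    # imported alongside nn and optim. MSE against one-hot targets matches
    # the MSELoss chosen above.
    support, query, labels = data_loader.next_batch(args.n_way, args.k_shot)
    scores = model(support, query)
    target = torch.eye(args.n_way)[labels]  # one-hot targets for MSELoss
    loss = loss_f(scores, target)
    optimize.zero_grad()
    loss.backward()
    optimize.step()
    training_loss += loss.item()
    if (epoch + 1) % 100 == 0:
        logging.info('epoch %d - avg loss %.4f' % (epoch + 1, training_loss / 100))
        training_loss = 0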
    return [token for token, tag in sent]


def crf_extract_feature_train(_data_train, _data_test):
    _x_train = [get_features(s) for s in _data_train]
    _y_train = [get_tags(s) for s in _data_train]
    _x_test = [get_features(s) for s in _data_test]
    _y_test = [get_tags(s) for s in _data_test]
    return _x_train, _y_train, _x_test, _y_test


if __name__ == '__main__':
    dloader = DataLoader('./data/')
    dloader.load()
    if mini_data:
        # Use a small slice of the corpus for quick iteration.
        dt_train, dt_test = dloader.transform_data(sub_train=1000, sub_test=100)
    else:
        dt_train, dt_test = dloader.transform_data()
    x_train, y_train, x_test, y_test = crf_extract_feature_train(dt_train, dt_test)
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
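        # The remaining keyword arguments are an assumption, completing the
        # truncated call; max_iterations and all_possible_transitions are
        # common sklearn_crfsuite settings, not values from the source.
        max_iterations=100,
        all_possible_transitions=True,
    )

    # A hedged sketch of fitting and evaluating the tagger with the standard
    # sklearn_crfsuite API (fit/predict, then a per-label report):
    from sklearn_crfsuite import metrics

    crf.fit(x_train, y_train)
    y_pred = crf.predict(x_test)
    print(metrics.flat_classification_report(y_test, y_pred,
                                             labels=list(crf.classes_), digits=3))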