Example #1
    def run(self):
        start = datetime.now()
        # Timestamp-based id; threads started within the same second would
        # collide, so microseconds are included to keep ids distinct.
        thread_id = start.strftime('%Y%m%d%H%M%S%f')
        logging.info("Thread %s - %s started" % (thread_id, self.file_path))

        sftp_reader = SFTPReader(self.host, self.port, self.username, self.password,
                                 self.ssh_key_path, self.sftp_max_retry)
        byte_io = sftp_reader.load_file(self.file_path)

        sftp_reader.close()
        step = datetime.now()
        logging.info("Thread %s - %s loaded data - Time: %d" % (thread_id, self.file_path, (step - start).seconds))

        if self.try_send_data:
            data_loader = DataLoader()

            processed_df = data_loader.load(byte_io, self.columns_seletion, fill_na_dict=self.fill_na_dict,
                                            concat_dict=self.concat_dict, rename_dict=self.rename_dict)
            step = datetime.now()
            logging.info("Thread %s - %s parsed data - Time: %d" % (thread_id, self.file_path, (step - start).seconds))

            event_sender = EventSender(self.connection_string, self.eventhub_name, self.max_event_per_batch,
                                       self.eventhub_max_retry, self.metadata, self.zvelo_helper)
            event_sender.send(processed_df)
            event_sender.close()

            step = datetime.now()
            logging.info("Thread %s - %s sent data - Time: %d" % (thread_id, self.file_path, (step - start).seconds))

        # Copy the raw data to ADLS when blob credentials are configured
        if self.blob_name and self.blob_key:
            blob_helper = BlobHelper(self.blob_name, self.blob_key)
            file_name = self.file_path.rsplit("/", 1)[-1]
            blob_path = "%s/%s" % (self.blob_path, file_name)
            byte_io.seek(0)  # rewind the buffer before re-reading it
            blob_helper.upload_data(byte_io, self.blob_container, blob_path, overwrite=True)

        step = datetime.now()
        logging.info("Thread %s - %s stopped - Time: %d" % (thread_id, self.file_path, (step - start).seconds))
Example #2
parser.add_argument('--file_path', type=str, default='./data/source.pt')
parser.add_argument('--train_epochs', type=int, default=10000)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--momentum', type=float, default=0.9)

args = parser.parse_args()

logging.basicConfig(
    level='INFO',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logging.info('{}-way, {}-shot'.format(args.n_way, args.k_shot))

# prepare data
data_loader = DataLoader()
data_loader.load(args.file_path)
embedding = data_loader.embedded(vocab_size=args.vocab_size,
                                 embedding_size=args.embedding_size)
# model
model = EncoderInductionRelation(vocab_size=args.vocab_size,
                                 embedding_size=args.embedding_size,
                                 class_num=args.n_way,
                                 hidden_size=args.hidden_size,
                                 embedding=embedding)
# loss and optimizer
loss_f = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

# train
training_loss = 0
for epoch in range(args.train_epochs):
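
The loop body is cut off in this excerpt. For reference, a single SGD/MSE training step in PyTorch follows the pattern below; the model, tensors, and shapes here are placeholders, not the example's actual data:

import torch
from torch import nn, optim

demo_model = nn.Linear(16, 4)           # stand-in for the real model
demo_loss_f = nn.MSELoss()
demo_opt = optim.SGD(demo_model.parameters(), lr=0.01, momentum=0.9)

inputs = torch.randn(8, 16)             # placeholder batch
targets = torch.randn(8, 4)             # placeholder labels

demo_opt.zero_grad()                    # clear gradients from the prior step
loss = demo_loss_f(demo_model(inputs), targets)
loss.backward()                         # backpropagate
demo_opt.step()                         # apply the parameter update
training_loss = loss.item()             # accumulate for logging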
Example #3
def get_tokens(sent):  # name assumed; the original def line is truncated
    return [token for token, tag in sent]


def crf_extract_feature_train(_data_train, _data_test):
    _x_train = [get_features(s) for s in _data_train]
    _y_train = [get_tags(s) for s in _data_train]

    _x_test = [get_features(s) for s in _data_test]
    _y_test = [get_tags(s) for s in _data_test]

    return _x_train, _y_train, _x_test, _y_test
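
# Note (illustrative, since get_features is defined above this excerpt):
# sklearn-crfsuite expects X as one feature dict per token and y as the
# parallel label sequence, so get_features(sent) presumably returns
# something shaped like
#     [{'word.lower()': tok.lower(), 'word.istitle()': tok.istitle(),
#       'word.isdigit()': tok.isdigit()} for tok, tag in sent]
# and get_tags(sent) returns [tag for token, tag in sent].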


if __name__ == '__main__':
    dloader = DataLoader('./data/')
    dloader.load()

    if mini_data:
        dt_train, dt_test = dloader.transform_data(
            sub_train=1000,
            sub_test=100
        )
    else:
        dt_train, dt_test = dloader.transform_data()

    x_train, y_train, x_test, y_test = crf_extract_feature_train(dt_train, dt_test)

    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
    )
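
Only part of the CRF configuration survives in the excerpt. Assuming the usual sklearn_crfsuite workflow, training and evaluation would continue along these lines (a sketch, not the original code):

    # Assumed continuation of the truncated example.
    crf.fit(x_train, y_train)        # train on the extracted feature dicts
    y_pred = crf.predict(x_test)     # predicted label sequence per sentence

    from sklearn_crfsuite import metrics
    labels = list(crf.classes_)
    print(metrics.flat_f1_score(y_test, y_pred,
                                average='weighted', labels=labels))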