Example #1
def do_train(args):

    pretrained_embeddings, token2id = util.load_word_embedding(input_file=args.vectors, cache='cache')
    stopwords = util.load_stopwords()
    stopwords = None  # override: stopword filtering is disabled in this example
    train_data = util.Data(args.data_train, args.ltp_data, stopwords=stopwords)
    dev_data = util.Data(args.data_dev, args.ltp_data, max_length=train_data.max_length, stopwords=stopwords)
    config = Config(args)
    print(train_data.max_length)
    # Configure parameters. How should the test set be set up?
    _, config.max_length = train_data.get_metadata()
    config.n_classes = len(train_data.LABELS)
    config.n_word_embed_size = len(pretrained_embeddings[0])


    with tf.Graph().as_default():
        logger.info("Building model...",)
        start = time.time()
        model = Classifier(pretrained_embeddings, token2id, config)
        logger.info("took %.2f seconds", time.time() - start)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:
            
            session.run(init)
            score = model.fit(session, saver, train_data, dev_data) 
            print("\n")
            logger.info("training finished, took %.2f seconds with P: %.2f", time.time() - start, score)
Example #2
def do_predict(args):

    pretrained_embeddings, token2id = util.load_word_embedding(input_file=args.vectors, cache='cache')
    stopwords = util.load_stopwords()
    stopwords = None  # override: stopword filtering is disabled in this example
    train_data = util.Data(args.data_train, args.ltp_data, stopwords=stopwords)
    test_data = util.Data(args.data_test, args.ltp_data, max_length=train_data.max_length, stopwords=stopwords)
    config = Config(args)
    # Configure parameters. How should the test set be set up?
    _, config.max_length = train_data.get_metadata()
    config.n_classes = len(train_data.LABELS)
    config.n_word_embed_size = len(pretrained_embeddings[0])
    config.batch_size = len(test_data.data)
    

    with tf.Graph().as_default():
        logger.info("Building model...",)
        start = time.time()
        model = Classifier(pretrained_embeddings, token2id, config)
        logger.info("took %.2f seconds", time.time() - start)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:
            
            session.run(init)
            saver.restore(session, model.config.output_model)
            labels, prediction = model.output(session, test_data, None)
            print(labels)
            print(prediction)
            
            test_data.update_labels(prediction).save_result()
Example #3
def svhn_test(model, num_label=500):
    # Load SVHN dataset
    # load data on the cpu
    with tf.device('/CPU:0'):
        X_train, y_train = tfds.as_numpy(
            tfds.load('svhn_cropped',
                      split='train',
                      as_supervised=True,
                      batch_size=-1))
        X_test, y_test = tfds.as_numpy(
            tfds.load('svhn_cropped',
                      split='test',
                      as_supervised=True,
                      batch_size=-1))

        # one-hot encode the outs
        y_train = np.eye(10)[y_train.reshape(-1)]
        y_test = np.eye(10)[y_test.reshape(-1)]

        # cast it all to floats for image augmentation, rescale to [0,1]
        X_train = X_train.astype('float32') / 255.0
        X_test = X_test.astype('float32') / 255.0

        # whiten the data or apply zca
        X_train = whiten_norm(X_train)
        X_test = whiten_norm(X_test)

        print('loaded svhn', X_train.shape, X_test.shape)
        # Setup test set
        test = util.Data(X_test, y_test, None)

        # Split training test into labeled and unlabeled
        train = util.label_unlabel_split(X_train, y_train, num_label, 10)

        # Split training data into training and validation
        (train, valid) = util.train_test_valid_split(train.X,
                                                     train.y,
                                                     split=(0.9, 0.1),
                                                     U=train.U)

        print('TR:', train.X.shape, train.y.shape, train.U.shape)
        print('v', valid.X.shape, valid.y.shape)
    # fit on the gpu
    with tf.device('/GPU:0'):
        # Train model using training and validation sets
        hist = model.fit(train, valid)

    print('evaluating on (subset) of test set...')
    with tf.device('/CPU:0'):
        # Test the model using test set
        y_pred = model.predict(test.X[0:1000])

        acc = float(
            tf.reduce_mean(
                tf.keras.metrics.categorical_accuracy(test.y[0:1000], y_pred)))
        print(model.name, ' : acc:', acc)

    return model, {'hist': hist, 'acc': acc}
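whiten_norm is called in this example but not defined in the listing. Below is a minimal sketch under the assumption that it performs per-image mean subtraction and contrast normalization, consistent with the "whiten the data or apply zca" comment; the original implementation may differ.

# Hypothetical whiten_norm: per-image mean/std normalization over a batch
# of images shaped (N, H, W, C). The real implementation is not shown here.
import numpy as np

def whiten_norm(x):
    x = x - np.mean(x, axis=(1, 2, 3), keepdims=True)
    x = x / (np.sqrt(np.mean(x ** 2, axis=(1, 2, 3), keepdims=True)) + 1e-8)
    return x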
Example #4
    def make_request(self, is_part_upload=False, part_index=None,
                     file_location=None, save_path_parent=None,
                     file_name=None, is_range_download=False,
                     part_download_queue=None, range_start=None,
                     stop_flag_obj=None, is_last_retry=False):
        has_none_been_put = False
        chunk_size = 65536
        peer_addr = self.my_http_connection.host
        local_addr = ''
        http_response = None
        recv_body = ''
        self.response.start_time = time.time()
        try:
            self.my_http_connection.connection.putrequest(
                self.obs_request.method, self.obs_request.url, skip_host=1)
            # send headers
            for k in self.obs_request.headers.keys():
                if isinstance(self.obs_request.headers[k], list):
                    for i in self.obs_request.headers[k]:
                        self.my_http_connection.connection.putheader(k, i)
                else:
                    self.my_http_connection.connection.putheader(k,
                                                                 self.obs_request.headers[
                                                                     k])
            self.my_http_connection.connection.endheaders()
            local_addr = str(
                self.my_http_connection.connection.sock._sock.getsockname())
            peer_addr = str(
                self.my_http_connection.connection.sock._sock.getpeername())
            logging.debug(
                'Request:[%s], conn:[%s->%s], sendURL:[%s], sendHeaders:[%r], sendContent:[%s]' % (
                    self.obs_request.request_type, local_addr, peer_addr,
                    self.obs_request.url, self.obs_request.headers,
                    self.obs_request.send_content[0:1024]))

            if self.obs_request.content_length > 0 and not self.obs_request.send_content:
                if is_part_upload:
                    with open(file_location, 'rb') as obj_to_put:
                        obj_to_put.seek(part_index)
                        while self.response.send_bytes < self.obs_request.content_length:
                            if stop_flag_obj.flag:
                                raise Exception(
                                    'Stop Because Some Part_upload Failed')
                            if self.obs_request.content_length - self.response.send_bytes >= chunk_size:
                                chunk = obj_to_put.read(chunk_size)
                                self.response.send_bytes += chunk_size
                            else:
                                chunk = obj_to_put.read(
                                    self.obs_request.content_length -
                                    self.response.send_bytes)
                                self.response.send_bytes += (
                                    self.obs_request.content_length -
                                    self.response.send_bytes)
                            self.my_http_connection.connection.send(chunk)
                else:
                    with open(file_location, 'rb') as obj_to_put:
                        while self.response.send_bytes < self.obs_request.content_length:
                            if self.obs_request.content_length - self.response.send_bytes >= chunk_size:
                                chunk = obj_to_put.read(chunk_size)
                                self.response.send_bytes += chunk_size
                            else:
                                chunk = obj_to_put.read(
                                    self.obs_request.content_length -
                                    self.response.send_bytes)
                                self.response.send_bytes += (
                                    self.obs_request.content_length -
                                    self.response.send_bytes)
                            self.my_http_connection.connection.send(chunk)
            else:
                self.my_http_connection.connection.send(
                    self.obs_request.send_content)
                self.response.send_bytes += len(self.obs_request.send_content)
            wait_response_time_start = time.time()
            logging.debug('total send bytes: %d, content-length: %d' % (
                self.response.send_bytes, self.obs_request.content_length))
            # get response
            http_response = self.my_http_connection.connection.getresponse(
                buffering=True)
            wait_response_time = time.time() - wait_response_time_start
            logging.debug('get response, wait time %.3f' % wait_response_time)
            # read the body
            content_length = int(
                http_response.getheader('Content-Length', '-1'))
            logging.debug('get ContentLength: %d' % content_length)
            self.response.content_length = content_length
            self.response.request_id = http_response.getheader(
                'x-amz-request-id', '9999999999999998')
            self.response.id2 = http_response.getheader('x-amz-id-2', 'None')
            if http_response.status < 300 and self.obs_request.request_type == 'GetObject':
                if not is_range_download:
                    file_path = os.path.join(save_path_parent, file_name)
                    save_path = os.path.dirname(file_path)
                    if not os.path.isdir(save_path):
                        try:
                            os.makedirs(save_path)
                        except:
                            pass
                    with open(file_path, 'wb') as f:
                        try:
                            while True:
                                chunk = http_response.read(65536)
                                if not chunk:
                                    logging.info('chunk is empty, break cycle')
                                    recv_body = '[receive content], length: %d' % self.response.recv_bytes
                                    break
                                self.response.recv_bytes += len(chunk)
                                f.write(chunk)
                        except Exception as e:
                            logging.error('download file(%s) error(%s)' % (
                                self.obs_request.key, e))
                            try:
                                os.remove(file_path)
                            except Exception:
                                pass
                else:
                    count = 0
                    chunk_size = 65536
                    while not stop_flag_obj.flag:
                        chunk = http_response.read(chunk_size)
                        if not chunk:
                            logging.info('chunk is empty, break cycle')
                            recv_body = '[receive content], length: %d' % self.response.recv_bytes
                            break
                        self.response.recv_bytes += len(chunk)
                        offset = range_start + chunk_size * count
                        data = util.Data(chunk=chunk, offset=offset)
                        while not stop_flag_obj.flag:
                            try:
                                part_download_queue.put(data, block=True,
                                                        timeout=1)
                                break
                            except Full:
                                pass
                        else:
                            logging.info(
                                'stop put data, range_start: %d' % range_start)
                            raise Exception(
                                'Stop Because Some Range_download Failed')
                        count += 1
            else:
                pass  # handling of error / non-GetObject responses omitted in this excerpt
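In the range-download branch above, each chunk is wrapped as util.Data(chunk=chunk, offset=offset) and handed to part_download_queue; the consumer side is not shown. Below is a minimal, hypothetical writer loop that drains the queue and writes each chunk at its recorded offset; every name other than the queue and the util.Data fields is invented for illustration.

# Hypothetical consumer for part_download_queue; not part of the original
# example. Seeks to each chunk's offset and writes it into the target file.
from Queue import Empty  # Python 2 stdlib, matching the example's use of Full

def write_parts(part_download_queue, target_path, stop_flag_obj):
    with open(target_path, 'r+b') as f:
        while not stop_flag_obj.flag:
            try:
                data = part_download_queue.get(block=True, timeout=1)
            except Empty:
                continue
            f.seek(data.offset)
            f.write(data.chunk)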
Example #5
def cifar10_test(model, num_label=4000):

    # load data on the cpu
    with tf.device('/CPU:0'):

        # Load in training and test data
        X_train, y_train = tfds.as_numpy(
            tfds.load('cifar10',
                      split='train',
                      as_supervised=True,
                      batch_size=-1))
        X_test, y_test = tfds.as_numpy(
            tfds.load('cifar10',
                      split='test',
                      as_supervised=True,
                      batch_size=-1))

        # one-hot encode the outs
        y_train = np.eye(10)[y_train.reshape(-1)]
        # print('y_train sample:', y_train[0:10])
        y_test = np.eye(10)[y_test.reshape(-1)]
        # print('y_test sample:', y_test[0:10])

        # cast it all to floats for image augmentation, rescale to [0,1]
        X_train = X_train.astype('float32') / 255.0
        X_test = X_test.astype('float32') / 255.0

        # whiten the data or apply zca
        X_train = whiten_norm(X_train)
        X_test = whiten_norm(X_test)

        print('loaded cifar10', X_train.shape, X_test.shape)
        # Setup test set
        test = util.Data(X_test, y_test, None)

        # Split training test into labeled and unlabeled
        train = util.label_unlabel_split(X_train, y_train, num_label, 10)

        # Split training data into training and validation
        (train, valid) = util.train_test_valid_split(train.X,
                                                     train.y,
                                                     split=(0.9, 0.1),
                                                     U=train.U)

        print('TR:', train.X.shape, train.y.shape, train.U.shape)
        print('v', valid.X.shape, valid.y.shape)

    # fit on the gpu
    with tf.device('/GPU:0'):

        # Train model using training and validation sets
        hist = model.fit(train, valid)

    print('evaluating on (subset) of test set...')
    with tf.device('/CPU:0'):
        # Test the model using test set
        y_pred = model.predict(test.X[0:1000])

        # if outputs are one-hot encoded, need to decode for correctness test
        # wrong = util.percent_wrong(y_pred, test.y)
        # acc = 1.0 - wrong
        acc = float(
            tf.reduce_mean(
                tf.keras.metrics.categorical_accuracy(test.y[0:1000], y_pred)))
        print(model.name, ' : acc:', acc)

    return model, {'hist': hist, 'acc': acc}
Example #6
# TRAIN_BUFFER_SIZE = len(input_tensor_train)
TEST_BUFFER_SIZE = len(input_tensor_test)
BATCH_SIZE = 64
# TRAIN_N_BATCH = TRAIN_BUFFER_SIZE // BATCH_SIZE
TEST_N_BATCH = TEST_BUFFER_SIZE // BATCH_SIZE

embedding_dim = 256
units = 1024
vocab_inp_size = len(globalVocab.word2idx)
target_size = len(emotion_dict)

torch.cuda.set_device(0)

# train_dataset = util.Data(input_tensor_train, target_tensor_train)
test_dataset = util.Data(input_tensor_test, target_tensor_test)

# train_dataset = DataLoader(train_dataset, batch_size=BATCH_SIZE,
#                            drop_last=True,
#                            shuffle=True)
test_dataset = DataLoader(test_dataset,
                          batch_size=BATCH_SIZE,
                          drop_last=False,
                          shuffle=False)

# print(val_dataset.batch_size)

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.cuda.set_device(0)
Example #7
import util

amazon = "amazon_cells_labelled.txt"
imdb = "imdb_labelled.txt"
yelp = "yelp_labelled.txt"

v2 = util.Data(amazon, imdb, yelp, quiet=False)
res = v2.test()
print(res)
print(len(input_tensor_test))

TRAIN_BUFFER_SIZE = len(input_tensor_train)
TEST_BUFFER_SIZE = len(input_tensor_test)
BATCH_SIZE = 64
TRAIN_N_BATCH = TRAIN_BUFFER_SIZE // BATCH_SIZE
TEST_N_BATCH = TEST_BUFFER_SIZE // BATCH_SIZE

embedding_dim = 256
units = 1024
vocab_inp_size = len(globalVocab.word2idx)
target_size = len(emotion_dict)

torch.cuda.set_device(0)

train_dataset = util.Data(input_tensor_train, target_tensor_train)
test_dataset = util.Data(input_tensor_test, target_tensor_test)

train_dataset = DataLoader(train_dataset,
                           batch_size=BATCH_SIZE,
                           drop_last=True,
                           shuffle=True)
test_dataset = DataLoader(test_dataset,
                          batch_size=BATCH_SIZE,
                          drop_last=True,
                          shuffle=True)

# print(val_dataset.batch_size)

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
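In Examples #6 and #7, util.Data(input_tensor, target_tensor) is passed directly to torch.utils.data.DataLoader, so it must behave as a map-style dataset. Below is a minimal sketch of what that wrapper could look like; the actual class is not shown in the listing.

# Hypothetical map-style Dataset matching how util.Data is used with
# DataLoader above; the real implementation is not in this listing.
import torch
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.as_tensor(self.inputs[idx]), torch.as_tensor(self.targets[idx])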