Example #1
File: 22.py  Project: TNMR-m/NLP100knock
import json
from gzip import open as g_open  # g_open is assumed to alias gzip.open


def art_uk(file_name):
    """Return the body text of the article titled 'イギリス' (UK) from a gzipped JSON-lines file."""
    article_uk = None  # stays None if no article matches the title
    with g_open(file_name, 'rt', encoding='utf-8') as f0:
        for f0_line in f0:
            f0_dict = json.loads(f0_line)
            if f0_dict['title'] == 'イギリス':
                article_uk = f0_dict['text']

    return article_uk
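
A minimal usage sketch, assuming the input is the gzipped JSON-lines dump used by the NLP 100 Knock exercises (the file name jawiki-country.json.gz is an assumption):

text = art_uk('jawiki-country.json.gz')  # hypothetical file name
if text is not None:
    print(text[:200])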
Example #2

from gzip import open as g_open  # assumed alias for gzip.open
from os import getcwd
from os.path import join

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

SEED = 0  # module-level constant in the source project; the value here is an assumption


def load_mnist(path=join(getcwd(), 'Data'), kind='train'):
    labels_path = join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = join(path, f'{kind}-images-idx3-ubyte.gz')

    # Labels: skip the 8-byte IDX header, then read one uint8 per example.
    with g_open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    # Images: skip the 16-byte IDX header; each row is a 28x28 image flattened to 784 pixels.
    with g_open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    # Keep the first `size` examples of each of the three digit classes.
    size = 1000
    index_label2 = np.where(labels == 2)
    x_label2 = images[index_label2][:size, :].astype('float64')

    index_label4 = np.where(labels == 4)
    x_label4 = images[index_label4][:size, :].astype('float64')

    index_label6 = np.where(labels == 6)
    x_label6 = images[index_label6][:size, :].astype('float64')

    y_label2 = np.array([2] * size)
    y_label4 = np.array([4] * size)
    y_label6 = np.array([6] * size)

    # Append each label column to its feature block so rows stay paired through the shuffle.
    label2_data = np.append(x_label2, y_label2.reshape(size, 1), axis=1)
    label4_data = np.append(x_label4, y_label4.reshape(size, 1), axis=1)
    label6_data = np.append(x_label6, y_label6.reshape(size, 1), axis=1)

    # Stack the three class blocks, then split features from the label column.
    all_data = np.append(np.append(label2_data, label4_data, axis=0),
                         label6_data,
                         axis=0)

    data_x = all_data[:, :-1]
    data_y = all_data[:, -1]
    train_x, test_x, train_y, test_y = train_test_split(data_x,
                                                        data_y,
                                                        test_size=0.2,
                                                        random_state=SEED)

    # Fit the scaler on the training split only, then apply it to the test split.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

    return train_x, test_x, train_y, test_y
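
A quick check of what this returns, assuming the four MNIST .gz files are present under ./Data (the paths, and the SEED value above, are assumptions):

train_x, test_x, train_y, test_y = load_mnist()
print(train_x.shape, test_x.shape)  # 3000 rows split 80/20: (2400, 784) (600, 784)
print(sorted(set(train_y)))         # the three digit classes: [2.0, 4.0, 6.0]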
Example #3
# Uses the same imports and SEED constant as Example #2.
def load_mnist(path=join(getcwd(), 'Data'), kind='train'):
    labels_path = join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = join(path, f'{kind}-images-idx3-ubyte.gz')

    with g_open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    with g_open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    # Take the first 1000 examples each of digits 2 and 4.
    index_label2 = np.where(labels == 2)
    x_label2 = images[index_label2][:1000, :].astype('float64')
    y_label2 = labels[index_label2][:1000].astype('float64')

    index_label4 = np.where(labels == 4)
    x_label4 = images[index_label4][:1000, :].astype('float64')
    y_label4 = labels[index_label4][:1000].astype('float64')

    # Convert the labels of classes 2 and 4 into +1 and -1, respectively.
    y_label2 = y_label2 / 2.0
    y_label4 = y_label4 / -4.0

    x_label_24 = np.vstack((x_label2, x_label4))
    y_label_24 = np.concatenate((y_label2, y_label4))

    # Standardize the features. Note: unlike Example #2, the scaler is fit on all
    # data before the split, which leaks test-set statistics into the scaling.
    scaler = StandardScaler()
    scaler.fit(x_label_24)
    x_label_24 = scaler.transform(x_label_24)

    # train_test_split returns (x_train, x_test, y_train, y_test).
    data = train_test_split(x_label_24,
                            y_label_24,
                            test_size=0.3,
                            random_state=SEED)

    x_train24, x_test24, y_train24, y_test24 = data

    # Note the return order: (x_train, y_train, x_test, y_test), not sklearn's split order.
    return x_train24, y_train24, x_test24, y_test24
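
A usage sketch under the same ./Data assumptions; the ±1 labels make this split ready for a binary classifier such as a perceptron:

x_train, y_train, x_test, y_test = load_mnist()
print(set(y_train))  # expected: {1.0, -1.0}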
Example #4

from gzip import open as g_open  # assumed alias for gzip.open
from string import printable
from urllib.parse import unquote_plus

import boto3

S3 = boto3.client('s3')
SQS = boto3.client('sqs')
QUEUE_NAME = 'memos-queue'          # assumed; defined elsewhere in the source module
MESSAGE_RETENTION_PERIOD = '86400'  # assumed; SQS expects seconds as a string


def memos(event, context):
    print("Loading Function...")

    bucket = event['Records'][0]['s3']['bucket']['name']
    # unquote_plus already returns str in Python 3; the original .decode('utf8') was a Python 2 leftover.
    key = unquote_plus(event['Records'][0]['s3']['object']['key'])

    local_path = '/tmp/%s' % key.split('/')[-1]
    S3.download_file(bucket, key, local_path)

    # If the queue already exists, create_queue just returns its URL.
    queue_url = SQS.create_queue(QueueName=QUEUE_NAME,
                                 Attributes={'MessageRetentionPeriod': MESSAGE_RETENTION_PERIOD})['QueueUrl']

    with g_open(local_path, 'rt') as f:
        batch_of_mess = []
        batch_bytes = 0
        for identifier, content in enumerate(f.readlines()):
            # Drop characters outside string.printable; SQS rejects some code points
            # and the whole message would fail to send.
            body = ''.join(ch for ch in content if ch in printable)
            batch_of_mess.append({'Id': str(identifier), 'MessageBody': body})
            batch_bytes += len(body.encode('utf-8'))

            # A send_message_batch call is limited to 10 messages and 256 KB in total;
            # 240,000 bytes leaves headroom for entry metadata. (The original checked
            # getsizeof of the list, which ignores the size of the message bodies.)
            if batch_bytes >= 240_000 or len(batch_of_mess) == 10:
                message = SQS.send_message_batch(QueueUrl=queue_url,
                                                 Entries=batch_of_mess)

                # send_message_batch reports per-message failures instead of raising.
                if 'Failed' in message:
                    print(message)

                batch_of_mess = []
                batch_bytes = 0

        # Send whatever messages remain after the loop.
        if batch_of_mess:
            last_message = SQS.send_message_batch(QueueUrl=queue_url,
                                                  Entries=batch_of_mess)
            if 'Failed' in last_message:
                print(last_message)

    print("Done!")