def main(args):
    print('Settings:')
    print(str(args)[10:-1])
    transform_size = 1000  # Batch size to be used during transformation.
    num_features = args.features
    print('Loading data')
    train_x, train_y, dev_x, dev_y, dev_pid, test_x, test_y, test_pid = load_data(
        args.data, bytes=args.byte, preprocess=args.preprocessed)
    if args.no_recalc:
        # Fit vectorizer and transform source codes to n-gram bag vectors.
        print('Calculating vectors.')
        vec = train_vectorizer(train_x, args.mode, args.ngram, num_features)
        dump(vec, 'vectorizer.joblib')  # Save the vectorizer to be used in n-gram error analysis.
        batch_transform(vec, train_x, 'train', num_features, transform_size)
        batch_transform(vec, dev_x, 'dev', num_features, transform_size)
        batch_transform(vec, test_x, 'test', num_features, transform_size)
        if args.mode == 'c':
            print("Rescaling count values.")
            rescale(num_features)
    else:
        print('Vector calculation skipped, loading from pre-calculated files.')
    predictions_dev, predictions_test, history = run_model(
        args.batch, num_features, train_y, dev_y, args.skip, args.fullpredict)
    if not args.skip:
        plot_history(history)  # Plot training and validation accuracy and loss per epoch.
    if args.results:
        write_predictions(predictions_dev, dev_pid, 'dev_predictions')
        write_predictions(predictions_test, test_pid, 'test_predictions')
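# A hypothetical argparse wiring for main() above, shown only as a sketch. The
# option names are assumptions inferred from the attributes the function reads
# (args.data, args.byte, args.preprocessed, args.mode, args.ngram, args.features,
# args.batch, args.skip, args.fullpredict, args.results, args.no_recalc); the
# original parser is not part of this excerpt and may use different flags.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='n-gram authorship pipeline (sketch)')
    parser.add_argument('--data', default='../data_dir/', help='Path to the dataset directory.')
    parser.add_argument('--byte', action='store_true', help='Use byte-level input.')
    parser.add_argument('--preprocessed', action='store_true', help='Use preprocessed source codes.')
    parser.add_argument('--mode', default='c', help="Vectorizer mode, e.g. 'c' for counts.")
    parser.add_argument('--ngram', type=int, default=3, help='n-gram size.')
    parser.add_argument('--features', type=int, default=20000, help='Number of features to keep.')
    parser.add_argument('--batch', type=int, default=128, help='Training batch size.')
    parser.add_argument('--skip', action='store_true', help='Skip training and plotting.')
    parser.add_argument('--fullpredict', action='store_true', help='Predict on the full sets.')
    parser.add_argument('--results', action='store_true', help='Write prediction files.')
    parser.add_argument('--no_recalc', action='store_true',
                        help='Recalculate vectors (attribute name taken verbatim from the code above).')
    main(parser.parse_args())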
def main(args):
    print('Settings:')
    print(str(args)[10:-1])
    train_x, train_y, dev_x, dev_y, _, _, _, _ = load_data(args.data, bytes=args.byte, preprocess=args.preprocessed)
    dev_orig = dev_x.copy()  # To run on a shorter dev set, use dev_x.copy()[:2500]
    print("Dataset loaded.")
    ngram_sizes = [int(n) for n in args.ngrams.split()]
    profile_lens = [int(n) for n in args.features.split()]
    print(ngram_sizes)
    print(profile_lens)
    # ngram_sizes = [2, 3]
    # profile_lens = [-1, 200]
    for ngram_size in ngram_sizes:
        author_profiles = {}
        dev_x = dev_orig.copy()
        for i in range(len(train_x)):
            single_profile = generate_profile(train_x[i], ngram_size)
            if train_y[i] in author_profiles:
                author_profiles[train_y[i]] = append_profile(author_profiles[train_y[i]], single_profile)
            else:
                author_profiles[train_y[i]] = single_profile
        author_profiles_backup = author_profiles.copy()
        # for author in author_profiles:
        #     author_profiles[author] = dictionary_to_list(author_profiles[author])
        dev_x = [dictionary_to_list(generate_profile(x, ngram_size)) for x in dev_x]
        #############################
        for profile_len in profile_lens:
            # Reload all the author n-grams and cut them to the requested length.
            author_profiles = author_profiles_backup.copy()
            for author in author_profiles:
                if profile_len >= 0:
                    author_profiles[author] = set(dictionary_to_list(author_profiles[author])[:profile_len])
                    # Each author profile is now a set of the top profile_len features.
                elif profile_len == -1:
                    # print("HYPER")
                    # Work on a copy of the count dictionary so the backup keeps its
                    # singleton counts for the remaining profile lengths.
                    auth_dict = dict(author_profiles[author])  # count dictionary for author
                    for key in list(auth_dict):  # if an n-gram only appears once, remove it
                        if auth_dict[key] == 1:
                            del auth_dict[key]
                    author_profiles[author] = set(dictionary_to_list(auth_dict))
            print("Running {}@{}".format(ngram_size, profile_len))
            count_total = 0
            count_success = 0
            for i in range(len(dev_x)):
                actual = dev_y[i]
                result = compare_to_profiles(dev_x[i], author_profiles)
                count_total += 1
                if actual == result:
                    count_success += 1
            print('Total Guesses: {}'.format(count_total))
            print('Correct Guesses: {}'.format(count_success))
            print('Guess accuracy: {}'.format(count_success / count_total))
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
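# generate_profile, append_profile, dictionary_to_list and compare_to_profiles
# are defined elsewhere in the repository. A minimal sketch of their assumed
# behaviour, modelled on SCAP-style author profiles: a profile is a mapping
# from n-grams to counts, dictionary_to_list orders n-grams by frequency, and
# classification picks the author whose profile shares the most n-grams with
# the document. The real implementations may differ in detail.
from collections import Counter


def generate_profile_sketch(text, ngram_size):
    # Count the character n-grams of the given size in one source file.
    return Counter(text[i:i + ngram_size] for i in range(len(text) - ngram_size + 1))


def append_profile_sketch(profile, single_profile):
    # Merge the counts of a new document into an existing author profile.
    profile.update(single_profile)
    return profile


def dictionary_to_list_sketch(profile):
    # n-grams ordered from most to least frequent.
    return [ngram for ngram, _ in sorted(profile.items(), key=lambda kv: kv[1], reverse=True)]


def compare_to_profiles_sketch(doc_ngrams, author_profiles):
    # Return the author whose profile set overlaps most with the document's n-grams.
    doc_set = set(doc_ngrams)
    return max(author_profiles, key=lambda author: len(doc_set & author_profiles[author]))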
def main(args):
    train_x, train_y, dev_x, dev_y, _, test_x, _, _ = load_data('../data_dir')
    features = 60770  # Number of unique words in the training set.
    if args.no_recalc:
        vectorizer = CountVectorizer(binary=True)
        # vectorizer = TfidfVectorizer()
        # Convert train, dev and test to 60770-D vectors.
        train_x = vectorizer.fit_transform(train_x).astype('float32').toarray()  # Convert train set to vectors.
        features = train_x.shape[1]
        t = np.memmap('vectors/train.mm', dtype='float32', mode='w+', shape=(50000, features))
        t[:] = train_x[:]
        del t, train_x
        dev_x = vectorizer.transform(dev_x).astype('float32').toarray()
        d = np.memmap('vectors/dev.mm', dtype='float32', mode='w+', shape=(25000, features))
        d[:] = dev_x[:]
        del d, dev_x
        test_x = vectorizer.transform(test_x).astype('float32').toarray()
        te = np.memmap('vectors/test.mm', dtype='float32', mode='w+', shape=(25000, features))
        te[:] = test_x[:]
        del te, test_x
    t = np.memmap('vectors/train.mm', dtype='float32', mode='r', shape=(50000, features))
    d = np.memmap('vectors/dev.mm', dtype='float32', mode='r', shape=(25000, features))
    te = np.memmap('vectors/test.mm', dtype='float32', mode='r', shape=(25000, features))

    # Set up generators.
    train = Generator(t, train_y, 128)
    dev = Generator(d, dev_y, 128)
    test = GeneratorX(te, 128)

    # Model.
    callback_list = [EarlyStopping(monitor='val_acc', patience=5),
                     ModelCheckpoint(filepath='word_model.h5', monitor='val_acc', save_best_only=True),
                     ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=3)]
    opt = RMSprop(learning_rate=0.001)
    model = Sequential()
    model.add(Dense(500, activation='relu', input_shape=(features,)))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])
    model.summary()
    model.fit(train, epochs=1000, validation_data=dev, callbacks=callback_list)
    model.load_weights('word_model.h5')
    model.evaluate(dev)

    # Write dev and test predictions to file.
    predict_vec = np.memmap('vectors/dev_word.mm', dtype='float32', mode='w+', shape=(25000, 1000))
    predict_vec[:] = model.predict(dev)[:]
    del predict_vec
    predict_vec2 = np.memmap('vectors/test_word.mm', dtype='float32', mode='w+', shape=(25000, 1000))
    predict_vec2[:] = model.predict(test)[:]
    del predict_vec2
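# Generator and GeneratorX are defined elsewhere; they are assumed to be
# tensorflow.keras.utils.Sequence subclasses that serve batches straight from
# the memmapped arrays, so the full 50000 x features matrix never has to sit
# in RAM at once. A minimal sketch of that assumption:
import math
import numpy as np
from tensorflow.keras.utils import Sequence


class GeneratorSketch(Sequence):
    # Yields (batch_x, batch_y) pairs for training and evaluation.
    def __init__(self, x, y, batch_size):
        self.x, self.y, self.batch_size = x, np.array(y), batch_size

    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return np.array(self.x[sl]), self.y[sl]


class GeneratorXSketch(Sequence):
    # Same idea, but without labels, for prediction only.
    def __init__(self, x, batch_size):
        self.x, self.batch_size = x, batch_size

    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return np.array(self.x[sl])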
def __data_generation(self, filenames):
    # Load a batch of data.
    X, y = datatools.load_data(
        path_to_dataset=self.path_to_dataset,
        data_list=filenames,
        input_shape=self.dim,
        standardization_mode=self.standardization_mode,
        border=self.border)

    # Debugging
    # if self.val == False:
    #     f = open('traingen.log', 'a+')
    #     f.write('-------------------->New Epoch\n')
    #     for i in range(len(filenames)):
    #         f.write(filenames[i] + '\n')
    #     f.close()
    # else:
    #     f = open('valgen.log', 'a+')
    #     f.write('-------------------->New Epoch\n')
    #     for i in range(len(filenames)):
    #         f.write(filenames[i] + '\n')
    #     f.close()

    # if self.standardization_mode != None:
    #     standardize = True
    #     # print('Datagen performing standardization...')
    # else:
    #     standardize = False
    #     # print('Datagen without standardization...')
    # X, y = datatools.load_data2(path_to_dataset=self.path_to_dataset,
    #                             data_list=filenames, input_shape=self.dim,
    #                             standardize=standardize,
    #                             border=self.border)

    # Scale the data.
    y = y * self.linear_output_scaling_factor

    # Expand the dimension for channels.
    X = X[:, :, :, :, np.newaxis]
    y = y[:, :, :, :, np.newaxis]

    return X, y
def main():
    train_x, _, dev_x, _, _, test_x, _, _ = load_data(r'../data_dir/', bytes=False, preprocess=True)
    del _
    print("Tokenizing.")
    length = 100
    start = time.time()
    processes = [
        Process(target=tokenize, args=(train_x, 'train', length)),
        Process(target=tokenize, args=(dev_x, 'dev', length)),
        Process(target=tokenize, args=(test_x, 'test', length))
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print("Finalising writing tokens to file.")
    print(time.time() - start)
import sys

sys.path.append('..')

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
from tensorflow.keras.layers import Embedding, Dense, LSTM, Conv1D, MaxPool1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import plot_model

from tools.datatools import load_data

train_x, train_y, dev_x, dev_y, _, test_x, _, _ = load_data('../data_dir')

# Word CNN model.
tokenizer = Tokenizer(num_words=2048)  # Tokenize source codes using the top 2048 words.
tokenizer.fit_on_texts(train_x)
train_y = np.array(train_y)
dev_y = np.array(dev_y)
train_x = tokenizer.texts_to_sequences(train_x)
dev_x = tokenizer.texts_to_sequences(dev_x)
train_x = pad_sequences(train_x, maxlen=512)  # Pad sequences to a uniform length.
dev_x = pad_sequences(dev_x, maxlen=512)
callback_list = [
    EarlyStopping(monitor='val_acc', patience=5),
    ModelCheckpoint(filepath='word_model2.h5', monitor='val_acc', save_best_only=True),
def main(args):
    start = time.time()
    print('Settings:')
    print(str(args)[10:-1])
    ngram_size = args.ngram
    profile_len = args.features
    author_profiles = {}  # author n-gram profiles 0-999
    # _, train_x, train_y = prep_inputs('train', bytes=True)
    # _, dev_x, dev_y = prep_inputs('dev', bytes=True)
    train_x, train_y, dev_x, dev_y, _, _, _, _ = load_data(
        '../data_dir/', bytes=args.byte, preprocess=args.preprocessed)
    print("Dataset loaded.")
    for i in range(len(train_x)):
        single_profile = generate_profile(train_x[i], ngram_size)  # Create a profile for each code file.
        if train_y[i] in author_profiles:
            # Append to the existing author profile, or create a new one if it doesn't already exist.
            author_profiles[train_y[i]] = append_profile(author_profiles[train_y[i]], single_profile)
        else:
            author_profiles[train_y[i]] = single_profile
    for author in author_profiles:
        if profile_len >= 0:
            author_profiles[author] = set(dictionary_to_list(author_profiles[author])[:profile_len])
            # Each author profile is now a set of the top profile_len features.
        elif profile_len == -1:
            print("HYPER")
            auth_dict = author_profiles[author]  # count dictionary for author
            keys = list(auth_dict)  # list of n-grams
            for key in keys:  # if a key only appears once, remove it
                if auth_dict[key] == 1:
                    del auth_dict[key]
            author_profiles[author] = set(dictionary_to_list(author_profiles[author]))
    print("Author profiles ready.")
    count_total = 0
    count_success = 0
    dev_x = [dictionary_to_list(generate_profile(x, ngram_size)) for x in dev_x]
    print('Dev ready for comparisons.')
    start_time = time.time()
    for i in range(len(dev_x)):
        actual = dev_y[i]
        result = compare_to_profiles(dev_x[i], author_profiles)
        count_total += 1
        if actual == result:
            count_success += 1
        if ((i + 1) % 250) == 0:
            percent = int((i + 1) / 250)
            print("Progress: {}%".format(percent))
            print("Accuracy so far: {}".format(count_success / count_total))
            time_secs = ((time.time() - start_time) / percent) * (100 - percent)
            time_mins = int(time_secs // 60)
            time_secs = str(int(time_secs % 60)).zfill(2)
            print("Time remaining: {}:{}".format(time_mins, time_secs))
    print('Total Guesses: {}'.format(count_total))
    print('Correct Guesses: {}'.format(count_success))
    print('Guess accuracy: {}'.format(count_success / count_total))
    print('n-grams: {}'.format(ngram_size))
    print('Profile length: {}'.format(profile_len))
    print(time.time() - start)
linear_output_scaling_factor = 409600000000
path_to_dataset = os.path.join('..', '..', '..', 'Daten', 'dataset_size32_stride16_split')
data_list = datatools.get_balanced_dataset(path_to_dataset=path_to_dataset, clip=5000)

# Shuffle the dataset.
np.random.shuffle(data_list)
train_list = data_list[0:10000]
val_list = data_list[10000:15000]
test_list = data_list[15000:200000]

X_train, y_train = datatools.load_data(path_to_dataset=path_to_dataset,
                                       data_list=train_list,
                                       input_shape=(32, 32, 32),
                                       standardization_mode='per_sample',
                                       border=None)
X_val, y_val = datatools.load_data(path_to_dataset=path_to_dataset,
                                   data_list=val_list,
                                   input_shape=(32, 32, 32),
                                   standardization_mode='per_sample',
                                   border=None)
X_test, y_test = datatools.load_data(path_to_dataset=path_to_dataset,
                                     data_list=test_list,
                                     input_shape=(32, 32, 32),
                                     standardization_mode=None,
                                     border=None)

# Expand the dimensions for channels.
X_train = X_train[:, :, :, :, np.newaxis]
def main(args):
    start = time.time()
    print('Settings:')
    print(str(args)[10:-1])
    ngram_size = args.ngram
    profile_len = args.features
    author_profiles = {}  # author n-gram profiles 0-999
    # _, train_x, train_y = prep_inputs('train', bytes=True)
    # _, dev_x, dev_y = prep_inputs('dev', bytes=True)
    train_x, train_y, dev_x, dev_y, _, _, _, _ = load_data(
        '../data_dir/', bytes=args.byte, preprocess=args.preprocessed)
    print("Dataset loaded.")
    for i in range(len(train_x)):
        single_profile = generate_profile(train_x[i], ngram_size)  # Create a profile for each code file.
        if train_y[i] in author_profiles:
            # Append to the existing author profile, or create a new one if it doesn't already exist.
            author_profiles[train_y[i]] = append_profile(author_profiles[train_y[i]], single_profile)
        else:
            author_profiles[train_y[i]] = single_profile
    # for author in author_profiles:
    #     author_profiles[author] = set(dictionary_to_list(author_profiles[author])[:profile_len])
    for author in author_profiles:
        if profile_len >= 0:
            author_profiles[author] = set(dictionary_to_list(author_profiles[author])[:profile_len])
            # Each author profile is now a set of the top profile_len features.
        elif profile_len == -1:
            auth_dict = author_profiles[author]  # count dictionary for author
            keys = list(auth_dict)  # list of n-grams
            for key in keys:  # if a key only appears once, remove it
                if auth_dict[key] == 1:
                    del auth_dict[key]
            author_profiles[author] = set(dictionary_to_list(author_profiles[author]))
    # lowest = 999999999
    # highest = 0
    # cx = 0
    # for a in author_profiles:
    #     cx += len(author_profiles[a])
    #     lowest = min(lowest, len(author_profiles[a]))
    #     highest = max(highest, len(author_profiles[a]))
    # print(cx)
    # print(lowest)
    # print(highest)
    # exit()
    print("Author profiles ready.")
    dev_x = [dictionary_to_list(generate_profile(x, ngram_size)) for x in dev_x]
    print('Dev ready for comparisons.')
    start_time = time.time()
    size = int(25000 / cpu_count())
    smaller_chunks = [dev_x[x:x + size] for x in range(0, len(dev_x), size)]
    labels = [dev_y[x:x + size] for x in range(0, len(dev_y), size)]
    with ProcessPoolExecutor() as executor:
        results = [
            executor.submit(calculate_profiles, smaller_chunks[i], labels[i], author_profiles, i)
            for i in range(len(smaller_chunks))
        ]
        total = 0
        success = 0
        for r in as_completed(results):
            r = r.result()
            success += r[0]
            total += r[1]
    print(success)
    print(total)
    print(success / total)
    print(time.time() - start_time)
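# calculate_profiles is the worker each process runs; it is defined elsewhere
# in the repository. Based on the serial loop in the single-process script
# above, it is assumed to classify its chunk against the author profiles and
# return the number of correct guesses and the chunk size, roughly like this
# sketch (the trailing worker_id argument is optional, matching both call
# sites seen in these scripts):
def calculate_profiles_sketch(chunk, labels, author_profiles, worker_id=0):
    success = 0
    for doc_ngrams, actual in zip(chunk, labels):
        if compare_to_profiles(doc_ngrams, author_profiles) == actual:
            success += 1
    return success, len(chunk)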
                                          standardization_mode=standardization_mode,
                                          linear_output_scaling_factor=linear_output_scaling_factor,
                                          border=border)
history = cnn.fit_generator(epochs=epochs,
                            train_generator=train_generator,
                            val_generator=val_generator,
                            callbacks=callbacks)

#%%############################################################################
# Evaluate the model
###############################################################################

# Load unstandardized test data.
X_test_data, y_test_data = datatools.load_data(path_to_dataset=path_to_dataset,
                                               data_list=test_list,
                                               input_shape=data_shape,
                                               standardization_mode=None,
                                               border=border)

if evaluate == True:
    test_loss = cnn.evaluate_model(X_test=np.expand_dims(X_test_data, axis=4),
                                   y_test=np.expand_dims(y_test_data, axis=4),
                                   batch_size=batch_size)
    print(test_loss)

#%%############################################################################
# Save the model
###############################################################################
cnn.save_model_json(model_export_path, 'model_json')
cnn.save_model_weights(model_export_path, 'model_weights')
cnn.save_model_single_file(model_export_path, 'model_single')
def main(args):
    print('Settings:')
    print(str(args)[10:-1])
    train_x, train_y, dev_x, dev_y, _, _, _, _ = load_data(
        args.data, bytes=args.byte, preprocess=args.preprocessed)
    dev_orig = dev_x.copy()  # To run on a shorter dev set, use dev_x.copy()[:2500]
    print("Dataset loaded.")
    ngram_sizes = [int(n) for n in args.ngrams.split()]
    profile_lens = [int(n) for n in args.features.split()]
    print(ngram_sizes)
    print(profile_lens)
    for ngram_size in ngram_sizes:
        author_profiles = {}
        dev_x = dev_orig.copy()
        for i in range(len(train_x)):
            single_profile = generate_profile(train_x[i], ngram_size)
            if train_y[i] in author_profiles:
                author_profiles[train_y[i]] = append_profile(author_profiles[train_y[i]], single_profile)
            else:
                author_profiles[train_y[i]] = single_profile
        author_profiles_backup = author_profiles.copy()
        dev_x = [dictionary_to_list(generate_profile(x, ngram_size)) for x in dev_x]
        #############################
        for profile_len in profile_lens:
            # Reload all the author n-grams and cut them to the requested length.
            author_profiles = author_profiles_backup.copy()
            for author in author_profiles:
                if profile_len >= 0:
                    author_profiles[author] = set(dictionary_to_list(author_profiles[author])[:profile_len])
                    # Each author profile is now a set of the top profile_len features.
                elif profile_len == -1:
                    # print("HYPER")
                    # Work on a copy of the count dictionary so the backup keeps its
                    # singleton counts for the remaining profile lengths.
                    auth_dict = dict(author_profiles[author])  # count dictionary for author
                    for key in list(auth_dict):  # if an n-gram only appears once, remove it
                        if auth_dict[key] == 1:
                            del auth_dict[key]
                    author_profiles[author] = set(dictionary_to_list(auth_dict))
            print("Running {}@{}".format(ngram_size, profile_len))
            start_time = time.time()
            processes = cpu_count()
            size = int(25000 / processes)
            smaller_chunks = [dev_x[x:x + size] for x in range(0, len(dev_x), size)]
            labels = [dev_y[x:x + size] for x in range(0, len(dev_y), size)]
            with ProcessPoolExecutor() as executor:
                results = [
                    executor.submit(calculate_profiles, smaller_chunks[i], labels[i], author_profiles)
                    for i in range(len(smaller_chunks))
                ]
                total = 0
                success = 0
                for r in as_completed(results):
                    r = r.result()
                    success += r[0]
                    total += r[1]
                executor.shutdown(wait=True)
            # print(success)
            # print(total)
            # print(success / total)
            print(time.time() - start_time)
            print('Total Guesses: {}'.format(total))
            print('Correct Guesses: {}'.format(success))
            print('Guess accuracy: {}'.format(success / total))
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
def main(args):
    print('Settings:')
    print(str(args)[10:-1])
    length = 136
    print('Loading data...')
    if args.no_recalc:
        # Makes use of both raw and preprocessed source codes.
        train_x, _, dev_x, _, _, test_x, _, _ = load_data('../data_dir')
        train_x2, _, dev_x2, _, _, test_x2, _, _ = load_data('../data_dir', preprocess=True)
        print("Extracting stylometric features...")
        # Runs the stylometry vectorizer from vectorizer.py so characters can be grabbed simultaneously.
        vec = Vectorizer('lexical')
        train_x = vec.vectorize(train_x, train_x2)  # Vectorize all three subsets.
        dev_x = vec.vectorize(dev_x, dev_x2)
        test_x = vec.vectorize(test_x, test_x2)
        del train_x2, dev_x2, test_x2
        scaler = MinMaxScaler()  # Rescale values between 0 and 1.
        print("Rescaling...")
        train_x = scaler.fit_transform(train_x)
        dev_x = scaler.transform(dev_x)
        test_x = scaler.transform(test_x)
        length = len(train_x[0])
        print(length)
        trainmm = np.memmap('vectors/train.mm', dtype='float32', mode='w+', shape=(50000, length))
        trainmm[:] = train_x[:]
        devmm = np.memmap('vectors/dev.mm', dtype='float32', mode='w+', shape=(25000, length))
        devmm[:] = dev_x[:]
        testmm = np.memmap('vectors/test.mm', dtype='float32', mode='w+', shape=(25000, length))
        testmm[:] = test_x[:]
        del trainmm, devmm, testmm, train_x, dev_x, test_x  # Save and flush all vectors.
        print("Finished building vectors.")

    # Load data from file.
    train_y, dev_y, _ = load_all_labels('../data_dir')
    dev = np.array(np.memmap('vectors/dev.mm', dtype='float32', mode='r', shape=(25000, length)))
    test = np.array(np.memmap('vectors/test.mm', dtype='float32', mode='r', shape=(25000, length)))
    train = np.array(np.memmap('vectors/train.mm', dtype='float32', mode='r', shape=(50000, length)))

    # Model.
    callback_list = [
        EarlyStopping(monitor='val_acc', patience=10),
        ModelCheckpoint(filepath='style_model.h5', monitor='val_acc', save_best_only=True),
        ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=5)
    ]
    model = Sequential()
    model.add(Dense(500, activation='relu', input_shape=(length,)))  # length defaults to 136 stylometric features.
    model.add(Dropout(0.3))
    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1000, activation='softmax'))
    opt = RMSprop(learning_rate=0.001)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])
    model.summary()
    model.fit(train, train_y, epochs=1000, batch_size=250, validation_data=(dev, dev_y),
              shuffle=True, callbacks=callback_list)
    model = load_model('style_model.h5')
    print(model.evaluate(dev, dev_y))

    # Generate predictions.
    predict_vec = np.memmap('vectors/dev_style.mm', dtype='float32', mode='w+', shape=(25000, 1000))
    predict_vec[:] = model.predict(dev)[:]
    del predict_vec
    predict_vec2 = np.memmap('vectors/test_style.mm', dtype='float32', mode='w+', shape=(25000, 1000))
    predict_vec2[:] = model.predict(test)[:]
    del predict_vec2