Пример #1
0
def init_ff_mt5():
    """
    Build, compile, and initialize a FlexFlow model from HuggingFace mT5.

    The pretrained checkpoint named by ``PRETRAINED_MODEL_NAME`` is wrapped
    in a ``PyTorchModel`` and converted onto a fresh ``FFModel``. The model
    is compiled with an SGD optimizer (lr=0.01) and sparse categorical
    cross-entropy loss, and data loaders are created from the single batch
    returned by ``load_batch_ff()``.

    Returns:
        (ffmodel, input_dls, label_dl)

        ffmodel (FFModel): The compiled FlexFlow model, with its layers
            already initialized.
        input_dls (List[SingleDataLoader]): Data loaders for, in order, the
            encoder input IDs, the encoder attention mask, and the decoder
            input IDs.
        label_dl (SingleDataLoader): Data loader for the labels.
    """
    config = FFConfig()
    ffmodel = FFModel(config)
    hf_mt5 = MT5ForConditionalGeneration.from_pretrained(
        PRETRAINED_MODEL_NAME,
    )
    input_ids, attention_mask, decoder_input_ids, labels = load_batch_ff()

    # The order of this tuple must line up with ``input_names`` below; it
    # drives both the tensor creation and the data loader creation.
    batch_arrays = (input_ids, attention_mask, decoder_input_ids)
    input_tensors = [
        ffmodel.create_tensor(array.shape, DataType.DT_INT64)
        for array in batch_arrays
    ]

    encoder_seq_length = input_ids.shape[1]
    decoder_seq_length = decoder_input_ids.shape[1]
    converter = PyTorchModel(
        hf_mt5,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=config.batch_size,
        seq_length=(encoder_seq_length, decoder_seq_length),
    )
    # The converted outputs are not used any further in this function.
    _output_tensors = converter.torch_to_ff(ffmodel, input_tensors)

    sgd = SGDOptimizer(ffmodel, lr=0.01)
    tracked_metrics = [
        MetricsType.METRICS_ACCURACY,
        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
    ]
    ffmodel.compile(
        optimizer=sgd,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=tracked_metrics,
    )

    input_dls = [
        ffmodel.create_data_loader(tensor, array)
        for tensor, array in zip(input_tensors, batch_arrays)
    ]
    # NOTE: We cast down the label tensor data to 32-bit to accommodate the
    # label tensor's bitwidth requirement
    labels_int32 = labels.astype("int32")
    label_dl = ffmodel.create_data_loader(ffmodel.label_tensor, labels_int32)

    ffmodel.init_layers()
    return (ffmodel, input_dls, label_dl)
Пример #2
0
def top_level_task():
    """
    Train the PyTorch ``MLP`` model on MNIST through FlexFlow.

    Converts ``MLP`` to a FlexFlow graph, compiles it with an SGD optimizer
    (learning rate 0.01) and sparse categorical cross-entropy loss, trains
    for ``ffconfig.epochs`` epochs on the MNIST training split, and prints
    the elapsed time and throughput.

    The number of training samples is taken from the loaded data rather than
    hard-coded, so the reshape and the reported throughput always agree with
    the array that is actually fed to the data loader.
    """
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # Width of one flattened sample; shared by the input tensor and the
    # reshape of the training data so the two cannot drift apart.
    num_features = 784
    dims = [ffconfig.batch_size, num_features]
    input_tensor = ffmodel.create_tensor(dims, DataType.DT_FLOAT)

    model = MLP()

    ff_torch_model = PyTorchModel(model)
    # The converted outputs are not used any further in this function.
    _output_tensors = ff_torch_model.torch_to_ff(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.label_tensor

    # Only the training split is used; the test split is discarded.
    (x_train, y_train), _ = mnist.load_data()

    print(x_train.shape)
    num_samples = x_train.shape[0]
    # Flatten each sample to a row and scale the values by 1/255.
    x_train = x_train.reshape(num_samples, num_features)
    x_train = x_train.astype('float32')
    x_train /= 255
    # Labels are fed as an int32 column vector, one row per sample.
    y_train = y_train.astype('int32')
    y_train = np.reshape(y_train, (len(y_train), 1))

    dataloader_input = ffmodel.create_data_loader(input_tensor, x_train)
    dataloader_label = ffmodel.create_data_loader(label_tensor, y_train)

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    # Scale to seconds for the report (timestamps are presumably in
    # microseconds -- confirm against FFConfig.get_current_time).
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
0
def top_level_task():
    """
    Train HuggingFace mT5 ("google/mt5-small") through FlexFlow.

    Loads the training arrays stored as ``.npy`` files under ``NUMPY_DIR``,
    converts the pretrained model onto a FlexFlow ``FFModel``, compiles it
    with an SGD optimizer (lr=0.01) and sparse categorical cross-entropy
    loss, and runs ``fit`` for ``ffconfig.epochs`` epochs. Progress messages
    are printed before each stage.
    """
    config = FFConfig()
    ffmodel = FFModel(config)
    hf_mt5 = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")

    def load_array(filename):
        # All training arrays live side by side in NUMPY_DIR.
        return np.load(os.path.join(NUMPY_DIR, filename))

    # Load train data as numpy arrays
    print("Loading data...")
    source_ids = load_array("train_source_ids.npy")
    source_mask = load_array("train_source_mask.npy")
    target_ids = load_array("train_y_ids.npy")
    target_labels = load_array("train_lm_labels.npy")

    batch_size = config.batch_size
    # Order is input_ids, attention_mask, decoder_input_ids -- it must match
    # the ``input_names`` handed to PyTorchModel below.
    model_inputs = (source_ids, source_mask, target_ids)
    input_tensors = [
        ffmodel.create_tensor((batch_size, array.shape[1]), DataType.DT_INT64)
        for array in model_inputs
    ]

    print("Tracing the model...")
    converter = PyTorchModel(
        hf_mt5,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=batch_size,
        # (encoder sequence length, decoder sequence length)
        seq_length=(source_ids.shape[1], target_ids.shape[1]),
    )
    # The converted outputs are not used any further in this function.
    _output_tensors = converter.torch_to_ff(
        ffmodel, input_tensors, verbose=True,
    )
    sgd = SGDOptimizer(ffmodel, lr=0.01)

    print("Compiling the model...")
    tracked_metrics = [
        MetricsType.METRICS_ACCURACY,
        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
    ]
    ffmodel.compile(
        optimizer=sgd,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=tracked_metrics,
    )

    print("Creating data loaders...")
    feature_loaders = [
        ffmodel.create_data_loader(tensor, array)
        for tensor, array in zip(input_tensors, model_inputs)
    ]
    # NOTE: We cast down the label tensor data to 32-bit to accommodate the
    # label tensor's required dtype
    labels_int32 = target_labels.astype("int32")
    label_loader = ffmodel.create_data_loader(
        ffmodel.label_tensor, labels_int32,
    )

    print("Initializing model layers...")
    ffmodel.init_layers()

    print("Training...")
    ffmodel.fit(
        x=feature_loaders,
        y=label_loader,
        batch_size=batch_size,
        epochs=config.epochs,
    )