Example #1
def __init__(self, args):
    self.args = args
    # Build a synthetic 1x32x32 dataset, then split it into
    # (validation, train) with the project's percent-based helper.
    data = torchvision.datasets.FakeData(
        size=args.size,
        image_size=(1, 32, 32),
        num_classes=args.num_classes,
        transform=torchvision.transforms.ToTensor(),
    )
    self.validation_data, self.train_data = utils.random_split(
        data, args.validation_percent)
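A note on this example: `utils.random_split(data, args.validation_percent)` takes a validation fraction, unlike `torch.utils.data.random_split`, which takes explicit subset lengths. A minimal sketch of such a wrapper, assuming `validation_percent` is a float in [0, 1] (the project's real helper is not shown here):

    from torch.utils.data import random_split

    def random_split_by_percent(dataset, validation_percent):
        # Hypothetical wrapper: turn a validation fraction into the length
        # list torch.utils.data.random_split expects, returning
        # (validation, train) in the order used above.
        n_val = int(len(dataset) * validation_percent)
        return random_split(dataset, [n_val, len(dataset) - n_val])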
Example #2
def generate_sets(self):
    dataset_classes = glob.glob(self.path + "/*")
    for folder in dataset_classes:
        # Class name is the last path component (handle both separators).
        if "/" in folder:
            class_name = folder.split("/")[-1]
        else:
            class_name = folder.split("\\")[-1]
        self.classes.append(class_name)
        class_files = glob.glob(folder + "/*.JPEG")
        # Use an integer sample size (a third of the class files).
        test_size = len(class_files) // 3
        train, test = utils.random_split(class_files, test_size)
        self.train_set.append(train)
        self.test_set.append(test)
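Examples #2 and #7 both use a `(remainder, sample)` convention: the helper draws `sample_size` items at random and returns everything else alongside them. A plausible standard-library sketch of that behavior (the actual `utils.random_split` is not shown; this is an assumption about its contract):

    import random

    def random_split(seq, sample_size):
        # Hypothetical sketch: choose sample_size positions at random;
        # items at those positions form the sample, the rest the remainder.
        seq = list(seq)
        picked = set(random.sample(range(len(seq)), int(sample_size)))
        sample = [seq[i] for i in sorted(picked)]
        remainder = [seq[i] for i in range(len(seq)) if i not in picked]
        return remainder, sample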
Example #4
File: dnn.py Project: wohlert/atone
    # Keras 1.x functional API (`merge`, `input=`/`output=` kwargs); Keras 2
    # renamed these to `Concatenate` and `inputs=`/`outputs=`.
    merge_nets = merge(convnets, mode="concat")
    reshape = Reshape((128, 7))(merge_nets)
    lstm = LSTM(128, input_dim=frames, input_length=128, return_sequences=False, activation="tanh")(reshape)
    # dropout1 = Dropout(0.5)(lstm)
    dense1 = Dense(512, activation="relu")(lstm)
    # dropout2 = Dropout(0.5)(dense1)
    prediction = Dense(1, activation="sigmoid")(dense1)

    return Model(input=inputs, output=prediction)


# Load data
from utils import random_split

X_train, X_test, y_train, y_test = random_split("images/", 32, 7)
_, frames, channels, width, height = np.shape(X_train)

# Reshape to match CNN shapes
X_train = list(X_train.reshape(frames, -1, channels, width, height))
X_test = list(X_test.reshape(frames, -1, channels, width, height))
image_shape = (channels, width, height)

# Create model
model = functional_model(image_shape, frames)
model.compile(loss='binary_crossentropy',
              metrics=['accuracy'],
              optimizer="adam")

#SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
Example #5
def experiment_clfs(df,
                    target_col,
                    unused_cols,
                    numeric_cols,
                    clfs,
                    model_lst,
                    grid,
                    test_length=6,
                    is_temporal=True,
                    draw=True,
                    table=True):
    '''
    Experiment with different parameters for classifiers.
    Loop through each model and evaluate it accordingly.
    Inputs:
        df: dataframe (joint table)
        target_col: (numpy array) target variable
        unused_cols: (numpy array) unused variables in df
        numeric_cols: (numpy array) numerical variables in df
        clfs: (dictionary) classifiers from create_clfs_params() function
        model_lst: (list of strings) model names to use
        grid: (dictionary) grid from create_clfs_params() function
        test_length: (positive int) length of the testing window, in months
        is_temporal: (bool) True to split the data with temporal validation;
            False to use a random split
        draw: (bool) True to plot a precision-recall curve for each fitted model
        table: (bool) True to write the evaluation results to CSV
    Outputs:
        PR curves
        classifiers_eval.csv: CSV file that stores the evaluation results
    '''
    output_cols = ('model', 'parameters', 'train_time', 'test_time',
                   'accuracy', 'F1_score', 'auc', 'p@1', 'p@2', 'p@5', 'p@10',
                   'p@20', 'p@30', 'p@50', 'r@1', 'r@2', 'r@5', 'r@10', 'r@20',
                   'r@30', 'r@50')

    output_df = pd.DataFrame(columns=output_cols)

    if is_temporal:
        x_train, x_test, y_train, y_test = utils.split_data(
            df, target_col, unused_cols, test_length)
        if x_train is None and x_test is None and y_train is None and y_test is None:
            print(
                "Temporal split failed. Switching to a random split with test size 30%."
            )
            x_train, x_test, y_train, y_test = utils.random_split(
                df, target_col, unused_cols)
    else:
        x_train, x_test, y_train, y_test = utils.random_split(
            df, target_col, unused_cols)

    # Discretize numeric columns
    x_train, x_test = preprocess.discretize(x_train, x_test, numeric_cols)

    clf_lst = [clfs[x] for x in model_lst]
    for i, clf in enumerate(clf_lst):
        print(model_lst[i])
        params = grid[model_lst[i]]
        for p in ParameterGrid(params):
            try:
                model = clf.set_params(**p)
                start_train = time.time()
                model.fit(x_train, y_train)
                end_train = time.time()
                train_time = end_train - start_train

                start_test = time.time()
                y_pred = model.predict(x_test)
                end_test = time.time()
                test_time = end_test - start_test

                y_pred_probs = model.predict_proba(x_test)[:, 1]

                scores = evaluate(y_pred, y_pred_probs, y_test)

                index = len(output_df)
                output_df.loc[index] = [
                    model_lst[i], p, train_time, test_time, scores['accuracy'],
                    scores['F1_score'], scores['auc'], scores['p@1'],
                    scores['p@2'], scores['p@5'], scores['p@10'],
                    scores['p@20'], scores['p@30'], scores['p@50'],
                    scores['r@1'], scores['r@2'], scores['r@5'],
                    scores['r@10'], scores['r@20'], scores['r@30'],
                    scores['r@50']
                ]

                if draw:
                    model_name = model_lst[i] + str(index)
                    plot_pr_curve(y_test, y_pred_probs, model_name, p)

            except Exception as e:
                print(e)
        print("Finished evaluating {}.".format(model_lst[i]))
    if table:
        output_df.to_csv('eval_results/classifiers_eval.csv')

    return output_df
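In this example `utils.random_split(df, target_col, unused_cols)` returns the four train/test arrays directly, and the fallback message quotes a 30% test size. A plausible sketch built on scikit-learn, assuming `target_col` names a column and `unused_cols` lists columns to drop (the real helper is not shown):

    from sklearn.model_selection import train_test_split

    def random_split(df, target_col, unused_cols, test_size=0.3):
        # Hypothetical sketch: drop unused columns, separate the target,
        # and split randomly at the quoted 30% test size.
        X = df.drop(columns=list(unused_cols) + [target_col])
        y = df[target_col]
        return train_test_split(X, y, test_size=test_size)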
Example #6
def load_numpy_states(train_idx=None, test_idx=None):
    X = load_numpy()['states']
    if train_idx is None:
        train_idx, test_idx = random_split(np.arange(X.shape[0]))

    return X[train_idx], X[test_idx]
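Here `random_split` splits a bare index array into `(train_idx, test_idx)`. A minimal sketch of that behavior, with the test fraction as an assumed default (the real helper is not shown):

    import numpy as np

    def random_split(indices, test_fraction=0.25):
        # Hypothetical sketch: shuffle the indices, then slice off a test
        # portion; returns (train_idx, test_idx) as unpacked above.
        shuffled = np.random.permutation(indices)
        n_test = int(len(shuffled) * test_fraction)
        return shuffled[n_test:], shuffled[:n_test]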
Example #7
def test_randsplit():
    l = range(40)
    sample_size = 5
    remainder, sample = utils.random_split(l, sample_size)
    print("remainder = {0}".format(remainder))
    print("sample = {0}".format(sample))
Example #9
sfsnet_model.load_state_dict(torch.load("/home/hd8t/xiangyu.yin/results/metadata/checkpoints/Skip_First.pkl"))
face = []
name = []
for img in glob.glob(Celeb_path + "*.png"):
    n_suffix = img.split('/')[-1]
    face.append(img)
    name.append(n_suffix.split('.')[0])
datasize = len(face)
# Hold out 2% of the faces for validation.
validation_count = int(2 * datasize / 100)
train_count = datasize - validation_count
transform = transforms.Compose([
    transforms.Resize(Size_for_Image),
    transforms.ToTensor()
])
full_dataset = CelebDataset(face, name, transform)
train_dataset, val_dataset = random_split(full_dataset, [train_count, validation_count])
celeb_dl = DataLoader(train_dataset, batch_size=1, shuffle=True)
wandb.init(tensorboard=True)
for bix, data in enumerate(celeb_dl):
    fa, na = data
    na = na[0]
    predicted_normal, predicted_albedo, predicted_sh, predicted_shading, out_recon = sfsnet_model(fa)
    print('Processing face {}'.format(bix))
    if bix % 10 == 0:
        print(predicted_albedo)
        print(predicted_shading)
        out_celeb = out_dir + 'Celeb' + str(bix)
        if not os.path.exists(out_celeb):
            os.makedirs(out_celeb)
        out_celeb += "/"
        wandb_log_images(wandb, predicted_normal, None, suffix + "Predicted Normal", \
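Example #9 uses the standard `torch.utils.data.random_split` with explicit subset lengths. For a reproducible split, torch's `random_split` also accepts a seeded generator, e.g.:

    import torch
    from torch.utils.data import random_split

    # Reproducible 98/2 split: seed the generator that shuffles the indices.
    train_dataset, val_dataset = random_split(
        full_dataset, [train_count, validation_count],
        generator=torch.Generator().manual_seed(42))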