Example #1
def __init__(self, vocab_fname):
    with open(vocab_fname) as f:
        vocab = json.load(f)
    self.tgt_itos = vocab['tgt_vocab']['itos']
    self.input_stoi = vocab['input_vocab']['stoi']
    self.ent_stoi = vocab['ent_vocab']['stoi']
    self.dataset = Dataset()
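A vocabulary file consumed by this loader would need the three keys read above; a minimal sketch of such a file (an assumed structure, not from the original project):

import json

# hypothetical minimal vocabulary matching the keys the loader reads
vocab = {
    'tgt_vocab': {'itos': ['<pad>', '<unk>', 'the']},
    'input_vocab': {'stoi': {'<pad>': 0, '<unk>': 1, 'the': 2}},
    'ent_vocab': {'stoi': {'PERSON': 0, 'ORG': 1}},
}
with open('vocab.json', 'w') as f:
    json.dump(vocab, f)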
Example #2
def train_model_main(option):
    #load dataset
    dataset = Dataset(DATABASE_ROOT_DIR)
    dataset.main()
    #build model
    VGG_16 = Model()
    VGG_16.build_model(dataset) 
    train_vgg = Train(VGG_16)

    train_vgg.setCallbacks(option)
    global MODEL_PATH
    if option == 'DataShuffleSplit':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "ShuffleSplit_model.h5"))
        train_vgg.train_with_SplitedData(dataset)
        VGG_16.evaluate_model(train_vgg.test_image, train_vgg.test_label)
        VGG_16.save_model(MODEL_PATH)
    
    elif option == 'KFoldM':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "KFold_Manual_model.h5"))
        train_vgg.train_with_CrossValidation_Manual(dataset)
        VGG_16.save_model(MODEL_PATH)

    elif option == 'KFoldW':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "KFold_Wrapper_model.h5"))
        train_vgg.train_with_CrossValidation_Wrapper(dataset)
        VGG_16.save_model(MODEL_PATH)

    elif option == 'GridSearch':
        print("[WARNING] This mode is not available yet!")
        sys.exit(0)
        # unreachable until GridSearch support is enabled:
        # train_vgg.train_with_GridSearchCV(dataset)
    
    elif option == 'help':
        print_usage(sys.argv[0])
    
    else:
        print_usage(sys.argv[0])
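A minimal entry point for this trainer, assuming the option string comes from the command line (print_usage is defined elsewhere in the original):

import sys

if __name__ == '__main__':
    # default to 'help' when no mode is given (assumption)
    option = sys.argv[1] if len(sys.argv) > 1 else 'help'
    train_model_main(option)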
Example #3
def __init__(self, datadir, save_dir=current_dir, **kwargs):
    self.datadir = datadir
    self.save_dir = save_dir
    self.gpu = False
    # Defaults are: arch=vgg13, learning_rate=0.01, hidden_units=512,
    # epochs=20, gpu=False
    for k, v in kwargs.items():
        setattr(self, k, v)
    self.device = self.setDevice()
    self.dataset = DS(self.datadir)
    self.dataset.transform()
    self.trainloader, self.validloader, self.testloader = self.dataset.init_loaders()
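One way to make the documented defaults explicit instead of relying on the caller; a sketch assuming the same attribute names:

def __init__(self, datadir, save_dir=current_dir, **kwargs):
    defaults = {'arch': 'vgg13', 'learning_rate': 0.01,
                'hidden_units': 512, 'epochs': 20, 'gpu': False}
    # caller-supplied kwargs override the defaults
    for k, v in {**defaults, **kwargs}.items():
        setattr(self, k, v)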
Example #4
    print('{0} = {1}'.format(arg, getattr(args, arg)))
torch.manual_seed(args.seed)
# training on the first GPU if not on CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Training on device = {}'.format(device))
"""
===========================================================================
Loading data
===========================================================================
"""
data = Data(path=args.data_path, dataset=args.dataset, split=args.split)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(
    args.dataset, data.n_node, data.n_edge))
feature = data.feature.to(device)
label = data.label.to(device)
train = Dataset(data.idx_train)
val = Dataset(data.idx_val)
test = Dataset(data.idx_test)
train_loader = DataLoader(dataset=train, batch_size=args.batch_size)
val_loader = DataLoader(dataset=val, batch_size=args.batch_size)
test_loader = DataLoader(dataset=test, batch_size=args.batch_size)
sampler = Sampler(data.adj, args.aggregator)
"""
===========================================================================
Training
===========================================================================
"""
model = SupervisedGraphSAGE(n_feature=data.n_feature,
                            n_hidden=args.hidden,
                            n_class=data.n_class,
                            agg_type=args.aggregator,
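The Dataset wrapper above only has to serve node indices to the DataLoader; a plausible minimal implementation (an assumption, the original class is not shown):

from torch.utils.data import Dataset as TorchDataset

class Dataset(TorchDataset):
    """Wraps a 1-D tensor/array of node indices for batching."""
    def __init__(self, idx):
        self.idx = idx

    def __len__(self):
        return len(self.idx)

    def __getitem__(self, i):
        return self.idx[i]  # the DataLoader batches these indices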
Example #5
import random

import torch
import torch.nn as nn
import matplotlib.pyplot as plt

from load_data import Dataset, TestDataset
from model import Net

batch_size = 1
n_iter = 150
lr = 0.001
random_seed = 60
save_path = 'save_model/'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(random_seed)

dataset = Dataset()
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
print('dataset: {}'.format(len(dataset)))

mse = nn.MSELoss()
bce = nn.BCELoss()
L1_loss = nn.L1Loss()

load_model_path = 'save_model/epoch150_loss_35.7894.pth.tar'

print('==> Building model...')
net = Net()

net.load_state_dict(torch.load(load_model_path))

criterion = mse
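The snippet stops after picking the criterion; a hedged sketch of how evaluation might continue with the pieces already built, assuming the loader yields (input, target) pairs:

net.to(device).eval()
total = 0.0
with torch.no_grad():
    for x, y in loader:
        pred = net(x.to(device))
        total += criterion(pred, y.to(device)).item()
print('mean loss: {:.4f}'.format(total / len(loader)))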
Example #6
        lambda row: normalize(row, maximums, minimums), axis=1)
    print(data.columns)
    for month in range(1, 13):
        for day in range(1, 32):
            data_by_day = data[data['month'] == month]
            data_by_day = data_by_day[data_by_day['day'] == day]
            if len(data_by_day) > 0:
                by_day_y.append(
                    statistics.mean(data_by_day['normalized_volume']))
                by_day_x.append(f'{month}-{day}')
    plt.plot(by_day_x, by_day_y)
    print('weak days')
    for day, mean_volume in zip(by_day_x, by_day_y):
        if mean_volume < 0.35:
            print(day)
    print('peak days')
    for day, mean_volume in zip(by_day_x, by_day_y):
        if mean_volume > 0.55:
            print(day)

    plt.show()


dataset = Dataset(file_name)
model = BaselineModel(dataset, {})
for year in range(2002, 2017, 2):
    predictions = model.predict(f'{year}-01-01', f'{year+1}-12-31')
    gold = dataset.get_subset(f'{year}-01-01', f'{year+1}-12-31')['Volume']
    mse, r2 = evaluate(gold, predictions)
    print(f'{year}, {year+1}: MSE: {mse}, R2: {r2}')
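The normalize helper applied row-wise at the top is not shown; a plausible min-max version over the Volume column (assumed column names):

def normalize(row, maximums, minimums):
    # hypothetical: scale Volume into [0, 1] using the precomputed extrema
    span = maximums['Volume'] - minimums['Volume']
    row['normalized_volume'] = (row['Volume'] - minimums['Volume']) / span
    return row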
Example #7
                test += 1
                aux_clusters[curr_indx].remove(k)
                aux_clusters[pred_idx].append(k)
                self.obj_to_cluster[k] = pred_idx
        self.clusters = aux_clusters
        return test

    def objective_function(self):
        print("calculating J_kcm_f_gh...")
        result = 0.
        self.part2 = self.__all_against_all_cluster_sum()
        for i in range(self.c):
            for k in self.clusters[i]:
                result += 1 - self.__object_against_cluster_sum(k, i) + self.part2[i]
        return result

    def rand_score(self):
        return adjusted_rand_score(labels_true=self.y,
                labels_pred=list(self.obj_to_cluster.values()))

if __name__ == "__main__":
    import pandas as pd
    from load_data import Dataset


    datadir = '../../data/segmentation_2.test'
    df = pd.read_csv(datadir, sep=',')
    mydata = Dataset()
    mydata.load(df, 'rgb')
    kcm = KCM_F_GH(c=7, p=mydata.X.values.shape[1], data=mydata)
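A sketch of how the __main__ block might continue, assuming initialization() exists as in Example #14 and that the reassignment method shown at the top returns the number of moved objects (its real name is not visible here; `allocate` is hypothetical):

    kcm.initialization()
    moves = 1
    while moves > 0:            # iterate until no object changes cluster
        moves = kcm.allocate()  # hypothetical name for the method above
    print('J =', kcm.objective_function())
    print('ARI =', kcm.rand_score())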
Example #8
    print(args)
    torch.manual_seed(args.seed)
    device = torch.device("cuda")

    os.makedirs('./ckpt/', exist_ok=True)
    os.makedirs('./iter_num/' + args.model_name, exist_ok=True)
    os.makedirs('./logs/' + args.model_name, exist_ok=True)
    os.makedirs('./labels/' + args.model_name, exist_ok=True)
    os.makedirs('./c/', exist_ok=True)

    dataset = Dataset(args)
    change_itr = range(8000, 100000, 4000)
    logger = Logger('./logs/' + args.model_name)
    if args.env_name == 'bimgame':
        model = ConvModel(3, args.num_subgoals, use_rnn=False).to(device)
    else:
        model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

    start_itr = 0
    c = []
    if args.one_class:
        if args.pretrained_ckpt is not None:
            model.load_state_dict(
                torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
            start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
            c = torch.from_numpy(
Example #9
print('Training on device = {}'.format(device))

"""
===========================================================================
Loading data
===========================================================================
"""
data = Data(path=args.data_path, dataset=args.dataset, split=args.split)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(args.dataset, data.n_node, data.n_edge))
feature = data.feature.to(device)
norm_adj = data.norm_adj.to(device)
label = data.label.to(device)
label_train = label[data.idx_train]
label_val = label[data.idx_val]
label_test = label[data.idx_test]
train = Dataset(torch.arange(len(data.idx_train)))
train_loader = DataLoader(dataset=train, batch_size=args.batch_size)
sampler = Sampler(data.feature[data.idx_train], data.norm_adj_train, args.sample)

"""
===========================================================================
Training
===========================================================================
"""
# Model and optimizer
model = FastGCN(n_feature=data.n_feature, n_hidden=args.hidden, n_class=data.n_class, dropout=args.dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
metric = torchmetrics.Accuracy().to(device)

for epoch in range(1, args.epoch+1):
    t = time.time()
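The epoch body is cut off after the timer; a generic full-batch training step that fits the objects built above (a sketch, not the original FastGCN sampling loop; the forward signature is an assumption):

    model.train()
    optimizer.zero_grad()
    output = model(feature, norm_adj)  # assumed forward signature
    loss = torch.nn.functional.cross_entropy(output[data.idx_train], label_train)
    loss.backward()
    optimizer.step()
    acc = metric(output[data.idx_val].max(1)[1], label_val)
    print('epoch {:03d} loss {:.4f} val acc {:.4f} ({:.2f}s)'.format(
        epoch, loss.item(), acc.item(), time.time() - t))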
Example #10
from load_data import Dataset

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from create_model import create_model
data = Dataset()

data_train, data_test, labels_train, labels_test = train_test_split(
    data.dataset, data.labels, test_size=0.20, random_state=42)

class_names = ["Not fire", "Fire"]

plt.figure()
plt.imshow(data_train[1])
plt.colorbar()
plt.grid(False)
plt.show()
data_train = data_train / 255
data_test = data_test / 255
plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(data_train[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[labels_train[i]])
plt.show()
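Training itself would use the imported create_model; a sketch assuming it returns a compiled Keras model (its real signature is not shown, the no-arg call is hypothetical):

model = create_model()  # hypothetical no-arg signature
model.fit(data_train, labels_train, epochs=10,
          validation_data=(data_test, labels_test))
test_loss, test_acc = model.evaluate(data_test, labels_test)
print('test accuracy: {:.3f}'.format(test_acc))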
Example #11
from load_data import Dataset
from policy import Network

d = Dataset()
d.prepare("/tftpboot/cv/data2/")

n = Network()
n.initialize_variables("./saved_network/")

print("dataset length = ", d.length)

for i in range(100000):
    traindata = d.getdata(32)
    n.train(traindata, i)

n.save_variables("./saved_network/test", 1)
Example #12
    else:
        device_id = torch.cuda.current_device()
        print('using device', device_id, torch.cuda.get_device_name(device_id))

device = torch.device("cuda")
print('DEVICE:', device)

if __name__ == '__main__':
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # load data
    ds = args.dataSet
    data = Dataset(dataset=ds, path='../GraphSAGE/')  # use the dataset chosen via args
    data.load_data()

    feat = torch.FloatTensor(data.cora_feats).to(device)
    num_labels = len(set(getattr(data, ds + '_labels')))

    graphSage = GraphSage(2,
                          feat.size(1),
                          128,
                          feat,
                          getattr(data, ds + '_adj_lists'),
                          device,
                          gcn=args.gcn,
                          agg_func=args.agg_func).to(device)

    classification = Classification(128, num_labels).to(device)
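A plausible next step wiring the two modules above into one optimizer (a sketch; the original training loop is not shown and the learning rate is hypothetical):

    params = list(graphSage.parameters()) + list(classification.parameters())
    optimizer = torch.optim.SGD(params, lr=0.7)  # hypothetical learning rate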
Example #13
parser.add_argument('--hidden_size', type=int, default=300,
					help='size of hidden tensor')

parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--wdecay', type=float, default=1e-8)
parser.add_argument('--cuda', default=0, type=int)
parser.add_argument('--eval', action='store_true', help='evaluate model')

args = parser.parse_args()

sys.stdout = open('train.log', 'w+')

# create a corpus to save time
if args.create_corpus == 1:
	print("Creating New Corpus")
	corpus = Dataset()
	corpus_save_path = "nyt_corpus.pkl"
	with open(corpus_save_path, 'wb') as output:
		pickle.dump(corpus, output, pickle.HIGHEST_PROTOCOL)
	print("Number of training samples = ", len(corpus.train_sentences))
	print("Number of testing samples = ", len(corpus.test_sentences))
	assert len(corpus.train_sentences) == len(corpus.train_labels)
	assert len(corpus.test_sentences) == len(corpus.test_labels)

else:
	print("Loading saved Corpus")
	corpus_save_path = "nyt_corpus.pkl"
	with open(corpus_save_path, 'rb') as input_:
		corpus = pickle.load(input_)
	print("Number of training samples = ", len(corpus.train_sentences))
	print("Number of testing samples = ", len(corpus.test_sentences))
Example #14
if __name__ == '__main__':

    datadir = '../../data/segmentation_2.test'
    result_dir = '../../results/clustering'
    result_file = 'results'
    bresult_file = 'best_result'
    view = 'rgb'
    norm = True

    result_file = '{}{}_{}'.format(view, '_norm' if norm else '', result_file)
    bresult_file = '{}{}_{}'.format(view, '_norm' if norm else '',
                                    bresult_file)

    df = pd.read_csv(datadir, sep=',')
    mydata = Dataset()
    mydata.load(df, view)

    # variables to store the results of each run of the algorithm
    res_obj_function = []
    res_cluster = []
    res_obj_to_cluster = []
    res_hp = []
    res_ari = []  # list of adjusted Rand indexes
    res_J = []  # the best convergence series of J
    # run the algorithm several times (5 runs here)
    for epoch in range(5):
        start_total_time = time.time()
        # initialize the algorithm
        kcm = KCM_F_GH(c=7, p=mydata.X.shape[1], data=mydata, norm=norm)
        kcm.initialization()
Example #15
class Model:
    current_dir = os.path.dirname(os.path.realpath(__file__))

    def __init__(self, datadir, save_dir=current_dir, **kwargs):
        self.datadir = datadir
        self.save_dir = save_dir
        self.gpu = False
        # Defaults are: arch=vgg13, learning_rate=0.01, hidden_units=512,
        # epochs=20, gpu=False
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.device = self.setDevice()
        self.dataset = DS(self.datadir)
        self.dataset.transform()
        self.trainloader, self.validloader, self.testloader = self.dataset.init_loaders()

    def __str__(self):
        return '{0.__class__.__name__}:(\n\tarch={0.arch}\n ' \
               '\tlearning_rate={0.learning_rate}\n' \
               '\thidden_units={0.hidden_units}\n' \
               '\tepochs={0.epochs}\n' \
               '\tsave_Dir={0.save_dir}\n' \
               '\tgpu={0.gpu})\n' \
               '\tdevice={0.device}\n'.format(self)

    # Use GPU if it's available
    def setDevice(self):
        if self.gpu and torch.cuda.is_available():
            self.device = torch.device("cuda")

        else:
            self.device = torch.device("cpu")
        print('device set to {}'.format(self.device))

        return self.device

    def setModel(self, arch):
        print("\nsetting up model...\n")
        #         alexnet = models.alexnet(pretrained=True)
        #         squeezenet = models.squeezenet1_0(pretrained=True)
        #         vgg11 = models.vgg11(pretrained=True)
        #         vgg13 = models.vgg13(pretrained=True)
        #         vgg16 = models.vgg16(pretrained=True)
        #         vgg19 = models.vgg19(pretrained=True)
        #         densenet = models.densenet161(pretrained=True)
        #         inception = models.inception_v3(pretrained=True)
        #         googlenet = models.googlenet(pretrained=False)
        #         shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
        #         mobilenet = models.mobilenet_v2(pretrained=True)
        #         resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
        # map each name to its constructor so only the requested model is built
        switcher = {
            'alexnet': models.alexnet,
            'squeezenet': models.squeezenet1_0,
            'inception': models.inception_v3,
            #'googlenet': models.googlenet,
            #'mobilenet': models.mobilenet_v2,
            #'resnext50_32x4d': models.resnext50_32x4d,
            'vgg11': models.vgg11,
            'vgg13': models.vgg13,
            'vgg16': models.vgg16,
        }
        error = ("\nThat model is not supported yet. The supported models are: "
                 "'alexnet', 'squeezenet', 'vgg11', 'vgg13', 'vgg16', 'inception'")

        constructor = switcher.get(arch)
        if constructor is None:
            self.model = None
            print(error)
        else:
            self.model = constructor(pretrained=True)
            print('\nmodel successfully set to {}'.format(arch))

    # Freeze parameters so we don't backprop through them

    def create_classifier(self):
        print("\ncreating classifier...")
        for param in self.model.parameters():
            param.requires_grad = False

        self.model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4000), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(4000, 1280), nn.Linear(1280, self.hidden_units),
            nn.Linear(self.hidden_units, 102), nn.LogSoftmax(dim=1))

        self.criterion = nn.NLLLoss()

        # Only train the classifier parameters, feature parameters are frozen
        self.optimizer = optim.Adam(self.model.classifier.parameters(),
                                    lr=0.003)
        #optimizer.zero_grad()
        images, labels = next(iter(self.validloader))
        ps = torch.exp(self.model(images))
        #print("shape should be [64, 102]", ps.shape)
        top_p, top_class = ps.topk(1, dim=1)
        print(top_class[:10, :])
        equals = top_class == labels.view(*top_class.shape)
        accuracy = torch.mean(equals.type(torch.FloatTensor))
        print(f'Accuracy: {accuracy.item() * 100}%')
        print('\ncheck results to see if the classifier is configured correctly.')

    def train_model(self):

        device = self.setDevice()
        self.model.to(self.device)
        print(
            '\ntraining {} on {}, for {} epochs. Optimizer learning rate set to {}...'
            .format(self.arch, self.device, self.epochs, self.learning_rate))
        epochs = self.epochs
        steps = 0
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(self.model.classifier.parameters(),
                               lr=self.learning_rate)
        train_losses, test_losses = [], []
        for epoch in range(epochs):
            running_loss = 0
            for inputs, labels in self.trainloader:

                inputs, labels = inputs.to(self.device), labels.to(self.device)

                optimizer.zero_grad()

                logps = self.model.forward(inputs)
                loss = criterion(logps, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
            else:  # for/else: runs once the epoch's batches are exhausted
                ## TODO: Implement the validation pass and print out the validation accuracy
                test_loss = 0
                accuracy = 0

                # Turn off gradients for validation, saves memory and computations
                with torch.no_grad():
                    for images, labels in self.validloader:
                        images, labels = images.to(self.device), labels.to(
                            self.device)
                        log_ps = self.model(images)
                        batch_loss = criterion(log_ps, labels)

                        test_loss += batch_loss.item()

                        ps = torch.exp(log_ps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()

                train_losses.append(running_loss / len(self.trainloader))
                test_losses.append(test_loss / len(self.validloader))

                print(
                    "Epoch: {}/{}.. ".format(epoch + 1, epochs),
                    "Training Loss: {:.3f}.. ".format(running_loss /
                                                      len(self.trainloader)),
                    "Test Loss: {:.3f}.. ".format(test_loss /
                                                  len(self.validloader)),
                    "Test Accuracy: {:.3f}".format(accuracy /
                                                   len(self.validloader)))

    def validate_model(self):
        test_loss = 0
        accuracy = 0
        test_losses = []
        # Turn off gradients for validation, saves memory and computations
        with torch.no_grad():
            for images, labels in self.testloader:
                images, labels = images.to(self.device), labels.to(self.device)
                log_ps = self.model(images)
                batch_loss = self.criterion(log_ps, labels)

                test_loss += batch_loss.item()

                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

        test_losses.append(test_loss / len(self.testloader))
        print("Test Loss: {:.3f}.. ".format(test_loss / len(self.testloader)),
              "Test Accuracy: {:.3f}".format(accuracy / len(self.testloader)))

    def save_model_checkpoint(self):
        print("\nOur model: \n\n", self.model, '\n')
        self.model.epochs = self.epochs
        self.model.class_to_idx = self.trainloader.dataset.class_to_idx
        print('model.epochs: ', self.model.epochs)
        print("The state dict keys: \n\n", self.model.state_dict().keys())
        checkpoint_out = self.save_dir + '/' + 'checkpoint2.pth'
        checkpoint = {
            'input_size': [3, 224, 224],
            'output_size': 102,
            'arch': self.arch,
            'state_dict': self.model.state_dict(),
            'epoch': self.model.epochs,
            'class_to_idx': self.model.class_to_idx
        }
        print('\n\nsaving to {}.'.format(checkpoint_out))
        try:
            torch.save(checkpoint, checkpoint_out)
        except Exception:
            print('Checkpoint did not save.')
        else:
            print('Checkpoint successful.')
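A hedged counterpart for restoring the checkpoint written above (a sketch; it assumes the saved classifier matches the freshly built architecture):

def load_model_checkpoint(path, datadir):
    # rebuild a Model from the fields stored by save_model_checkpoint
    checkpoint = torch.load(path)
    m = Model(datadir, arch=checkpoint['arch'])
    m.setModel(checkpoint['arch'])
    m.model.load_state_dict(checkpoint['state_dict'])
    m.model.class_to_idx = checkpoint['class_to_idx']
    return m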
Example #16
from load_data import Dataset
import numpy as np
import os
np.random.seed(1729)

if __name__ == '__main__':
    data = Dataset(os.getcwd())
    """MNIST params"""
    num_examples = 60000
    inp_shape = 784
    num_classes = 10
    assert (data.num_examples() == num_examples)
    assert (data.inp_shape() == inp_shape)
    assert (data.num_classes() == num_classes)

    for _ in range(10000):
        batch_size = np.random.randint(low=1, high=num_examples)
        batch_x, batch_y = data.next_batch(batch_size)
        assert (batch_x.shape[0] == batch_size)
        assert (batch_y.shape[0] == batch_size)

        assert (batch_x.shape[1] == inp_shape)
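A minimal next_batch method that would satisfy the assertions above, assuming the Dataset keeps flattened images and labels in self.X / self.Y (hypothetical attribute names):

def next_batch(self, batch_size):
    # sample batch_size distinct rows from the 60000 x 784 image matrix
    idx = np.random.choice(self.X.shape[0], size=batch_size, replace=False)
    return self.X[idx], self.Y[idx]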