Example #1
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    DATASET_DIRECTORY = '../data_part1'

    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)
    num_classes = len(set(y))

    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)
    print(ens.measure_accuracy(X, y))

    return
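    # NOTE: the early return above makes everything below unreachable; the
    # single-model training path is presumably kept for reference.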
    X_train, X_validation, y_train, y_validation = dataset_manip.split_dataset(
        X, y, rate=0.5)

    model = Model(image_shape=X.shape[1:],
                  num_classes=num_classes,
                  model_path='./model_files/model',
                  batch_size=512,
                  first_run=True)  # 1250

    model.train(X_train, y_train, X_validation, y_validation, 500)
    model.train_unsupervised(X_hidden, X_validation, y_validation, 200)

    print('Final Accuracy: {}'.format(
        model.measure_accuracy(X_validation, y_validation)))
Example #2
    def __init__(self, sourceFile, targetFile):
        self.SWindow = []
        self.TWindow = []
        self.TPredictWindow = []

        self.SDataBuffer = []  #Queue
        self.TDataBuffer = []  #Queue

        self.SInitialDataBuffer = []
        self.TInitialDataBuffer = []

        self.changeDetector = ChangeDetection(Properties.GAMMA,
                                              Properties.SENSITIVITY,
                                              Properties.MAX_WINDOW_SIZE)
        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        classNameList = []
        self.source = Stream(sourceFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)
        self.target = Stream(targetFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)

        Properties.MAXVAR = self.source.MAXVAR

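        # Connect to the JVM entry point over Py4J; the callback server on the
        # next port lets the Java side call back into Python.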
        self.gateway = JavaGateway(
            start_callback_server=True,
            gateway_parameters=GatewayParameters(port=Properties.PY4JPORT),
            callback_server_parameters=CallbackServerParameters(
                port=Properties.PY4JPORT + 1))
        self.app = self.gateway.entry_point
Example #3
def process_all_images(config):
    filenames = sorted(config.files)
    tables = {}
    segmentations = {}
    ensembles = {}
    for segmentation in config.segmentations:
        segmentations[segmentation] = {}
    ensembles['walker_binary'] = {}
    ensembles['opt'] = {}
    erosions = {}
    methods = {'unet': {}, 'walker_binary': {}, 'opt': {}}
    for key in ['jac', 'af1', 'merge_rate', 'split_rate']:
        tables[key] = pd.DataFrame(columns=list(methods.keys()), copy=True)
    os.makedirs(config.output, exist_ok=True)
    root_dir = os.path.join(config.output, config.filename)
    if os.path.exists(root_dir):
        shutil.rmtree(root_dir)
    os.makedirs(root_dir, exist_ok=True)
    counter = 0
    for file in filenames:
        if counter % 50 == 0:
            print(counter)
        if counter == config.counter:
            break
        annot_path = os.path.join(config.annot, file.strip())
        annot = skimage.io.imread(annot_path, as_gray=True)
        for segmentation in segmentations.keys():
            path = os.path.join(config.root, segmentation, file.strip())
            segmentations[segmentation]['orig'] = skimage.io.imread(
                path, as_gray=True)
            segmentations[segmentation][
                'results'] = comp.get_per_image_metrics(
                    annot, segmentations[segmentation]['orig'], False)
            segmentations[segmentation]['mask'] = np.where(
                segmentations[segmentation]['orig'] > 0, 255, 0)
        for ensemble in ensembles.keys():
            ensembles[ensemble]['orig'] = Ensemble(
                segmentations, config.erosions, config.beta).ensemble(ensemble)
            ensembles[ensemble]['results'] = comp.get_per_image_metrics(
                annot, ensembles[ensemble]['orig'], False)
            ensembles[ensemble]['mask'] = np.where(
                ensembles[ensemble]['orig'] > 0, 255, 0)
        for key in tables.keys():
            results = {}
            # for segmentation in segmentations.keys():
            #     results[segmentation] = segmentations[segmentation]['results'][key]
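            # Average the metric over all individual segmentations and report
            # it in the 'unet' column; each ensemble gets its own column below.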
            avg = statistics.mean([
                segmentations[segmentation]['results'][key]
                for segmentation in segmentations
            ])
            results['unet'] = avg
            for ensemble in ensembles.keys():
                if ensemble != 'union':
                    results[ensemble] = ensembles[ensemble]['results'][key]
            # DataFrame.append() was removed in pandas 2.0; concatenate a
            # one-row frame instead.
            tables[key] = pd.concat([tables[key], pd.DataFrame([results])],
                                    ignore_index=True)
        counter += 1

    os.makedirs(os.path.join(root_dir, 'stats'), exist_ok=True)
    output_charts(tables, list(methods.keys()),
                  os.path.join(root_dir, 'stats'), config)
Example #4
def main():
    print(args)

    print("=> creating model '{}'".format(args.arch))
    model = Ensemble()
    model = torch.nn.DataParallel(model).cuda()
    print(model)
    
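    # ImageNet channel means and standard deviations for input normalization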
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_data = datautil.SceneDataset(args.data,
                                      img_transform=transforms.Compose([
                                          transforms.Resize((args.img_size, args.img_size)),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)
    checkpoint = torch.load(args.test_model)
    model.load_state_dict(checkpoint['state_dict'])
    #model.load_state_dict(checkpoint)
    if os.path.isdir(args.data):
        ret = test(test_loader,model)
        imgs = [i[:-4] for i in os.listdir(args.data)]
        with open('result3_.csv', 'w') as f:
            '''
            f.write(','.join(['FILE_ID','CATEGORY_ID'])+'\n')
            f.write('\n'.join([','.join([str(a),str(b)]) for a,b in zip(imgs,ret)]))
            '''
            #FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2
            f.write(','.join(['FILE_ID','CATEGORY_ID0','CATEGORY_ID1','CATEGORY_ID2'])+'\n')
            f.write('\n'.join([','.join([str(a)]+[str(int(i)) for i in b]) for a,b in zip(imgs,ret)]))
    else:
        test_labeled(test_loader,model)
Example #5
async def main():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    input_path = os.path.join(this_dir, "input.txt")
    with open(input_path) as f:
        raw_code = f.readline()
        e = Ensemble(raw_code)
        await e.run()
Example #6
def create_song(graph_attributes={
    'graph_type': 'Small World',
    'average_degree': 4,
    'rewiring_prob': 0.3
},
                number_players=20,
                number_time_steps=300,
                tempo=108,
                player_attributes=None):
    """
    arguments:
        graph_type : 'Small World', 'Random', 'Configuration', 'Structured'
        average_degree
        number_players
        rewiring_prob
        number_time_steps
        tempo
        player_attributes: {
            duration: (min_duration, max_duration)
            note_change_choices: 'All', 'Neighbors of Neighbors'
            harmonicity threshold: 'Fixed' or 'Moving Average'
                fixed threshold
                moving average threshold
            susceptibility to influence
        }
    """

    graph_type = graph_attributes['graph_type']

    #create player graph
    if graph_type == 'Small World':
        #assert rewiring_prob
        G = nx.watts_strogatz_graph(number_players,
                                    graph_attributes['average_degree'],
                                    graph_attributes['rewiring_prob'])
    elif graph_type == 'Random':
        pass
    elif graph_type == 'Structured':
        pass

    #add starting pitch to node
    starting_pitches = {i: 'random' for i in range(len(G))}
    nx.set_node_attributes(G, starting_pitches, 'starting_pitch')

    #create ensemble object
    ensemble = Ensemble(G, player_attributes)

    #evolve ensemble
    ensemble.evolve(number_time_steps)

    #show pitch history
    pitch_history_data = ensemble.get_pitch_history_data()
    harmonicity_data = ensemble.get_harmonicity_data()

    #create file
    filename = create_midi_file(ensemble, tempo)
    create_data_file(filename.replace('.mid', '.txt'), pitch_history_data,
                     harmonicity_data)

    return filename, pitch_history_data, harmonicity_data
Example #7
    def __init__(self, sourceFile, targetFile):
        self.SDataBufferArr = None  #2D array representation of self.SDataBuffer
        self.SDataLabels = None
        self.TDataBufferArr = None  #2D array representation of self.TDataBuffer
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma

        self.kliep = None

        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)
        """
        - simulate source and target streams from corresponding files.
        """
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        Properties.MAXVAR = self.source.initialData.shape[0]
Example #8
def worker(fold, n_users, n_items, dataset_dir):
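    # Load this fold's training ratings as a sparse matrix and binarize them
    # with the module-level `binarize_threshold`.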
    traFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))

    print(
        dataset_dir.split('/')[-2] + '@%d:' % (fold + 1), trasR.shape,
        trasR.nnz, '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR=trasR, batch_size=batch_size)

    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold + 1, trasR, tstsR, sampler)

    print(
        dataset_dir.split('/')[-2] + '@%d:' % (fold + 1),
        ','.join(['%s' % eval_metric for eval_metric in eval_metrics]) +
        '@%d=' % (topN) + ','.join(['%.6f' % (score) for score in scores]))

    en.close()
    return scores
Example #9
File: main.py  Project: sky1307/ga
def reward_func(sigma_index_lst=[1, 2, 3],
                default_n=20,
                epoch_num=4,
                epoch_min=100,
                epoch_step=50):
    '''
    Input:
    sigma_index_lst - indices of the SSA components taken from the SSA gene;
                      e.g. the gene [0, 1, 0] -> sigma_index_lst=[1] (the indices where the gene is 1)
    default_n - window length for SSA; must be <= N/2, where N is the length of the time series (default 20)
    epoch_num - number of submodels used
    epoch_min - minimum number of epochs for a submodel
    epoch_step - difference in the number of epochs between two consecutive submodels

    Output:
    a tuple containing 2 values (nse_q, nse_h)
    '''
    K.clear_session()

    with open('./settings/model/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    # train
    model = Ensemble(mode='train',
                     model_kind='rnn_cnn',
                     sigma_lst=sigma_index_lst,
                     default_n=default_n,
                     epoch_num=epoch_num,
                     epoch_min=epoch_min,
                     epoch_step=epoch_step,
                     **config)
    model.train_model_outer()

    # test
    model = Ensemble(mode='test',
                     model_kind='rnn_cnn',
                     sigma_lst=sigma_index_lst,
                     default_n=default_n,
                     epoch_num=epoch_num,
                     epoch_min=epoch_min,
                     epoch_step=epoch_step,
                     **config)
    model.train_model_outer()
    model.retransform_prediction(mode='roll')
    return model.evaluate_model(mode='roll')
Example #10
def get_unique_model():
    xg = xgb.XGBRegressor(n_estimators=200,
                          learning_rate=0.02,
                          gamma=0,
                          subsample=0.75,
                          colsample_bytree=1,
                          max_depth=6)
    en = ElasticNet(l1_ratio=0.95, alpha=0.15, max_iter=50000)
    ada = AdaBoostRegressor(learning_rate=0.01,
                            loss='square',
                            n_estimators=100)
    lr = Ilbeom_Linear()

    lst = [xg, en, ada, lr]

    return Ensemble(lst)
Example #11
    def __init__(self, saved_model: str = None):
        """Create a new object.

        Args:
            - saved_model (str, optional): load a pre-trained model if `saved_model` is not None

        """
        super().__init__()

        # Creating a XGBoost model for stacking
        xgb_params = {}
        xgb_params['learning_rate'] = 0.01
        xgb_params['n_estimators'] = 750
        xgb_params['max_depth'] = 6
        xgb_params['colsample_bytree'] = 0.6
        xgb_params['min_child_weight'] = 0.6
        xgb_model = XGBClassifier(**xgb_params)

        # Creating a random forest model for stacking
        rf_params = {}
        rf_params['n_estimators'] = 200
        rf_params['max_depth'] = 6
        rf_params['min_samples_split'] = 70
        rf_params['min_samples_leaf'] = 30
        rf_model = RandomForestClassifier(**rf_params)

        # Creating a Logistic Regression model to act as a stacker of the other base models
        log_model = LogisticRegression()

        # Creating the stack
        stack = Ensemble(n_splits=3,
                         stacker=log_model,
                         base_models=(rf_model, xgb_model))

        # To use as a prefix of model and processed dataset
        self.datetime_prefix = datetime.datetime.now().replace(
            microsecond=0).isoformat().replace(':', '-')

        # Loads a saved model or create a new one
        if saved_model:
            self.model_name = saved_model
        else:
            self.model_name = self.datetime_prefix + '_fraud_ensemble.bin'

        # The final model
        self.model = stack
        print('Model: {}'.format(self.model_name))
Example #12
File: test.py  Project: felipegnunes/tcv3
def main():
    #print(sys.argv)
    test_set_path = sys.argv[1]
    output_file_path = sys.argv[2]

    X_test = dataset_manip.load_images(load_directory(test_set_path)) / 255

    #model = Model(image_shape = (77, 71, 1), num_classes = 10, model_path = './model_files/model', batch_size = 512, first_run = False)
    #dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path), model.predict(X_test), output_file_path)

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=True)
    dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path),
                                    ens.predict(X_test), output_file_path)
Example #13
def main():
    # Dataset path
    dataset_name = ['credit_card_clients_balanced', 'credit_card_clients']

    for data_name in dataset_name:
        dataset_path = os.getcwd() + "\\dataset\\" + data_name + ".csv"
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        # Datasets columns
        data_x = dataset[[
            'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11',
            'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20',
            'X21', 'X22', 'X23'
        ]]
        data_y = dataset['Y']

        # Preprocessing data
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        # Runs to test the model
        for i in range(20):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')
            X_train, X_test, y_train, y_test = train_test_split(X_normalized,
                                                                data_y,
                                                                test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)

            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        graphics(acc_rate, reject_rate, data_name)
Example #14
    def __init__(self,
                 selected_algorithms='all',
                 selected_hyperparameters='default',
                 ensemble_size=3,
                 ensemble_method='Logit',
                 error_matrix_values='default',
                 verbose=True):
        """instantiates an AutoLearner object """

        self.error_matrix = ErrorMatrix(selected_algorithms,
                                        selected_hyperparameters,
                                        ensemble_size, error_matrix_values,
                                        verbose)
        """error matrix defined for specific dataset"""

        self.ensemble = Ensemble(ensemble_size=ensemble_size,
                                 ensemble_method=ensemble_method,
                                 verbose=verbose)
        """instantiate empty ensemble object"""
Example #15
def run(ncyc,
        N=1,
        lim=(20, 20),
        T=300,
        ensemble=None,
        animation=False,
        dframe=0.001):
    #initialize system
    time_total = 0  #total "time" of the system
    time_pulse = 0  #time of the pulse
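    # NOTE: `dt` (the time step added each cycle) is assumed to be defined at module level.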
    if ensemble is None:
        ensemble = Ensemble(N, lim, T)
    ensemble.Plot("Initial Configuration")
    #start simulation
    y = [ensemble.Energy_Total()]
    start_time = time.time()
    #for i in trange(ncyc):
    for i in range(ncyc):
        if (i in list(range(0, ncyc, int(ncyc / 20)))):
            print("{0} cycles: {1} s".format(i, time.time() - start_time))
        ensemble.Cycle(time_pulse=time_pulse, time_total=time_total)
        y.append(ensemble.Energy_Total())
        time_total += dt
        if time_pulse + dt > 120:
            time_pulse += dt - 120
        else:
            time_pulse += dt
    print("Elapsed time:", time.time() - start_time, "(s)")
    print("Initial energy:", y[0], "(J)")
    print("Final energy:", y[-1], "(J)")
    print("Average energy:", ensemble.Average(), "(J)")

    ensemble.Plot("Final Configuration")
    x = range(0, len(y))
    fig, ax = plt.subplots(figsize=(20, 10))
    plt.plot(x, y)
    plt.xlim([0, ncyc])
    plt.ylim([min(y), max(y)])
    plt.title("Total Energy vs. Cycle")
    return ensemble
Example #16
def main():
    ATOM_NUM = 1
    CYCLE_NUM = 50

    particles = [Particle() for i in range(ATOM_NUM)]

    ensemble = Ensemble(particles)
    ensemble.positions += 2

    print("Initial positions\n", ensemble.positions)
    print("Initial velocities\n", ensemble.velocities)

    myfield = Field(ensemble, dt=0.01)
    for i in range(CYCLE_NUM):
        myfield.update()

        print("t:", myfield.dt * (i + 1))
        print("x:", myfield.ensemble.positions)
        print("v:", myfield.ensemble.velocities)
        print()
Example #17
def ensemble_test():
    ATOM_NUM = 2

    particles = [Particle() for i in range(ATOM_NUM)]

    ensemble = Ensemble(particles)

    print("Position array\n", ensemble.positions)

    ensemble.positions += 1  # add 1 to the whole array
    print("+1\n", ensemble.positions)

    ensemble.positions *= 2  # multiply the whole array by 2
    print("*2\n", ensemble.positions)

    ensemble.positions *= np.array([1, 2, 3])  # x*1, y*2, z*3
    print("x*1, y*2, z*3\n", ensemble.positions)

    ensemble.positions = np.ones((ensemble.N, 3)) * 100  # set everything to 100
    print("=100\n", ensemble.positions)

    print("Velocity array\n", ensemble.velocities)
Example #18
def test(texts, classes, models, nn_params, folds=4):
    '''
        Check the performance on an SVM implementation,
        given a list of texts and their classes (negative/neutral/positive).
        Uses k-fold cross-validation (keeping in mind to divide the data
        appropriately, depending on the class).
    '''
    classes = np.array(classes)
    texts = np.array(texts)

    wrongs = []
    auc_sum = 0

    for train, test in cross_validation.StratifiedKFold(classes, folds):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]
        n = Ensemble(texts_train, classes_train, nn_params, models)
        predictions = n.classify(texts_test)
        predictions[predictions < 0] = 0

        auc = calculate_auc(classes_test, predictions)
        print(auc)
        auc_sum += auc

        for i in range(len(texts_test)):
            if abs(classes_test[i] - predictions[i]) > 0.5:
                wrongs.append((classes_test[i], predictions[i], texts_test[i]))

    '''
    import csv
    writer = open('wrongs.csv', 'w')
    for w in wrongs:
        writer.write('%s,%s,%s\n' % w)
    writer.close()
    '''

    return auc_sum / folds
Example #19
print('\n___PARTITIONS 2___')
partitions2 = np.transpose(partitions)
ensemble2 = Ensemble(partitions=partitions2, n_cluster=3, partitions_format='PE')
e2, ts2, pr2 = ensemble2.mcla(times=True, partial_results=True)
for t in ts2:
  print(f'{t[0]}: {t[1]}s')
for r in pr2:
  print(r[0])
  print(r[1])
"""

print('\n___PARTITIONS 3___')
partitions3 = np.random.randint(8, size=(8, 100000))
ensemble3 = Ensemble(partitions=partitions3,
                     n_cluster=8,
                     partitions_format='PE')
e3, ts3, _pr3 = ensemble3.mcla(times=True)
for t in ts3:
    print(f'{t[0]}: {t[1]}s')
"""
hypergraph4 = np.array([
  [1, 1, 1, 0, 0, 0, 0],
  [0, 0, 0, 1, 1, 0, 0],
  [0, 0, 0, 0, 0, 1, 1],
  [0, 0, 0, 0, 0, 1, 1],
  [1, 1, 1, 0, 0, 0, 0],
  [0, 0, 0, 1, 1, 0, 0],
  [1, 1, 0, 0, 0, 0, 0],
  [0, 0, 1, 1, 0, 0, 0],
  [0, 0, 0, 0, 1, 1, 1],
Example #20
def main():
    input_dir = "/amit/kaggle/tgs"
    output_dir = "/artifacts"
    image_size_target = 128
    batch_size = 32
    epochs_to_train = 300
    bce_loss_weight_gamma = 0.98
    sgdr_min_lr = 0.0001  # 0.0001, 0.001
    sgdr_max_lr = 0.001  # 0.001, 0.03
    sgdr_cycle_epochs = 20
    sgdr_cycle_epoch_prolongation = 3
    sgdr_cycle_end_patience = 3
    train_abort_epochs_without_improval = 30
    ensemble_model_count = 3
    swa_epoch_to_start = 30

    model_dir = sys.argv[1] if len(sys.argv) > 1 else None

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, image_size_target, augment=True)
    train_set_data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)

    val_set = TrainDataset(train_data.val_set_df, image_size_target, augment=False)
    val_set_data_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)

    if model_dir:
        model = create_model(pretrained=False).to(device)
        model.load_state_dict(torch.load("{}/model.pth".format(model_dir), map_location=device))
    else:
        model = create_model(pretrained=True).to(device)

    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    swa_model = create_model(pretrained=False).to(device)

    print("train_set_samples: %d, val_set_samples: %d" % (len(train_set), len(val_set)))

    global_val_precision_best_avg = float("-inf")
    global_swa_val_precision_best_avg = float("-inf")
    sgdr_cycle_val_precision_best_avg = float("-inf")

    epoch_iterations = len(train_set) // batch_size

    # optimizer = optim.SGD(model.parameters(), lr=sgdr_max_lr, weight_decay=0, momentum=0.9, nesterov=True)
    optimizer = optim.Adam(model.parameters(), lr=sgdr_max_lr)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=sgdr_cycle_epochs, eta_min=sgdr_min_lr)

    optim_summary_writer = SummaryWriter(log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(log_dir="{}/logs/val".format(output_dir))
    swa_val_summary_writer = SummaryWriter(log_dir="{}/logs/swa_val".format(output_dir))

    sgdr_iterations = 0
    sgdr_reset_count = 0
    batch_count = 0
    epoch_of_last_improval = 0
    sgdr_next_cycle_end_epoch = sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
    swa_update_count = 0

    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    print('{"chart": "best_val_precision", "axis": "epoch"}')
    print('{"chart": "val_precision", "axis": "epoch"}')
    print('{"chart": "val_loss", "axis": "epoch"}')
    print('{"chart": "sgdr_reset", "axis": "epoch"}')
    print('{"chart": "precision", "axis": "epoch"}')
    print('{"chart": "loss", "axis": "epoch"}')
    print('{"chart": "swa_val_precision", "axis": "epoch"}')
    print('{"chart": "swa_val_loss", "axis": "epoch"}')

    train_start_time = time.time()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()
        model.train()

        train_loss_sum = 0.0
        train_precision_sum = 0.0
        train_step_count = 0
        for batch in train_set_data_loader:
            images, masks, mask_weights = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True), \
                batch[2].to(device, non_blocking=True)

            lr_scheduler.step(epoch=min(sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))

            optimizer.zero_grad()
            prediction_logits = model(images)
            predictions = torch.sigmoid(prediction_logits)
            criterion.weight = mask_weights
            loss = criterion(prediction_logits, masks)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_precision_sum += np.mean(precision_batch(predictions, masks))
            sgdr_iterations += 1
            train_step_count += 1
            batch_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer), batch_count + 1)

        train_loss_avg = train_loss_sum / train_step_count
        train_precision_avg = train_precision_sum / train_step_count

        val_loss_avg, val_precision_avg = evaluate(model, val_set_data_loader, criterion)

        model_improved_within_sgdr_cycle = val_precision_avg > sgdr_cycle_val_precision_best_avg
        if model_improved_within_sgdr_cycle:
            torch.save(model.state_dict(), "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_precision_best_avg = val_precision_avg

        model_improved = val_precision_avg > global_val_precision_best_avg
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            global_val_precision_best_avg = val_precision_avg
            ckpt_saved = True

        swa_model_improved = False
        if epoch + 1 >= swa_epoch_to_start:
            if model_improved_within_sgdr_cycle:
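                # fold the current weights into the SWA running average and
                # refresh the SWA model's BatchNorm statistics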
                swa_update_count += 1
                moving_average(swa_model, model, 1.0 / swa_update_count)
                bn_update(train_set_data_loader, swa_model)

            swa_model_improved = val_precision_avg > global_swa_val_precision_best_avg
            if swa_model_improved:
                torch.save(swa_model.state_dict(), "{}/swa_model.pth".format(output_dir))
                global_swa_val_precision_best_avg = val_precision_avg

        if model_improved or swa_model_improved:
            epoch_of_last_improval = epoch

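        # start a new SGDR cycle when the current one has ended and there has
        # been no improvement for `sgdr_cycle_end_patience` epochs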
        sgdr_reset = False
        if (epoch + 1 >= sgdr_next_cycle_end_epoch) and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            sgdr_next_cycle_end_epoch = epoch + 1 + sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
            ensemble_model_index += 1
            sgdr_cycle_val_precision_best_avg = float("-inf")
            sgdr_reset_count += 1
            sgdr_reset = True

        swa_val_loss_avg, swa_val_precision_avg = evaluate(swa_model, val_set_data_loader, criterion)

        optim_summary_writer.add_scalar("sgdr_reset", sgdr_reset_count, epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("precision", train_precision_avg, epoch + 1)

        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("precision", val_precision_avg, epoch + 1)

        swa_val_summary_writer.add_scalar("loss", swa_val_loss_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("precision", swa_val_precision_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        print(
            "[%03d/%03d] %ds, lr: %.6f, loss: %.3f, val_loss: %.3f|%.3f, prec: %.3f, val_prec: %.3f|%.3f, ckpt: %d, rst: %d" % (
                epoch + 1,
                epochs_to_train,
                epoch_duration_time,
                get_learning_rate(optimizer),
                train_loss_avg,
                val_loss_avg,
                swa_val_loss_avg,
                train_precision_avg,
                val_precision_avg,
                swa_val_precision_avg,
                int(ckpt_saved),
                int(sgdr_reset)),
            flush=True)

        print('{"chart": "best_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, global_val_precision_best_avg))
        print('{"chart": "val_precision", "x": %d, "y": %.3f}' % (epoch + 1, val_precision_avg))
        print('{"chart": "val_loss", "x": %d, "y": %.3f}' % (epoch + 1, val_loss_avg))
        print('{"chart": "sgdr_reset", "x": %d, "y": %.3f}' % (epoch + 1, sgdr_reset_count))
        print('{"chart": "precision", "x": %d, "y": %.3f}' % (epoch + 1, train_precision_avg))
        print('{"chart": "loss", "x": %d, "y": %.3f}' % (epoch + 1, train_loss_avg))
        print('{"chart": "swa_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_precision_avg))
        print('{"chart": "swa_val_loss", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_loss_avg))

        if sgdr_reset and sgdr_reset_count >= ensemble_model_count and epoch - epoch_of_last_improval >= train_abort_epochs_without_improval:
            print("early abort")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()

    train_end_time = time.time()
    print()
    print("Train time: %s" % str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    eval_start_time = time.time()

    print()
    print("evaluation of the training model")

    model.load_state_dict(torch.load("{}/model.pth".format(output_dir), map_location=device))

    analyze(Ensemble([model]), train_data.val_set_df, use_tta=False)
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=True)

    score_to_model = {}
    ensemble_model_candidates = glob.glob("{}/model-*.pth".format(output_dir))
    ensemble_model_candidates.append("{}/swa_model.pth".format(output_dir))
    for model_file_path in ensemble_model_candidates:
        model_file_name = os.path.basename(model_file_path)
        m = create_model(pretrained=False).to(device)
        m.load_state_dict(torch.load(model_file_path, map_location=device))
        val_loss_avg, val_precision_avg = evaluate(m, val_set_data_loader, criterion)
        print("ensemble '%s': val_loss=%.3f, val_precision=%.3f" % (model_file_name, val_loss_avg, val_precision_avg))
        if len(score_to_model) < ensemble_model_count:
            score_to_model[val_precision_avg] = m
        elif min(score_to_model.keys()) < val_precision_avg:
            # drop the weakest member to make room for the better model
            del score_to_model[min(score_to_model.keys())]
            score_to_model[val_precision_avg] = m

    ensemble_models = list(score_to_model.values())
    for ensemble_model in ensemble_models:
        val_loss_avg, val_precision_avg = evaluate(ensemble_model, val_set_data_loader, criterion)
        print("ensemble: val_loss=%.3f, val_precision=%.3f" % (val_loss_avg, val_precision_avg))

    model = Ensemble(ensemble_models)
    mask_threshold_global, mask_threshold_per_cc = analyze(model, train_data.val_set_df, use_tta=True)

    eval_end_time = time.time()
    print()
    print("Eval time: %s" % str(datetime.timedelta(seconds=eval_end_time - eval_start_time)))

    print()
    print("submission preparation")

    submission_start_time = time.time()

    test_data = TestData(input_dir)
    calculate_predictions(test_data.df, model, use_tta=True)
    calculate_prediction_masks(test_data.df, mask_threshold_global)

    print()
    print(test_data.df.groupby("predictions_cc").agg({"predictions_cc": "count"}))

    write_submission(test_data.df, "prediction_masks", "{}/{}".format(output_dir, "submission.csv"))
    write_submission(test_data.df, "prediction_masks_best", "{}/{}".format(output_dir, "submission_best.csv"))

    submission_end_time = time.time()
    print()
    print("Submission time: %s" % str(datetime.timedelta(seconds=submission_end_time - submission_start_time)))
Example #21

@child('e3')
def i2(x):
    return x**2


@child('e3')
def i3(x, y):
    return x**3 + y


if __name__ == '__main__':

    # create our first ensemble and give it a name
    e1 = Ensemble('e1')
    # create a second ensemble
    e2 = Ensemble('e2')

    # you may use the ensembles as long as you specify which model you use
    print(e1(child='f', x=2))
    print(e1(child='g', y=3))
    print(e2(child='f', x=2))

    # try to use model `g` but it's not in ensemble `e2`
    try:
        print(e2(child='g', y=3))
    except ValueError:
        pass

    # try to use model `h` but it's not decorated with @model
Example #22
            model = resnet101()
            num_ftrs = model.fc.in_features
            model.fc = nn.Linear(num_ftrs, 1)

        if latest_model_path != "":
            model.load_state_dict(torch.load(latest_model_path))

        model.cuda()

        # Set parameters for model
        criterion = Loss(Wt1, Wt0)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               patience=1,
                                                               verbose=True)

        # Train model
        model = train_model(model, criterion, optimizer, dataloaders,
                            scheduler, dataset_sizes, epochs - current_epoch,
                            costs, accs, num_ID, model_type)

    # For testing ensemble model
    else:
        model = Ensemble("models/best_model_dense_4.pth",
                         "models/best_model_res_12.pth",
                         "models/best_model_vgg_19.pth")
        model.cuda()
        criterion = Loss(Wt1, Wt0)
        test_acc, test_loss = test_ensemble_mean(model, criterion, dataloaders,
                                                 dataset_sizes)
Example #23
#     classes.append(int(row[0]))
# #results = n.classify(texts)
# #results[results<0] = 0
# #print calculate_auc(classes, results)
# r1 = np.array(m1.classify(texts))
# print calculate_auc(classes, r1)
# r2 = np.array(m2.classify(texts))
# print calculate_auc(classes, r2)
# r = (1.2*r1 + 0.8*r2) / 2
# r[r>1] = 1
# r[r<0] = 0
# print calculate_auc(classes, r)

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)
n = Ensemble(texts, classes, nn_params, models)
end = time.time()
# print "training time="
# print end-start

start = time.time()

# evaluate the classifier on the verification dataset
texts = []
inp = input()
while inp:
    texts.append(inp)
    inp = input()

results = n.classify(texts)
Example #24
    positives = sum(1 for label in labels if label)
    predicted_positives = sum(1 for pred in preds if pred)
    true_positives = sum(1 for label, pred in zip(labels, preds)
                         if label and pred)
    return 100.0 * true_positives / predicted_positives, 100.0 * true_positives / positives


def evaluate(model):
    def wrapper(dataset):
        preds = [model(x=x) for x, _ in dataset]
        precision, recall = get_results(dataset, preds)
        return {
            'precision': f'{precision:.1f}%',
            'recall': f'{recall:.1f}%',
        }

    return wrapper


if __name__ == '__main__':
    e = Ensemble('ensemble', children=[model1, model2], mode='all')
    results = Ensemble('results', children=[model1, model2, e])
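    # decorate_children wraps each child with the evaluate() reporter, so
    # calling `results` yields precision/recall per model (see output below)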
    results.decorate_children(evaluate)
    print(results)
    pprint(results(dataset=get_dataset()))
"""
{'ensemble': {'precision': '100.0%', 'recall': '100.0%'},
 'model1': {'precision': '18.2%', 'recall': '100.0%'},
 'model2': {'precision': '30.0%', 'recall': '100.0%'}}
"""
Example #25
        print(
            "[epoch: {:d}] avg train_loss: {:.3f}   eval ll: {:.3f}   ({:.1f}s)"
            .format(epoch,
                    sum(losses) / len(losses), eval_ll,
                    time.time() - tic))

        print("running test code")
        name = "sample_" + str(epoch) + ".txt"
        test_code(model, name=name)

        print("ran test code")


if __name__ == '__main__':
    import args
    from model import Model
    from ensemble import Ensemble

    if method == 'ensemble':
        model = Ensemble(vectors).to(device)
    else:
        model = Model(vectors).to(device)

    train(model)
    # import dill
    # with open('model.p', 'rb') as h:
    #     model = dill.load(h)

    # visualize_attn(model)
Example #26
    experiment_path = "./experiments/ensemble/"
    data_path = "./datasets/paper_ultimate_conv/"
    cf_data = imp.load_source("cf_data",
                              data_path + "cfg_testing_data_creation.py")

    nets = []
    net_paths = glob.glob(experiment_path + "*/")
    # Load the networks
    for net_path in net_paths:
        net = NetworkConvDropout()
        net.init(29, 29, 13, 134, 135)
        net.load_parameters(open_h5file(net_path + "net.net"))
        n_out = net.n_out
        nets.append(net)

    ensemble_net = Ensemble(nets)

    # Load the scaler
    scaler = pickle.load(open(net_paths[0] + "s.scaler", "rb"))

    # Files on which to evaluate the network
    file_list = list_miccai_files(**{
        "mode": "folder",
        "path": "./datasets/miccai/2/"
    })
    n_files = len(file_list)

    # Options for the generation of the dataset
    # The generation/evaluation of the dataset has to be split into batches as a whole brain does not fit into memory
    batch_size = 100000
    select_region = SelectWholeBrain()
Example #27
    def __init__(self):
        self.ensemble = Ensemble(instruments)
Example #28
from saveobject import save_obj

N = 100
steps = 1000
repeat = 30
res = 0.01
b1 = 0
b2 = 3

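# sweep B over [0, 3) in steps of `res`, drop zero, then invert so the values
# used are 1/B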
B = np.arange(b1, b2, res)
B = B[B != 0]
B = 1 / B

M = np.array([0])

ensemble = Ensemble(N, B, M, steps, repeat, False)
ensemble.getStats()

beta = ensemble.beta
mu = ensemble.mu

stats = ensemble.stats

save_obj(stats, "stats8")

# keys = ["energy","magnetization","population","entropy"]
# def calcStats(size,beta,mu,steps,times):
# 	global keys
# 	stats = {}
# 	arr = {}
# 	for key in keys:
Example #29
    'max_depth': 6,
    'n_estimators': 1000,
    'learning_rate': 0.025,
    'subsample': 0.9
}

models = {
    "LGB-1": LGBMClassifier(**lgb_params),
    "XGB-1": XGBClassifier(**xgb_params),
    "LGB-2": LGBMClassifier(**lgb_params2),
    #"LGB-3": LGBMClassifier(**lgb_params3),
    "XGB-2": XGBClassifier(**xgb_params2),
    #"CAT": CatBoostClassifier(**cat_params),
    #"GBM": GradientBoostingClassifier(**gb_params),
    #"RF": RandomForestClassifier(**rf_params),
    #"ET": ExtraTreesClassifier(**et_params),
    #"ABC": AdaBoostClassifier(n_estimators=100),
}

start = time.time()
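# stacking ensemble: base-model out-of-fold predictions are combined by an SGD
# meta-learner with logistic loss (the first argument, 4, is presumably the
# number of folds)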
stack = Ensemble(4,
                 models.values(),
                 stacker=SGDClassifier(loss="log", max_iter=1000))
y_pred = stack.fit_predict(X, y, X_test)
print("Finished ensembling in %.2f seconds" % (time.time() - start))

sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred
sub.to_csv("%s.csv" % ("-".join(models.keys())), index=False)
Example #30
def main():
    system = Ensemble(3, 3, lim=(30, 30), T=300)
    run(5000, ensemble=system)