Example #1
    def __init__(self, sourceFile, targetFile):
        self.SDataBufferArr = None  #2D array representation of self.SDataBuffer
        self.SDataLabels = None
        self.TDataBufferArr = None  #2D array representation of self.TDataBuffer
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma

        self.kliep = None

        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)
        """
		- simulate source and target streams from corresponding files.
		"""
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        Properties.MAXVAR = self.source.initialData.shape[0]
Example #2
def worker(fold, n_users, n_items, dataset_dir):
    traFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tra.txt'
    trasR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, traFilePath),
                    binarize_threshold))

    print(
        dataset_dir.split('/')[-2] + '@%d:' % (fold + 1), trasR.shape,
        trasR.nnz, '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tst.txt'
    tstsR = lil_matrix(
        matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                    binarize_threshold))

    sampler = Sampler(trasR=trasR, batch_size=batch_size)

    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold + 1, trasR, tstsR, sampler)

    print(
        dataset_dir.split('/')[-2] + '@%d:' % (fold + 1),
        ','.join(['%s' % eval_metric for eval_metric in eval_metrics]) +
        '@%d=' % (topN) + ','.join(['%.6f' % (score) for score in scores]))

    en.close()
    return scores
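The worker above handles a single fold end to end; a minimal, hypothetical driver (assuming the module-level names it relies on, such as batch_size, kensemble, topN, split_method, eval_metrics, reg, n_factors and binarize_threshold, are defined, and that the underlying framework tolerates forked processes) could fan the folds out in parallel and average the per-metric scores:

# Hypothetical driver, not part of the original project.
from functools import partial
from multiprocessing import Pool

import numpy as np

def run_all_folds(n_folds, n_users, n_items, dataset_dir):
    job = partial(worker, n_users=n_users, n_items=n_items, dataset_dir=dataset_dir)
    with Pool(processes=n_folds) as pool:
        all_scores = pool.map(job, range(n_folds))   # one list of scores per fold
    return np.mean(np.asarray(all_scores), axis=0)   # mean score per eval metric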
Example #3
def main():
    print(args)

    print("=> creating model '{}'".format(args.arch))
    model = Ensemble()
    model = torch.nn.DataParallel(model).cuda()
    print(model)
    
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_data = datautil.SceneDataset(args.data,
                                      img_transform=transforms.Compose([
                                          transforms.Resize((args.img_size, args.img_size)),
                                          transforms.ToTensor(),
                                          normalize]))
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)
    checkpoint = torch.load(args.test_model)
    model.load_state_dict(checkpoint['state_dict'])
    #model.load_state_dict(checkpoint)
    if os.path.isdir(args.data):
        ret = test(test_loader,model)
        imgs = [i[:-4] for i in os.listdir(args.data)]
        with open('result3_.csv', 'w') as f:
            '''
            f.write(','.join(['FILE_ID','CATEGORY_ID'])+'\n')
            f.write('\n'.join([','.join([str(a),str(b)]) for a,b in zip(imgs,ret)]))
            '''
            #FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2
            f.write(','.join(['FILE_ID','CATEGORY_ID0','CATEGORY_ID1','CATEGORY_ID2'])+'\n')
            f.write('\n'.join([','.join([str(a)]+[str(int(i)) for i in b]) for a,b in zip(imgs,ret)]))
    else:
        test_labeled(test_loader,model)
Example #4
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    DATASET_DIRECTORY = '../data_part1'

    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)
    num_classes = len(set(y))

    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)
    print(ens.measure_accuracy(X, y))

    return
    X_train, X_validation, y_train, y_validation = dataset_manip.split_dataset(
        X, y, rate=0.5)

    model = Model(image_shape=X.shape[1:],
                  num_classes=num_classes,
                  model_path='./model_files/model',
                  batch_size=512,
                  first_run=True)  # 1250

    model.train(X_train, y_train, X_validation, y_validation, 500)
    model.train_unsupervised(X_hidden, X_validation, y_validation, 200)

    print('Final Accuracy: {}'.format(
        model.measure_accuracy(X_validation, y_validation)))
Example #5
    def __init__(self, sourceFile, targetFile):
        self.SWindow = []
        self.TWindow = []
        self.TPredictWindow = []

        self.SDataBuffer = []  #Queue
        self.TDataBuffer = []  #Queue

        self.SInitialDataBuffer = []
        self.TInitialDataBuffer = []

        self.changeDetector = ChangeDetection(Properties.GAMMA,
                                              Properties.SENSITIVITY,
                                              Properties.MAX_WINDOW_SIZE)
        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        classNameList = []
        self.source = Stream(sourceFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)
        self.target = Stream(targetFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)

        Properties.MAXVAR = self.source.MAXVAR

        self.gateway = JavaGateway(
            start_callback_server=True,
            gateway_parameters=GatewayParameters(port=Properties.PY4JPORT),
            callback_server_parameters=CallbackServerParameters(
                port=Properties.PY4JPORT + 1))
        self.app = self.gateway.entry_point
Example #6
    def iterpose(self, rmsd=0.0001):

        confs = self._confs.copy()
        Ensemble.iterpose(self, rmsd)
        self._confs = confs
        LOGGER.info("Final superposition to calculate transformations.")
        self.superpose()
Example #7
    def delCoordset(self, index):
        """Delete a coordinate set from the ensemble."""

        Ensemble.delCoordset(self, index)
        if isinstance(index, int):
            index = [index]
        else:
            index = list(index)
        index.sort(reverse=True)
        for i in index:
            self._labels.pop(i)
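The reverse sort is what keeps the remaining indices valid while popping; a small stand-alone illustration with plain Python lists (independent of the Ensemble class):

# Popping from the highest index downwards does not shift the positions of the
# entries that still have to be removed.
labels = ['a', 'b', 'c', 'd', 'e']
for i in sorted([1, 3], reverse=True):
    labels.pop(i)      # removes 'd' first, then 'b'
print(labels)          # ['a', 'c', 'e']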
Example #8
def process_all_images(config):
    filenames = sorted(config.files)
    tables = {}
    segmentations = {}
    ensembles = {}
    for segmentation in config.segmentations:
        segmentations[segmentation] = {}
    ensembles['walker_binary'] = {}
    ensembles['opt'] = {}
    erosions = {}
    methods = {'unet': {}, 'walker_binary': {}, 'opt': {}}
    for key in ['jac', 'af1', 'merge_rate', 'split_rate']:
        tables[key] = pd.DataFrame(columns=list(methods.keys()), copy=True)
    os.makedirs(config.output, exist_ok=True)
    root_dir = os.path.join(config.output, config.filename)
    if os.path.exists(root_dir):
        shutil.rmtree(root_dir)
    os.makedirs(root_dir, exist_ok=True)
    counter = 0
    for file in filenames:
        if counter % 50 == 0:
            print(counter)
        if counter == config.counter:
            break
        annot_path = os.path.join(config.annot, file.strip())
        annot = skimage.io.imread(annot_path, as_gray=True)
        for segmentation in segmentations.keys():
            path = os.path.join(config.root, segmentation, file.strip())
            segmentations[segmentation]['orig'] = skimage.io.imread(
                path, as_gray=True)
            segmentations[segmentation][
                'results'] = comp.get_per_image_metrics(
                    annot, segmentations[segmentation]['orig'], False)
            segmentations[segmentation]['mask'] = np.where(
                segmentations[segmentation]['orig'] > 0, 255, 0)
        for ensemble in ensembles.keys():
            ensembles[ensemble]['orig'] = Ensemble(
                segmentations, config.erosions, config.beta).ensemble(ensemble)
            ensembles[ensemble]['results'] = comp.get_per_image_metrics(
                annot, ensembles[ensemble]['orig'], False)
            ensembles[ensemble]['mask'] = np.where(
                ensembles[ensemble]['orig'] > 0, 255, 0)
        for key in tables.keys():
            results = {}
            # for segmentation in segmentations.keys():
            #     results[segmentation] = segmentations[segmentation]['results'][key]
            avg = statistics.mean([
                segmentations[segmentation]['results'][key]
                for segmentation in segmentations
            ])
            results['unet'] = avg
            for ensemble in ensembles.keys():
                if ensemble != 'union':
                    results[ensemble] = ensembles[ensemble]['results'][key]
            tables[key] = tables[key].append(results, ignore_index=True)
        counter += 1

    os.makedirs(os.path.join(root_dir, 'stats'), exist_ok=True)
    output_charts(tables, list(methods.keys()),
                  os.path.join(root_dir, 'stats'), config)
Example #9
async def main():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    input_path = os.path.join(this_dir, "input.txt")
    with open(input_path) as f:
        raw_code = f.readline()
        e = Ensemble(raw_code)
        await e.run()
Example #10
File: test.py  Project: felipegnunes/tcv3
def main():
    #print(sys.argv)
    test_set_path = sys.argv[1]
    output_file_path = sys.argv[2]

    X_test = dataset_manip.load_images(load_directory(test_set_path)) / 255

    #model = Model(image_shape = (77, 71, 1), num_classes = 10, model_path = './model_files/model', batch_size = 512, first_run = False)
    #dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path), model.predict(X_test), output_file_path)

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=True)
    dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path),
                                    ens.predict(X_test), output_file_path)
Example #11
    def __init__(self,
                 selected_algorithms='all',
                 selected_hyperparameters='default',
                 ensemble_size=3,
                 ensemble_method='Logit',
                 error_matrix_values='default',
                 verbose=True):
        """instantiates an AutoLearner object """

        self.error_matrix = ErrorMatrix(selected_algorithms,
                                        selected_hyperparameters,
                                        ensemble_size, error_matrix_values,
                                        verbose)
        """error matrix defined for specific dataset"""

        self.ensemble = Ensemble(ensemble_size=ensemble_size,
                                 ensemble_method=ensemble_method,
                                 verbose=verbose)
        """instantiate empty ensemble object"""
Example #12
def create_song(graph_attributes={
    'graph_type': 'Small World',
    'average_degree': 4,
    'rewiring_prob': 0.3
},
                number_players=20,
                number_time_steps=300,
                tempo=108,
                player_attributes=None):
    """
    arguments:
        graph_type : 'Small World', 'Random', 'Configuration', 'Structured'
        average_degree
        number_of_players
        rewiring_prob
        number_time_steps
        tempo
        player_attributes: {
            duration: (min_duration, max_duration)
            note_change_choices: 'All', 'Neighbors of Neighbors'
            harmonicity threshold: 'Fixed' or 'Moving Average'
                fixed threshold
                moving average threshold
            susceptibility to influence
        }
    """

    graph_type = graph_attributes['graph_type']

    #create player graph
    if graph_type == 'Small World':
        #assert rewiring_prob
        G = nx.watts_strogatz_graph(number_players,
                                    graph_attributes['average_degree'],
                                    graph_attributes['rewiring_prob'])
    elif graph_type == 'Random':
        pass
    elif graph_type == 'Structured':
        pass

    #add starting pitch to node
    starting_pitches = {i: 'random' for i in range(len(G))}
    nx.set_node_attributes(G, starting_pitches, 'starting_pitch')

    #create ensemble object
    ensemble = Ensemble(G, player_attributes)

    #evolve ensemble
    ensemble.evolve(number_time_steps)

    #show pitch history
    pitch_history_data = ensemble.get_pitch_history_data()
    harmonicity_data = ensemble.get_harmonicity_data()

    #create file
    filename = create_midi_file(ensemble, tempo)
    create_data_file(filename.replace('.mid', '.txt'), pitch_history_data,
                     harmonicity_data)

    return filename, pitch_history_data, harmonicity_data
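A minimal call sketch for create_song above, following the argument structure its docstring describes (the values are illustrative placeholders, and only the 'Small World' graph type is implemented in the snippet):

# Hypothetical usage of create_song(); not part of the original project.
if __name__ == '__main__':
    filename, pitch_history, harmonicity = create_song(
        graph_attributes={'graph_type': 'Small World',
                          'average_degree': 4,
                          'rewiring_prob': 0.3},
        number_players=20,
        number_time_steps=300,
        tempo=108,
        player_attributes=None)
    print('Wrote', filename)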
Example #13
def test(texts, classes, models, nn_params, folds=4):
    '''
        Check the performance of an SVM implementation,
        given a list of texts and their classes (negative/neutral/positive).
        Uses stratified k-fold cross-validation so that each fold keeps the
        class distribution of the full data set.
    '''
    classes = np.array(classes)
    texts = np.array(texts)

    wrongs = []
    auc_sum = 0

    for train, test in cross_validation.StratifiedKFold(classes, folds):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]
        n = Ensemble(texts_train, classes_train, nn_params, models)
        predictions = n.classify(texts_test)
        predictions[predictions<0] = 0

        auc = calculate_auc(classes_test, predictions)
        print auc
        auc_sum += auc

        for i in range(len(texts_test)):
            if abs(classes_test[i] - predictions[i]) > 0.5:
                wrongs.append((classes_test[i], predictions[i], texts_test[i]))

    '''
    import csv
    writer = open('wrongs.csv', 'w')
    for w in wrongs:
        writer.write('%s,%s,%s\n' % w)
    writer.close()
    '''

    return auc_sum / folds
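The snippet relies on the long-removed sklearn.cross_validation module; with current scikit-learn the same stratified folds would be generated roughly like this inside test() (a sketch under that assumption, with the rest of the loop body unchanged):

# Modern equivalent of cross_validation.StratifiedKFold(classes, folds)
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=folds)
for train, test_idx in skf.split(texts, classes):
    texts_train, classes_train = texts[train], classes[train]
    texts_test, classes_test = texts[test_idx], classes[test_idx]
    # ... train the Ensemble and accumulate AUC as above ...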
Example #14
def ensemble_test():
    ATOM_NUM = 2

    particles = [Particle() for i in range(ATOM_NUM)]

    ensemble = Ensemble(particles)

    print("Position array\n", ensemble.positions)

    ensemble.positions += 1  # add 1 to every coordinate
    print("+1\n", ensemble.positions)

    ensemble.positions *= 2  # multiply every coordinate by 2
    print("*2\n", ensemble.positions)

    ensemble.positions *= np.array([1, 2, 3])  # x*1, y*2, z*3
    print("x*1, y*2, z*3\n", ensemble.positions)

    ensemble.positions = np.ones((ensemble.N, 3)) * 100  # set all coordinates to 100
    print("=100\n", ensemble.positions)

    print("Velocity array\n", ensemble.velocities)
Example #15
def run(ncyc,
        N=1,
        lim=(20, 20),
        T=300,
        ensemble=None,
        animation=False,
        dframe=0.001):
    #initialize system
    time_total = 0  #total "time" of the system
    time_pulse = 0  #time of the pulse
    if ensemble is None:
        ensemble = Ensemble(N, lim, T)
        ensemble.Plot("Initial Configuration")
    else:
        ensemble.Plot("Initial Configuration")
    #start simulation
    y = [ensemble.Energy_Total()]
    start_time = time.time()
    #for i in trange(ncyc):
    for i in range(ncyc):
        if (i in list(range(0, ncyc, int(ncyc / 20)))):
            print("{0} cycles: {1} s".format(i, time.time() - start_time))
        ensemble.Cycle(time_pulse=time_pulse, time_total=time_total)
        y.append(ensemble.Energy_Total())
        time_total += dt
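        # Wrap the pulse clock back around a 120-unit period (clarifying comment;
        # dt is assumed to be defined at module level in the original script).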
        if time_pulse + dt > 120:
            time_pulse += dt - 120
        else:
            time_pulse += dt
    print("Elapsed time:", time.time() - start_time, "(s)")
    print("Initial energy:", y[0], "(J)")
    print("Final energy:", y[-1], "(J)")
    print("Average energy:", ensemble.Average(), "(J)")

    ensemble.Plot("Final Configuration")
    x = range(0, len(y))
    fig, ax = plt.subplots(figsize=(20, 10))
    plt.plot(x, y)
    plt.xlim([0, ncyc])
    plt.ylim([min(y), max(y)])
    plt.title("Total Energy vs. Cycle")
    return ensemble
Example #16
def get_unique_model():
    xg = xgb.XGBRegressor(n_estimators=200,
                          learning_rate=0.02,
                          gamma=0,
                          subsample=0.75,
                          colsample_bytree=1,
                          max_depth=6)
    en = ElasticNet(l1_ratio=0.95, alpha=0.15, max_iter=50000)
    ada = AdaBoostRegressor(learning_rate=0.01,
                            loss='square',
                            n_estimators=100)
    lr = Ilbeom_Linear()

    lst = [xg, en, ada, lr]

    return Ensemble(lst)
Example #17
    def __init__(self, saved_model: str = None):
        """Create a new object.

        Args:
            - saved_model (str, optional): load a pre-trained model if `saved_model` is not None

        """
        super().__init__()

        # Creating a XGBoost model for stacking
        xgb_params = {}
        xgb_params['learning_rate'] = 0.01
        xgb_params['n_estimators'] = 750
        xgb_params['max_depth'] = 6
        xgb_params['colsample_bytree'] = 0.6
        xgb_params['min_child_weight'] = 0.6
        xgb_model = XGBClassifier(**xgb_params)

        # Creating a random forest model for stacking
        rf_params = {}
        rf_params['n_estimators'] = 200
        rf_params['max_depth'] = 6
        rf_params['min_samples_split'] = 70
        rf_params['min_samples_leaf'] = 30
        rf_model = RandomForestClassifier(**rf_params)

        # Creating a Logist Regression model to act as a stacker of other base models
        log_model = LogisticRegression()

        # Creating the stack
        stack = Ensemble(n_splits=3,
                         stacker=log_model,
                         base_models=(rf_model, xgb_model))

        # To use as a prefix of model and processed dataset
        self.datetime_prefix = datetime.datetime.now().replace(
            microsecond=0).isoformat().replace(':', '-')

        # Loads a saved model or create a new one
        if saved_model:
            self.model_name = saved_model
        else:
            self.model_name = self.datetime_prefix + '_fraud_ensemble.bin'

        # The final model
        self.model = stack
        print('Model: {}'.format(self.model_name))
Example #18
def main():
    ATOM_NUM = 1
    CYCLE_NUM = 50

    particles = [Particle() for i in range(ATOM_NUM)]

    ensemble = Ensemble(particles)
    ensemble.positions += 2

    print("Initial positions\n", ensemble.positions)
    print("Initial velocities\n", ensemble.velocities)

    myfield = Field(ensemble, dt=0.01)
    for i in range(CYCLE_NUM):
        myfield.update()

        print("t:", myfield.dt * (i + 1))
        print("x:", myfield.ensemble.positions)
        print("v:", myfield.ensemble.velocities)
        print()
Example #19
def main():
    # Dataset path
    dataset_name = ['credit_card_clients_balanced', 'credit_card_clients']

    for data_name in dataset_name:
        dataset_path = os.getcwd() + "\\dataset\\" + data_name + ".csv"
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        # Datasets columns
        data_x = dataset[[
            'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11',
            'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20',
            'X21', 'X22', 'X23'
        ]]
        data_y = dataset['Y']

        # Preprocessing data
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        # Runs to test the model
        for i in range(20):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')
            X_train, X_test, y_train, y_test = train_test_split(X_normalized,
                                                                data_y,
                                                                test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)

            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        graphics(acc_rate, reject_rate, data_name)
Example #20
def main():
    input_dir = "/amit/kaggle/tgs"
    output_dir = "/artifacts"
    image_size_target = 128
    batch_size = 32
    epochs_to_train = 300
    bce_loss_weight_gamma = 0.98
    sgdr_min_lr = 0.0001  # 0.0001, 0.001
    sgdr_max_lr = 0.001  # 0.001, 0.03
    sgdr_cycle_epochs = 20
    sgdr_cycle_epoch_prolongation = 3
    sgdr_cycle_end_patience = 3
    train_abort_epochs_without_improval = 30
    ensemble_model_count = 3
    swa_epoch_to_start = 30

    model_dir = sys.argv[1] if len(sys.argv) > 1 else None

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, image_size_target, augment=True)
    train_set_data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)

    val_set = TrainDataset(train_data.val_set_df, image_size_target, augment=False)
    val_set_data_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)

    if model_dir:
        model = create_model(pretrained=False).to(device)
        model.load_state_dict(torch.load("{}/model.pth".format(model_dir), map_location=device))
    else:
        model = create_model(pretrained=True).to(device)

    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    swa_model = create_model(pretrained=False).to(device)

    print("train_set_samples: %d, val_set_samples: %d" % (len(train_set), len(val_set)))

    global_val_precision_best_avg = float("-inf")
    global_swa_val_precision_best_avg = float("-inf")
    sgdr_cycle_val_precision_best_avg = float("-inf")

    epoch_iterations = len(train_set) // batch_size

    # optimizer = optim.SGD(model.parameters(), lr=sgdr_max_lr, weight_decay=0, momentum=0.9, nesterov=True)
    optimizer = optim.Adam(model.parameters(), lr=sgdr_max_lr)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=sgdr_cycle_epochs, eta_min=sgdr_min_lr)

    optim_summary_writer = SummaryWriter(log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(log_dir="{}/logs/val".format(output_dir))
    swa_val_summary_writer = SummaryWriter(log_dir="{}/logs/swa_val".format(output_dir))

    sgdr_iterations = 0
    sgdr_reset_count = 0
    batch_count = 0
    epoch_of_last_improval = 0
    sgdr_next_cycle_end_epoch = sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
    swa_update_count = 0

    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    print('{"chart": "best_val_precision", "axis": "epoch"}')
    print('{"chart": "val_precision", "axis": "epoch"}')
    print('{"chart": "val_loss", "axis": "epoch"}')
    print('{"chart": "sgdr_reset", "axis": "epoch"}')
    print('{"chart": "precision", "axis": "epoch"}')
    print('{"chart": "loss", "axis": "epoch"}')
    print('{"chart": "swa_val_precision", "axis": "epoch"}')
    print('{"chart": "swa_val_loss", "axis": "epoch"}')

    train_start_time = time.time()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()
        model.train()

        train_loss_sum = 0.0
        train_precision_sum = 0.0
        train_step_count = 0
        for batch in train_set_data_loader:
            images, masks, mask_weights = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True), \
                batch[2].to(device, non_blocking=True)

            lr_scheduler.step(epoch=min(sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))

            optimizer.zero_grad()
            prediction_logits = model(images)
            predictions = torch.sigmoid(prediction_logits)
            criterion.weight = mask_weights
            loss = criterion(prediction_logits, masks)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_precision_sum += np.mean(precision_batch(predictions, masks))
            sgdr_iterations += 1
            train_step_count += 1
            batch_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer), batch_count + 1)

        train_loss_avg = train_loss_sum / train_step_count
        train_precision_avg = train_precision_sum / train_step_count

        val_loss_avg, val_precision_avg = evaluate(model, val_set_data_loader, criterion)

        model_improved_within_sgdr_cycle = val_precision_avg > sgdr_cycle_val_precision_best_avg
        if model_improved_within_sgdr_cycle:
            torch.save(model.state_dict(), "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_precision_best_avg = val_precision_avg

        model_improved = val_precision_avg > global_val_precision_best_avg
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            global_val_precision_best_avg = val_precision_avg
            ckpt_saved = True

        swa_model_improved = False
        if epoch + 1 >= swa_epoch_to_start:
            if model_improved_within_sgdr_cycle:
                swa_update_count += 1
                moving_average(swa_model, model, 1.0 / swa_update_count)
                bn_update(train_set_data_loader, swa_model)

            swa_model_improved = val_precision_avg > global_swa_val_precision_best_avg
            if swa_model_improved:
                torch.save(swa_model.state_dict(), "{}/swa_model.pth".format(output_dir))
                global_swa_val_precision_best_avg = val_precision_avg

        if model_improved or swa_model_improved:
            epoch_of_last_improval = epoch

        sgdr_reset = False
        if (epoch + 1 >= sgdr_next_cycle_end_epoch) and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            sgdr_next_cycle_end_epoch = epoch + 1 + sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
            ensemble_model_index += 1
            sgdr_cycle_val_precision_best_avg = float("-inf")
            sgdr_reset_count += 1
            sgdr_reset = True

        swa_val_loss_avg, swa_val_precision_avg = evaluate(swa_model, val_set_data_loader, criterion)

        optim_summary_writer.add_scalar("sgdr_reset", sgdr_reset_count, epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("precision", train_precision_avg, epoch + 1)

        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("precision", val_precision_avg, epoch + 1)

        swa_val_summary_writer.add_scalar("loss", swa_val_loss_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("precision", swa_val_precision_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        print(
            "[%03d/%03d] %ds, lr: %.6f, loss: %.3f, val_loss: %.3f|%.3f, prec: %.3f, val_prec: %.3f|%.3f, ckpt: %d, rst: %d" % (
                epoch + 1,
                epochs_to_train,
                epoch_duration_time,
                get_learning_rate(optimizer),
                train_loss_avg,
                val_loss_avg,
                swa_val_loss_avg,
                train_precision_avg,
                val_precision_avg,
                swa_val_precision_avg,
                int(ckpt_saved),
                int(sgdr_reset)),
            flush=True)

        print('{"chart": "best_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, global_val_precision_best_avg))
        print('{"chart": "val_precision", "x": %d, "y": %.3f}' % (epoch + 1, val_precision_avg))
        print('{"chart": "val_loss", "x": %d, "y": %.3f}' % (epoch + 1, val_loss_avg))
        print('{"chart": "sgdr_reset", "x": %d, "y": %.3f}' % (epoch + 1, sgdr_reset_count))
        print('{"chart": "precision", "x": %d, "y": %.3f}' % (epoch + 1, train_precision_avg))
        print('{"chart": "loss", "x": %d, "y": %.3f}' % (epoch + 1, train_loss_avg))
        print('{"chart": "swa_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_precision_avg))
        print('{"chart": "swa_val_loss", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_loss_avg))

        if sgdr_reset and sgdr_reset_count >= ensemble_model_count and epoch - epoch_of_last_improval >= train_abort_epochs_without_improval:
            print("early abort")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()

    train_end_time = time.time()
    print()
    print("Train time: %s" % str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    eval_start_time = time.time()

    print()
    print("evaluation of the training model")

    model.load_state_dict(torch.load("{}/model.pth".format(output_dir), map_location=device))

    analyze(Ensemble([model]), train_data.val_set_df, use_tta=False)
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=True)

    score_to_model = {}
    ensemble_model_candidates = glob.glob("{}/model-*.pth".format(output_dir))
    ensemble_model_candidates.append("{}/swa_model.pth".format(output_dir))
    for model_file_path in ensemble_model_candidates:
        model_file_name = os.path.basename(model_file_path)
        m = create_model(pretrained=False).to(device)
        m.load_state_dict(torch.load(model_file_path, map_location=device))
        val_loss_avg, val_precision_avg = evaluate(m, val_set_data_loader, criterion)
        print("ensemble '%s': val_loss=%.3f, val_precision=%.3f" % (model_file_name, val_loss_avg, val_precision_avg))
        if len(score_to_model) < ensemble_model_count:
            score_to_model[val_precision_avg] = m
        elif min(score_to_model.keys()) < val_precision_avg:
            del score_to_model[min(score_to_model.keys())]
            score_to_model[val_precision_avg] = m

    ensemble_models = list(score_to_model.values())
    for ensemble_model in ensemble_models:
        val_loss_avg, val_precision_avg = evaluate(ensemble_model, val_set_data_loader, criterion)
        print("ensemble: val_loss=%.3f, val_precision=%.3f" % (val_loss_avg, val_precision_avg))

    model = Ensemble(ensemble_models)
    mask_threshold_global, mask_threshold_per_cc = analyze(model, train_data.val_set_df, use_tta=True)

    eval_end_time = time.time()
    print()
    print("Eval time: %s" % str(datetime.timedelta(seconds=eval_end_time - eval_start_time)))

    print()
    print("submission preparation")

    submission_start_time = time.time()

    test_data = TestData(input_dir)
    calculate_predictions(test_data.df, model, use_tta=True)
    calculate_prediction_masks(test_data.df, mask_threshold_global)

    print()
    print(test_data.df.groupby("predictions_cc").agg({"predictions_cc": "count"}))

    write_submission(test_data.df, "prediction_masks", "{}/{}".format(output_dir, "submission.csv"))
    write_submission(test_data.df, "prediction_masks_best", "{}/{}".format(output_dir, "submission_best.csv"))

    submission_end_time = time.time()
    print()
    print("Submission time: %s" % str(datetime.timedelta(seconds=submission_end_time - submission_start_time)))
Example #21

@child('e3')
def i2(x):
    return x**2


@child('e3')
def i3(x, y):
    return x**3 + y


if __name__ == '__main__':

    # create our first ensemble and give it a name
    e1 = Ensemble('e1')
    # create a second ensemble
    e2 = Ensemble('e2')

    # you may use the ensembles as long as you specify which model you use
    print(e1(child='f', x=2))
    print(e1(child='g', y=3))
    print(e2(child='f', x=2))

    # try to use model `g` but it's not in ensemble `e2`
    try:
        print(e2(child='g', y=3))
    except ValueError:
        pass

    # try to use model `h` but it's not decorated with @model
Example #22
    def __str__(self):

        return "PDB" + Ensemble.__str__(self)
Example #23
    def __repr__(self):

        return "<PDB" + Ensemble.__repr__(self)[1:]
Example #24
    def __init__(self, title="Unknown"):

        self._labels = []
        Ensemble.__init__(self, title)
        self._trans = None
Example #25
class Manager(object):
    def __init__(self, sourceFile, targetFile):
        self.SDataBufferArr = None  #2D array representation of self.SDataBuffer
        self.SDataLabels = None
        self.TDataBufferArr = None  #2D array representation of self.TDataBuffer
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma

        self.kliep = None

        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)
        """
		- simulate source and target streams from corresponding files.
		"""
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        Properties.MAXVAR = self.source.initialData.shape[0]

    """
	Detect drift on a given data stream.
	Returns the change point index on the stream array.
	"""

    def __detectDrift(self, slidingWindow, flagStream):
        changePoint = -1
        if flagStream == 0:
            changePoint = self.changeDetector.detectSourceChange(slidingWindow)
        elif flagStream == 1:
            changePoint = self.changeDetector.detectTargetChange(slidingWindow)
        else:
            raise Exception('flagStream var has value ' + str(flagStream) +
                            ' that is not supported.')
        return changePoint

    """
	Write value (accuracy or confidence) to a file with DatasetName as an identifier.
	"""

    def __saveResult(self, acc, datasetName):
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + "\n")

    def convListOfDictToNDArray(self, listOfDict):
        arrayRep = []
        if not listOfDict:
            return arrayRep
        arrayRep = np.array([[float(v)] for k, v in listOfDict[0].items()
                             if k != -1])
        for i in range(1, len(listOfDict)):
            arrayRep = np.append(arrayRep,
                                 np.array([[float(v)]
                                           for k, v in listOfDict[i].items()
                                           if k != -1]),
                                 axis=1)
        return arrayRep
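    # Illustration (not from the original project) of what convListOfDictToNDArray
    # returns: each dict maps feature index -> value, with the label stored under
    # key -1, so [{0: 1.0, 1: 2.0, -1: 'A'}, {0: 3.0, 1: 4.0, -1: 'B'}] becomes
    #     [[1.0, 3.0],
    #      [2.0, 4.0]]
    # i.e. one row per feature (MAXVAR rows) and one column per buffered instance.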

    def collectLabels(self, listOfDict):
        labels = []
        for d in listOfDict:
            labels.append(str(d[-1]))
        return labels

    """
	The main method handling multistream classification using KLIEP.
	"""

    def startFusion(self, datasetName, probFromSource):
        #save the timestamp
        globalStartTime = time.time()
        Properties.logger.info('Global Start Time: ' +
                               datetime.datetime.fromtimestamp(globalStartTime)
                               .strftime('%Y-%m-%d %H:%M:%S'))
        #open files for saving accuracy and confidence
        fAcc = open(datasetName + '_' + Properties.OUTFILENAME, 'w')
        fConf = open(
            datasetName + '_confidence' + '_' + Properties.OUTFILENAME, 'w')
        #initialize gaussian models
        gmOld = gm.GaussianModel()
        gmUpdated = gm.GaussianModel()
        #variable to track forceupdate period
        idxLastUpdate = 0

        #Get data buffer
        self.SDataBufferArr = self.source.initialData
        self.SDataLabels = self.source.initialDataLabels

        self.TDataBufferArr = self.target.initialData

        #first choose a suitable value for sigma
        self.kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda,
                           Properties.kliepParB, Properties.kliepParThreshold,
                           Properties.kliepDefSigma)
        #self.kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda, Properties.kliepParB, Properties.MAXVAR*Properties.kliepParThreshold, Properties.kliepDefSigma)

        if self.useKliepCVSigma == 1:
            self.kliep.kliepDefSigma = self.kliep.chooseSigma(
                self.SDataBufferArr, self.TDataBufferArr)

        #calculate alpha values
        #self.kliep.kliepDefSigma = 0.1
        Properties.logger.info('Estimating initial DRM')
        gmOld.alphah, kernelMatSrcData, kernelMatTrgData, gmOld.refPoints = self.kliep.KLIEP(
            self.SDataBufferArr, self.TDataBufferArr)
        #initialize the updated gaussian model
        gmUpdated.setAlpha(gmOld.alphah)
        gmUpdated.setRefPoints(gmOld.refPoints)
        #now resize the windows appropriately
        self.SDataBufferArr = self.SDataBufferArr[:,
                                                  -Properties.MAX_WINDOW_SIZE:]
        self.SDataLabels = self.SDataLabels[-Properties.MAX_WINDOW_SIZE:]

        self.TDataBufferArr = self.TDataBufferArr[:,
                                                  -Properties.MAX_WINDOW_SIZE:]

        kernelMatSrcData = kernelMatSrcData[-Properties.MAX_WINDOW_SIZE:, :]
        kernelMatTrgData = kernelMatTrgData[-Properties.MAX_WINDOW_SIZE:, :]
        #meanDistSrcData = self.kliep.colWiseMeanTransposed(kernelMatSrcData)

        Properties.logger.info('Initializing Ensemble with the first model')
        #target model
        #first calculate weight for source instances
        weightSrcData = self.kliep.calcInstanceWeights(kernelMatSrcData,
                                                       gmUpdated.alphah)
        #since weightSrcData is a column matrix, convert it to a list before passing it to the new-model generation
        SDataBufferArrTransposed = self.SDataBufferArr.T
        TDataBufferArrTransposed = self.TDataBufferArr.T

        if self.useSvmCVParams == 1:
            params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
            svr = svm.SVC()
            opt = grid_search.GridSearchCV(svr, params)
            opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
            optParams = opt.best_params_

            self.ensemble.generateNewModelKLIEP(SDataBufferArrTransposed,
                                                self.SDataLabels,
                                                TDataBufferArrTransposed,
                                                weightSrcData[0].tolist(),
                                                optParams['C'],
                                                optParams['gamma'])
        else:
            self.ensemble.generateNewModelKLIEP(
                SDataBufferArrTransposed.tolist(), self.SDataLabels,
                TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                Properties.svmDefC, Properties.svmDefGamma,
                Properties.svmKernel)

        Properties.logger.info(self.ensemble.getEnsembleSummary())

        sDataIndex = 0
        tDataIndex = 0
        trueTargetNum = 0
        targetConfSum = 0
        changeDetected = False
        #enoughInstToUpdate is used to see if there are enough instances in the windows to
        #estimate the weights

        Properties.logger.info(
            'Starting MultiStream Classification with FUSION')
        while self.target.data.shape[1] > tDataIndex:
            """
			If the source stream is not empty, do proper sampling. Otherwise, just take
			the new instance from the target stream.
			"""
            if self.source.data.shape[1] > sDataIndex:
                fromSource = random.uniform(0, 1) < probFromSource
            else:
                print("\nsource stream sampling not possible")
                fromSource = False

            if fromSource:
                # Source Stream: '.' means sampling from source
                print('.', end="")
                #print("Source data index: ", sDataIndex)
                #print("\nlen(self.SDataBufferList) = ", len(self.SDataBufferList), ": source window slides")
                #remove the first instance, and add the new instance in the buffers
                newSrcDataArr = self.source.data[:, sDataIndex][np.newaxis].T
                self.SDataBufferArr = self.SDataBufferArr[:, 1:]
                self.SDataLabels = self.SDataLabels[1:]
                kernelMatSrcData = kernelMatSrcData[1:, :]
                #add new instance to the buffers
                self.SDataBufferArr = np.append(self.SDataBufferArr,
                                                newSrcDataArr,
                                                axis=1)
                self.SDataLabels.append(self.source.dataLabels[sDataIndex])

                #update kernelMatSrcData
                dist_tmp = np.power(
                    np.tile(newSrcDataArr, (1, gmUpdated.refPoints.shape[1])) -
                    gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelSDataNewFromRefs = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatSrcData = np.append(
                    kernelMatSrcData,
                    kernelSDataNewFromRefs[np.newaxis],
                    axis=0)

                #print("Satisfying the constrains.")
                gmUpdated.alphah, kernelMatSrcData = self.kliep.satConstraints(
                    self.SDataBufferArr, self.TDataBufferArr,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatSrcData)
                sDataIndex += 1
            else:
                # Target Stream
                print('#', end="")  # '#' indicates new point from target
                newTargetDataArr = self.target.data[:,
                                                    tDataIndex][np.newaxis].T
                # get Target Accuracy on the new instance
                resTarget = self.ensemble.evaluateEnsembleKLIEP(
                    np.reshape(newTargetDataArr, (1, -1)))
                if isinstance(resTarget[0], float) and abs(
                        resTarget[0] -
                        self.target.dataLabels[tDataIndex]) < 0.0001:
                    trueTargetNum += 1
                elif resTarget[0] == self.target.dataLabels[tDataIndex]:
                    trueTargetNum += 1
                acc = float(trueTargetNum) / (tDataIndex + 1)
                if (tDataIndex % 100) == 0:
                    Properties.logger.info('\nTotal test instance: ' +
                                           str(tDataIndex + 1) +
                                           ', correct: ' + str(trueTargetNum) +
                                           ', accuracy: ' + str(acc))
                fAcc.write(str(acc) + "\n")

                conf = resTarget[1]  # confidence
                # save confidence
                targetConfSum += conf
                fConf.write(
                    str(float(targetConfSum) / (tDataIndex + 1)) + "\n")

                #update alpha, and satisfy constraints
                #print("Update alpha and satisfy constrains")
                gmUpdated.alphah, kernelMatSrcData = self.kliep.updateAlpha(
                    self.SDataBufferArr, self.TDataBufferArr, newTargetDataArr,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatSrcData)

                #print("\nlen(self.TDataBufferList) = ", len(self.TDataBufferList), ": target window slides")
                #remove the first instance from buffers
                self.TDataBufferArr = self.TDataBufferArr[:, 1:]
                #update ref points
                gmUpdated.refPoints = gmUpdated.refPoints[:, 1:]
                # update kernelMatSrcData, as ref points has been updated
                kernelMatSrcData = kernelMatSrcData[:, 1:]
                # update kernelMatTrgData, as ref points has been updated
                kernelMatTrgData = kernelMatTrgData[1:, 1:]

                #update ref points
                gmUpdated.refPoints = np.append(gmUpdated.refPoints,
                                                newTargetDataArr,
                                                axis=1)

                #add to kernelMatSrcData for the last ref point
                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, self.SDataBufferArr.shape[1])) -
                    self.SDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatSrcData = np.append(kernelMatSrcData,
                                             kernel_dist_2[np.newaxis].T,
                                             axis=1)
                #now update kernelMatTrgData, as ref points has been updated
                #first add distance from the new ref points to all the target points
                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, self.TDataBufferArr.shape[1])) -
                    self.TDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatTrgData = np.append(kernelMatTrgData,
                                             kernel_dist_2[np.newaxis].T,
                                             axis=1)

                #now add distances for the newly added instance to all the ref points
                #add the new instance to the buffers
                self.TDataBufferArr = np.append(self.TDataBufferArr,
                                                newTargetDataArr,
                                                axis=1)

                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, gmUpdated.refPoints.shape[1])) -
                    gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelTDataNewFromRefs = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatTrgData = np.append(
                    kernelMatTrgData,
                    kernelTDataNewFromRefs[np.newaxis],
                    axis=0)

                tDataIndex += 1

            #print "sDataIndex: ", str(sDataIndex), ", tDataIndex: ", str(tDataIndex)
            enoughInstToUpdate = self.SDataBufferArr.shape[
                1] >= Properties.kliepParB and self.TDataBufferArr.shape[
                    1] >= Properties.kliepParB
            if enoughInstToUpdate:
                #print("Enough points in source and target sliding windows. Attempting to detect any change of distribution.")
                changeDetected, changeScore, kernelMatTrgData = self.kliep.changeDetection(
                    self.TDataBufferArr, gmOld.refPoints, gmOld.alphah,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatTrgData)
                #print("Change Score: ", changeScore)

            #instances from more than one class are needed for svm training
            if len(set(self.SDataLabels)) > 1 and (
                    changeDetected or
                (self.enableForceUpdate and
                 (tDataIndex + sDataIndex - idxLastUpdate) >
                 self.forceUpdatePeriod)
            ):  #or (tDataIndex>0 and (targetConfSum/tDataIndex)<0.1):
                fConf.write(str(7777777.0) + "\n")
                Properties.logger.info(
                    '\n-------------------------- Change of Distribution ------------------------------------'
                )
                Properties.logger.info('Change of distribution found')
                Properties.logger.info('sDataIndex=' + str(sDataIndex) +
                                       '\ttDataIndex=' + str(tDataIndex))
                Properties.logger.info('Change Detection Score: ' +
                                       str(changeScore) + ', Threshold: ' +
                                       str(self.kliep.kliepParThreshold))

                #Build a new model
                #First calculate the weights for each source instances
                gmOld.alphah, kernelMatSrcData, kernelMatTrgData, gmOld.refPoints = self.kliep.KLIEP(
                    self.SDataBufferArr, self.TDataBufferArr)
                #update the updated gaussian model as well
                gmUpdated.setAlpha(gmOld.alphah)
                gmUpdated.setRefPoints(gmOld.refPoints)

                weightSrcData = self.kliep.calcInstanceWeights(
                    kernelMatSrcData, gmUpdated.alphah)
                #Build a new model
                Properties.logger.info(
                    'Training a model due to change detection')
                SDataBufferArrTransposed = self.SDataBufferArr.T
                TDataBufferArrTransposed = self.TDataBufferArr.T
                if self.useSvmCVParams == 1:
                    params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
                    svr = svm.SVC()
                    opt = grid_search.GridSearchCV(svr, params)
                    opt.fit(SDataBufferArrTransposed.tolist(),
                            self.SDataLabels)
                    optParams = opt.best_params_

                    self.ensemble.generateNewModelKLIEP(
                        SDataBufferArrTransposed.tolist(), self.SDataLabels,
                        TDataBufferArrTransposed.tolist(),
                        weightSrcData[0].tolist(), optParams['C'],
                        optParams['gamma'])
                else:
                    self.ensemble.generateNewModelKLIEP(
                        SDataBufferArrTransposed.tolist(), self.SDataLabels,
                        TDataBufferArrTransposed.tolist(),
                        weightSrcData[0].tolist(), Properties.svmDefC,
                        Properties.svmDefGamma, Properties.svmKernel)

                Properties.logger.info(self.ensemble.getEnsembleSummary())
                #update the idx
                idxLastUpdate = tDataIndex + sDataIndex
                changeDetected = False
                #keep the latest 1/4th of data and update the arrays and lists
                #Properties.logger.info('Updating source and target sliding windows')
                """
				In the target window, we want to keep (3x/4) instances, where x is the number of gaussian kernel centers,
				so that we will try to detect a change point again after (x/4) instances. Since the arrival rates in the
				source and the target may differ, we calculate the number of points to retain in the source accordingly.
				"""
                #numberOfPointsInTargetToRetain = Properties.kliepParB - int(((1-probFromSource)*3*Properties.kliepParB)/4)
                #numberOfPointsInSourceToRetain = Properties.kliepParB - int((probFromSource*3*Properties.kliepParB)/4)
        #save the timestamp
        fConf.close()
        fAcc.close()
        globalEndTime = time.time()
        Properties.logger.info('\nGlobal End Time: ' +
                               datetime.datetime.fromtimestamp(globalEndTime).
                               strftime('%Y-%m-%d %H:%M:%S'))
        Properties.logger.info('Total Time Spent: ' +
                               str(globalEndTime - globalStartTime) +
                               ' seconds')
        Properties.logger.info('Done !!')
Example #26
print('\n___PARTITIONS 2___')
partitions2 = np.transpose(partitions)
ensemble2 = Ensemble(partitions=partitions2, n_cluster=3, partitions_format='PE')
e2, ts2, pr2 = ensemble2.mcla(times=True, partial_results=True)
for t in ts2:
  print(f'{t[0]}: {t[1]}s')
for r in pr2:
  print(r[0])
  print(r[1])
"""

print('\n___PARTITIONS 3___')
partitions3 = np.random.randint(8, size=(8, 100000))
ensemble3 = Ensemble(partitions=partitions3,
                     n_cluster=8,
                     partitions_format='PE')
e3, ts3, _pr3 = ensemble3.mcla(times=True)
for t in ts3:
    print(f'{t[0]}: {t[1]}s')
"""
hypergraph4 = np.array([
  [1, 1, 1, 0, 0, 0, 0],
  [0, 0, 0, 1, 1, 0, 0],
  [0, 0, 0, 0, 0, 1, 1],
  [0, 0, 0, 0, 0, 1, 1],
  [1, 1, 1, 0, 0, 0, 0],
  [0, 0, 0, 1, 1, 0, 0],
  [1, 1, 0, 0, 0, 0, 0],
  [0, 0, 1, 1, 0, 0, 0],
  [0, 0, 0, 0, 1, 1, 1],
Example #27
            model = resnet101()
            num_ftrs = model.fc.in_features
            model.fc = nn.Linear(num_ftrs, 1)

        if latest_model_path != "":
            model.load_state_dict(torch.load(latest_model_path))

        model.cuda()

        # Set parameters for model
        criterion = Loss(Wt1, Wt0)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               patience=1,
                                                               verbose=True)

        # Train model
        model = train_model(model, criterion, optimizer, dataloaders,
                            scheduler, dataset_sizes, epochs - current_epoch,
                            costs, accs, num_ID, model_type)

    # For testing ensemble model
    else:
        model = Ensemble("models/best_model_dense_4.pth",
                         "models/best_model_res_12.pth",
                         "models/best_model_vgg_19.pth")
        model.cuda()
        criterion = Loss(Wt1, Wt0)
        test_acc, test_loss = test_ensemble_mean(model, criterion, dataloaders,
                                                 dataset_sizes)
Example #28
#     classes.append(int(row[0]))
# #results = n.classify(texts)
# #results[results<0] = 0
# #print calculate_auc(classes, results)
# r1 = np.array(m1.classify(texts))
# print calculate_auc(classes, r1)
# r2 = np.array(m2.classify(texts))
# print calculate_auc(classes, r2)
# r = (1.2*r1 + 0.8*r2) / 2
# r[r>1] = 1
# r[r<0] = 0
# print calculate_auc(classes, r)

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)
n = Ensemble(texts, classes, nn_params, models)
end = time.time()
# print "training time="
# print end-start

start = time.time()

# evaluate the classfier on verification dataset
texts = []
inp = raw_input()
while inp:
    texts.append(inp.decode('utf8'))
    inp = raw_input()

results = n.classify(texts)
Example #29
    positives = sum(1 for label in labels if label)
    predicted_positives = sum(1 for pred in preds if pred)
    true_positives = sum(1 for label, pred in zip(labels, preds)
                         if label and pred)
    return 100.0 * true_positives / predicted_positives, 100.0 * true_positives / positives


def evaluate(model):
    def wrapper(dataset):
        preds = [model(x=x) for x, _ in dataset]
        precision, recall = get_results(dataset, preds)
        return {
            'precision': f'{precision:.1f}%',
            'recall': f'{recall:.1f}%',
        }

    return wrapper


if __name__ == '__main__':
    e = Ensemble('ensemble', children=[model1, model2], mode='all')
    results = Ensemble('results', children=[model1, model2, e])
    results.decorate_children(evaluate)
    print(results)
    pprint(results(dataset=get_dataset()))
"""
{'ensemble': {'precision': '100.0%', 'recall': '100.0%'},
 'model1': {'precision': '18.2%', 'recall': '100.0%'},
 'model2': {'precision': '30.0%', 'recall': '100.0%'}}
"""
Example #30
import csv
from data_present import Data
from ensemble import Ensemble
from sklearn.metrics import accuracy_score

counter = 892
data = Data()
submission = [['PassengerId', 'Survived']]

# Set up pred
ensemble = Ensemble(data)
ensemble.pred = map(int, ensemble.pred)

for entry in ensemble.pred:
	submission.append([counter, entry])
	counter += 1

with open('submission.csv', 'wb') as f:
    writer = csv.writer(f)
    for val in submission:
        writer.writerow(val)


Example #31
        print(
            "[epoch: {:d}] avg train_loss: {:.3f}   eval ll: {:.3f}   ({:.1f}s)"
            .format(epoch,
                    sum(losses) / len(losses), eval_ll,
                    time.time() - tic))

        print("running test code")
        name = "sample_" + str(epoch) + ".txt"
        test_code(model, name=name)

        print("ran test code")


if __name__ == '__main__':
    import args
    from model import Model
    from ensemble import Ensemble

    if method == 'ensemble':
        model = Ensemble(vectors).to(device)
    else:
        model = Model(vectors).to(device)

    train(model)
    # import dill
    # with open('model.p', 'rb') as h:
    #     model = dill.load(h)

    # visualize_attn(model)
Example #32
File: main.py  Project: sky1307/ga
def reward_func(sigma_index_lst=[1, 2, 3],
                default_n=20,
                epoch_num=4,
                epoch_min=100,
                epoch_step=50):
    '''
    input
    sigma_lst - The component index from the ssa gene for example the gen [0, 1, 0] -> sigma_lst=[1] #the index where gen=1
    default_n - the window length for ssa - <= N /2 where N is the length of the time series - default 20
    epoch_num - The number of submodel used
    epoch_min - Min epoch of submodel
    epoch_step - number of epoch difference bw 2 submodels

    output
    a tuple contain 2 value (nse_q, nse_h)
    '''
    K.clear_session()

    with open('./settings/model/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    # train
    model = Ensemble(mode='train',
                     model_kind='rnn_cnn',
                     sigma_lst=sigma_index_lst,
                     default_n=default_n,
                     epoch_num=epoch_num,
                     epoch_min=epoch_min,
                     epoch_step=epoch_step,
                     **config)
    model.train_model_outer()

    # test
    model = Ensemble(mode='test',
                     model_kind='rnn_cnn',
                     sigma_lst=sigma_index_lst,
                     default_n=default_n,
                     epoch_num=epoch_num,
                     epoch_min=epoch_min,
                     epoch_step=epoch_step,
                     **config)
    model.train_model_outer()
    model.retransform_prediction(mode='roll')
    return model.evaluate_model(mode='roll')
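A call sketch matching the gene convention described in the docstring (the gene [0, 1, 0] marks component index 1, so sigma_index_lst=[1]); the surrounding GA code is hypothetical:

# Hypothetical GA-side usage of reward_func(): map a gene to the indices of its 1-bits.
gene = [0, 1, 0]
sigma_index_lst = [i for i, bit in enumerate(gene) if bit == 1]
nse_q, nse_h = reward_func(sigma_index_lst=sigma_index_lst,
                           default_n=20,
                           epoch_num=4,
                           epoch_min=100,
                           epoch_step=50)
fitness = (nse_q + nse_h) / 2   # illustrative aggregation, not from the project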
Example #33
    classes.append(int(row[0]))
#results = n.classify(texts)
#results[results<0] = 0
#print calculate_auc(classes, results)
r1 = m1.classify(texts)
print calculate_auc(classes, r1)
r2 = np.array(m2.classify(texts))
print calculate_auc(classes, r2)
r = (1.2*r1 + 0.8*r2) / 2
r[r>1] = 1
r[r<0] = 0
print calculate_auc(classes, r)
  
#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)
n = Ensemble(texts, classes, nn_params, models)



texts = []
csvr = csv.reader(open('test.csv', 'rb'), delimiter=',', quotechar='"')
csvr.next()
for row in csvr:
    texts.append(row[1].decode('utf8'))
results = n.classify(texts)


results[results<0] = 0
results[results>1] = 1
writer = open('rez.csv', 'w')
for r in results:
Example #34
    def __repr__(self):

        return '<PDB' + Ensemble.__repr__(self)[1:]
Example #35
    def __str__(self):

        return 'PDB' + Ensemble.__str__(self)
Example #36
plt.rcParams["figure.figsize"] = (14, 12)
plt.ticklabel_format(style='plain', useOffset=False)

#%%
#data = pd.read_csv('../tommi_test_data.csv', sep=";", header=0)
data = pd.read_csv('../tommi_test_data_more_diff_steps.csv', sep=";", header=0)

data = data.loc[data["Warning_code"] == 0]
data = data.reset_index(drop=True)

tforce_DF = DataHandler.calculateTotalForce(data)
step_t_DF = DataHandler.calculateStepTime(data)

#%% Bagging test

avg_acc, real_label, pred_label = Ensemble.testBagging(step_t_DF)

pred_label_df = pred_label
real_label_df = real_label

pred_label_df = pred_label_df.replace("Normal", 0)
pred_label_df = pred_label_df.replace("Fall", 1)

real_label_df = real_label_df.replace("Normal", 0)
real_label_df = real_label_df.replace("Fall", 1)

avg_auc = roc_auc_score(real_label_df, pred_label_df)
print("AUC score: ", round(avg_auc, 2))

#%% 2d scatter
from sklearn.decomposition import PCA
Example #37
from saveobject import save_obj

N = 100
steps = 1000
repeat = 30
res = 0.01
b1 = 0
b2 = 3

B = np.arange(b1, b2, res)
B = B[B != 0]
B = 1 / B

M = np.array([0])

ensemble = Ensemble(N, B, M, steps, repeat, False)
ensemble.getStats()

beta = ensemble.beta
mu = ensemble.mu

stats = ensemble.stats

save_obj(stats, "stats8")

# keys = ["energy","magnetization","population","entropy"]
# def calcStats(size,beta,mu,steps,times):
# 	global keys
# 	stats = {}
# 	arr = {}
# 	for key in keys:
Example #38
    'max_depth': 6,
    'n_estimators': 1000,
    'learning_rate': 0.025,
    'subsample': 0.9
}

models = {
    "LGB-1": LGBMClassifier(**lgb_params),
    "XGB-1": XGBClassifier(**xgb_params),
    "LGB-2": LGBMClassifier(**lgb_params2),
    #"LGB-3": LGBMClassifier(**lgb_params3),
    "XGB-2": XGBClassifier(**xgb_params2),
    #"CAT": CatBoostClassifier(**cat_params),
    #"GBM": GradientBoostingClassifier(**gb_params),
    #"RF": RandomForestClassifier(**rf_params),
    #"ET": ExtraTreesClassifier(**et_params),
    #"ABC": AdaBoostClassifier(n_estimators=100),
}

start = time.time()
stack = Ensemble(4,
                 models.values(),
                 stacker=SGDClassifier(loss="log", max_iter=1000))
y_pred = stack.fit_predict(X, y, X_test)
print("Finished ensembling in %.2f seconds" % (time.time() - start))

sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred
sub.to_csv("%s.csv" % ("-".join(models.keys())), index=False)
Example #39
    def __init__(self):
        self.ensemble = Ensemble(instruments)