Python loadDataset示例，utils.loadDataset Python示例

示例#1

0

显示文件

文件： three_step_optmz.py 项目： keepitsimple/ocrtest

def firstStep(metaOptimizer, smallTrainFilename, smallTestFilename):
    """Run a random-search optimization on the small datasets and rank the scores.

    Args:
        metaOptimizer: object exposing ``initialize_optimizer`` and ``algorithm``;
            the search result is stored on its ``optimized`` attribute.
        smallTrainFilename: passed to ``loadDataset`` to get the training data/labels.
        smallTestFilename: passed to ``loadDataset`` to get the test data/labels.
    """
    trainData, trainLabel = loadDataset(smallTrainFilename)
    testData, testLabel = loadDataset(smallTestFilename)
    metaOptimizer.initialize_optimizer(
        "random", None, trainData, trainLabel, testData, testLabel, jobs=-1, iterations=700, scoresCsvFilename=None
    )
    # Keep a local handle as well as the attribute: the ranking below needs the
    # result, and a bare `optimized` name is not otherwise bound in this function.
    optimized = metaOptimizer.algorithm()
    metaOptimizer.optimized = optimized

    # Each grid score is unpacked as (params, mean_score, scores); tuples are
    # ordered by descending mean score, with half the std kept alongside.
    sortedScores = sorted(
        [(mean_score, scores.std() / 2, params, scores) for params, mean_score, scores in optimized.grid_scores_],
        reverse=True,
    )

示例#2

0

显示文件

def mix(gans, num_real, num_synth):
    """
    Build a Dataloader from real and/or synthetic images.

    :param gans: forwarded to utils.gen_synth_data
    :param num_real: number of real images; <= 0 yields a synthetic-only loader
    :param num_synth: number of synthetic images; <= 0 yields a real-only loader
    :return: a Dataloader with num_real real and num_synth synthetic images
    """
    # Guard clause: nothing real requested, so hand back the synthetic loader.
    if num_real <= 0:
        return utils.gen_synth_data(gans, n_entries=num_synth)

    # Guard clause: nothing synthetic requested, so use the second loader
    # returned by utils.loadDataset.
    if num_synth <= 0:
        loaders = utils.loadDataset(train_size=num_real, batch_size=100)
        return loaders[1]

    # Both kinds requested: concatenate the two underlying datasets and wrap
    # the result in a freshly shuffled loader.
    synthetic_loader = utils.gen_synth_data(gans, n_entries=num_synth)
    _, real_loader = utils.loadDataset(train_size=num_real, batch_size=100)
    combined = torch.cat((real_loader.dataset, synthetic_loader.dataset), 0)
    return torch.utils.data.DataLoader(combined, batch_size=100, shuffle=True)

示例#3

0

显示文件

def main(outputName):
    """Load the tree datasets and train one model per hyperparameter combination.

    ``outputName`` is echoed in the banner and forwarded to ``train.train``.
    The hyperparameter lists (``miniBatchSize``, ``adagradResetNbIter``,
    ``learningRate``, ``regularisationTerm``) and ``nbEpoch`` are not defined
    here; they are read from outside this function.
    """
    print("Welcome into RNN implementation, (recording will be on ",
          outputName, ")")

    # Fixed seeds so that runs can be replicated.
    random.seed("MetaMind")
    np.random.seed(7)

    print("Parsing dataset, creating dictionary...")
    vocabulary.initVocab()

    # Load each split in turn and report it as soon as it is available.
    datasets = {}
    splits = (
        ('training', "trees/train.txt", "Training loaded !"),
        ('testing', "trees/test.txt", "Testing loaded !"),
        ('validating', "trees/dev.txt", "Validation loaded !"),
    )
    for splitName, treeFile, loadedMessage in splits:
        datasets[splitName] = utils.loadDataset(treeFile)
        print(loadedMessage)

    print("Datasets loaded !")
    print("Nb of words", vocabulary.vocab.length())

    # No data transformation (normalisation, outlier removal, ...) is done here.

    # Grid search over the hyperparameters: plain train/test, because a full
    # k-fold cross validation would take too long.
    for mBS in miniBatchSize:
        for aRNI in adagradResetNbIter:
            for lR in learningRate:
                for rT in regularisationTerm:
                    params = {
                        "nbEpoch": nbEpoch,
                        "learningRate": lR,
                        "regularisationTerm": rT,
                        "adagradResetNbIter": aRNI,
                        "miniBatchSize": mBS,
                    }
                    # The vocabulary values live in model.L and the dataset
                    # outputs are recomputed at each iteration, so nothing has
                    # to be reset between runs.
                    model = train.train(outputName, datasets, params)

    # TODO: Plot the cross-validation curve
    # TODO: Plot a heat map of the hyperparameters cost to help tuning them ?

    # Validation on the last computed model is disabled (only meant for the
    # final training):
    #   vaError = model.computeError(datasets['validating'], True)
    #   print("Validation error: ", vaError)

    print("The End. Thank you for using this program!")

示例#4

0

显示文件

文件： main.py 项目： Conchylicultor/SentimentAnalysis

def main(outputName):
    """Entry point of the RNTN training script.

    Seeds the random generators, initialises the vocabulary, loads the three
    tree datasets and calls ``train.train`` once for every combination of the
    hyperparameter lists. Those lists (``miniBatchSize``, ``adagradResetNbIter``,
    ``learningRate``, ``regularisationTerm``) and ``nbEpoch`` come from outside
    this function.
    """
    print("Welcome into RNTN implementation 0.6 (recording will be on ", outputName, ")")

    # Fixed seeds for replication.
    random.seed("MetaMind")
    np.random.seed(7)

    print("Parsing dataset, creating dictionary...")
    vocabulary.initVocab()

    # Datasets are keyed by the role they play during training.
    datasets = {}
    datasets['training'] = utils.loadDataset("trees/train.txt")
    print("Training loaded !")
    datasets['testing'] = utils.loadDataset("trees/test.txt")
    print("Testing loaded !")
    datasets['validating'] = utils.loadDataset("trees/dev.txt")
    print("Validation loaded !")

    print("Datasets loaded !")
    print("Nb of words", vocabulary.vocab.length())

    # No data transformation (normalisation, outlier removal, ...) happens here.

    # Exhaustive grid search; a complete k-fold cross validation would be too
    # long, so each combination is simply trained and tested once.
    for mBS in miniBatchSize:
        for aRNI in adagradResetNbIter:
            for lR in learningRate:
                for rT in regularisationTerm:
                    params = {
                        "nbEpoch": nbEpoch,
                        "learningRate": lR,
                        "regularisationTerm": rT,
                        "adagradResetNbIter": aRNI,
                        "miniBatchSize": mBS,
                    }
                    # Vocabulary values are held in model.L and dataset outputs
                    # are recomputed each iteration: no manual reset required.
                    model, errors = train.train(outputName, datasets, params)

    # TODO: Plot the cross-validation curve
    # TODO: Plot a heat map of the hyperparameters cost to help tuning them ?

    # Disabled: validation on the last computed model (final training only).
    #   vaError = model.computeError(datasets['validating'], True)
    #   print("Validation error: ", vaError)

    print("The End. Thank you for using this program!")

示例#5

0

显示文件

def main(args):
    """Train a classifier on extracted features, score it and pickle it.

    Args:
        args: parsed command-line namespace; ``args.extractor`` selects the
            feature-extraction method and ``args.classifier`` the classifier.

    The trained model is written to ``model/<extractor>_<classifier>.pkl``.
    """
    extractor = args.extractor
    classifier = args.classifier

    # Load dataset into memory
    dataset, labelset = loadDataset()
    # Feature extraction
    feature_vectors = featureExtraction(dataset=dataset, method=extractor)
    # Split dataset
    print("Step 3. Split dataset into training data and test data")
    print("\tSplitting data...")
    X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labelset, test_size=TEST_SIZE)
    print("\tSplit dataset successfully !")
    print("\tThe size of training data: {}".format(len(X_train)))
    print("\tThe size of test data: {}".format(len(X_test)))

    # Train the model with the explicitly requested classifier.
    model = trainModel(dataset=X_train, label=y_train, classifier=classifier)

    # Test model on test data.
    print("Step 5. Test model on test data")
    print("\tTesing model ...")
    accuracy = model.score(X_test, y_test)
    print("\tThe accuary of model is {} %".format(accuracy * 100))

    # Save model
    print("Step 6. Save model into disk")
    print("\tSaving ...")
    with open('model/{}_{}.pkl'.format(extractor, classifier), 'wb') as fid:
        # Persist the model trained above; no other estimator is bound in
        # this function.
        cPickle.dump(model, fid)
    # The message has a single placeholder, so only the classifier name is passed.
    print("\tSave model successully with name {}".format(classifier))

示例#6

0

显示文件

文件： check_all_classifiers.py 项目： keepitsimple/ocrtest

def process_arguments():
    """Parse the command line and load the datasets it names.

    Returns:
        A ``ClassifierTestParams`` built from the parsed options together with
        the train/test data and labels returned by ``loadDataset``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Optimize set of different classifiers with meta-parameter optimization')

    # Required positional arguments, registered in command-line order.
    positionals = (
        ('train', 'Train dataset'),
        ('test', 'Test dataset'),
        ('modelDirectory', 'Directory to save best models'),
        ('evaluationsFilename', 'Filename to save  models result'),
    )
    for argName, helpText in positionals:
        arg_parser.add_argument(argName, help=helpText)

    # Optional search settings.
    arg_parser.add_argument(
        '-t', '--type', default='grid', choices=['grid', 'random', 'pso'], help='Search type')
    arg_parser.add_argument(
        '-i', '--iterations', default=-1, type=int, help='Iterations amount for pso and random search')
    arg_parser.add_argument(
        '-j', '--jobs', default=-1, type=int, help='Processes amount for learning')

    parsed = arg_parser.parse_args()
    trainData, trainLabel = loadDataset(parsed.train)
    testData, testLabel = loadDataset(parsed.test)

    return ClassifierTestParams(
        parsed.type, parsed.iterations, parsed.modelDirectory, parsed.evaluationsFilename,
        trainData, trainLabel, testData, testLabel, parsed.jobs)

示例#7

0

显示文件

文件： self_learn.py 项目： dliud/gan

def main():
    """Load the MNIST training files and start self-learning on them."""
    # Number of real images per Dataloader to use at the beginning.
    real_size = 100

    load_options = dict(
        train_size=real_size,
        batch_size=25,
        image_path='./mnist/train-images-idx3-ubyte',
        label_path='./mnist/train-labels-idx1-ubyte',
    )
    # Only the first element of the returned pair is used here.
    dataloaders, _ = utils.loadDataset(**load_options)

    self_learn(dataloaders, train_size=real_size)

示例#8

0

显示文件

文件： train_gans.py 项目： ramshankar99/AugGan

def main(model_size=1, trial=2):
    # Load the dataset with utils.loadDataset's defaults and hand it to
    # repeatTrain; model_size is forwarded as `alpha`.
    num_gans = 1
    dataloaders = utils.loadDataset()

    # Alternative schedule kept for reference: epoch_len=500, end=2000.
    schedule = dict(trial=trial, epoch_len=50, end=100, alpha=model_size)
    repeatTrain(dataloaders, **schedule)
    """

示例#9

0

显示文件

def main(train_size, model_size, trial):
    # Load `train_size` MNIST samples and run repeatTrain on them;
    # model_size is forwarded as `alpha`.
    num_gans = 10

    mnist_options = dict(
        train_size=train_size,
        batch_size=25,
        image_path='./mnist/train-images-idx3-ubyte',
        label_path='./mnist/train-labels-idx1-ubyte',
    )
    # The second returned loader is unpacked but not used in this function.
    dataloaders, labeledDataLoader = utils.loadDataset(**mnist_options)

    schedule = dict(trial=trial, epoch_len=500, end=2000, alpha=model_size)
    repeatTrain(dataloaders, **schedule)
    """

示例#10

0

显示文件

文件： test.py 项目： Conchylicultor/SentimentAnalysis

def main():
    """Load a saved model and print its error on the validation trees.

    ``inputModel`` is read from outside this function.
    """
    print("Welcome into RNTN implementation 0.1")

    print("Loading dictionary...")
    # Initialise the dictionary from inputModel.
    vocabulary.initVocab(inputModel)

    # Validation trees.
    validationSet = utils.loadDataset("trees/dev.txt")
    print("Validation loaded !")

    # Build the model from inputModel.
    model = rntnmodel.Model(inputModel)

    print("Computation validation...")
    print("Validation error: ", model.computeError(validationSet, True))

示例#11

0

显示文件

 def train_step(test_xs):
     """Run the training loop for ``flags.num_epoch`` epochs.

     Relies on names that are not defined in this function: ``flags``,
     ``sess``, ``ae``, ``train_op``, ``global_step``, ``train_summary_op``,
     ``train_summary_writer``, ``saver``, ``checkpoint_prefix``, ``utils``,
     ``partition`` and ``corrupt``.

     Args:
         test_xs: not used by the training loop itself; it is only converted
             to a NumPy array, its shape printed, and returned.

     Returns:
         ``test_xs`` as a NumPy array.
     """
     batch_size = flags.batch_size
     # batch_size = 50
     n_epochs = flags.num_epoch
     # Binary mask drawn once and reused for every batch: each entry is 1 with
     # probability 1 - flags.corrupt_prob. Its shape is
     # (round(batch_size * flags.validation) + 1, 225).
     # NOTE(review): 225 is presumably the per-sample feature size — confirm.
     mask = np.random.binomial(
         1, 1 - flags.corrupt_prob,
         (int(np.round(batch_size * flags.validation)) + 1, 225))
     # print(mask[:5])
     for epoch_i in range(n_epochs):
         # print dataset_train.shape[1] // batch_size
         # The datasets are reloaded at the start of every epoch.
         datasets = utils.loadDataset(batch_size=batch_size,
                                      max=flags.max,
                                      dataset_dir=flags.datasetPath)
         f = 0  # never read afterwards
         for dataset in datasets:
             # Only the training part of the split is used below.
             dataset_train, dataset_test = partition(dataset,
                                                     shuffle=False)
             # Subtract the mean taken along axis 1 from every column of
             # dataset_train (the transpose iterates columns, the second
             # transpose restores the original orientation).
             mean_img = np.mean(dataset_train, axis=1)
             dataset_train = np.array(
                 [img - mean_img for img in dataset_train.T])
             dataset_train = dataset_train.T
             # corrupt() returns a pair; the first element is fed to ae.x_ and
             # the second to ae.x.
             # NOTE(review): which of the two is the corrupted copy depends on
             # corrupt() — confirm.
             dataset_train_, dataset_train = corrupt(dataset_train,
                                                     mask=mask)
             # One optimisation step; also fetches the score, the step counter
             # and the summaries.
             _, score, step, summaries = sess.run(
                 [train_op, ae.score, global_step, train_summary_op],
                 feed_dict={
                     ae.x: dataset_train,
                     ae.x_: dataset_train_
                 })
             current_step = tf.train.global_step(sess, global_step)
             # Progress report every 100 global steps.
             if current_step % 100 == 0:
                 print("epoch:{} step:{} score:{}".format(
                     epoch_i, step, score))
             train_summary_writer.add_summary(summaries, step)
             # Checkpoint every 1000 global steps.
             if current_step % 1000 == 0:
                 path = saver.save(sess,
                                   checkpoint_prefix,
                                   global_step=current_step)
                 print("Saved model checkpoint to {}\n".format(path))
         # Disabled per-epoch evaluation on test_xs:
         # score, step, summaries, output, W= sess.run([ae.score, global_step, dev_summary_op, ae.output, ae.encoder], feed_dict={
         #         ae.x: test_xs,
         #         ae.x_: test_xs})
         # print("evaluation:\nscore:{}".format(score))
     test_xs = np.asarray(test_xs)
     print("Testxs : " + str(test_xs.shape))
     return test_xs

示例#12

0

显示文件

def main():
    """Evaluate a saved RNTN model on the validation trees.

    ``inputModel`` is read from outside this function; it is used both to
    initialise the vocabulary and to build the model.
    """
    print("Welcome into RNTN implementation 0.1")

    # Dictionary initialisation
    print("Loading dictionary...")
    vocabulary.initVocab(inputModel)

    # Loading dataset
    devTrees = utils.loadDataset("trees/dev.txt")
    print("Validation loaded !")

    # Creating the model
    rntn = rntnmodel.Model(inputModel)

    print("Computation validation...")
    validationError = rntn.computeError(devTrees, True)
    print("Validation error: ", validationError)

示例#13

0

显示文件

文件： atcTwitterCorpus.py 项目： ClimbsRocks/nlpSentiment

def getFeatures(numWordsToUse):
    """Build sparse features and binary sentiment labels from the aggregated corpus.

    Only rows with an extreme sentiment are kept. ``numWordsToUse`` is
    forwarded to ``utils.nlpFeatureEngineering``. The module-level
    ``popularWords`` is overwritten as a side effect, and the module-level
    vectorizer ``dv`` is fitted on the resulting feature dictionaries.

    Returns:
        (sparseFeatures, sentiment)
    """
    _, _, allRows = utils.loadDataset('twitterCorpus/aggregatedCorpusCleaned.csv',1,2)

    tweets = []
    sentiment = []

    for rowIndex, row in enumerate(allRows):
        # skip header row
        if rowIndex == 0:
            continue

        # the aggregatedCorpus has sentiment scores from 1 - 5, while STS has
        # scores from 0 - 4
        shiftedScore = int(row[0]) - 1

        # only include the row if this is a fairly extreme sentiment
        if shiftedScore not in (0, 4):
            continue

        tweets.append(row[2])
        # the highest score becomes 1, the lowest becomes 0
        sentiment.append(1 if shiftedScore == 4 else 0)

    tokenizedTweets, cleanedSentiment = utils.tokenize(tweets, sentiment)

    global popularWords
    formattedTweets, sentiment, popularWords = utils.nlpFeatureEngineering(
        tokenizedTweets, cleanedSentiment,0,numWordsToUse,'counts'
    )

    # transform list of dictionaries into a sparse matrix
    return dv.fit_transform(formattedTweets), sentiment

示例#14

0

显示文件

文件： atcTwitterCorpus.py 项目： suvigyavijay/nlpSentiment

def getFeatures(numWordsToUse):
    """Turn the aggregated Twitter corpus into (sparse features, labels).

    Rows are filtered down to the two extreme sentiment scores, which are
    mapped to 0 and 1. ``numWordsToUse`` is passed through to
    ``utils.nlpFeatureEngineering``. Side effects: rebinds the global
    ``popularWords`` and fits the module-level vectorizer ``dv``.
    """
    loaded = utils.loadDataset(
        'twitterCorpus/aggregatedCorpusCleaned.csv', 1, 2)
    allTweets, allTweetsSentiment, allRows = loaded

    tweets, sentiment = [], []

    dataRows = iter(allRows)
    # skip header row
    next(dataRows, None)

    for row in dataRows:
        # the aggregatedCorpus has sentiment scores from 1 - 5, while STS has
        # scores from 0 - 4
        stsScore = int(row[0]) - 1

        # only include the row if this is a fairly extreme sentiment
        if stsScore == 4:
            label = 1
        elif stsScore == 0:
            label = 0
        else:
            continue

        tweets.append(row[2])
        sentiment.append(label)

    tokenizedTweets, cleanedSentiment = utils.tokenize(tweets, sentiment)

    global popularWords
    engineered = utils.nlpFeatureEngineering(
        tokenizedTweets, cleanedSentiment, 0, numWordsToUse, 'counts')
    formattedTweets, sentiment, popularWords = engineered

    # transform list of dictionaries into a sparse matrix
    sparseFeatures = dv.fit_transform(formattedTweets)
    return sparseFeatures, sentiment

示例#15

0

显示文件

        model.add(LSTM(150, dropout=0.2, recurrent_dropout=0.2))
    elif networkType == GRU_LABEL:
        model.add(GRU(150, dropout=0.2, recurrent_dropout=0.2))
    elif networkType == MLP_LABEL:
        model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(27, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


if __name__ == '__main__':
    # Script entry point: load the dataset, encode tweets and labels, split
    # them into train/validation parts and build the requested network.
    arguments = parseArgs()
    dataset = utils.loadDataset(arguments.reprocessDataset)
    xData = dataset[constants.TWEET_COLUMN]
    yData = dataset[constants.LABEL_COLUMN]

    # The same vocabulary size is given to the tokenizer and to buildModel.
    vocabularySize = 13000
    xEncoder = Tokenizer(num_words=vocabularySize)
    yEncoder = preprocessing.OneHotEncoder()

    print('Encoding and splitting xData, yData')
    xDataEncoded = encodeX(xEncoder, xData)
    yDataEncoded = encodeY(yEncoder, yData)
    xTrain, xValid, yTrain, yValid = model_selection.train_test_split(
        xDataEncoded, yDataEncoded)

    model = buildModel(vocabularySize, arguments.networkType)
    # Keras' summary() prints the table itself and returns None, so wrapping
    # it in print() would emit an extra "None" line.
    model.summary()

示例#16

0

显示文件

    # Maximum number of historical records to keep for predicting
    time_window_threshold = 30

    refresh_time_interval = 15

    if args.mode == 'train':
        logging.info('Mode: Training')
        logging.info('Evaluation: ' +
                     str(True if args.evaluate in ('True', 'true') else False))

        # Initialization of training params
        # Time interval to resample data
        sample_time = '60S'

        logging.info('Loading train & validation dataset')
        df = loadDataset(trainset)

        # Resample every 15 sec, because prometheus & amari exporter have different timestamps
        df = df.resample('15S', closed='right', label='left').mean()

        # Fill NaNs in the train df with 0s (note: fillna alone does not replace Infs)
        df.fillna(0, inplace=True)

        logging.info('Saving normalization values')
        normalization_stats = saveNormalizationStats(df, cols_to_normalize)

        saveDictJson(normalization_stats, stats_json)

        logging.info('Normalizing train & validation data')
        for col in cols_to_normalize:
            df[col + '_normalized'] = normalizeFeature(

示例#17

0

显示文件

from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Dimensions of the previously trained network.
feature_num, hidden_num, output_num = 11, 30, 3

# Rebuild the network, restore its saved weights and switch it to eval mode.
# Alternative checkpoint: 'net_model_subjective_rating.pt'.
load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('net_model_genre.pt'))
load_net.eval()

# Both the "train" and the "test" pair are loaded from the 'testing' dataset,
# which is what the new network is evaluated on.
x_train, y_train = loadDataset('testing')
x_test, y_test = loadDataset('testing')

# Vector information: the first three columns of the header-less sheet are
# exposed as 'row', 'col' and 'vector'.
vectors = pd.read_excel('vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame(
    {
        'row': vectors.iloc[:, 0],
        'col': vectors.iloc[:, 1],
        'vector': vectors.iloc[:, 2],
    }
)

# Order by vector angle, smallest first, then collect the distinct row and
# column values in that order.
increase_res = raw_df.sort_values(by='vector', ascending=True)
unique_row = increase_res['row'].unique()
unique_col = increase_res['col'].unique()

示例#18

0

显示文件

    for i in xrange(stateNum):
       for j in xrange(stateNum):
           fout.write(str(A[i,j]))
           fout.write(" ")
       fout.write("\n")
    
    

def dataCorrectRate(Set):
    """Print the fraction of samples whose best-scoring state matches its label.

    ``Set[0]`` is read as a sequence of per-sample score rows and ``Set[1]``
    as the matching sequence of expected state indices. For each row the
    predicted state is the first index holding the largest score strictly
    greater than 0; if no score is positive the prediction is -1.

    Written with constructs valid on both Python 2 and Python 3, and prints
    the same line on either (``correct rate: <float>``).

    Raises:
        ZeroDivisionError: if ``Set[0]`` is empty.
    """
    correct = 0
    totalNum = 0
    for i, scores in enumerate(Set[0]):
        maxNum = 0
        maxState = -1
        for j, score in enumerate(scores):
            # Strict comparison keeps the first occurrence of the maximum.
            if score > maxNum:
                maxNum = score
                maxState = j
        if Set[1][i] == maxState:
            correct += 1
        totalNum += 1
    # A single pre-joined string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print("correct rate: " + str(float(correct) / float(totalNum)))
    
if __name__ == '__main__':
    # Script entry point: load the pickled data set, report the correct rate
    # of its first element, then train the HMM on the whole set.
    dataSet = loadDataset("../../pkl/t26v31.pkl", 3)

    dataCorrectRate(dataSet[0])
    trainHMM(dataSet)

示例#19

0

显示文件

文件： app.py 项目： theGuyWithBlackTie/pytorch-BERT-GCN

def run():
    """Optionally train, then evaluate, the model selected by ``config.modelName``.

    Steps:
        1. Load the train/test datasets and wrap them in DataLoaders.
        2. Build the model named by ``config.modelName`` ("BertBase" or
           "SciBert") and move it to ``config.DEVICE``.
        3. Set up AdamW with two parameter groups (with / without weight
           decay) and a linear warmup schedule.
        4. If ``config.dotrain`` is set, train for ``config.EPOCHS`` epochs and
           save the weights to ``config.MODEL_SAVED``.
        5. Reload the saved weights, run evaluation, pickle the outputs and
           targets to ``config.PREDICTIONS_PATH`` and report the metrics.

    Raises:
        ValueError: if ``config.modelName`` is not a supported model name.
    """
    trainDataset, testDataset, labelGenerator = utils.loadDataset()

    # Making DataLoaders
    trainDataLoader = torch.utils.data.DataLoader(
        trainDataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        pin_memory=True)
    testDataLoader = torch.utils.data.DataLoader(
        testDataset, batch_size=config.TEST_BATCH_SIZE, num_workers=1)

    totalNOsOfLabels = len(labelGenerator.classes_)

    device = torch.device(config.DEVICE)

    # Defining Model
    print("Making model:- ", config.modelName)
    if config.modelName == "BertBase":
        citemodel = model.BERTBaseUncased(numOfLabels=totalNOsOfLabels,
                                          dropout=config.DROPOUT)
    elif config.modelName == "SciBert":
        citemodel = model.SciBertUncased(numOfLabels=totalNOsOfLabels,
                                         dropout=config.DROPOUT)
    else:
        # Fail with a clear message instead of an unbound-variable error on
        # the first use of `citemodel` below.
        raise ValueError(
            "Unsupported config.modelName: {!r} "
            "(expected 'BertBase' or 'SciBert')".format(config.modelName))
    citemodel.to(device)

    param_optimizer = list(citemodel.named_parameters())
    # Parameters whose name contains one of these substrings get no weight
    # decay (L2 penalty).
    # NOTE(review): an earlier comment said LayerNorm.weight should be decayed
    # again (to match the Google TF implementation), but it is still listed
    # here — confirm which is intended.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            # NOTE(review): an earlier comment claimed this was changed
            # "from 0.001 to 0.1"; the value in effect is 0.01.
            "weight_decay": 0.01,
        },
        {
            "params":
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        }
    ]

    # One scheduler step per training batch over all epochs.
    num_train_steps = int(len(trainDataLoader) * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=config.LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_train_steps * config.WARMUP_PROPORTION,
        num_training_steps=num_train_steps)

    if config.dotrain:
        print('In Training')
        for epoch in range(config.EPOCHS):
            trainingLoss = engine.train(trainDataLoader, citemodel, optimizer,
                                        device, scheduler)
            print("Epoch: ", epoch, " Loss: ", trainingLoss, '\n')

        # Saving the model
        os.makedirs(os.path.dirname(config.MODEL_SAVED.format(
            config.modelName)),
                    exist_ok=True)
        torch.save(citemodel.state_dict(),
                   config.MODEL_SAVED.format(config.modelName))
        print('Model is saved at: ',
              config.MODEL_SAVED.format(config.modelName))

    # Evaluating the model: the weights are always reloaded from disk, so a
    # previously saved model is required when config.dotrain is false.
    print("Loading the model")
    citemodel.load_state_dict(
        torch.load(config.MODEL_SAVED.format(config.modelName)))
    outputs, targets = engine.eval(testDataLoader, citemodel, device)

    # Saving the results with corresponding targets
    os.makedirs(os.path.dirname(
        config.PREDICTIONS_PATH.format(config.modelName)),
                exist_ok=True)
    with open(config.PREDICTIONS_PATH.format(config.modelName), 'wb') as f:
        pickle.dump(outputs, f)  # First saved the predicted outputs
        pickle.dump(targets, f)  # Then saved the corresponding targets

    print('Starting Evaluation...')
    utils.metric(outputs, targets)

示例#20

0

显示文件

def optimize_parameters(
    parameters, dataset, train_steps=1000, pretrain_steps=250
):
    """
    Returns the best hyperparameter setting found for the ARL model.

    Every combination of the given hyperparameter values is evaluated with
    5-fold cross-validation; the combination and training step with the
    highest fold-averaged AUC win.

    args:
        parameters: a dictionary with the hyperparameters and their values,
                    e.g. {'batch_size': [32, 64, 256], [...]}. Each
                    combination is unpacked, in the dictionary's value order,
                    as (batch_size, learner_lr, adversary_lr).
        dataset: name of the dataset ([toy_data, uci_adult, compas, law]).
        train_steps: forwarded to train_for_n_iters.
        pretrain_steps: forwarded to train_for_n_iters.

    returns:
        (results, best_auc, best_step) where results maps "batch_size",
        "lr_learner" and "lr_adversary" to the winning values.

    raises:
        ValueError: if the parameter grid contains no combination.
    """

    # Load the training data.
    train_dataset = loadDataset(
        dataset=dataset,
        train_or_test="train",
        embedding_size=32,
    )

    # Create the default model parameters.
    model_params = {
        "embedding_size": train_dataset.categorical_embedding_sizes,
        "n_num_cols": len(train_dataset.mean_std.keys()),
        "learner_hidden_units": [64, 32],
        "adversary_hidden_units": [32],
        "batch_size": None,
    }

    lr_params = {"learner": None, "adversary": None}

    cross_val = KFold(n_splits=5)
    steps = None

    # Create a defaultdict for the results.
    params2aucs = defaultdict(list)

    # Materialize all combinations once, so that they can be both counted and
    # iterated.
    options = list(itertools.product(*parameters.values()))
    n_options = len(options)
    if not options:
        # Without any combination there is nothing to rank; fail early with a
        # clear message rather than at the final argmax.
        raise ValueError(
            "parameters must provide at least one value for every "
            "hyperparameter"
        )

    for i, (batch_size, learner_lr, adversary_lr) in enumerate(options, 1):
        iter_start = time.time()
        model_params["batch_size"] = batch_size
        lr_params["learner"] = learner_lr
        lr_params["adversary"] = adversary_lr
        option_key = (batch_size, learner_lr, adversary_lr)

        print(
            f"--- ({i}/{n_options}) batch_size: {batch_size}, "
            f"learner_lr: {learner_lr}, adversary_lr: {adversary_lr}"
        )

        # 5-fold cross-validation
        for train_index, test_index in cross_val.split(train_dataset):

            # Get the performance of the model
            metrics = train_for_n_iters(
                train_dataset.get_split(train_index),
                train_dataset.get_split(test_index),
                model_params,
                lr_params,
                average_over=5,
                train_steps=train_steps,
                pretrain_steps=pretrain_steps,
                print_loss=False,
            )
            params2aucs[option_key].append(metrics.auc_avg)
            # The steps of the most recent run are reused for reporting.
            # NOTE(review): presumably identical for every run — confirm.
            steps = metrics.steps
        iter_stop = time.time()

        # Average the folds of this combination; the argmax then selects the
        # best position along the remaining axis, which is used to index
        # `steps`.
        mean_best_auc = np.mean(params2aucs[option_key], axis=0)

        best_auc_idx = np.argmax(mean_best_auc)

        print(
            f"\t took {iter_stop - iter_start:.0f} seconds | "
            f"best AUC is {mean_best_auc[best_auc_idx]:.3f} on step "
            f"{steps[best_auc_idx]}"
        )

    # Average the folds.
    params2aucs = {
        option: np.mean(aucs, axis=0) for option, aucs in params2aucs.items()
    }

    # Find the highest AUC.
    params = list(params2aucs.keys())
    aucs = np.array(list(params2aucs.values()))

    # aucs is indexed as (combination, step).
    param_idx, step_idx = np.unravel_index(np.argmax(aucs), aucs.shape)
    best_auc = aucs[param_idx, step_idx]
    best_params = params[param_idx]
    best_step = steps[step_idx]

    # Return the results.
    results = {
        "batch_size": best_params[0],
        "lr_learner": best_params[1],
        "lr_adversary": best_params[2],
    }
    return results, best_auc, best_step

示例#21

0

显示文件

文件： app.py 项目： ClimbsRocks/nlpSentiment

from sklearn.feature_extraction import DictVectorizer
from sklearn.cross_validation import train_test_split

import utils
import trainClassifiers

from sentimentCorpora import nltkMovieReviews
from sentimentCorpora import stsTwitterCorpus
from sentimentCorpora import atcTwitterCorpus
from sentimentCorpora import nltkTwitterCorpus
from sentimentCorpora import nltkTwitterNoEmoticonsCorpus


# Read the training corpus and tokenize it.
# NOTE(review): the meaning of the second positional argument (10 here, 1 for
# the test file) is defined inside utils.loadDataset and is not visible here.
trainingTweets, trainingSentiment, allRows = utils.loadDataset(
    'training.1600000.processed.noemoticon.csv', 10)
trainingTweets, trainingSentiment = utils.tokenize(
    trainingTweets, trainingSentiment)


# Same two steps for the test file.
testTweets, testSentiment, testRows = utils.loadDataset(
    'testdata.manual.2009.06.14.csv', 1)
testTweets, testSentiment = utils.tokenize(testTweets, testSentiment)


# The raw labels are the strings '0' and '4'. Rather than let the classifier
# treat them as two unrelated categories, collapse them into one binary
# target: '4' becomes 1, every other value becomes 0.
cleanedTrainingSentiment = [
    1 if label == '4' else 0 for label in trainingSentiment
]

示例#22

0

显示文件

文件： dae.py 项目： zxy14120448/SDAE_abnormal_detection

    def pretrain(self, batch_size, num_epoch, sess):
        """Train one optimizer per layer score, then hand over to ``finetuning``.

        For every index ``i`` of ``self.dimensions`` an RMSProp optimizer that
        minimizes ``self.scores[i]`` is built and run for ``num_epoch`` passes
        over the batches yielded by ``utils.loadDataset``.  Progress is printed
        every 100 steps; every 2000 steps a summary is written and a checkpoint
        is saved under ``runs/<unix timestamp>/`` in the current directory.

        Args:
            batch_size: Batch size forwarded to ``utils.loadDataset`` and to
                ``finetuning``.
            num_epoch: Number of passes over the dataset per layer.
            sess: TensorFlow session used to run every op.
        """
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        params_prefix = os.path.join(out_dir, 'params')
        os.makedirs(params_prefix)
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        # Build the path with os.path.join: the previous hard-coded "\\"
        # separator only produced <out_dir>/params/params.txt on Windows.
        params_path = os.path.join(params_prefix, "params.txt")
        with open(params_path, 'w') as params_file:
            # NOTE(review): relies on `flags` being an iterable of strings;
            # its definition is outside this method -- confirm.
            params_file.writelines(flags)

        sess.run(tf.global_variables_initializer())
        for i in range(len(self.dimensions)):
            # Each layer gets its own step counter and optimizer.
            global_step = tf.Variable(0, trainable=False, name="global_step")
            learning_rate = 0.01
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  momentum=self.momentum)
            grads_and_vars = optimizer.compute_gradients(self.scores[i])
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            # The optimizer just created new variables that need initial
            # values.
            # NOTE(review): this initializer covers *all* global variables,
            # so it presumably also resets weights trained for earlier
            # layers -- confirm that this is intended.
            sess.run(tf.global_variables_initializer())

            score_summary = tf.summary.scalar("score", self.scores[i])
            train_summary_op = tf.summary.merge([score_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            for j in range(num_epoch):
                for batch in utils.loadDataset(batch_size,
                                               max=flags.max,
                                               dataset_dir=flags.datasetPath):
                    # Random 0/1 mask; each entry is 1 with probability
                    # 1 - flags.corrupt_prob.
                    mask_t = np.random.binomial(1, 1 - flags.corrupt_prob,
                                                batch.shape)
                    self.batch = batch
                    feed = {self.input_x: batch, self.mask: mask_t}
                    _, score, step = sess.run(
                        [train_op, self.scores[i], global_step],
                        feed_dict=feed)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % 100 == 0:
                        time_str = datetime.datetime.now().isoformat()
                        print("{}: traning Layer_{} ".format(time_str, i) +
                              "epoch:%d " % j + "step: %d" % step +
                              "  score: {}".format(score))

                    if current_step % 2000 == 0:
                        # NOTE(review): this runs train_op a second time on
                        # the same batch just to obtain the summary; kept
                        # as in the original.
                        _, score, step, summaries = sess.run(
                            [train_op, self.scores[i], global_step,
                             train_summary_op],
                            feed_dict=feed)
                        train_summary_writer.add_summary(summaries, step)
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))

        self.finetuning(batch_size, num_epoch, sess, saver, out_dir)

示例#23

0

显示文件

文件： dae.py 项目： zxy14120448/SDAE_abnormal_detection

    def finetuning(self, batch_size, num_epoch, sess, saver, out_dir):
        """Build a decoder stack, train it end to end and fit a one-class SVM.

        A decoder layer is appended for every entry of ``self.dimensions``
        using the transposed weights in ``self.Ws``; the squared
        reconstruction error against ``self.input_x`` is minimized with plain
        gradient descent for ``num_epoch`` passes over
        ``utils.loadDataset``.  Afterwards the whole dataset is run through
        the network, a ``svm.OneClassSVM`` is fitted on ``encodes[3]`` and
        pickled to ``./svm.model``.

        Args:
            batch_size: Batch size forwarded to ``utils.loadDataset``.
            num_epoch: Number of passes over the dataset.
            sess: TensorFlow session used to run every op.
            saver: Ignored -- the name is rebound to a fresh
                ``tf.train.Saver`` before it is ever read.
            out_dir: Directory that receives checkpoints and summaries.
        """

        # Decoding starts from the output of the last encoder layer.
        current_input = self.layer_output[len(self.dimensions) - 1]
        self.ft_losses = [tf.constant(0.0) for _ in self.dimensions]
        for layer_i, dimension in enumerate(self.dimensions):
            print(2 - layer_i)
            # NOTE(review): the literals 3, 2 and 225 hard-code a network
            # with four entries in self.dimensions and (presumably) a
            # 225-wide input -- confirm before changing the architecture.
            if layer_i == 3:
                n_output = 225
            else:
                n_output = self.dimensions[2 - layer_i]
                print(n_output)
            with tf.name_scope("finetuning_decoder_%i" % layer_i):
                # Decoder weights are the transposed encoder weights, taken
                # in reverse layer order; only the bias is a new variable.
                W = tf.transpose(self.Ws[3 - layer_i], name="W")
                b = tf.Variable(tf.constant(0.1, shape=[n_output]), name="b")
                self.out_put = tf.nn.sigmoid(tf.matmul(current_input, W) + b)
                self.ft_losses[layer_i] += tf.nn.l2_loss(W)
                current_input = self.out_put
        with tf.name_scope('fn_score'):
            loss = tf.pow(self.out_put - self.input_x, 2)
            # NOTE(review): layer_i still holds the last loop index here, so
            # only the final layer's L2 term is added to the score -- confirm
            # whether a sum over all layers was intended.
            self.score = tf.reduce_sum(
                loss,
                name="score") + self.ft_losses[layer_i] * flags.l2_reg_lambda
        global_step = tf.Variable(0, trainable=False, name="global_step")
        learning_rate = 0.001
        # learning_rate = tf.train.exponential_decay(0.01, global_step, flags.max / flags.batch_size, 0.98, staircase=True)
        # Rebinds the `saver` parameter; the caller's saver is not used.
        saver = tf.train.Saver(tf.global_variables())
        # optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=self.momentum)
        # optimizer = tf.train.AdamOptimizer(learning_rate)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.score)
        finetune_op = optimizer.apply_gradients(grads_and_vars,
                                                global_step=global_step)
        # NOTE(review): this (and the second initializer run further down)
        # initializes every global variable, which presumably also resets
        # the weights learned during pretraining -- confirm.
        sess.run(tf.initialize_all_variables())
        print("Writing to {}\n".format(out_dir))

        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        # grad_summaries = []
        # for g, v in grads_and_vars:
        #     if g is not None:
        #         grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
        #         sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
        #         grad_summaries.append(grad_hist_summary)
        #         grad_summaries.append(sparsity_summary)
        # grad_summaries_merged = tf.summary.merge(grad_summaries)
        score_summary = tf.summary.scalar("score", self.score)

        finetune_summary_op = tf.summary.merge([score_summary])
        finetune_summary_dir = os.path.join(out_dir, "summaries", "finetune")
        finetune_summary_writer = tf.summary.FileWriter(
            finetune_summary_dir, sess.graph)
        sess.run(tf.global_variables_initializer())
        print("Starting finetuning")
        for j in range(num_epoch):
            for batch in utils.loadDataset(batch_size,
                                           max=flags.max,
                                           dataset_dir=flags.datasetPath):
                # Random 0/1 mask; each entry is 1 with probability
                # 1 - flags.corrupt_prob.
                mask_t = np.random.binomial(1, 1 - flags.corrupt_prob,
                                            batch.shape)
                # mean_img = np.mean(batch, axis=1)
                # batch = np.array([img - mean_img for img in batch.T])
                # batch = batch.T
                _, score, step, summaries = sess.run([
                    finetune_op, self.score, global_step, finetune_summary_op
                ],
                                                     feed_dict={
                                                         self.input_x: batch,
                                                         self.mask: mask_t
                                                     })
                current_step = tf.train.global_step(sess, global_step)
                time_str = datetime.datetime.now().isoformat()
                # Progress line every 100 steps.
                if current_step % 100 == 0:
                    print("{}: finetuning  step: {}".format(time_str, step) +
                          "  score: {}".format(score))

                # Summary and checkpoint every 10000 steps.
                if current_step % 10000 == 0:
                    finetune_summary_writer.add_summary(summaries, step)
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
        # with graph.as_default():
        #     with sess.as_default():
        # Only referenced by the commented-out plotting code below.
        n_examples = 15
        with tf.device('/cpu:0'):
            test_xs = utils.load_whole_dataset(1000000, flags.datasetPath)
            # Success probability 1 makes this an all-ones mask, i.e. no
            # corruption is applied for this pass.
            mask = np.random.binomial(1, 1, test_xs.shape)
        score, recon, encodes = sess.run(
            [self.score, self.out_put, self.layer_output],
            feed_dict={
                self.input_x: test_xs,
                self.mask: mask
            })
        # fig, axs = plt.subplots(2, n_examples, figsize=(10, 2))
        # for example_i in range(n_examples):
        #     axs[0][example_i].imshow(
        #         # np.reshape(test_xs[example_i, :], (28, 28)))
        #         np.reshape(test_xs[example_i, :], (15, 15)))
        #     axs[1][example_i].imshow(
        #         # np.reshape([recon[example_i, :] + mean_img], (28, 28)))
        #         np.reshape([recon[example_i, :]], (15, 15)))
        # print ('Plot complete now showing...')
        # Fit the one-class SVM on the fourth entry of the evaluated layer
        # outputs and persist it next to the working directory.
        clf = svm.OneClassSVM(kernel='rbf', gamma='auto', nu=1e-3)
        clf.fit(encodes[3])
        with open('./svm.model', 'wb') as m:
            pickle.dump(clf, m)

示例#24

0

显示文件

        'If specified, reads and processes the dataset again. ' +
        'Else reads an already processed dataset from ' +
        constants.CLASSIFICATION_DATA_PATH)
    return parser.parse_args(sys.argv[1:])


def printTopics(model):
    """Print the words of up to five topics of ``model``, one block per topic.

    ``model.print_topics(num_topics=5, num_words=5)`` must return an iterable
    of ``(index, description)`` pairs; the index is shown one-based.
    """
    template = 'Words in Topic {}:\n {}'
    for topic_index, description in model.print_topics(num_topics=5,
                                                       num_words=5):
        print(template.format(topic_index + 1, description))


if __name__ == '__main__':
    arguments = parseArgs()
    dataset = utils.loadDataset(arguments.reprocessDataset,
                                classification=False,
                                splitWords=True)

    # Creating dictionary from dataset, where each unique term is assigned an index
    dictionary = corpora.Dictionary(dataset)

    # Converting list of documents into Bag of Words using dictionary
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in dataset]

    # Training models on the document term matrix
    modelList = [
        LdaModel(doc_term_matrix, num_topics=10, id2word=dictionary, passes=2),
        LsiModel(doc_term_matrix, num_topics=10, id2word=dictionary)
    ]

    for model in modelList:

示例#25

0

显示文件

文件： app.py 项目： suvigyavijay/nlpSentiment

warnings.filterwarnings("ignore", category=DeprecationWarning)

from sklearn.feature_extraction import DictVectorizer
from sklearn.cross_validation import train_test_split

import utils
import trainClassifiers

from sentimentCorpora import nltkMovieReviews
from sentimentCorpora import stsTwitterCorpus
from sentimentCorpora import atcTwitterCorpus
from sentimentCorpora import nltkTwitterCorpus
from sentimentCorpora import nltkTwitterNoEmoticonsCorpus

# Read the training corpus and tokenize it.
# NOTE(review): the meaning of the second positional argument (10 here, 1 for
# the test file) is defined inside utils.loadDataset and is not visible here.
training_file = 'training.1600000.processed.noemoticon.csv'
trainingTweets, trainingSentiment, allRows = utils.loadDataset(
    training_file, 10)
trainingTweets, trainingSentiment = utils.tokenize(
    trainingTweets, trainingSentiment)

# Same two steps for the test file.
test_file = 'testdata.manual.2009.06.14.csv'
testTweets, testSentiment, testRows = utils.loadDataset(test_file, 1)
testTweets, testSentiment = utils.tokenize(testTweets, testSentiment)

# The raw labels are the strings '0' and '4'. Rather than let the classifier
# treat them as two unrelated categories, collapse them into one binary
# target: '4' becomes 1, every other value becomes 0.
cleanedTrainingSentiment = [
    1 if label == '4' else 0 for label in trainingSentiment
]

示例#26

0

显示文件

文件： run.py 项目： boru-roylu/Course_PA


class Logger(object):
    """File-like object that duplicates everything written to it.

    Each message is written both to the stream that was ``sys.stdout`` when
    the instance was created and to ``logFilename`` (opened in append mode),
    so the instance can be installed as a replacement for ``sys.stdout``.
    """

    def __init__(self, logFilename):
        # Capture the current stdout now: once this object is assigned to
        # sys.stdout, looking it up again would point back at ourselves.
        self.terminal = sys.stdout
        self.log = open(logFilename, "a")

    def write(self, message):
        """Write ``message`` to the terminal stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both targets.

        Part of the file-object protocol: without it, any
        ``sys.stdout.flush()`` call made while this object is installed as
        ``sys.stdout`` raises ``AttributeError``.
        """
        self.terminal.flush()
        self.log.flush()


if __name__ == '__main__':
    P = dnnUtils.Parameters(setting)
    print P.outputFilename
    datasets = utils.loadDataset(filename=P.datasetFilename, totalSetNum=3)

    if not USE_EXIST_MODEL:
        sys.stdout = Logger(P.logFilename)
        bestModel = dnn.trainDNN(datasets, P)
        bestModelFilename = '../model/' + P.outputFilename + '.model'
        utils.makePkl(bestModel, P.bestModelFilename)
    else:
        # TODO use filename to build P
        bestModelFilename = sys.argv[2]
        bestModel = utils.loadPkl(bestModelFilename)

    dnn.getResult(bestModel, datasets[1], P, 'valid', P.validResultFilename)
    dnn.getResult(bestModel, datasets[2], P, 'test', P.testResultFilename)
    dnn.getProb(bestModel, datasets[0], P.trainProbFilename, P)
    dnn.getProb(bestModel, datasets[1], P.validProbFilename, P)

示例#27

0

显示文件

文件： dec.py 项目： tom1092/Deep-Embeddig-Clustering

        cmdString = int(raw_input("\nDEC> "))

        # Exit
        if cmdString == 0:
            import sys
            sys.exit(0)

        # Image preprocessing and segmentation
        elif cmdString == 1:
            imgPath = str(raw_input('\nInsert the path of the folder images: '))
            imS.doSegmentation(imgPath)

        # Model Fitting
        elif cmdString == 2:
            datasetPath = str(raw_input('Insert the path of dataset: '))
            dataset, datasetfileNames = loadDataset(datasetPath)
            epochs = int(raw_input('Insert number of epochs: '))
            batchSize = int(raw_input('Insert the batch size: '))
            dec.fit('adam', 'mean_squared_error', 'mae', dataset, epochs, batchSize)
            dec.save_model()

        # Clustering
        elif cmdString == 3:
            dec.load_model()
            testSetPath = str(raw_input('Insert the path of the test set: '))
            testSet, testSetFileNames = loadDataset(testSetPath)

            print '\n', ' ' * 3, '0) KMeans'

            clustType = int(raw_input("\nDEC> "))

示例#28

0

显示文件

文件： filler.py 项目： douduck08/2015-NTU-MLDS

def filler():
    """Load ``../pkl/small_data.pkl`` and run ``fillerCore`` on its first three entries.

    Returns:
        A 3-tuple with the ``fillerCore`` results for ``dataSet[0]``,
        ``dataSet[1]`` and ``dataSet[2]``, in that order (named train,
        valid and test by the original code).
    """
    loaded = loadDataset("../pkl/small_data.pkl", 3)
    return tuple(fillerCore(loaded[position]) for position in (0, 1, 2))

示例#29

0

显示文件

文件： model.py 项目： shannonLIU404/duplicate-question-identification

# Identifier of this model variant.
# NOTE(review): where it is consumed (file names, logs, ...) is not visible
# in this chunk.
MODEL_NAME = 'base'
###########################################################

# Define constants
# NOTE(review): apart from MAX_LENGTH, which build_model() uses as the length
# of both input sequences, the roles of these values are inferred from their
# names only -- confirm against the model and training code.
REGULARIZE = 0.0001
GRU_REGULARIZE = 0.0005
MAX_LENGTH = 30
DROPOUT = 0.2
HIDDEN_RNN_UNITS = 192
HIDDEN_DENSE_UNITS = 2048
LEARNING_RATE = 0.001
EPOCHS = 100
BATCH_SIZE = 64

## Load Datasets
# loadDataset() is called without arguments and must return exactly these
# eight values in this order (note that valid_y precedes valid_features,
# whereas train_features precedes train_y).
train_x1, train_x2, train_features, train_y, valid_x1, valid_x2, valid_y, valid_features = loadDataset(
)
print('Dataset Loaded')

# The timer starts after the dataset has been loaded, so loading time is not
# included in anything measured against start_time.
start_time = time.time()

## Load Embedding Matrix
(embedding_matrix, vocab_size) = load_embedding_matrix()


## Define Model
def build_model():
    input_1 = Input(shape=(MAX_LENGTH, ))
    input_2 = Input(shape=(MAX_LENGTH, ))

    e = Embedding(vocab_size,
                  300,

示例#30

0

显示文件

文件： create_evaluation_plots.py 项目： 5genesis/Security-Framework

def _prepare_plot_frame(df):
    """Back-fill missing values and replace +/-inf with 0.0 in ``df``, in place."""
    # DataFrame.bfill() is the non-deprecated spelling of
    # fillna(method='backfill').
    df.bfill(inplace=True)
    df.replace([np.inf, -np.inf], 0.0, inplace=True)


def _sliding_window_errors(model, df, cols, window, n_steps, n_features):
    """Return one ``printPredictionErrors`` dict per sliding window of ``df``.

    Window ``k`` covers rows ``k .. k + window - 1``; there are
    ``len(df) - window`` windows (none if ``df`` is not longer than ``window``).
    """
    errors = []
    for start in range(0, len(df) - window):
        batch = df.iloc[start:start + window][cols].to_numpy()
        X, y = split_sequences(batch, n_steps)
        X = X.reshape((len(X), n_steps, n_features))
        yhat = model.predict(X, verbose=0)
        errors.append(printPredictionErrors(y, yhat))
    return errors


def _plot_error_series(errors, series, title, filename):
    """Plot the selected RMSE series against the window index and save the figure.

    ``series`` is a list of ``(rmse_key, color, label)`` triples; each key is
    looked up in every dict of ``errors``.  The figure is cleared afterwards.
    """
    for key, color, label in series:
        values = [window_errors[key] for window_errors in errors]
        plt.plot(list(range(len(values))), values, color=color, label=label)
    plt.title(title)
    plt.xlabel('# of Sequence')
    plt.ylabel('RMSE')
    plt.legend()
    plt.savefig(filename)
    plt.clf()


def evaluate(thresholds_file, cpu_testset, iperf_testset, trainset,
             time_window_threshold):
    """Plot per-window RMSE curves for the CPU, iperf and training datasets.

    The model ``model/5g_autoencoder.h5`` predicts every sliding window of
    each dataset; the resulting RMSE values are written to five PNG files
    under ``plots/``.

    param thresholds_file: Not used by this function.
    param cpu_testset: Passed to ``loadDataset`` for the CPU stress test data.
    param iperf_testset: Passed to ``loadDataset`` for the iperf stress test data.
    param trainset: Passed to ``loadDataset`` for the training data.
    param time_window_threshold: Currently ignored -- see the note in the body.

    return: None.
    """
    plt.clf()

    stats_json = data_prefix + 'normalization_stats.json'

    model = load_model('model/5g_autoencoder.h5')
    normalization_stats = loadDictJson(stats_json)

    cols_to_normalize = getFeatures()
    cols = [c + '_normalized' for c in cols_to_normalize]

    # NOTE(review): the caller's value has always been overwritten with a
    # fixed window of 30.  Kept as is because the callers are not visible
    # here; drop this line once they are confirmed to pass a valid window.
    time_window_threshold = 30

    n_steps = 4
    n_features = len(cols)

    logging.info('Loading evaluation datasets')
    val_df = loadDataset(trainset)
    cpu_df = loadDataset(cpu_testset)
    iperf_df = loadDataset(iperf_testset)

    frames = (cpu_df, iperf_df, val_df)
    for frame in frames:
        _prepare_plot_frame(frame)

    logging.info('Normalizing evaluation data')
    for col in cols_to_normalize:
        col_min = normalization_stats[col + '_min']
        col_max = normalization_stats[col + '_max']
        for frame in frames:
            frame[col + '_normalized'] = normalizeFeature(
                frame, col, col_min, col_max)

    logging.info('Evaluating for CPU and memory metrics')
    cpu_errors = _sliding_window_errors(
        model, cpu_df, cols, time_window_threshold, n_steps, n_features)
    _plot_error_series(
        cpu_errors,
        [('cpu_rmse', 'blue', 'CPU Percentage Rate (mode=user)')],
        'CPU Attack Dataset', 'plots/evaluate_cpu.png')

    logging.info('Evaluating for network and 5G metrics')
    iperf_errors = _sliding_window_errors(
        model, iperf_df, cols, time_window_threshold, n_steps, n_features)
    _plot_error_series(
        iperf_errors,
        [('net_up_rmse', 'green', 'Network Up Rate'),
         ('net_down_rmse', 'purple', 'Network Down Rate')],
        'iperf Attack Dataset', 'plots/evaluate_iperf_net.png')
    _plot_error_series(
        iperf_errors,
        [('net_up_5g_rmse', 'green', '5G Network Up Rate'),
         ('net_down_5g_rmse', 'blue', '5G Network Down Rate')],
        'iperf Attack Dataset', 'plots/evaluate_iperf_5g.png')

    logging.info('Evaluating with training data')
    val_errors = _sliding_window_errors(
        model, val_df, cols, time_window_threshold, n_steps, n_features)
    _plot_error_series(
        val_errors,
        [('cpu_rmse', 'blue', 'CPU Percentage Rate (mode=user)'),
         ('mem_rmse', 'red', 'Memory Percentage Rate'),
         ('net_up_rmse', 'green', 'Network Up Rate'),
         ('net_down_rmse', 'purple', 'Network Down Rate')],
        'Training Dataset (Edge Metrics)', 'plots/evaluate_val_1.png')
    _plot_error_series(
        val_errors,
        [('net_up_5g_rmse', 'orange', '5G Network Up Rate'),
         ('net_down_5g_rmse', 'cyan', '5G Network Down Rate')],
        'Training Dataset (5G Metrics)', 'plots/evaluate_val_2.png')

示例#31

0

显示文件

文件： main_ANN_pruning.py 项目： OddUlrich/Bio-inspired-Computing

from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Rebuild the network and restore its saved parameters.
# NOTE(review): the three sizes presumably have to match the ones used when
# 'ann_net_model_genre.pt' was saved -- confirm.
feature_num = 11
hidden_num = 30
output_num = 3

load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('ann_net_model_genre.pt'))
#load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
# Switch the module to evaluation mode.
load_net.eval()

# Load the testing dataset used to evaluate the new network.
x_test, y_test = loadDataset('testing')

# Load the vector-angle table (no header row); its first three columns are
# exposed as 'row', 'col' and 'vector'.
vectors = pd.read_excel('ann_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'vector': vectors.iloc[:, 2]
})

# Sort by the vector-angle value in ascending order, then collect the
# distinct 'row' and 'col' values in that sorted order.
increase_res = raw_df.sort_values('vector', ascending=True)
unique_row = increase_res.row.unique()
unique_col = increase_res.col.unique()

# Initialize all the status parameters.

示例#32

0

显示文件

def _sanitize_threshold_frame(df):
    """Back-fill missing values and replace +/-inf with 0.0 in ``df``, in place."""
    # DataFrame.bfill() is the non-deprecated spelling of
    # fillna(method='backfill').
    df.bfill(inplace=True)
    df.replace([np.inf, -np.inf], 0.0, inplace=True)


def _iter_window_errors(df, window):
    """Yield the ``printPredictionErrors`` dict of every sliding window of ``df``.

    Window ``k`` covers rows ``k .. k + window - 1``.  ``cols``, ``n_steps``,
    ``n_features`` and ``model`` are not parameters; they are resolved from
    the enclosing module, exactly as in ``evaluate``.
    """
    for start in range(0, len(df) - window):
        batch = df.iloc[start:start + window][cols].to_numpy()
        X, y = split_sequences(batch, n_steps)
        X = X.reshape((len(X), n_steps, n_features))
        yhat = model.predict(X, verbose=0)
        yield printPredictionErrors(y, yhat)


def evaluate(thresholds_file, cpu_testset, iperf_testset, trainset,
             time_window_threshold):
    """
    Evaluate trained model. If user has not set all thresholds for anomalies, evaluation will also set
    the remaining thresholds. Evaluation uses a dataset that contains a CPU stress test, an iperf
    stress test and the data used for training. Thresholds are defined by calculating the
    RMSEs from actual values and taking the 99th percentile of these errors for each feature separately and overall.
    Thresholds that the user has already set are kept unchanged.

    param thresholds_file: File, where user-defined thresholds are saved. This will be updated if new thresholds are proposed.
    param cpu_testset: File containing dataset with CPU stress test.
    param iperf_testset: File containing dataset with iperf stress test.
    param trainset: File containing the dataset used for training.
    param time_window_threshold: Time window for keeping the last-n records. In evaluation data are predicted in batches of n.

    return: None.
    """

    # Loading thresholds from file. Create an empty dict if no file exists
    thresholds_dict = {}
    if path.exists(thresholds_file):
        thresholds_dict = loadDictJson(thresholds_file)

    logging.info('Loading evaluation datasets')
    val_df = loadDataset(trainset)
    cpu_df = loadDataset(cpu_testset)
    iperf_df = loadDataset(iperf_testset)

    frames = (cpu_df, iperf_df, val_df)
    for frame in frames:
        _sanitize_threshold_frame(frame)

    logging.info('Normalizing evaluation data')
    for col in cols_to_normalize:
        col_min = normalization_stats[col + '_min']
        col_max = normalization_stats[col + '_max']
        for frame in frames:
            frame[col + '_normalized'] = normalizeFeature(
                frame, col, col_min, col_max)

    # RMSE keys contributed by each dataset.  The CPU stress test feeds the
    # CPU/memory thresholds, the iperf stress test the network/memory ones,
    # and the training data feeds all of them.
    cpu_keys = ('rmse_total', 'cpu_rmse', 'cpu_rx_rmse', 'cpu_tx_rmse',
                'mem_rmse')
    iperf_keys = ('rmse_total', 'net_down_rmse', 'net_up_rmse',
                  'net_down_5g_rmse', 'net_up_5g_rmse', 'mem_rmse')
    val_keys = ('rmse_total', 'cpu_rmse', 'cpu_rx_rmse', 'cpu_tx_rmse',
                'mem_rmse', 'net_down_rmse', 'net_up_rmse',
                'net_down_5g_rmse', 'net_up_5g_rmse')
    collected = {key: [] for key in val_keys}

    stages = (
        ('Evaluating for CPU and memory metrics', cpu_df, cpu_keys),
        ('Evaluating for network and 5G metrics', iperf_df, iperf_keys),
        ('Evaluating with training data', val_df, val_keys),
    )
    for message, frame, keys in stages:
        logging.info(message)
        # Every stage predicts on the current window only.  The iperf stage
        # used to feed the whole dataframe on every iteration, which made
        # all of its windows identical.
        for window_errors in _iter_window_errors(frame,
                                                 time_window_threshold):
            for key in keys:
                collected[key].append(window_errors[key])

    # For thresholds, that are not defined by user, use suggested values
    suggested_from = (
        ('cpu_threshold', 'cpu_rmse'),
        ('mem_threshold', 'mem_rmse'),
        ('cpu_tx_threshold', 'cpu_tx_rmse'),
        ('cpu_rx_threshold', 'cpu_rx_rmse'),
        ('net_up_threshold', 'net_up_rmse'),
        ('net_down_threshold', 'net_down_rmse'),
        ('net_5g_up_threshold', 'net_up_5g_rmse'),
        ('net_5g_down_threshold', 'net_down_5g_rmse'),
        ('overall_threshold', 'rmse_total'),
    )
    for threshold_name, rmse_key in suggested_from:
        if threshold_name not in thresholds_dict:
            # np.percentile expects q on a 0-100 scale: 99 is the 99th
            # percentile, whereas 0.99 would be (almost) the minimum.
            thresholds_dict[threshold_name] = np.percentile(
                collected[rmse_key], 99)

    # Save new thresholds in same file
    saveDictJson(thresholds_dict, thresholds_file)

示例#33

0

显示文件

from reducing_net import reduced_rnn_net
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Hyper-parameters used to rebuild the previously saved RNN.
input_dim, hidden_dim, layer_dim = 1, 50, 1
# NOTE(review): the original comment here read "Four kinds of genres within
# 12 songs" while the value is 3 -- confirm the intended number of classes.
output_dim = 3

# Re-create the model, restore the weights stored in 'rnn_model.pt' and
# put the network into evaluation mode.
load_rnn = RNN_model(input_dim, hidden_dim, layer_dim, output_dim)
saved_state = torch.load('rnn_model.pt')
load_rnn.load_state_dict(saved_state)
load_rnn.eval()

# Load the testing dataset used to evaluate the new network and append a
# trailing dimension of size 1 to the inputs.
x_test, y_test = loadDataset('testing_sequence')
flat_input_test = x_test.unsqueeze(-1)

# Sequence lengths used for padded / packed sequences in the RNN model.
# Each of the 12 lengths below is repeated for 4 consecutive entries.
l = [1104, 1028, 980, 964, 960, 956, 956, 932, 868, 840, 836, 808]
test_seq_lens = np.zeros(4 * len(l))
for i, seq_len in enumerate(l):
    start = 4 * i
    test_seq_lens[start:start + 4] = seq_len

# Load the vector information: the first three spreadsheet columns become
# the 'row', 'col' and 'angle' columns of raw_df.
vectors = pd.read_excel('rnn_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    column: vectors.iloc[:, position]
    for position, column in enumerate(('row', 'col', 'angle'))
})

示例#34

0

显示文件

文件： main_reduction_subjective_rating.py 项目： OddUlrich/Bio-inspired-Computing

# Operation of addition: add the hidden-layer weight rows 16, 23 and 22 onto
# rows 2, 5 and 8 respectively. Rows 16, 22 and 23 are exactly the ones left
# out when the parameters are copied into the smaller network below.
for target_row, source_row in ((2, 16), (5, 23), (8, 22)):
    load_net.hidden.weight[target_row] += load_net.hidden.weight[source_row]

# Copy the remaining weight and bias values into a new-sized network.
# Each pair maps a slice of the new hidden units to the slice of old hidden
# units it is filled from (old units 16, 22 and 23 are skipped).
new_net = Net(11, 27, 3)
unit_slices = (
    (slice(None, 16), slice(None, 16)),
    (slice(16, 21), slice(17, 22)),
    (slice(21, None), slice(24, None)),
)
for new_units, old_units in unit_slices:
    # Hidden layer: one weight row and one bias entry per hidden unit.
    new_net.hidden.weight[new_units] = load_net.hidden.weight[old_units]
    new_net.hidden.bias[new_units] = load_net.hidden.bias[old_units]
    # Output layer: one weight column per hidden unit.
    new_net.output.weight[:, new_units] = load_net.output.weight[:, old_units]

# The output bias is copied over unchanged.
new_net.output.bias[:] = load_net.output.bias[:]
new_net.eval()

# Reload the test dataset and evaluate the shrunken network.
x_test, y_test = loadDataset()
acc, pred = test_model(new_net, x_test, y_test)

# Build, print and score the confusion matrix.
mat = confusion(x_test.size(0), 3, pred, y_test)
print("Confusion Matrix (after pruning)：")
print(mat)
F1_score(mat)

示例#35

0

显示文件

文件： run.py 项目： douduck08/2015-NTU-MLDS

    label = pp.correctLabel(endIndxGroup = endIndxGroup, name = name, label = label)
    pp.writeFile(filename = smoothedFilename, name = name, label = label)

class Logger(object):
    """Write-through stream that duplicates output to stdout and a log file.

    Intended to be assigned to ``sys.stdout`` so that everything printed is
    shown on the terminal and also appended to ``logFilename``.

    Args:
        logFilename: Path of the log file; it is opened in append mode.
    """

    def __init__(self, logFilename):
        # Remember the stream that is current *now*, so output still reaches
        # it after sys.stdout has been replaced by this object.
        self.terminal = sys.stdout
        self.log = open(logFilename, "a")

    def write(self, message):
        """Write ``message`` to both the terminal stream and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        Needed because code that treats this object as ``sys.stdout`` may
        call ``sys.stdout.flush()``; without this method that call raises
        ``AttributeError``.
        """
        self.terminal.flush()
        self.log.flush()

if __name__ == '__main__':
    # Script entry point: obtain a model (load an existing one or train a
    # new one), then write result and probability files for the dataset
    # splits.
    P = dnnUtils.Parameters(setting)
    print(P.outputFilename)
    datasets = utils.loadDataset(filename=P.datasetFilename, totalSetNum=3)

    if USE_EXIST_MODEL:
        # Re-use a pickled model whose path is the second CLI argument.
        # TODO use filename to build P
        bestModelFilename = sys.argv[2]
        bestModel = utils.loadPkl(bestModelFilename)
    else:
        # Train from scratch; from here on stdout is mirrored into the log.
        sys.stdout = Logger(P.logFilename)
        bestModel = dnn.trainDNN(datasets, P)
        # NOTE(review): this local path is computed but the model is saved to
        # P.bestModelFilename -- confirm which one is intended.
        bestModelFilename = '../model/' + P.outputFilename + '.model'
        utils.makePkl(bestModel, P.bestModelFilename)

    # datasets[1] is passed with the 'valid' tag, datasets[2] with 'test'.
    dnn.getResult(bestModel, datasets[1], P, 'valid', P.validResultFilename)
    dnn.getResult(bestModel, datasets[2], P, 'test', P.testResultFilename)
    # Probabilities are written for datasets[0] (train file) and
    # datasets[1] (valid file).
    dnn.getProb(bestModel, datasets[0], P.trainProbFilename, P)
    dnn.getProb(bestModel, datasets[1], P.validProbFilename, P)

示例#36

0

显示文件

def build_new_image(path,
                    k_means_instance,
                    input_dim,
                    encoder,
                    new_name='_new.png'):

    import sys

    print '\nSelect the distance you want for the choice of the best centroid'
    print '\n', ' ' * 3, '0) Mean Absolute Error compute on the encoded representation'
    print ' ' * 3, '1) Mean Squared Error compute on the encoded representation'
    print ' ' * 3, '2) Hausdorff distance on the binarized original images'

    distances = ['mae', 'mse', 'hausdorff']
    distance_type = int(raw_input("\nDEC> "))
    image = io.imread(path, as_grey=True)

    processed = image
    normalized_height = int(np.sqrt(input_dim))
    normalized_width = normalized_height

    processed = imageSegmentation.image_preproc(
        processed, binary_threshold=threshold_otsu(processed))
    labeled_image, num_features, max_width, max_height, max_label = imageSegmentation.find_connected_components(
        processed)

    centroidSet, centroidNames = loadDataset('Centroids')

    centroidImages = centroidSet

    # Get the encoded representation of the centroids
    centroidSet = encoder.predict(centroidSet)

    # Here I create a new image all white that will host each replaced char
    base_image = 255 * np.ones(image.shape)

    predictes = list()
    for i in range(1, max_label):

        r_s, c_s = np.where(labeled_image == i)
        if len(r_s) > 1 and len(c_s) > 1:

            # get the char to be replaced
            to_replace = image[min(r_s) - 1:max(r_s) + 2,
                               min(c_s) - 1:max(c_s) + 2]

            # resize before passing through the net
            to_replace = imresize(to_replace,
                                  (normalized_height, normalized_width))
            to_predict = to_replace

            if distance_type != 2:
                # get the encoded representation of the char
                to_predict = np.array(
                    to_replace.reshape(
                        (1, normalized_height * normalized_width)))
                to_predict = to_predict.astype('float32') / 255.
                to_predict = encoder.predict(to_predict)

            predictes.append([to_predict, r_s, c_s])

    pool = ThreadPoolExecutor(8)
    list_futures = list()
    for i in range(1, len(predictes)):
        list_futures.append(
            pool.submit(find_and_subst,
                        i,
                        base_image,
                        centroidSet,
                        predictes,
                        centroidImages,
                        metric=distances[distance_type]))

    for i in range(len(list_futures)):
        r = list_futures[i].result()
        sys.stdout.write(u'\u001b[1000D' + bcolors.RED + 'Creating: ' +
                         str(ceil(i * 100 / len(list_futures))) + '%')
        sys.stdout.flush()

    sys.stdout.write(bcolors.RESET)
    plt.imsave('temp.png', base_image, cmap=plt.cm.gray)