def load_data(self):
    # Column names begin after the leading info columns.
    col = self.data_copy.columns.values[self.len_info:]
    t_set = np.array(self.training_set)
    v_set = np.array(self.val_set)
    training_frame = pd.DataFrame(t_set, index=None, columns=col)
    val_frame = pd.DataFrame(v_set, index=None, columns=col)
    train_dataset = datasets(training_frame)
    val_dataset = datasets(val_frame)
    self.train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                    batch_size=self.bs,
                                                    shuffle=True)
    self.val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                                  batch_size=self.bs,
                                                  shuffle=True)
    return self.train_loader
def main():
    args = parse_args()

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_cls_net')(config)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input))

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        # model.load_state_dict(torch.load(config.TEST.MODEL_FILE))
        model.load_state_dict({
            k.replace('module.', ''): v
            for k, v in torch.load(config.TEST.MODEL_FILE)['state_dict'].items()
        })
    else:
        model_state_file = os.path.join(final_output_dir, 'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [0, 1]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # Data loading code
    valdir = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    test_dataset = datasets(dataset_root='./Data/', split='test',
                            size=config.MODEL.IMAGE_SIZE[0])

    valid_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(config, valid_loader, model, criterion, final_output_dir,
             tb_log_dir, None)
def __init__(self, bkps=[], mean=[], var=[], para_known=False):
    # Samples are appended one at a time.
    self.ts = []
    # Working buffer, reset each time a change point is detected.
    self.ts_temp = []
    # Cumulative sums.
    self.s_kt = []
    # Reverse cumulative sums.
    self.s_kt_reverse = []
    # Generator for PRI radar data.
    self.data = datasets()
    # Detected change points (output).
    self.bkp_detect = []
    # Ground-truth change points.
    self.bkps = bkps
    # Ground-truth segment means.
    self.mean = mean
    # Ground-truth segment variances.
    self.var = var
    # Counter for the intermediate buffer.
    self.index = 0
    # Per-sample ground-truth mean, expanded from the segment means.
    self.average = []
    self.para_known = para_known

    j = 0
    for i in range(len(mean)):
        while j < bkps[i]:
            self.average.append(mean[i])
            j += 1
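# For context only, a minimal sketch (assumed, not taken from this code) of the
# one-sided CUSUM recursion that cumulative-sum detectors like the class above
# typically build on: deviations from the reference mean mu accumulate, less a
# drift term delta, and the statistic is clamped at zero.
def cusum_step(s_prev, x, mu, delta):
    # s_k = max(0, s_{k-1} + (x_k - mu) - delta)
    return max(0.0, s_prev + (x - mu) - delta)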
def testrandomforest(self, dataset_name):
    datasetObj = datasets()
    dataset_files = datasetObj.get_datasetsFirstFilename(dataset_name)
    dataset_filestest = datasetObj.get_datasetsFirstFilenametest(dataset_name)

    # TODO: remove nsl-forest & predictor
    # bashCommand = "$HADOOP_PREFIX/bin/hadoop fs -rmr /user/ubuntu/nsl-forest"
    bashCommand = 'curl -i -X DELETE "http://54.186.225.72:50070/webhdfs/v1/user/ubuntu/nsl-forest?op=DELETE&user.name=ubuntu&recursive=true"'
    process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    print("output is testrandomforest" + output)
    print(error)

    # bashCommand = "$HADOOP_PREFIX/bin/hadoop fs -rmr /user/ubuntu/predictions"
    bashCommand = 'curl -i -X DELETE "http://54.186.225.72:50070/webhdfs/v1/user/ubuntu/predictions?op=DELETE&user.name=ubuntu&recursive=true"'
    process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    print("output is testrandomforest" + output)
    print(error)

    dataset_files = datasetObj.get_datasetsFirstFilename(dataset_name)
    bashCommand = "$HADOOP_PREFIX/bin/hadoop jar $MAHOUT_HOME/examples/target/mahout-examples-1.0-SNAPSHOT-job.jar org.apache.mahout.classifier.df.mapreduce.BuildForest -Dmapred.max.split.size=1874231 -d /data/" + dataset_name + "/data/test/" + dataset_filestest[0] + " -ds /data/" + dataset_name + "/data/train/" + dataset_files[0] + ".info -sl 5 -p -t 100 -o nsl-forest"
    process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    print("output is testrandomforest" + output)
    print(error)
    return "randomforestTestDataset done"
def runrandomforest(self, dataset_name):
    datasetObj = datasets()
    dataset_files = datasetObj.get_datasetsFirstFilename(dataset_name)
    dataset_filestest = datasetObj.get_datasetsFirstFilenametest(dataset_name)

    bashCommand = "$HADOOP_PREFIX/bin/hadoop jar $MAHOUT_HOME/examples/target/mahout-examples-1.0-SNAPSHOT-job.jar org.apache.mahout.classifier.df.mapreduce.TestForest -i /data/" + dataset_name + "/data/test/" + dataset_filestest[0] + " -ds /data/" + dataset_name + "/data/train/" + dataset_files[0] + ".info -m nsl-forest -a -mr -o predictions"
    process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    print("output is runrandomforest" + output + "got it")
    print("error is : " + error + " : got the error :)")

    # Mahout prints the evaluation summary on stderr; keep everything after "Summary".
    result = error.split("Summary")[1]
    print("result is " + result)
    new_result = result.split("\n")
    final_result = "<br/>".join(new_result)
    return final_result
def __init__(self, net, dataset_path, save_path, isCuda=True):
    self.net = net
    self.dataset = DataLoader(datasets(dataset_path), batch_size=2500,
                              shuffle=True, drop_last=True, num_workers=2)
    self.save_path = save_path
    self.isCuda = isCuda
    if isCuda:
        self.net.cuda()

    self.confidence_loss_function = nn.BCELoss()
    self.offset_loss_function = nn.MSELoss()
    self.landmarks_loss_function = nn.MSELoss()

    self.optimizer = torch.optim.Adam(self.net.parameters())

    # Resume from a saved state dict if one exists; otherwise initialize the weights.
    if os.path.exists(save_path):
        self.net.load_state_dict(torch.load(save_path))
    else:
        self.net.apply(weight_init)
import os

from numpy import random as rd
import numpy as np

from dataset import datasets as datasets
import display
from ChangeFinder import CUsum
from ChangeFinder import FSS

if __name__ == '__main__':
    data = datasets()
    signal, bkps, mean, var = data.PRI_Gauss_Jitter()
    # display.display_signal_score(signal)

    # detector = FSS(signal, bkps=[], mean=[], var=[], para_known=False,
    #                fixed_threshold=1000, fixed_size=50)
    # indicater = detector.fss_detection()

    detector = CUsum(bkps=[], mean=[], var=[], para_known=False)
    score = []
    for sig in signal:
        scor = detector.update(sig)
        score.append(scor)
    display.display_signal_score(signal, score, mode='PRI')
import numpy as np
import ChangeFinder as CF
from ChangeFinder import CUsum
from ChangeFinder import FSS
import display
import dataset
import warnings
import time

warnings.filterwarnings('ignore')

##################################################################################################
####################################### Dataset generation ######################################
##################################################################################################
dataset = dataset.datasets(1000, 10)

# Jumping-mean dataset
# signal, bkps, outliers = dataset.jumping_mean_random(n_samples=1000, n_bkps=10)
# Jumping mean-and-variance dataset
# signal, bkps = dataset.jumping_mean_variance(n_samples=1000, n_bkp=10)
# Gaussian-jittered PRI pulses
# signal, bkps, mean, var = dataset.PRI_Gauss_Jitter()
# Uniformly jittered PRI pulses
signal, bkps, lower_bound, upper_bound = dataset.PRI_norm_Jitter()
# Rayleigh-jittered PRI pulses
# signal, bkps, scale = dataset.PRI_rayleigh_Jitter()
# Add missing pulses to the Gaussian-jittered PRI sequence
# PRI_miss = dataset.miss_PRI(signal, miss_ratio=0)
# Add spurious pulses to the Gaussian-jittered PRI sequence
# PRI_spur = dataset.add_spur_PRI(signal, para=0.0002, mode='pulse_ratio')
import os

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from net import trainNet
from dataset import datasets

'''Trains the model.'''

data = datasets(r"./datasets/train_img")

# Resume from a previously saved model if one exists.
if os.path.exists("models/net.pth"):
    net = torch.load("models/net.pth")
else:
    net = trainNet().cuda()

train_data = DataLoader(data, batch_size=10, shuffle=True, drop_last=True)
loss_mse = nn.MSELoss()
loss_bce = nn.BCELoss()
optim = torch.optim.Adam(net.parameters())

for epoch in range(10):
    for i, (x, y) in enumerate(train_data):
        x = x.cuda()
        y = y.cuda()
        # Slice the target into box coordinates and confidence.
        coordinate = y[:, 0:4] / 224
        confidence = y[:, 4:5]
        coordinate_output, confidence_output = net(x)
        coordinate_loss = loss_mse(coordinate_output, coordinate)
        confidence_loss = loss_bce(confidence_output, confidence)
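        # Hedged continuation (not in the original snippet): the loop above stops
        # after computing the two losses. Under the usual PyTorch training pattern,
        # one would combine them (equal weighting assumed here) and step the optimizer.
        loss = coordinate_loss + confidence_loss
        optim.zero_grad()
        loss.backward()
        optim.step()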
def main():
    args = parse_args()

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_cls_net')(config)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input))

    # copy model file
    this_dir = os.path.dirname(__file__)
    models_dst_dir = os.path.join(final_output_dir, 'models')
    if os.path.exists(models_dst_dir):
        shutil.rmtree(models_dst_dir)
    shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = [0, 1]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    optimizer = get_optimizer(config, model)

    best_perf = 0.0
    best_model = False
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth.tar')
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.module.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
            best_model = True

    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)

    # Data loading code
    traindir = os.path.join(config.DATASET.ROOT, config.DATASET.TRAIN_SET)
    valdir = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)

    train_dataset = datasets(dataset_root='./Data/', split='train',
                             size=config.MODEL.IMAGE_SIZE[0])
    test_dataset = datasets(dataset_root='./Data/', split='test',
                            size=config.MODEL.IMAGE_SIZE[0])

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()
        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, model, criterion,
                                  final_output_dir, tb_log_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
            # Save the best-performing model weights.
            torch.save(model.module.state_dict(), './best-model.pth')
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': config.MODEL.NAME,
            'state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir, filename='checkpoint.pth.tar')

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
'''
Created on 2018-03-21

@author: qimy

Evaluates the softmax model trained earlier on the test set.
'''
import tensorflow as tf

from dataset import datasets

test_text_dir = "D:/Users/qimy/softWare/eclipse/workspace/voiceasr-tensorflow/test_set"
model_dir = "D:/Users/qimy/softWare/eclipse/workspace/voiceasr-tensorflow"

data_sets = datasets()
data_sets.read_test_data(test_text_dir, True)

sess = tf.InteractiveSession()

# None is a placeholder for the number of samples; 5000 is the per-sample input
# dimension (the vocabulary size).
x = tf.placeholder(tf.float32, [None, 5000])
# Weight matrix: rows are the input dimension, columns the output dimension (8 classes).
w = tf.Variable(tf.zeros([5000, 8]))
# Bias, one value per output class.
b = tf.Variable(tf.zeros([8]))
# Model output: softmax over the linear scores tf.matmul(x, w) + b.
y = tf.nn.softmax(tf.matmul(x, w) + b)
y_ = tf.placeholder(tf.float32, [None, 8])

saver = tf.train.Saver()
saver.restore(sess, model_dir + "/model2/model.md")

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
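# Hedged continuation (not in the original snippet): the standard TF1 way to turn
# correct_prediction into an accuracy score. The attribute names test_images and
# test_labels on data_sets are assumptions and may differ in the actual dataset class.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: data_sets.test_images,
                                    y_: data_sets.test_labels}))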
import subprocess
import requests
import json

from dataset import datasets
from fileFormatting import fileformatting

fileformattingObj = fileformatting()
datasetObj = datasets()


class algorithms:

    def trainlogistic(self, dataset_name, predictors, target):
        str_predictor = ""
        count = 0
        for i in predictors:
            count += 1
            str_predictor += i + " "
        # datasetObj = datasets()
        dataset_files = datasetObj.get_datasetsFirstFilename(dataset_name)
        bashCommand = "$MAHOUT_HOME/bin/mahout trainlogistic --input ~/code/bdmaas/data/" + dataset_name + "/data/train/" + dataset_files[0] + " --output ~/code/bdmaas/data/" + dataset_name + "/model --target " + target + " --categories 2 --predictors " + str_predictor + " --types word --features 20 --passes 100 --rate 50"
        process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=True)
        output, error = process.communicate()
        print("output is " + output)
        print(error)
        return "trainlogistic done"

    def runlogistic(self, dataset_name):
        # datasetObj = datasets()
        dataset_files = datasetObj.get_datasetsFirstFilenametest(dataset_name)
        bashCommand = "$MAHOUT_HOME/bin/mahout runlogistic --input ~/code/bdmaas/data/" + dataset_name + "/data/test/" + dataset_files[0] + " --model ~/code/bdmaas/data/" + dataset_name + "/model --auc --confusion"
if config.verbose == True:
    logging.basicConfig(
        level=logging.DEBUG,  # messages at this level and above go to the log file
        format='%(asctime)s %(filename)s : %(levelname)s %(message)s',  # log line format
        datefmt='%Y-%m-%d %A %H:%M:%S',  # timestamp format
        filename=os.path.join(os.getcwd(), config.log_name),  # log file name
        filemode='w')  # write mode: "w" (overwrite) or "a" (append)

batch_size = config.batch_size
num_epochs = config.num_epochs
learn_rate = config.learn_rate
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if config.display_plot == True:
    vis = Visualizer(config.visualize_win)

train_datasets = datasets(is_train=True)
train_loader = Data.DataLoader(train_datasets, batch_size=batch_size, shuffle=True)
val_datasets = datasets(is_train=False)
val_loader = Data.DataLoader(val_datasets, batch_size=batch_size, shuffle=False)

model1 = SENet(SEResNetBottleneck, [3, 4, 6, 3],
               groups=1,
               reduction=16,
               dropout_p=0.5,
               inplanes=64,
               input_3x3=False,