def extract(self): """ 提取有效数据 """ print '\n\t in extract' files = util.get_dir_list(self.config['requests_dir']) output_path = self.config['dump_dir'] util.check_path(output_path) files.sort(key=lambda x:int(x[:-4])) content = "" for file in files: path = self.config['requests_dir'] + '/' + file f = open(path) text = f.readlines() f.close() print path content += self._filter(text) util.output(output_path, 'total', content)
def extract(self):
    """Extract the valid data."""  # translated from Chinese: 提取有效数据
    # Python 2 code (print statements); behavior kept byte-identical,
    # comments only added for this near-duplicate of the other extract().
    print '\n\t in extract'
    files = util.get_dir_list(self.config['requests_dir'])
    output_path = self.config['dump_dir']
    util.check_path(output_path)
    # Numeric sort by file name minus its last 4 chars
    # (assumes names like '<n>.txt' — TODO confirm).
    files.sort(key=lambda x: int(x[:-4]))
    content = ""
    # NOTE(review): 'file' shadows the Python 2 builtin; string `+=` in a
    # loop is quadratic; open() has no with/close-on-error protection.
    for file in files:
        path = self.config['requests_dir'] + '/' + file
        f = open(path)
        text = f.readlines()
        f.close()
        print path
        content += self._filter(text)
    util.output(output_path, 'total', content)
def main():
    """Train the poem-image embedder on the multim_poem data.

    Command-line flags select epochs, batch size, checkpoint/restore
    behaviour and prototype mode; see the argparse definitions below.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--load-model', default=None)
    argparser.add_argument('-e', '--num_epoch', type=int, default=10)
    argparser.add_argument('-t', '--test', default=False, action='store_true')
    argparser.add_argument('--pt', default=False, action='store_true',
                           help='prototype mode')
    argparser.add_argument('-b', '--batchsize', type=int, default=32)
    argparser.add_argument('--log-interval', type=int, default=10)
    argparser.add_argument('--save-interval', type=int, default=100)
    argparser.add_argument('-r', '--restore', default=False,
                           action='store_true',
                           help='restore from checkpoint')
    argparser.add_argument('--ckpt', default='saved_model/embedder_ckpt.pth')
    argparser.add_argument('--save', default='saved_model/embedder.pth')
    args = argparser.parse_args()

    logging.info('reading data')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    with open('data/multim_poem.json') as f:
        multim = json.load(f)
    multim = filter_multim(multim)
    # NOTE(review): train and test sets are the SAME data here, so test
    # metrics will not measure generalization — confirm this is intended.
    train_data = multim
    test_data = multim
    logging.info('number of training data:{}, number of testing data:{}'
                 .format(len(train_data), len(test_data)))
    if args.pt:
        # Prototype mode: tiny subsets for a quick smoke run.
        train_data = train_data[:1000]
        test_data = test_data[:20]

    logging.info('building model...')
    load_model = args.load_model
    if args.load_model is None and args.restore and os.path.exists(args.ckpt):
        load_model = args.ckpt
    sentiment_model = 'saved_model/sentiment_all.pth'
    embed_trainer = PoemImageEmbedTrainer(train_data, test_data,
                                          sentiment_model, args.batchsize,
                                          load_model, device)
    check_path('saved_model')
    if args.test:
        # TODO: test mode is not implemented for the embedder.
        pass
    else:
        logging.info('start training')  # BUG FIX: log message said 'traning'
        # NOTE(review): loop boundaries reconstructed from a collapsed
        # source — checkpoint saved each epoch, final model once after.
        for e in range(args.num_epoch):
            embed_trainer.train_epoch(e + 1, args.log_interval,
                                      args.save_interval, args.ckpt)
            embed_trainer.save_model(args.ckpt)
        embed_trainer.save_model(args.save)
def get_moods(self, qqnumber):
    """Download all mood pages of *qqnumber*, saving each raw response to
    mood_result/<qqnumber>/<position>.

    Walks the paged API 20 entries at a time until the server reports an
    empty list, denies access, or the session cookie has expired.
    """
    self.headers['Referer'] = 'http://user.qzone.qq.com/' + qqnumber
    # One result folder per QQ number.
    util.check_path('mood_result/' + qqnumber)
    # Goal URL, still missing the paging position argument.
    url_base = util.parse_moods_url(qqnumber)
    pos = 0
    keep_going = True
    while keep_going:
        print("\tDealing with position:\t%d" % pos)
        url = url_base + "&pos=%d" % pos
        print(url)
        response = self.session.get(url, headers=self.headers)
        con = response.text
        with open('mood_result/' + qqnumber + '/' + str(pos), 'w',
                  encoding='utf-8') as f:
            f.write(con)
        # Empty page: we walked past the last mood.
        if '''"msglist":null''' in con:
            keep_going = False
        # This profile cannot be accessed by us.
        if '''"msgnum":0''' in con:
            with open('crawler_log.log', 'a', encoding='utf-8') as log_file:
                log_file.write("%s Cannot access..\n" % qqnumber)
            keep_going = False
        # Cookie expired: nothing more can be done, stop the whole program.
        if '''"subcode":-4001''' in con:
            with open('crawler_log.log', 'a', encoding='utf-8') as log_file:
                log_file.write('Cookie Expried! Time is %s\n' % time.ctime())
            sys.exit()
        pos += 20
        time.sleep(5)  # be polite to the server
def __init__(self, path="./config.ini"): """ 检查配置文件是否存在,不存在则直接return 检查输出文件夹是否存在,不存在则创建 config.ini必须要有的字段 - dump_dir - base_url """ print u'\n in Spider __init__' if not util.check_file(path): print '\n\t no file ' + path return self.config = util.init(path) if self.config.has_key('ua'): self.config['ua'] = util.init(self.config['ua']) self.config['requests_dir'] = self.config['dump_dir'] + '/requests' print self.config util.check_path(self.config['dump_dir'])
def get_moods_start(self): app = Get_moods() #app.get_rest_number() with open('qqnumber.inc', encoding='utf-8') as qnumber_file: qnumber_string = qnumber_file.read() qnumber_list = eval(qnumber_string) # check if there is a mood_result folder to save the result file # if not create it util.check_path('mood_result') while qnumber_list != []: save_back_qnumber = qnumber_list[:] item = qnumber_list.pop() qq = item['data'] print("Dealing with:\t%s" % qq) start_time = time.ctime() with open('crawler_log.log', 'a', encoding='utf-8') as log_file: log_file.write("Program run at: %s\tGetting %s data...\n" % (start_time, qq)) try: app.get_moods(qq) except KeyboardInterrupt: print('User Interrupt, program will exit') sys.exit() except Exception as e: # Write the rest item back to qqnumber.inc with open('qqnumber.inc', 'w', encoding='utf-8') as qnumber_file: qnumber_file.write(str(save_back_qnumber)) # Write the log with open('crawler_log.log', 'a', encoding='utf-8') as log_file: exception_time = time.ctime() log_file.write("Exception occured: %s\n%s\n" % (exception_time, e)) else: print("%s Finish!" % qq) else: print("Finish All!")
def getInfo(friendqq): header = util.headers cookie = header['Cookie'] qq_start = cookie.find('uin=o') qq_end = cookie.find(';', qq_start) qqnumber = cookie[qq_start + 5:qq_end] if qqnumber[0] == 0: qqnumber = qqnumber[1:] host = 'https://h5.qzone.qq.com/proxy/domain/base.qzone.qq.com/cgi-bin/user/cgi_userinfo_get_all?' params = {'uin': friendqq, 'vuin': qqnumber, 'g_tk': util.g_tk} url = host + parse.urlencode(params) print(url) resp = requests.get(url, headers=util.headers) print(resp.text) util.check_path('persionInfo') with open('persionInfo/info' + friendqq, 'w', encoding='utf-8') as f: f.write(resp.text)
def __init__(self):
    """Set up request headers, the friends-list URL, and the ./friends
    output folder."""
    # Parallel assignment; evaluation order matches the original two lines.
    self.headers, self.base_url = util.headers, util.parse_friends_url()
    util.check_path('friends')
    print('Start to get friends list and save it for ./friends folder')
def __init__(self, model_name, fw):
    """Ensure *model_name*'s path exists and remember the name.

    *fw* is accepted for interface compatibility but is not used here.
    """
    check_path(model_name)
    self.model_name = model_name
    # (original ended with a redundant bare `return`, dropped)
def main(args):
    """Train, evaluate, inspect or predict with a CIFAR-10 model chosen by
    the command-line flags carried in *args*.
    """
    check_path(args)
    # All 10 CIFAR-10 class names, in label order.
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')
    # Datasets / loaders.
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)
    # Model selection.
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_res18 if False else args.name_vgg
    elif args.resnet18:
        net = ResNet18()
        model_name = args.name_res18
    elif args.resnet34:
        net = ResNet34()
        model_name = args.name_res34
    elif args.resnet50:
        net = ResNet50()
        model_name = args.name_res50
    elif args.resnet101:
        net = ResNet101()
        model_name = args.name_res101
    elif args.resnet152:
        net = ResNet152()
        model_name = args.name_res152
    else:
        # BUG FIX: without this branch, `net`/`model_name` stayed unbound
        # and the code crashed later with UnboundLocalError when no model
        # flag was supplied.  Fail fast with a clear message instead.
        raise ValueError("No model selected: pass one of --lenet, --vgg, "
                         "--resnet18/34/50/101/152.")
    # Cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    # SGD optimizer.
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)
    # Cosine-annealing learning-rate schedule.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=150)
    # Where the model parameters are saved.
    model_path = os.path.join(args.model_path, model_name)
    # Training.
    if args.do_train:
        print("Training...")
        trainer = Trainer(net, criterion, optimizer, scheduler,
                          dataSet.train_loader, dataSet.test_loader,
                          model_path, args)
        trainer.train(epochs=args.epoch)
        # t.save(net.state_dict(), model_path)
    # Evaluation: reuse the freshly trained model if --do_train was also
    # given, otherwise load the saved checkpoint.
    if args.do_eval:
        if not args.do_train and not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        if not args.do_train:
            # NOTE(review): no map_location here — loading a GPU-saved
            # checkpoint on a CPU-only machine will fail; confirm intended.
            checkpoint = t.load(model_path)
            net.load_state_dict(checkpoint['net'])
            accuracy = checkpoint['acc']
            epoch = checkpoint['epoch']
            print("Using saved model, accuracy : %f epoch: %d"
                  % (accuracy, epoch))
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()
    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)
    # Prediction over every image in the ./test folder.
    if args.do_predict:
        device = t.device("cuda" if t.cuda.is_available() else "cpu")
        checkpoint = t.load(model_path, map_location=device)
        net.load_state_dict(checkpoint['net'])
        predictor = Predictor(net, classes)
        img_path = 'test'
        img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        for img in img_name:
            predictor.predict(img)
def main(args):
    """Train, evaluate, inspect or predict with a LeNet-5 or VGG-16 model
    on CIFAR-10, according to the flags carried by *args*.
    """
    check_path(args)
    # All 10 CIFAR-10 class names, in label order.
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')
    # Datasets / loaders.
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)
    # Model selection.
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    else:
        # BUG FIX: the original did `raise "Sorry, ..."` — raising a plain
        # string is itself a TypeError in Python 3.  Raise a proper
        # exception carrying the same message.
        raise ValueError("Sorry, you can only select LeNet or VGG.")
    # Cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    # SGD optimizer.
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)
    # Parameter save path, default "./model/state_dict".
    model_path = os.path.join(args.model_path, model_name)
    # Run on GPU unless unavailable or disabled with --no_cuda.
    device = t.device("cuda:0" if (
        t.cuda.is_available() and not args.no_cuda) else "cpu")
    # Training.
    if args.do_train:
        print("Training...")
        trainer = Trainer(net, criterion, optimizer, dataSet.train_loader,
                          args)
        trainer.train(epochs=args.epoch)
        # Only the parameters (state_dict) are saved.
        t.save(net.state_dict(), model_path)
    # Testing.
    if args.do_eval:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        print("Testing...")
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
        net.load_state_dict(t.load(model_path, map_location=device))
        # net.eval()
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()
    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)
    # Prediction on a single fixed sample image.
    if args.do_predict:
        net.load_state_dict(t.load(model_path, map_location=device))
        predictor = Predictor(net, classes)
        # img_path = 'test'
        # img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        # for img in img_name:
        #     predictor.predict(img)
        img_path = 'test/cat0.jpg'
        predictor.predict(img_path)
"""CLI entry point: manually cluster the California housing market data."""
import argparse
import os
from pathlib import Path

from util import prepare_manual_clusters, check_path, check_file

parser = argparse.ArgumentParser(
    description="Manually cluster the California housing market data")
parser.add_argument(
    "--source_file",
    type=lambda s: Path(os.path.expanduser(s)),  # raw data, as a Path
    required=True,
    help="The path to for the raw data",
)
parser.add_argument(
    "--sink_path",
    # NOTE(review): unlike --source_file this stays a plain str, not a
    # Path — presumably what prepare_manual_clusters expects; confirm.
    type=lambda s: os.path.expanduser(s),
    required=True,
    help="The path for the processed data to be saved",
)
args = parser.parse_args()

# Validate inputs before doing any work.
check_file(args.source_file)
check_path(args.sink_path)
prepare_manual_clusters(args.source_file, args.sink_path)
"""Scatter-plot the 2-D bottleneck features of a 2-neuron LeNet-5 on MNIST."""
from model import LeNet5_2neurons
from data import set_up_data
from util import check_path
import matplotlib
matplotlib.use("Agg")  # headless backend: render to files, no display
import matplotlib.pyplot as plt

# One colour per MNIST class (index = label 0..9).
colors = [
    "red", "blue", "black", "yellow", "green",
    "yellowgreen", "gold", "royalblue", "peru", "purple"
]

# Glob pattern for the pretrained weights; check_path presumably resolves
# it to one concrete checkpoint file — TODO confirm its contract.
pretrained = "train_baseline_lenet5/trained_weights_*2neurons/w*/*E21S0*.pth"
pretrained = check_path(pretrained)
gpu_id = 4  # change your GPU here
net = LeNet5_2neurons(pretrained).cuda(gpu_id)
train_loader, num_train, test_loader, num_test = set_up_data("MNIST", 500)

# Training set: one scatter point per sample, coloured by its label.
# NOTE(review): indentation reconstructed from a collapsed source —
# savefig/close assumed to run once AFTER the loop; confirm against VCS.
for step, (img, label) in enumerate(train_loader):
    print("train step %s / %s" % (step, len(train_loader)))
    img = img.cuda(gpu_id)
    feat = net.forward_2neurons(img)
    feat = feat.data.cpu().numpy()
    label = label.data.cpu().numpy()
    for x, y in zip(feat, label):
        plt.scatter(x[0], x[1], color=colors[y])
plt.savefig("./mnist_trainset_feat_visualization.jpg")
plt.close()

# Test set: same procedure (loop body continues beyond this chunk).
for step, (img, label) in enumerate(test_loader):
    print("test step %s / %s" % (step, len(test_loader)))
    img = img.cuda(gpu_id)
"MNIST_2neurons": "train_baseline_lenet5/trained_weights_*2neurons/w*/*E21S0*.pth", "CIFAR10": "../../ZeroShot*/Pretrained/CIFAR10/WRN-16-2/last.pth.tar", # "models/model_best.pth.tar", } assert (args.num_se == 1) assert (args.num_dec == 1) assert (args.mode in AutoEncoders.keys()) assert (args.dataset in ["MNIST", "CIFAR10", "CIFAR100"]) if args.e1 == None: if "CIFAR" in args.dataset: args.e1 = pretrained_be_path["CIFAR10"] elif args.dataset == "MNIST": key = "MNIST" + args.which_lenet args.e1 = pretrained_be_path[key] args.e1 = check_path(args.e1) args.e2 = check_path(args.e2) args.pretrained_dir = check_path(args.pretrained_dir) args.adv_train = int(args.mode[-1]) num_channel = 1 if args.dataset == "MNIST" else 3 # Set up directories and logs, etc. TimeID, ExpID, rec_img_path, weights_path, log = set_up_dir( args.project_name, args.resume, args.CodeID) logprint = LogPrint(log) args.ExpID = ExpID if __name__ == "__main__": # Set up model AE = AutoEncoders[args.mode] ae = AE(args).cuda()
def get_new_kernel_specs():
    """Create a new Jupyter kernel: conda env, kernel-spec JSON, and
    support files, driven by the submitted form.

    On any step failing, the partially created folders are removed and an
    error string is returned; on success returns "finish".
    """
    logger.debug("Adding a new Kernel...")
    print(sys.version)
    spec = request.form.to_dict()
    # Normalise every form value to a plain string.
    for field in spec:
        spec[field] = "{0}".format(spec[field])

    # Step 1: derive names and paths for the new kernel's folders.
    kernel_path, conda_env_name, folder_name = util.prepare_paths_and_names(
        spec['kernelEnvironment-displayName'])
    if util.check_path(kernel_path):
        logger.debug("Folder exists already. Aborting...")
        return "Folder exists already. Aborting..."

    def abort(message):
        # Shared failure path for steps 2-4: remove folders, log, report.
        util.clean_up_folders(kernel_path)
        logger.debug(message)
        return message

    # Step 2: create the actual folder structure.
    if util.prepare_folders(kernel_path, conda_env_name) == False:
        return abort("Error while creating folder structure")

    # Step 3: write the yaml for the conda env (params from the UI).
    if util.create_conda_env_yaml(
            folder_name, conda_env_name,
            spec['kernelEnvironment-extraCondaChannels'],
            spec['kernelEnvironment-language'],
            spec['kernelEnvironment-extraCondaPackages'],
            spec['kernelEnvironment-extraPIPPackages']) == False:
        return abort("Error while creating yml env file for Conda")

    # Step 4: install (and zip) the conda env from the created yaml.
    if util.create_conda_env_from_yaml(kernel_path, conda_env_name) == False:
        return abort("Error while install & zip environment")

    # Step 5: create the JSON file (copy and modify a template).
    # Kept inline: unlike steps 2-4 the original logs BEFORE cleaning up.
    if util.prepare_kernel_specs_json(spec, kernel_path,
                                      conda_env_name) == False:
        logger.debug("Error with creating kernel JSON file")
        util.clean_up_folders(kernel_path)
        return "Error with creating kernel JSON file"

    # Step 6: copy other files, e.g. launcher or run.sh.
    util.provide_rest_files(kernel_path, spec['kernelEnvironment-language'])
    print("Adding kernel finished...")
    return "finish"
def main():
    """Train (or, with -t, only test) the visual sentiment model on the
    image-sentiment-polarity data.

    Command-line flags select epochs, batch size, checkpoint/restore
    behaviour and prototype mode; see the argparse definitions below.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--load-model', default=None)
    argparser.add_argument('-e', '--num_epoch', type=int, default=5)
    argparser.add_argument('-t', '--test', default=False, action='store_true')
    argparser.add_argument('--pt', default=False, action='store_true',
                           help='prototype mode')
    argparser.add_argument('-b', '--batchsize', type=int, default=32)
    argparser.add_argument('--log-interval', type=int, default=10)
    argparser.add_argument('--save-interval', type=int, default=100)
    argparser.add_argument('-r', '--restore', default=False,
                           action='store_true',
                           help='restore from checkpoint')
    argparser.add_argument('--ckpt', default='saved_model/sentiment_ckpt.pth')
    argparser.add_argument('--save', default='saved_model/sentiment.pth')
    args = argparser.parse_args()

    logging.info('reading data')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    trainfile = 'data/image-sentiment-polarity-all.csv'
    testfile = 'data/image-sentiment-polarity-test.csv'
    # trainfile = 'data/visual_sentiment_train.csv'
    # testfile = 'data/visual_sentiment_test.csv'
    img_dir = 'data/polarity_image/'
    train_data = pd.read_csv(trainfile, dtype={'id': int})
    test_data = pd.read_csv(testfile, dtype={'id': int})
    train_data = filter_sentiment(train_data, img_dir)
    test_data = filter_sentiment(test_data, img_dir)
    logging.info(
        'number of training data:{}, number of testing data:{}'.format(
            len(train_data), len(test_data)))
    if args.pt:
        # Prototype mode: small subsets for a quick smoke run.
        train_data = train_data[:1000]
        test_data = test_data[:100]

    logging.info('building model...')
    load_model = args.load_model
    if args.load_model is None and args.restore and os.path.exists(args.ckpt):
        load_model = args.ckpt
    sentiment_trainer = VisualSentimentTrainer(train_data, test_data, img_dir,
                                               args.batchsize, load_model,
                                               device)
    check_path('saved_model')
    if args.test:
        sentiment_trainer.test()
    else:
        logging.info('start training')  # BUG FIX: log message said 'traning'
        # NOTE(review): loop boundaries reconstructed from a collapsed
        # source — per-epoch step/test/checkpoint, final save once after.
        for e in range(args.num_epoch):
            sentiment_trainer.train_epoch(e + 1, args.log_interval,
                                          args.save_interval, args.ckpt)
            sentiment_trainer.scheduler.step()
            sentiment_trainer.test()
            sentiment_trainer.save_model(args.ckpt)
        sentiment_trainer.save_model(args.save)
def __init__(self):
    """Set up request headers, the friends-list URL, and the ./friends
    output folder."""
    # Parallel assignment; evaluation order matches the original two lines.
    self.headers, self.base_url = util.headers, util.parse_friends_url()
    util.check_path('friends')
    # User-facing progress message, kept in Chinese byte-for-byte.
    print('开始获取好友列表,并把文件保存到 friends 文件夹')