Example #1
	def extract(self):
		"""Extract the valid data."""
		print('\n\t in extract')
		files = util.get_dir_list(self.config['requests_dir'])
		output_path = self.config['dump_dir']
		util.check_path(output_path)
		files.sort(key=lambda x: int(x[:-4]))
		content = ""
		for file in files:
			path = self.config['requests_dir'] + '/' + file
			with open(path) as f:
				text = f.readlines()
			print(path)
			content += self._filter(text)
		util.output(output_path, 'total', content)
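Across these examples, util.check_path is a project-local helper, not a standard-library function, and its meaning varies by project: most snippets use it to make sure an output directory exists (Examples #1, #4, #7, #9), Examples #14 and #15 use it to resolve a wildcard pattern to a concrete file path, and Example #16 uses it only to test whether a path already exists. A minimal sketch of the most common variant, assuming all it has to do is create missing directories:

import os

def check_path(path):
    # Hypothetical reimplementation for illustration; the real util.check_path
    # differs between the projects indexed here.
    os.makedirs(path, exist_ok=True)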
Example #3
def main():
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--load-model', default=None)
    argparser.add_argument('-e', '--num_epoch', type=int, default=10)
    argparser.add_argument('-t', '--test', default=False, action='store_true')
    argparser.add_argument('--pt', default=False, action='store_true', help='prototype mode')
    argparser.add_argument('-b', '--batchsize', type=int, default=32)
    argparser.add_argument('--log-interval', type=int, default=10)
    argparser.add_argument('--save-interval', type=int, default=100)
    argparser.add_argument('-r', '--restore', default=False, action='store_true',
                           help='restore from checkpoint')
    argparser.add_argument('--ckpt', default='saved_model/embedder_ckpt.pth')
    argparser.add_argument('--save', default='saved_model/embedder.pth')
    args = argparser.parse_args()

    logging.info('reading data')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    with open('data/multim_poem.json') as f:
        multim = json.load(f)

    multim = filter_multim(multim)

    train_data = multim
    test_data = multim
    logging.info('number of training data:{}, number of testing data:{}'.
                 format(len(train_data), len(test_data)))

    if args.pt:
        train_data = train_data[:1000]
        test_data = test_data[:20]

    logging.info('building model...')
    load_model = args.load_model
    if args.load_model is None and args.restore and os.path.exists(args.ckpt):
        load_model = args.ckpt

    sentiment_model = 'saved_model/sentiment_all.pth'
    embed_trainer = PoemImageEmbedTrainer(train_data, test_data, sentiment_model, args.batchsize, load_model, device)
    check_path('saved_model')
    if args.test:
        pass
    else:
        logging.info('start training')
        for e in range(args.num_epoch):
            embed_trainer.train_epoch(e+1, args.log_interval, args.save_interval, args.ckpt)
            embed_trainer.save_model(args.ckpt)
        embed_trainer.save_model(args.save)
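For reference, a hypothetical way to invoke this entry point (the script name is an assumption; the flags come from the argparse definitions above):

python train_embedder.py -e 10 -b 32 --restore --ckpt saved_model/embedder_ckpt.pth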
Example #4
    def get_moods(self, qqnumber):
        '''Use the cookie and headers to fetch the moods pages and save them under a result folder named after the QQ number'''

        referer = 'http://user.qzone.qq.com/' + qqnumber
        self.headers['Referer'] = referer

        # Create a folder named after the QQ number to hold its result files
        util.check_path('mood_result/' + qqnumber)

        # Build the base URL, without the position argument.
        url_base = util.parse_moods_url(qqnumber)

        pos = 0
        key = True

        while key:
            print("\tDealing with position:\t%d" % pos)
            url = url_base + "&pos=%d" % pos
            print(url)
            res = self.session.get(url, headers=self.headers)
            con = res.text
            with open('mood_result/' + qqnumber + '/' + str(pos),
                      'w',
                      encoding='utf-8') as f:
                f.write(con)

            if '''"msglist":null''' in con:
                key = False

            # Cannot access...
            if '''"msgnum":0''' in con:
                with open('crawler_log.log', 'a',
                          encoding='utf-8') as log_file:
                    log_file.write("%s Cannot access..\n" % qqnumber)
                key = False

            # Cookie expired
            if '''"subcode":-4001''' in con:
                with open('crawler_log.log', 'a',
                          encoding='utf-8') as log_file:
                    log_file.write('Cookie Expired! Time is %s\n' %
                                   time.ctime())
                sys.exit()

            pos += 20
            time.sleep(5)
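The loop above stops on sentinel substrings in the response text: "msglist":null marks the last page, "msgnum":0 an inaccessible profile, and "subcode":-4001 an expired cookie. A hypothetical helper that names these checks (a refactoring sketch, not code from the original project):

def classify_moods_response(con):
    # Map the sentinel substrings used in get_moods above to outcomes.
    if '"msglist":null' in con:
        return 'last_page'
    if '"msgnum":0' in con:
        return 'no_access'
    if '"subcode":-4001' in con:
        return 'cookie_expired'
    return 'ok'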
Example #5
	def __init__(self, path="./config.ini"):
		"""
		Check that the config file exists; return immediately if it does not.
		Check that the output directory exists; create it if it does not.
		Fields that config.ini must contain:
			- dump_dir
			- base_url
		"""
		print('\n in Spider __init__')
		if not util.check_file(path):
			print('\n\t no file ' + path)
			return
		self.config = util.init(path)
		if 'ua' in self.config:
			self.config['ua'] = util.init(self.config['ua'])
		self.config['requests_dir'] = self.config['dump_dir'] + '/requests'
		print(self.config)
		util.check_path(self.config['dump_dir'])
Example #7
    def get_moods_start(self):
        app = Get_moods()
        #app.get_rest_number()

        with open('qqnumber.inc', encoding='utf-8') as qnumber_file:
            qnumber_string = qnumber_file.read()
        qnumber_list = eval(qnumber_string)

        # Make sure the mood_result folder for the result files exists;
        # create it if it does not
        util.check_path('mood_result')

        while qnumber_list:
            save_back_qnumber = qnumber_list[:]
            item = qnumber_list.pop()
            qq = item['data']
            print("Dealing with:\t%s" % qq)

            start_time = time.ctime()
            with open('crawler_log.log', 'a', encoding='utf-8') as log_file:
                log_file.write("Program run at: %s\tGetting %s data...\n" %
                               (start_time, qq))

            try:
                app.get_moods(qq)
            except KeyboardInterrupt:
                print('User Interrupt, program will exit')
                sys.exit()
            except Exception as e:
                # Write the rest item back to qqnumber.inc
                with open('qqnumber.inc', 'w',
                          encoding='utf-8') as qnumber_file:
                    qnumber_file.write(str(save_back_qnumber))

                # Write the log
                with open('crawler_log.log', 'a',
                          encoding='utf-8') as log_file:
                    exception_time = time.ctime()
                    log_file.write("Exception occured: %s\n%s\n" %
                                   (exception_time, e))
            else:
                print("%s Finish!" % qq)
        else:
            print("Finish All!")
Example #8
def getInfo(friendqq):
    header = util.headers
    cookie = header['Cookie']
    qq_start = cookie.find('uin=o')
    qq_end = cookie.find(';', qq_start)
    qqnumber = cookie[qq_start + 5:qq_end]
    # qqnumber is a string, so compare against the character '0'
    # (the original compared to the integer 0, which is never equal)
    if qqnumber[0] == '0':
        qqnumber = qqnumber[1:]

    host = 'https://h5.qzone.qq.com/proxy/domain/base.qzone.qq.com/cgi-bin/user/cgi_userinfo_get_all?'
    params = {'uin': friendqq, 'vuin': qqnumber, 'g_tk': util.g_tk}

    url = host + parse.urlencode(params)
    print(url)
    resp = requests.get(url, headers=util.headers)
    print(resp.text)

    util.check_path('persionInfo')

    with open('persionInfo/info' + friendqq, 'w', encoding='utf-8') as f:
        f.write(resp.text)
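To make the cookie slicing concrete, with a hypothetical cookie value (the format follows the uin=o... convention the code assumes):

# Hypothetical cookie string for illustration
cookie = 'pgv_pvid=xxx; uin=o0123456789; skey=yyy'
qq_start = cookie.find('uin=o')
qq_end = cookie.find(';', qq_start)
qqnumber = cookie[qq_start + 5:qq_end]  # '0123456789'
if qqnumber[0] == '0':
    qqnumber = qqnumber[1:]             # '123456789'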
Example #9
    def __init__(self):

        self.headers = util.headers
        self.base_url = util.parse_friends_url()
        util.check_path('friends')
        print('Start to get the friends list and save it to the ./friends folder')
Example #10
    def __init__(self, model_name, fw):
        check_path(model_name)
        self.model_name = model_name
Example #11
def main(args):

    check_path(args)

    # All 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # Dataset
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)

    # Select the model
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    elif args.resnet18:
        net = ResNet18()
        model_name = args.name_res18
    elif args.resnet34:
        net = ResNet34()
        model_name = args.name_res34
    elif args.resnet50:
        net = ResNet50()
        model_name = args.name_res50
    elif args.resnet101:
        net = ResNet101()
        model_name = args.name_res101
    elif args.resnet152:
        net = ResNet152()
        model_name = args.name_res152
    else:
        # Fail clearly if no model flag was given (mirrors Example #12),
        # instead of hitting a NameError on `net` below.
        raise ValueError("No model selected; pass one of the model flags.")

    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # SGD optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)

    # Cosine-annealing learning-rate schedule
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=150)

    # Path where the model parameters are saved
    model_path = os.path.join(args.model_path, model_name)

    # Start training
    if args.do_train:
        print("Training...")

        trainer = Trainer(net, criterion, optimizer, scheduler,
                          dataSet.train_loader, dataSet.test_loader,
                          model_path, args)

        trainer.train(epochs=args.epoch)
        # t.save(net.state_dict(), model_path)

    # Start testing. If --do_train was also given, test with the freshly
    # trained model; otherwise test with the saved model.
    if args.do_eval:
        if not args.do_train and not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        # --do_eval alone: load the saved checkpoint
        if not args.do_train:
            checkpoint = t.load(model_path)
            net.load_state_dict(checkpoint['net'])
            accuracy = checkpoint['acc']
            epoch = checkpoint['epoch']
            print("Using saved model, accuracy : %f  epoch: %d" %
                  (accuracy, epoch))
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()

    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)

    if args.do_predict:
        device = t.device("cuda" if t.cuda.is_available() else "cpu")
        checkpoint = t.load(model_path, map_location=device)
        net.load_state_dict(checkpoint['net'])
        predictor = Predictor(net, classes)
        img_path = 'test'
        img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        for img in img_name:
            predictor.predict(img)
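The loading code above implies the checkpoint is a dict with 'net', 'acc', and 'epoch' keys, so the matching save inside Trainer would presumably look like the following sketch (inferred from the load code, not taken from the project):

# accuracy and epoch stand for whatever the Trainer tracks at save time
t.save({'net': net.state_dict(), 'acc': accuracy, 'epoch': epoch}, model_path)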
Example #12
def main(args):

    check_path(args)

    # All 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # Dataset
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)

    # Select the model
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    else:
        # Raising a bare string is a TypeError in Python 3; raise an exception.
        raise ValueError("Sorry, you can only select LeNet or VGG.")

    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # SGD optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)

    # Path where the model parameters are saved; defaults to "./model/state_dict"
    model_path = os.path.join(args.model_path, model_name)

    # Run on the GPU if available (and not disabled), otherwise on the CPU
    device = t.device("cuda:0" if (
        t.cuda.is_available() and not args.no_cuda) else "cpu")

    # Start training
    if args.do_train:
        print("Training...")
        trainer = Trainer(net, criterion, optimizer, dataSet.train_loader,
                          args)
        trainer.train(epochs=args.epoch)
        # Save only the model parameters
        t.save(net.state_dict(), model_path)

    # Start testing
    if args.do_eval:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        print("Testing...")
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
        net.load_state_dict(t.load(model_path, map_location=device))
        # net.eval()
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()

    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)

    if args.do_predict:
        net.load_state_dict(t.load(model_path, map_location=device))
        predictor = Predictor(net, classes)
        # img_path = 'test'
        # img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        # for img in img_name:
        #     predictor.predict(img)
        img_path = 'test/cat0.jpg'
        predictor.predict(img_path)
Example #13
import argparse
import os
from pathlib import Path
from util import prepare_manual_clusters, check_path, check_file

parser = argparse.ArgumentParser(
    description="Manually cluster the California housing market data")
parser.add_argument(
    "--source_file",
    type=lambda s: Path(os.path.expanduser(s)),
    required=True,
    help="The path to for the raw data",
)
parser.add_argument(
    "--sink_path",
    type=lambda s: os.path.expanduser(s),
    required=True,
    help="The path for the processed data to be saved",
)
args = parser.parse_args()

check_file(args.source_file)
check_path(args.sink_path)
prepare_manual_clusters(args.source_file, args.sink_path)
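A hypothetical invocation (the script name is an assumption; the flags are from the parser above):

python prepare_manual_clusters.py --source_file ~/data/housing.csv --sink_path ~/data/clusters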
Example #14
from model import LeNet5_2neurons
from data import set_up_data
from util import check_path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

colors = [
    "red", "blue", "black", "yellow", "green", "yellowgreen", "gold",
    "royalblue", "peru", "purple"
]
pretrained = "train_baseline_lenet5/trained_weights_*2neurons/w*/*E21S0*.pth"
pretrained = check_path(pretrained)
gpu_id = 4  # change your GPU here
net = LeNet5_2neurons(pretrained).cuda(gpu_id)

train_loader, num_train, test_loader, num_test = set_up_data("MNIST", 500)
for step, (img, label) in enumerate(train_loader):
    print("train step %s / %s" % (step, len(train_loader)))
    img = img.cuda(gpu_id)
    feat = net.forward_2neurons(img)
    feat = feat.data.cpu().numpy()
    label = label.data.cpu().numpy()
    for x, y in zip(feat, label):
        plt.scatter(x[0], x[1], color=colors[y])
plt.savefig("./mnist_trainset_feat_visualization.jpg")
plt.close()

for step, (img, label) in enumerate(test_loader):
    print("test step %s / %s" % (step, len(test_loader)))
    img = img.cuda(gpu_id)
    # The original snippet was truncated here; the rest is completed by
    # symmetry with the train loop above, saving to a test-set figure
    # (the output filename is an assumption).
    feat = net.forward_2neurons(img)
    feat = feat.data.cpu().numpy()
    label = label.data.cpu().numpy()
    for x, y in zip(feat, label):
        plt.scatter(x[0], x[1], color=colors[y])
plt.savefig("./mnist_testset_feat_visualization.jpg")
plt.close()
Example #15
    "MNIST_2neurons":
    "train_baseline_lenet5/trained_weights_*2neurons/w*/*E21S0*.pth",
    "CIFAR10":
    "../../ZeroShot*/Pretrained/CIFAR10/WRN-16-2/last.pth.tar",  # "models/model_best.pth.tar",
}
assert (args.num_se == 1)
assert (args.num_dec == 1)
assert (args.mode in AutoEncoders.keys())
assert (args.dataset in ["MNIST", "CIFAR10", "CIFAR100"])
if args.e1 is None:
    if "CIFAR" in args.dataset:
        args.e1 = pretrained_be_path["CIFAR10"]
    elif args.dataset == "MNIST":
        key = "MNIST" + args.which_lenet
        args.e1 = pretrained_be_path[key]
args.e1 = check_path(args.e1)
args.e2 = check_path(args.e2)
args.pretrained_dir = check_path(args.pretrained_dir)
args.adv_train = int(args.mode[-1])
num_channel = 1 if args.dataset == "MNIST" else 3

# Set up directories and logs, etc.
TimeID, ExpID, rec_img_path, weights_path, log = set_up_dir(
    args.project_name, args.resume, args.CodeID)
logprint = LogPrint(log)
args.ExpID = ExpID

if __name__ == "__main__":
    # Set up model
    AE = AutoEncoders[args.mode]
    ae = AE(args).cuda()
Example #16
File: routes.py  Project: inovex/ispark
def get_new_kernel_specs():
    logger.debug("Adding a new Kernel...")
    print(sys.version)
    newKernelSpec = request.form.to_dict()

    for s in newKernelSpec:
        newKernelSpec[s] = "{0}".format(newKernelSpec[s])

    #step 1, names and paths for folders are prepared
    kernel_path, conda_env_name, folder_name = util.prepare_paths_and_names(
        newKernelSpec['kernelEnvironment-displayName'])

    if util.check_path(kernel_path):
        logger.debug("Folder exists already. Aborting...")
        return "Folder exists already. Aborting..."

    #step 2, creating actual folders
    prepare_folders_success = util.prepare_folders(kernel_path, conda_env_name)

    if not prepare_folders_success:
        util.clean_up_folders(kernel_path)
        logger.debug("Error while creating folder structure")
        return "Error while creating folder structure"

    #step 3, creating a yaml file for the conda env, dict with params from UI as input
    create_yaml_success = util.create_conda_env_yaml(
        folder_name, conda_env_name,
        newKernelSpec['kernelEnvironment-extraCondaChannels'],
        newKernelSpec['kernelEnvironment-language'],
        newKernelSpec['kernelEnvironment-extraCondaPackages'],
        newKernelSpec['kernelEnvironment-extraPIPPackages'])

    if not create_yaml_success:
        util.clean_up_folders(kernel_path)
        logger.debug("Error while creating yml env file for Conda")
        return "Error while creating yml env file for Conda"

    #step 4, installing conda env from created yaml
    install_and_zip_env_success = util.create_conda_env_from_yaml(
        kernel_path, conda_env_name)

    if not install_and_zip_env_success:
        util.clean_up_folders(kernel_path)
        logger.debug("Error while installing & zipping the environment")
        return "Error while installing & zipping the environment"

    #step 5, creating the JSON file (copy and modify a template)
    create_kernel_json_success = util.prepare_kernel_specs_json(
        newKernelSpec, kernel_path, conda_env_name)

    if not create_kernel_json_success:
        logger.debug("Error with creating kernel JSON file")
        util.clean_up_folders(kernel_path)
        return "Error with creating kernel JSON file"

    #step 6, copy other files, e.g. launcher or run.sh
    util.provide_rest_files(kernel_path,
                            newKernelSpec['kernelEnvironment-language'])

    print("Adding kernel finished...")

    return "finish"
Example #17
def main():
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--load-model', default=None)
    argparser.add_argument('-e', '--num_epoch', type=int, default=5)
    argparser.add_argument('-t', '--test', default=False, action='store_true')
    argparser.add_argument('--pt',
                           default=False,
                           action='store_true',
                           help='prototype mode')
    argparser.add_argument('-b', '--batchsize', type=int, default=32)
    argparser.add_argument('--log-interval', type=int, default=10)
    argparser.add_argument('--save-interval', type=int, default=100)
    argparser.add_argument('-r',
                           '--restore',
                           default=False,
                           action='store_true',
                           help='restore from checkpoint')
    argparser.add_argument('--ckpt', default='saved_model/sentiment_ckpt.pth')
    argparser.add_argument('--save', default='saved_model/sentiment.pth')
    args = argparser.parse_args()

    logging.info('reading data')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    trainfile = 'data/image-sentiment-polarity-all.csv'
    testfile = 'data/image-sentiment-polarity-test.csv'
    # trainfile = 'data/visual_sentiment_train.csv'
    # testfile = 'data/visual_sentiment_test.csv'

    img_dir = 'data/polarity_image/'
    train_data = pd.read_csv(trainfile, dtype={'id': int})
    test_data = pd.read_csv(testfile, dtype={'id': int})
    train_data = filter_sentiment(train_data, img_dir)
    test_data = filter_sentiment(test_data, img_dir)

    logging.info(
        'number of training data:{}, number of testing data:{}'.format(
            len(train_data), len(test_data)))

    if args.pt:
        train_data = train_data[:1000]
        test_data = test_data[:100]

    logging.info('building model...')
    load_model = args.load_model
    if args.load_model is None and args.restore and os.path.exists(args.ckpt):
        load_model = args.ckpt
    sentiment_trainer = VisualSentimentTrainer(train_data, test_data, img_dir,
                                               args.batchsize, load_model,
                                               device)
    check_path('saved_model')
    if args.test:
        sentiment_trainer.test()
    else:
        logging.info('start training')
        for e in range(args.num_epoch):
            sentiment_trainer.train_epoch(e + 1, args.log_interval,
                                          args.save_interval, args.ckpt)
            sentiment_trainer.scheduler.step()
            sentiment_trainer.test()
            sentiment_trainer.save_model(args.ckpt)
        sentiment_trainer.save_model(args.save)
Example #18
    def __init__(self):

        self.headers = util.headers
        self.base_url = util.parse_friends_url()
        util.check_path('friends')
        print('Start to get the friends list and save the files to the friends folder')