Example #1
def zip():  # note: shadows the built-in zip()
    # Bundle every generated PNG into a uniquely named archive under static/
    png_zip = str(uuid.uuid1()) + '.zip'
    startdir = os.path.join(os.getcwd(), 'PNG')
    file_news = os.path.join(os.getcwd(), 'static', png_zip)
    ZipFileHelper.zip_ya(startdir, file_news)
    FileHelper.clearUploadedData()
    return png_zip
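ZipFileHelper.zip_ya is a project helper; purely for reference, the same zip-and-return pattern can be sketched with only the standard library (shutil.make_archive). The function name below is hypothetical.

import os
import shutil
import uuid

def zip_pngs_stdlib() -> str:
    # Minimal sketch: archive the PNG directory into static/<uuid>.zip
    png_zip = str(uuid.uuid1()) + '.zip'
    startdir = os.path.join(os.getcwd(), 'PNG')
    # make_archive appends the '.zip' extension itself, so pass the stem
    target_stem = os.path.join(os.getcwd(), 'static', png_zip[:-len('.zip')])
    shutil.make_archive(target_stem, 'zip', root_dir=startdir)
    return png_zip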
Example #2
def go_config():
    spectrum = request.args.get('spectrum')
    # os.path.join keeps the path portable (original hard-coded 'CSV\\uploads')
    file_location = os.path.join(os.path.dirname(__file__), 'CSV', 'uploads')
    png_location = os.path.join(os.path.dirname(__file__), 'PNG')
    FileHelper.create_csv_folder(file_location, png_location)
    return render_template('config.html',
                           spectrum=spectrum,
                           file_location=file_location)
Example #3
 @classmethod
 def get_csv_lst(cls, file_path: str) -> list:
     """
     :param file_path:   directory that has not yet been processed
     :return:   csv_list ---> list
     """
     csv_lst = FileHelper.get_csv_list(file_path)
     return csv_lst
Example #4
    @classmethod
    def check_everyday_with_merge_domain(cls):
        """
        Deduplicate each day's domain update:
        validate the daily data against the previously merged data.
        """
        log.info("Starting dedup check against historical domains......")
        need_check_domain_txt_lst = FileHelper.get_txt_list(config.DATA_DIR)
        check_txt_dir = config.MERGE_DIR
        with open(check_txt_dir, 'r', encoding='utf-8') as fp:
            # Build a set of stripped lines for O(1) membership tests
            check_set = {line.strip() for line in fp}

        for lst in need_check_domain_txt_lst:
            is_checked = False
            _d = re.search(r"\d{8}", lst).group()
            is_cleaned = cls.check_record(_d, is_checked)
            if is_cleaned:
                log.warning("File [{}] has already been processed!".format(_d))
                continue
            log.info("Processing file [{}]!".format(_d))
            with open(lst, 'r', encoding='utf-8') as old_reader:
                check_lst_lines = old_reader.readlines()
            with open(lst, 'w', encoding='utf-8') as writer:
                for line in check_lst_lines:
                    # Compare stripped lines: readlines() keeps the trailing
                    # newline, while the merged-history set was stripped above
                    if line.strip() not in check_set:
                        writer.write(line)
                    else:
                        log.warning("Duplicate domain: %s" % line.strip())

        log.info("Dedup check finished!")
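Examples #4 and #10 both implement a check-against-history pass; a minimal, self-contained sketch of that dedup pattern (file paths are hypothetical, project helpers omitted):

def dedup_against_history(daily_path: str, merged_path: str) -> int:
    # Load the merged history once into a set for O(1) membership tests
    with open(merged_path, 'r', encoding='utf-8') as fp:
        seen = {line.strip() for line in fp}
    with open(daily_path, 'r', encoding='utf-8') as fp:
        lines = fp.readlines()
    kept = [line for line in lines if line.strip() not in seen]
    # Rewrite the daily file with duplicates removed
    with open(daily_path, 'w', encoding='utf-8') as fp:
        fp.writelines(kept)
    return len(lines) - len(kept)  # number of duplicates dropped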
Example #5
def make_chart():
    try:
        result = []
        data = json.loads(request.args.get('data'))
        spectrum = request.args.get('spectrum')
        path = os.path.join(os.getcwd(), 'testItemList', spectrum)
        for item in data:
            result.append(FileHelper.get_chart_data(
                path, item['file_name'], item['file_path'],
                item['config'], item['overlay']))
        # Render one PNG per chart entry via PhantomJS
        # (a dead commented-out copy of this loop, gated on `count == 8`,
        # has been removed along with the now-unused counter)
        for chart_group in result:
            for chart in chart_group:
                PhantomjsHelper.process_json(chart['csv'], chart['name'],
                                             chart['x'], chart['data'],
                                             chart['config'], chart['overlay'])
        png_zip = PhantomjsHelper.createPNG()
        result = {'state': 'success', 'png_zip': png_zip}
    except Exception:
        # Exception, not BaseException: don't swallow KeyboardInterrupt/SystemExit
        result = {'state': 'error'}
    return json.dumps(result)
Example #6
File: text.py Project: c24b/clean_crawtext
 def __init__(self, language='en'):
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if language not in self._cached_stop_words:
         path = os.path.join('stopwords', 'stopwords-%s.txt' % language)
         self._cached_stop_words[language] = set(FileHelper.loadResourceFile(path).splitlines())
     self.STOP_WORDS = self._cached_stop_words[language]
Example #7
def modify_blog():
    current_user = auth_login_required()  # check token
    if isinstance(current_user, dict):
        return jsonify(current_user)
    title = request.json.get('title')
    body = request.json.get('body')
    amount = request.json.get('amount')
    tags = request.json.get('tag')
    description = request.json.get('description')
    if title is None:
        # missing arguments
        return jsonify({'result': 0, 'msg': "missing arguments"})
    # filter_by() returns a Query, not a row; .first() fetches the blog itself
    current_blog = Blog.query.filter_by(title=title,
                                        userid=current_user.id).first()
    if current_blog is None:
        return jsonify({'result': 0, 'msg': "blog not found"})
    if body:
        blog_txt = os.path.join(os.getcwd(), 'blogs',
                                '{0}_{1}.txt'.format(title, str(uuid1())))
        FileHelper.saveFile(blog_txt, body)
        current_blog.body = ulord_transmitter.upload(blog_txt)
    if amount:
        current_blog.amount = amount
    if tags:
        current_blog.tag = tags
    if description:
        current_blog.description = description

    db.session.add(current_blog)
    # TODO publish
    # init data schema
    data = ulord_helper.ulord_publish_data
    data['author'] = current_user.username
    data['title'] = current_blog.title
    data['tag'] = current_blog.tag
    data['ipfs_hash'] = current_blog.body
    data['price'] = current_blog.amount
    data['pay_password'] = current_user.pay_password
    data['description'] = current_blog.description
    current_blog.claimID = ulord_helper.publish(data)
    if current_blog.claimID:
        db.session.commit()
        return jsonify({'result': 1, 'msg': 'None'})
    else:
        return jsonify({
            'result': 0,
            'msg': "error publish to the UlordPlatform"
        })
Example #8
    @classmethod
    def scan_domain_write_txt(cls, domains: list, dir_name: str):
        """
        :param domains:      domains left over after the hbase scan
        :param dir_name:     where each scan's results are stored
        """
        abspath = FileHelper.make_dir(dir_name)
        with open(abspath, 'a', encoding='utf-8') as fp:
            for domain in domains:
                fp.write(domain)  # lines from readlines() keep their newline
Example #9
def createPNG():
    phantomjs_path = os.path.join(os.getcwd(), 'utils', 'phantomJS',
                                  'phantomjs.exe')
    highcharts_convert_path = os.path.join(os.getcwd(), 'utils', 'phantomJS',
                                           'highcharts-convert.js')
    rootdir = os.path.join(os.getcwd(), 'utils', 'phantomJS', 'JSON')
    # Render every JSON chart description to a PNG via highcharts-convert
    # (was `list = os.listdir(...)`, which shadows the builtin)
    for sub_dir in os.listdir(rootdir):
        json_path = os.path.join(rootdir, sub_dir)
        for json_name in os.listdir(json_path):
            json_file_path = os.path.join(json_path, json_name)
            png_path = os.path.join(os.getcwd(), 'PNG', sub_dir,
                                    json_name) + '.png'
            command = '{} {} -infile "{}" -outfile "{}"'.format(
                phantomjs_path, highcharts_convert_path,
                json_file_path, png_path)
            os.system(command)
    png_zip = str(uuid.uuid1()) + '.zip'
    startdir = os.path.join(os.getcwd(), 'PNG')
    file_news = os.path.join(os.getcwd(), 'static', png_zip)
    ZipFileHelper.zip_ya(startdir, file_news)
    FileHelper.clearUploadedData()
    return png_zip
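os.system with hand-assembled quotes is fragile when paths contain spaces; as an alternative sketch (same illustrative paths), subprocess.run takes an argument list and skips the shell entirely:

import subprocess

def render_chart(phantomjs_path, convert_js, json_file, png_file):
    # Argument list avoids shell quoting; check=True raises on non-zero exit
    subprocess.run(
        [phantomjs_path, convert_js, '-infile', json_file, '-outfile', png_file],
        check=True)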
Example #10
 @classmethod
 def merge_txt(cls):
     """
     Merge the historical, already-deduplicated txt files.
     New domains are later checked for duplicates against this merged file.
     """
     log.info("Merging the filtered txt files.....")
     txt_files_lst = FileHelper.get_txt_list(config.DATA_DIR)
     dir_name = config.MERGE_DIR
     abspath = FileHelper.make_dir(dir_name)
     for lst in txt_files_lst:
         is_merged = False
         _d = re.search(r"\d{8}", lst).group()
         is_cleaned = cls.merge_record(_d, is_merged)
         if is_cleaned:
             log.warning("File [{}] has already been processed!".format(_d))
             continue
         log.info("Processing file [{}]!".format(_d))
         with open(abspath, 'a', encoding='utf-8') as ff:
             with open(lst, 'r', encoding='utf-8') as fm:
                 for line in fm:
                     ff.write(line)
     log.info("Merge finished!")
Example #11
    @classmethod
    def scanner(cls):
        """
        Look up domains in hbase and collect the ones that are missing.
        """
        log.info("Starting whois scan......")
        need_scan_domain_txt_lst = FileHelper.get_txt_list(config.DATA_DIR)
        not_in_hbase_domains = []
        for lst in need_scan_domain_txt_lst:
            _d = re.search(r"\d{8}", lst).group()
            is_scanned = cls.scan_record(_d, False)
            if is_scanned:
                log.warning("File [{}] has already been processed!".format(_d))
                continue
            log.info("Processing file [{}]!".format(_d))
            try:
                with open(lst, 'r', encoding='utf-8') as old_reader:
                    # locally filtered domains read back from disk
                    scan_lst_domain_lines = old_reader.readlines()
                for domain in scan_lst_domain_lines:
                    log.info("Querying: [{}]".format(domain.strip()))
                    dom_key = domain2rowkey(domain.strip())
                    item = cls.table.row(dom_key)
                    if not item:
                        not_in_hbase_domains.append(domain)

                cls.scan_domain_write_txt(
                    not_in_hbase_domains,
                    dir_name=NOT_IN_HBASE_DOMAIN.format(_d, _d))
                log.info("Domains missing from hbase: {}".format(
                    len(not_in_hbase_domains)))
                log.info("File [{}] scan finished!\n".format(_d))
                not_in_hbase_domains.clear()

            except Exception as e:
                log.error(str(e))

        log.info("hbase scan finished!")
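The cls.table.row(...) call shape matches the happybase HBase client; assuming that library (host and table name below are illustrative, not confirmed by the source), a minimal existence check looks like:

import happybase

def domain_in_hbase(row_key: bytes, host: str = 'localhost') -> bool:
    # happybase returns an empty dict for a missing row
    connection = happybase.Connection(host)
    try:
        table = connection.table('whois')
        return bool(table.row(row_key))
    finally:
        connection.close()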
Example #12
 @classmethod
 def domain_write_to_txt(cls, domains, dir_name):
     # Append one domain per line to the target file
     abspath = FileHelper.make_dir(dir_name)
     with open(abspath, 'a', encoding='utf-8') as fp:
         for domain in domains:
             fp.write(domain + '\n')
Example #13
 def __init__(self, language='en'):
     if language not in self._cached_stop_words:
         path = os.path.join('text', 'stopwords-%s.txt' % language)
         self._cached_stop_words[language] = \
             set(FileHelper.loadResourceFile(path).splitlines())
     self.STOP_WORDS = self._cached_stop_words[language]
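Examples #6 and #13 share one idea: a class-level dict caches the stopword set so each language's file is read only once per process. A self-contained sketch of the pattern (class name and file layout are illustrative, not the project's API):

class StopWords:
    _cached_stop_words = {}  # class-level: shared by every instance

    def __init__(self, language='en'):
        if language not in self._cached_stop_words:
            path = 'stopwords-%s.txt' % language  # hypothetical file layout
            with open(path, encoding='utf-8') as fp:
                self._cached_stop_words[language] = set(fp.read().splitlines())
        self.STOP_WORDS = self._cached_stop_words[language]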
Example #14
def blog_publish():
    current_user = auth_login_required()  # check token
    if isinstance(current_user, dict):
        return jsonify(current_user)
    title = request.json.get('title')
    body = request.json.get('body')
    amount = request.json.get('amount')
    tags = request.json.get('tag')
    description = request.json.get('description')
    if title is None or body is None or amount is None:
        # missing arguments
        return jsonify({'result': 0, 'msg': "missing arguments"})
    if Blog.query.filter_by(title=title,
                            userid=current_user.id).first() is not None:
        # existing title
        return jsonify({'result': 0, 'msg': "existing title"})
    if current_user.balance < 1:
        # insufficient balance
        return jsonify({'result': 0, 'msg': "Insufficient amount"})
    # TODO upload body to IPFS
    try:
        body_txt = os.path.join(os.getcwd(), 'blogs',
                                '{}.txt'.format(title))
    except Exception:
        print("Doesn't support Chinese. Using uuid")
        body_txt = os.path.join(os.getcwd(), 'blogs',
                                '{}.txt'.format(str(uuid1())))
    if FileHelper.saveFile(body_txt, body):
        file_hash = ulord_transmitter.upload(body_txt)
        try:
            os.remove(body_txt)
        except OSError:
            print("Error rm {}".format(body_txt))

        # TODO publish
        # init data schema
        data = ulord_helper.ulord_publish_data
        data['author'] = current_user.username
        data['title'] = title
        data['tag'] = tags
        data['ipfs_hash'] = file_hash
        data['price'] = amount
        data['pay_password'] = current_user.pay_password
        data['description'] = description
        claimID = ulord_helper.publish(data)
        if claimID:
            # cost balance
            current_user.balance -= 1
            # save blog to local DB
            new_blog = Blog(id=str(uuid1()),
                            title=title,
                            amount=amount,
                            views=0)
            if tags:
                for tag in tags:
                    if Tag.query.filter_by(tagname=tag).first() is None:
                        new_blog.tags.append(Tag(tag))
            if description:
                new_blog.description = description
            new_blog.body = file_hash
            new_blog.date = int(time.time())
            new_blog.userid = current_user.id
            new_blog.claimID = claimID
            db.session.add(new_blog)
            db.session.commit()
            return jsonify({'result': 1, 'msg': 'None'})
        else:
            return jsonify({
                'result': 0,
                'msg': "error publish to the UlordPlatform"
            })
    else:
        return jsonify({'result': 0, 'msg': "save file failed"})
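Both publish views build a filename straight from the user-supplied title, which breaks on path separators or reserved characters. A hedged sketch of a slug-style fallback (not part of the project):

import re
import uuid

def safe_txt_name(title: str) -> str:
    # Keep word characters and hyphens; collapse everything else to '_'
    slug = re.sub(r'[^\w\-]+', '_', title).strip('_')
    return '{}.txt'.format(slug or uuid.uuid1())  # uuid fallback for empty slugs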
Example #15
def blog_publish():
    current_user = auth_login_required()  # check token
    if isinstance(current_user, dict):
        return jsonify(current_user)
    title = request.json.get('title')
    body = request.json.get('body')
    amount = request.json.get('amount')
    tags = request.json.get('tag')
    description = request.json.get('description')
    if title is None or body is None or amount is None:
        # missing arguments
        return jsonify({'errcode': 60100, 'reason': "missing arguments"})
    # TODO upload body to IPFS
    start = time.time()
    try:
        body_txt = os.path.join(os.getcwd(), 'blogs',
                                '{}.txt'.format(title))
    except Exception:
        app.logger.warning("Doesn't support Chinese. Using uuid")
        body_txt = os.path.join(os.getcwd(), 'blogs',
                                '{}.txt'.format(str(uuid1())))
    if FileHelper.saveFile(body_txt, body):
        end_save = time.time()
        app.logger.debug({
            'start': start,
            'end_save': end_save,
            'total': end_save - start
        })
        file_hash = ulord_transmitter.upload(body_txt)
        end_upload = time.time()
        app.logger.debug({
            'start': end_save,
            'end_upload': end_upload,
            'total': end_upload - end_save
        })
        try:
            os.remove(body_txt)
        except OSError:
            app.logger.error("Error rm {}".format(body_txt))
        # Take the timestamp outside the try so end_remove is always defined;
        # the original set it inside, causing a NameError if the remove failed
        end_remove = time.time()
        app.logger.debug({
            'start': end_upload,
            'end_remove': end_remove,
            'total': end_remove - end_upload
        })

        # TODO publish
        # init data schema
        data = ulord_helper.ulord_publish_data
        data['author'] = current_user.wallet
        data['title'] = title
        data['tag'] = tags
        data['udfs_hash'] = file_hash
        data['price'] = amount
        data['pay_password'] = current_user.pay_password
        data['description'] = description
        result = ulord_helper.publish(data)
        end_publish = time.time()
        app.logger.debug({
            'start': end_remove,
            'end_publish': end_publish,
            'total': end_publish - end_remove
        })
        return jsonify(result)
    else:
        return jsonify({'errcode': 60200, 'reason': "failed to upload file"})
Example #16
def go_index_2():
    # Clear previously uploaded data before rendering the index page
    # (replaces an older, commented-out del_file(...) approach)
    FileHelper.clearUploadedData(True)
    return render_template('index.html')
Example #17
def deleteFile():
    file_name = request.args.get('file_name')
    result = FileHelper.deleteFile(file_name)
    jsonstr = json.dumps(result)
    return jsonstr