示例#1
0
class MvList(object):
    """Starts the movie-list crawl and reads the stored list rows."""

    def __init__(self):
        # One database handle per instance; released again in __del__.
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    @classmethod
    def run(cls):
        """
        Create a MvListThread and call its run() method directly.
        :return: None
        """
        MvListThread().run()

    def get_mv_list(self):
        """
        Query the "list" table, keeping the connection open afterwards.
        :return: whatever self.db.select returns for that query
        """
        return self.db.select({"table": "list"}, is_close_db=False)
示例#2
0
class MvImg(object):
    """Download cover images for "list" rows whose img_status is 0.

    Every pending row is fetched from https://www.pelisplay.tv, written to
    static/images/<id>.jpg and then flagged with img_status=1.
    """

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        # __init__ may have raised before self.db was assigned; do not turn
        # that into a second error while the object is being collected.
        db = getattr(self, "db", None)
        if db is not None:
            db.closeDB()

    def run(self):
        """Process every pending image."""
        self.handle_data()

    def handle_data(self):
        """
        Download all pending images on a pool of 15 worker threads.
        :return: None
        """
        data = self.get_img_list()
        # The context manager shuts the pool down even when a task fails.
        with ThreadPoolExecutor(max_workers=15) as thread_pool:
            task_list = [
                thread_pool.submit(self.__handle_data, item) for item in data
            ]
            for task in as_completed(task_list):
                # result() re-raises any exception raised inside the worker.
                task.result()

    def __handle_data(self, item):
        """
        Fetch one image, store it on disk and flag the row as downloaded.
        :param item: row providing 'id' and 'img_src'
        :return: {"code": 0}
        """
        url = "https://www.pelisplay.tv" + item['img_src']
        header = {
            "User-Agent": getUserAgent(),
            "Accept": "image/webp,image/apng,image/*,*/*;q=0.8"
        }
        data = curlData(url, header=header)
        # The file is opened in binary mode, so text payloads are encoded.
        if isinstance(data, str):
            data = data.encode("utf-8")
        with open("static/images/{id}.jpg".format(id=item['id']), "wb") as f:
            f.write(data)
        # Only flag the row once the file has been written and closed.
        self.__update_data(item)
        return {"code": 0}

    def __update_data(self, item):
        """
        Set img_status=1 for the row, serialised through the shared lock.
        :param item: row providing 'id'
        :return: result of self.db.update
        """
        update_arr = {
            "table": "list",
            "set": {
                "img_status": 1
            },
            "condition": ['id={id}'.format(id=item['id'])]
        }
        lock.acquire()
        try:
            return self.db.update(update_arr, is_close_db=False)
        finally:
            # Always release, otherwise one failed update blocks every
            # other worker forever.
            lock.release()

    def get_img_list(self):
        """
        Select id and img_src of the rows that still need an image.
        :return: result of self.db.select
        """
        select_arr = {
            "table": "list",
            "columns": ["id", "img_src"],
            "condition": ['img_status=0']
        }
        return self.db.select(select_arr, is_close_db=False)
示例#3
0
class MvListData(object):
    """Copy the "list" rows referenced by "tmp_content" into "tmp_list"."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """Entry point; delegates to handle()."""
        self.handle()

    def handle(self):
        """Process every row returned by get_data(), one at a time."""
        for row in self.get_data():
            self.__mv_data(row)

    def __mv_data(self, item):
        """
        Look up the "list" row for item['list_id'] and insert it into
        "tmp_list".
        :param item: row providing 'list_id'
        :return: result of self.db.insert, or None when the lookup fails
        """
        query = {
            "table": "list",
            "condition": ["id={id}".format(id=item['list_id'])]
        }
        try:
            source_row = self.db.select(query, is_close_db=False)[0]
        except Exception as e:
            # Missing row or failed query: log it and skip this item.
            debug(e)
            return None
        sql = self.db.getInsertSql(source_row, "tmp_list")
        debug(sql)
        return self.db.insert(sql, is_close_db=False)

    def get_data(self):
        """
        :return: all rows of "tmp_content" as returned by self.db.select
        """
        return self.db.select({"table": "tmp_content"}, is_close_db=False)
示例#4
0
class ClearNullData(object):
    """Delete "content" rows with empty video_src and url, plus their
    parent "list" rows."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """Entry point; delegates to handle()."""
        self.handle()

    def handle(self):
        """Fetch one batch of empty rows, log it, then delete the parent
        list rows followed by the content rows."""
        rows = self.get_data()
        debug(rows)
        self.del_list(rows)
        self.del_content(rows)

    def del_list(self, data):
        """Delete the "list" row of every entry in data."""
        for row in data:
            self.__del_list(row)

    def __del_list(self, item):
        """
        :param item: row providing 'parent_id'
        :return: result of self.db.delete
        """
        condition = "id={id}".format(id=item['parent_id'])
        return self.db.delete({"table": "list", "condition": [condition]},
                              is_close_db=False)

    def del_content(self, data):
        """Delete the "content" rows of every entry in data."""
        for row in data:
            self.__del_content(row)

    def __del_content(self, item):
        """
        :param item: row providing 'parent_id'
        :return: result of self.db.delete
        """
        condition = "parent_id={id}".format(id=item['parent_id'])
        return self.db.delete({"table": "content", "condition": [condition]},
                              is_close_db=False)

    def get_data(self):
        """
        Select "content" rows (limit [0, 10]) whose video_src and url are
        both empty strings.
        :return: result of self.db.select
        """
        return self.db.select(
            {
                "table": "content",
                "limit": [0, 10],
                "condition": ["video_src=''", "and", "url=''"]
            },
            is_close_db=False)
示例#5
0
class GetImgUrlLarge(object):
    """Derive the large-thumbnail URL for pending "list" rows and store it
    in "list", "tmp_list" and "content"."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        # __init__ may have raised before self.db was assigned.
        db = getattr(self, "db", None)
        if db is not None:
            db.closeDB()

    def run(self):
        """Entry point; delegates to handle()."""
        self.handle()

    def handle(self):
        """
        Update every pending row; stop at the first row for which any of
        the three updates does not return 1.

        Rows whose img_url has no "squarethumbnails/" part are skipped.
        :return: None
        """
        for item in self.get_data():
            result = self.__handle(item)
            if result is None:
                # No URL could be derived; previously this crashed with a
                # TypeError when the missing result was subscripted.
                continue
            if all(result[key] == 1
                   for key in ("result_1", "result_2", "result_3")):
                debug(item['img_url'])
            else:
                break

    def __handle(self, item):
        """
        Build the large-thumbnail URL for one row and persist it.
        :param item: row providing 'id' and 'img_url'
        :return: dict of the three update results, or None when img_url
                 contains no "squarethumbnails/" part
        """
        img_url = item['img_url']
        try:
            s = re.findall(r'squarethumbnails\/([\w\W]*.)', img_url)[0]
        except (IndexError, TypeError) as e:
            # IndexError: no match; TypeError: img_url is not a string.
            debug(e)
            return None
        s = 'http://www.laurainthekitchen.com/largethumbnails/' + s
        return self.__update_data(s, item)

    def __update_data(self, s, item):
        """
        Write the large URL to "list" (also setting status=1), "tmp_list"
        and "content".
        :param s: large-thumbnail URL
        :param item: row providing 'id'
        :return: {"result_1": ..., "result_2": ..., "result_3": ...} holding
                 the three self.db.update results in that order
        """
        by_id = ["id={id}".format(id=item['id'])]
        by_list_id = ["list_id={id}".format(id=item['id'])]
        result_1 = self.db.update(
            {
                "table": "list",
                "set": {"img_url_large": s, "status": 1},
                "condition": by_id
            },
            is_close_db=False)
        result_2 = self.db.update(
            {
                "table": "tmp_list",
                "set": {"img_url_large": s},
                "condition": by_id
            },
            is_close_db=False)
        result_3 = self.db.update(
            {
                "table": "content",
                "set": {"img_url_large": s},
                "condition": by_list_id
            },
            is_close_db=False)
        return {
            "result_1": result_1,
            "result_2": result_2,
            "result_3": result_3
        }

    def get_data(self):
        """
        :return: img_url and id of the "list" rows with status=0
        """
        select_arr = {
            "table": "list",
            "columns": ["img_url", "id"],
            "condition": ["status=0"]
        }
        return self.db.select(select_arr, is_close_db=False)
示例#6
0
class GetImages(object):
    """Download the large image of every pending "list" row to
    static/images/large/<id>.jpg and flag the row with status=1."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        # __init__ may have raised before self.db was assigned.
        db = getattr(self, "db", None)
        if db is not None:
            db.closeDB()

    def run(self):
        """Entry point; delegates to handle()."""
        self.handle()

    def handle(self):
        """Fetch the pending rows and download their img_url_large images."""
        data = self.get_data()
        self.get_images(data, "large", img_url="img_url_large")

    @classmethod
    def start_thread(cls, data, fun, path, img_url, prefix):
        """
        Run fun(item, path, img_url, prefix) for every item on a pool of
        15 worker threads.
        :param data: iterable of items
        :param fun: callable executed once per item
        :param path: forwarded to fun
        :param img_url: forwarded to fun
        :param prefix: forwarded to fun
        :return: list of the return values, in completion order
        """
        # The context manager shuts the pool down even when a task fails.
        with ThreadPoolExecutor(max_workers=15) as thread_pool:
            task_list = [
                thread_pool.submit(fun, item, path, img_url, prefix)
                for item in data
            ]
            return [task.result() for task in as_completed(task_list)]

    def get_images(self, data, path, img_url, prefix=""):
        """
        Download one image per row of data.
        :param data: rows providing 'id' and the img_url column
        :param path: sub-directory below static/images
        :param img_url: name of the row key that holds the image URL
        :param prefix: string prepended to each URL
        :return: None
        """
        self.start_thread(data, self.__get_images, path, img_url, prefix)

    def __get_images(self, item, path, img_url, prefix):
        """Fetch one image, write it to disk and flag the row as done."""
        page_resource = self.get_page_resource(prefix + item[img_url])
        # The file is opened in binary mode, so text payloads are encoded.
        if isinstance(page_resource, str):
            page_resource = page_resource.encode("utf-8")
        target = "static/images/{path}/{id}.jpg".format(path=path,
                                                        id=item['id'])
        with open(target, "wb") as f:
            f.write(page_resource)
        # Only flag the row once the file has been written and closed.
        self.__update_data({"status": 1}, "list",
                           ["id={id}".format(id=item['id'])])

    @classmethod
    def get_page_resource(cls, url):
        """
        :param url: address to fetch
        :return: result of curlData(url, open_virtual_ip=True)
        """
        return curlData(url, open_virtual_ip=True)

    def __update_data(self, update_data, table, condition):
        """Run one update, serialised through the shared lock."""
        update_arr = {
            "table": table,
            "set": update_data,
            "condition": condition
        }
        lock.acquire()
        try:
            self.db.update(update_arr, is_close_db=False)
        finally:
            # Always release, otherwise one failed update blocks every
            # other worker forever.
            lock.release()

    def get_data(self):
        """
        :return: id, img_url and img_url_large of "list" rows with status=0
        """
        return self.db.select(
            {
                "table": "list",
                "columns": ["id", "img_url", "img_url_large"],
                "condition": ["status=0"]
            },
            is_close_db=False)
示例#7
0
class GetVideoSrc(object):
    """Resolve the video source URL of pending "content" rows and store it."""

    def __init__(self):
        # Cookies sent with every page request. They start empty;
        # get_cookie() can be called to populate them.
        self.cookie = {}
        self.db = DBConfig()

    def __del__(self):
        # __init__ may have raised before self.db was assigned.
        db = getattr(self, "db", None)
        if db is not None:
            db.closeDB()

    def run(self):
        """Fetch the pending rows and process them."""
        data = self.get_content_list()
        self.handle_data(data)

    def handle_data(self, data):
        """
        Process every row with a non-empty url on a pool of 15 threads.
        :param data: rows providing 'id' and 'url'
        :return: None
        """
        # The context manager shuts the pool down even when a task fails.
        with ThreadPoolExecutor(max_workers=15) as thread_pool:
            task_list = [
                thread_pool.submit(self.__handle_data, item)
                for item in data
                if item['url'] != ''
            ]
            for task in as_completed(task_list):
                # result() re-raises any exception raised inside the worker.
                task.result()

    def __handle_data(self, item):
        """
        Resolve the video source of one row and store it with status=1.
        :param item: row providing 'id' and 'url'
        :return: {"code": 0}
        """
        update_data = {
            'status': 1,
            'video_src': self.__get_video_src(item),
        }
        debug(update_data['video_src'])
        self.__update_data(item['id'], update_data)
        return {"code": 0}

    def __get_video_src(self, item):
        """
        Download item['url'] and extract the 'file' entry of the first
        element of the JSON passed to JSON.parse('...') in the page.
        :param item: row providing 'url'
        :return: the source URL, or "" when it cannot be extracted
        """
        header = {
            "User-Agent": getUserAgent(),
        }
        data = curlData(url=item['url'], header=header, cookie=self.cookie)
        try:
            src = re.findall(r"JSON\.parse\('([\w\W]*?)'\)\);", data)[0]
            # The payload is embedded with backslash escapes; strip them
            # before decoding.
            src = json.loads(src.replace("\\", ""))
            src = src[0]['file']
        except (IndexError, KeyError, TypeError, ValueError) as e:
            # No match, unexpected JSON shape, non-text page or invalid JSON.
            src = ""
            debug(e)
        return src

    def __update_data(self, content_id, update_data):
        """
        Update one "content" row, serialised through the shared lock.
        :param content_id: id of the row
        :param update_data: column/value mapping to store
        :return: result of self.db.update
        """
        update_arr = {
            "table": "content",
            "set": update_data,
            "condition": ['id={content_id}'.format(content_id=content_id)]
        }
        lock.acquire()
        try:
            return self.db.update(update_arr, is_close_db=False)
        finally:
            # Always release, otherwise one failed update blocks every
            # other worker forever.
            lock.release()

    def get_content_list(self):
        """
        :return: id and url of the "content" rows with status=0
        """
        return self.db.select({
            "table": "content",
            "columns": ['id', 'url'],
            "condition": ['status=0']
        }, is_close_db=False)

    def get_cookie(self):
        """Fetch cookies from https://www.pelisplay.tv/ into self.cookie."""
        header = {
            "User-Agent": getUserAgent(),
        }
        url = "https://www.pelisplay.tv/"
        self.cookie = getCookie(url, header=header)
        debug(self.cookie)
示例#8
0
class RecipeType(object):
    """Read recipe categories from the "type" table, scraping them from
    the site's sub-menu when none are stored yet."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        # __init__ may have raised before self.db was assigned.
        db = getattr(self, "db", None)
        if db is not None:
            db.closeDB()

    def get(self):
        """
        Return the "type" rows with nav_type=2, scraping the categories
        first when the table has none.
        :return: result of self.db.select
        """
        select_arr = {"table": "type", "condition": ['nav_type=2']}
        data = self.db.select(select_arr, is_close_db=False)
        # Nothing stored yet: scrape all categories, then read again.
        if not data:
            self.get_recipe_type()
            data = self.db.select(select_arr, is_close_db=False)
        return data

    def get_category(self):
        """
        :return: the "type" rows with status=0
        """
        select_arr = {"table": "type", "condition": ['status=0']}
        return self.db.select(select_arr, is_close_db=False)

    def get_recipe_type(self):
        """
        Scrape the category menu and store every entry in "type".
        :return: None
        """
        category_li = self.__handle_category()
        self.__handle_category_data(category_li)

    @classmethod
    def __handle_category(cls):
        """
        Download the page and return the direct <li> children of the first
        <ul class="sub-menu">.
        :return: list of <li> tags; empty when the page has no such <ul>
        """
        url = CommonFunc().generate_url()
        page_resource = curlData(url, open_virtual_ip=True)
        bs_data = BeautifulSoup(page_resource, "html.parser")
        category_ul = bs_data.find_all("ul", attrs={"class": "sub-menu"})
        if not category_ul:
            # Failed download or changed markup: nothing to import.
            return []
        # recursive=False keeps only the next level's <li>, not descendants.
        return category_ul[0].find_all("li", recursive=False)

    def __handle_category_data(self, category_li, handle_type=1, parent_id=0):
        """
        Insert one "type" row per menu entry; entries whose link is "#"
        are stored with nav_type=1 and their nested <li> entries are
        inserted recursively with that row's id as parent_id.
        :param category_li: iterable of <li> tags
        :param handle_type: 2 when inserting children of a parent row
        :param parent_id: parent row id, used when handle_type is 2
        :return: None
        """
        table_columns = (("id", "int"), ("name", "varchar"),
                         ("page_num", "longtext"), ("nav_type", "int"),
                         ("keyword", "varchar"), ("parent_id", "int"))
        for item in category_li:
            insert_arr = {"parent_id": 0, "nav_type": 2}
            try:
                href = item.find("a").attrs['href']
                try:
                    insert_arr['keyword'] = re.findall(
                        r'category=([\w\W]*.)', href)[0]
                except (IndexError, TypeError) as e:
                    # Link carries no category parameter; store the row
                    # without a keyword.
                    debug(e)
                if handle_type == 2:
                    insert_arr['parent_id'] = parent_id
                if href == "#":
                    insert_arr['name'] = item.find("span").getText().strip()
                    insert_arr['nav_type'] = 1
                    sql = self.db.getInsertSql(insert_arr,
                                               "type",
                                               table_columns=table_columns)
                    lastest_id = self.db.insertLastId(sql, is_close_db=False)
                    if lastest_id == 0:
                        debug("get data error")
                        continue
                    self.__handle_category_data(item.find_all("li"), 2,
                                                lastest_id)
                else:
                    insert_arr['name'] = item.getText().strip()
                    sql = self.db.getInsertSql(insert_arr,
                                               "type",
                                               table_columns=table_columns)
                    self.db.insert(sql, is_close_db=False)
            except Exception as e:
                # Best effort: one malformed entry must not stop the import.
                debug(e)
示例#9
0
class MvCategory(object):
    """Parse movie categories out of the saved index page and store them
    in the "type" table."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def get_category(self):
        """
        Read every row of the "type" table.
        :return: the selected rows, or an empty list when the result is falsy
        """
        category = self.db.select({"table": "type"}, is_close_db=False)
        return category if category else []

    def get(self):
        """
        Parse tmp/index_page.txt and store every category found in it.
        :return: None
        """
        # The page is read from a local copy rather than via get_data().
        with open("tmp/index_page.txt", "rb") as f:
            page_resource = f.read().decode("utf-8")
        soup = BeautifulSoup(page_resource, "html.parser")
        for entry in self.__get_category_list(soup):
            self.handle_data(entry)

    def handle_data(self, item):
        """
        Extract the fields of one category element, insert them and log
        whether the insert succeeded.
        :param item: the category's <li> element
        :return: None
        """
        insert_arr = {
            'status': 0,
            'url': self.__get_category_url(item),
            'img_src': self.__get_category_img_src(item),
            'icon_img_src': self.__get_category_icon_img_src(item),
            'name': self.__get_category_name(item),
            'description': self.__get_category_description(item),
        }
        stored = self.__save_date(insert_arr)
        debug("类型存储成功" if stored else "类型存储失败")

    def __save_date(self, insert_arr):
        """
        Insert one row into "type".
        :param insert_arr: column/value mapping
        :return: False when the insert result equals 0, otherwise True
        """
        table_columns = (("id", "int"), ("img_src", "varchar"),
                         ("icon_img_src", "varchar"), ("url", "varchar"),
                         ("name", "varchar"), ("description", "text"))
        sql = self.db.getInsertSql(insert_arr,
                                   table="type",
                                   table_columns=table_columns)
        outcome = self.db.insert(sql, is_close_db=False)
        return False if outcome == 0 else True

    @classmethod
    def get_data(cls):
        """
        Download the page at settings.DOMAIN.
        :return: result of curlData
        """
        return curlData(settings.DOMAIN, open_virtual_ip=True)

    @classmethod
    def __get_category_list(cls, bs):
        """
        :param bs: parsed page
        :return: the <li class="item"> children of the first
                 <ul class="owl-carousel">, or an empty list on failure
        """
        carousels = bs.find_all("ul", attrs={"class": "owl-carousel"})
        try:
            return carousels[0].find_all("li", attrs={"class": "item"})
        except Exception as e:
            debug("类型列表获取失败,错误信息:{error}".format(error=e))
            return list()

    @classmethod
    def __get_category_url(cls, item):
        """
        :param item: the category's <li> element
        :return: href of the first <a>, or "" on failure
        """
        anchor = item.find("a")
        try:
            return anchor.attrs['href']
        except Exception as e:
            debug("分类url链接获取失败,错误信息:{error}".format(error=e))
            return ""

    @classmethod
    def __get_category_img_src(cls, item):
        """
        :param item: the category's <li> element
        :return: src of the first <img>, or "" on failure
        """
        image = item.find("img")
        try:
            return image.attrs['src']
        except Exception as e:
            debug("图片地址获取失败,错误信息:{error}".format(error=e))
            return ""

    @classmethod
    def __get_category_icon_img_src(cls, item):
        """
        Get the icon image src (read from the first <img>).
        :param item: the category's <li> element
        :return: src of the first <img>, or "" on failure
        """
        icon = item.find("img")
        try:
            return icon.attrs['src']
        except Exception as e:
            debug("icon图片地址获取失败,错误信息:{error}".format(error=e))
            return ""

    @classmethod
    def __get_category_name(cls, item):
        """
        Get the category name.
        :param item: the category's <li> element
        :return: stripped text of <div class="category-name">, or "" on
                 failure
        """
        name_div = item.find("div", attrs={"class": "category-name"})
        try:
            return name_div.get_text().strip()
        except Exception as e:
            debug("类型名获取失败,错误信息:{error}".format(error=e))
            return ""

    @classmethod
    def __get_category_description(cls, item):
        """
        :param item: the category's <li> element
        :return: stripped text of <div class="category-description">, or ""
                 on failure
        """
        description_div = item.find(
            "div", attrs={"class": "category-description"})
        try:
            return description_div.get_text().strip()
        except Exception as e:
            debug("类型描述获取失败,错误信息:{error}".format(error=e))
            return ""
示例#10
0
class GetRecipeVideo(object):
    """Download the YouTube videos queued in "tmp_content" and fill that
    queue from the "type", "list" and "content" tables."""

    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """Entry point; delegates to download()."""
        self.download()

    def download(self):
        """Download every pending video of "tmp_content"."""
        self.__download(self.get_tmp_content())

    def __download(self, data):
        """
        Download the first mp4 stream of each video and flag its row.
        Failures are logged and the next row is tried.
        :param data: rows providing 'id' and 'video_id'
        :return: None
        """
        for row in data:
            video_id = row['video_id']
            url = "https://www.youtube.com/watch?v=%s" % video_id
            debug("开始抓取:--> {video_id}".format(video_id=video_id))
            try:
                stream = YouTube(url).streams.filter(subtype="mp4").first()
                stream.download("/Users/cpx/code/py/recipe/data/recipe/",
                                filename=row['video_id'])
                self.__update_data(row['id'])
            except Exception as e:
                debug(e)

    def __update_data(self, list_id):
        """
        Set status=1 on one "tmp_content" row.
        :param list_id: id of the row
        :return: result of self.db.update
        """
        return self.db.update(
            {
                "table": "tmp_content",
                "set": {
                    "status": 1
                },
                "condition": ['id={list_id}'.format(list_id=str(list_id))]
            },
            is_close_db=False)

    def get_tmp_content(self):
        """
        :return: id and video_id of the "tmp_content" rows with status=0
        """
        query = {
            "table": "tmp_content",
            "columns": ['id', 'video_id'],
            "condition": ['status=0']
        }
        return self.db.select(query, is_close_db=False)

    def handle_data(self):
        """Fill "tmp_content"; delegates to move_data()."""
        self.move_data()

    def get_data(self):
        """
        :return: all rows of "recipe_content"
        """
        return self.db.select({"table": "recipe_content"}, is_close_db=False)

    def move_data(self):
        """For every category, queue the content of its first 20 list rows."""
        for category in self.get_category():
            self.__move_data(self.get_list_by_type_id(category['id']))

    def __move_data(self, data):
        """
        Insert the first content row of each list row into "tmp_content"
        with status 0. Failures are logged and the next row is tried.
        :param data: list rows providing 'id'
        :return: None
        """
        for list_row in data:
            rows = self.get_content_by_list_id(list_row['id'])
            try:
                first = rows[0]
                first['status'] = 0
                self.__insert_data(first)
            except Exception as e:
                debug(e)

    def __insert_data(self, insert_arr):
        """
        :param insert_arr: column/value mapping for "tmp_content"
        :return: result of self.db.insert
        """
        statement = self.db.getInsertSql(insert_arr, "tmp_content")
        return self.db.insert(statement, is_close_db=False)

    def get_list_by_type_id(self, type_id):
        """
        :param type_id: value matched against recipe_type_id
        :return: "list" rows of that type, limit [0, 20]
        """
        query = {
            "table": "list",
            "condition": ['recipe_type_id={type_id}'.format(type_id=type_id)],
            "limit": [0, 20]
        }
        return self.db.select(query, is_close_db=False)

    def get_content_by_list_id(self, list_id):
        """
        :param list_id: value matched against list_id
        :return: video_id and list_id of the matching "content" rows
        """
        query = {
            "table": "content",
            "columns": ['video_id', 'list_id'],
            "condition": ["list_id={list_id}".format(list_id=str(list_id))]
        }
        return self.db.select(query, is_close_db=False)

    def get_category(self):
        """
        :return: the "type" rows whose keyword is not empty
        """
        query = {"table": "type", "condition": ['keyword<>""']}
        return self.db.select(query, is_close_db=False)
示例#11
0
class RecipeListSpider(object):
    """Crawl the recipe list pages of every pending category."""

    def __init__(self):
        self.db = DBConfig()
        self.recipe_type = RecipeType()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """
        Start crawling the recipe lists.
        :return: None
        """
        self.get_recipe_list()

    def get_list(self, condition=[], limit=[]):
        """
        Select the "list" rows with status=0.
        :param condition: accepted but not used by the query
        :param limit: accepted but not used by the query
        :return: result of self.db.select
        """
        query = {"table": "list", "condition": ['status=0']}
        return self.db.select(query, is_close_db=False)

    def get_category(self):
        """
        Fetch the categories from the RecipeType helper.
        :return: result of self.recipe_type.get_category()
        """
        return self.recipe_type.get_category()

    def get_recipe_list(self):
        """
        Crawl every category returned by get_category().
        :return: None
        """
        self.__get_recipe_list()

    def __get_recipe_list(self):
        """
        :return: None
        """
        for category in self.get_category():
            self.__get_recipe_list_child(category)

    def __set_status(self, category_id):
        """
        Set status=1 on one "type" row and log when the update returns 0.
        :param category_id: id of the row
        :return: None
        """
        outcome = self.db.update(
            {
                "table": "type",
                "set": {
                    "status": 1
                },
                "condition":
                ['id={category_id}'.format(category_id=category_id)]
            },
            is_close_db=False)
        if outcome == 0:
            debug("更新状态出错, 出错原因:unknown")

    def __get_recipe_list_child(self, info):
        """
        Crawl the pages of one category, then flag the category as done.
        A category with unreadable page_num or an empty keyword is flagged
        without being crawled.
        :param info: "type" row providing 'id', 'page_num' and 'keyword'
        :return: None
        """
        try:
            pages = json.loads(info['page_num'])['page_list']
        except Exception as e:
            debug(e)
            self.__set_status(info['id'])
            return
        if info['keyword'] != "":
            # page_list is a comma-separated string of page numbers.
            RecipeListThread(pages.split(","), info).run()
        self.__set_status(info['id'])
示例#12
0
class GetConstitutionList(object):
    def __init__(self):
        """Reset the record counter and create the database handle."""
        # Running total of fetched records; logged by __del__.
        self.count = 0
        # Database handle shared by all methods (a separate phoenix_db
        # connection, ``self.ws_db``, used to be created here and is disabled).
        self.db = DBConfig()

    def __del__(self):
        """Close the database handle and log how many records were fetched."""
        self.db.closeDB()
        total = str(self.count)
        debug("本次一共获取到了%s条数据" % total)

    def getAllConstitutionStart(self):
        """
        Crawl the paginated search results page by page, persisting progress.

        The starting page is read from the first ``constitutions_record`` row
        with ``is_over=0``; when that lookup fails the crawl starts at page 1.
        For every fetched page the "下一页" (next page) link is parsed, the
        page is handed to ``getAllConstitutionHandle`` (province fixed to
        "北京") and the next page number is written to the record with
        ``id=2``. The loop ends when no next-page link can be parsed or when
        the next page number is not greater than the current one; the record
        is then marked ``is_over=1``.

        :return: None
        """
        try:
            record = self.db.select({"table": "constitutions_record", "condition": ['is_over=0']},
                                    is_close_db=False)
            next_page = record[0]['page']
        except Exception:
            # No usable progress record: start from the first page.
            next_page = 1
        # Raw string: the previous non-raw literal relied on invalid escape
        # sequences (``\(``, ``\d``, ``\/``). The pattern text is unchanged.
        next_link = re.compile(r'''href="javascript:toUpDownPage\('(\d+)'\);">下一页<\/a>''')
        while True:
            try:
                data = self.getConstitutionList(next_page)
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C and
                # SystemExit can still stop the retry loop; pause briefly
                # instead of retrying in a tight loop.
                debug("内容获取出错,重新获取")
                sleep(1)
                continue
            # Extract the number of the following page.
            try:
                tmpNextPage = next_link.findall(data)[0]
            except Exception:
                debug("下一页的页码获取出错")
                break
            debug("当前的页码是:%s" % str(next_page))
            debug("获取到的下一页页码是:%s" % str(tmpNextPage))
            self.getAllConstitutionHandle(data, "北京")
            # Persist progress so a later run can resume from this point.
            progress = {
                "table": "constitutions_record",
                "condition": ['id=2'],
                "set": {
                    "page": tmpNextPage,
                    "is_over": 0
                }
            }
            self.db.update(progress, is_close_db=False)
            if int(next_page) >= int(tmpNextPage):
                # The next-page link no longer advances: last page reached.
                break
            next_page = tmpNextPage
        debug("本次抓取完毕")
        finished = {
            "table": "constitutions_record",
            "condition": ['id=2'],
            "set": {
                "is_over": 1
            }
        }
        self.db.update(finished, is_close_db=False)

    def getAllConstitution(self, fun):
        """
        Browse every province listing in a headless Firefox and pass each
        result page to ``fun``.

        For each link inside the ``threecloumntitle`` elements of the fourth
        ``cloumn`` element: the link is clicked, every window other than the
        starting one is switched to, the ``rightpage`` iframe URL is loaded
        with the validity filter widened, 50 results per page is requested
        when that control exists, and the result pages are walked through the
        second link of the ``td`` element until the "下一页" (next page)
        target stops changing.

        :param fun: callable invoked as ``fun(page_source, province)`` for
            every result page
        :return: None
        """

        def read_next_page(html):
            """Return the page number behind the "下一页" link; raises when it cannot be parsed."""
            href = re.findall(r'<a[\w\W]*?href="([\w\W]*?)">下一页', html)
            href = re.findall(r'<a[\w\W]*?href="([\w\W]*?)\)', href[0])
            return re.findall(r"(\d+)", href[1])[0]

        url = "http://210.82.32.100:8081/FLFG/"
        dcap = dict(DesiredCapabilities.FIREFOX)
        ip = virtualIp()
        dcap['phantomjs.page.customHeaders.X-FORWARDED-FOR'] = ip
        dcap['phantomjs.page.customHeaders.CLIENT-IP'] = ip
        firefox_options = Options()
        firefox_options.add_argument("--headless")
        firefox_options.add_argument('--disable-gpu')
        driver = webdriver.Firefox(firefox_options=firefox_options, desired_capabilities=dcap)
        try:
            driver.get(url)
            sleep(3)
            # Wait until the fourth "cloumn" element exists. The elements are
            # looked up again on every attempt; the previous loop re-indexed
            # the same stale list and therefore could never finish.
            while True:
                columns = driver.find_elements_by_class_name("cloumn")
                if len(columns) > 3:
                    province_column = columns[3]
                    break
                sleep(1)
            title_blocks = province_column.find_elements_by_class_name("threecloumntitle")
            current_handle = driver.current_window_handle
            for title_block in title_blocks:
                try:
                    list_a = title_block.find_elements_by_tag_name("a")
                except Exception:
                    list_a = list()
                    debug("省份列表获取出错")
                for k, link in enumerate(list_a):
                    # Province name; fall back to the link index.
                    try:
                        province = link.text
                    except Exception:
                        debug("省份获取出错,继续执行,省份标记锚点为" + str(k))
                        province = str(k)
                    debug(province + ":")
                    try:
                        link.click()
                    except Exception:
                        debug("点击失败")
                    sleep(3)
                    all_handles = driver.window_handles
                    sleep(3)
                    for handle in all_handles:
                        if handle == current_handle:
                            continue
                        driver.switch_to_window(handle)
                        sleep(1)
                        data = driver.page_source
                        htmlData = BeautifulSoup(data, "html.parser")
                        try:
                            frame_url = htmlData.find_all("iframe", attrs={"id": "rightpage"})[0].attrs['src']
                            # NOTE(review): "(有效)" is a regex group, so this
                            # replaces the bare text 有效, not a parenthesised
                            # "(有效)" -- confirm that this is intended.
                            frame_url = re.sub("(有效)", "有效,已被修正,失效", frame_url)
                            driver.execute_script("location.href='" + frame_url + "'")
                            sleep(3)
                            # Ask for 50 results per page; best effort, the
                            # control may be missing.
                            try:
                                driver.find_element_by_id("span_pagesize_50").click()
                                sleep(3)
                            except Exception:
                                pass
                            data = driver.page_source
                            # Remember the next-page number so the loop below
                            # can tell when paging stops advancing.
                            try:
                                nextPage = read_next_page(data)
                            except Exception:
                                nextPage = 0
                            # Handle the current page, then move to the next.
                            while True:
                                debug("第" + str(int(nextPage) - 1) + "页:")
                                fun(data, province)
                                pager = driver.find_element_by_class_name("td")
                                try:
                                    pager.find_elements_by_tag_name("a")[1].click()
                                    sleep(3)
                                    data = driver.page_source
                                    upcoming = read_next_page(data)
                                except Exception:
                                    # No further page link or nothing parsable.
                                    break
                                if upcoming == nextPage:
                                    break
                                nextPage = upcoming
                        except Exception as e:
                            debug(e)
                        debug("")
                        # Close the province window and return to the first one.
                        driver.close()
                        sleep(1)
                        driver.switch_to_window(all_handles[0])
                        sleep(2)
        finally:
            # Always shut the browser down, even when an error propagates.
            driver.quit()

    def getConstitutionList(self, cur_page, begin_date="2018-09-01", end_date="2018-12-17"):
        """
        POST the search form at ``flfgGjjsAction.action`` and return one page
        of results.

        :param cur_page: page number sent as the ``curPage`` form field
        :param begin_date: value of the ``bbrqbegin`` form field; defaults to
            the previously hard-coded "2018-09-01"
        :param end_date: value of the ``bbrqend`` form field; defaults to the
            previously hard-coded "2018-12-17"
        :return: whatever ``curlData`` returns for the request
        """
        url = "http://210.82.32.100:8081/FLFG/flfgGjjsAction.action"
        # The request names the search page itself as its referer.
        referer = url
        post = {
            "pagesize": "20",
            "pageCount": "500",
            "curPage": cur_page,
            "resultSearch": "false",
            # Alternative filter kept for reference:
            # "lastStrWhere": "+SFYX:(有效)++^+ZLSX:(01~02~03~04~05~06~08~09~10~11~12~23)+NOT+TXTID=bj+^+SFFB=Y+",
            "lastStrWhere": "  SFYX:(有效~已被修正~失效) ^(ZLSX:1111 ~ZLSX=01)  ^ BMFL:(03)  ^ SFFB=Y ",
            "bt": "",
            "flfgnr": "",
            # Alternative value kept for reference: "有效"
            "sxx": "有效,已被修正,失效",
            "zlsxid": "12",
            "bmflid": "",
            "xldj": "",
            "bbrqbegin": begin_date,
            "bbrqend": end_date,
            "sxrqbegin": "",
            "sxrqend": "",
            "zdjg": "",
            "bbwh": ""
        }
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
        }
        return curlData(url=url, value=post, referer=referer, header=header)

    def getAllConstitutionHandle(self, data, province):
        data = re.findall(r'<a[\w\W]*?href="javascript:showLocation([\w\W]*?);"', data)
        old1 = ""
        old5 = ""
        i = 0
        thread_list = list()
        for k, v in enumerate(data):
            data[k] = tuple(v.split("'"))
            try:
                if data[k][1] == old1 and data[k][7] == old5:
                    continue
                i = i + 1
                thread_list.append(ConstitutionThread(data[k][1], data[k][7], data[k][3], province, i))
                old1 = data[k][1]
                old5 = data[k][7]
            except:
                pass
        i = len(thread_list)
        for m in range(i):
            thread_list[m].start()
        for m in range(i):
            thread_list[m].join()
        i = thread_list[0].getRv()
        # 重置计数器
        thread_list[0].reset()
        self.count = self.count + i
        return 1

    def getConstitutionData(self, flfgID, zlsxid, showDetailType, province):
        """
        Download one document page, extract its header fields, title and
        body, and insert the result into the ``constitutions`` table.

        The page lives at ``flfgByID.action`` and is addressed by the query
        parameters ``flfgID``, ``showDetailType``, ``zlsxid`` and an empty
        ``keyword``. When the header table, the title or the body cannot be
        found, the row is still inserted with ``is_get_error`` set to 1.

        :param flfgID: value of the ``flfgID`` query parameter
        :param zlsxid: value of the ``zlsxid`` query parameter
        :param showDetailType: value of the ``showDetailType`` query parameter
        :param province: stored in the ``province`` column
        :return: the value returned by ``self.db.insert``
        """
        flag = False
        query = urlencode({
            'flfgID': flfgID,
            'showDetailType': showDetailType,
            'zlsxid': zlsxid,
            'keyword': "",
        })
        url = "http://210.82.32.100:8081/FLFG/flfgByID.action" + "?" + query
        # Retry until the download succeeds. ``Exception`` (not a bare
        # except) keeps Ctrl-C working; failures are logged and spaced out.
        while True:
            try:
                data = curlData(url, query, url)
                break
            except Exception as e:
                debug(e)
                sleep(1)
        try:
            data = data.decode("utf-8")
        except (AttributeError, UnicodeDecodeError):
            # Already text, or not valid UTF-8: use the payload as it is.
            pass
        handleDataAll = BeautifulSoup(data, "html.parser")
        tables = handleDataAll.find_all("table")
        # Label text in the header table -> column name in the database.
        label_to_column = {
            '资料属性:': 'type',
            '部门分类:': 'department_type',
            '制定机关:': 'office',
            '颁布文号:': 'reference_num',
            '颁布日期:': 'issue_date',
            '施行日期:': 'execute_date',
            '时 效 性:': 'timeliness',
        }
        # Header information: the cells alternate label, value, label, ...
        try:
            cells = tables[0].find_all("td")
        except IndexError:
            # No table on the page: record the error instead of iterating a
            # placeholder string, which used to crash the loop below.
            cells = []
            flag = True
        type_data = dict()
        type_data['url'] = url
        for k in range(0, len(cells), 2):
            column = label_to_column.get(cells[k].getText().strip())
            if column is None:
                # Unknown label: skip it (the old handler re-ran the failing
                # lookup and raised ValueError).
                continue
            if k + 1 < len(cells):
                type_data[column] = cells[k + 1].getText().strip()
            else:
                # Label without a value cell.
                type_data[column] = "数据获取出错"
        # Title.
        try:
            type_data['title'] = handleDataAll.find_all("div", attrs={"class": "bt"})[0].getText().strip()
        except IndexError:
            type_data['title'] = "标题获取出错"
            flag = True
        # Body, stored as the raw HTML of the content element.
        try:
            type_data['content'] = str(handleDataAll.find_all("div", attrs={"id": "content"})[0])
        except IndexError:
            flag = True
        type_data['province'] = province
        type_data['is_get_error'] = 1 if flag else 0
        # NOTE(review): retries forever when the insert keeps failing.
        while True:
            try:
                sql = self.db.getInsertSql(type_data, "constitutions")
                result = self.db.insert(sql, is_close_db=False)
                break
            except Exception as e:
                debug(e)
        return result