Пример #1
0
 def __init__(self):
     """Create the DB helper and set the paging state and application id."""
     # Database helper built from conf.db_conf.
     self.db_oper = db_helper_class(conf.db_conf)
     # Page size, followed by the read offset (starts at 0).
     self.page_size, self.r_offset = 20, 0
     # Application id read from the app configuration.
     self.app_id = conf.app_conf.app_zygw_collect_id
 def __init__(self):
     """Create the DB helper and set the paging state."""
     # Database helper built from conf.db_conf.
     self.db_oper = db_helper_class(conf.db_conf)
     # Page size, followed by the read offset (starts at 0).
     self.page_size, self.r_offset = 50, 0
Пример #3
0
 def __init__(self):
     """Create the DB helper and store the regex patterns and cookie dict.

     The two patterns are written as raw strings so that ``\\s`` reaches the
     regex engine as-is instead of relying on Python passing an unknown
     escape sequence through unchanged (which newer interpreters warn about).
     The resulting string values are identical to the previous literals.
     """
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Marker for "original content" (the literal 原创 tag in the page HTML)
     self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
     # Introduction text; captured in the named group "intro"
     self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''
     # Cookie settings
     # NOTE(review): these are hard-coded session values; presumably they
     # expire and have to be refreshed by hand — confirm.
     self.cookie = {
         "SUV":"00175E2BDA6B097D57CBCDBE7DA71981",
         "IPLOC":"CN4401",
         "CXID":"860E089DD32C47EE1ECF55ED807B8483",
         "weixinIndexVisited":"1",
         "Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a":"1474170419,1474181444",
         "pgv_pvi":"5441667072",
         "m":"3B3BF1EE5348A83F1126D107D0679A9B",
         "GOTO":"Af99046",
         "ld":"1kllllllll2gc8WVQx31FOkMUKPY9rSnLu7JKyllll9lllll4klll5@@@@@@@@@@",
         "ad":"IZllllllll2g8FjPlllllVkcVMklllllLu7JKyllllklllllpqxlw@@@@@@@@@@@",
         "SUID":"7D096BDA3320910A0000000057CD2EFF",
         "YYID":"3B3BF1EE5348A83F1126D107D0679A9B",
         "ABTEST":"6|1480328071|v1",
         "SNUID":"176301B06A6F2830D6BC3C936AE5FEE5",
         "SUIR":"176301B06A6F2830D6BC3C936AE5FEE5",
         "ppinf":"5|1480472199|1481681799|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZTo0NTolRTYlQTElOTElRTQlQkIlQTMlRTUlODUlOEIlRTclOUElODQlRTclOEMlQUJ8Y3J0OjEwOjE0ODA0NzIxOTl8cmVmbmljazo0NTolRTYlQTElOTElRTQlQkIlQTMlRTUlODUlOEIlRTclOUElODQlRTclOEMlQUJ8dXNlcmlkOjQ0OjBGRUY3RjYyNURFRURDNENENTQ1QTQzRUVGM0NGN0I1QHFxLnNvaHUuY29tfA",
         "pprdig":"RiQVGnfvU914niX77Vxk8xtJThF_ux3DA7UjmVNJrxBwrzpx6CWo38ZIEm4AfXZ3Ez6dKMnwA0pj3kXHiP8-Qytg8HVJfNCd0NwWrGOjgLT3CHcQtHeHy7oqhJhwGtjlIqataNhNA-JbcH6KTtliGMwLlDw7GmF_Odfl0Wz7JQE",
         "JSESSIONID":"aaa-ISi7b1d8ufz7S_UIv",
         "sct":"45",
         "usid":"NNV9tIXWQrAUgBuz",
         "clientId":"3A41D977ADC76C0D1A18D0A0AC666DB9",
     }
Пример #4
0
 def __init__(self):
     """Store the target endpoint URL, create the DB helper and reset the offset."""
     # Endpoint URL
     endpoint = "http://askapi.jianke.com/app/article/AddFromWeiXinArticle"
     self.interface = endpoint
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Offset (starts at 0)
     self.offset = 0
Пример #5
0
    def __init__(self):
        """Start a hidden spynner browser and initialise DB, paging and progress state."""
        # Browser object; the user agent comes from
        # comm.random_useragent.getRandomUAItem().
        ua = comm.random_useragent.getRandomUAItem()
        self.m_browser = spynner.Browser(user_agent=ua)
        self.m_browser.hide()

        # Database object
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size, followed by the read offset (starts at 0)
        self.page_size, self.r_offset = 20, 0

        # Current progress, followed by the total number of dropped records
        self.curr_prog, self.drop_count = 0, 0

        # Application: Kangaiduo Tmall full product details
        # TODO: replace the placeholder ids below with the configured values:
        # self.app_id = conf.app_conf.app_tmall_all_products_detail
        # self.class_id = conf.class_conf.cls_tmall_all_products_detail
        self.app_id = 999
        self.class_id = 999
 def __init__(self):
     """Create the DB helper and store patterns, cookies, headers, proxy and image settings.

     The two patterns are written as raw strings so that ``\\s`` reaches the
     regex engine as-is instead of relying on Python passing an unknown
     escape sequence through unchanged (which newer interpreters warn about).
     The resulting string values are identical to the previous literals.
     """
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Marker for "original content" (the literal 原创 tag in the page HTML)
     self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
     # Introduction text; captured in the named group "intro"
     self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''
     # Cookie
     # NOTE(review): these are hard-coded session values; presumably they
     # expire and have to be refreshed by hand — confirm.
     self.cookie = {
         "SUV":"00175E2BDA6B097D57CBCDBE7DA71981",
         "CXID":"860E089DD32C47EE1ECF55ED807B8483",
         "weixinIndexVisited":"1",
         "Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a":"1474170419,1474181444",
         "pgv_pvi":"5441667072",
         "m":"3B3BF1EE5348A83F1126D107D0679A9B",
         "GOTO":"Af99046",
         "ABTEST":"6|1480328071|v1",
         "sw_uuid":"798063453",
         "sg_uuid":"8945378958",
         "ssuid":"5959137290",
         "SUID":"7D096BDA3320910A0000000057CD2EFF",
         "YYID":"3B3BF1EE5348A83F1126D107D0679A9B",
         "SUIR":"2A5F3C8D57521489E59E561E57F0BC8D",
         "ad":"PZllllllll2g8FjPlllllVPweaclllllLu7JKyllll9llllljVxlw@@@@@@@@@@@",
         "SNUID":"0D781BAB7074326DFB22BCF37153560F",
         "PHPSESSID":"kps01kgi2681ridvjbt5kgnu13",
         "JSESSIONID":"aaaBC8pLZXqTT85C1JCJv",
         "sct":"82",
         "IPLOC":"CN4401",
         "usid":"NNV9tIXWQrAUgBuz",
         "clientId":"36239E5B32D161140C0797BA330D60E4",
         "ppinf":"5|1481254589|1482464189|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8Y3J0OjEwOjE0ODEyNTQ1ODl8cmVmbmljazoyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8dXNlcmlkOjQ0OkM0RTE3Q0YwNkQyNEU2RDNENEI2RkNCOTU4RDlFM0MwQHFxLnNvaHUuY29tfA",
         "pprdig":"JyxNQefqTXRhf21x-VU-PgZrGMZrYBHF4YL8YdZqZ600VXLrd3ndG7TH3VgOsWSaJz8XIwVR3CWqGUzgWM7SMJG25PeaI2hyhpWOlsx3y23cAPuazlv8e5PXQu0eRBxveRVuW-rbzTaczCHJQySveDKH60BVf3AJxjB1s3JP1Rc",
         "ld":"wkllllllll2gc8WVQx31FOP3J7IY9rSnLu7JKyllll9llllljylll5@@@@@@@@@@"
     }
     # Request headers
     # NOTE(review): the "Cookie" header below does not match self.cookie
     # (sct, JSESSIONID, PHPSESSID, clientId, ppinf, pprdig and IPLOC differ,
     # and it carries an extra ppmdig entry) — confirm which set is intended.
     self.headers = {
         "Accept":"*/*",
         "Accept-Encoding":"gzip, deflate, sdch",
         "Accept-Language":"zh-CN,zh;q=0.8",
         "Cache-Control":"max-age=0",
         "Cookie":'''SUV=00175E2BDA6B097D57CBCDBE7DA71981; CXID=860E089DD32C47EE1ECF55ED807B8483; weixinIndexVisited=1; Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a=1474170419,1474181444; pgv_pvi=5441667072; m=3B3BF1EE5348A83F1126D107D0679A9B; GOTO=Af99046; ABTEST=6|1480328071|v1; sw_uuid=798063453; sg_uuid=8945378958; ssuid=5959137290; SUID=7D096BDA3320910A0000000057CD2EFF; YYID=3B3BF1EE5348A83F1126D107D0679A9B; SUIR=2A5F3C8D57521489E59E561E57F0BC8D; ad=PZllllllll2g8FjPlllllVPweaclllllLu7JKyllll9llllljVxlw@@@@@@@@@@@; SNUID=0D781BAB7074326DFB22BCF37153560F; ld=wkllllllll2gc8WVQx31FOP3J7IY9rSnLu7JKyllll9llllljylll5@@@@@@@@@@; sct=83; JSESSIONID=aaaaT7jc4kb2Tevy-LCJv; clientId=044136D5677C6A4D386D3F3466DD6B92; PHPSESSID=aivb167lgfhavakjhaf49j0d20; usid=NNV9tIXWQrAUgBuz; ppinf=5|1481276797|1482486397|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8Y3J0OjEwOjE0ODEyNzY3OTd8cmVmbmljazoyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8dXNlcmlkOjQ0OkM0RTE3Q0YwNkQyNEU2RDNENEI2RkNCOTU4RDlFM0MwQHFxLnNvaHUuY29tfA; pprdig=ag8bzSGYb76QsS6dyxdJpo52aXhgH9BfTlDOl-Mw552saDnXmMRrt4DPZGQ1A8Q7sp9-mdQtOr9lpK0-9kYIurCYYXaGDYUDUm5ug2_oJuWVPXqOmBL4ZhNnbVVp8gGH1rmjn7FvmKXlPh0pThqlYYldDeYm7AcKPiC-b5ToaUk; ppmdig=1481276797000000ecefe85006bd7f2e159fe5464c707c32; IPLOC=CN8100''',
         "Host":"weixin.sogou.com",
         "If-Modified-Since":"Tue, 29 Nov 2016 05:15:10 GMT",
         "If-None-Match":"583d0ede-123",
         "Proxy-Connection":"keep-alive",
         "Referer":"http://weixin.sogou.com/weixin?type=2&query=cctv&ie=utf8&_sug_=n&_sug_type_=",
         "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0",
     }
     # Proxy IP
     # self.IP = {"http":"121.14.6.236:80"}
     self.IP = {"http":"218.103.60.205:8080"}
     # Local image storage path
     self.pictrue_uri = "F:\\pictrue\\"
     # Server-side image suffix
     self.pictrue_suffix = ".jpg"
    def __init__(self):
        """Initialise the image save root, paging state and the DB helper."""
        # Image save path (starts as an empty string)
        self.m_save_root = ""
        # Page size, followed by the offset size (starts at 0)
        self.page_size, self.off_size = 20, 0

        # Database helper built from conf.db_conf
        self.m_db_oper = db_helper_class(conf.db_conf)
    def __init__(self):
        """Create the DB helper and set the app/class ids, site name and job bookkeeping."""
        self.db_oper = db_helper_class(conf.db_conf)

        # Application: Taobao flash sale
        self.app_id = conf.app_conf.app_tbhdqg_collect_id
        # Category: Taobao flash-sale campaign
        self.class_id = conf.class_conf.cls_tbhdqg
        # Name
        self.web_name = "淘宝抢购"

        self.job_stat = JobSta()
        # Job id has the form "T<YYYYmmddHHMM>_<app_id>_<class_id>".
        stamp = time.strftime('T%Y%m%d%H%M')
        self.job_id = "%s_%s_%s" % (stamp, self.app_id, self.class_id)
Пример #9
0
    def __init__(self):
        """Start a hidden spynner browser and initialise DB, paging and app/class ids."""
        # Browser object, hidden right after creation
        self.m_browser = spynner.Browser()
        self.m_browser.hide()

        # Database object
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size, followed by the read offset (starts at 0)
        self.page_size, self.r_offset = 20, 0

        # Application and category ids read from the configuration
        self.app_id = conf.app_conf.app_360haoyao_id
        self.class_id = conf.class_conf.class_360haoyao_all
Пример #10
0
 def __init__(self):
     """Create the DB helper, reset paging and counters, and build an HTML parser."""
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Page size, followed by the read offset (starts at 0)
     self.page_size, self.r_offset = 20, 0
     # Number of products processed, followed by the number stored in the database
     self.curr_prog, self.load_num = 0, 0
     # Parser used for HTML character entities
     self.html_analysis = HTMLParser.HTMLParser()
Пример #11
0
 def __init__(self):
     """Create the DB helper and store patterns, proxy and image settings.

     The two patterns are written as raw strings so that ``\\s`` reaches the
     regex engine as-is instead of relying on Python passing an unknown
     escape sequence through unchanged (which newer interpreters warn about).
     The resulting string values are identical to the previous literals.
     """
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Marker for "original content" (the literal 原创 tag in the page HTML)
     self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
     # Introduction text; captured in the named group "intro"
     self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''
     # Proxy IP
     # self.IP = {"http":"121.14.6.236:80"}
     self.IP = {"http": "218.103.60.205:8080"}
     # Local image storage path
     self.pictrue_uri = "F:\\pictrue_1\\"
     # Server-side image suffix
     self.pictrue_suffix = "_1.jpg"
Пример #12
0
    def __init__(self):
        """Create the DB helper and initialise paging, progress counters and the app id."""
        # Database helper built from conf.db_conf
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size, followed by the read offset (starts at 0)
        self.page_size, self.r_offset = 20, 0

        # Current progress, followed by the total number of dropped records
        self.curr_prog, self.drop_count = 0, 0

        # Application: Tmall prescription drug details
        self.app_id = conf.app_conf.app_tmfcy_detail_id
Пример #13
0
    def __init__(self):
        """Start a hidden spynner browser and initialise DB, paging, counters and ids."""
        # Browser object; the user agent comes from
        # comm.random_useragent.getRandomUAItem().
        ua = comm.random_useragent.getRandomUAItem()
        self.m_browser = spynner.Browser(user_agent=ua)
        self.m_browser.hide()

        # Database helper built from conf.db_conf
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size, followed by the read offset (starts at 0)
        self.page_size, self.r_offset = 20, 0

        # Current progress, followed by the total number of dropped records
        self.curr_prog, self.drop_count = 0, 0

        # Application: Tmall health product details
        self.app_id = conf.app_conf.app_tmall_health_prods_detail
        self.class_id = conf.class_conf.cls_tmall_health_prods_detail
Пример #14
0
    def __init__(self):
        """Create the DB helper and set paging, ids, job bookkeeping and the site name."""
        # Database helper built from conf.db_conf
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size, followed by the read offset (starts at 0)
        self.page_size, self.r_offset = 20, 0

        self.kad_id = 100001

        # Application and category ids.
        # NOTE(review): the previous comments here read "Taobao flash sale",
        # which looks copied from another collector; the config names and
        # web_name below refer to the Yiyaowang bundle — confirm.
        self.app_id = conf.app_conf.app_yyw_taozhang
        self.class_id = conf.class_conf.cls_web_taozhuang_yyw

        self.job_stat = JobSta()
        # Job id has the form "T<YYYYmmddHHMM>_<app_id>_<class_id>".
        stamp = time.strftime('T%Y%m%d%H%M')
        self.job_id = "%s_%s_%s" % (stamp, self.app_id, self.class_id)

        # Name
        self.web_name = "壹药网疗程装套装"

        self.prog = 0
    def __init__(self):
        """Reset all loader settings to empty defaults and create the DB helper."""
        self.m_scan_path = ""       # scan path
        self.m_file_tmpl = ""       # file template
        self.m_fld_sep = ""         # file separator
        self.m_target_table = ""    # target table
        self.m_fld_list = ""        # field list
        self.m_load_rename = ""     # rename template
        self.m_iszip = False        # whether the input is ZIP
        self.m_zip_path = ""        # ZIP path

        self.m_flds_count = 0       # number of fields

        # Database connection built from buz_db_conf
        self.db_oper = db_helper_class(buz_db_conf)
Пример #16
0
 def __init__(self):
     """Create the database helper from conf.db_conf."""
     helper = db_helper_class(conf.db_conf)
     # Database object
     self.db_oper = helper
Пример #17
0
 def __init__(self):
     """Create the DB helper and set the paging state."""
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Page size, followed by the read offset (starts at 0)
     self.page_size, self.r_offset = 20, 0
Пример #18
0
 def __init__(self):
     """Create the DB helper and store the image settings and "original" marker pattern.

     The pattern is written as a raw string so that ``\\s`` reaches the regex
     engine as-is instead of relying on Python passing an unknown escape
     sequence through unchanged (which newer interpreters warn about). The
     resulting string value is identical to the previous literal.
     """
     # Database object
     self.db_oper = db_helper_class(conf.db_conf)
     # Server-side image suffix
     self.pictrue_suffix = "_pc.jpg"
     # Marker for "original content" (the literal 原创 tag in the page HTML)
     self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
     # Local image storage path
     self.pictrue_uri = "F:\\pictrue_pc\\"