Пример #1
0
def run():
    """Search btc123.com flash news for every task keyword and store unseen items.

    For each task returned by ``getTasks.getTasks().btc123()`` the first page
    of the flash-news search API is requested through a proxy obtained from
    ``conn.randomOneIp('proxy:new_ip_list')``.  Each returned entry is turned
    into a record; when no document with the same ``post_title`` exists in the
    ``btc123`` collection, the record is printed, handed to
    ``getTasks.post_data`` and a deep copy is inserted into that collection.

    The request is attempted up to five times per keyword.  If every attempt
    fails the keyword is skipped, so a failed fetch can neither hit an unbound
    ``response`` nor silently re-process the previous keyword's response.
    """
    keys = getTasks.getTasks().btc123()
    db = getTasks.getTasks().getMongo()
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        "Connection": "close",
    }
    conn = connRedis.OPRedis()
    max_attempts = 5
    for task in keys:
        keyword = str(task['key'])
        url = 'https://apioperate.btc123.com/api/content/selectPageFlashNews?pageNumber=1&title={}&sourceId=1'.format(
            keyword)

        # Reset per keyword so a failure never falls back to an older response.
        response = None
        for _ in range(max_attempts):
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    # A new proxy is requested for every attempt.
                    proxies={'https': conn.randomOneIp('proxy:new_ip_list')},
                    timeout=3)
                break
            except Exception:
                # Best-effort retry; unlike a bare ``except`` this still lets
                # KeyboardInterrupt / SystemExit stop the crawler.
                print(url + "请求失败")
        if response is None:
            # All attempts failed: nothing to parse for this keyword.
            continue

        data = json.loads(response.text)
        search_url = 'https://www.btc123.com/search?type=flash&keyword={}'.format(
            keyword)
        for entry in data['data']['list']:
            item = {
                'post_title': entry['title'],
                'created_at': entry['createTime'],
                'read_count': 0,
                'original_url': search_url,
                'page_url': search_url,
                'source_host': entry['source'],
                'screen_name': '匿名',
                'text': entry['content'],
                'time': int(time.time()),
                'floor': int(entry['id']),
                'column': task['key'],
                'platform': '区块链快讯',
                'column1': task['column1'],
                'originalPlatformId': task['originalPlatformId'],
                'keywordId': task['keywordId'],
                'reptileType': task['reptileType'],
                'contentType': task['contentType'],
            }
            # De-duplicate on the title: only unseen titles are posted/stored.
            if db.btc123.find_one({'post_title': item['post_title']}) is None:
                print(item)
                getTasks.post_data(item)
                # NOTE(review): ``insert`` looks like the legacy PyMongo
                # Collection.insert API - confirm the driver version before
                # switching to ``insert_one``.
                db.btc123.insert(deepcopy(item))
    print('end')
Пример #2
0
def run():
    """Fetch pages 1-4 of the hecaijing.com express feed and store unseen entries.

    Each page is requested once (3 second timeout) through a proxy obtained
    from ``randomOneIp('proxy:new_ip_list')``; request or parsing errors
    propagate to the caller.  Every entry under ``data[0]['buttom']`` of the
    response is mapped onto a shared record dict.  When no document with the
    same ``post_title`` exists in the ``hecaijing`` collection, the record is
    printed, passed to ``getTasks.post_data`` and a deep copy is inserted.
    """
    task = getTasks.getTasks().heCaijing()
    mongo = getTasks.getTasks().getMongo()
    request_headers = {
        'Cookie':
        'PHPSESSID=dq7c7te4bmvco8ddmj4kt171p7; _ga=GA1.2.1003817106.1566180235; _gid=GA1.2.2147195660.1566180235; Hm_lvt_b94ff1ee8863337601c8a7baf17d031c=1566180235; Hm_lpvt_b94ff1ee8863337601c8a7baf17d031c=1566209238; _gat_gtag_UA_122528065_1=1',
        'Host': 'www.hecaijing.com',
        'Referer': 'https://www.hecaijing.com/kuaixun/',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    redis_conn = connRedis.OPRedis()
    # One dict is reused for every entry; all of its keys are overwritten on
    # each pass and a deep copy is what gets inserted.
    record = {}
    feed_page = 'https://www.hecaijing.com/kuaixun/'
    for page_number in range(1, 5):
        page_url = 'https://www.hecaijing.com/express/loadmore?coin=&pn={}'.format(
            page_number)
        proxy = {'https': redis_conn.randomOneIp('proxy:new_ip_list')}
        reply = requests.get(
            page_url, headers=request_headers, proxies=proxy, timeout=3)
        payload = json.loads(reply.text)
        for entry in payload['data'][0]['buttom']:
            record.update({
                'post_title': entry['title'],
                'created_at': entry['update_time'],
                'read_count': 0,
                'original_url': feed_page,
                'page_url': feed_page,
                'source_host': "",
                'screen_name': entry['publish_adminuser'],
                'text': entry['main'],
                'time': int(time.time()),
                'floor': int(entry['id']),
                'column': '火币',
                'platform': task['platform'],
                'column1': task['column1'],
                'originalPlatformId': task['originalPlatformId'],
                'keywordId': 12235,
                'reptileType': task['reptileType'],
                'contentType': task['contentType'],
            })
            already_stored = mongo.hecaijing.find_one(
                {'post_title': record['post_title']})
            if already_stored is not None:
                continue
            print(record)
            getTasks.post_data(record)
            mongo.hecaijing.insert(deepcopy(record))

    print('end')
Пример #3
0
def run():
    """Fetch the bitkan.com news briefs once and store the unseen ones.

    A single request (3 second timeout) is sent through a proxy obtained from
    ``randomOneIp('proxy:new_ip_list')``; request or parsing errors propagate
    to the caller.  Every element of the response's ``briefs`` list is mapped
    onto a shared record dict.  When no document with the same ``post_title``
    exists in the ``bitkan`` collection, the record is printed, passed to
    ``getTasks.post_data`` and a deep copy is inserted.
    """
    task = getTasks.getTasks().bitKan()
    mongo = getTasks.getTasks().getMongo()
    request_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    }
    redis_conn = connRedis.OPRedis()
    # One dict is reused for every brief; all of its keys are overwritten on
    # each pass and a deep copy is what gets inserted.
    record = {}
    endpoint = 'https://bitkan.com/api/news/weibo/705014?locale=zh'
    news_page = 'https://bitkan.com/zh/news'
    proxy = {'https': redis_conn.randomOneIp('proxy:new_ip_list')}
    reply = requests.get(
        endpoint, headers=request_headers, proxies=proxy, timeout=3)
    payload = json.loads(reply.text)
    for brief in payload['briefs']:
        record.update({
            'post_title': brief['title'],
            'created_at': brief['updated_at'],
            'read_count': 0,
            'original_url': news_page,
            'page_url': news_page,
            'source_host': "",
            'screen_name': brief['name'],
            'text': brief['content']['text'],
            'time': int(time.time()),
            'floor': int(brief['id']),
            'column': '火币',
            'platform': '比特币快讯',
            'column1': '比特币快讯',
            'originalPlatformId': 188,
            'keywordId': 12235,
            'reptileType': task['reptileType'],
            'contentType': task['contentType'],
        })
        already_stored = mongo.bitkan.find_one(
            {'post_title': record['post_title']})
        if already_stored is not None:
            continue
        print(record)
        getTasks.post_data(record)
        mongo.bitkan.insert(deepcopy(record))

    print('end')
Пример #4
0
def run():
    """Search huoxing24.com news for every task keyword and store unseen items.

    For each task returned by ``getTasks.getTasks().huoXing()`` the first
    result page of the multisearch API is requested through a proxy obtained
    from ``conn.randomOneIp('proxy:new_ip_list')``.  Responses whose ``code``
    is not 1 are ignored.  For every entry in ``obj.inforList`` the title is
    taken from the first ``【...】`` span of the content and ``created_at`` is
    assembled from the first twelve characters of the entry id (sliced as
    year, month, day, hour, minute).  When no document with the same
    ``post_title`` exists in the ``huoxing`` collection, the record is
    printed, passed to ``getTasks.post_data`` and a deep copy is inserted.

    The request is attempted up to five times per keyword; a keyword whose
    attempts all fail is skipped instead of hitting an unbound ``response`` or
    re-processing the previous keyword's response.  Entries whose content has
    no ``【...】`` title are skipped instead of raising ``IndexError``.
    """
    keys = getTasks.getTasks().huoXing()
    db = getTasks.getTasks().getMongo()
    headers = {
        "Connection":
        "close",
        'accept':
        'application/json, text/plain, */*',
        'accept-encoding':
        'gzip, deflate, br',
        'accept-language':
        'zh-CN,zh;q=0.9',
        'cookie':
        'UM_distinctid=16c99cbf4b9d1-08963401d03938-7373e61-100200-16c99cbf4ba0; _ga=GA1.2.955456149.1565948376; Hm_lvt_d70f8822d1ff168453d5ea7b3e359297=1567396038,1567646981,1569203292; CNZZDATA1272858809=353535772-1566174481-https%253A%252F%252Fflash.huoxing24.com%252F%7C1569198357; _gid=GA1.2.620194914.1569203293; _gat_gtag_UA_121795392_1=1; USD=6.833898; rightAdImgCloseTime=2019-09-23; Hm_lpvt_d70f8822d1ff168453d5ea7b3e359297=1569203315; SERVERID=29dcb2c2e0682adea06ad95c2d4fe0cc|1569203446|1569203415',
        'referer':
        'https://www.huoxing24.com/search/%E7%81%AB%E5%B8%81',
        'sec-fetch-mode':
        'cors',
        'sec-fetch-site':
        'same-origin',
        'sign-param':
        'eyJwbGF0Zm9ybSI6InBjIiwibm9uY2UiOiJQR1NObk8iLCJ0aW1lc3RhbXAiOjE1NjYyMDUxNjUyMDYsInNpZyI6IjhhODg5MDdiMmFmYjhiNGM4ODVjMTc4MmY2NjNkZjUxIn0=',
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    }
    conn = connRedis.OPRedis()
    max_attempts = 5
    # Compiled once instead of once per entry; non-greedy so only the first
    # bracketed span is captured.
    title_pattern = re.compile(r'[【](.*?)[】]', re.S)
    for task in keys:
        keyword = str(task['key'])
        url = 'https://www.huoxing24.com/info/news/multisearch?page=1&pageSize=18&type=2&q={}&deviceSource=web'.format(
            keyword)

        # Reset per keyword so a failure never falls back to an older response.
        response = None
        for _ in range(max_attempts):
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    # A new proxy is requested for every attempt.
                    proxies={'https': conn.randomOneIp('proxy:new_ip_list')},
                    timeout=3)
                # Raw body dump kept from the original implementation.
                print(response.text)
                break
            except Exception:
                # Best-effort retry; unlike a bare ``except`` this still lets
                # KeyboardInterrupt / SystemExit stop the crawler.
                response = None
                print(url + "请求失败")
        if response is None:
            # All attempts failed: nothing to parse for this keyword.
            continue

        data = json.loads(response.text)
        if data['code'] != 1:
            continue

        search_url = 'https://www.huoxing24.com/search/{}'.format(keyword)
        for entry in data['obj']['inforList']:
            title_match = title_pattern.search(entry['content'])
            if title_match is None:
                # No bracketed title to use as the de-duplication key.
                continue
            news_id = entry['id']
            item = {
                'post_title': title_match.group(1),
                # The id is sliced as YYYYMMDDHHMM followed by further digits.
                'created_at': '{}-{}-{} {}:{}'.format(
                    news_id[:4], news_id[4:6], news_id[6:8],
                    news_id[8:10], news_id[10:12]),
                'read_count': 0,
                'original_url': search_url,
                'page_url': search_url,
                'source_host': "",
                'screen_name': entry['author'],
                'text': entry['content'],
                'time': int(time.time()),
                # Everything after the date part of the id, as an integer.
                'floor': int(news_id[8:]),
                'column': task['key'],
                'platform': '火星财经',
                'column1': task['column1'],
                'originalPlatformId': task['originalPlatformId'],
                'keywordId': task['keywordId'],
                'reptileType': task['reptileType'],
                'contentType': task['contentType'],
            }
            # De-duplicate on the title: only unseen titles are posted/stored.
            if db.huoxing.find_one({'post_title': item['post_title']}) is None:
                print(item)
                getTasks.post_data(item)
                # NOTE(review): ``insert`` looks like the legacy PyMongo
                # Collection.insert API - confirm the driver version before
                # switching to ``insert_one``.
                db.huoxing.insert(deepcopy(item))
    print('end')
Пример #5
0
def run():
    """Search huoxun.com quick news for every task keyword and store unseen items.

    For each task returned by ``getTasks.getTasks().huoxun()`` the first
    result page of the quick-search API is requested through a proxy obtained
    from ``conn.randomOneIp('proxy:new_ip_list')``.  Each entry of the
    response's ``data`` list is turned into a record, with ``update_time``
    formatted as local time ``%Y-%m-%d %H:%M:%S``.  When no document with the
    same ``post_title`` exists in the ``huoxun`` collection, the record is
    printed, passed to ``getTasks.post_data`` and a deep copy is inserted.

    The request is attempted up to five times per keyword.  If every attempt
    fails the keyword is skipped, so a failed fetch can neither hit an unbound
    ``response`` nor silently re-process the previous keyword's response.
    """
    db = getTasks.getTasks().getMongo()
    keys = getTasks.getTasks().huoxun()
    headers = {
        "Connection": "close",
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie':
        'PHPSESSID=0clbstiheafjj5gr2ob50p0rj6; Hm_lvt_f396f0424d21da4c5df398bf0ca78f23=1566180318; Hm_lvt_b7769c8d87ab17b2001f99ab6b37c33d=1566180318; Hm_lpvt_f396f0424d21da4c5df398bf0ca78f23=1566206928; Hm_lpvt_b7769c8d87ab17b2001f99ab6b37c33d=1566206928',
        'referer': 'https://huoxun.com/search.html',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    conn = connRedis.OPRedis()
    max_attempts = 5
    search_page = 'https://huoxun.com/search.html'
    for task in keys:
        url = 'https://huoxun.com/cms/api/search_quick.html?search_field=title&keyword={}&page=1'.format(
            str(task['key']))

        # Reset per keyword so a failure never falls back to an older response.
        response = None
        for _ in range(max_attempts):
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    # A new proxy is requested for every attempt.
                    proxies={'https': conn.randomOneIp('proxy:new_ip_list')},
                    timeout=3)
                break
            except Exception:
                # Best-effort retry; unlike a bare ``except`` this still lets
                # KeyboardInterrupt / SystemExit stop the crawler.
                print(url + "请求失败")
        if response is None:
            # All attempts failed: nothing to parse for this keyword.
            continue

        data = json.loads(response.text)
        for entry in data['data']:
            # ``update_time`` is passed to time.localtime, i.e. it is treated
            # as seconds since the epoch.
            updated = time.localtime(entry['update_time'])
            item = {
                'post_title': entry['title'],
                'created_at': time.strftime("%Y-%m-%d %H:%M:%S", updated),
                'read_count': 0,
                'original_url': search_page,
                'page_url': search_page,
                'source_host': "",
                'screen_name': '匿名',
                'text': entry['des'],
                'time': int(time.time()),
                'floor': int(entry['id']),
                'column': task['key'],
                'platform': '火讯财经',
                'column1': task['column1'],
                'originalPlatformId': task['originalPlatformId'],
                'keywordId': task['keywordId'],
                'reptileType': task['reptileType'],
                'contentType': task['contentType'],
            }
            # De-duplicate on the title: only unseen titles are posted/stored.
            if db.huoxun.find_one({'post_title': item['post_title']}) is None:
                print(item)
                getTasks.post_data(item)
                # NOTE(review): ``insert`` looks like the legacy PyMongo
                # Collection.insert API - confirm the driver version before
                # switching to ``insert_one``.
                db.huoxun.insert(deepcopy(item))
    print('success')