Example #1
def main():
    bs = BackServer.BackServer(
        config.GAME_NEWS_URL,
        config.GAME_NEWS_DB,
        config.GAME_NEWS_TABLE,
        config.WX_CRT_GAME_NEWS,
        config.WX_AGTID_GAME_NEWS,
    )

    loggingset.logger.info("----------开始抓取游戏新闻数据----------")
    robot006 = robot.G3dmNewsRobot("https://www.3dmgame.com/")
    g3dmdj_data = robot006.parse()
    bs.save_data(g3dmdj_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot010 = robot.PSNRobot("http://psnine.com/")
    psndata = robot010.parse()
    bs.save_data(psndata, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot001 = robot.tgbusRobot("https://www.tgbus.com/")
    tgbusdata = robot001.parse()
    bs.save_data(tgbusdata, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    loggingset.logger.info("----------游戏新闻数据已抓取完成----------")
Example #2
def main():
    bs = BackServer.BackServer(
        config.NEWS_URL,
        config.NEWS_DB,
        config.NEWS_TABLE,
        config.WX_CRT_NEWS,
        config.WX_AGTID_NEWS,
    )

    loggingset.logger.info("----------开始抓取新闻数据----------")
    robot001 = robot.ToutiaoRobot(
        "https://www.toutiao.com/api/pc/feed/?min_behot_time=0&category=news_hot&utm_source="
        "toutiao&widen=1&tadrequire=true")
    toutiao_data = robot001.parse()
    bs.save_data(toutiao_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot002 = robot.SinaSportRobot("http://sports.sina.com.cn/nba/")
    sinasport_data = robot002.parse()
    bs.save_data(sinasport_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot003 = robot.WeiBoRobot("https://s.weibo.com/top/summary")
    weibo_data = robot003.parse()
    bs.save_data(weibo_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot004 = robot.TieBaRobot(
        "http://tieba.baidu.com/hottopic/browse/topicList")
    tieba_data = robot004.parse()
    bs.save_data(tieba_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    robot005 = robot.TieBa2Robot(
        "https://tieba.baidu.com/f?kw=%E7%BB%8F%E5%85%B8jrpg&fr=index",
        "经典jrpg")
    tieba_rpg_data = robot005.parse()
    bs.save_data(tieba_rpg_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    # robot006 = robot.TieBa2Robot("https://tieba.baidu.com/f?kw=%E7%94%B5%E8%84%91&fr=home", "电脑吧")
    # tieba_pc_data = robot006.parse()
    # bs.save_data(tieba_pc_data, "link")
    # bs.packaging_mes("title", "link")
    # bs.send_message()

    robot007 = robot.TieBa2Robot(
        "https://tieba.baidu.com/f?kw=%E7%AC%91%E8%AF%9D", "笑话吧")
    tieba_haha_data = robot007.parse()
    bs.save_data(tieba_haha_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()
    loggingset.logger.info("----------新闻数据已抓取完成----------")
Example #3
def main():
    bs = BackServer.BackServer(
        config.HOUSE_URL,
        config.HOUSE_DB,
        config.HOUSE_TABLE,
        config.WX_CRT_HOUSE,
        config.WX_AGTID_HOUSE,
    )

    robot001 = robot.AnjkRobot("https://cs.fang.anjuke.com/loupan/")
    anjk_data = robot001.parse()
    bs.save_data(anjk_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()
Example #4
def main():
    bs = BackServer.BackServer(
        config.ZHIHU_URL,
        config.ZHIHU_DB,
        config.ZHIHU_TABLE,
        config.WX_CRT_ZHIHU,
        config.WX_AGTID_ZHIHU,
    )

    loggingset.logger.info("----------开始抓取知乎数据----------")
    robot001 = robot.ZhiHuRobot(
        "https://api.zhihu.com/topstory/hot-list?limit=10")
    zhihu_data = robot001.parse()
    bs.save_data(zhihu_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()
    loggingset.logger.info("----------知乎数据已抓取完成----------")
Example #5
def main():
    bs = BackServer.BackServer(
        config.MOVIE_URL,
        config.MOVIE_DB,
        config.MOVIE_TABLE,
        config.WX_CRT_MOVIE,
        config.WX_AGTID_MOVIE,
    )

    loggingset.logger.info("----------开始抓取电影数据----------")
    robot001 = robot.MaoYanRobot("https://maoyan.com/")
    maoyan_data = robot001.parse()
    bs.save_data(maoyan_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()

    loggingset.logger.info("----------电影数据已抓取完成----------")
Example #6
def main():
    bs = BackServer.BackServer(
        config.BiliBili_URL,
        config.BiliBili_DB,
        config.BiliBili_TABLE,
        config.WX_CRT_BiliBili,
        config.WX_AGTID_BiliBili,
    )

    loggingset.logger.info("----------开始抓取Bilibili数据----------")

    def get_bilibili_Data(mid):
        robotx = robot.BilibiliRobot(
            "https://api.bilibili.com/x/space/arc/search?mid=" + mid +
            "&pn=1&ps=25&jsonp=jsonp")
        bilibili_data = robotx.parse()
        bs.save_data(bilibili_data, "link")
        bs.packaging_mes("title", "link", "date", "author")
        bs.send_message()

    quene = [
        883968, 168243072, 80467330, 359439463, 15773384, 396692077, 16682415, 131494610, 207539637, 928123, 71302461
    ]

    bdquene = [
        282994, 11073, 345630501, 3046429, 415479453, 79577853, 596324576, 375375, 176037767, 450595813, 4162287,
        604003146, 6574487, 483879799, 590490400, 26139491, 369750017, 4474705, 2920960, 777536, 433351, 326246517,
        62540916, 562197, 416128940, 35359510, 147166910, 26240675, 37663924, 946974, 16539048, 13354765, 10119428,
        5970160, 470156882, 168598, 94281836, 452309333, 96070394, 7788379, 3066511, 1958342, 39627524, 23604445,
        515993, 454719565, 5293668, 730732, 59905809, 414641554, 7487399, 27756469, 927587, 1577804, 18202105,
        517327498, 51896064, 79061224, 14110780, 519872016, 19642758, 279583114, 163637592, 546195, 250111460, 3766866,
        279991456, 16794231, 43222001, 9824766, 25150941, 466272, 258150656, 2206456, 25422790, 1420982, 3353026,
        10874201, 436473455, 72270557, 63231, 29329085, 5294454, 285499073, 17819768, 99157282, 648113003, 2200736,
        378885845, 254463269, 113362335, 353539995, 122879, 7552204, 38351330, 21837784, 585267, 8366990, 116683,
        295723
    ]
    for mid in quene:
        get_bilibili_Data(str(mid))

    for mid in bdquene:
        get_bilibili_Data(str(mid))

    # robot1 = robot.BilibiliRobot("https://www.bilibili.com/activity/web/view/data/814?csrf=9ed488abc43c1d2721e7e99e8d70c2a5")
    # robot1.parsebd()
    # print(list(set(quene).difference(set(bdquene))))

    loggingset.logger.info("----------Bilibili数据已抓取完成----------")
Example #7
def main():
    bs = BackServer.BackServer(
        config.GAL_URL,
        config.GAL_DB,
        config.GAL_TABLE,
        config.WX_CRT_GAL,
        config.WX_AGTID_GAL,
    )
    loggingset.logger.info("----------开始抓取Galgame 帖子----------")
    robot001 = robot.GalRobot("https://bbs.kfmax.com/")

    islogintag = robot001.islogin()
    if islogintag is False:
        loggingset.logger.info(f"没有登陆网页,用户重新登录...")
        robot001.login("flyklmwl", "482009")

    gal_dict = robot001.parse()
    bs.save_update_data(gal_dict, "tz_link")
    bs.packaging_mes("tz_title", "tz_link")
    bs.send_message()
    loggingset.logger.info("----------Galgame 抓取完成----------")
Example #8
def main():
    bs = BackServer.BackServer(
        config.WORK_URL,
        config.WORK_DB,
        config.WORK_TABLE,
        config.WX_CRT_WORK,
        config.WX_AGTID_WORK,
    )

    loggingset.logger.info("----------开始抓取工作数据----------")
    # robot001 = robot.LagouRobot("https://www.lagou.com/jobs/list_运维")
    # lagou_data = robot001.parse()
    # bs.save_data(lagou_data, "link")
    # bs.packaging_mes("jobName", "updateDate", "salary", "companyName", "link")
    # bs.send_message()

    robot002 = robot.zhilianRobot("http://www.zhilian.com")
    zhilian_data = robot002.parse()
    bs.save_data(zhilian_data, "link")
    bs.packaging_mes("jobName", "updateDate", "salary", "companyName", "link")
    bs.send_message()

    loggingset.logger.info("----------工作数据已抓取完成----------")
Example #9
from Config import config
from Tools import robot, BackServer, loggingset

bs = BackServer.BackServer(config.DJYX_URL, config.DJYX_DB, config.DJYX_TABLE,
                           config.WX_CRT_DJYX, config.WX_AGTID_DJYX)


@loggingset.logtrace
def main():
    loggingset.logger.info("----------开始抓取游戏评测数据----------")
    robot001 = robot.G3dmDJRobot("https://dl.3dmgame.com/")
    _3dm_news_data = robot001.parse()
    bs.save_data(_3dm_news_data, "link")
    bs.packaging_mes("title", "link")
    bs.send_message()
    loggingset.logger.info("----------游戏评测数据已抓取完成----------")


if __name__ == "__main__":
    main()
Example #10
from Tools import BackServer, loggingset
from Config import config
from datetime import datetime
import http.cookiejar as cookielib
from dateutil.parser import parse
import requests
import os
import re
import time
import parsel


bs = BackServer.BackServer(
    config.GAL_URL,
    config.GAL_DB,
    config.GAL_TABLE,
    config.WX_CRT_GAL,
    config.WX_AGTID_GAL,
)


class Robot:
    # headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'}
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
    }
    open_session = requests.session()

    def sourcepage(self):
        # self.result is assigned elsewhere in the class; the excerpt is truncated before that code.
        print(self.result.text)
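
http.cookiejar is imported at the top of this example, but the part of the class that uses it is not included in the excerpt. A common pattern for persisting a session's login cookies to disk looks roughly like this (the cookies.txt filename is an assumption):

session = requests.session()
session.cookies = cookielib.LWPCookieJar("cookies.txt")
try:
    session.cookies.load(ignore_discard=True)  # reuse cookies from an earlier run
except (FileNotFoundError, cookielib.LoadError):
    pass  # no saved cookie file yet
# after a successful login:
# session.cookies.save(ignore_discard=True)
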
Example #11
from Tools import robot, loggingset
from Tools import BackServer
from Config import config
import re
import json
import sys

bs = BackServer.BackServer(config.BOOK_URL, config.BOOK_DB, config.BOOK_TABLE,
                           config.WX_CRT_GAL, config.WX_AGTID_GAL)

bibi_movie_dict = {
    "0:名侦探柯南": "https://www.bilibili.com/bangumi/play/ss33378/",
    "1:黑色四叶草": "https://www.bilibili.com/bangumi/play/ss6422/"
}


def get_konan_date():
    robot001 = robot.Robot("www.baidu.com")
    result = robot001.connectpage(
        "https://www.ytv.co.jp/conan/data/story.json")  # 这个只有1-960
    print(result.text)


def get_index(url):
    robot001 = robot.Robot("www.baidu.com")
    result = robot001.connectpage(url)
    jsonstr = re.search(r'"epList":(.*),"epInfo"', result.text)
    items = json.loads(jsonstr.group(1))
    i = 0
    for item in items:
        i += 1
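
The listing is cut off inside get_index; assuming it finishes by reporting the episode count, it would presumably be driven by the series table defined above, e.g.:

for name, url in bibi_movie_dict.items():
    # name is a label such as "0:名侦探柯南"; url is the Bangumi playback page whose epList gets parsed.
    get_index(url)
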
Example #12
File: getBq.py  Project: flyklmwl/getWeb
from Config import config
from Tools import robot, loggingset
from Tools import BackServer
import requests
import time

robot001 = robot.Robot("www.baidu.com")
bs1 = BackServer.BackServer(
    config.TECH_URL,
    config.TECH_DB,
    config.TECH_TABLE,
    config.WX_CRT_TECH,
    config.WX_AGTID_TECH,
)


def parse_bq(url):
    result = robot001.connectpage(url)
    # print(result.text)
    items = robot001.get_items(".tagbqppdiv")

    for item in items:
        # print(item)
        img_url = item("img").attr("data-original")
        hz = img_url[-4:]
        title = item("a").attr("title") + hz
        download(img_url, "img\\" + title)
        time.sleep(3)
        # print(title)
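
download() is called above but not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming it simply streams the image at img_url to the given local path:

def download(url, path):
    # Sketch only: fetch the image and write it to disk in chunks.
    resp = requests.get(url, stream=True, timeout=10)
    resp.raise_for_status()
    with open(path, "wb") as f:
        for chunk in resp.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)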