示例#1
0
def get_link(friendpage_link, config):
    """Gather friend links from every enabled source and return the cleaned list.

    The Gitee issue source, the GitHub issue source and every parser in
    ``themes`` are asked, in that order, to append entries to one shared list.
    The list is then passed through ``delete_same_link`` and ``block_link``.

    Args:
        friendpage_link: Forwarded unchanged to each theme parser's
            ``get_friendlink``.
        config: Mapping read as ``config['setting'][<source>]`` for the
            ``enable`` / ``type`` switches; also forwarded to ``kang_api``
            and ``github_issuse``.

    Returns:
        The value returned by ``block_link`` for the collected links.
    """
    friend_poor = []
    setting = config['setting']

    # Gitee issue source: only used when enabled and of type 'normal'.
    gitee = setting['gitee_friends_links']
    if gitee['enable'] and gitee['type'] == 'normal':
        try:
            kang_api(friend_poor, config)
        except Exception:
            # Best effort: a failing source must not abort the other sources.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # propagate.
            print('读取gitee友链失败')
    else:
        print('未开启gitee友链获取')

    # GitHub issue source: same switch logic, but no message when disabled.
    github = setting['github_friends_links']
    if github['enable'] and github['type'] == 'normal':
        try:
            github_issuse(friend_poor, config)
        except Exception:
            print('读取github友链失败')

    # Theme parsers append to the shared list in place.
    for themelinkfun in themes:
        themelinkfun.get_friendlink(friendpage_link, friend_poor)

    friend_poor = delete_same_link(friend_poor)
    friend_poor = block_link(friend_poor)

    print('当前友链数量', len(friend_poor))
    return friend_poor
示例#2
0
def get_link(friendpage_link, config):
    """Gather friend links from every enabled source and return the cleaned list.

    The Gitee issue source, the GitHub issue source and every parser in
    ``themes`` are asked, in that order, to append entries to one shared list.
    A failing source is skipped quietly; the failure is recorded at DEBUG
    level so it can be surfaced by enabling logging. The list is then passed
    through ``delete_same_link`` and ``block_link``.

    Args:
        friendpage_link: Forwarded unchanged to each theme parser's
            ``get_friendlink``.
        config: Forwarded to ``kang_api`` and ``github_issuse``. The
            enable/type switches themselves are read from ``configs``.

    Returns:
        The value returned by ``block_link`` for the collected links.
    """
    # Local import keeps this block self-contained within the file.
    import logging

    log = logging.getLogger(__name__)
    friend_poor = []

    # Gitee issue source: only used when enabled and of type 'normal'.
    gitee = configs.GITEE_FRIENDS_LINKS
    if gitee['enable'] and gitee['type'] == 'normal':
        try:
            kang_api(friend_poor, config)
        except Exception:
            # Best effort, but no longer swallows KeyboardInterrupt/SystemExit.
            log.debug('reading gitee friend links failed', exc_info=True)

    # GitHub issue source: same switch logic.
    github = configs.GITHUB_FRIENDS_LINKS
    if github['enable'] and github['type'] == 'normal':
        try:
            github_issuse(friend_poor, config)
        except Exception:
            log.debug('reading github friend links failed', exc_info=True)

    # Each theme parser is tried independently; one that raises is skipped.
    for themelinkfun in themes:
        try:
            themelinkfun.get_friendlink(friendpage_link, friend_poor)
        except Exception:
            log.debug('theme parser %r failed', themelinkfun, exc_info=True)

    print("----------------------")
    friend_poor = delete_same_link(friend_poor)
    friend_poor = block_link(friend_poor)

    print("----------------------")
    print('当前友链数量', len(friend_poor))
    print("----------------------")
    return friend_poor
示例#3
0
def main():
    """Run the whole job: collect friend links, crawl posts, push results.

    Steps, in order:
      1. Initialise ``leancloud`` and pick the friend-page URL, from
         ``configs`` when ``configs.DEBUG`` is truthy, otherwise from
         ``sys.argv[1]``, ``sys.argv[2]`` and ``sys.argv[3]``.
      2. Collect friend links from the Gitee/GitHub issue sources (when
         enabled in ``configs.yml``) and from the butterfly, matery and
         volantis friend-page parsers, then pass the list through
         ``delete_same_link`` and ``block_link``.
      3. For every friend, try the butterfly, matery and volantis post
         parsers in turn, falling back to ``sitmap_get``; the last status
         string is appended to the friend entry.
      4. Hand the friend list to ``leancloud_push_userinfo`` and the posts,
         sorted by their ``'time'`` key (newest first), to ``leancloud_push``.

    Raises:
        IndexError: when not in debug mode and fewer than three command-line
            arguments were supplied.
    """
    rule = '----------------------'

    def banner(title):
        # Print a stage title framed by two rule lines.
        print(rule)
        print(title)
        print(rule)

    # LeanCloud credentials and the friend-page URL.
    if configs.DEBUG:
        leancloud.init(configs.LC_APPID, configs.LC_APPKEY)
        friendpage_link = configs.FRIENPAGE_LINK
    else:
        leancloud.init(sys.argv[1], sys.argv[2])
        friendpage_link = sys.argv[3]

    # Parsed yml configuration.
    config = configs.yml
    setting = config['setting']

    banner('-----------!!开始执行爬取文章任务!!----------')
    print('\n')
    friend_poor = []
    post_poor = []

    # ---- stage 1: friend links -------------------------------------------
    banner('-----------!!开始执行友链获取任务!!----------')
    gitee = setting['gitee_friends_links']
    if gitee['enable'] and gitee['type'] == 'normal':
        try:
            kang_api(friend_poor)
        except Exception:
            # Best effort; ``Exception`` lets Ctrl-C / SystemExit propagate.
            print('读取gitee友链失败')
    else:
        print('未开启gitee友链获取')
    github = setting['github_friends_links']
    if github['enable'] and github['type'] == 'normal':
        try:
            github_issuse(friend_poor)
        except Exception:
            print('读取github友链失败')
    else:
        print('未开启gihub友链获取')

    # Every theme parser is tried; one that raises is reported and skipped.
    try:
        butterfly.butterfly_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是butterfly主题')
    try:
        matery.matery_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是matery主题')
    try:
        volantis.volantis_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是volantis主题或未配置gitee友链')

    friend_poor = delete_same_link(friend_poor)
    friend_poor = block_link(friend_poor)
    print('当前友链数量', len(friend_poor))
    banner('-----------!!结束友链获取任务!!----------')

    # ---- stage 2: crawl posts --------------------------------------------
    total_count = 0
    error_count = 0
    for item in friend_poor:
        total_count += 1
        error = 'false'
        try:
            # Each parser returns a status string; 'true' means "failed, try
            # the next strategy".
            error = butterfly.get_last_post_from_butterfly(item, post_poor)
            if error == 'true':
                error = matery.get_last_post_from_matery(item, post_poor)
            if error == 'true':
                error = volantis.get_last_post_from_volantis(item, post_poor)
            if error == 'true':
                print("-----------获取主页信息失败,采取sitemap策略----------")
                error, post_poor = sitmap_get(item, post_poor)
        except Exception as e:
            print('\n')
            print(item, "运用主页及sitemap爬虫爬取失败!请检查")
            print('\n')
            print(e)
            error_count += 1
            # NOTE(review): ``error`` keeps whatever value it had before the
            # exception (possibly 'false'); confirm that is intended.
        item.append(error)

    print('\n')
    print(rule)
    print("一共进行%s次" % total_count)
    print("一共失败%s次" % error_count)
    print(rule)
    print('\n')

    # ---- stage 3: upload -------------------------------------------------
    banner('-----------!!执行用户信息上传!!----------')
    leancloud_push_userinfo(friend_poor)
    banner('-----------!!用户信息上传完毕!!----------')
    post_poor.sort(key=itemgetter('time'), reverse=True)
    banner('-----------!!执行文章信息上传!!----------')
    leancloud_push(post_poor)
    banner('-----------!!文章信息上传完毕!!----------')
示例#4
0
def main():
    """Run the whole job: collect friend links, crawl posts in threads, push.

    Steps, in order:
      1. Initialise ``leancloud`` and pick the friend-page URL, from
         ``configs`` when ``configs.DEBUG`` is truthy, otherwise from
         ``sys.argv[1]``, ``sys.argv[2]`` and ``sys.argv[3]``.
      2. Collect friend links from the Gitee/GitHub issue sources (when
         enabled in ``configs.yml``) and from the butterfly, matery and
         volantis friend-page parsers, then pass the list through
         ``delete_same_link`` and ``block_link``.
      3. Crawl every friend on a pool of worker threads: the butterfly,
         matery and volantis post parsers are tried in turn, falling back to
         ``sitmap_get``; the last status string is appended to the entry.
      4. Hand the friend list to ``leancloud_push_userinfo`` and the posts,
         sorted by their ``'time'`` key (newest first), to ``leancloud_push``.

    Raises:
        IndexError: when not in debug mode and fewer than three command-line
            arguments were supplied.
    """
    # Local imports keep this block self-contained within the file.
    import threading
    from queue import Empty

    rule = '----------------------'

    def banner(title):
        # Print a stage title framed by two rule lines.
        print(rule)
        print(title)
        print(rule)

    # LeanCloud credentials and the friend-page URL.
    if configs.DEBUG:
        leancloud.init(configs.LC_APPID, configs.LC_APPKEY)
        friendpage_link = configs.FRIENPAGE_LINK
    else:
        leancloud.init(sys.argv[1], sys.argv[2])
        friendpage_link = sys.argv[3]

    # Parsed yml configuration.
    config = configs.yml
    setting = config['setting']

    banner('-----------!!开始执行爬取文章任务!!----------')
    print('\n')
    friend_poor = []
    post_poor = []

    # ---- stage 1: friend links -------------------------------------------
    banner('-----------!!开始执行友链获取任务!!----------')
    gitee = setting['gitee_friends_links']
    if gitee['enable'] and gitee['type'] == 'normal':
        try:
            kang_api(friend_poor, config)
        except Exception:
            # Best effort; ``Exception`` lets Ctrl-C / SystemExit propagate.
            print('读取gitee友链失败')
    else:
        print('未开启gitee友链获取')
    github = setting['github_friends_links']
    if github['enable'] and github['type'] == 'normal':
        try:
            github_issuse(friend_poor, config)
        except Exception:
            print('读取github友链失败')
    else:
        print('未开启gihub友链获取')

    # Every theme parser is tried; one that raises is reported and skipped.
    try:
        butterfly.butterfly_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是butterfly主题')
    try:
        matery.matery_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是matery主题')
    try:
        volantis.volantis_get_friendlink(friendpage_link, friend_poor)
    except Exception:
        print('不是volantis主题或未配置gitee友链')

    friend_poor = delete_same_link(friend_poor)
    friend_poor = block_link(friend_poor)
    print('当前友链数量', len(friend_poor))
    banner('-----------!!结束友链获取任务!!----------')

    # ---- stage 2: crawl posts on worker threads --------------------------
    total_count = 0
    error_count = 0
    # ``+=`` on a shared int is a read-modify-write; guard it so concurrent
    # workers cannot lose updates.
    counter_lock = threading.Lock()

    def spider(item):
        """Crawl one friend entry and append the final status string to it."""
        nonlocal total_count, post_poor, error_count
        with counter_lock:
            total_count += 1
        error = 'false'
        try:
            # Each parser returns a status string; 'true' means "failed, try
            # the next strategy".
            error = butterfly.get_last_post_from_butterfly(item, post_poor)
            if error == 'true':
                error = matery.get_last_post_from_matery(item, post_poor)
            if error == 'true':
                error = volantis.get_last_post_from_volantis(item, post_poor)
            if error == 'true':
                print("-----------获取主页信息失败,采取sitemap策略----------")
                # NOTE(review): rebinding the shared ``post_poor`` from several
                # threads is only safe if sitmap_get returns the same list
                # object it was given; confirm against its implementation.
                error, post_poor = sitmap_get(item, post_poor)
        except Exception as e:
            print('\n')
            print(item, "运用主页及sitemap爬虫爬取失败!请检查")
            print('\n')
            print(e)
            with counter_lock:
                error_count += 1
        item.append(error)
        return item

    pending = Queue()
    for item in friend_poor:
        pending.put(item)

    def multitask():
        # Drain the queue without blocking. Checking ``empty()`` and then
        # calling a blocking ``get()`` can hang forever when another worker
        # takes the last item in between, which would also hang ``join()``.
        while True:
            try:
                item = pending.get_nowait()
            except Empty:
                return
            spider(item)

    # Up to 128 workers, but never more threads than there is work.
    worker_count = min(128, len(friend_poor))
    threads = [Thread(target=multitask) for _ in range(worker_count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    print('\n')
    print(rule)
    print("一共进行%s次" % total_count)
    print("一共失败%s次" % error_count)
    print(rule)
    print('\n')

    # ---- stage 3: upload -------------------------------------------------
    banner('-----------!!执行用户信息上传!!----------')
    leancloud_push_userinfo(friend_poor)
    banner('-----------!!用户信息上传完毕!!----------')
    post_poor.sort(key=itemgetter('time'), reverse=True)
    banner('-----------!!执行文章信息上传!!----------')
    leancloud_push(post_poor)
    banner('-----------!!文章信息上传完毕!!----------')