示例#1
0
 def __init__(self, params=None, page=None, rankRange=None):
     """Pick the start URL, launch Chrome and initialise crawl bookkeeping.

     Args:
         params: Crawl mode. ``'trending'``, ``'interrupt'`` and ``'page'``
             each select a fixed start URL; any other value (including
             ``None``) uses the Diamond-through-Legend deck listing.
         page: Accepted but not read by this constructor; the URL used for
             the ``'page'`` mode is currently hard-coded.
         rankRange: Stored unchanged on ``self.rankRange``.
     """
     super(HSDecksSpider, self).__init__()

     # One fixed entry URL per recognised mode; edit the fragment after '#'
     # to crawl a different deck filter.
     mode_urls = {
         'trending': 'https://hsreplay.net/decks/trending/',
         'interrupt': 'https://hsreplay.net/decks/#page=48',
         'page': 'https://hsreplay.net/decks/#playerClasses=DEMONHUNTER',
     }
     fallback_url = (
         'https://hsreplay.net/decks/#rankRange=DIAMOND_THROUGH_LEGEND')
     self.start_urls = [mode_urls.get(params, fallback_url)]

     options = webdriver.ChromeOptions()
     for flag in ('--disable-gpu', '--no-sandbox',
                  'blink-settings=imagesEnabled=false'):  # last: no images
         options.add_argument(flag)
     if 'Linux' in platform.platform():
         # Run without a visible window on Linux hosts.
         options.add_argument('--headless')
     # NOTE(review): newer Selenium releases replace ``chrome_options`` with
     # ``options`` -- confirm against the installed Selenium version.
     self.browser = webdriver.Chrome(chrome_options=options)

     # Scrapy signals: spider_closed is where the browser gets shut down.
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     dispatcher.connect(self.engine_stopped, signals.engine_stopped)
     self.ifanr = iFanr()

     # In 'interrupt' mode the data for pages 48 through 80 is skipped.
     resuming = params == 'interrupt'
     self.interrupt_page = 80 if resuming else 100
     # Original note: around page 70 Chrome has to be closed and reopened.
     self.current_page = self.interrupt_page + 1 if resuming else 1

     self.params = params
     self.rankRange = rankRange
     self.total_page = 0
     self.langToggleClicked = False
     self.addCookieFlag = True
 def __init__(self):
     """Launch the Chrome driver and reset the paging/crawl state."""
     super(HSDecksSpider, self).__init__()

     options = webdriver.ChromeOptions()
     for flag in ('--disable-gpu', '--no-sandbox',
                  'blink-settings=imagesEnabled=false'):  # last: no images
         options.add_argument(flag)
     if 'Linux' in platform.platform():
         # Run without a visible window on Linux hosts.
         options.add_argument('--headless')
     # NOTE(review): newer Selenium releases replace ``chrome_options`` with
     # ``options`` -- confirm against the installed Selenium version.
     self.browser = webdriver.Chrome(chrome_options=options)

     # Scrapy signal: spider_closed is where the browser gets shut down.
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.ifanr = iFanr()

     self.current_page = 1
     self.total_page = 0
     self.langToggleClicked = False
     self.addCookieFlag = True
 def __init__(self, params=None, card_hsid=None, local_update=False):
     """Set up counters and, unless running a local update, a Chrome driver.

     Args:
         params: ``'extra_data'`` turns on ``self.extra_data_flag``; any
             other value leaves it off.
         card_hsid: Stored unchanged on ``self.single_card``.
         local_update: Either a real value (e.g. the default ``False``) or
             a string holding a Python expression such as ``'True'``. When
             the resulting value is truthy no browser is started and
             ``self.browser`` is not set.
     """
     super(HSArenaCardsSpider, self).__init__()
     if isinstance(local_update, str):
         # NOTE(review): eval() executes whatever expression the caller
         # passes in; consider ast.literal_eval if only literals such as
         # 'True'/'False' are expected.
         self.local_update = eval(local_update)
     else:
         # eval() only accepts strings, so the default ``False`` used to
         # raise TypeError here; non-string values are taken as-is.
         self.local_update = local_update
     if not self.local_update:
         chrome_opt = webdriver.ChromeOptions()
         chrome_opt.add_argument('--disable-gpu')
         chrome_opt.add_argument('--no-sandbox')
         if platform.platform().find('Linux') != -1:
             # Linux only: disable image loading and run headless.
             chrome_opt.add_argument('blink-settings=imagesEnabled=false')
             chrome_opt.add_argument('--headless')
         # NOTE(review): newer Selenium releases replace ``chrome_options``
         # with ``options`` -- confirm against the installed version.
         self.browser = webdriver.Chrome(chrome_options=chrome_opt)
     # Scrapy signal: spider_closed is where the browser gets shut down.
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.ifanr = iFanr()
     self.total_count = 0
     self.scraped_count = 0
     self.temp_count = 0
     self.cards_series = {}
     self.extra_data_flag = params == 'extra_data'
     self.single_card = card_hsid
     self.addCookieFlag = True
def update_new_cards_schedule():
    """Load card reveal times from ``new_cards.html`` into the new_cards table.

    Every ``div.card_revealed_item`` in the page contributes one row made of
    the cover image URL and the reveal time as an integer timestamp.

    Raises:
        ValueError: If an item's reveal time text does not match
            ``%Y-%m-%d %H:%M`` (including when it is missing).
    """
    html_path = 'new_cards.html'
    with open(html_path, 'r', encoding='UTF-8') as f:
        text = f.read()
    # The file is closed here so it is not held open during the uploads.

    items = Selector(text=text).css('div.card_revealed_item')
    ifanr = iFanr()
    tableID = ifanr.tablesID['new_cards']
    for item in items:
        cover = item.css(
            'div.card_revealed_img img::attr(src)').extract_first('')
        u_time = item.css('div.card_revealed_time::text').extract_first(
            '').strip()
        # mktime interprets the parsed time in the machine's local timezone.
        timestamp = int(
            time.mktime(time.strptime(u_time, "%Y-%m-%d %H:%M")))
        # Presumably the page lists times in UTC+8, hence the 8-hour shift;
        # this value is only printed, not stored -- TODO confirm intent.
        utc_reveal_time = (
            datetime.datetime.strptime(u_time, '%Y-%m-%d %H:%M') -
            datetime.timedelta(hours=8)).isoformat()
        # Print the computed timestamp (previously the ``time`` module
        # object was printed by mistake).
        print(cover, timestamp, utc_reveal_time)
        data = {'cover': cover, 'reveal_time': timestamp}
        res = ifanr.add_table_data(tableID=tableID, data=data)
        print(res)
示例#5
0
 def __init__(self, faction=None):
     """Launch the Chrome driver and record the optional faction filter.

     Args:
         faction: Optional string holding a Python expression; when truthy
             it is evaluated and the result stored on ``self.faction``,
             otherwise ``self.faction`` is ``None``.
     """
     super(BestdeckSpider, self).__init__()

     options = webdriver.ChromeOptions()
     for flag in ('--disable-gpu', '--no-sandbox',
                  'blink-settings=imagesEnabled=false'):  # last: no images
         options.add_argument(flag)
     if 'Linux' in platform.platform():
         # Run without a visible window on Linux hosts.
         options.add_argument('--headless')
     # NOTE(review): newer Selenium releases replace ``chrome_options`` with
     # ``options`` -- confirm against the installed Selenium version.
     self.browser = webdriver.Chrome(chrome_options=options)

     # Scrapy signals: spider_closed is where the browser gets shut down.
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     dispatcher.connect(self.engine_stopped, signals.engine_stopped)
     self.ifanr = iFanr()

     self.langToggleClicked = False
     self.addCookieFlag = True
     if faction:
         # NOTE(review): eval() runs arbitrary code from the spider
         # argument; consider ast.literal_eval if only literals are passed.
         self.faction = eval(faction)
     else:
         self.faction = None
                    print('update', re_dict)
            else:
                print('请传入需要筛选卡牌的dbfid')
            pass
    if res.get('meta').get('next'):
        page += 1
        filter_decks(ifanr,
                     query,
                     query_card=query_card,
                     limit=20,
                     page=page,
                     offset=limit * page)
    pass


if __name__ == '__main__':
    # Ad-hoc run: filter the decks whose card_array matches any of the
    # dbfIds listed in ``query_card``.
    ifanr = iFanr()
    dt = '2020-04-10 00:00:00'
    # ``ts`` is only needed by the optional 'updated_at' filter noted below.
    ts = int(time.mktime(time.strptime(dt, "%Y-%m-%d %H:%M:%S")))
    query_card = [56394]
    # Optional extra conditions (currently disabled):
    #   'last_30_days': {'$eq': False}, 'updated_at': {'$gt': ts}
    where_clause = {'card_array': {'$in': query_card}}
    query = {'where': json.dumps(where_clause)}
    filter_decks(ifanr, query, query_card)
def update_new_cards(card_list):
    """Insert or update card records in the ``new_cards`` table.

    Each card dict is converted to the table's field layout, looked up by
    ``dbfId`` and then either updated (when a row exists) or added.

    Args:
        card_list: Iterable of card dicts. Keys read include ``id``,
            ``classId``, ``multiClassIds``, ``cardTypeId``, ``rarityId``,
            ``minionTypeId`` and ``cardSetId``. When falsy, the cards are
            loaded from the ``cards`` key of ``new_cards.json``.

    Raises:
        IndexError: If a card carries a class, type, rarity or minion-type
            id that has no entry in the lookup tables below.
        KeyError: If a card has no ``id`` key.
    """
    ifanr = iFanr()
    tableID = ifanr.tablesID['new_cards']
    json_path = 'new_cards.json'

    # Numeric id -> name tables used to translate the incoming card fields.
    class_by_id = {
        2: 'Druid',
        3: 'Hunter',
        4: 'Mage',
        5: 'Paladin',
        6: 'Priest',
        7: 'Rogue',
        8: 'Shaman',
        9: 'Warlock',
        10: 'Warrior',
        12: 'Neutral',
        14: 'DemonHunter',
    }
    type_by_id = {
        4: 'MINION',
        5: 'SPELL',
        3: 'HERO',
        10: 'HERO_POWER',
        7: 'WEAPON',
    }
    rarity_by_id = {
        2: 'free',
        1: 'common',
        3: 'rare',
        4: 'epic',
        5: 'legendary',
    }
    race_by_id = {
        26: 'ALL',
        24: 'DRAGON',
        15: 'DEMON',
        23: 'PIRATE',
        20: 'BEAST',
        21: 'TOTEM',
        14: 'MURLOC',
        18: 'ELEMENTAL',
        17: 'MECHANICAL',
    }
    # cardSetId -> set_id stored in the table; anything else maps to 28.
    set_id_by_card_set = {1414: 23, 2: 1, 1463: 24}
    default_set_id = 28

    def name_for(names_by_id, raw_id, label):
        """Return the name for ``raw_id``, or '' when the id is missing/falsy."""
        if not raw_id:
            return ''
        try:
            return names_by_id[raw_id]
        except (KeyError, TypeError):
            # Same exception type as the former ``[...][0]`` lookup, but
            # with a message that says which id was not recognised.
            raise IndexError('unknown %s id: %r' % (label, raw_id))

    def format_data(cards):
        """Convert each card to a table row and add or update it."""
        for item in cards:
            class_id = item.get('classId')
            if class_id == 1:
                # Class id 1 has no entry in the class table; processing
                # stops at the first such card (existing behaviour).
                print('aaa', item)
                break

            cardClass = name_for(class_by_id, class_id, 'class')
            multi_ids = item.get('multiClassIds')
            if multi_ids:
                multiClass = [
                    name_for(class_by_id, multi_id, 'class')
                    if multi_id else _unknown_class(multi_id)
                    for multi_id in multi_ids
                ]
            else:
                # Single-class cards list just their own class.
                multiClass = [cardClass] if cardClass else []

            data = {
                'name': item.get('name'),
                'dbfId': item.get('id'),
                'cost': item.get('manaCost'),
                'health': item.get('health'),
                'attack': item.get('attack'),
                'text': item.get('text'),
                'img_card_link': item.get('image'),
                'flavor': item.get('flavorText'),
                'entourage': item.get('childIds'),
                'cardClass': cardClass,
                'multiClass': multiClass,
                'collectible': item.get('collectible'),
                'artist': item.get('artistName'),
                'type': name_for(type_by_id, item.get('cardTypeId'), 'card type'),
                'rarity': name_for(rarity_by_id, item.get('rarityId'), 'rarity'),
                'race': name_for(race_by_id, item.get('minionTypeId'),
                                 'minion type'),
                'set_id': set_id_by_card_set.get(item.get('cardSetId'),
                                                 default_set_id),
                'invalid': 0
            }
            query = {
                'where': json.dumps({'dbfId': {
                    '$eq': item['id']
                }}),
            }
            res = ifanr.get_table_data(tableID=tableID, query=query)
            if not res:
                print('res is none')
                continue
            if not res.get('meta').get('total_count'):
                # No existing row for this dbfId: create one.
                res = ifanr.add_table_data(tableID=tableID, data=data)
                print('add', res)
                continue
            rows = res.get('objects')
            if not rows:
                # total_count says a row exists but none was returned;
                # report it and move on instead of indexing into a string.
                print('not found card:%s' % item['id'])
                continue
            # A reveal_time override for cards lacking one was sketched
            # here in the past but is currently disabled.
            ifanr.put_table_data(tableID=tableID,
                                 id=rows[0]['id'],
                                 data=data)
            print('update', res)

    def _unknown_class(raw_id):
        """Raise for a falsy entry inside ``multiClassIds`` (never a valid id)."""
        raise IndexError('unknown class id: %r' % (raw_id,))

    if card_list:
        format_data(card_list)
    else:
        with open(json_path, 'r', encoding='UTF-8') as f:
            payload = json.load(f)
        # The file is closed before the table calls start.
        format_data(payload['cards'])