Exemplo n.º 1
0
    def start_requests(self):
        """Yield one Google Translate TTS request per keyword queued in SSDB.

        Keywords are popped from the ``google_video_word_urls`` list.  For each
        one a ``tk`` value is obtained from ``Py4Js.get_tk`` and embedded in the
        request URL; the keyword itself is handed to ``self.parse`` through
        ``meta["keyword"]``.  At most 200000 keywords are consumed per call, and
        iteration stops early once the list is empty.
        """
        # Local testing
        # with open(r'./files/data.txt', 'r', encoding='utf8')as f:
        #     for key_word in f.readlines()[100:120]:
        #         keyword = key_word.strip().split("\t")[0]
        #         keyword_len = len(keyword)
        #         pj = Py4Js()
        #         tk = pj.get_tk(keyword)
        #         # url = "https://translate.google.cn/translate_a/single?client=webapp&sl=en&tl=zh-CN&hl=zh-CN&dt=at&" \
        #         #       "dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ssel=3&tsel=3&kc=0&tk={tk}&q={keyword}".format(**locals())
        #         url = 'https://translate.google.cn/translate_tts?ie=UTF-8&q={keyword}&tl=en&total=1&idx=0' \
        #               '&textlen={keyword_len}&tk={tk}&client=webapp&prev=input'.format(**locals())
        #         yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={"keyword": keyword})

        ssdb_con = SSDBCon().connection()
        try:
            for _ in range(200000):
                item = ssdb_con.lpop("google_video_word_urls")
                if item is None:
                    # Presumably lpop returns None once the list is drained
                    # (Redis-style clients do) -- stop instead of crashing on
                    # None.decode().  TODO confirm against the SSDB client.
                    break
                keyword = item.decode("utf8")
                # NOTE(review): a fresh Py4Js is built per keyword, as in the
                # original; hoist it only if Py4Js is known to be reusable.
                tk = Py4Js().get_tk(keyword)
                # Alternative endpoint (text translation instead of TTS):
                #     url = "https://translate.google.cn/translate_a/single?client=webapp&sl=en&tl=zh-CN&hl=zh-CN&dt=at&" \
                #           "dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ssel=3&tsel=3&kc=0" \
                #           "&tk={tk}&q={keyword}".format(**locals())
                #     yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)
                # Explicit keyword arguments instead of **locals() so the URL's
                # inputs are visible and unaffected by unrelated local names.
                url = (
                    'https://translate.google.cn/translate_tts?ie=UTF-8&q={keyword}&tl=en&total=1&idx=0&textlen'
                    '={keyword_len}&tk={tk}&client=webapp&prev=input'
                ).format(keyword=keyword, keyword_len=len(keyword), tk=tk)
                yield scrapy.Request(url=url,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={"keyword": keyword})
        finally:
            # Runs on normal exhaustion, on error, and when the generator is
            # closed early, so the connection is always released.
            ssdb_con.close()
Exemplo n.º 2
0
 def start_requests(self):
     """Yield one iciba.com lookup request per keyword queued in SSDB.

     Keywords are popped from the ``iciba_word_urls`` list and passed to
     ``self.parse`` through ``meta["keyword"]``.  At most 200000 keywords
     are consumed per call; iteration stops early once the list is empty.
     """
     ssdb_con = SSDBCon().connection()
     try:
         for _ in range(200000):
             item = ssdb_con.lpop("iciba_word_urls")
             if item is None:
                 # Presumably lpop returns None once the list is drained
                 # (Redis-style clients do) -- stop instead of crashing on
                 # None.decode().  TODO confirm against the SSDB client.
                 break
             keyword = item.decode("utf8")
             url = "http://www.iciba.com/word?w={}".format(keyword)
             yield scrapy.Request(url=url,
                                  callback=self.parse,
                                  dont_filter=True,
                                  meta={"keyword": keyword})
     finally:
         # Release the connection on exhaustion, error, or early close of
         # the generator.  Assumes the connection object exposes close().
         ssdb_con.close()
Exemplo n.º 3
0
    def start_requests(self):
        """Yield one dictionary.com request per keyword queued in SSDB.

        Keywords are popped from the ``dictionary_word_urls`` list and passed
        to ``self.parse`` through ``meta["keyword"]``.  At most 200000 keywords
        are consumed per call; iteration stops early once the list is empty.
        """
        # Single-keyword manual test:
        # keyword = "words"
        # url = "https://www.dictionary.com/browse/{}".format(keyword)
        # yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={"keyword": keyword})

        ssdb_con = SSDBCon().connection()
        try:
            for _ in range(200000):
                item = ssdb_con.lpop("dictionary_word_urls")
                if item is None:
                    # Presumably lpop returns None once the list is drained
                    # (Redis-style clients do) -- stop instead of crashing on
                    # None.decode().  TODO confirm against the SSDB client.
                    break
                keyword = item.decode("utf8")
                url = "https://www.dictionary.com/browse/{}".format(keyword)
                yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={"keyword": keyword})
        finally:
            # Release the connection on exhaustion, error, or early close of
            # the generator.  Assumes the connection object exposes close().
            ssdb_con.close()
 def start_requests(self):
     """Yield one Oxford Learner's Dictionaries request per queued keyword.

     Keywords are popped from the ``oxfordlearners_word_urls`` list and
     passed to ``self.parse`` through ``meta["keyword"]``.  At most 200000
     keywords are consumed per call; iteration stops early once the list is
     empty.
     """
     ssdb_con = SSDBCon().connection()
     try:
         for _ in range(200000):
             item = ssdb_con.lpop("oxfordlearners_word_urls")
             if item is None:
                 # Presumably lpop returns None once the list is drained
                 # (Redis-style clients do) -- stop instead of crashing on
                 # None.decode().  TODO confirm against the SSDB client.
                 break
             keyword = item.decode("utf8")
             url = "https://www.oxfordlearnersdictionaries.com/search/english/direct/?q={keyword}".format(
                 keyword=keyword)
             yield scrapy.Request(url=url,
                                  callback=self.parse,
                                  dont_filter=True,
                                  meta={"keyword": keyword})
     finally:
         # Release the connection on exhaustion, error, or early close of
         # the generator.  Assumes the connection object exposes close().
         ssdb_con.close()
Exemplo n.º 5
0
 def start_requests(self):
     """Yield one Collins Dictionary request per keyword queued in SSDB.

     Keywords are popped from the ``collins_word_urls`` list and passed to
     ``self.parse`` through ``meta["keyword"]``.  At most 200000 keywords
     are consumed per call; iteration stops early once the list is empty.
     """
     ssdb_con = SSDBCon().connection()
     try:
         for _ in range(200000):
             item = ssdb_con.lpop("collins_word_urls")
             if item is None:
                 # Presumably lpop returns None once the list is drained
                 # (Redis-style clients do) -- stop instead of crashing on
                 # None.decode().  TODO confirm against the SSDB client.
                 break
             keyword = item.decode("utf8")
             url = "https://www.collinsdictionary.com/dictionary/english/{}".format(
                 keyword)
             yield scrapy.Request(url=url,
                                  callback=self.parse,
                                  dont_filter=True,
                                  meta={"keyword": keyword})
     finally:
         # Release the connection on exhaustion, error, or early close of
         # the generator.  Assumes the connection object exposes close().
         ssdb_con.close()
Exemplo n.º 6
0
    def start_requests(self):
        """Yield one Baidu Translate request per keyword queued in SSDB.

        Every request targets the same translate URL; the keyword popped from
        the ``baidu_video_word_urls`` list travels to ``self.parse`` in
        ``meta["query"]``.  At most 200000 keywords are consumed per call;
        iteration stops early once the list is empty.
        """
        url = 'http://fanyi.baidu.com/translate/'
        # Single-keyword manual test:
        # keyword = "Appenans"
        # yield scrapy.Request(url=url, meta={"query": keyword}, callback=self.parse, dont_filter=True)

        ssdb_con = SSDBCon().connection()
        try:
            for _ in range(200000):
                item = ssdb_con.lpop("baidu_video_word_urls")
                if item is None:
                    # Presumably lpop returns None once the list is drained
                    # (Redis-style clients do) -- stop instead of crashing on
                    # None.decode().  TODO confirm against the SSDB client.
                    break
                keyword = item.decode("utf8")
                # dont_filter=True is required here: the URL is identical for
                # every keyword, so the duplicate filter would drop all but one.
                yield scrapy.Request(url=url,
                                     meta={"query": keyword},
                                     callback=self.parse,
                                     dont_filter=True)
        finally:
            # Release the connection on exhaustion, error, or early close of
            # the generator.  Assumes the connection object exposes close().
            ssdb_con.close()