def start_requests(self):
    """Seed Google Translate TTS requests, one per queued keyword.

    Pops up to 200000 keywords from the SSDB list
    "google_video_word_urls", computes the per-keyword anti-bot ``tk``
    token via Py4Js, and yields one TTS request per keyword.  The
    keyword is carried in ``meta`` for the parse callback.

    Yields:
        scrapy.Request: TTS request with ``meta={"keyword": keyword}``.
    """
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("google_video_word_urls")
            # lpop returns a falsy value once the queue is drained; the
            # original crashed here with AttributeError on .decode().
            if not item:
                break
            keyword = item.decode("utf8")
            keyword_len = len(keyword)
            # tk must be recomputed for every keyword.
            tk = Py4Js().get_tk(keyword)
            # NOTE(review): keyword is interpolated unescaped -- multi-word
            # or non-ASCII keywords would need urllib.parse.quote; confirm
            # the queue only holds single ASCII words.
            url = (
                'https://translate.google.cn/translate_tts?ie=UTF-8&q={keyword}'
                '&tl=en&total=1&idx=0&textlen={keyword_len}&tk={tk}'
                '&client=webapp&prev=input'
            ).format(keyword=keyword, keyword_len=keyword_len, tk=tk)
            yield scrapy.Request(url=url, callback=self.parse,
                                 dont_filter=True, meta={"keyword": keyword})
    finally:
        # finally guarantees the connection is released even if a pop,
        # decode, or token computation raises (original only closed on
        # normal completion).
        ssdb_con.close()
def start_requests(self):
    """Seed iciba dictionary lookups for queued keywords.

    Drains up to 200000 keywords from the SSDB list "iciba_word_urls"
    and yields one lookup request per keyword; the keyword travels in
    ``meta`` for the parse callback.

    Yields:
        scrapy.Request: lookup request with ``meta={"keyword": keyword}``.
    """
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("iciba_word_urls")
            # Stop when the queue is empty instead of crashing on
            # None.decode() as the original did.
            if not item:
                break
            keyword = item.decode("utf8")
            url = "http://www.iciba.com/word?w={}".format(keyword)
            yield scrapy.Request(url=url, callback=self.parse,
                                 dont_filter=True, meta={"keyword": keyword})
    finally:
        # The original never closed the SSDB connection; release it here.
        ssdb_con.close()
def start_requests(self):
    """Seed dictionary.com browse requests for queued keywords.

    Drains up to 200000 keywords from the SSDB list
    "dictionary_word_urls" and yields one browse request per keyword;
    the keyword travels in ``meta`` for the parse callback.

    Yields:
        scrapy.Request: browse request with ``meta={"keyword": keyword}``.
    """
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("dictionary_word_urls")
            # lpop returns a falsy value on an empty queue; the original
            # raised AttributeError on .decode() at that point.
            if not item:
                break
            keyword = item.decode("utf8")
            url = "https://www.dictionary.com/browse/{}".format(keyword)
            yield scrapy.Request(url=url, callback=self.parse,
                                 dont_filter=True, meta={"keyword": keyword})
    finally:
        # The original never closed the SSDB connection; release it here.
        ssdb_con.close()
def start_requests(self):
    """Seed Oxford Learner's Dictionaries searches for queued keywords.

    Drains up to 200000 keywords from the SSDB list
    "oxfordlearners_word_urls" and yields one direct-search request per
    keyword; the keyword travels in ``meta`` for the parse callback.

    Yields:
        scrapy.Request: search request with ``meta={"keyword": keyword}``.
    """
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("oxfordlearners_word_urls")
            # Stop cleanly when the queue is drained instead of crashing
            # on None.decode() as the original did.
            if not item:
                break
            keyword = item.decode("utf8")
            url = ("https://www.oxfordlearnersdictionaries.com/search/"
                   "english/direct/?q={keyword}").format(keyword=keyword)
            yield scrapy.Request(url=url, callback=self.parse,
                                 dont_filter=True, meta={"keyword": keyword})
    finally:
        # The original never closed the SSDB connection; release it here.
        ssdb_con.close()
def start_requests(self):
    """Seed Collins dictionary lookups for queued keywords.

    Drains up to 200000 keywords from the SSDB list "collins_word_urls"
    and yields one dictionary request per keyword; the keyword travels
    in ``meta`` for the parse callback.

    Yields:
        scrapy.Request: lookup request with ``meta={"keyword": keyword}``.
    """
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("collins_word_urls")
            # lpop yields a falsy value once empty; the original crashed
            # on .decode() of None at that point.
            if not item:
                break
            keyword = item.decode("utf8")
            url = ("https://www.collinsdictionary.com/dictionary/english/"
                   "{}").format(keyword)
            yield scrapy.Request(url=url, callback=self.parse,
                                 dont_filter=True, meta={"keyword": keyword})
    finally:
        # The original never closed the SSDB connection; release it here.
        ssdb_con.close()
def start_requests(self):
    """Seed Baidu Translate requests for queued keywords.

    Drains up to 200000 keywords from the SSDB list
    "baidu_video_word_urls".  Every request targets the same translate
    page URL; the keyword to translate is passed only via
    ``meta["query"]`` for the parse callback, so ``dont_filter=True``
    is required to keep Scrapy from deduplicating the identical URLs.

    Yields:
        scrapy.Request: translate-page request with ``meta={"query": keyword}``.
    """
    url = 'http://fanyi.baidu.com/translate/'
    ssdb_con = SSDBCon().connection()
    try:
        for _ in range(200000):
            item = ssdb_con.lpop("baidu_video_word_urls")
            # Stop when the queue is drained instead of crashing on
            # None.decode() as the original did.
            if not item:
                break
            keyword = item.decode("utf8")
            yield scrapy.Request(url=url, meta={"query": keyword},
                                 callback=self.parse, dont_filter=True)
    finally:
        # The original never closed the SSDB connection; release it here.
        ssdb_con.close()