def load_match_words(match_word_file):
    """Load the match words listed in a text file.

    Every whitespace-separated token on a non-comment line becomes one
    entry, after being passed through ``lower``. A line is a comment when
    its first non-blank character is ``#``; blank lines contribute nothing.

    Args:
        match_word_file: Path of the word file to read.

    Returns:
        A list of the words in file order (duplicates are kept).
    """
    match_words = []
    # The context manager closes the file even if reading fails part-way.
    with open(match_word_file) as word_file:
        for line in word_file:
            if line.strip().startswith("#"):
                continue
            # A line may hold more than one word.
            match_words.extend(lower(word) for word in line.split())
    return match_words
def pdf_miner_word(pdf, path):
    """Return the abstract text of the PDF stored at ``path``.

    Each page is laid out in turn and its text boxes are collected
    (stripped and passed through ``lower``). The first box on a page that
    matches one of the known headings decides the result:

    * a box that reads ``abstract`` once spaces are removed: the box that
      follows it, or the 3rd / 4th following box when the two characters
      before the file extension in ``path`` are ``h6`` / ``h8``;
    * a box that starts with ``abstract``: the rest of that same box;
    * a box equal to ``1 introduction`` or ``summary``: the next box.

    Args:
        pdf: Mapping describing the document; only ``pdf['title']`` is
            read, and only when the file cannot be parsed.
        path: Path of the PDF file. The slice ``path[-6:-4]`` is used as a
            layout tag, so this is presumably a string ending in a
            4-character extension such as ``.pdf`` -- confirm with callers.

    Returns:
        The selected text, or ``None`` when no heading is found or the
        file is not valid PDF syntax.

    Raises:
        PDFTextExtractionNotAllowed: The document forbids text extraction.
        IndexError: A heading was found but the box expected after it does
            not exist on that page.
    """
    try:
        # Keep the file open for the whole parse and close it on every exit
        # path, including the early returns below.
        with open(path, 'rb') as pdf_file:
            # Create a PDF parser from the file object.
            parser = PDFParser(pdf_file)
            # Create the PDF document and connect it to the parser.
            document = PDFDocument()
            parser.set_document(document)
            document.set_parser(parser)
            # Supply the initial password; with no argument an empty one
            # is used.
            document.initialize()
            # Refuse documents that do not allow text extraction.
            if not document.is_extractable:
                raise PDFTextExtractionNotAllowed

            # Resource manager for shared resources, then the layout
            # device and the interpreter that feeds it.
            resource_manager = PDFResourceManager()
            device = PDFPageAggregator(resource_manager, laparams=LAParams())
            interpreter = PDFPageInterpreter(resource_manager, device)
            spaces = re.compile(' ')

            # Handle one page at a time.
            for page in document.get_pages():
                interpreter.process_page(page)
                # The LTPage for this page; it holds the parsed objects
                # (LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal, ...).
                layout = device.get_result()
                boxes = [
                    lower(element.get_text().strip())
                    for element in layout
                    if isinstance(element, LTTextBox)
                ]
                for index, text in enumerate(boxes):
                    if spaces.sub('', text) == 'abstract':
                        layout_tag = path[-6:-4]
                        if layout_tag == 'h6':
                            return boxes[index + 3]
                        if layout_tag == 'h8':
                            return boxes[index + 4]
                        return boxes[index + 1]
                    if text[0:8] == 'abstract':
                        # Skip the heading word and the character after it.
                        return text[9:]
                    if text == '1 introduction':
                        return boxes[index + 1]
                    if text == 'summary':
                        return boxes[index + 1]
    except PDFSyntaxError:
        # Record the failure in ``list5`` (defined outside this function)
        # and fall through to the implicit ``None`` result.
        failure = {"title": pdf['title'], "problem": "fail to open pdf"}
        list5.append(failure)
def word_lookup(chosen_letters: str):
    """Find the dictionary words that can be built from the given letters.

    A word matches when, with each of its characters passed through
    ``lower``, it uses no character more often than that character occurs
    in ``chosen_letters``. Empty dictionary entries never match. Characters
    outside ``a``-``z`` no longer raise ``IndexError``; a word containing
    one simply does not match unless that character was also chosen.

    Args:
        chosen_letters: The letters available for building words (the game
            passes its randomly selected letters). They are compared as
            given, without lower-casing.

    Returns:
        A list of the distinct matching words, in no particular order.
    """
    from collections import Counter

    available = Counter(chosen_letters)
    matching_words = set()
    # The words come from the dictionary file via ``dictionary_reader``.
    for word in dictionary_reader('words.txt'):
        if not word:
            continue
        needed = Counter(lower(letter) for letter in word)
        # Counter returns 0 for letters that were never chosen.
        if all(available[letter] >= count for letter, count in needed.items()):
            matching_words.add(word)
    return list(matching_words)
def StartSearch():
    """Prompt for a song title and save its lyrics to ``./lyrics/``.

    The title read from ``input`` is stripped of spaces, passed through
    ``lower`` and appended to the azlyrics ``phoebebridgers`` URL. From the
    page text, the part between ``Phoebe Bridgers Lyrics`` and
    ``Submit Corrections`` is taken, then everything after the quoted,
    title-cased song name, and that is written to
    ``./lyrics/<title as typed>.txt``.

    Any ordinary failure (request error, a marker missing from the page,
    file error) prints ``That's not a Phoebe song``. ``KeyboardInterrupt``
    and ``SystemExit`` are no longer swallowed.

    Returns:
        None.
    """
    try:
        search = input("Search for:")
        searched = lower(search.replace(" ", ""))

        # Parse az-lyrics
        url = "https://www.azlyrics.com/lyrics/phoebebridgers/"
        response = requests.get(url + searched + ".html")
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text()

        g_text = (text.split("Phoebe Bridgers Lyrics", 1)[1]).split("Submit Corrections", 1)[0]
        edit_text = (g_text.split('"' + search.title() + '"', 1)[1]).strip()

        # The context manager flushes and closes the file on every path.
        with open("./lyrics/" + search + ".txt", "w+") as lyrics_file:
            lyrics_file.write(edit_text)
    except Exception:
        print("That's not a Phoebe song")
        # NOTE(review): this only binds a local name. If a module-level
        # ``run`` flag is meant to be cleared here, a ``global run``
        # declaration is required -- confirm against the caller.
        run = False
def get_weight(string):
    """Return the sum of the letter weights of ``string``.

    The text is passed through ``lower`` and every resulting character
    that appears in the weight table adds its value; all other characters
    are ignored. The table numbers its 29 letters from 1 to 29 in the
    order listed (it appears to be the Turkish alphabet).
    """
    letter_scores = {
        'a': 1,
        'b': 2,
        'c': 3,
        'ç': 4,
        'd': 5,
        'e': 6,
        'f': 7,
        'g': 8,
        'ğ': 9,
        'h': 10,
        'ı': 11,
        'i': 12,
        'j': 13,
        'k': 14,
        'l': 15,
        'm': 16,
        'n': 17,
        'o': 18,
        'ö': 19,
        'p': 20,
        'r': 21,
        's': 22,
        'ş': 23,
        't': 24,
        'u': 25,
        'ü': 26,
        'v': 27,
        'y': 28,
        'z': 29,
    }
    return sum(
        letter_scores[symbol]
        for symbol in lower(string)
        if symbol in letter_scores
    )
def parse(self, response):
    """Extract the specification table of one page and queue all bike pages.

    Looks like a Scrapy spider callback (it relies on ``response.xpath``,
    ``response.css`` and ``response.follow``).

    The text of every first table cell is used as a key, after removing
    spaces and dots and passing it through ``lower``. The text nodes under
    ``.right`` elements are the values, with ``-`` replaced by ``N/A``.
    Keys and values are paired by position: ``zip`` stops at the shorter
    list and a repeated key keeps its last value.

    Args:
        response: The downloaded page.

    Yields:
        First the key/value dict for this page, then one follow-up request
        per entry of ``all_bikes`` with this method as the callback.
    """
    links = [
        anchor.xpath("@href").extract_first()
        for anchor in response.xpath("//h3//a")
    ]
    print(links)

    data = response.css('.right').css('::text').extract()
    spec = response.css('td:nth-child(1)').css('::text').extract()

    specs = [lower(label.replace(" ", "").replace(".", "")) for label in spec]
    final = [
        "N/A" if value is None or value == "-" else value
        for value in data
    ]
    yield dict(zip(specs, final))

    all_bikes = [
        '/hero/splendor/specifications',
        '/hero/super-splendor/specifications',
        '/hero/hf-deluxe/specifications',
        '/hero/xpulse-200/specifications',
        '/hero/passion-pro/specifications',
        '/hero/xtreme-200-s/specifications',
        '/hero/glamour-2017/specifications',
        '/hero/xpulse-200t/specifications',
        '/hero/splendor-pro/specifications',
        '/hero/pleasure/specifications',
        '/hero/passion-pro-110/specifications',
        '/hero/maestro-edge/specifications',
        '/hero/passion-xpro/specifications',
        '/hero/destini-125/specifications',
        '/hero/karizma-zmr/specifications',
        '/hero/duet/specifications',
        '/hero/xtreme-200s/specifications',
        '/hero/xtreme-sports/specifications',
        '/hero/splendor-ismart-110/specifications',
        '/hero/achiever/specifications',
        '/hero/hf-dawn/specifications',
        '/royal-enfield/classic-350/specifications',
        '/royal-enfield/bullet-350/specifications',
        '/royal-enfield/interceptor-650/specifications',
        '/royal-enfield/himalayan/specifications',
        '/royal-enfield/classic-500/specifications',
        '/royal-enfield/thunderbird-350x/specifications',
        '/royal-enfield/bullet-500/specifications',
        '/royal-enfield/thunderbird-350/specifications',
        '/royal-enfield/continental-gt-650/specifications',
        '/royal-enfield/thunderbird-500x/specifications',
        '/royal-enfield/thunderbird-500/specifications',
        '/honda/activa/specifications',
        '/honda/shine/specifications',
        '/honda/dio/specifications',
        '/honda/cb-hornet-160-r/specifications',
        '/honda/activa-125/specifications',
        '/honda/unicorn/specifications',
        '/honda/shine-sp/specifications',
        '/honda/activa-i/specifications',
        '/honda/livo/specifications',
        '/honda/cbr-250-r/specifications',
        '/honda/grazia/specifications',
        '/honda/unicorn-160/specifications',
        '/honda/xblade/specifications',
        '/honda/cd-110-dream/specifications',
        '/honda/dream-yuga/specifications',
        '/honda/navi/specifications',
        '/honda/cb300r/specifications',
        '/honda/aviator/specifications',
        '/honda/cbr650r/specifications',
        '/honda/gold-wing/specifications',
        '/honda/cbr-1000-rr/specifications',
        '/honda/cliq/specifications',
        '/honda/dream-neo/specifications',
        '/honda/crf-1000l-africa-twin/specifications',
        '/honda/cb1000r-plus/specifications',
        '/tvs/apache-160/specifications',
        '/tvs/apache/specifications',
        '/tvs/apache-rtr-200-4v/specifications',
        '/tvs/apache-rtr-180/specifications',
        '/tvs/akula-310/specifications',
        '/tvs/jupiter/specifications',
        '/tvs/ntorq-125/specifications',
        '/tvs/scooty/specifications',
        '/tvs/jupiter-grande/specifications',
        '/tvs/sport/specifications',
        '/tvs/xl-100/specifications',
        '/tvs/radeon/specifications',
        '/tvs/star-city-plus/specifications',
        '/tvs/scooty-zest/specifications',
        '/tvs/victor/specifications',
        '/tvs/wego/specifications',
        '/bajaj/bajaj-pulsar-200-ns/specifications',
        '/bajaj/pulsar-150/specifications',
        '/bajaj/pulsar-220/specifications',
        '/bajaj/pulsar-rs-200/specifications',
        '/bajaj/pulsar-180/specifications',
        '/bajaj/pulsar-150-ns/specifications',
        '/bajaj/pulsar-180f/specifications',
        '/bajaj/ct-100/specifications',
        '/bajaj/dominar-400/specifications',
        '/bajaj/avenger/specifications',
        '/bajaj/platina/specifications',
        '/bajaj/v/specifications',
        '/bajaj/avenger-cruise-220/specifications',
        '/bajaj/discover-125/specifications',
        '/bajaj/avenger-160/specifications',
        '/bajaj/discover-110/specifications',
        '/bajaj/avenger-street-180/specifications',
        '/bajaj/v12/specifications',
        '/yamaha/yzf-r15-v3/specifications',
        '/yamaha/mt-15/specifications',
        '/yamaha/fz-s/specifications',
        '/yamaha/fz-fi-version-3/specifications',
        '/yamaha/fz-250/specifications',
        '/yamaha/fz-fi/specifications',
        '/yamaha/fascino/specifications',
        '/yamaha/fz-s-fi-version-3/specifications',
        '/yamaha/sz-rr/specifications',
        '/yamaha/mt-09/specifications',
        '/yamaha/yzf-r3/specifications',
        '/yamaha/ray-zr/specifications',
        '/yamaha/fazer/specifications',
        '/yamaha/yzf-r15s/specifications',
        '/yamaha/fazer-250/specifications',
        '/yamaha/saluto/specifications',
        '/yamaha/ray/specifications',
        '/yamaha/saluto-rx/specifications',
        '/yamaha/yzf-r15-v3-moto-gp-edition/specifications',
        '/yamaha/yzf-r1/specifications',
        '/yamaha/alpha/specifications',
        '/suzuki/access-125/specifications',
        '/suzuki/hayabusa/specifications',
        '/suzuki/intruder-150/specifications',
        '/suzuki/gixxer/specifications',
        '/suzuki/burgman-street/specifications',
        '/suzuki/gixxer-sf/specifications',
        '/suzuki/gsx-s750/specifications',
        '/suzuki/v-strom-650/specifications',
        '/suzuki/dr-z50/specifications',
        '/suzuki/gsx-s1000/specifications',
        '/suzuki/gsx-r1000r/specifications',
        '/suzuki/v-storm/specifications',
        '/suzuki/rm-z250/specifications',
        '/suzuki/rm-z450/specifications',
    ]
    # Every parsed page re-queues the full list with this same callback.
    for bike_path in all_bikes:
        next_url = 'https://www.bikedekho.com' + bike_path
        yield response.follow(next_url, callback=self.parse)
# Count how often each letter a-z occurs among the dealt characters
# (index 0 is 'a'; ``characters`` is set before this point).
characters_array = [0] * 26
for char in characters:
    characters_array[ord(char) - 97] += 1

print("please wait while the computer processes your input...")
computer_answers = word_lookup(characters)
longest_computer_answers = []
longest_computer_answer_length = 0
user_points = 0

# Measure, in whole seconds, how long the player takes to answer.
start_time = int(time.time())
user_answer = input(
    "You have 30 seconds to guess a word made up of only these letters: "
    + characters + " > ")
time_to_answer = int(time.time()) - start_time

# Spend one dealt letter per character typed. A count that drops to -1
# means the letter was used more often than it was dealt.
user_word_correct_letters = True
for char in user_answer:
    lowered_char = lower(char)
    if len(lowered_char) != 1 or not "a" <= lowered_char <= "z":
        # Spaces, digits, punctuation and the like can never be among the
        # dealt letters. Indexing with them would raise IndexError or wrap
        # round onto another letter's count, so reject them up front.
        user_word_correct_letters = False
        print(
            "The character '" + char + "' is not one of the letters shown above.")
        continue
    letter_index = ord(lowered_char) - 97
    characters_array[letter_index] -= 1
    if characters_array[letter_index] == -1:
        user_word_correct_letters = False
        print(
            "You overused the letter '" + char + "' in your answer. You may only use the 9 letters shown above, "
            "which are randomly selected")

if time_to_answer > 30:
    print("You exceeded the 30 second countdown. Zero points awarded.")
print("You took " + str(time_to_answer) + " seconds to answer")

# Points equal the answer length, awarded only for a known word that was
# built from the dealt letters and entered within the time limit.
for computer_answer in computer_answers:
    if lower(computer_answer) == lower(
            user_answer) and user_word_correct_letters:
        if time_to_answer <= 30:
            user_points = len(user_answer)
def get_product_page_class(store_name):
    """Return the product-page class for ``store_name``.

    The name is passed through ``lower`` and used twice: as the first
    argument to ``import_class`` and as the suffix of the class name
    ``product_page_<name>``. Whatever ``import_class`` returns is returned
    unchanged.
    """
    normalized_name = lower(store_name)
    return import_class(
        normalized_name,
        'product_page_{}'.format(normalized_name),
    )