Exemplo n.º 1
0
def load_match_words(match_word_file):
    """Load the match words listed in a text file.

    Lines whose first non-blank character is ``#`` are treated as comments
    and skipped. Every whitespace-separated token on the remaining lines is
    passed through the ``lower`` helper (defined outside this block) and
    collected in file order; duplicates are kept.

    Args:
        match_word_file: Path of the word file to read.

    Returns:
        A list with one entry per word found in the file.
    """
    match_words = []
    # The context manager closes the handle even if reading fails part-way.
    with open(match_word_file) as word_file:
        for line in word_file:
            if line.strip().startswith("#"):
                continue
            # A line may carry more than one word.
            match_words.extend(lower(word) for word in line.split())
    return match_words
Exemplo n.º 2
0
def pdf_miner_word(pdf, path):
    """Return the abstract text found in the PDF stored at *path*.

    Pages are processed in order. On each page the text of every
    ``LTTextBox`` is stripped, passed through the ``lower`` helper (defined
    outside this block) and scanned for a heading; the first match decides
    the return value:

    * a box that equals ``'abstract'`` once spaces are removed: the box
      1, 3 or 4 positions later is returned, chosen by ``path[-6:-4]``
      (``'h6'`` -> 3, ``'h8'`` -> 4, anything else -> 1);
    * a box that starts with ``'abstract'``: the rest of that box (from
      index 9) is returned;
    * a box equal to ``'1 introduction'`` or ``'summary'``: the next box is
      returned.

    Args:
        pdf: Mapping describing the document; only ``pdf['title']`` is read,
            and only when the file cannot be parsed.
        path: Filesystem path of the PDF. ``path[-6:-4]`` is presumably the
            two characters just before a ``.pdf`` suffix -- TODO confirm.

    Returns:
        The selected text, or ``None`` when no heading matches on any page
        or when a ``PDFSyntaxError`` was recorded.

    Raises:
        PDFTextExtractionNotAllowed: If the document forbids extraction.
        IndexError: If a matched heading is too close to the end of the
            page for the box it points at to exist.
    """
    try:
        # Open under a context manager so the handle is always released.
        with open(path, 'rb') as pdf_file:
            # Build a parser on the file object and an empty document, then
            # link the two to each other.
            parser = PDFParser(pdf_file)
            doc = PDFDocument()
            parser.set_document(doc)
            doc.set_parser(parser)

            # initialize() is called without a password argument.
            doc.initialize()

            # Refuse documents that do not permit text extraction.
            if not doc.is_extractable:
                raise PDFTextExtractionNotAllowed

            # Resource manager, layout-aggregating device and interpreter.
            rsrcmgr = PDFResourceManager()
            laparams = LAParams()
            device = PDFPageAggregator(rsrcmgr, laparams=laparams)
            interpreter = PDFPageInterpreter(rsrcmgr, device)

            # Selects how far after an "abstract" heading the body sits.
            layout_tag = path[-6:-4]

            for page in doc.get_pages():
                interpreter.process_page(page)
                # The aggregated page layout; iterating it yields the
                # objects found on the page, of which only LTTextBox
                # instances are kept.
                layout = device.get_result()
                text_blocks = [
                    lower(item.get_text().strip())
                    for item in layout
                    if isinstance(item, LTTextBox)
                ]
                for i, block in enumerate(text_blocks):
                    if block.replace(' ', '') == 'abstract':
                        if layout_tag == 'h6':
                            return text_blocks[i + 3]
                        if layout_tag == 'h8':
                            return text_blocks[i + 4]
                        return text_blocks[i + 1]
                    if block[0:8] == 'abstract':
                        return block[9:]
                    if block == '1 introduction':
                        return text_blocks[i + 1]
                    if block == 'summary':
                        return text_blocks[i + 1]
    except PDFSyntaxError:
        # Record the failure in `list5`, a name defined outside this block
        # (presumably a module-level list of problem reports -- confirm).
        failure = {"title": pdf['title'], "problem": "fail to open pdf"}
        list5.append(failure)
Exemplo n.º 3
0
def word_lookup(chosen_letters: str):
    """Find the dictionary words that can be spelled from *chosen_letters*.

    A word matches when no letter is used more often than it occurs in
    *chosen_letters*. Words come from ``dictionary_reader('words.txt')`` and
    each of their characters is passed through the ``lower`` helper (both
    defined outside this block) before being counted.

    Args:
        chosen_letters: The letters available in the game (the 9 randomly
            selected characters, per the original docstring). Each character
            is indexed into a 26-slot table with ``ord(ch) - 97``, so
            lowercase ``a``-``z`` is expected.

    Returns:
        A list of the matching words without duplicates. The list is built
        from a set, so its order is unspecified.
    """
    # gets the words from the dictionary file in a list format
    all_words = dictionary_reader('words.txt')

    # Count the available letters once instead of once per word.
    available = [0] * 26
    for ch in chosen_letters:
        available[ord(ch) - 97] += 1

    matching_words = set()
    for word in all_words:
        if not word:
            # The original never accepted an empty word; keep that.
            continue
        remaining = list(available)
        for letter in word:
            slot = ord(lower(letter)) - 97
            remaining[slot] -= 1
            if remaining[slot] < 0:
                # This letter is used more often than it is available.
                break
        else:
            matching_words.add(word)
    return list(matching_words)
Exemplo n.º 4
0
def StartSearch():
    """Prompt for a song title and save its lyrics to ``./lyrics/``.

    The title read from standard input has its spaces removed and is passed
    through the ``lower`` helper (defined outside this block) to form the
    page name under the hard-coded azlyrics.com artist URL. The page text
    between ``"Phoebe Bridgers Lyrics"`` and ``"Submit Corrections"`` is
    taken, everything up to and including the quoted title-cased song name
    is dropped, and the remainder is written to
    ``./lyrics/<title as typed>.txt``.

    Any failure along the way (network error, markers missing from the
    page, file cannot be written) is reported with a single message rather
    than raised.

    Returns:
        None.
    """
    try:
        search = input("Search for:")
        searched = lower((str(re.sub(" ", "", search))))

        # Parse az-lyrics
        url = "https://www.azlyrics.com/lyrics/phoebebridgers/"
        url_edit = url + searched + ".html"
        r = requests.get(url_edit)

        soup = BeautifulSoup(r.text, "html.parser")
        text = soup.get_text()
        g_text = (text.split("Phoebe Bridgers Lyrics",
                             1)[1]).split("Submit Corrections", 1)[0]
        edit_text = (g_text.split('"' + search.title() + '"', 1)[1]).strip()

        # Context manager flushes and closes the lyrics file.
        with open("./lyrics/" + search + ".txt", "w+") as f:
            f.write(edit_text)
    except Exception:
        # Deliberately broad best-effort handler, but no longer a bare
        # `except:` so Ctrl-C and SystemExit still propagate.
        print("That's not a Phoebe song")
        # NOTE(review): `run` is local here (there is no `global` statement),
        # so this assignment has no effect outside the function -- confirm
        # whether a module-level flag was intended.
        run = False
Exemplo n.º 5
0
def get_weight(string):
    """Return the summed letter weights of *string*.

    The input is first passed through the ``lower`` helper (defined outside
    this block). Each resulting character that appears in the weight table
    contributes its weight; every other character contributes nothing.

    Args:
        string: The text to weigh.

    Returns:
        The integer total of the weights of all recognised characters.
    """
    # Letters in weight order: the first one weighs 1, the last one 29.
    ordered_letters = "abcçdefgğhıijklmnoöprsştuüvyz"
    weights = {
        letter: position
        for position, letter in enumerate(ordered_letters, start=1)
    }
    normalised = lower(string)
    return sum(weights[ch] for ch in normalised if ch in weights)
Exemplo n.º 6
0
    def parse(self, response):
        """Scrape one specification page, then queue every listed bike page.

        The hrefs of all links inside ``<h3>`` elements are collected and
        printed. Specification labels are read from the text of the first
        table cells (``td:nth-child(1)``), stripped of spaces and dots and
        passed through the ``lower`` helper (defined outside this block).
        Values are read from the text under ``.right``; a value of ``None``
        or ``"-"`` becomes ``"N/A"``. Labels and values are paired
        positionally with ``zip``, so surplus entries on either side are
        dropped and a repeated label keeps its last value.

        Args:
            response: The page response; it must provide ``xpath``, ``css``
                and ``follow``.

        Yields:
            First a dict mapping normalised label to value for this page,
            then one ``response.follow`` request per entry of the hard-coded
            bike list, each with this method as its callback.
        """
        links = [
            selector.xpath("@href").extract_first()
            for selector in response.xpath("//h3//a")
        ]
        print(links)

        data = response.css('.right').css('::text').extract()
        spec = response.css('td:nth-child(1)').css('::text').extract()

        # Normalise labels: drop spaces and dots, then apply `lower`.
        specs = [
            lower(label.replace(" ", "").replace(".", "")) for label in spec
        ]

        # Replace missing values with a uniform placeholder.
        final = [
            "N/A" if (value is None or value == "-") else value
            for value in data
        ]
        yield dict(zip(specs, final))

        all_bikes = [
            '/hero/splendor/specifications',
            '/hero/super-splendor/specifications',
            '/hero/hf-deluxe/specifications',
            '/hero/xpulse-200/specifications',
            '/hero/passion-pro/specifications',
            '/hero/xtreme-200-s/specifications',
            '/hero/glamour-2017/specifications',
            '/hero/xpulse-200t/specifications',
            '/hero/splendor-pro/specifications',
            '/hero/pleasure/specifications',
            '/hero/passion-pro-110/specifications',
            '/hero/maestro-edge/specifications',
            '/hero/passion-xpro/specifications',
            '/hero/destini-125/specifications',
            '/hero/karizma-zmr/specifications',
            '/hero/duet/specifications',
            '/hero/xtreme-200s/specifications',
            '/hero/xtreme-sports/specifications',
            '/hero/splendor-ismart-110/specifications',
            '/hero/achiever/specifications',
            '/hero/hf-dawn/specifications',
            '/royal-enfield/classic-350/specifications',
            '/royal-enfield/bullet-350/specifications',
            '/royal-enfield/interceptor-650/specifications',
            '/royal-enfield/himalayan/specifications',
            '/royal-enfield/classic-500/specifications',
            '/royal-enfield/thunderbird-350x/specifications',
            '/royal-enfield/bullet-500/specifications',
            '/royal-enfield/thunderbird-350/specifications',
            '/royal-enfield/continental-gt-650/specifications',
            '/royal-enfield/thunderbird-500x/specifications',
            '/royal-enfield/thunderbird-500/specifications',
            '/honda/activa/specifications',
            '/honda/shine/specifications',
            '/honda/dio/specifications',
            '/honda/cb-hornet-160-r/specifications',
            '/honda/activa-125/specifications',
            '/honda/unicorn/specifications',
            '/honda/shine-sp/specifications',
            '/honda/activa-i/specifications',
            '/honda/livo/specifications',
            '/honda/cbr-250-r/specifications',
            '/honda/grazia/specifications',
            '/honda/unicorn-160/specifications',
            '/honda/xblade/specifications',
            '/honda/cd-110-dream/specifications',
            '/honda/dream-yuga/specifications',
            '/honda/navi/specifications',
            '/honda/cb300r/specifications',
            '/honda/aviator/specifications',
            '/honda/cbr650r/specifications',
            '/honda/gold-wing/specifications',
            '/honda/cbr-1000-rr/specifications',
            '/honda/cliq/specifications',
            '/honda/dream-neo/specifications',
            '/honda/crf-1000l-africa-twin/specifications',
            '/honda/cb1000r-plus/specifications',
            '/tvs/apache-160/specifications',
            '/tvs/apache/specifications',
            '/tvs/apache-rtr-200-4v/specifications',
            '/tvs/apache-rtr-180/specifications',
            '/tvs/akula-310/specifications',
            '/tvs/jupiter/specifications',
            '/tvs/ntorq-125/specifications',
            '/tvs/scooty/specifications',
            '/tvs/jupiter-grande/specifications',
            '/tvs/sport/specifications',
            '/tvs/xl-100/specifications',
            '/tvs/radeon/specifications',
            '/tvs/star-city-plus/specifications',
            '/tvs/scooty-zest/specifications',
            '/tvs/victor/specifications',
            '/tvs/wego/specifications',
            '/bajaj/bajaj-pulsar-200-ns/specifications',
            '/bajaj/pulsar-150/specifications',
            '/bajaj/pulsar-220/specifications',
            '/bajaj/pulsar-rs-200/specifications',
            '/bajaj/pulsar-180/specifications',
            '/bajaj/pulsar-150-ns/specifications',
            '/bajaj/pulsar-180f/specifications',
            '/bajaj/ct-100/specifications',
            '/bajaj/dominar-400/specifications',
            '/bajaj/avenger/specifications',
            '/bajaj/platina/specifications',
            '/bajaj/v/specifications',
            '/bajaj/avenger-cruise-220/specifications',
            '/bajaj/discover-125/specifications',
            '/bajaj/avenger-160/specifications',
            '/bajaj/discover-110/specifications',
            '/bajaj/avenger-street-180/specifications',
            '/bajaj/v12/specifications',
            '/yamaha/yzf-r15-v3/specifications',
            '/yamaha/mt-15/specifications',
            '/yamaha/fz-s/specifications',
            '/yamaha/fz-fi-version-3/specifications',
            '/yamaha/fz-250/specifications',
            '/yamaha/fz-fi/specifications',
            '/yamaha/fascino/specifications',
            '/yamaha/fz-s-fi-version-3/specifications',
            '/yamaha/sz-rr/specifications',
            '/yamaha/mt-09/specifications',
            '/yamaha/yzf-r3/specifications',
            '/yamaha/ray-zr/specifications',
            '/yamaha/fazer/specifications',
            '/yamaha/yzf-r15s/specifications',
            '/yamaha/fazer-250/specifications',
            '/yamaha/saluto/specifications',
            '/yamaha/ray/specifications',
            '/yamaha/saluto-rx/specifications',
            '/yamaha/yzf-r15-v3-moto-gp-edition/specifications',
            '/yamaha/yzf-r1/specifications',
            '/yamaha/alpha/specifications',
            '/suzuki/access-125/specifications',
            '/suzuki/hayabusa/specifications',
            '/suzuki/intruder-150/specifications',
            '/suzuki/gixxer/specifications',
            '/suzuki/burgman-street/specifications',
            '/suzuki/gixxer-sf/specifications',
            '/suzuki/gsx-s750/specifications',
            '/suzuki/v-strom-650/specifications',
            '/suzuki/dr-z50/specifications',
            '/suzuki/gsx-s1000/specifications',
            '/suzuki/gsx-r1000r/specifications',
            '/suzuki/v-storm/specifications',
            '/suzuki/rm-z250/specifications',
            '/suzuki/rm-z450/specifications',
        ]

        # Every parsed page re-queues the full list with this method as the
        # callback.
        for bike_path in all_bikes:
            next_url = 'https://www.bikedekho.com' + bike_path
            yield response.follow(next_url, callback=self.parse)
Exemplo n.º 7
0
    characters_array = [0] * 26
    for char in characters:
        characters_array[ord(char) - 97] += 1
    print("please wait while the computer processes your input...")
    computer_answers = word_lookup(characters)
    longest_computer_answers = []
    longest_computer_answer_length = 0
    user_points = int()
    start_time = int(time.time())
    user_answer = input(
        "You have 30 seconds to guess a word made up of only these letters: " +
        characters + "  >  ")
    time_to_answer = int(time.time()) - start_time
    user_word_correct_letters = True
    for char in user_answer:
        characters_array[ord(lower(char)) - 97] -= 1
        if characters_array[ord(lower(char)) - 97] == -1:
            user_word_correct_letters = False
            print(
                "You overused the letter '" + char +
                "' in your answer. You may only use the 9 letters shown above, "
                "which are randomly selected")
    if time_to_answer > 30:
        print("You exceeded the 30 second countdown. Zero points awarded.")
    print("You took " + str(time_to_answer) + " seconds to answer")

    for computer_answer in computer_answers:
        if lower(computer_answer) == lower(
                user_answer) and user_word_correct_letters:
            if time_to_answer <= 30:
                user_points = len(user_answer)
def get_product_page_class(store_name):
    """Look up the product-page class that belongs to a store.

    The store name is passed through the ``lower`` helper; the result is
    used both as the first argument to ``import_class`` and, prefixed with
    ``product_page_``, as the second. Both helpers are defined outside this
    block.

    Args:
        store_name: Name of the store.

    Returns:
        Whatever ``import_class`` returns for that pair of names.
    """
    normalised_name = lower(store_name)
    return import_class(
        normalised_name,
        'product_page_{}'.format(normalised_name),
    )