Exemplo n.º 1
0
def main():
    """Convert the HTML pages under ``files_new/`` into numbered XML files.

    For every ``*.html`` file found in each sub-directory of ``files_new/``:

    * the page is loaded with ``getsoup`` and its spec section obtained with
      ``specsection``;
    * every ``(name, pattern)`` pair in ``regexes`` is applied through
      ``getspec`` and stored under ``name``;
    * the title (``getsouptitle``) and body text (``getbodytext``) are added.

    A page with fewer than 8 non-empty fields is skipped. Every other page is
    serialised with ``dicttoxml`` (root element ``page``), pretty-printed and
    handed to ``write_to_file`` as ``0.xml``, ``1.xml``, ... in processing
    order. Progress is reported on stdout.
    """
    # Field order is significant: it is the order of the elements in the
    # generated XML document.
    field_names = (
        "title",
        'screen_size',
        'camera_res',
        'screen_resolution',
        'battery_capacity',
        'ram',
        'internal_memory',
        'processor_speed',
        'weight',
        "body",
    )
    # Minimum number of non-empty fields a page needs to be written out.
    min_filled_fields = 8

    n = 0
    p = Path('files_new/')
    # iterdir() yields entries in arbitrary order; sorting keeps the
    # "<n>.xml" numbering reproducible from one run to the next.
    for folder in sorted(p.iterdir()):
        # Only report (and descend into) real directories, so that stray
        # files next to the folders are not announced as converted folders.
        if not folder.is_dir():
            continue
        print(f"Converting folder: {folder.name}...")

        for file in sorted(folder.iterdir()):
            if not (file.is_file() and file.name.endswith(".html")):
                continue

            soup = getsoup(file)
            specs = specsection(soup)

            # Start from a fresh record so no value can leak from the
            # previously processed page.
            pagedict = dict.fromkeys(field_names, "")
            for spec_name, rx in regexes.items():
                pagedict[spec_name] = getspec(specs, rx)
            pagedict["title"] = getsouptitle(soup)
            pagedict["body"] = getbodytext(soup)

            filled = sum(1 for value in pagedict.values() if value)
            if filled < min_filled_fields:
                print(f"skiped {file.name}")
                continue

            xml = dicttoxml(pagedict,
                            attr_type=None,
                            custom_root="page")
            prettyxml = parseString(xml).toprettyxml()
            print(f"Writing file: {file.name}")
            write_to_file(prettyxml, f'{n}.xml')
            n += 1

        print(f"Done with {folder.name}!")
    print("Finished")
Exemplo n.º 2
0
def main():
    """Print the specs extracted from the page at ``PATH``.

    The page is loaded with ``getsoup``, passed through ``preprocess`` and
    then through ``getspecs``; whatever ``getspecs`` returns is printed.
    """
    cleaned_page = preprocess(getsoup(PATH))
    print(getspecs(cleaned_page))
Exemplo n.º 3
0
def main():
    """Run the spec extraction pipeline on ``SPEC_PAGE``.

    The page is loaded with ``getsoup``, cleaned with ``preprocess`` and fed
    to ``getspecs``. The return value of ``getspecs`` is discarded here.
    """
    # NOTE(review): the result is not used -- presumably getspecs reports or
    # stores its findings itself; confirm against its definition.
    getspecs(preprocess(getsoup(SPEC_PAGE)))
Exemplo n.º 4
0
def main():
    """Print the specs found in the ``techspecs-section`` elements.

    Loads ``PHONE_SPECS_PAGE`` with ``getsoup``, cleans it with
    ``preprocess``, collects every element whose class is
    ``techspecs-section`` and prints what ``getspecs`` returns for them.
    """
    page = preprocess(getsoup(PHONE_SPECS_PAGE))
    sections = page.find_all(class_="techspecs-section")
    print(getspecs(sections))
Exemplo n.º 5
0
def main():
    """Print the specs found in the ``tech_spec_wrap spec_toggle`` elements.

    Loads ``SPEC_PAGE`` with ``getsoup``, cleans it with ``preprocess``,
    selects the elements matching ``class_="tech_spec_wrap spec_toggle"``
    and prints what ``getspecs`` returns for them.
    """
    page = preprocess(getsoup(SPEC_PAGE))
    spec_items = page.find_all(class_="tech_spec_wrap spec_toggle")
    print(getspecs(spec_items))