def search(query, file, num):
    """Run a search for ``query``, save its SERP, and return the parsed results.

    The directory ``RawHTML/<file>/<num>`` is created (including any missing
    parents) and handed to the search engine's ``save_serp`` as ``save_dir``.

    Args:
        query: Query passed to ``ws.SearchEngine.search``.
        file: Name of the sub-directory under ``RawHTML/``.
        num: Question number; names the leaf directory and is attached to
            every returned result.

    Returns:
        A list with one dict per parsed result, each carrying an extra
        ``question_number`` key set to ``num``.
    """
    save_dir = os.path.join('RawHTML/' + file, str(num))
    # makedirs (not mkdir): a missing ``RawHTML/<file>`` parent or a re-run
    # for the same question number must not abort before the search happens.
    os.makedirs(save_dir, exist_ok=True)

    engine = ws.SearchEngine()
    engine.search(query)
    parsed = ws.parse_serp(ws.make_soup(engine.html))
    engine.save_serp(save_dir=save_dir)

    return [dict(item, question_number=num) for item in parsed]
Exemplo n.º 2
0
"""Test parsing of a saved SERP HTML file.
"""

import argparse
import pandas as pd
import WebSearcher as ws

# Command-line interface: a single optional -f/--filepath argument.
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--filepath", help="The SERP html file")
args = parser.parse_args()

# The flag is not marked required in argparse, so a missing (or empty) value
# is reported here with a plain message rather than an argparse usage error.
if not args.filepath:
    print('Must include -f arg')
else:
    # Load the file given on the command line and parse it.
    soup = ws.load_soup(args.filepath)
    parsed = ws.parse_serp(soup)
    # Tabulate the parse output (assumes `parsed` is DataFrame-compatible,
    # e.g. a list of dicts -- TODO confirm against ws.parse_serp).
    results = pd.DataFrame(parsed)

    # Also run component extraction on the same soup.
    cmpts = ws.extract_components(soup)