Пример #1
0
def test_parser_find_all_divs_with_specific_attrs():
    """Check ``find_all`` filtered by tag name and attribute dict.

    Searches the ``SALARIES`` fixture for ``tr`` elements whose ``class``
    attribute is ``'dataRow first'``.
    """
    parser = ParseHTML(SALARIES)
    matches = list(parser.find_all('tr', {'class': 'dataRow first'}))
    # 21 elements: the real results plus the one row of localized results.
    assert len(matches) == 21
Пример #2
0
def parse_salaries_data(data, tag="tbody", class_type="dataRow"):
    """Yield the ``tr`` of each ``tag`` element when its class matches.

    ``data`` is wrapped in ``ParseHTML`` and every ``tag`` element is
    visited. For each one, its ``tr`` attribute is yielded if that element
    has a ``class`` attribute in which ``class_type`` is found.

    Args:
        data: Markup accepted by ``ParseHTML``.
        tag: Name of the container elements to scan.
        class_type: Text searched for inside the ``class`` value.

    Yields:
        The ``tr`` element of each matching container.
    """
    document = ParseHTML(data)
    for container in document.find_all(tag):
        table_row = container.tr
        # ``find`` returning -1 means ``class_type`` was not found
        # (presumably a substring search on the class string -- confirm
        # against ParseHTML's element type).
        if (table_row.has_key("class")
                and table_row["class"].find(class_type) != -1):
            yield table_row
Пример #3
0
def parse_jobs_data(data, tag="div", class_type="jobListing"):
    """Yield every ``tag`` element carrying ``class_type`` as a class token.

    ``data`` is wrapped in ``ParseHTML`` and every ``tag`` element is
    visited. Elements without a ``class`` attribute are skipped. Otherwise
    the class value is split on single spaces and the element is yielded
    only if ``class_type`` equals one of the resulting tokens.

    Args:
        data: Markup accepted by ``ParseHTML``.
        tag: Name of the elements to scan.
        class_type: Exact class token an element must have.

    Yields:
        Each matching element.
    """
    document = ParseHTML(data)
    for element in document.find_all(tag):
        # Whole-token comparison: "jobListing" does not match
        # "jobListingFoo".
        if (element.has_key("class")
                and class_type in element["class"].split(" ")):
            yield element
Пример #4
0
def test_parser_find_all_elements():
    """Check that ``find_all('div')`` on ``SALARIES`` yields 260 elements."""
    parser = ParseHTML(SALARIES)
    divs = list(parser.find_all('div'))
    assert len(divs) == 260
Пример #5
0
def test_parser_find_all_elements():
    """Check that ``find_all(True)`` on ``SALARIES`` yields 1054 elements."""
    parser = ParseHTML(SALARIES)
    # ``True`` is passed in place of a tag name (presumably "match every
    # tag" -- confirm against ParseHTML.find_all).
    everything = list(parser.find_all(True))
    assert len(everything) == 1054