def jsonify_page(urls, topicId, switch="JSON"):
    """Scrape each URL into a post dict; return JSON text or the raw list.

    Parameters:
        urls: iterable of page URLs to scrape.
        topicId: topic identifier stored in each output record.
        switch: "JSON" (default) returns a JSON-formatted string;
            any other value returns the list of dicts directly.

    Returns:
        A JSON string (sorted keys, indent 4) when switch == "JSON",
        otherwise the list of per-page dicts.

    NOTE(review): reads a module-level ``publicationId`` that is not a
    parameter — confirm it is defined at import time.
    """
    outlist = []
    for url in urls:
        # Download and parse the page.
        soup = sb.getSoup(url)
        # Extract the page metadata.
        title = titleFormat(sb.listCatch(getTitle(soup)))
        author = sb.listCatch(getAuthor(soup))
        date = getDate(soup)
        # Collect image URLs for the page.
        imageUrls = getImURLS(soup)
        # Body arrives as a list of paragraphs; flatten it to one string.
        body = getBodyAsString(grabPageText(soup))
        if not body:
            # Empty-body sentinel used throughout this module.
            body = "/empty"
        outlist.append({
            'title': title,
            'author': author,
            'date': date,
            'body': body,
            'images': imageUrls,
            'url': url,
            'publication': publicationId,
            'topic': topicId,
            'posted': False,
            'id': 0,
        })
    if switch == "JSON":
        return json.dumps(outlist, sort_keys=True, indent=4)
    return outlist
def getTitle(soup):
    """Return the formatted post title, or "/empty" when none is found."""
    raw = sb.listCatch(soup.select(".post-meta h1"))
    # listCatch yields the "/empty" sentinel for a missing element;
    # pass that through untouched.
    if raw == "/empty":
        return raw
    return buildTitle(raw.split(" "))
def getDate(soup):
    """Return the parsed post date, or "/empty" when the field is missing."""
    raw = sb.listCatch(soup.select(".post-date"))
    # Only parse a real value; propagate the "/empty" sentinel as-is.
    return sb.parseDate(raw) if raw != "/empty" else raw
def getAuthor(soup):
    """Return the author taken from the page's last content paragraph.

    Falls back to the "/empty" sentinel when the page has no
    ``.hentry-content p`` paragraphs.

    NOTE(review): this file defines ``getAuthor`` more than once; the
    later definition shadows this one at import time — confirm which
    version is intended for this scraper.
    """
    body = soup.select(".hentry-content p")
    # Guard clause: no paragraphs means no author line to read.
    if not body:
        return "/empty"
    # The author line is taken from the final paragraph.
    return sb.listCatch([body[-1]])
def getAuthor(soup):
    """Return the post author, or "/empty" when the field is absent."""
    matches = soup.select(".post-author")
    # listCatch maps an empty match list to the "/empty" sentinel.
    return sb.listCatch(matches)
def getDate(soup):
    """Return the parsed date taken from the page's .author element."""
    found = soup.select(".author")
    # Catch the (possibly empty) match list, then normalize the date.
    caught = sb.listCatch(found)
    return sb.parseDate(caught)
def getTitle(soup):
    """Return the raw page title from the .hentry-meta h1 element."""
    # listCatch handles the empty-match case with its "/empty" sentinel.
    return sb.listCatch(soup.select(".hentry-meta h1"))
def getBody(soup):
    """Return the article body as one string.

    Each paragraph is followed by a blank-line separator (so the result
    ends with one), matching the original concatenation behavior.
    Returns the empty string when the page has no content paragraphs.
    """
    paragraphs = soup.select(".hentry-content p")
    # str.join avoids the quadratic cost of repeated += concatenation.
    return "".join(sb.listCatch([p]) + "\n\n" for p in paragraphs)
def getDate(soup):
    """Return the sanitized post date from the .hentry-meta paragraph."""
    raw = sb.listCatch(soup.select(".hentry-meta p"))
    # Normalize whatever date string was scraped.
    return sb.parseDate(raw)