Example #1
def getEntriesFromUrl(url, delay=10):
    """Download a topic page and return its entries as a list of Entry objects."""
    try:
        page = helper.urlopen(url, delay=delay)
    except Exception:
        logging.error("Failed to open url: %s", url)
        return []

    # print("Downloaded")

    html = page.read()
    soup = BeautifulSoup(html, "html.parser")
    result = []

    # The topic ("baslik") id and title are exposed as data attributes on the <h1> element.
    title_tag = soup.find('h1', id='title')
    baslik_id = title_tag.attrs['data-id']
    baslikText = title_tag.attrs['data-title'].strip()

    # Each <li> in the entry list carries the entry metadata as data attributes.
    for i in soup.find('ul', id='entry-list').find_all('li'):
        id_ = str(i.attrs['data-id'])

        text = textWithNewlines(i.find('div', class_='content'))
        text = text.strip()

        author = str(i.attrs['data-author'])
        favoriteCount = int(i.attrs['data-favorite-count'])
        dateText = i.find('a', class_='entry-date').text

        # The visible date string is parsed back into a datetime; fall back to
        # None when the format is unexpected.
        try:
            timestamp = helper.datetimeToTimestamp(getDatetimesFromEntryDate(dateText)[0])
        except Exception:
            timestamp = None

        result.append(Entry(text=text,
                            author=author,
                            timestamp=timestamp,
                            baslik_id=baslik_id,
                            favoriteCount=favoriteCount,
                            id_=id_))
    return result
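
A call to this function might look like the sketch below. The URL is a placeholder, and the attribute names on the returned Entry objects (author, timestamp) are assumptions based on the constructor arguments above; getEntriesFromUrl itself relies on the project-local helper, Entry, textWithNewlines, and getDatetimesFromEntryDate names.

entries = getEntriesFromUrl("https://example.com/some-topic--123")  # placeholder URL
for entry in entries:
    # Assumes Entry exposes its constructor arguments as attributes.
    print(entry.author, entry.timestamp)
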
Example #2
def setTimestamp(self, dt):
    """Store the given datetime as a Unix timestamp via helper.datetimeToTimestamp."""
    self.timestamp = helper.datetimeToTimestamp(dt)
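
Neither example shows helper.datetimeToTimestamp itself. Below is a minimal sketch of what such a helper typically does, assuming it turns a naive datetime.datetime into an integer Unix timestamp; the real project's helper may handle time zones differently.

import datetime

def datetimeToTimestamp(dt):
    # Interpret a naive datetime as local time and return whole seconds since the epoch.
    return int(dt.timestamp())

# e.g. datetimeToTimestamp(datetime.datetime(2020, 1, 1, 12, 0)) yields a Unix timestamp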