Пример #1
0
from pattern import web
from pattern.web import URL, Element
import re
from datetime import datetime, date, timedelta

# Download the published Google Sheet. Note that `url` ends up holding the
# HTML returned by .read(), not the address itself.
url = URL('https://docs.google.com/spreadsheets/d/1J2I40hglES63YZHROcOL3oAjDPqiiKLRPE_ikAWsR-Q/pubhtml?gid=1267634591').read()

# Parse the page and keep only the first <tbody> element.
dom = Element(url).by_tag('tbody')[0]

# Date text: content of the second element carrying class 's0' (the header).
# NOTE(review): binding `date` here shadows the `date` class imported from
# datetime at the top of the file -- confirm that later code expects the text.
date = dom.by_class('s0')[1].content
today = date

# Place names: content of every element with class 's4' (inside <td>).
places = [cell.content for cell in dom.by_class('s4')]

# Presumably the <tr> indexes that hold the readings -- they are consumed
# outside this block; verify against the code that uses them.
reading_row = [4, 10, 16, 22]
pol_reading = []


def cleanhtml(raw_html):
    """Return *raw_html* without HTML tags and without a leading "Highest:" label.

    Every ``<...>`` tag is removed, surrounding whitespace is trimmed, and if
    the remaining text starts with the label ``Highest:`` that label (and the
    whitespace after it) is dropped.

    Args:
        raw_html: String that may contain HTML markup.

    Returns:
        The cleaned text; an empty string when nothing but tags, whitespace
        or the label is left.
    """
    label = "Highest:"
    cleantext = re.sub(r'<.*?>', '', raw_html).strip()
    # str.strip("Highest: ") would treat its argument as a *set of characters*
    # and eat any of them from both ends (e.g. "High" or "this" would vanish),
    # so the label is removed explicitly as a prefix instead.
    if cleantext.startswith(label):
        cleantext = cleantext[len(label):].strip()
    return cleantext


def cleandate(txt):
Пример #2
0
from pattern import web
from pattern.web import URL, Element

# Download the published Google Sheet. Note that `url` ends up holding the
# HTML returned by .read(), not the address itself.
url = URL(
    'https://docs.google.com/spreadsheets/d/1J2I40hglES63YZHROcOL3oAjDPqiiKLRPE_ikAWsR-Q/pubhtml?gid=1267634591'
).read()

# Parse the page and keep only the first <tbody> element.
dom = Element(url).by_tag('tbody')[0]

# Date text: content of the second element carrying class 's0' (the header).
date = dom.by_class('s0')[1].content

# Place names: content of every element with class 's4' (inside <td>).
places = [cell.content for cell in dom.by_class('s4')]

try:
       reading_row = [4, 10, 16, 22]
       pol_reading = []

       for row in reading_row:
               	reading = dom.by_tag('tr')[row]
               	reading = reading.by_tag('td')
               	for i in reading:
               		if len(i) >= 1:
               			pol_reading.append(i.content)


       pol_updated_row = [5, 11, 17, 23]
       pol_updated = []