def __init__(self):
    self._s = HTMLSession()
def start_request(self, url):
    session = HTMLSession()
    header = {'user-agent': random.choice(self.USER_AGENT_LIST)}
    response = session.get(url, headers=header)
    return response
def lego(message, vacancy):
    usdrate = 75
    avgsalarylist = list()
    vacancywithSalary = 0

    def responce(page, per_page, text=vacancy):
        responce = requests.get(
            'https://api.hh.ru/vacancies/',
            headers={'User-Agent': 'Python ([email protected])'},
            params={'text': text, 'page': page, 'per_page': per_page})
        return responce.json()

    # walk every page of vacancies and collect the salaries
    for x in range(responce(page=0, per_page=100)['pages']):
        templist = responce(page=x, per_page=100)['items']
        for i in templist:
            if i['salary'] is None:
                continue
            vacancywithSalary += 1
            if i['salary']['currency'] == 'RUR':
                if i['salary']['from'] is None and i['salary']['to'] is not None:
                    avgsalarylist.append(i['salary']['to'])
                elif i['salary']['from'] is not None and i['salary']['to'] is None:
                    avgsalarylist.append(i['salary']['from'])
                elif i['salary']['from'] is not None and i['salary']['to'] is not None:
                    avgsalarylist.append(
                        int(round((i['salary']['from'] + i['salary']['to']) / 2)))
                else:
                    print('ERRRRRORRRR')
            # USD: convert to roubles with the fixed rate
            else:
                if i['salary']['from'] is None and i['salary']['to'] is not None:
                    avgsalarylist.append(i['salary']['to'] * usdrate)
                elif i['salary']['from'] is not None and i['salary']['to'] is None:
                    avgsalarylist.append(i['salary']['from'] * usdrate)
                elif i['salary']['from'] is not None and i['salary']['to'] is not None:
                    avgsalarylist.append(
                        int(round(((i['salary']['from'] + i['salary']['to']) / 2) * usdrate)))
                else:
                    print('ERRRRRORRRR')

    textik = 'Профессия: ' + str(vacancy)
    bot.send_message(message.chat.id, '---------------')
    bot.send_message(message.chat.id, textik)

    if responce(page=0, per_page=1)['pages'] == 2000:
        session = HTMLSession()
        url = ('https://hh.ru/search/vacancy?L_is_autosearch=false&area=113'
               '&clusters=true&enable_snippets=true&text=%' + vacancy + '&page=0')
        page = session.get(url)
        vacNum = ''
        vacNumPage = page.html.find(
            'body > div.HH-MainContent.HH-Supernova-MainContent > div > div > div > div.bloko-columns-wrapper > div > '
            'div.bloko-column.bloko-column_xs-0.bloko-column_s-8.bloko-column_m-12.bloko-column_l-16 > div > h1')
        xRegex = re.compile(r'\d')
        allnumbers = re.findall(xRegex, vacNumPage[0].text)
        for i in allnumbers:
            vacNum += i
        textik = 'Всего вакансий: ' + str(vacNum)
        bot.send_message(message.chat.id, textik)
    else:
        if responce(page=0, per_page=1)['pages'] == 1:
            bot.send_message(message.chat.id, "Вакансий не найдено")
        else:
            textik = 'Всего вакансий: ' + str(responce(page=0, per_page=1)['pages'])
            bot.send_message(message.chat.id, textik)

    try:
        textik = 'Средняя зарплата: ' + str(int(round(statistics.median(avgsalarylist))))
        bot.send_message(message.chat.id, textik)
    except:
        pass  # no salaries collected

    textik = 'По ссылке можешь ознакомиться подробнее'
    bot.send_message(message.chat.id, '---------------')
    bot.send_message(message.chat.id, 'Теперь можешь написать другую')
async def test_browser_session_fail():
    """HTMLSession.browser should not be called within an existing event loop."""
    session = HTMLSession()
    with pytest.raises(RuntimeError):
        session.browser
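# Added note (not from the original test module): inside an already-running event
# loop, requests_html's AsyncHTMLSession is the usual alternative to HTMLSession.
# A minimal sketch assuming the documented async API; the URL is a placeholder.
from requests_html import AsyncHTMLSession

async def fetch_rendered(url='https://example.com'):
    asession = AsyncHTMLSession()
    response = await asession.get(url)
    await response.html.arender()  # JavaScript rendering via the async browser
    return response.html.html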
def __init__(self, main_url):
    self.main_url = main_url
    self.session = HTMLSession()
    self.result_map = set()
def get_fia_data(force: bool = False):
    if not os.path.exists('data/fia'):
        os.makedirs('data/fia')

    html_list = [
        "https://www.fia.com/documents/season/season-2020-1059",
        "https://www.fia.com/documents/season/season-2019-971"
    ]

    races = pd.read_csv('data/ergast/races.csv')
    drivers = pd.read_csv('data/ergast/drivers.csv')
    standings = pd.read_csv('data/ergast/driver_standings.csv')

    session = HTMLSession()
    data = pd.DataFrame()

    for html in html_list:
        r = session.get(html)
        r.html.render()
        race_id = "Unknown"
        year = html.split("-")[-2]
        for line in r.html.text.split("\n"):
            if "Grand Prix" in line and year not in line:
                line = line.replace("Formula 1 ", "")
                race_id = races.loc[(races['year'] == int(year)) & (races['name'] == line)]
                if race_id.empty:
                    print('Warning: Missing race for ' + year + " " + line)
                # print(race_id[['raceId','year','name']])
                # print(line + " = " + race_id)
            if "Offence" in line and "Corrected" not in line:
                # print(line)
                doctored = re.sub(r"^.*?offence - ", "", line.lower())
                doctored = re.sub(r"\)$", "", doctored)
                doctored = doctored.replace(" (", " - ")
                if doctored == "car 8 parc ferme":
                    doctored_list = ["car 8", "parc ferme"]
                elif doctored == "car 26 track limits turn 10 2nd":
                    doctored_list = ["car 26", "track limits turn 18 2nd"]
                else:
                    doctored_list = doctored.split(" - ", maxsplit=1)
                if "car" in doctored_list[0]:
                    offence = pd.DataFrame(
                        [[doctored_list[0], doctored_list[1], line]],
                        columns=["Car", "Warning", "Entire Line"])
                    driver_num = doctored_list[0].split()[1]
                    driver = drivers.loc[drivers['number'] == driver_num]
                    if len(driver) > 1:
                        correct_driver = standings.loc[
                            (standings['raceId'] == race_id['raceId'].iloc[0]) &
                            (standings['driverId'].isin(list(driver['driverId'].values)))]['driverId']
                        driver = driver.loc[driver['driverId'] == correct_driver.iloc[0]]
                    if driver.empty:
                        print(' Warning: No driver found for ' + driver_num)
                    else:
                        driver = driver.rename(columns={'url': 'driver_url'})
                        temp_dataframe = pd.concat([
                            race_id.reset_index(drop=True),
                            driver.reset_index(drop=True),
                            offence.reset_index(drop=True)
                        ], axis=1)
                        data = data.append(temp_dataframe, ignore_index=True)

    data.to_csv("data/fia/driver_offence.csv", index=False)
def fetch_html(url):
    session = HTMLSession()
    response = session.get(url=url)
    return response
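# Usage sketch for fetch_html above (URL and selectors are illustrative only):
# the returned HTMLResponse exposes the parsed page through its .html attribute.
resp = fetch_html('https://example.com')
print(resp.html.find('title', first=True).text)  # page title
print(resp.html.absolute_links)                  # absolute links found on the page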
def get_school_info(school):
    """
    Web scrapes a school's GreatSchools page.
    school: a dictionary
    """
    session = HTMLSession()
    r = session.get(school['link'])
    r.html.render()
    ranking = 0
    num_metrics = 0

    # Get Sections
    test_scores = r.html.find('#TestScores', first=True)
    equity = r.html.find('#Equity', first=True)
    students = r.html.find('#Students', first=True)
    teachers = r.html.find('#TeachersStaff', first=True)

    # Test Scores
    if test_scores:
        school['english_prof'] = 0
        school['math_prof'] = 0
        subjects = test_scores.find('.subject')
        scores = test_scores.find('.score')
        score_zip = zip(subjects, scores)
        for pair in score_zip:
            if pair[0].text == 'English':
                english_prof = percent_check(pair[1].text)
                school['english_prof'] = english_prof
            elif pair[0].text == 'Math':
                math_prof = percent_check(pair[1].text)
                school['math_prof'] = math_prof
        test_scores_avg = (school['english_prof'] + school['math_prof']) / 2

    # Student Demographics
    if students:
        # Racial Demographics
        demographics = {}
        for demo in students.find('.legend-separator'):
            dblock = demo.find('.legend-title')
            ethnicity = dblock[0].text
            percent = percent_check(dblock[1].text)
            demographics[ethnicity] = percent
        if 'Hispanic' not in demographics:
            school['hispanic'] = 0
        else:
            school['hispanic'] = demographics['Hispanic']
        if 'Black' not in demographics:
            school['black'] = 0
        else:
            school['black'] = demographics['Black']
        urm_percent = school['hispanic'] + school['black']
        school['urm_percent'] = urm_percent
        ranking += (urm_percent / 100) * 0.35
        num_metrics += 1

        # English Learners
        ell = students.find('#english-learners', first=True)
        if ell:
            ell_percent = percent_check(ell.find('tspan')[0].text)
            school['ell'] = ell_percent
            ranking += (ell_percent / 100) * 0.15
            num_metrics += 1

        # Low-income
        low_income = students.find(
            '#students-participating-in-free-or-reduced-price-lunch-program',
            first=True)
        if low_income:
            low_income_percent = percent_check(low_income.find('tspan')[0].text)
            school['low_income'] = low_income_percent
            ranking += (low_income_percent / 100) * 0.5
            num_metrics += 1

    # Teachers with 3+ Years Experience
    if teachers:
        teacher_experience = teachers.find('.score')[0].text
        t_exp_percent = percent_check(teacher_experience)

    # Generate ANova Ranking
    if num_metrics == 0:
        school['ranking'] = 0
    else:
        school['ranking'] = ranking / num_metrics

    session.close()
    return school
def parse_url(url):
    # Parse the URL and return the page content
    session = HTMLSession()
    response = session.get(url)
    return response.content.decode()
from requests_html import HTMLSession

response = HTMLSession().get('https://coreyms.com/')
articles = response.html.find('article')

for article in articles:
    headline = article.find('.entry-title-link', first=True).text
    print(headline)
    print()

    summary = article.find('.entry-content p', first=True).text
    print(summary)
    print()

    try:
        video_code = article.find('.embed-youtube iframe', first=True).attrs['src']
        video_code = video_code.split('/')[4]
        video_code = video_code.split('?')[0]
        video_url = f'https://www.youtube.com/watch?v={video_code}'
        print(video_url)
        print()
        print()
    except:
        print('No video.')
        print()
        print()
def retrieve(self, job_state):
    from requests_html import HTMLSession
    session = HTMLSession()
    response = session.get(self.navigate)
    return response.html.html
def set_args_browser_html(self, *args):
    if args:
        parameter = args[0]
        self.js_session = HTMLSession(browser_args=parameter)
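# Hypothetical call sketch for set_args_browser_html: recent requests_html releases
# accept a browser_args list that is passed to the headless Chromium used by
# .render(); the flags and the `scraper` instance name below are assumptions.
scraper.set_args_browser_html(['--no-sandbox', '--disable-gpu'])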
def crawl_one(url):
    author = get_authors()
    try:
        with HTMLSession() as session:
            response = session.get(url)

        name = response.html.xpath('//h1')[0].text
        content = response.html.xpath('//article//p')
        image_url = response.html.xpath('//figure//img/@src')[0]
        pub_date = response.html.xpath('//time/@datetime')
        cats = response.html.xpath(
            '//article//div[@class="ssrcss-1emjddl-Cluster e1ihwmse0"]//ul//li')

        my_content = ''
        short_description = ''
        for element in content:
            my_content += f'<{element.tag}>' + element.text + f'</{element.tag}>'
            if len(short_description) < 200:
                short_description += element.text + ' '

        image_name = slugify(name)
        img_type = image_url.split('.')[-1]
        img_path = f'images/{image_name}.{img_type}'
        with open(f'media/{img_path}', 'wb') as f:
            with HTMLSession() as session:
                response = session.get(image_url)
                f.write(response.content)

        # pub_date = datetime.strptime(pub_date, '%d %B %Y')
        pub_date = datetime.strptime(pub_date[0][0:10], '%Y-%m-%d')

        categories = []
        for cat in cats:
            categories.append({'name': cat.text.strip(), 'slug': slugify(cat.text)})

        article = {
            'name': name,
            'slug': slugify(name),
            'content': my_content,
            'short_description': short_description.strip(),
            'main_image': img_path,
            'pub_date': pub_date,
            'author': author,
        }
        article, created = Article.objects.get_or_create(**article)
        for category in categories:
            cat, created = Category.objects.get_or_create(**category)
            article.categories.add(cat)
        logger.debug(f"Trying to parse {url}")
        print(article)
    except Exception as e:
        logger.debug(f'Trying to parse {url}')
        print(f'[{url}]', e, type(e), sys.exc_info()[-1].tb_lineno)
def _get_download(self):
    s = HTMLSession()
    r = s.get(self._url)
    html = getattr(r, "html")
    download = html.find(".btn.btn-primary.btn-lg.btn-block", first=True)
    return download.attrs["data-href"] if download else ""
import re
from collections import namedtuple

from helpers import cached_in_db
from helpers import cache_in_calendar
from requests_html import HTMLSession

SaintDay = namedtuple('SaintDay', ['month', 'day'])
SaintResult = namedtuple('SaintResult', ['full_name', 'first_name', 'role', 'dates'])

SPLIT_SAINTS_REGEX = re.compile(r"\d+ >")
SPLIT_LINES_REGEX = re.compile(r"\n+ ")
ITALIAN_MONTHS = [
    "gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno",
    "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"
]
DATE_REGEX = re.compile(r"\d{1,2} (?:" + "|".join(ITALIAN_MONTHS) + ")")

SESSION = HTMLSession()


def create_url(letter, page):
    page_label = f'more{page}.html' if page > 1 else ''
    return f"http://www.santiebeati.it/{letter}/{page_label}"


def create_calendar_url(month, day):
    return "http://www.santiebeati.it/{:02d}/{:02d}".format(month, day)


def parse_saint_days(days):
    results = []
    for day in days:
        d, m = day.split()
from utils.email import send_email, checkIfSent
from bs4 import BeautifulSoup
from requests_html import HTMLSession

store_name = "Koodoo"

if checkIfSent(store_name) is False:
    session = HTMLSession()  # create an HTML Session object
    notify = False
    inStock = ""
    link = "https://koodoo.co.za/collections/all-consoles/products/playstation-5-ps5-digital"

    resp = session.get(link)  # Use the object above to connect to needed webpage
    resp.html.render()  # Run JavaScript code on webpage
    soup = BeautifulSoup(resp.html.html, "html.parser")

    try:
        inStock = soup.find(id="addToCartText-product-template").getText()
    except:
        pass  # do nothing if the item doesn't exist

    if inStock.lower() != "sold out":
        notify = True

    print(f"{store_name} has stock: ", notify)

    if notify:
        # Send a notification
        send_email(store_name, link)
def crawler(token):
    session = HTMLSession()
    request = session.get("https://google.com/search?q=" + token)
    for link in request.html.links:
        if re.search("stick=", link):
            crawler2("https://google.com" + link)
def get_from_meteojob(url):
    session = HTMLSession()
    headers = {
        "Host": "www.meteojob.com",
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-GB,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Cookie": "ABTasty=uid%3D20012809565792105%26fst%3D1580201817549%26pst%3D1581517299643%26cst%3D1581598588773%26ns%3D3%26pvt%3D11%26pvis%3D1%26th%3D; _fbp=fb.1.1580201818340.973396398; _ga=GA1.2.1214576890.1580201818; cikneeto_uuid=id:811e2d38-8a86-4a72-9b53-d92cb2fb87a7; cto_bundle=Dye-hl9qTnpXZEpDNkh1dURJTHdLdE1zQ1c1NlNJaXJJMEJzb2pmY01yZWdXUyUyRkpDWVB2dmEyR0daSXRsWU9EdElsJTJGJTJCUTElMkZTQkZqOTNBSlVnVzRFMHBycVhyUXhWZnZUdDNueVJ5UjZ4Zk41eXBDTWRvSHZsVlM3bENzQUslMkZ4U3hsako5ajQwdk0xSWpaUm9jJTJCRUFMY1dqdFElM0QlM0Q; __gads=ID=f83e1fa8b3f0b798:T=1580201871:S=ALNI_MZiG2mYjCj0SCw_MdkWUT9HiMCATw; _gid=GA1.2.872070098.1581517301; _tac=false~self|not-available; _ta=fr~4~8d7faf049ef65861988a9f7855c1ca28; exit_modal_closed=true; cikneeto=date:1581598601434; autocomplete_history_job=[{%22type%22:%22JOB%22%2C%22id%22:11540%2C%22label%22:%22D%C3%A9veloppeur%20Big%20Data%20(H/F)%22%2C%22count%22:990%2C%22ambiguous%22:false}]; web_user_id=41a78692-e95a-4835-b086-610970bc4126; ABTastySession=sen%3D3__referrer%3D__landingPage%3Dhttps%3A//www.meteojob.com/candidat/offres/offre-d-emploi-data-scientist-h-f-brest-bretagne-cdi-12007746%3Fscroll%3DaW5kZXg9MTUmdG90YWw9NTkmd2hhdD1EJUMzJUE5dmVsb3BwZXVyK0JpZytEYXRhKyhIJTJGRikmcGFnZT0y; _tty=2083560087699625985; _tas=y8pte1n6i5i; _gat=1; _gat_raw=1",
        "Upgrade-Insecure-Requests": "1",
        "Cache-Control": "max-age=0"
    }
    # url = "https://www.meteojob.com/candidat/offres/offre-d-emploi-data-scientist-h-f-paris-ile-de-france-cdi-12341413?what=data"

    meteo = session.get(url, headers=headers)
    contenu = meteo.html.find(".mj-offer-details", first=True)
    if contenu is None:
        print(meteo.html.text)
        return None

    annonce = {}
    annonce["Titre"] = "NaN"
    annonce["Date_publication"] = "NaN"
    annonce["intitule"] = "NaN"
    annonce["ville"] = "NaN"
    annonce["code_dep"] = "NaN"
    annonce["Type_contrat"] = "NaN"
    annonce["Exp"] = "NaN"
    annonce["Diplome"] = "NaN"
    annonce["Entreprise"] = "NaN"
    annonce["Salaire"] = "NaN"
    annonce["corps"] = "NaN"
    annonce["Lien"] = "NaN"

    annonce["Titre"] = contenu.find("h1", first=True).text
    annonce["Date_publication_txt"] = contenu.find(".publication-date", first=True).text
    annonce["Date_publication"] = annonce["Date_publication_txt"]

    cont_json = meteo.html.find(".mj-column-content script")
    for j in cont_json:
        if j.attrs["type"] == "application/ld+json":
            cont = j.text
            if cont is not None and cont != "":
                try:
                    cont = json.loads(cont)
                    annonce["Date_publication"] = cont["datePosted"].split("T")[0]
                except Exception as e:
                    # print(cont)
                    # annonce["Date_publication"] = annonce["Date_publication_txt"]
                    pass
        else:
            print(j.attrs["type"])

    items = contenu.find(".matching-criterion-wrapper")
    criteres = []
    for bal in items:
        criteres.append(bal.text)
    for crit in criteres:
        if "(H/F)" in crit:
            annonce["intitule"] = crit
        elif crit.endswith(")"):
            lieu = crit.split("(")
            annonce["ville"] = lieu[0]
            annonce["code_dep"] = lieu[1].replace(')', '')
        elif crit in ("CDI", "CDI-C", "CDD", "Interim", "Stage"):
            annonce["Type_contrat"] = crit
        elif crit.startswith("Expérience"):
            annonce["Exp"] = crit.split(" : ")[1]
        elif crit.startswith("Niveau"):
            annonce["Diplome"] = crit.split(" : ")[1]

    sections = contenu.find("section")
    corps = ""
    for sect in sections:
        if sect.attrs.get("class") and "offer-apply-form" not in sect.attrs.get("class"):
            corps += sect.text + "\n"
        if sect.attrs.get("class") and "company-description" in sect.attrs.get("class"):
            annonce["Entreprise"] = sect.find("h3 span", first=True).text
        elif not sect.attrs.get("class"):
            if sect.find("h3") and sect.find("h3", first=True).text == "Salaire et avantages":
                if sect.find("div", first=True):
                    annonce["Salaire"] = sect.find("div", first=True).text
                elif sect.find("p", first=True):
                    annonce["Salaire"] = sect.find("p", first=True).text
                else:
                    print(sect.html)

    annonce["corps"] = corps
    annonce["Lien"] = url
    return annonce
# ch24_7.py
from requests_html import HTMLSession

session = HTMLSession()                      # create the session
url = 'https://python.org/'
r = session.get(url)                         # get()
txt = r.html.search('Python is a {} language')[0]
print(txt)
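# Companion sketch for ch24_7.py (not part of the original file): .search() returns
# only the first match of the template, while .search_all() returns every match.
for result in r.html.search_all('Python is a {} language'):
    print(result[0])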
def __init__(self):
    self.session = HTMLSession()
    self.user_context_id = None
    self.staff_id = None
    self.logged_in = False
def scraper(tipo,region,a): fechahoy = datetime.datetime.now() fechascrap=str(fechahoy.year)+'-'+str(fechahoy.month)+'-'+str(fechahoy.day) try: link = "https://www.portalinmobiliario.com/venta/"+tipo+"/"+region+"?ca=1&ts=1&mn=1&or=&sf=0&sp=0&at=0&pg=" + str(a) page = requests.get(link, timeout=20, headers={'User-Agent': agentCreator.generateAgent()}) except: return print("continue") for j in range (1,28): proyecto = [] id=None nombre=None tipo=None comuna=None barrio=None direccion=None lat=None lon=None entrega=None propietario=None construye=None arquitecto=None propietario=None vende=None bodega=None bdesde=None bhasta=None bprom=None estacionamiento=None edesde=None ehasta=None eprom=None link2=None tree = html.fromstring(page.content) link2=tree.xpath('//*[@id="wrapper"]/section[2]/div/div/div[1]/article/div[3]/div['+str(j)+']/div[2]/div/div[1]/h4/a') try: link2=link2[0] except: continue link2=str(link2.attrib) link2=link2.split(': ') link2=link2[1] link2=str(link2) link2=link2[2:-2] link2 = "https://www.portalinmobiliario.com/" + link2 try: page2 = requests.get(link2, timeout=30, headers={'User-Agent': agentCreator.generateAgent()}) except: continue print("continue") tree2 = html.fromstring(page2.content) bye=tree2.xpath('//*[@id="project-descr"]/div/div[3]/div/p') try: bye=bye[0] bye=bye.text if ("no" in bye): if ("UF" in bye): try: byes=bye.split(' ') byen=0 for i in byes: if ("bodega" in i): if ("(desde" in byes[byen+1]): bdesde=byes[byen+3] if ("," in bdesde): bdesde=bdesde[:-1] if (")" in bdesde): bdesde=bdesde[:-1] else: bprom=byes[byen+2] if ("," in bprom) or ("." in bprom): bprom=bprom[:-1] if (")" in bprom): bprom=bprom[:-1] if ("hasta" in byes[byen+4]): bhasta=byes[byen+6] if ("," in bhasta) or ("." in bhasta): bhasta=bhasta[:-1] if (")" in bhasta): bhasta=bhasta[:-1] if ("estacionamiento" in i): if ("(desde" in byes[byen+1]): edesde=byes[byen+3] if ("," in edesde) or ("." in edesde): edesde=edesde[:-1] if (")" in edesde): edesde=edesde[:-1] else: eprom=byes[byen+2] if ("," in eprom) or ("." in eprom): eprom=eprom[:-1] if (")" in eprom): eprom=eprom[:-1] if ("hasta" in byes[byen+4]): ehasta=byes[byen+6] if ("," in ehasta) or ("." 
in ehasta): ehasta=ehasta[:-1] if (")" in ehasta): ehasta=ehasta[:-1] if ")" in ehasta: ehasta.split(")") ehasta=ehasta[0] byen=byen+1 bodega=0 estacionamiento=0 except: bodega = 0 estacionamiento = 0 else: bodega=0 estacionamiento=0 else: bodega=1 estacionamiento=1 try: bprom=(float(bdesde)+float(bhasta))/2 except: try: bprom=float(bprom) except: bprom=None try: eprom = (float(edesde) + float(ehasta)) / 2 except: eprom=None try: bdesde=float(bdesde) except: bdesde=None try: bhasta=float(bhasta) except: bhasta=None try: edesde = float(edesde) except: edesde=None try: ehasta = float(ehasta) except: ehasta=None try: eprom = float(eprom) except: eprom=None try: bodega=float(bodega) except: bodega=None try: estacionamiento=float(estacionamiento) except: estacionamiento=None byen=0 byes = bye.split(' ') for j in byes: if ("incluye") in j: if ("no" not in byes[byen-1]) or (byen==0): if ("estacionamiento" in byes[byen+1]): estacionamiento=1 if ("y" in byes[byen+2]): if("bodega" in byes[byen+3]): bodega=1 if("bodega" in byes[byen+4]): bodega=float(byes[byen+3]) if ("estacionamiento") in byes[byen+2]: try: estacionamiento=float(byes[byen+1]) except: estacionamiento=0 if ("y" in byes[byen+3]): if("bodega" in byes[byen+4]): bodega=1 if("bodega" in byes[byen+5]): try: bodega=float(byes[byen+4]) except: bodega=0 if ("bodega" in byes[byen+1]): bodega=1 if ("y" in byes[byen+2]): if("estacionamiento" in byes[byen+3]): estacionamiento=1 if("estacionamiento" in byes[byen+4]): estacionamiento=float(byes[byen+3]) if ("bodega") in byes[byen+2]: try: bodega=float(byes[byen+1]) except: bodega=0 if ("y" in byes[byen+3]): if("estacionamiento" in byes[byen+4]): estacionamiento=1 if("estacionamiento" in byes[byen+5]): try: estacionamiento=float(byes[byen+4]) except: estacionamiento=0 else: byen=byen+1 except: bodega=0 estacionamiento=0 try: bodega=int(bodega) bdesde=float(bdesde) bhasta=float(bhasta) bprom=float(bprom) estacionamiento=int(estacionamiento) edesde=float(edesde) ehasta=float(ehasta) eprom=float(eprom) except: alfa=None tester=tree2.xpath('//*[@id="project-descr"]/div/div[4]/div[2]/div/div[1]/strong') validator=4 try: tester=tester[0].text except: validator=5 for v in range (1,6): tester=tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/strong') try: if("Fecha" in tester[0].text): entrega=tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/text()') entrega=entrega[0] entrega=str(entrega) entrega=entrega[1:] if ("Propietario" in tester[0].text): propietario =tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/text()') propietario=propietario[0] propietario=str(propietario) propietario=propietario[1:] if ("Arquitecto" in tester[0].text): arquitecto=tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/text()') arquitecto=arquitecto[0] arquitecto=str(arquitecto) arquitecto=arquitecto[1:] if ("Construye" in tester[0].text): construye=tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/text()') construye=construye[0] construye=str(construye) construye=construye[1:] if ("Vende" in tester[0].text): vende=tree2.xpath('//*[@id="project-descr"]/div/div['+str(validator)+']/div[2]/div/div['+str(v)+']/text()') vende=vende[0] vende=str(vende) vende=vende[1:] except: continue session = HTMLSession() r = session.get(link2, timeout=20) script=r.html.find('.page-project') texto=script[0] texto=texto.text try: 
texto=texto.split(',') except: continue b=0 for t in texto: if ("lat") in t: try: lat=texto[b] if ("Nombre") not in t: lat=lat.split(': ') lat=lat[1] lat=lat.split(' ') lat=lat[0] lat=float(lat) lon=texto[b+1] lon=lon.split(' ') lon=lon[2] lon=float(lon) id=texto[b+1] id=id.split(':') id=id[2] id=int(id) nombre=texto[b+2] nombre=nombre.split(':') nombre=nombre[1] nombre=nombre[1:-1] tipo=texto[b+3] tipo=tipo.split(':') tipo=tipo[1] tipo=tipo[1:-1] direccion=texto[b+5] direccion=direccion.split(':') direccion=direccion[1] direccion=direccion[1:] if "?" in texto[b+7]: comuna=texto[b+6] comuna=comuna[1:-1] else: comuna=texto[b+8] comuna=comuna[1:-1] barrio=texto[b+7] barrio=barrio[1:] except: b=b+1 continue b=b+1 a=0 proyecto.append(id) proyecto.append(nombre) proyecto.append(tipo) proyecto.append(comuna) proyecto.append(barrio) proyecto.append(direccion) proyecto.append(lat) proyecto.append(lon) proyecto.append(entrega) proyecto.append(propietario) proyecto.append(arquitecto) proyecto.append(construye) proyecto.append(vende) proyecto.append(bodega) proyecto.append(bdesde) proyecto.append(bhasta) proyecto.append(bprom) proyecto.append(estacionamiento) proyecto.append(edesde) proyecto.append(ehasta) proyecto.append(eprom) proyecto.append(link2) proyecto.append(fechascrap) proyecto.append(fechascrap) print(len(proyecto)) print(proyecto) insertarProyecto(proyecto) time.sleep(random.uniform(0.5,1.5)) for t in texto: if ("Numero" in t): try: propiedad = [] n=str(texto[a]) try: n=n.split(':') n=str(n[1]) n=n[1:-1] except: n="nn" try: id3=(int(n)*100000) id3=id3+id except: continue precio=str(texto[a+1]) precio=precio.split(":") precio=str(precio[1]) try: precio=float(precio) except: precio=None dormitorios=str(texto[a+3]) dormitorios=dormitorios.split(":") dormitorios=str(dormitorios[1]) try: dormitorios=int(dormitorios) except: dormitorios=None banos=str(texto[a+4]) banos=banos.split(":") banos=str(banos[1]) banos=int(banos) piso=str(texto[a+5]) piso=piso.split(":") piso=str(piso[1]) try: piso=int(piso) except: piso=None orientacion=str(texto[a+6]) orientacion=orientacion.split(":") orientacion=str(orientacion[1]) orientacion=orientacion[1:-1] util=str(texto[a+8]) util=util.split(":") util=str(util[1]) try: util=float(util) except: util=None total=str(texto[a+9]) total=total.split(":") total=str(total[1]) try: total=float(total) except: total=None terraza=str(texto[a+10]) terraza=terraza.split(":") terraza=str(terraza[1]) try: terraza=float(terraza) except: terraza=None propiedad.append(id) propiedad.append(id3) propiedad.append(n) propiedad.append(precio) propiedad.append(dormitorios) propiedad.append(banos) propiedad.append(piso) propiedad.append(orientacion) propiedad.append(util) propiedad.append(total) propiedad.append(terraza) propiedad.append(fechascrap) print(id) print(id3) print(n) print(precio) print(dormitorios) print(banos) print(piso) print(orientacion) print(util) print(total) print(terraza) print(fechascrap) propiedad.append(fechascrap) insertarDepto(propiedad) except: continue a=a+1 sleep(5) sleep(120)
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import pandas as pd

s = HTMLSession()

searchterm = 'dslr+camera'

# url for amazon website
# url = 'https://www.amazon.co.uk/s?k={searchterm}&qid=1616907527&ref=sr_pg_1'
url = f'https://www.amazon.co.uk/s?k={searchterm}&i=black-friday'


def getdata(url):
    r = s.get(url)
    r.html.render(sleep=1)
    soup = BeautifulSoup(r.html.html, 'html.parser')
    return soup


dealslist = []


def getdeals(soup):
    products = soup.find_all('div', {'data-component-type': 's-search-result'})
    for item in products:
        # Scraping product name
        title = item.find('a', {
def get_flask_TripleDES_Decode(aesKey, text):
    url = f'http://127.0.0.1:8058/decode?key={aesKey}&text={text}'
    session = HTMLSession()
    session.get(url)
    time.sleep(0.5)
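# Variant sketch (an assumption about the local Flask endpoint, not its confirmed
# contract): if /decode returns the decoded text in the response body, the helper
# could return it instead of discarding the response.
def get_flask_TripleDES_Decode_result(aesKey, text):
    url = f'http://127.0.0.1:8058/decode?key={aesKey}&text={text}'
    session = HTMLSession()
    response = session.get(url)
    return response.text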
async def on_message(message):
    print(f"{message.channel}: {message.author}: {message.author.name}: {message.content}")
    sentdex_guild = client.get_guild(405403391410438165)

    author_roles = message.author.roles
    #print(author_roles)
    #author_role_ids = [r.id for r in author_roles]

    if random.choice(range(500)) == 30:
        matches = [r for r in author_roles if r.id in vanity_role_ids]
        #print(matches)
        if len(matches) == 0:
            try:
                role_id_choice = random.choice(vanity_role_ids)
                actual_role_choice = sentdex_guild.get_role(role_id_choice)
                #print(type(message.author))
                author_roles.append(actual_role_choice)
                await message.author.edit(roles=author_roles)
            except Exception as e:
                print('EDITING ROLES ISSUE:', str(e))

    with open(f"{path}/msgs.csv", "a") as f:
        if message.author.id not in chatbots:
            f.write(f"{int(time.time())},{message.author.id},{message.channel}\n")

    with open(f"{path}/log.csv", "a") as f:
        if message.author.id not in chatbots:
            try:
                f.write(f"{int(time.time())},{message.author.id},{message.channel},{message.content}\n")
            except Exception as e:
                f.write(f"{str(e)}\n")

    if "sentdebot.member_count()" == message.content.lower():
        await message.channel.send(f"```py\n{sentdex_guild.member_count}```")

    elif "sentdebot.community_report()" == message.content.lower() and message.channel.id in image_chan_ids:
        online, idle, offline = community_report(sentdex_guild)
        file = discord.File(f"{path}/online.png", filename=f"{path}/online.png")
        await message.channel.send("", file=file)
        await message.channel.send(
            f'```py\n{{\n\t"Online": {online},\n\t"Idle/busy/dnd": {idle},\n\t"Offline": {offline}\n}}```')

    elif "sentdebot.p6()" == message.content.lower():
        await message.channel.send(
            f"```\nThe Neural Networks from Scratch video series will resume when the NNFS book is completed. This means the videos will resume around Sept or Oct 2020.\n\nIf you are itching for the content, you can buy the book and get access to the draft now. The draft is over 500 pages, covering forward pass, activation functions, loss calcs, backward pass, optimization, train/test/validation for classification and regression. You can pre-order the book and get access to the draft via https://nnfs.io```")

    elif "sentdebot.user_activity()" == message.content.lower() and message.channel.id in image_chan_ids:
        # and len([r for r in author_roles if r.id in admins_mods_ids]) > 0:
        file = discord.File(f"{path}/activity.png", filename=f"{path}/activity.png")
        await message.channel.send("", file=file)
        #await message.channel.send(f'```py\n{{\n\t"Online": {online},\n\t"Idle/busy/dnd": {idle},\n\t"Offline": {offline}\n}}```')

    elif "help(sentdebot)" == message.content.lower() or "sentdebot.commands()" == message.content.lower():
        await message.channel.send(commands_available)

    # if it doesn't work later:
    #elif "sentdebot.logout()" == message.content.lower() and message.author.id == 324953561416859658:
    elif "sentdebot.logout()" == message.content.lower() and str(message.author).lower() == "sentdex#7777":
        await client.close()

    elif "sentdebot.gtfo()" == message.content.lower() and str(message.author).lower() == "sentdex#7777":
        await client.close()

    elif "sentdebot.get_history()" == message.content.lower() and str(message.author).lower() == "sentdex#7777":
        channel = sentdex_guild.get_channel(channel_ids[0])
        async for message in channel.history(limit=999999999999999):
            if message.author.id == 324953561416859658:
                with open(f"{path}/history_out.csv", "a") as f:
                    f.write(f"{message.created_at},1\n")

    else:
        query = search_term(message.content)
        if query:
            #query = match.group(1)
            print(query)
            qsearch = query.replace(" ", "%20")
            full_link = f"https://pythonprogramming.net/search/?q={qsearch}"
            session = HTMLSession()
            r = session.get(full_link)

            specific_tutorials = [(tut.text, list(tut.links)[0])
                                  for tut in r.html.find("a")
                                  if "collection-item" in tut.html]

            if len(specific_tutorials) > 0:
                return_str = "\n---------------------------------------\n".join(
                    f'{tut[0]}: <https://pythonprogramming.net{tut[1]}>'
                    for tut in specific_tutorials[:3])
                return_str = (f"```Searching for '{query}'```\n" + return_str +
                              f"\n----\n...More results: <{full_link}>")
                await message.channel.send(return_str)
            else:
                await message.channel.send(f"""```py
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NotFoundError: {query} not found```""")
def start_request(self, url):
    headers = {'user-agent': random.choice(self.USER_AGENT_LIST)}
    session = HTMLSession()
    response = session.get(url, headers=headers)
    return response
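# Hypothetical companion attribute for the class above: USER_AGENT_LIST is assumed
# to be a plain list of user-agent strings from which one is chosen at random.
USER_AGENT_LIST = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0',
]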
def test_is_goodreads_available(self):
    url = 'https://www.bbc.com/news/technology'
    with HTMLSession() as session:
        response = session.get(url)
        self.assertTrue(response.status_code == 200)
def handle(self, *args, **options):
    # List of song-list page URLs
    music_list_page_url_list = [
        "http://textage.cc/score/index.html?sA11B00",
        "http://textage.cc/score/index.html?sB11B00"
    ]
    for music_list_page_url in music_list_page_url_list:
        print(f"■music_list_page_url:{music_list_page_url}")
        music_list_page_session = HTMLSession()
        music_list_page_response = music_list_page_session.get(music_list_page_url)
        # Workaround for UnicodeEncodeError when scraping Shift_JIS pages
        music_list_page_response.content.decode("shift_jis")
        print("before render music_list_page...")
        # Rendering is quite heavy, so a large timeout is needed
        music_list_page_response.html.render(timeout=300)
        print("after render music_list_page!!!")
        # Guard against mojibake (garbled characters)
        music_list_page_response.html.encoding = "utf-8"

        # Locate the rows with find()
        music_tr_list = music_list_page_response.html \
            .find("table")[1] \
            .find("tr")
        first_tr_flg = True
        for music_tr in music_tr_list:
            if first_tr_flg:
                first_tr_flg = False
                continue
            td_second = music_tr.find("td")[1]
            td_second_class = td_second.attrs["class"][0]
            music_deleted_flag = False
            if td_second_class.startswith("x"):
                music_deleted_flag = True
                music_tr_diff_code = td_second_class[2:3]
            else:
                music_tr_diff_code = td_second_class[1:2]

            # Convert the list page's difficulty code to the one stored in the music master
            if music_tr_diff_code == "n":
                music_tr_diff_code_mstcode = "N"
            elif music_tr_diff_code == "h":
                music_tr_diff_code_mstcode = "H"
            elif music_tr_diff_code == "a":
                music_tr_diff_code_mstcode = "A"
            elif music_tr_diff_code == "x":
                music_tr_diff_code_mstcode = "L"
            else:
                raise Exception(f"想定外のmusic_tr_diff_code:{music_tr_diff_code}")

            # Song name
            music_name = music_tr.find("td")[3].text
            print(f"music_name:{music_name}")
            print(f"music_tr_diff_code_mstcode:{music_tr_diff_code_mstcode}")

            # Fetch the music master record
            old_music_mst_list = MstMusic.objects.filter(
                music_name=music_name,
                difficulty_code=music_tr_diff_code_mstcode)
            if len(old_music_mst_list) == 0:
                old_music_mst = None
            elif len(old_music_mst_list) == 1:
                old_music_mst = old_music_mst_list[0]
            else:
                raise Exception("複数レコード取得は想定外")

            if music_deleted_flag:
                if old_music_mst is not None:
                    setattr(old_music_mst, "music_deleted_flag", music_deleted_flag)
                    setattr(old_music_mst, "upd_user", "music-mst-mainte.py")
                    setattr(old_music_mst, "upd_date", timezone.localtime())
                    old_music_mst.save()
                continue
            else:
                # Do nothing if the record already exists
                if old_music_mst is not None:
                    print("MstMusic exists.")
                    continue

            # Chart page URL
            try:
                note_page_url = "http://textage.cc/score/" \
                    + td_second.find("a")[0].attrs["href"]
            except Exception:
                # No <a> tag means the chart page does not exist yet
                continue

            # Access the chart page
            music_page_session = HTMLSession()
            music_page_response = music_page_session.get(note_page_url)
            music_page_response.content.decode("shift_jis")
            print("before render music_page...")
            music_page_response.html.render(timeout=300)
            print("after render music_page!!!")
            music_page_response.html.encoding = "utf-8"

            # Parse the song info (top of the page)
            music_info_top_lines = music_page_response.html.find("nobr")[0]
            music_info_top_line_text_arr = music_info_top_lines \
                .text \
                .split("\n", maxsplit=2)
            for music_info_top_line_text in music_info_top_line_text_arr:
                print(music_info_top_line_text)
            music_info_style_diff = music_info_top_line_text_arr[1] \
                .replace("[", "") \
                .replace("]", "")
            music_info_top_style_diff_arr = music_info_style_diff.split(" ")

            # Replace line breaks with empty strings because some song names (e.g. V2) contain them
            # Handle "V2/ TAKA" -> "V2 / TAKA"
            # TODO: can songs older than V2 (★10) be scraped correctly?
            music_info_top_others = music_info_top_line_text_arr[2] \
                .replace("\n", "") \
                .replace("V2/", "V2 /") \
                .replace(" bpm:", ":::::") \
                .replace(" - ★", ":::::") \
                .replace(" Notes:", ":::::") \
                .split(":::::")
            music_info_top_name_artist_arr = music_info_top_others[0].split(" / ")
            music_info_top_bpm_arr = music_info_top_others[1].split("~")

            # Parse the song info (bottom of the page)
            music_info_bottom_line = music_page_response.html.find("table + font")[0]
            print(music_info_bottom_line.text)
            music_info_bottom_line_text_arr = music_info_bottom_line \
                .text \
                .replace(", ", ",") \
                .split(" ", maxsplit=1)
            # TODO: the DP page has separate left/right note counts, so this is not enough as-is
            music_info_bottom_score_border_arr = music_info_bottom_line_text_arr[0] \
                .split(",")
            music_info_bottom_special_notes_arr = music_info_bottom_line_text_arr[1] \
                .replace("(", "") \
                .replace(")", "") \
                .split(" ")

            # Song name already set (music_name)
            # Deleted flag already set (music_deleted_flag)
            # Artist name
            artist_name = music_info_top_name_artist_arr[1]
            # Genre
            genre = music_info_top_line_text_arr[0].replace("\"", "")
            # Difficulty (NORMAL, HYPER, ANOTHER or LEGGENDARIA)
            difficulty_and_difficulty_code_map = {
                "NORMAL": "N",
                "HYPER": "H",
                "ANOTHER": "A",
                "LEGGENDARIA": "L",
            }
            difficulty_code = difficulty_and_difficulty_code_map.get(
                music_info_top_style_diff_arr[1])
            # BPM
            bpm_min = int(music_info_top_bpm_arr[0])
            bpm_max = int(music_info_top_bpm_arr[0]) if len(music_info_top_bpm_arr) == 1 \
                else int(music_info_top_bpm_arr[1])
            # Level (★)
            level = int(music_info_top_others[2])
            # SP total note count
            sp_notes_num_all = int(music_info_top_others[3])
            # SP special note count (scratch)
            sp_notes_num_scr = 0
            # SP special note count (charge notes)
            sp_notes_num_cn = 0
            # SP special note count (backspin scratch)
            sp_notes_num_bss = 0
            for special_notes_info in music_info_bottom_special_notes_arr:
                special_notes_info_arr = special_notes_info.split("=")
                if special_notes_info_arr[0] == "SCR":
                    sp_notes_num_scr = int(special_notes_info_arr[1])
                elif special_notes_info_arr[0] == "CN":
                    sp_notes_num_cn = int(special_notes_info_arr[1])
                elif special_notes_info_arr[0] == "BSS":
                    sp_notes_num_bss = int(special_notes_info_arr[1])
                else:
                    raise Exception(f"想定外の特殊ノーツ]{special_notes_info_arr[0]}")
            # Score borders
            sp_score_border_aaa = int(
                music_info_bottom_score_border_arr[0].replace("AAA:", ""))
            sp_score_border_aa = int(
                music_info_bottom_score_border_arr[1].replace("AA:", ""))
            sp_score_border_a = int(
                music_info_bottom_score_border_arr[2].replace("A:", ""))

            # Calculate and set the DB columns
            # TODO: CN and BSS count as two notes (start and end)
            db_notes_num_scr = 0
            db_notes_num_cn = sp_notes_num_cn * 2
            db_notes_num_bss = 0
            db_notes_num_all = sp_notes_num_all * 2 \
                - sp_notes_num_scr \
                - sp_notes_num_bss * 2
            db_score_max = db_notes_num_all * 2
            db_score_border_maxminus = math.ceil(db_notes_num_all * 2 * 8.5 / 9)
            db_score_border_aaa = math.ceil(db_notes_num_all * 2 * 8 / 9)
            db_score_border_aa = math.ceil(db_notes_num_all * 2 * 7 / 9)
            db_score_border_a = math.ceil(db_notes_num_all * 2 * 6 / 9)

            # TODO: handle updates
            mst_music = MstMusic()
            setattr(mst_music, "music_name", music_name)
            setattr(mst_music, "music_deleted_flag", music_deleted_flag)
            setattr(mst_music, "artist_name", artist_name)
            setattr(mst_music, "genre", genre)
            setattr(mst_music, "difficulty_code", difficulty_code)
            setattr(mst_music, "bpm_min", bpm_min)
            setattr(mst_music, "bpm_max", bpm_max)
            setattr(mst_music, "level", level)
            setattr(mst_music, "sp_notes_num_all", sp_notes_num_all)
            setattr(mst_music, "sp_notes_num_scr", sp_notes_num_scr)
            setattr(mst_music, "sp_notes_num_cn", sp_notes_num_cn)
            setattr(mst_music, "sp_notes_num_bss", sp_notes_num_bss)
            setattr(mst_music, "sp_score_max", 9999)  # TODO: calc and set
            setattr(mst_music, "sp_score_border_maxminus", 9999)  # TODO: calc and set
            setattr(mst_music, "sp_score_border_aaa", sp_score_border_aaa)
            setattr(mst_music, "sp_score_border_aa", sp_score_border_aa)
            setattr(mst_music, "sp_score_border_a", sp_score_border_a)
            setattr(mst_music, "dp_notes_num_all", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_notes_num_scr", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_notes_num_cn", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_notes_num_bss", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_score_max", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_score_border_maxminus", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_score_border_aaa", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_score_border_aa", 9999)  # TODO: calc and set
            setattr(mst_music, "dp_score_border_a", 9999)  # TODO: calc and set
            setattr(mst_music, "db_notes_num_all", db_notes_num_all)
            setattr(mst_music, "db_notes_num_scr", db_notes_num_scr)
            setattr(mst_music, "db_notes_num_cn", db_notes_num_cn)
            setattr(mst_music, "db_notes_num_bss", db_notes_num_bss)
            setattr(mst_music, "db_score_max", db_score_max)
            setattr(mst_music, "db_score_border_maxminus", db_score_border_maxminus)
            setattr(mst_music, "db_score_border_aaa", db_score_border_aaa)
            setattr(mst_music, "db_score_border_aa", db_score_border_aa)
            setattr(mst_music, "db_score_border_a", db_score_border_a)
            setattr(mst_music, "db_withscr_notes_num_all", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_notes_num_scr", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_notes_num_cn", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_notes_num_bss", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_score_max", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_score_border_maxminus", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_score_border_aaa", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_score_border_aa", 9999)  # TODO: calc and set
            setattr(mst_music, "db_withscr_score_border_a", 9999)  # TODO: calc and set
            setattr(mst_music, "note_page_url", note_page_url)
            setattr(mst_music, "ins_user", "music-mst-mainte.py")
            setattr(mst_music, "ins_date", timezone.localtime())
            setattr(mst_music, "upd_user", "music-mst-mainte.py")
            setattr(mst_music, "upd_date", timezone.localtime())
            mst_music.save()
            music_page_response.close()
def getHTMLwithJavascriptContent(url):
    session = HTMLSession()
    resp = session.get(url)
    resp.html.render(timeout=20)
    return resp.html.html
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from requests_html import HTMLSession
from urllib.request import urlopen as uReq
import time

session = HTMLSession()

your_exec_path = r"C:\Users\PARULEKAR\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(executable_path=your_exec_path)
driver.get("https://www.who.int/publications/en/")

article_link = driver.find_elements_by_xpath('//a[@class="buffet_headline"]')
for i in range(len(article_link)):
    print("---------------------------------------------------------------------------------------------------")
    abc = article_link[i].get_attribute("href")
    r = session.get(abc)
    html = r.html.html
    pageSoup = soup(html, "html.parser")
    element = pageSoup.find("li", {"first"})
    try:
        pdf_url = element.a.get('href')
        print(pdf_url)
    except:
        print("pdf not found")
    # try:
    #     element = pageSoup.find("li", {"first"})
    #     print(element)
    #     try:
def UploadImageAsset(client, url, image_ref_on_file, image_name, width, height):
    """Uploads the image from the specified url.

    Args:
      client: An AdWordsClient instance.
      url: The image URL.

    Returns:
      The ID of the uploaded image.
    """
    # Initialize appropriate service.
    asset_service = client.GetService('AssetService', version='v201809')

    # Download the image.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'
    }
    session__ = HTMLSession()
    """
    image_request = session__.get(url, headers=headers, verify=True)
    #print("URL: "+ url)
    print(image_request.content)
    print(image_request.html)
    """
    print(url)
    tab = url.split('&')
    #print(type(url))
    image_request = session__.get(tab[0], headers=headers, verify=True)
    #print(tab[0])
    #image_asset = BytesIO(urlopen(tab[0]).read())
    image_asset = image_request.content
    #print(image_asset)

    # Create the image asset.
    try:
        source = tinify.tinify.tinify.from_url(url)
        #print(source)
        resized_image = source.resize(method="fit", width=int(width), height=int(height))
        data = resized_image.to_file(image_ref_on_file)
        #print(sys.getsizeof(data))
        #print(data)
    except:
        try:
            source = tinify.tinify.tinify.from_url(url)
            print(source)
            resized_image = source.resize(method="fit", width=int(width), height=int(height))
            data = resized_image.to_file(image_ref_on_file)
            print(sys.getsizeof(data))
            #print(data)
        except Exception as e:
            print(e)

    print(image_name)
    file_url = url_for('uploaded_file', filename=image_name, _external=True)
    image_asset = {
        'xsi_type': 'ImageAsset',
        'imageData': urlopen(file_url).read(),
        # This field is optional, and if provided should be unique.
        # 'assetName': 'Image asset ' + str(uuid.uuid4()),
    }

    # Create the operation.
    operation = {'operator': 'ADD', 'operand': image_asset}

    # Create the asset and return the ID.
    result = asset_service.mutate([operation])
    return result['value'][0]['assetId']