from bs4 import BeautifulSoup

import get_post_services

# Some functions below call the same helper module through a short alias
# (an assumption; only get_html_data is used through either name).
gps = get_post_services


def get_52_week_high_html():
    # Fetch the BSE 52-week high/low report page, retrying until a response
    # arrives (get_html_data returns None on failure).
    url = 'http://www.bseindia.com/markets/equity/EQReports/HighLow.aspx?expandable=2'
    high_html = None
    while high_html is None:
        high_html = get_post_services.get_html_data(url)
    return high_html
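# get_post_services is this repo's HTTP helper and is not shown in this
# section. The sketch below is only an assumption about its contract,
# inferred from the call sites here: fetch a URL and return the body text,
# or None on any failure (the retry loops in this module rely on that None).
import requests  # used only by the sketch below


def get_html_data_sketch(url, params=None, headers=None):
    """Hypothetical stand-in for get_post_services.get_html_data."""
    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.RequestException:
        return None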
def get_requester_url(i_security_code): url = "http://www.bseindia.com/SiteCache/1D/GetQuoteData.aspx" data = {"Type": "EQ", "text": i_security_code} html = get_post_services.get_html_data(url, params=data) while html is None: html = get_post_services.get_html_data(url, params=data) soup = BeautifulSoup(html, "lxml") tag = soup.find("a") if tag is None: data["Type"] = "MF" html = get_post_services.get_html_data(url, params=data) while html is None: html = get_post_services.get_html_data(url, params=data) soup = BeautifulSoup(html, "lxml") tag = soup.find("a") return tag["href"]
def get_ratios(ratio_urls):
    # Try each candidate ratios URL, and each HTML parser, until one
    # combination yields a parsed result; fall back to an empty dict.
    parsers = ['html.parser', 'lxml', 'html5lib']
    for url in ratio_urls:
        html_for_ratios = gps.get_html_data(url)
        if html_for_ratios is None:
            continue
        for parser in parsers:
            ratios = parse_the_ratios_page(html_for_ratios, parser)
            if ratios is not None:
                return ratios
    return {}
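# parse_the_ratios_page is defined elsewhere in the repo. The sketch below is
# a hypothetical reading of its contract, inferred from get_ratios: pull
# label/value rows out of the ratios table, returning None when the given
# parser cannot produce one.
def parse_the_ratios_page_sketch(html, parser):
    """Hypothetical stand-in for parse_the_ratios_page."""
    soup = BeautifulSoup(html, parser)
    table = soup.find('table')
    if table is None:
        return None
    ratios = {}
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) >= 2:
            ratios[cells[0].get_text(strip=True)] = cells[1].get_text(strip=True)
    return ratios or None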
def get_market_cap_and_name(i_security_code): url = "http://www.bseindia.com/stock-share-price/SiteCache/Stock_Trading.aspx" data = {"Type": "EQ", "text": i_security_code} headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0", "Accept": "text/html, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", "Referer": "", } headers["Referer"] = get_requester_url(i_security_code) html = get_post_services.get_html_data(url, params=data, headers=headers) while html is None: html = get_post_services.get_html_data(url, params=data, headers=headers) soup = BeautifulSoup(html, "lxml") return [ i_security_code, soup.find(id="ehd6")["value"].replace(",", "").replace("-", "0"), headers["Referer"].split("/")[4], ]
def get_company_names_for_url(search_keywords):
    # Query moneycontrol's company search with each keyword until a standalone
    # company link (the 'home act' element) shows up; return None otherwise.
    url = 'http://www.moneycontrol.com/stocks/cptmarket/compsearchnew.php'
    data = {
        'search_data': '',
        'cid': '',
        'mbsearch_str': '',
        'topsearch_type': '1',
        'search_str': '',
    }
    standalone = None
    for keyword in search_keywords:
        data['search_str'] = keyword
        html = gps.get_html_data(url, params=data)
        if html is None:
            continue
        soup = BeautifulSoup(html, 'lxml')
        standalone = soup.find(attrs={'class': 'home act'})
        if standalone is not None:
            break
    return standalone
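# Minimal example driver (an assumption about how these helpers are meant to
# be combined; '500325' is the BSE security code for Reliance Industries):
if __name__ == '__main__':
    code, market_cap, name = get_market_cap_and_name('500325')
    print(code, market_cap, name)
    print(get_company_names_for_url([name]))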