Пример #1
0
def get_courses():
    """Download a page and extract course records from its HTML tables.

    ``url``, ``headers``, ``requests`` and ``HTMLTableParser`` are not defined
    in this block; they are resolved from the enclosing module.

    Returns:
        A dict with three keys, or ``None`` if anything goes wrong (network
        error, unexpected table layout, ...):

        * ``'info'``    -- cell ``[0][1]`` of the first parsed table.
        * ``'summary'`` -- row 2 of the third parsed table.
        * ``'courses'`` -- one dict per row of the third table from row 5
          onward whose cell 2 is non-empty.
    """
    try:
        # NOTE(review): verify=False turns off TLS certificate checking and no
        # timeout is passed, so a stalled server can block this call
        # indefinitely. Both are kept as-is to preserve behaviour -- confirm
        # whether they are intentional.
        response = requests.get(url, headers=headers, verify=False)
        parser = HTMLTableParser()
        parser.feed(response.text)

        info = parser.tables[0][0][1]
        course_table = parser.tables[2]
        summary = course_table[2]

        # Rows 0-4 of the third table are skipped; rows with an empty cell 2
        # are filtered out (original author's note: "Non TA!").
        # Cells 0 and 7 of each row are not used.
        courses = [
            {
                'title': course[1],
                'title2': course[2],
                'code': course[3],
                'v': course[4],
                'grp': course[5],
                'score': course[6],
                'prof': course[8],
            }
            for course in course_table[5:]
            if len(course[2]) > 0
        ]
        return {
            'info': info,
            'summary': summary,
            'courses': courses,
        }
    except Exception:
        # Deliberate best-effort: any failure yields None. `Exception` (rather
        # than a bare `except:`) lets KeyboardInterrupt / SystemExit propagate.
        return None
Пример #2
0
def crawl(fileName):
    """Collect the tables for every stock code and write one CSV per code.

    The codes come from ``getInputStockCode(fileName)``. For each code,
    ``HTMLTableParser.parse_url(code, i)`` is called with ``i`` = 1..50
    (presumably a page number -- verify against ``parse_url``) until it
    returns an empty DataFrame. The collected frames are concatenated, sorted
    ascending by the ``'date'`` column and written to
    ``./results/<code>.csv`` without the index. Codes that yield no data
    produce no file.

    Args:
        fileName: passed straight through to ``getInputStockCode``.
    """
    stockCodes = getInputStockCode(fileName)
    hp = HTMLTableParser()
    for code in stockCodes:
        frames = []
        for i in range(1, 51):
            tableDF = hp.parse_url(code, i)
            if tableDF.empty:
                # First empty result ends the scan for this code.
                break
            frames.append(tableDF)

        if not frames:
            continue

        # DataFrame.append was removed in pandas 2.0; a single concat over the
        # collected frames gives the same rows without re-copying the
        # accumulated frame on every iteration.
        finalDF = pd.concat(frames).sort_values(by=['date'], ascending=True)
        finalDF.to_csv("./results/" + ''.join(code) + ".csv", index=False)
Пример #3
0
#!/usr/bin/env python

from pprint import pprint
from HTMLTableParser import HTMLTableParser

# Create the parser
p = HTMLTableParser()

try:
    # Create some html data to feed in the parser
    myData = """
 
        <html>
        <body>
            <table id="pricingTable"> 
                <thead> 
                  <tr> 
                    <th class="rowHeader"> 
                       Server Sizes:
                    </th> 
                    <th> 
                       Linux&reg;<span style="font-size:70%; vertical-align: top;">***</span> 
                         <div class="subtitle">Hourly (Estimated Monthly)</div> 
                    </th> 
                    <th> 
                       Windows&reg;
                       <div class="subtitle">Hourly (Estimated Monthly)</div> 
                    </th> 
                  </tr> 
                </thead> 
                <tbody> 
Пример #4
0
#!/usr/bin/env python

import urllib
from pprint import pprint
from HTMLTableParser import HTMLTableParser

# `urllib.urlopen` exists only on Python 2; resolve a urlopen that works on
# both Python 2 and Python 3.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen
from contextlib import closing

# Create the parser
p = HTMLTableParser()

try:
    # Get tables from this webpage
    url = "http://www.franjeado.com/stats.php"

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(url)) as req:
        data = req.read()

    # On Python 3 read() returns bytes, while on Python 2 it is already `str`
    # and is passed through untouched.
    # NOTE(review): UTF-8 is an assumption -- confirm the page's encoding.
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")

    # Parse the data
    p.feed(data)

except Exception as e:
    # Best-effort script: report the failure and fall through so whatever was
    # parsed so far is still printed below.
    print(e)

# Show results
pprint(p.tables)