示例#1
0
from pyquery import PyQuery as pq
import scraper
import constants

# Sport labels for Northern Michigan mapped to their `constants` identifiers.
# A list value attaches a single label to more than one sport.
# NOTE(review): the labels are presumably matched against headings on the
# scraped page -- confirm against scraper.scrape_roster_row_site.
sports = {
    "Football": constants.FOOTBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Men's Golf": constants.MENS_GOLF,
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    # NOTE(review): "Skiiing" is spelled with three i's. Verify whether the
    # page really uses this spelling or whether it is a typo for "Skiing".
    "Nordic Skiiing": [
        constants.MENS_NORDIC_SKIING,
        constants.WOMENS_NORDIC_SKIING,
    ],
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Women's Swimming & Diving": constants.WOMENS_SWIMMING_DIVING,
    "Women's Track & Field": constants.WOMENS_TRACK_FIELD,
    "Women's Volleyball": constants.WOMENS_VOLLEYBALL,
    "Women's Cross Country": constants.WOMENS_CROSS_COUNTRY,
}


# Announce the run, then scrape using "h2" as the header tag and the four
# listed field names.
print("Scraping Northern Michigan")
scraper.scrape_roster_row_site(
    "Northern Michigan",
    sports,
    "h2",
    ["name", "title", "phone", "email"],
)


示例#2
0
from pyquery import PyQuery as pq
import scraper
import constants

# Add Yale-specific labels to the shared scraper.sports mapping. Both sailing
# labels map to SAILING and both crew labels map to MENS_ROWING.
# Note that this mutates the mapping owned by the scraper module.
for _label, _sport in (
    ("Women's Sailing", constants.SAILING),
    ("Coed Sailing", constants.SAILING),
    ("Heavyweight Crew", constants.MENS_ROWING),
    ("Lightweight Crew", constants.MENS_ROWING),
):
    scraper.sports[_label] = _sport

def get_table(header):
    """Return the node reached from ``header`` via parent -> next -> next.

    ``header`` is expected to expose ``parent()`` and ``next()`` traversal
    methods (presumably a PyQuery selection -- confirm against the caller
    in ``scraper``). The result is the second ``next()`` step taken from
    the header's parent.
    """
    container = header.parent()
    # Skip one intermediate node between the header's parent and the result.
    skipped = container.next()
    return skipped.next()

def get_finder(header_tag, key):
    """Build the selector ``<header_tag>:contains("Yale <key>")``.

    Args:
        header_tag: Tag name placed in front of the ``:contains`` filter.
        key: Sport label; it is prefixed with ``"Yale "`` inside the filter.

    Returns:
        The assembled selector string.
    """
    pieces = [header_tag, ':contains("Yale ', key, '")']
    return "".join(pieces)

# Announce the run, then scrape with "b" as the header tag, passing the
# Yale-specific table locator and selector builder defined in this script.
print("Scraping Yale")
scraper.scrape_roster_row_site(
    "Yale University",
    scraper.sports,
    "b",
    fields=["name", "title", "phone", "email"],
    get_table=get_table,
    get_finder=get_finder,
)
示例#3
0
from pyquery import PyQuery as pq
import scraper
import constants

# Sport labels for Holy Cross mapped to their `constants` identifiers.
# A list value attaches a single combined label to several sports.
# NOTE(review): the labels are presumably matched against headings on the
# scraped page -- confirm against scraper.scrape_roster_row_site.
sports = {
    "Cheerleading": constants.CHEERLEADING,
    "Baseball": constants.BASEBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Field Hockey": constants.FIELD_HOCKEY,
    "Football": constants.FOOTBALL,
    "Men's and Women's Golf": [
        constants.MENS_GOLF,
        constants.WOMENS_GOLF,
    ],
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    "Women's Ice Hockey": constants.WOMENS_ICE_HOCKEY,
    "Men's Lacrosse": constants.MENS_LACROSSE,
    "Women's Lacrosse": constants.WOMENS_LACROSSE,
    "Men's & Women's Rowing": [
        constants.MENS_ROWING,
        constants.WOMENS_ROWING,
    ],
    "Men's Soccer": constants.MENS_SOCCER,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Softball": constants.SOFTBALL,
    "Men's & Women's Swimming & Diving": [
        constants.MENS_SWIMMING_DIVING,
        constants.WOMENS_SWIMMING_DIVING,
    ],
    "Men's & Women's Tennis": [
        constants.MENS_TENNIS,
        constants.WOMENS_TENNIS,
    ],
    "Men's & Women's Track & Field / Cross Country": [
        constants.MENS_CROSS_COUNTRY,
        constants.WOMENS_CROSS_COUNTRY,
        constants.MENS_TRACK_FIELD,
        constants.WOMENS_TRACK_FIELD,
    ],
    "Volleyball": constants.WOMENS_VOLLEYBALL,
}


def get_table(header):
    """Return the node reached from ``header`` via parent -> next.

    ``header`` is expected to expose ``parent()`` and ``next()`` traversal
    methods (presumably a PyQuery selection -- confirm against the caller
    in ``scraper``).
    """
    container = header.parent()
    return container.next()

# Announce the run, then scrape with "h3" as the header tag, passing the
# Holy Cross table locator defined in this script.
print("Scraping Holy Cross")
scraper.scrape_roster_row_site(
    "Holy Cross",
    sports,
    "h3",
    ["name", "title", "phone", "email"],
    get_table=get_table,
)

示例#4
0
from pyquery import PyQuery as pq
import scraper
import constants

# Sport labels for Michigan Tech mapped to their `constants` identifiers.
# A list value attaches a shared label to both the men's and women's sport.
# NOTE(review): the labels are presumably matched against headings on the
# scraped page -- confirm against scraper.scrape_roster_row_site.
sports = {
    "Cross Country": [
        constants.MENS_CROSS_COUNTRY,
        constants.WOMENS_CROSS_COUNTRY,
    ],
    "Football": constants.FOOTBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    "Men's Tennis": constants.MENS_TENNIS,
    "Nordic Skiing": [
        constants.MENS_NORDIC_SKIING,
        constants.WOMENS_NORDIC_SKIING,
    ],
    "Track & Field": [
        constants.MENS_TRACK_FIELD,
        constants.WOMENS_TRACK_FIELD,
    ],
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Women's Tennis": constants.WOMENS_TENNIS,
    "Women's Volleyball": constants.WOMENS_VOLLEYBALL,
}


# Announce the run, then scrape using "h2" as the header tag and the four
# listed field names.
print("Scraping Michigan Tech")
scraper.scrape_roster_row_site(
    "Michigan Tech",
    sports,
    "h2",
    ["name", "title", "phone", "email"],
)