Example #1
File: reg.py Project: mor1/uon
import urllib

import scraper  # project-local helper module (fetch/parse/path)

def scrape_register(cookies, module, year_id):
    page, _ = scraper.fetch(
        "%s?%s" % (SEARCH_URL, urllib.urlencode({ 'form_id': 3,
                                                  'exclude': '',
                                                  'year_id': year_id,
                                                  'mnem': module,
                                                  })),
        headers={'Cookie': cookies,}
        )

    doc = scraper.parse(page)
    title = doc.find(scraper.path("h4", "a")).text

    for table in doc.findall(scraper.path("table")):
        if 'bordercolor' in table.keys():
            headings = [ t.text for t in table.findall(scraper.path("th", "font", "b")) ]
            if headings != ['Name', 'Category', 'Course', 'Misc']:
                # unexpected table layout; fail loudly (the original used a
                # bare name "BARF" to force a NameError here)
                raise ValueError("unexpected register headings: %r" % headings)

            students = [
                dict(zip(headings[:-1],
                         (c.text.strip()
                          for c in row.findall(scraper.path("td", "font"))
                          if c.text)))
                for row in table.findall(scraper.path("tr"))[1:]
                ]

            return {
                'title': title,
                'students': students,
                }
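A note on this example (and Example #8 below): both rely on a scraper.path helper whose source is not shown here. Since its result is passed to ElementTree's find/findall, a minimal sketch, assuming it simply joins tag names into a descendant ElementPath expression:

# Hypothetical reconstruction of scraper.path; the real helper is not shown.
# ElementTree's find/findall accept ElementPath strings, so joining tag names
# into a ".//a/b/c" descendant query matches how the examples use it.
def path(*tags):
    # e.g. path("th", "font", "b") -> ".//th/font/b"
    return ".//" + "/".join(tags)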
Example #2
import argparse

def main():
    parser = argparse.ArgumentParser(
        description="Use frequencies to assign sentiment score.")
    parser.add_argument("stock_sym", help="stock symbol of company")
    parser.add_argument(
        "filename", help="name of JSON file with links to financial articles")
    args = parser.parse_args()
    return parse(args.filename)
Example #3
File: main.py Project: thinkski/marcus
    def get(self):
        from google.appengine.api import urlfetch
        import scraper

        url = "http://www.marctracker.com/PublicView/status.jsp"
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            response = scraper.parse(result.content)
        else:
            # avoid a NameError on a failed fetch; surface the status instead
            self.abort(result.status_code)

        self.response.write(response)
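The snippet omits the handler class around get(); a minimal sketch of the likely wiring, assuming the classic App Engine webapp2 framework (the class name and route below are made up):

import webapp2

class StatusPage(webapp2.RequestHandler):  # hypothetical name
    def get(self):
        pass  # body as in the example above

app = webapp2.WSGIApplication([('/status', StatusPage)])  # hypothetical route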
Example #4
    def test_parser(self):
        result = scraper.parse(self.content)

        self.assertIsInstance(result, list, "Parse result is not a list. "
                                            "It must be a list of tuples. "
                                            "Check it!")
        self.assertIsInstance(result[0], tuple, "First parse result is not a tuple. "
                                                "It must be a tuple of 9 elements. "
                                                "Check it!")
        self.assertEqual(len(result[0]), 9, "Tuple size is not 9. "
                                            "It must have 9 elements. "
                                            "Check it!")
Example #5
def scrape():
    errors = []
    results = ""
    if request.method == "POST":
        try:
            target_url = request.form['urlname']
            results = parse(target_url)

        except Exception:
            errors.append("unable to get url...")

    return render_template('/anotherpage.html', errors=errors, results=results)
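The view omits the Flask app setup; a minimal sketch of the missing wiring, assuming a standard Flask layout (the app object, route, and parse import are assumptions):

from flask import Flask, render_template, request
from scraper import parse  # assumed source of parse()

app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])  # hypothetical route
def scrape():
    pass  # body as in the example above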
Example #6
import scraper          # project-local: scrapes lyrics for a song/artist
import markov           # project-local: builds and prints the Markov chain
import json_generator   # project-local: writes the chain out as JSON

def main():

    song = input("What's the song you want to visualize? ")
    artist = input("And who's it by? ")

    lyrics = scraper.parse(song, artist)
    mc = markov.build_mkch(lyrics)
    json_generator.build_json(mc)

    # The list printed by the following function is a text-based visualization
    # of the Markov chain (an adjacency list).

    # Each node (unique word) is listed with all of its edges (the words it
    # links to), along with the 'probability' that a word in the edge list
    # follows the node.
    markov.print_mkch(mc)
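build_mkch itself is not shown; going by the comments above, a minimal sketch of a word-level Markov chain stored as an adjacency list with follow probabilities (the representation and names are assumptions):

from collections import Counter, defaultdict

def build_mkch(lyrics):
    # Count, for each word, how often each other word follows it.
    words = lyrics.split()
    follows = defaultdict(Counter)
    for cur, nxt in zip(words, words[1:]):
        follows[cur][nxt] += 1
    # Normalize the counts into probabilities: {word: {next_word: p, ...}}
    return {
        word: {nxt: n / float(sum(cnt.values())) for nxt, n in cnt.items()}
        for word, cnt in follows.items()
    }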
Example #7
from operator import itemgetter

import scraper  # project-local module providing parse()

def process(request, keyword, city):
  scraped_data = scraper.parse(keyword, city)
  salary_dict = []
  rating_dict = []
  if not scraped_data:
    print "Your search for %s, in %s does not match any jobs" % (keyword, city)
    return salary_dict, rating_dict
  for d in scraped_data:
    if len(d['Rating']) > 0:
      rating_dict.append({'Company': d['Company'], 'Rating': d['Rating'],
                          'Url': d['Url'], 'Location': d['Location'],
                          'Salary': d['Salary'], 'Name': d['Name']})
    if len(d['Salary']) > 0:
      # Int_Salary: the upper bound of a "$NNK-$MMK"-style range, for sorting
      salary_dict.append({'Company': d['Company'], 'Rating': d['Rating'],
                          'Int_Salary': int(d['Salary'].split('-')[1][1:-1]),
                          'Salary': d['Salary'], 'Url': d['Url'],
                          'Location': d['Location'], 'Name': d['Name']})
  rating_dict = sorted(rating_dict, key=itemgetter('Rating'), reverse=True)
  salary_dict = sorted(salary_dict, key=itemgetter('Int_Salary'), reverse=True)
  return salary_dict, rating_dict
Example #8
File: tt.py Project: mor1/uon
    if not (dump_ascii or dump_json): dump_ascii = True

    # initialize both so the checks below cannot hit a NameError
    courses = modules = None
    if "".join(map(lambda s: s.lower(), args)) in Courses:
        courses = "%0D%0A".join(map(urllib.quote_plus, Courses[args[0]]))
    elif specify_courses:
        courses = "%0D%0A".join(map(urllib.quote_plus, args))

    if courses:
        url = "%s;%s" % (TT_URL, COURSES_URL % { "courses": courses, })
    else:
        modules = "%0D%0A".join(args)
        url = "%s;%s" % (TT_URL, MODULES_URL % { "modules": modules, })

    if not (courses or modules): die_with_usage("", 1)

    modules = scrape_timetable(scraper.parse(scraper.fetch(url)[0]))
    if module_detail:
        for m in modules:
            data = { 'year_id': '000110',
                     'mnem': m['code'],
                     }
            page, hdrs = scraper.fetch(MODULE_DETAIL_URL, data)
            m['detail'] = scrape_module_details(scraper.parse(page))

    ## dump scraped data
    if dump_json: print json.dumps(modules)
    elif dump_ascii:
        for module in modules:
            print "\x1b[0;1m%s\x1b[0m" % module['code'], "--", module['title']
            for (act, data) in sorted(module['acts'].items()):
                print "\t%-13s" % (act,), \