def run(): courses = db["courses"] for course in courses.find({"institution": "N21"}): try: print green(">> %s" % course["name"]) url = "http://www.ncl.ac.uk/undergraduate/degrees/%s/courseoverview" % course["code"].lower() print url soup = common.fetch_page(url) details = soup.find("div", id="contentArea") for image in details.find_all("img"): image.decompose() for p in details.find_all("p"): if p.string == "" or p.string == None: p.decompose() details = details.find_all(["p", "h4"], recursive=False) d = "" for di in details: d += str(di) course["details"] = d course["url"] = url courses.save(course) except Exception as ex: print ex
def run(): unis = db['university'] for (uni, url) in mapping.items(): print green(">> %s" % uni) soup = common.fetch_page(url) times_rank = soup.find("div", **{"class":"times-rank" }).find("strong").string t_extra = soup.find("ul", **{"class":"top-crs-list"}) times = { "rank" : times_rank, "satisfaction" : t_extra.find_all("li")[0].find("span").string, "job_percent" : t_extra.find_all("li")[1].find("span").string, "dropout" : t_extra.find_all("li")[2].find("span").string } r = soup.find("div", **{"class":"rating"}) rating = { "overall" : dig_rating(r), "city_life" : get_rating(" City Life ", soup), "facilities" : get_rating(" Uni Facilities ", soup), "union" : get_rating(" Student Union ", soup), "societies" : get_rating(" Clubs & Societies ", soup), "course_and_lecturers" : get_rating(" Course & Lecturers ", soup) } db_uni = unis.find_one({"code" : uni}) db_uni['ranking']['times'] = times db_uni['ranking']['whatuni'] = rating unis.save(db_uni)
def number_of_gears(self):
    """Hit the app's gear-reporting script repeatedly and count the
    distinct gear DNS names that respond."""
    time.sleep(30)
    app_url = OSConf.get_app_url(self.app_name)
    distinct_gears = list()
    # Cartridge variant -> extension of the deployed gear script.
    extensions = {
        "php": ".php",
        "nodejs": '.js',
        "ruby": ".rb",
        "ruby-1.9": ".rb",
        "rack": ".rb",
        "jbossas": ".jsp",
        "jbosseap": ".jsp",
        "perl": ".pl",
        "python": ".py",
        "wsgi": ".py",
        "python-3.3": ".py",
        "python-2.7": ".py",
    }
    # Expected gear-name domain differs between OnPremise and hosted runs.
    if self.config.options.run_mode == 'OnPremise':
        re_str = "example.com"
    else:
        re_str = "rhcloud.com"
    pattern = r".*%s$" % (re_str)
    # Query the gear dns script several times; each hit may land on a
    # different gear.
    for _ in range(1, 11):
        gear = common.fetch_page(app_url + "/gear" + extensions[self.test_variant])
        # Only count responses that look like a valid gear DNS name.
        if re.search(pattern, gear) and gear not in distinct_gears:
            self.info("GEAR: [%s]" % gear)
            distinct_gears.append(gear)
    return len(distinct_gears)
def check_variables(self, *vars_to_check):
    """Fetch the app's checker page and verify that every expected
    environment variable name appears in its output.

    Each positional argument is an iterable of variable names.
    Returns True when all are present, False at the first missing one.
    """
    app_url = OSConf.get_app_url(self.app_name)
    output = common.fetch_page(app_url + "/checker.php")
    for group in vars_to_check:
        for var in group:
            # re.escape: match the variable name literally, not as regex
            # syntax (the old code interpolated it unescaped).
            obj = re.search(r"%s=" % re.escape(var), output)
            if not obj:
                self.error("Unable to find %s in environment" % var)
                return False
    return True
def number_of_gears(self, app_name):
    """Count the distinct gear hosts serving the app's /gear.php page."""
    base_url = str(OSConf.get_app_url(app_name))
    seen = []
    # Query the gear dns script several times; each request may be
    # answered by a different gear.
    for _ in range(1, 11):
        response = common.fetch_page(base_url + "/gear.php")
        if response not in seen:
            seen.append(response)
    return len(seen)
def run(): baseurl = "http://www.gaybydegree.org.uk/index.php?dir=university&task=university" print green("> GayByDegree.co.uk scrapping starts now") checklist_str = { "Policy that protects LGB students from bullying" : "policy", "Compulsory Staff Training on LGB issues" : "training", "Society for LGB students" : "society", "Info for students on LGB issues" : "info", "Stonewall Diversity Champion" : "diversity", "Events for LGB students" : "events", "Explicit welfare support for LGB students" : "welfare", "Consultation with LGB students" : "consulation", "Specific career advice for LGB students" : "career" } soup = common.fetch_page(baseurl, "html5lib") # awkward shit = ['university', 'of', 'the'] for uni in common.universites.values(): print green(">> %s..." % uni), # Sanatize into what the website uses uni = ' '.join( filter( lambda x: x.lower() not in shit, uni.split(' ') ) ) # Now go for it! l = soup.find("a", text=re.compile(uni) ) link = htmlentities.decode(l['href']) page_soup = common.fetch_page("http://www.gaybydegree.org.uk/%s" % link, "html5lib") print green(bold(" got information")) checklist = {} for (txt, db_term) in checklist_str.items(): p = page_soup.find(text=txt) i = p.parent.find("img") has = "greentick" in i['src'] if has: print green(">>> Has: %s" % txt) else: print red(">>> Does not have: %s" % txt) checklist[db_term] = has print checklist
def check_app(self, app_name, *cart_list): app_url = OSConf.get_app_url(app_name) common.grep_web_page(app_url + '/env', "OPENSHIFT_APP_DNS") content = common.fetch_page(app_url +'/env') expected_list = self.get_env_list(self.cart,*cart_list) print "Expected env var list: %s" % (','.join(expected_list)) missing_list = self.check_env_var(content, expected_list) flag = True if missing_list != []: print "The following env vars are missing:" print ', '.join(missing_list) flag = False return flag
def run(): courseDB = db["courses"] print green("scrapping...") for course in courseDB.find({"institution": "B32"}): try: print green(">> %s" % course["name"]) if not "details" in course: url = ( "http://www.birmingham.ac.uk/students/courses/undergraduate/search.aspx?CourseListTextQuery=%s" % course["code"] ) soup = common.fetch_page(url) url = ( "http://www.birmingham.ac.uk%s" % soup.find("table", **{"class": "sys_uob-listing"}).find("a")["href"] ) soup = common.fetch_page(url) details = soup.find("div", id="CourseDetailsTab") details = details.find("h2", id="ProgrammeOverview").next_sibling for a in details.find_all("a"): a["href"] = "http://www.birmingham.ac.uk%s" % a["href"] if a.get("onclick") != None: a["onclick"] = None if a.get("onkeypress") != None: a["onkeypress"] = None for image in details.find_all("img"): image.decompose() course["details"] = str(details) course["url"] = url courseDB.save(course) except Exception as e: print e
def run(): unis = db['university'] baseurl = "http://www.thecompleteuniversityguide.co.uk/league-tables/rankings?v=wide&o=wide" soup = common.fetch_page(baseurl) for uni in soup.find(**{"class":"leagueTable"}).find("tbody").find_all("tr"): tds = uni.find_all("td") name = tds[3].find("a").text rank2013 = tds[2].text.strip() finish = tds[-2].text.strip().replace("%", "") print yellow(">> %s is #%s and have a %s percentage completion rate" % (name, rank2013, finish) ) if name in mapping: print mapping[name] uni = unis.find_one({"code" : mapping[name]}) print uni uni['ranking']['cig'] = { "place" : rank2013, "finish" : finish } unis.save(uni)
def test_method(self):
    """End-to-end check that a deployed JBoss app ships a JGroups
    version newer than 3.0.4."""
    # Step 1: create the application and clone its repository.
    self.info("---------------------------------")
    self.info("1. Create a JBoss application")
    self.info("---------------------------------")
    ret_code = common.create_app(self.app_name, common.app_types["jbossas"], clone_repo=True)
    self.assert_equal(ret_code, 0, "Application must be created successfully")
    # Step 2: push the JGroups testing application into the clone.
    self.info("---------------------------------")
    self.info("2. Deploy testing application")
    self.info("---------------------------------")
    ret_code = self.deploy_jgroups_testing_application("./" + self.app_name)
    self.assert_equal(ret_code, 0, "The application must be deployed successfully")
    # Give the freshly deployed application time to come up.
    sleep(30)
    # Step 3: query the app for its JGroups version and compare.
    self.info("---------------------------------")
    self.info("3. Verify JGroups version number")
    self.info("---------------------------------")
    user = OSConf.OSConf()
    user.load_conf()
    app_url = OSConf.get_apps(user)[self.app_name]['url']
    self.assert_true(
        int(common.fetch_page(app_url + "/jgroups.jsp")) > 6148,
        "JGroups version must be higher than 3.0.4"
    )
    self.passed(self.summary)
def number_of_gears(self): app_url = OSConf.get_app_url(self.app_name) gears = list() # Checking the output of gear dns script more times for i in range(1, 20): if self.test_variant == 'php': gear = common.fetch_page(str(app_url) + "/gear.php") if self.test_variant == 'perl': gear = common.fetch_page(str(app_url) + "/gear.pl") if self.test_variant in ('ruby', 'ruby-1.9'): gear = common.fetch_page(str(app_url) + "/gear.rb") if self.test_variant == 'python': gear = common.fetch_page(str(app_url) + "/gear.py") if self.test_variant in ('jbosseap', 'jbossas', 'jbossews', 'jbossews2'): if i==1: time.sleep(60) gear = common.fetch_page(str(app_url) + "/gear.jsp") if self.test_variant == 'nodejs': gear = common.fetch_page(str(app_url) + "/gear.js") if gear not in gears: gears.append(gear) print "GEARS", gears return len(gears)
def fill_mysql(self):
    """Trigger the app's data1.js endpoint to populate MySQL; always
    returns 0."""
    time.sleep(20)
    base_url = OSConf.get_app_url(self.app_name)
    # NOTE(review): the same page is fetched twice, exactly as the
    # original did — presumably intentional (double insert?), but
    # confirm the second hit shouldn't be data2.js.
    for _ in range(2):
        common.fetch_page("%s/data1.js" % base_url)
    return 0