def scrape(url: str, staleOnly: bool = False, fallback: bool = False) -> sc.ScrapeMeta:
    if sc._staleOnly:
        util.logMessage(f'skitter.scrape: HERMES_STALE only {url}')
        return sc.scrape(url)
    if staleOnly:
        util.logMessage(f'skitter.scrape: staleOnly {url}')
        for c in reversed(priv.skitterClients):
            ce = c.cache(url)
            if ce is not None:
                return ce
        raise Exception(f'skitter.scrape: unable to staleOnly scrape: {url}')
    for c in priv.skitterClients:
        try:
            #util.logMessage(f'skitter.scrape: calling {c.ident}.scrape({url})')
            r = c.scrape(url)
            return r
        except Exception as e:
            util.logMessage(f'skitter.scrape: {c.ident}.scrape failed: {e}')
            pass
    if fallback:
        return sc.scrape(url)
    raise Exception(f'skitter.scrape: unable to scrape: {url}')
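A minimal usage sketch for the wrapper above, assuming priv.skitterClients and the sc fallback scraper are already configured elsewhere in the project; the URL is a hypothetical placeholder.

# Usage sketch (not from the source); example_url is a made-up placeholder.
example_url = 'https://example.com/some/page'
meta = scrape(example_url)                           # try each skitter client in order
cached = scrape(example_url, staleOnly=True)         # serve only from client caches (checked in reverse order)
meta_or_direct = scrape(example_url, fallback=True)  # fall back to sc.scrape if every client fails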
def scrape_ec2():
    ec2_file = 'AWSinstances.json'
    try:
        scrape(ec2_file)
    except Exception as e:
        print("ERROR: Unable to scrape data: %s" % e)
        traceback.print_exc()
def main(): """Code for scraper. Actual scraping is turned off because server doesn't like being pinged. """ # url = "https://www.scholarships.com/financial-aid/college-scholarships/scholarship-directory/academic-major" # # Use to make URL attribute of scholarship object usable # appendable_url = "https://www.scholarships.com" # # Setup output file # scan_time = date.today() # filename = 'scan_' + str(scan_time) + '.csv' # with open(filename, 'w', encoding='utf-8-sig') as f: # w = csv.DictWriter(f, ['name', 'url', 'amount', 'deadline', 'description']) # w.writeheader() # # get response # response = get_response(url) # soup = BeautifulSoup(response.content, 'html5lib') # url_table = soup.find(id="ullist") # url_list = url_table.find_all('a') # for link in url_list: # get_scholarshipscom_details(link.get('href'), appendable_url, filename) # # Wait 1 second between requests # sleep(1) print("Pushing file into the database.", flush=True) scrape(environ['MYSQL_USER'], environ['MYSQL_PASSWORD'], "db", environ['MYSQL_DB_NAME']) print("done")
def test_ScrapedUser(self):
    scrape.scrape(self.username)
    output_text = io.StringIO()
    sys.stdout = output_text
    scrape.scrape(self.username)
    sys.stdout = sys.__stdout__
    self.assertEqual(output_text.getvalue(),
                     'My name is Kanish and my current city is Roorkee\n')
def export(self):
    urlName = self.varURLName.get()
    className = self.varClassName.get()
    csvName = self.varCSVName.get()
    tagType = self.varTAGType.get()
    scrape.WriteCSV(csvName)
    scrape.scrape(className, urlName, csvName, tagType)
def main(self):
    # ASCII-art banner
    print(red + ''' _____ _ __ _ | __ \\ | |/ / | | | |__) | __ _____ ___ _| ' / ___| | _____ _ __ | ___/ '__/ _ \\ \\/ / | | | < / _ \\ |/ / _ \\ '__| | | | | | (_) > <| |_| | . \\ __/ < __/ | |_| |_| \\___/_/\\_\\\\__, |_|\\_\\___|_|\\_\\___|_| __/ | |___/ \n''')
    print(blue + 'by Nexolyte\n')
    m = get('Main Menu\n' +
            red + '[' + blue + '1' + red + '] - ' + white + 'Scrape\n' +
            red + '[' + blue + '2' + red + '] - ' + white + 'Check\n' +
            red + '[' + blue + 'e' + red + '] - ' + white + 'Exit\n')
    if m == '1':
        os.system('cls')
        scrape.scrape()
    elif m == '2':
        os.system('cls')
        check.check()
    elif m == 'e':
        os.system('cls')
        sys.exit(1)
    else:
        os.system('cls')
        error('Input not recognised. Please retype and try again.')
        self.main()
def main(self): print(red + """ dBBBBBb dBBBBBb dBBBBP`Bb .BP dBP dBP dBBBBBb dBBBP dBBBBBb dBBBBBb dBBBP dBBBBBb dB' dBP dBP.BP .BP dBP dBP BB dB' dBP dBBBP' dBBBBK dBP.BP dBBK dBP dBBBBK' dBBP dBP BB dBBBP' dBBP dBBBBK dBP dBP BB dBP.BP dB' dBP dBP BB dBP dBP BB dBP dBP dBP BB dBP dBP dB' dBBBBP dB' dBP dBP dBP dB' dBBBBP dBBBBBBB dBP dBBBBP dBP dB' v1.1""" ) print(blue + 'by Xenex\n') m = get('Main Menu\n' +\ red + '[' + blue + '1' + red + '] - ' + white + 'Scrape\n' +\ red + '[' + blue + '2' + red + '] - ' + white + 'Check\n' +\ red + '[' + blue + '3' + red + '] - ' + white + 'Exit\n') if m == '1': os.system('cls') scrape.scrape() elif m == '2': os.system('cls') check.check() elif m == 'e': os.system('cls') sys.exit(1) else: os.system('cls') error('Input not recognised. Please retype and try again.') self.main()
def main(): """Shows basic usage of the Sheets API. Prints values from a sample spreadsheet. """ # Authorization of google account creds = None if os.path.exists('token.pickle'): with open('token.pickle', 'rb') as token: creds = pickle.load(token) if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( 'credentials.json', SCOPES) creds = flow.run_local_server(port=0) with open('token.pickle', 'wb') as token: pickle.dump(creds, token) # Scraping method starts here scrape.scrape(creds)
def get_table():
    username = input("Username: ")
    # ... (redacted in the source; produces "twu_website.html" and selected_term)
    make_table(selected_term)
def build():
    """Scrape AWS sources for data and build the site"""
    data_file = 'www/instances.json'
    try:
        scrape(data_file)
    except Exception as e:
        print("ERROR: Unable to scrape site data: %s" % e)
def scrape_ec2(c):
    """Scrape EC2 data from AWS and save to local file"""
    ec2_file = "www/instances.json"
    try:
        scrape(ec2_file)
    except Exception as e:
        print("ERROR: Unable to scrape EC2 data")
        traceback.print_exc()
def scrape_ec2():
    """Scrape EC2 data from AWS and save to local file"""
    ec2_file = 'www/instances.json'
    try:
        scrape(ec2_file)
    except Exception as e:
        print("ERROR: Unable to scrape data: %s" % e)
        traceback.print_exc()
def scrape_go():
    print(f"---beginning scrape at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    for group in groups:
        scrape.scrape(group)
    print(f"---finished scrape at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
def build():
    """Scrape AWS sources for data and build the site"""
    data_file = 'www/instances.json'
    try:
        scrape(data_file)
    except Exception as e:
        print("ERROR: Unable to scrape site data: %s" % e)
        traceback.print_exc()
    render_html()
def run():
    recreate_schema()
    scrape.scrape()
    scheduler.start()
    try:
        asyncio.get_event_loop().run_forever()
    except (KeyboardInterrupt, SystemExit):
        print("Shutting down. Please wait...")
        scheduler.shutdown(wait=True)
        exit(0)
def main():
    from scrape import scrape
    from do_etl import do_etl
    try:
        scrape()
    except ValueError as e:
        print(e)
    finally:
        do_etl(inital_load=False)
def upload_file():
    file = request.files['image']
    f = os.path.join(app.config['UPLOAD_FOLDER'], 'img.jpg')  # file.filename
    # add your custom code to check that the uploaded file is a valid image
    # and not a malicious file (out-of-scope for this post)
    file.save(f)
    print('file uploaded successfully')
    text = recognize_text()
    print(text)
    results = scrape(text)
    print(results)
    # reuse the results already computed above instead of re-running OCR and the scrape
    return results[0]
def fetch(self):
    out = scrape.scrape({
        "url": self.url,
        "css": 'div[data-tts="answers"]',
        "text": True
    })
    return out.strip()
def fetch(self):
    out = scrape.scrape({
        "url": self.url,
        "css": 'div.startupLogos a',
        "print_url": True,
    })
    return out
def scraper():
    mars_info = scrape.scrape()
    #listings = mongo.db.listings
    #listings.update({}, listings_result, upsert=True)
    return redirect("/", code=302)
def run():
    data = scrape()
    grouped_data = group(data)
    analysis(grouped_data)
    statistics(grouped_data)
    # define more tasks here
    print("Run Completed Successfully")
def web_scrape():
    db.collection.remove({})
    mars_data = scrape.scrape()
    #print('----after getting data---')
    #print(mars_data)
    db.collection.insert_one(mars_data)
    return redirect("http://localhost:5000/", code=302)
def getCurrentInfo(self, fic: Fic) -> Fic:
    fic.url = self.baseUrl + str(fic.localId)
    url = fic.url.split('?')[0] + '?view_adult=true'
    # scrape fresh info
    data = scrape.scrape(url)
    return self.parseInfoInto(fic, data['raw'])
def hello_world():
    # localhost:8000/
    file_url = scrape()
    file = requests.get(file_url).content
    with open(basename('img.png'), "wb") as f:
        f.write(file)
    # pass the saved filename rather than the now-closed file handle
    return send_file('img.png', mimetype='image/png')
def new(username, include):
    try:
        if not tweets.find_one({"username": username}):
            tweets.insert_one({
                "username": username,
                "tweets": scrape(username)
            })
        obj = tweets.find_one({"username": username})
        model, rmodel = train([e['full_text'] for e in obj["tweets"]])
        if include:
            text = generate_with(model, rmodel, include)
        else:
            text = generate(model)
        return json.dumps({
            'success': True,
            'message': ' '.join(text),
            'name': obj['tweets'][0]['user']['name'],
            'avatar': obj['tweets'][0]['user']['profile_image_url_https']
        })
    except:
        return json.dumps({
            'success': False,
            'message': 'Oops! An error occurred.',
        })
def post(self):
    year = int(self.request.get('year', 1990))
    term = int(self.request.get('term', 92))
    template = open('scrape_yearterm.html').read()
    try:
        scrape([year], [term])
        self.response.write('YearTerm {:04d}-{:02d} has successfully been added to the database.<br><br>'.format(year, term))
        year_value = str(year + 1 if term == 92 else year)  # iterate year
        input_index = str([0, 92, 3, 14, 25, 39, 76].index(term))  # iterate term
    except Exception as e:
        traceback.print_exc()  # only visible in terminal
        self.response.write('ERROR: {}<br><br>'.format(e))
        year_value = str(year)  # preserve year
        input_index = str([0, 3, 14, 25, 39, 76, 92].index(term))  # preserve term
    content = template.replace('{YEAR_VALUE}', year_value).replace('{INPUT_INDEX}', input_index)
    self.response.write(content)
def scraper():
    # Run the scrape function we made to pull all the data from the sources
    mars_data = scrape.scrape()
    # Update the collection with new data
    db.db.collection.update({}, mars_data, upsert=True)
    return redirect("/")
def getCurrentInfo(self, fic: Fic) -> Fic:
    url = self.constructUrl(fic.localId)
    # scrape fresh info
    data = scrape.scrape(url)
    edumpContent('<!-- {} -->\n{}'.format(url, data['raw']), 'sugarquill_ec')
    return self.parseInfoInto(fic, data['raw'])
def get_scrape():
    mars_data = knife.scrape()
    mars_db = mongo.db.mars
    mars_db.update({}, mars_data, upsert=True)
    return redirect("http://localhost:5000/", code=302)
def prediction(stock):
    if request.method == 'POST':
        # form = request.form
        stock = request.form['ticker']
        req = request
        print(req.form)
        ticker = request.form['ticker']
        ma1 = int(request.form['ma1'])
        ma2 = int(request.form['ma2'])
        from_date = request.form['from_date']
        to_date = request.form['to_date']
        crossover = ''
        # Parameters can now be passed through for calculations
        results = forecast(ma1, ma2, ticker, from_date, to_date)
        data = scrape(ticker)
        print(data.keys())
        cap = data['cap']
        price = data['price']
        day = data['day']
        week = data['week']
        month = data['month']
        quarter = data['quarter']
        headlines = data['headlines']
        trend = results['trend']
        value = Markup(results['html'])
        # img = f'predict.png'
        return render_template("dynamicForecast.html", from_date=from_date, to_date=to_date,
                               ma1=ma1, ma2=ma2, ticker=ticker, crossover=crossover, trend=trend,
                               cap=cap, price=price, day=day, week=week, month=month,
                               quarter=quarter, value=value, headlines=headlines)
        # return render_template('dynamicForecast.html', stock=stock)
    else:
        return render_template('dynamicForecast.html')
def get(self): titleLinkAssoc = scrape.scrape("http://www.metafilter.com/", "div.posttitle > a") formattedLinks = [ "http://www.metafilter.com" + v for k, v in titleLinkAssoc.items() ] # metafilter hosts their own content so you need to add http://www.metafilter to each link titles = [k for k, v in titleLinkAssoc.items()] formattedLinkAssoc = dict(zip(titles, formattedLinks)) self.render("scraped.html", titleLinks=formattedLinkAssoc, site="MetaFilter")
def analyze():
    url = request.form['url']
    text = scrape.scrape(url)
    # get end of url for naming pics
    i = url.rfind('/') + 1
    url = url[i:]
    polarity_url = analysis.get_sentiment_analysis(text, url)
    wordmap_url = analysis.get_wordmap(text, url)
    return render_template("analysis.html", polarity=polarity_url, wordmap=wordmap_url)
def crawl(review_store, page=0):
    # Python 2 code (urllib2, dict.has_key); fetch the album-review index page
    response = urllib2.urlopen("http://pitchfork.com/reviews/albums/" +
                               ("" if page == 0 else (str(page) + "/")))
    soup = BeautifulSoup(response)
    main_grid = soup.find("ul", {"class": "object-grid"})
    for a_child in main_grid.findAll("a"):
        shelve_key = a_child['href'].encode('ASCII', 'ignore').split('/')[-2]
        print(shelve_key)
        if not review_store.has_key(shelve_key):
            print("key not in store")
            review_store[shelve_key] = scrape("http://pitchfork.com" + a_child['href'])
def scrape_controller(domain):
    base_url = "http://www.hm.com/us/products/search?"
    #'''
    # gets from bucket the input file and the last position of scraping
    inp_file, pos = get_inp_line()
    mark = 0
    # move reader to the given position
    while mark < pos:
        mark += 1
    # fetch next line from reader
    inp_line = get_next_line(mark)
    #'''
    #inp_line = "categories=men&term=gingham shirts|type=Gingham,source=H&M"
    while inp_line != "EOF":
        search_q, db_entry = inp_line.split('|')
        # to sort by new arrivals
        if not check_for_domain("Orders=newfrom_desc", search_q):
            search_q = search_q + "&" + "Orders=newfrom_desc"
        # in case none of the domains
        if not check_for_domain("categories=men", search_q) and \
                not check_for_domain("categories=ladies", search_q) and \
                not check_for_domain("categories=kids", search_q) and \
                not check_for_domain("categories=sale", search_q):
            search_q = domain + "&" + search_q
        # in case of the given domain
        elif check_for_domain(domain, search_q):
            print(" --")
        # in case of no domain
        else:
            continue
        q_url = base_url + search_q
        print(q_url)
        try:
            my_items = scrape(q_url)
            #save_to_db(my_items)
            #for item in my_items:
            #    print item
        except:
            print("unable to scrape from " + q_url)
        # get the next input line from the input file
        inp_line = get_next_line(mark + 1)
        # update the position of reader in SQS
        update_reader_pos(mark + 1)
        mark = mark + 1
def get_communities(url, counties):
    """Returns a dict {id: name}"""
    communities = {}  # could also be done with incomprehensible dict comprehension
    for county in counties:
        soup = scrape(url, county)
        pat = r"{}\d+".format(county)
        options = filter_tags(soup, 'option', pat)
        communities.update(options)
    return communities
def seatcheck(medium, username):
    seats = scrape(db.getTemp(username))
    if seats > 0:
        messenger.message(medium, username, "Good news! Your class has " + str(seats) + " open seats, so you can go sign up now! If you have the ID of another course that's closed that you'd like to track, let me know!")
        db.changeState(username, 1)
    elif seats > -1000:
        messenger.message(medium, username, "You're all set. I'll monitor your course and message you here if a seat in your class opens up.")
        messenger.message(medium, username, "Anything else I can help you with? You can say 'commands' for a list of commands I understand.")
        temp = db.getTemp(username)
        db.addJob(username, temp)
        db.changeState(username, 0)
    else:
        messenger.message(medium, username, "Couldn't figure out how many seats open. Is classfind down?")
        db.changeState(username, 2)
def add_package(package):
    cur = get_db().cursor()
    cur.execute("insert into game (package) values (?)", (package,))
    get_db().commit()
    id = cur.lastrowid
    res = scrape(id, package)
    name = res[0]
    desc = res[1]
    rating = float(res[2])
    activity = res[4]
    category = res[5]
    # use a parameterized query so scraped values cannot break the SQL
    cur.execute("update game set name=?, description=?, rating=?, activity=?, category=? where id=?",
                (name, desc, rating, activity, category, int(id)))
    get_db().commit()
    cur.connection.close()
    return json.dumps(id)
def _scrapes(self, include_sub, include_dir, expose=True, alert_when_done=True):
    try:
        count = 0
        for x in scrape.scrape(self.settings, include_sub=include_sub, include_dir=include_dir):
            if isinstance(x, int):
                count += x
                continue
            if expose:
                reveal(x)
    except requests.ConnectionError:
        tkMessageBox.askokcancel("Connection Error",
                                 "Could not connect to Reddit. Check your internet settings, "
                                 "and make sure Reddit isn't down.")
    else:
        tkMessageBox.askokcancel("", "Scrape Complete! %d files downloaded." % count)
def post(self):
    blob = self.get_argument("blob", None)
    url = self.get_argument("url", None)
    self.write("<style>i{color:#aaa;}</style>")
    if blob:
        for line in blob.split("\n"):
            line = line.strip()
            if len(line) > 5:
                self.write(line + "<br>")
                self.write("<i>%s, %s</i><br><br>" % classifier.classify(line))
    elif url:
        page_data = scrape(url)
        self.write("<p><b>source:</b> " + page_data["source"] + "</p>")
        self.write("<p><b>title:</b> " + page_data["title"] + "</p>")
        self.write("<p><b>url:</b> " + page_data["url"] + "</p>")
        self.write("<p><b>description:</b> " + page_data["description"] + "</p>")
        self.write("<hr>")
        for line in page_data["body"]:
            if len(line) > 5:  # TODO drop any line < 2 words; NER for time
                score = classifier.classify(line)
                if score[1] > 0.5:
                    self.write(line + "<br>")
                    self.write("<i>%s, %s</i><br><br>" % score)
def autograder(url):
    '''Accepts the URL for a recipe, and returns a dictionary of the parsed results
    in the correct format. See project sheet for details on correct format.'''
    ingredient_strings, step_strings = scrape.scrape(url)
    # The ingredient template is
    # name, quantity, measurement, descriptor, preparation, pre-preparation
    fin_ingredients = []
    for ingredient in ingredient_strings:
        name = unicode(parser.recognize_ingredient(ingredient))
        number = parser.recognize_number(ingredient)
        unit = parser.recognize_unit(ingredient)
        descriptors = [unicode(i) for i in parser.recognize_descriptors(ingredient)]
        fin_ingredients.append({"name": name, "quantity": number,
                                "measurement": [unicode(unit)], "descriptor": descriptors})
    primary_method = None
    methods = set()
    for method in COOKING_METHODS.keys()[::-1]:
        for variation in COOKING_METHODS[method]:
            for step in step_strings:
                if variation in step:
                    methods.add(unicode(method))
                    primary_method = unicode(method)
    cookware_set = set()
    for cookware in COOKWARE:
        for variation in COOKWARE[cookware]:
            for step in step_strings:
                if variation in step:
                    cookware_set.add(unicode(cookware))
    return {"ingredients": list(fin_ingredients),
            "cooking methods": list(methods),
            "primary cooking method": primary_method,
            "cooking tools": list(cookware_set)}
__author__ = 'rylan'

from scrape import scrape
from compare import compare
from printToHTML import printToHTML

# sourceOne = 'https://news.google.com/'
# sourceTwo = 'https://news.yahoo.com/'
sourceOne = raw_input('Please enter first newsource: ')
sourceTwo = raw_input('Please enter second newsource: ')

articleTitles = scrape(sourceOne, sourceTwo)
pairings = compare(articleTitles[0], articleTitles[1])
printToHTML(pairings)
def get_counties(url):
    """Returns a dict {id: name}"""
    soup = scrape(url)
    pat = r'^\d{2}$'
    return filter_tags(soup, 'option', pat)
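A short usage sketch showing how this helper and the get_communities helper defined earlier compose; both return {id: name} dicts per their docstrings, and BASE_URL is a hypothetical placeholder.

# Usage sketch (not from the source); BASE_URL and the example values are made up.
BASE_URL = 'https://example.org/region-search'
counties = get_counties(BASE_URL)                  # e.g. {'01': 'Some County', ...}
communities = get_communities(BASE_URL, counties)  # e.g. {'0142': 'Some Community', ...}
for community_id, community_name in communities.items():
    print(community_id, community_name)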
from scrape import getinfo, scrape, statusUpdate
from dbaccess import AuthDatabase
from interface import Interface
import datetime
import json

config = json.loads(open("/root/SBUCourseMonitor/config.json").read())
messenger = Interface(config)
db = AuthDatabase(config["database_addr"])

for job in db.getJobs():
    seats = scrape(job[2])
    if seats > 0:
        user = db.getUserByID(job[1])[0]
        info = getinfo(str(job[2]))
        messenger.message(user[1], user[2], "Knock, knock! Your course " + info + " now has " + str(seats) + " open seats. Go sign up!")
        print(datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') + "\t" + user[2] + "\t" + info)
        db.deleteJob(job[0])
def get(self): titleLinkAssoc = scrape.scrape("http://www.digg.com/", "h2.story-title > a") self.render("scraped.html", titleLinks=titleLinkAssoc, site="Digg")
def get(self): titleLinkAssoc = scrape.scrape("http://www.slashdot.org/", "h2 > span > a") self.render("scraped.html", titleLinks=titleLinkAssoc, site="Slash Dot")
import proxy
from scrape import scrape
from bs4 import BeautifulSoup
import urllib.request as req

url = "http://intranet.iitg.ernet.in/"
scrape(url)
print("done")
def get(self): titleLinkAssoc = scrape.scrape("http://www.techcrunch.com/", "h2.headline > a") self.render("scraped.html", titleLinks=titleLinkAssoc, site="Tech Crunch")
def do_scrape():
    return Response(i_league_scraper.scrape(), mimetype="text/plain")
def build(): """Scrape AWS sources for data and build the site""" data_file = "www/instances.json" scrape(data_file) render(data_file, "in/index.html.mako", "www/index.html")
def parse_results(message, db_collection):
    """
    Function to parse the links drawn from an RSS feed.

    Parameters
    ----------
    message: pattern.web.Results.
        Object containing data on the parsed RSS feed. Each item represents
        a unique entry in the RSS feed and contains relevant information
        such as the URL and title of the story.
    db_collection: pymongo Collection.
        Collection within MongoDB in which results are stored.
    """
    global proxies, proxy_user, proxy_pass
    if proxies:
        proxy_choice = {'http': random.choice(proxies)}
        proxy_login = requests.auth.HTTPProxyAuth(proxy_user, proxy_pass)
    else:
        proxy_choice = ''
        proxy_login = {}
    lang = message.get('lang')
    story_url = message.get('url')
    website = message.get('website')
    title = message.get('title')
    date = message.get('date')
    if lang == 'english':
        goose_extractor = Goose({'use_meta_language': False,
                                 'target_language': 'en',
                                 'enable_image_fetching': False})
    elif lang == 'arabic':
        from goose.text import StopWordsArabic
        goose_extractor = Goose({'stopwords_class': StopWordsArabic,
                                 'enable_image_fetching': False})
    else:
        print(lang)
    if 'bnn_' in website:
        # story_url gets clobbered here because it's being replaced by
        # the URL extracted from the bnn content.
        # TODO: Deprecate this for now since using GhostJS is weird.
        logging.info('A BNN story.')
        # text, meta, story_url = scrape.bnn_scrape(story_url, goose_extractor)
        text = ''
        pass
    else:
        text, meta = scrape.scrape(story_url, goose_extractor, proxy_choice, proxy_login)
        text = text.encode('utf-8')
    if text:
        cleaned_text = _clean_text(text, website)
        # TODO: Figure out where the title, URL, and date should come from
        # TODO: Might want to pull title straight from the story since the RSS
        # feed is borked sometimes.
        entry_id = connectors.add_entry(db_collection, cleaned_text, title,
                                        story_url, date, website, lang)
        if entry_id:
            try:
                logging.info('Added entry from {} with id {}. {}.'.format(story_url, entry_id,
                                                                          datetime.datetime.now()))
            except UnicodeDecodeError:
                logging.info('Added entry from {}. Unicode error for id'.format(story_url))
    else:
        logging.warning('No text from {}'.format(story_url))
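For reference, a minimal sketch of the message fields the function above actually reads; since it only calls message.get(...), a plain dict with these keys illustrates the expected shape, and every value below is a made-up placeholder.

# Reference sketch (not from the source); values and some_collection are hypothetical.
example_message = {
    'lang': 'english',                       # selects the English Goose extractor config
    'url': 'http://example.com/story.html',  # story URL handed to scrape.scrape
    'website': 'example_site',               # used for cleaning and the 'bnn_' check
    'title': 'Example headline',
    'date': '2015-01-01',
}
# parse_results(example_message, some_collection)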
import pickle
import scrape

scrape.scrape()

dictPickle = open('barf', 'rb')
outputDict = pickle.load(dictPickle)
dictPickle.close()

##dictFile = open('barf.txt','w')
##dictFile.write(outputDict)
##dictFile.close()
def build(): """Scrape AWS sources for data and build the site""" data_file = 'www/instances.json' scrape(data_file) render(data_file, 'in/index.html.mako', 'www/index.html')
ATTR_DICT = {'title': '../csvs/exid_title.csv',
             'date': '../csvs/exid_date.csv',
             'url': '../csvs/exid_url.csv'}


def create_attr_csvs(index):
    '''
    creates csvs for title, date, url
    input:
        index: dictionary of exhibit information
    output:
        writes csv files according to ATTR_DICT
    '''
    for attr in ATTR_DICT:
        with open(ATTR_DICT[attr], 'w') as f:
            line = 'ex_id|' + attr + '\n'
            f.write(line)
            for museum_id in index:
                for ex_id in index[museum_id]:
                    line = '{}|{}\n'.format(str(ex_id),
                                            index[museum_id][ex_id][attr].encode('utf-8'))
                    f.write(line)


if __name__ == "__main__":
    index = scrape.scrape()
    if os.path.isfile('../pickled_search_object'):
        os.remove('../pickled_search_object')
    wd = build_word_dict(index)
    create_wordct_csv(wd)
    create_attr_csvs(index)
def main(original_recipe):
    # urls = ['http://allrecipes.com/recipe/easy-meatloaf/',
    #         'http://allrecipes.com/Recipe/Easy-Garlic-Broiled-Chicken/',
    #         'http://allrecipes.com/Recipe/Baked-Lemon-Chicken-with-Mushroom-Sauce/',
    #         'http://allrecipes.com/Recipe/Meatball-Nirvana/']
    if original_recipe.url:
        scraped_ing, scraped_steps = scrape.scrape(original_recipe.url)

    # parse ingredient info, create objects
    ingredients = []
    for ingredient in scraped_ing:
        new_ing = parse_ingredient(ingredient)
        cursor = db.ingredients.find({"name": new_ing.name})
        i = 0
        for document in cursor:
            i += 1
        if i == 0:
            # add to DB
            db.ingredients.insert({"name": new_ing.name, "category": "????", "flag": "none"})
        ingredients.append(new_ing)

    steps = []
    for step in scraped_steps:
        # SPLIT STEP CONTENTS BEFORE PARSING
        if not step:
            continue  # HANDLE EMPTY
        # for new_parser
        # parsed_steps = parse_step(step)
        # for p in parsed_steps:
        #     steps.append(p)
        # for new_parser
        step_sent = nltk.sent_tokenize(step)
        for sent in step_sent:
            if contains_procedure(sent) == 1:
                new_proc = parse_step(sent)
                steps.append(new_proc)
            elif contains_procedure(sent) > 1:
                actions = double_action(sent)
                if actions:
                    for a in actions:
                        new_proc = parse_step(a)
                        steps.append(new_proc)
                    if contains_procedure(sent) == 2:
                        break
                clause = sent.split(';')
                for c in clause:
                    if contains_procedure(c) == 1:
                        new_proc = parse_step(c)
                        steps.append(new_proc)
                    elif contains_procedure(c) > 1:
                        more_clause = c.split(',')
                        for more_c in more_clause:
                            if contains_procedure(more_c) == 1:
                                new_proc = parse_step(more_c)
                                steps.append(new_proc)
                            elif contains_procedure(more_c) > 1:
                                actions = double_action(more_c)
                                if actions:
                                    for a in actions:
                                        new_proc = parse_step(a)
                                        steps.append(new_proc)
                                    if contains_procedure(more_c) == 2:
                                        break
                            else:
                                new_proc = parse_step(more_c)
                                steps.append(new_proc)

    original_recipe.in_list = ingredients
    original_recipe.pr_list = steps
    # call transform etc
    reconstruction.reconstruct(original_recipe)
    r = original_recipe
    try:
        transformed_recipe = transform.transform(r)
    except RuntimeError:
        return [original_recipe, Recipe()]
    # if transformed_recipe == original_recipe:
    #     print "There are no changes to be made"
    # else:
    reconstruction.reconstruct(transformed_recipe)
    return [original_recipe, transformed_recipe]
def parse(message):
    if(message.content.startswith('!blood')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(message.author.name)
        print(stamp)
        print(type(message.author.name))
        print(type(stamp))
        '''print('sending hello to ' + message.author.name + ' ' + stamp)'''
        return ('Is that blood I smell? ' + stamp)
    elif(message.content.startswith('!commands')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('sending command list ' + stamp)
        return (commands)
    elif(message.content.startswith('!changelog')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('sending changelog ' + stamp)
        return (changelog)
    elif(message.content.startswith('!source')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('sending source ' + stamp)
        return (source)
    if(message.content.startswith('!beg')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('Frozen Pizza ' + message.author.name + ' ' + stamp)
        return ('Can I have that Frozen Pizza? ' + stamp)
    if(message.content.startswith('!goodboy')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('Treat ' + message.author.name + ' ' + stamp)
        return ('Can I have my Treat now? ' + stamp)
    elif(message.content.startswith('!elwiki')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        searchterm = message.content[7:].strip()
        if (len(searchterm) == 0):
            print('no argument specified')
            return ('Tell me what to look for, and I shall deliver.')
        if (searchterm.lower().find('seris') != -1):
            print('not looking for seris')
            return ('Some old mistakes should not be touched upon. Mistakes are often a scab to an old, deep wound.')
        if (badword.has_profanity(searchterm)):
            return ('You should reconsider your words if you value your life, ' + message.author.mention())
        print('looking up ' + searchterm)
        r = requests.get('http://elwiki.net/wiki/index.php?search=' + searchterm, allow_redirects=False)
        print(r.status_code)
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        if (r.status_code == 302):
            answer = r.headers['Location']
            print(answer + ' sent on ' + stamp)
            return ('Page for ' + searchterm + ' : ' + answer)
        if (r.status_code == 200):
            print('scraping')
            answer = scrape.scrape(r.text)
            if(answer is None):
                return 'I could not find a match for that.'
            else:
                return ('First match for ' + searchterm + ' : ' + answer)
    elif(message.content.startswith('!babel')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up babel on ' + stamp)
        babelfeed = feedparser.parse('http://elwiki.net/babel/?feed=rss2')
        answer = babelfeed.entries[0]['title'] + ' ' + babelfeed.entries[0]['link']
        print(answer)
        return ('Last post on Babel - ' + answer)
    elif(message.content.startswith('!na')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up na on ' + stamp)
        nafeed = feedparser.parse('http://en.elswordonline.com/feed/')
        answer = nafeed.entries[0]['title'] + ' ' + nafeed.entries[0]['link']
        print(answer)
        return ('Last NA update - ' + answer)
    elif(message.content.startswith('!uk')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up uk on ' + stamp)
        ukfeed = feedparser.parse('http://board.en.elsword.gameforge.com/index.php?page=ThreadsFeed&format=rss2&boardID=8')
        answer = ukfeed.entries[0]['title'] + ' ' + ukfeed.entries[0]['link']
        print(answer)
        return ('Last UK update - ' + answer)
    elif(message.content.startswith('!void')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void on ' + stamp)
        return scrape.scrape_void()
    elif(message.content.startswith('!events')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void events and contests on ' + stamp)
        return scrape.vevent()
    elif(message.content.startswith('!promo')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void promotions on ' + stamp)
        return scrape.vpromotions()
    elif(message.content.startswith('!general')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void general topics on ' + stamp)
        return scrape.vgeneral()
    elif(message.content.startswith('!suggest')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void suggestions on ' + stamp)
        return scrape.vsuggestions()
    elif(message.content.startswith('!intro')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void Intro/Farewells on ' + stamp)
        return scrape.vintro()
    elif(message.content.startswith('!guild')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('looking up void guild topics on ' + stamp)
        return scrape.vguilds()
    elif(message.content.startswith('!shots')):
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('sending shots fired ' + stamp)
        return ('Hmm. It appears as if shots have been fired.')
    elif(message.content.startswith('!ibset')):
        searchterm = message.content[6:].strip()
        return elsgear.lookup(searchterm)
    elif(message.content.startswith('!google')):
        searchterm = message.content[7:].strip()
        if(len(searchterm) == 0):
            return ('Tell me what to look for, and I shall deliver.')
        if (badword.has_profanity(searchterm)):
            return ('You should reconsider your words if you value your life, ' + message.author.mention())
        return ('https://www.google.com/search?q=' + urllib.parse.quote_plus(searchterm))
    elif(message.content.startswith('!gimg')):
        searchterm = message.content[5:].strip()
        if(len(searchterm) == 0):
            return ('Tell me what to look for, and I shall deliver.')
        if (badword.has_profanity(searchterm)):
            return ('You should reconsider your words if you value your life, ' + message.author.mention())
        return ('https://www.google.com/search?q=' + urllib.parse.quote_plus(searchterm) + '&tbm=isch')
    elif(message.content.startswith('!youtube')):
        searchterm = message.content[8:].strip()
        if(len(searchterm) == 0):
            return ('Tell me what to look for, and I shall deliver.')
        if (badword.has_profanity(searchterm)):
            return ('You should reconsider your words if you value your life, ' + message.author.mention())
        return ('https://www.youtube.com/results?search_query=' + urllib.parse.quote_plus(searchterm))
    # elif(message.content.startswith('!hall')):
    #     print('delivering event message (10-22-15)')
    #     return(halloween)
    elif(message.content.startswith('!lenify')):
        msg = message.content[7:].strip()
        return(msg)
    elif(message.content.startswith('!roast')):
        print('delivering roast')
        response = 'http://i.imgur.com/rSMtLIM.gif'
        for mention in message.mentions:
            print('mentioning ' + mention.name)
            response += (' ' + mention.mention())
        return response
    elif(message.content.startswith('!salt')):
        print('delivering salt')
        response = ''
        for mention in message.mentions:
            print('mentioning ' + mention.name)
            response += (' ' + mention.mention())
        return response + '\n\n' + salt
    elif(message.content.startswith('!lyyin')):
        response = ''
        for mention in message.mentions:
            print('mentioning ' + mention.name)
            response += (mention.mention() + ' ')
        response += lyying
        return response
    else:
        return None