def get_contents():
    """Check the KTU site for changes and return the new notifications."""
    global notifs
    contents = []
    scraped = scrape()
    if not scraped:
        return []
    # A plain "not in" comparison on the whole dict won't work because the
    # download links inside it are unique to each request, so deduplicate
    # on the (title, date) pair instead.
    seen = {(data['title'], data['date']) for data in notifs}
    for scrap in scraped:
        if (scrap['title'], scrap['date']) not in seen:
            relevance = relevant(scrap['content'])
            contents.append({'data': scrap, 'relevance': str(relevance)})
    notifs = scraped
    return contents
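# A minimal sketch of the notification shape get_contents() above expects from scrape();
# the keys come from the code above (and from fetch_notifs() further down), but the
# values here are invented for illustration.
example_scraped = [{
    'title': 'Exam schedule published',
    'date': '01-01-2021',
    'content': 'B.Tech S6 examinations begin next month...',
    'link': [{'text': 'Download', 'url': 'https://example.com/notification.pdf'}],
}]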
import urllib.parse  # needed to URL-encode the search query

async def search(ctx, *message):
    query = " ".join(message)
    print(message)
    # encode the query so spaces and special characters form a valid URL
    url = "https://www.google.com/search?q=" + urllib.parse.quote_plus(query)
    item, link = scrape(url)
    await ctx.send(item.text)
    await ctx.send(link)
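# Usage sketch (assumption: this coroutine is a discord.py command; the decorator and
# invocation below are illustrative, not part of the code above):
# @bot.command(name="search")
# async def search(ctx, *message): ...
# Invoked in chat as: !search ktu exam results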
def index():
    log_writer = logger.App_Logger()
    file_object = open("logs/imdb_scraper-{}.txt".format(datetime.now().date()), 'a+')
    if request.method == 'POST':
        log_writer.log(file_object, 'Getting the year')
        year = request.form['content']
        year = "".join(year.split())
        log_writer.log(file_object, 'received the year {}'.format(year))
        try:
            log_writer.log(file_object, 'connecting to mongo server')
            dbConn = MongoClient("mongodb://localhost:27017/")  # open a connection to Mongo
            log_writer.log(file_object, 'connecting to db')
            db = dbConn['imdb_scrapper']  # connect to the imdb_scrapper database
            log_writer.log(file_object, 'creating/retrieving collection {}'.format(year))
            collection_name = 'movies_{}'.format(year)
            collection = db[collection_name]
            # search the collection named after the requested year
            movies = collection.find({})
            if collection.count_documents({}) > 0:
                log_writer.log(file_object, 'showing results from db')
                return render_template('results.html', movies=movies)
            else:
                log_writer.log(file_object, 'calling scrape function')
                movies = scrapper.scrape(year, log_writer, file_object)
                filename = 'movies_{}.csv'.format(year)
                try:
                    log_writer.log(file_object, 'creating dataframe and writing to CSV file')
                    df = pd.DataFrame(movies)
                    df.to_csv('./csv/{}'.format(filename))
                except Exception as e:
                    log_writer.log(file_object, "Exception occurred while creating csv file: {}".format(e))
                try:
                    # sweep any stray CSV files from the working directory into ./csv
                    files = os.listdir()
                    for f in files:
                        if f.endswith('.csv'):
                            shutil.move(f, 'csv')
                except Exception as e:
                    log_writer.log(file_object, "Exception occurred while moving csv file: {}".format(e))
                log_writer.log(file_object, 'inserting into collection {}'.format(year))
                collection.insert_many(df.to_dict('records'))
                return render_template('results.html', movies=movies[:-1])
        except Exception as e:
            log_writer.log(file_object, "Exception occurred : {}".format(e))
            return 'something is wrong'
    else:
        return render_template('index.html')
def load_file(self):
    dir_path = os.getcwd()
    try:
        file = filedialog.askopenfilename(initialdir=dir_path, title='Select file',
                                          filetypes=(("PDF files", "*.pdf"), ('All files', '*.*')))
    except Exception:
        print('Error loading file')
        return
    if not file:  # dialog was cancelled
        return
    raw_data = scrape(file)
    print(raw_data)
    self.lonseddler.append(raw_data)  # add the scraped lonseddel data to the list
    self.loaded = True
def scrapeURL():
    data = request.json
    url = data['url']
    response = dict()
    if urlExists(url):
        image_urls = scrape(url)
        if len(image_urls) > 0:
            response['success'] = True
            response['output'] = image_urls
        else:
            response['success'] = False
            response['output'] = "NO_IMAGES_FOUND"
    else:
        response['success'] = False
        response['output'] = "INVALID_URL"
    return response
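# A hedged usage sketch for the handler above, assuming it is wired to a Flask app at a
# hypothetical /scrapeURL route on localhost:5000; the route, host and port are assumptions.
import requests

resp = requests.post("http://localhost:5000/scrapeURL",
                     json={"url": "https://example.com/gallery"})
print(resp.json())  # e.g. {'success': True, 'output': [...]} or an error payload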
def fetch_notifs(message):
    """Send the latest notifications to the chat."""
    contents = scrape()
    # If KTU is down, as usual, fall back to the previously scraped data
    if not contents:
        contents = notifs
    # slice to guard against having fewer than 10 notifications
    for content in contents[:10]:
        msg_content = content['date'] + '\n\n' + content["title"] + ':\n\n' + content["content"]
        for link in content["link"]:
            # Telegram supports HTML-style hyperlinks!
            msg_link_text = "<a href=\"" + link["url"] + "\">" + link["text"] + "</a>"
            msg_content += "\n" + msg_link_text
        bot.send_message(
            message.chat.id,
            msg_content,
            parse_mode="html",
        )
def scores():
    site = scrapper.pull_site()
    scores = scrapper.scrape(site)
    return jsonify({"scores": scores})
''' Main Module for Project '''
import sys

import scrapper
import deployment
import report_builder

if __name__ == "__main__":
    # an optional -d flag runs the deployment step before scraping
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        deployment.deploy()
    # use distinct names so the instances don't shadow the imported modules
    scraper = scrapper.Scrapper()
    builder = report_builder.ReportBuilder()
    builder.print_reports(scraper.scrape())
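# Usage sketch (the entry-point file name is an assumption):
#   python main.py        # scrape and print reports
#   python main.py -d     # deploy first, then scrape and report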
def btc_can_rises():
    # 21 million is Bitcoin's hard cap on total supply (the original name
    # bitcoin_max_market_cap was misleading)
    bitcoin_max_supply = 21000000
    bitcoin_supply = 18665937
    global_market_cap = sum(scrape()) - bitcoin_supply
    total = str(global_market_cap / bitcoin_max_supply)
    print("The price of Bitcoin still needs to rise " + total + " times")
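# Worked example with invented numbers: if scrape() returned market caps summing to
# 2,100,000,000, then (2,100,000,000 - 18,665,937) / 21,000,000 ≈ 99.1, so the function
# would print that the price of Bitcoin still needs to rise about 99.1 times.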