Example #1
def get_contents():
    """Check the KTU site for changes and return the new notifications."""
    global notifs
    contents = []
    scraped = scrape()
    if scraped != []:
        datas = notifs
        for scrap in scraped:
            found = False
            for data in datas:
                # Can't do a "not in" comparison on the whole dictionary, because the
                # download links inside it are unique to each request.
                if data['title'] == scrap['title'] and data['date'] == scrap['date']:
                    found = True
                    break

            if not found:
                relevance = relevant(scrap['content'])
                contents.append({
                    'data': scrap,
                    'relevance': str(relevance),
                })

        notifs = scraped
        return contents
    else:
        return []
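For reference, the notification dictionaries that scrape() is assumed to return carry at least the keys accessed above ('title', 'date', 'content'), plus the 'link' list used in Example #6 below; a purely illustrative sample with made-up values:

# Hypothetical sample of one scraped notification, matching the keys these
# examples access; the values are invented for illustration only.
sample_notif = {
    'title': 'B.Tech S6 Exam Notification',       # compared against the previous scrape
    'date': '01-03-2021',                         # compared against the previous scrape
    'content': 'Registration opens next week.',   # passed to relevant()
    'link': [                                     # used by fetch_notifs in Example #6
        {'text': 'Circular (PDF)', 'url': 'https://example.com/circular.pdf'},
    ],
}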
Example #2
async def search(ctx, *message):
    # Presumably registered elsewhere as a discord.py command (e.g. via @bot.command()).
    query = " ".join(message)
    print(message)
    URL = "https://www.google.com/search?q=" + query
    item, link = scrape(URL)
    await ctx.send(item.text)
    await ctx.send(link)
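This coroutine reads like a discord.py command, but the excerpt omits the bot setup and command registration; a minimal, hypothetical sketch of that wiring (prefix, intents, and token handling are assumptions, not from the source):

# Hypothetical wiring for the command above, assuming discord.py 2.x.
import discord
from discord.ext import commands

intents = discord.Intents.default()
intents.message_content = True  # needed so prefix commands can read message text
bot = commands.Bot(command_prefix="!", intents=intents)

@bot.command()
async def search(ctx, *message):
    ...  # body as shown in the example above

# bot.run("YOUR_BOT_TOKEN")  # token supplied by the bot owner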
Example #3
def index():
    log_writer = logger.App_Logger()
    file_object = open("logs/imdb_scraper-{}.txt".format(datetime.now().date()), 'a+')
    if request.method == 'POST':
        log_writer.log(file_object, 'Getting the year')
        year = request.form['content']
        year = "".join(year.split())
        log_writer.log(file_object, 'received the year {}'.format(year))

        try:
            log_writer.log(file_object, 'connecting to mongo server')
            dbConn = MongoClient("mongodb://localhost:27017/")  # opening a connection to Mongo
            log_writer.log(file_object, 'connecting to db')
            db = dbConn['imdb_scrapper']  # connecting to the imdb_scrapper database
            log_writer.log(file_object, 'creating/retrieving collection {}'.format(year))
            collection_name = 'movies_{}'.format(year)
            collection = db[collection_name]
            movies = collection.find({})  # fetch every document in this year's collection
            if collection.count_documents({}) > 0:  # count_documents replaces the deprecated cursor.count()
                log_writer.log(file_object, 'showing results from db')
                return render_template('results.html', movies=movies)
            else:
                log_writer.log(file_object, 'calling scrape function')
                movies = scrapper.scrape(year, log_writer, file_object)
                filename = 'movies_{}.csv'.format(year)

                try:
                    log_writer.log(file_object, 'creating dataframe and writing to CSV file')
                    df = pd.DataFrame(movies)
                    df.to_csv('./csv/{}'.format(filename))
                except Exception as e:
                    log_writer.log(file_object, "Exception occurred while creating csv file: {}".format(e))

                try:
                    files = os.listdir()
                    for f in files:
                        if f.endswith('.csv'):
                            shutil.move(f, 'csv')
                except Exception as e:
                    log_writer.log(file_object, "Exception occurred while moving csv file: {}".format(e))

                log_writer.log(file_object, 'inserting into collection {}'.format(year))
                collection.insert_many(df.to_dict('records'))

                return render_template('results.html', movies=movies[0:(len(movies) - 1)])

        except Exception as e:
            log_writer.log(file_object, "Exception occurred : {}".format(e))
            return 'something is wrong'
    else:
        return render_template('index.html')
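The view above relies on a number of module-level imports and an app/route definition that this excerpt omits; a minimal, hypothetical reconstruction of that setup (the route path and app name are assumptions):

# Hypothetical module-level setup for the view above.
import os
import shutil
from datetime import datetime

import pandas as pd
from flask import Flask, request, render_template
from pymongo import MongoClient

import logger    # project-local logging helper (provides App_Logger)
import scrapper  # project-local scraper (provides scrape(year, log_writer, file_object))

app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
    ...  # body as shown in the example above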
Example #4
    def load_file(self):
        dir_path = os.getcwd()

        try:
            file = filedialog.askopenfilename(initialdir=dir_path, title='Select file',
                                              filetypes=(("PDF files", "*.pdf"), ('All files', '*.*')))
        except Exception:
            print('Error loading file')
            return

        if not file:  # dialog was cancelled without selecting a file
            return

        raw_data = scrape(file)
        print(raw_data)
        self.lonseddler.append(raw_data)  # Add the scraped lonseddel data to the list.
        self.loaded = True
Example #5
def scrapeURL():
    # Flask view (the route decorator is not shown in this excerpt); expects a JSON
    # body like {"url": "..."} and returns a JSON-serialisable dict.
    data = request.json
    url = data['url']
    response = {}
    if urlExists(url):
        image_urls = scrape(url)
        if len(image_urls) > 0:
            response['success'] = True
            response['output'] = image_urls
        else:
            response['success'] = False
            response['output'] = "NO_IMAGES_FOUND"
    else:
        response['success'] = False
        response['output'] = "INVALID_URL"

    return response
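Since the route decorator is not shown, the exact URL is unknown; a hypothetical client call against an assumed /scrape route and the default Flask port illustrates the expected request and response shape:

# Hypothetical client call for the endpoint above; the "/scrape" path and port
# are assumptions, since the @app.route decorator does not appear in this excerpt.
import requests

resp = requests.post("http://localhost:5000/scrape", json={"url": "https://example.com"})
print(resp.json())  # e.g. {"success": True, "output": [...]} or an error code in "output"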
Example #6
def fetch_notifs(message):
    """Send the latest scraped notifications to the chat."""
    contents = scrape()

    # If the KTU site is down (as it often is), fall back to previously scraped data.
    if not contents:
        contents = notifs

    for content in contents[:10]:
        msg_content = content['date'] + '\n\n' + content['title'] + ':\n\n' + content['content']
        for link in content['link']:
            # Telegram supports HTML-style hyperlinks!! :)
            msg_link_text = '<a href="' + link['url'] + '">' + link['text'] + '</a>'
            msg_content += '\n' + msg_link_text
        bot.send_message(
            message.chat.id,
            msg_content,
            parse_mode="html",
        )
Example #7
def scores():
    site = scrapper.pull_site()
    scores = scrapper.scrape(site)
    return jsonify({"scores": scores})
Example #8
''' Main Module for Project '''

import sys
import scrapper
import deployment
import report_builder

if __name__ == "__main__":
    if len(sys.argv) > 1:
        if sys.argv[1] == '-d':
            deployment.deploy()

    scrapper = scrapper.Scrapper()
    report_builder = report_builder.ReportBuilder()

    report_builder.print_reports(scrapper.scrape())
Example #9
def btc_can_rises():
    bitcoin_max_market_cap = 21000000  # 21 million: the maximum number of bitcoins that will ever exist
    bitcoin_supply = 18665937          # circulating supply at the time of writing
    # scrape() presumably returns the market-cap figures this comparison is based on.
    global_market_cap = sum(scrape()) - bitcoin_supply
    total = str(global_market_cap / bitcoin_max_market_cap)
    print("The price of Bitcoin still needs to rise " + total + " times")