def wrapper_timable(*args, **kwargs):
    """Call the wrapped ``func``, log how long it took, and return its result.

    All positional and keyword arguments are forwarded to ``func`` unchanged.
    The log line is labelled with ``process_name`` when that is truthy and
    with ``func.__name__`` otherwise. ``func``, ``process_name`` and
    ``log_info`` come from the enclosing scope, which is not part of this
    block.
    """
    began = time.time()
    result = func(*args, **kwargs)
    # Measure immediately after the call so building the message is not timed.
    duration = time.time() - began

    label = str(process_name or func.__name__)
    log_info('Function: ' + label + ' Time: ' + str(duration) + 's')
    return result
Exemplo n.º 2
0
def run():
    """Run the scraping tasks as directed by the ``execution`` config section.

    Tasks are built first, then the configuration is read. When the
    ``execution`` section is ``None`` no task is run. Otherwise:

    * all tables are cleared when ``purgeExistingData`` equals the string
      ``'True'``;
    * when ``executionMode`` is set, only tasks whose ``executionMode``
      matches the corresponding ``ExecutionMode`` member are run;
    * when ``executionMode`` is ``None``, every task is run.
    """
    logging.log_info('Starting scraping data from boxofficemojo.com...\n\n')

    tasks = create_tasks_from_config()
    execution = configuration.get_config()['execution']

    if execution is not None:
        # A None value never equals 'True', so one comparison covers both checks.
        if execution['purgeExistingData'] == 'True':
            clear_all_tables()

        # Looked up only after the purge step, matching the original ordering.
        mode_name = execution['executionMode']
        if mode_name is None:
            run_tasks(tasks)
        else:
            wanted_mode = ExecutionMode[mode_name]
            run_tasks([task for task in tasks
                       if task.executionMode == wanted_mode])

    logging.log_info('Finished scraping data from boxofficemojo.com.')
    def scrape_studio_movies(self, studio_id, url):
        """Scrape the movies listed for one studio into a tab-separated file.

        The data file path is derived from ``studio_id`` and is always
        appended to ``self.files``. If the file is already marked complete
        the scrape is skipped. Otherwise the studio's listing pages are
        fetched one at a time, starting at page 1, until a page yields no
        table rows. Every linked movie that ``scrapeutil.scrape_movie``
        returns data for is written as one row, and the first field of that
        row is added to ``self.movies``.

        Args:
            studio_id: Studio identifier used in the file name and log
                messages. ``None`` or ``''`` makes the method return at once.
            url: Studio-specific URL suffix appended to the studio base URL.

        Returns:
            None.
        """
        if studio_id is None or studio_id == '':
            return

        file_name = datafile.create_data_file_path(studio_id + '_Movies.tsv')
        self.files.append(file_name)

        if datafile.is_data_file_complete(file_name):
            logging.log_info("\t\tSkipped scraping studio %s\n", studio_id)
            return

        # The context manager closes (and therefore flushes) the file even if
        # scraping raises; previously the handle was never closed.
        with open(file_name, "w", newline='') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            logging.log_info("\t\tScraping movies for studio %s...\n",
                             studio_id)
            page = 1
            while True:
                # Plain concatenation: %-formatting the whole string would
                # misinterpret any literal '%' contained in ``url``.
                full_url = ("https://www.boxofficemojo.com/studio/" + url +
                            "&page=" + str(page))
                rows = scrapeutil.scrape_table_rows(full_url,
                                                    attributes={
                                                        'border': '0',
                                                        'cellspacing': '1',
                                                        'cellpadding': '5'
                                                    })
                if len(rows) == 0:
                    # An empty page marks the end of the listing.
                    break

                for row in rows:
                    cell = row.find('a')
                    # Rows without a link yield an object with no ``text``.
                    if not hasattr(cell, 'text'):
                        continue
                    # NOTE(review): confirm which whitespace character is
                    # stripped here (plain vs. non-breaking space).
                    movie_name = cell.text.replace(' ', '')
                    # 'Rank' is the header link, not a movie.
                    if not movie_name or movie_name == 'Rank':
                        continue
                    row_data = scrapeutil.scrape_movie(
                        cell.get('href'), movie_name, studio_id)
                    if row_data is not None:
                        writer.writerow(row_data)
                        self.movies.add(row_data[0])
                page += 1

            # Must run while the file is still open: it is handed the writer.
            datafile.mark_data_file_complete(writer)
Exemplo n.º 4
0
def load_data(payload):
    """Pass ``payload`` to ``log_info`` with ``name='entity-get'``.

    The visible body only logs; it returns ``None``.
    """
    log_info(payload, name='entity-get')
Exemplo n.º 5
0
def run_as_lambda(event, context):
    """Log the incoming event and log-stream details, then call ``run()``.

    Args:
        event: Event object; logged as-is.
        context: Object exposing ``log_stream_name`` and ``log_group_name``.
    """
    logging.log_info("Event received: %s", event)
    # Attributes are resolved one at a time, in order, right before logging.
    for template, attribute in (("Log stream name: %s", "log_stream_name"),
                                ("Log group name: %s", "log_group_name")):
        logging.log_info(template, getattr(context, attribute))
    run()
Exemplo n.º 6
0
def run_tasks(tasks):
    """Execute every task in ascending ``order``, logging before and after.

    Args:
        tasks: Mutable list of task objects exposing ``order``,
            ``tableName`` and ``execute()``. The list is sorted in place.
    """
    tasks.sort(key=lambda item: item.order)
    for current in tasks:
        banner = ('Executing ' + type(current).__name__ +
                  ' for table ' + current.tableName + '...\n')
        logging.log_info(banner)
        current.execute()
        logging.log_info('DONE!\n')
Exemplo n.º 7
0
 def execute(self):
     """Run the task pipeline: scrape, optionally clear, write, clean up.

     Each stage runs only if the previous stage set its success flag
     (``scrapeSuccess``, ``clearTableSuccess``, ``writeToDbSuccess``).
     The table is cleared before writing only when ``executionMode`` is
     ``ExecutionMode.completeRewrite``. A disabled task only logs a
     message.
     """
     if not self.enabled:
         logging.log_info("\tTask was disabled. Skipping!\n")
         return

     logging.log_info("\tScraping web pages now...\n")
     self.scrape()
     if not self.scrapeSuccess:
         return
     logging.log_info("\tScrape successful.\n")

     if self.executionMode is not ExecutionMode.completeRewrite:
         # Write without clearing the table first.
         logging.log_info("\tStarted writing to table...\n")
         self.write_to_db()
         if self.writeToDbSuccess:
             logging.log_info("\tFinished writing to table.\n")
             logging.log_info("\tCleaning up data files...\n")
             self.cleanup()
             logging.log_info("\tClean up complete.\n")
         return

     # Complete rewrite: the table must be cleared before writing.
     logging.log_info("\tClearing table...\n")
     self.clear_table()
     if not self.clearTableSuccess:
         return

     logging.log_info("\tCleared table. Started writing to table...\n")
     self.write_to_db()
     if self.writeToDbSuccess:
         logging.log_info(
             "Finished writing to table. Cleaning up data files...\n")
         self.cleanup()