def init_db_and_queue(): global db_parsed_articles, connection, consume_channel, publish_channel # Init DB db_parsed_articles = mongo.get_parsed_articles() # Init connection and channels to RabbitMQ connection, consume_channel = queue.init_connection() queue.init_scanned_files(consume_channel) publish_channel = connection.channel() queue.init_parsed_articles(publish_channel, True)
def init_db_and_queue():
    """Initialize the MongoDB handles and RabbitMQ consumer used by this worker.

    Sets the module-level globals ``db_analyzed_articles``,
    ``db_metric_data_daily`` and ``db_metric_data_monthly`` (Mongo collection
    handles) plus ``connection`` / ``consume_channel`` (RabbitMQ connection and
    consumer channel for the analyzed-articles queue).

    NOTE(review): another function with this same name appears earlier in this
    chunk — presumably these come from two different worker modules; confirm
    they do not live in the same file.
    """
    global db_analyzed_articles, db_metric_data_daily, db_metric_data_monthly,\
        connection, consume_channel
    # Init DB
    db_analyzed_articles = mongo.get_analyzed_articles()
    db_metric_data_daily = mongo.get_metric_data_daily()
    db_metric_data_monthly = mongo.get_metric_data_monthly()
    # Init connection and channels to RabbitMQ
    connection, consume_channel = queue.init_connection()
    queue.init_analyzed_articles(consume_channel)
def scan_directory(directory): """ Recursively scan the given directory file new articles """ print " Starting scan..." print " Directory: %s" % directory files_queued = 0 # Initialize a connection and channel to RabbitMQ connection, channel = queue.init_connection() queue.init_scanned_files(channel, True) for root, dirs, files in os.walk(directory): # Walk dirs and files in alphabetical order dirs.sort() files.sort() for file in files: filename = os.path.join(root, file) # Only queue files that match the pattern if re.match(scan_match_pattern, filename): add_scanned_file_to_queue(channel, filename) files_queued += 1 # Update status... if files_queued % updt_freq == 0: print " * Files Queued: %d..." % files_queued # Ignore any svn dirs if '.svn' in dirs: dirs.remove('.svn') queue.close_connection(connection) print "" print " ... scan complete" print " Files Queued: %d" % files_queued print ""