def _write_data_files_to_db(data_dir_name):
    '''
    Reads all the files of a specified directory and writes the content
    to the memory cache and from there to the database.

    Each sub directory of data_dir_name is treated as one job: it must
    contain a ``<jobname>.log`` file ending with the "Finish!" marker and
    a ``results/`` folder holding the full set of ``.dat`` files.

    @param data_dir_name: Directory where to look for the files
    @type data_dir_name: string

    @return: False on invalid input, None otherwise.
    '''
    # Normalize: drop a single trailing slash so path joins below stay clean.
    if data_dir_name.endswith("/"):
        data_dir_name = data_dir_name[0:-1]

    if not data_dir_name:
        bibtask.write_message("Data directory not specified. Task failed.",
                              stream=sys.stdout, verbose=0)
        return False

    if not osp.isdir(data_dir_name):
        bibtask.write_message("Specified Data directory is not a directory. "
                              "Task failed.",
                              stream=sys.stdout, verbose=0)
        return False

    # Result files every finished job must provide. Loop invariant, so it is
    # built once here instead of on every iteration (was inside the loop).
    correct_files = set(['realauthors.dat',
                         'ids.dat',
                         'virtual_author_clusters.dat',
                         'virtual_authors.dat',
                         'doclist.dat',
                         'virtual_author_data.dat',
                         'authornames.dat',
                         'virtual_author_cluster_cache.dat',
                         'realauthor_data.dat',
                         'ra_va_cache.dat'])

    job_dirs = os.listdir(data_dir_name)
    total = len(job_dirs)
    status = 0

    for job_dir in job_dirs:
        status += 1
        job_dir = "%s/%s" % (data_dir_name, job_dir)

        if not osp.isdir(job_dir):
            bibtask.write_message("This is not a directory and therefore "
                                  "skipped: %s." % job_dir,
                                  stream=sys.stdout, verbose=0)
            continue

        results_dir = "%s/results/" % (job_dir,)

        if not osp.isdir(results_dir):
            bibtask.write_message("No result set found in %s" %
                                  (results_dir,),
                                  stream=sys.stdout, verbose=0)
            continue

        # The job's log file is named after the job directory itself.
        log_name = osp.abspath(job_dir).split("/")
        logfile = "%s/%s.log" % (job_dir, log_name[-1])
        logfile_lastline = ""

        if not osp.isfile(logfile):
            bibtask.write_message("No log file found in %s" % (job_dir,),
                                  stream=sys.stdout, verbose=0)
            continue

        try:
            logfile_lastline = tail(logfile)
        except IOError:
            logfile_lastline = ""

        # A healthy job closes its log with the "Finish!" marker that the
        # standalone computation writes on success (cf. main()).
        if logfile_lastline.count("Finish! The computation finished in") < 1:
            bibtask.write_message("Log file indicates broken results for %s"
                                  % (job_dir,),
                                  stream=sys.stdout, verbose=0)
            continue

        result_files = os.listdir(results_dir)

        if not correct_files.issubset(set(result_files)):
            # BUGFIX: message previously read "Reults folder".
            bibtask.write_message("Results folder does not hold the "
                                  "correct files: %s" % (results_dir,),
                                  stream=sys.stdout, verbose=0)
            continue

        bibtask.task_update_progress('Loading job %s of %s: %s'
                                     % (status, total, log_name[-1]))

        if (populate_structs_from_files(results_dir, results=True)
                and write_mem_cache_to_tables(sanity_checks=True)):
            bibtask.write_message("All Done.",
                                  stream=sys.stdout, verbose=0)
        else:
            bibtask.write_message("Could not write data to the tables from %s"
                                  % (results_dir,),
                                  stream=sys.stdout, verbose=0)
def main():
    """Main entry point.

    Decides between daemon and standalone mode from sys.argv: any of the
    standalone options (-S/--standalone, -j/--job-dir) switches daemon mode
    off. In standalone mode with a job directory, runs the disambiguation
    computation and writes results into ``<job_dir>/results/``.
    """
    arguments = sys.argv

    if len(arguments) <= 1:
        bconfig.LOGGER.error("Please provide parameters!")
        _display_help()

    run_daemon = True
    standalone_option = ("-S", "--standalone", "-j", "--job-dir")

    # Any standalone option anywhere on the command line disables the daemon.
    for option in standalone_option:
        for arg in arguments:
            if arg.startswith(option):
                run_daemon = False

    if run_daemon:
        daemon = None

        try:
            import bibauthorid_daemon as daemon
        except ImportError:
            bconfig.LOGGER.error("Hmm...No Daemon process running.")

        if daemon:
            daemon.bibauthorid_daemon()
    else:
        options = _read_options(arguments)

        if options["job_dir"]:
            job_dir = options["job_dir"]

            if job_dir.endswith("/"):
                job_dir = job_dir[0:-1]

            # Per-job log file named after the job directory itself.
            log_name = osp.abspath(job_dir).split("/")
            logfile = "%s/%s.log" % (job_dir, log_name[-1])

            start = time.time()

            bconfig.init_logger(logfile)

            populate_structs_from_files(job_dir)

            bconfig.LOGGER.debug("| Loaded %s records."
                                 % len(dat.RELEVANT_RECORDS))

            engine.start_computation(process_doclist=True,
                                     process_orphans=True,
                                     print_stats=True)

            result_path = "%s/results/" % (job_dir,)

            if make_directory(result_path):
                write_mem_cache_to_files(result_path, is_result=True)
            else:
                bconfig.LOGGER.error("Cannot write to destination: "
                                     "Cannot create directory")

            end = time.time() - start

            # This "Finish!" line is the success marker the db loader
            # checks for in the job's log file.
            bconfig.LOGGER.log(25, "Finish! The computation finished in %.2fs"
                               % (end))
            bconfig.stop_and_close_logger()
        else:
            # BUGFIX: implicit concatenation previously produced
            # "Pleaseconsult" (missing space between the string parts).
            bconfig.LOGGER.error("Standalone mode without parameters "
                                 "does not do anything helpful. Please "
                                 "consult -h help message for usage")
def _write_data_files_to_db(data_dir_name):
    '''
    Reads all the files of a specified directory and writes the content
    to the memory cache and from there to the database.

    NOTE(review): this appears to duplicate an earlier definition of
    _write_data_files_to_db in this file -- confirm which copy is intended.

    @param data_dir_name: Directory where to look for the files
    @type data_dir_name: string

    @return: False on invalid input, None otherwise.
    '''
    # Normalize: drop a single trailing slash so path joins below stay clean.
    if data_dir_name.endswith("/"):
        data_dir_name = data_dir_name[0:-1]

    if not data_dir_name:
        bibtask.write_message("Data directory not specified. Task failed.",
                              stream=sys.stdout, verbose=0)
        return False

    if not osp.isdir(data_dir_name):
        bibtask.write_message(
            "Specified Data directory is not a directory. "
            "Task failed.", stream=sys.stdout, verbose=0)
        return False

    # Result files every finished job must provide. Loop invariant, so it is
    # built once here instead of on every iteration (was inside the loop).
    correct_files = set([
        'realauthors.dat',
        'ids.dat',
        'virtual_author_clusters.dat',
        'virtual_authors.dat',
        'doclist.dat',
        'virtual_author_data.dat',
        'authornames.dat',
        'virtual_author_cluster_cache.dat',
        'realauthor_data.dat',
        'ra_va_cache.dat'
    ])

    job_dirs = os.listdir(data_dir_name)
    total = len(job_dirs)
    status = 0

    for job_dir in job_dirs:
        status += 1
        job_dir = "%s/%s" % (data_dir_name, job_dir)

        if not osp.isdir(job_dir):
            bibtask.write_message("This is not a directory and therefore "
                                  "skipped: %s." % job_dir,
                                  stream=sys.stdout, verbose=0)
            continue

        results_dir = "%s/results/" % (job_dir, )

        if not osp.isdir(results_dir):
            bibtask.write_message("No result set found in %s" %
                                  (results_dir, ),
                                  stream=sys.stdout, verbose=0)
            continue

        # The job's log file is named after the job directory itself.
        log_name = osp.abspath(job_dir).split("/")
        logfile = "%s/%s.log" % (job_dir, log_name[-1])
        logfile_lastline = ""

        if not osp.isfile(logfile):
            bibtask.write_message("No log file found in %s" % (job_dir, ),
                                  stream=sys.stdout, verbose=0)
            continue

        try:
            logfile_lastline = tail(logfile)
        except IOError:
            logfile_lastline = ""

        # A healthy job closes its log with the "Finish!" marker that the
        # standalone computation writes on success (cf. main()).
        if logfile_lastline.count("Finish! The computation finished in") < 1:
            bibtask.write_message("Log file indicates broken results for %s"
                                  % (job_dir, ),
                                  stream=sys.stdout, verbose=0)
            continue

        result_files = os.listdir(results_dir)

        if not correct_files.issubset(set(result_files)):
            # BUGFIX: message previously read "Reults folder".
            bibtask.write_message("Results folder does not hold the "
                                  "correct files: %s" % (results_dir, ),
                                  stream=sys.stdout, verbose=0)
            continue

        bibtask.task_update_progress('Loading job %s of %s: %s'
                                     % (status, total, log_name[-1]))

        if (populate_structs_from_files(results_dir, results=True)
                and write_mem_cache_to_tables(sanity_checks=True)):
            bibtask.write_message("All Done.",
                                  stream=sys.stdout, verbose=0)
        else:
            bibtask.write_message(
                "Could not write data to the tables from %s"
                % (results_dir, ),
                stream=sys.stdout, verbose=0)
def main():
    """Main entry point.

    Decides between daemon and standalone mode from sys.argv: any of the
    standalone options (-S/--standalone, -j/--job-dir) switches daemon mode
    off. In standalone mode with a job directory, runs the disambiguation
    computation and writes results into ``<job_dir>/results/``.

    NOTE(review): this appears to duplicate an earlier definition of main
    in this file -- confirm which copy is intended.
    """
    arguments = sys.argv

    if len(arguments) <= 1:
        bconfig.LOGGER.error("Please provide parameters!")
        _display_help()

    run_daemon = True
    standalone_option = ("-S", "--standalone", "-j", "--job-dir")

    # Any standalone option anywhere on the command line disables the daemon.
    for option in standalone_option:
        for arg in arguments:
            if arg.startswith(option):
                run_daemon = False

    if run_daemon:
        daemon = None

        try:
            import bibauthorid_daemon as daemon
        except ImportError:
            bconfig.LOGGER.error("Hmm...No Daemon process running.")

        if daemon:
            daemon.bibauthorid_daemon()
    else:
        options = _read_options(arguments)

        if options["job_dir"]:
            job_dir = options["job_dir"]

            if job_dir.endswith("/"):
                job_dir = job_dir[0:-1]

            # Per-job log file named after the job directory itself.
            log_name = osp.abspath(job_dir).split("/")
            logfile = "%s/%s.log" % (job_dir, log_name[-1])

            start = time.time()

            bconfig.init_logger(logfile)

            populate_structs_from_files(job_dir)

            bconfig.LOGGER.debug("| Loaded %s records."
                                 % len(dat.RELEVANT_RECORDS))

            engine.start_computation(process_doclist=True,
                                     process_orphans=True,
                                     print_stats=True)

            result_path = "%s/results/" % (job_dir, )

            if make_directory(result_path):
                write_mem_cache_to_files(result_path, is_result=True)
            else:
                bconfig.LOGGER.error("Cannot write to destination: "
                                     "Cannot create directory")

            end = time.time() - start

            # This "Finish!" line is the success marker the db loader
            # checks for in the job's log file.
            bconfig.LOGGER.log(
                25, "Finish! The computation finished in %.2fs" % (end))
            bconfig.stop_and_close_logger()
        else:
            # BUGFIX: implicit concatenation previously produced
            # "Pleaseconsult" (missing space between the string parts).
            bconfig.LOGGER.error("Standalone mode without parameters "
                                 "does not do anything helpful. Please "
                                 "consult -h help message for usage")