import os

# Project-local helper modules, assumed importable alongside this script.
import fbs_api
import util


def process_obs_to_html(dataset_file):
    """
    Looks up each Observation in the MOLES catalogue, matches phenomena
    to it from ES and then writes HTML pages listing them.
    """
    lines = util.read_file_into_list(dataset_file)
    summary_info = []
    for line in lines:
        path = line.split("=")[1].rstrip()
        try:
            print "searching path {}".format(path)
            results = fbs_api.get_dir_info(path)
        except Exception:
            # Skip paths that cannot be inspected.
            continue
        record = (line, results["formats"], results["sample_names"])
        summary_info.append(record)

    print create_html_table(summary_info)
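
# create_html_table is called above but not defined in this module. The
# sketch below is a minimal assumption of what it might look like, given
# only that summary_info is a list of (line, formats, sample_names)
# tuples; it is illustrative, not the project's actual implementation.
def create_html_table(summary_info):
    rows = []
    for line, formats, sample_names in summary_info:
        # formats and sample_names are assumed to be lists of strings.
        rows.append("<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(
            line.rstrip(), ", ".join(formats), ", ".join(sample_names)))
    header = "<tr><th>Dataset</th><th>Formats</th><th>Sample files</th></tr>"
    return "<table>\n{}\n{}\n</table>".format(header, "\n".join(rows))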

def parse_logs(com_args):
    """Collect per-dataset totals from the "Summary" line of each log file."""
    log_directory = com_args["log_directory"]
    datasets_file = com_args["filename"]

    # Find all files in the log directory.
    list_of_files = util.build_file_list(log_directory)

    summary_info = {}
    # Open each file and extract the summary information.
    for filename in list_of_files:
        content_list = util.read_file_into_list(filename)
        summary = util.find_in_list(content_list, "Summary")
        if summary is None:
            continue

        # Fields are comma-separated after the "Summary" marker.
        words_list = summary.split("Summary", 1)[1].split(",")
        dataset = words_list[0].split()[5]
        indexed = int(words_list[1].split()[3])
        database_errors = int(words_list[2].split()[3])
        properties_errors = int(words_list[3].split()[3])
        total_files = int(words_list[4].split()[3])

        if dataset not in summary_info:
            summary_info[dataset] = {
                "indexed": indexed,
                "database_errors": database_errors,
                "properties_errors": properties_errors,
                "total_files": total_files,
                "dataset_dir": util.find_dataset(datasets_file, dataset),
            }
        else:
            # Accumulate the error and indexing counts from later log files;
            # total_files is deliberately stored only once per dataset.
            dataset_info = summary_info[dataset]
            dataset_info["indexed"] += indexed
            dataset_info["database_errors"] += database_errors
            dataset_info["properties_errors"] += properties_errors

    # At the end return all information for the caller to print.
    return summary_info
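
# A small illustrative helper (not part of the original module) showing one
# way to print the dictionary returned by parse_logs; the key names match
# those populated above.
def print_summary(summary_info):
    for dataset, info in sorted(summary_info.items()):
        print "{}: indexed {} of {} files ({} db errors, {} property errors)".format(
            dataset, info["indexed"], info["total_files"],
            info["database_errors"], info["properties_errors"])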

def sample_files(in_path, out_path):
    # Go to the directory and create the file list.
    list_of_cache_files = util.build_file_list(in_path)

    for filename in list_of_cache_files:
        contents = util.read_file_into_list(filename)
        new_file_name = os.path.join(out_path,
                                     os.path.basename(filename) + "-sample")

        # Write at most 1000 ".pp" entries per cache file; the counter is
        # reset for each file so every sample file gets its own quota.
        counter = 0
        with open(new_file_name, "a") as fd:
            for item in contents:
                if item.rstrip().endswith(".pp"):
                    fd.write(item)
                    counter += 1
                if counter > 1000:
                    break
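
# Illustrative entry point, an assumption rather than the project's real CLI;
# the com_args keys match those read by parse_logs, and the paths are
# placeholders to be replaced with real locations.
if __name__ == "__main__":
    com_args = {"log_directory": "/tmp/logs", "filename": "datasets.ini"}
    print_summary(parse_logs(com_args))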