Exemplo n.º 1
0
def store_datasets_to_files_in_localhost(status, config):
    """
    Finds and stores all files belonging to each dataset, on this machine.

    For every dataset id returned by ``util.find_dataset(filename, "all")``
    a ``scan_dataset.py ... --make-list <dir>/<dataset_id>.txt`` command
    line is built, echoed to stdout and executed through the shell. The
    commands run one after the other; their exit codes are not inspected.

    :param status: not used by this function; kept so that existing
        callers do not have to change.
    :param config: mapping providing ``"filename"`` (passed to
        ``scan_dataset.py`` as ``-f``) and ``"make-list"`` (directory part
        of the ``--make-list`` path).
    :raises KeyError: if ``config`` lacks one of the keys above.
    """

    filename = config["filename"]

    # Mapping whose keys are the dataset ids; only the keys are used here.
    datasets = util.find_dataset(filename, "all")

    # scan_dataset.py is addressed relative to the current working
    # directory, so the caller is expected to start from the directory
    # that contains it.
    current_dir = os.getcwd()
    directory_to_save_files = config["make-list"]

    for dataset_id in datasets:
        command = (
            "python %s/scan_dataset.py -f %s -d  %s --make-list %s/%s.txt"
            % (current_dir, filename, dataset_id,
               directory_to_save_files, dataset_id)
        )

        # A parenthesised single argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("executing : %s" % (command))

        # NOTE(review): the values are interpolated unquoted into a shell
        # command line, so paths containing spaces or shell metacharacters
        # will be split/interpreted by the shell.
        subprocess.call(command, shell=True)
Exemplo n.º 2
0
def store_datasets_to_files_in_lotus(status, config):
    """
    Finds and stores all files belonging to each dataset, via Lotus.

    Builds one ``scan_dataset.py ... --make-list <dir>/<dataset_id>.txt``
    command line per dataset id returned by
    ``util.find_dataset(filename, "all")`` and hands the whole list to
    ``util.run_tasks_in_lotus``. Nothing is executed directly here.

    :param status: not used by this function; kept so that existing
        callers do not have to change.
    :param config: mapping providing ``"filename"`` (passed to
        ``scan_dataset.py`` as ``-f``), ``"make-list"`` (directory part of
        the ``--make-list`` path) and ``"num-processes"`` (converted with
        ``int()`` and passed as the second argument of
        ``util.run_tasks_in_lotus``).
    :raises KeyError: if ``config`` lacks one of the keys above.
    """

    filename = config["filename"]

    # Mapping whose keys are the dataset ids; only the keys are used here.
    datasets = util.find_dataset(filename, "all")

    # scan_dataset.py is addressed relative to the current working
    # directory.
    current_dir = os.getcwd()
    directory_to_save_files = config["make-list"]

    # One command per dataset, in the mapping's iteration order.
    scan_commands = [
        "python %s/scan_dataset.py -f %s -d  %s --make-list %s/%s.txt"
        % (current_dir, filename, dataset_id,
           directory_to_save_files, dataset_id)
        for dataset_id in datasets
    ]

    lotus_max_processes = config["num-processes"]

    # NOTE(review): run_tasks_in_lotus is defined elsewhere; presumably the
    # integer caps the number of concurrent tasks and user_wait_time is a
    # polling interval in seconds - confirm against util.
    util.run_tasks_in_lotus(scan_commands, int(lotus_max_processes),
                            user_wait_time=30)
Exemplo n.º 3
0
def _parse_summary_line(summary):
    """
    Split one "Summary" log line into its dataset name and four counters.

    Everything after the first occurrence of ``"Summary"`` is split on
    commas. The dataset name is the 6th whitespace-separated token of the
    first comma field; each counter is the 4th token of the following four
    comma fields, converted with ``int()``.

    :param summary: the log line containing the text ``"Summary"``.
    :returns: tuple ``(dataset, indexed, database_errors,
        properties_errors, total_files)``.
    :raises IndexError: if the line has fewer fields/tokens than expected.
    :raises ValueError: if a counter token is not an integer.
    """
    fields = summary.split("Summary", 1)[1].split(",")

    dataset = fields[0].split()[5]
    indexed = int(fields[1].split()[3])
    database_errors = int(fields[2].split()[3])
    properties_errors = int(fields[3].split()[3])
    total_files = int(fields[4].split()[3])

    return dataset, indexed, database_errors, properties_errors, total_files


def parse_logs(com_args):
    """
    Aggregate the "Summary" lines of all log files, grouped by dataset.

    Every file returned by ``util.build_file_list(log_directory)`` is read
    with ``util.read_file_into_list`` and searched for a "Summary" entry
    with ``util.find_in_list``; files without one are skipped.

    :param com_args: mapping providing ``"log_directory"`` (where the log
        files are looked up) and ``"filename"`` (datasets file passed to
        ``util.find_dataset`` to resolve each dataset's directory).
    :returns: dict mapping dataset name to a dict with the keys
        ``"indexed"``, ``"database_errors"``, ``"properties_errors"``,
        ``"total_files"`` and ``"dataset_dir"``.
    :raises KeyError: if ``com_args`` lacks one of the keys above.
    """

    log_directory = com_args["log_directory"]
    datasets_file = com_args["filename"]

    summary_info = {}

    for filename in util.build_file_list(log_directory):
        content_list = util.read_file_into_list(filename)
        summary = util.find_in_list(content_list, "Summary")

        if summary is None:
            # This log carries no summary entry; nothing to add.
            continue

        (dataset, indexed, database_errors,
         properties_errors, total_files) = _parse_summary_line(summary)

        if dataset not in summary_info:
            # First log seen for this dataset: record the counters as they
            # are and resolve the dataset directory once.
            summary_info[dataset] = {
                "indexed": indexed,
                "database_errors": database_errors,
                "properties_errors": properties_errors,
                "total_files": total_files,
                "dataset_dir": util.find_dataset(datasets_file, dataset),
            }
        else:
            # Further logs for the same dataset: sum the three counters.
            # "total_files" deliberately keeps the value from the first log
            # and is not accumulated.
            dataset_info = summary_info[dataset]
            dataset_info["indexed"] += indexed
            dataset_info["database_errors"] += database_errors
            dataset_info["properties_errors"] += properties_errors

    return summary_info
Exemplo n.º 4
0
def scan_datasets_in_localhost(config, scan_status):
    """
    Uses localhost in order to scan files in the filesystem.

    Depending on ``scan_status`` one of three things happens:

    * ``READ_AND_SCAN_DATASETS_SUB``: ``config["dataset"]`` is treated as a
      comma-separated list of dataset ids and ``scan_dataset.py`` is run
      once per id.
    * ``READ_AND_SCAN_DATASETS``: ``scan_dataset.py`` is run once for every
      dataset id returned by ``util.find_dataset(filename, "all")``.
    * ``READ_DATASET_FROM_FILE_AND_SCAN``: the work is delegated to
      ``read_datasets_from_files_and_scan_in_localhost(config)``.

    Any other status does nothing. Commands are echoed to stdout and run
    sequentially through the shell; their exit codes are not inspected.

    :param config: mapping providing ``"filename"`` and ``"level"``
        (passed to ``scan_dataset.py`` as ``-f`` and ``-l``) and, for the
        ``READ_AND_SCAN_DATASETS_SUB`` status, ``"dataset"``.
    :param scan_status: a ``constants.Script_status`` value selecting the
        mode described above.
    :raises KeyError: if ``config`` lacks a key required by the mode.
    """

    # Get basic options.
    filename = config["filename"]
    level = config["level"]

    # scan_dataset.py is addressed relative to the current working
    # directory.
    current_dir = os.getcwd()

    # NOTE(review): the values are interpolated unquoted into shell command
    # lines below, so ids or paths containing spaces or shell
    # metacharacters will be split/interpreted by the shell.
    if scan_status == constants.Script_status.READ_AND_SCAN_DATASETS_SUB:
        # split(",") yields a one-element list when there is no comma, so a
        # single loop covers both the one-id and the many-ids case.
        for dataset_id in config["dataset"].split(","):
            command = "python %s/scan_dataset.py -f %s -d %s -l %s" % (
                current_dir,
                filename,
                dataset_id,
                level,
            )

            # The message text (including its spelling) is kept unchanged
            # in case anything matches on this output.
            print("executng : %s" % (command))
            subprocess.call(command, shell=True)

    elif scan_status == constants.Script_status.READ_AND_SCAN_DATASETS:
        # Mapping whose keys are the dataset ids; the values are not needed.
        dataset_ids = util.find_dataset(filename, "all")

        for dataset_id in dataset_ids:
            command = "python  %s/scan_dataset.py -f %s -d  %s -l %s" % (
                current_dir,
                filename,
                dataset_id,
                level,
            )

            print("executng : %s" % (command))
            subprocess.call(command, shell=True)

    elif scan_status == constants.Script_status.READ_DATASET_FROM_FILE_AND_SCAN:
        read_datasets_from_files_and_scan_in_localhost(config)
Exemplo n.º 5
0
def read_and_scan_datasets_in_lotus(config):
    """
    Build one scan command per dataset and hand them all to Lotus.

    For every dataset id returned by ``util.find_dataset(filename, "all")``
    a ``scan_dataset.py`` command line is built and echoed to stdout; the
    complete list is then passed to ``util.run_tasks_in_lotus``. Nothing is
    executed directly here.

    :param config: mapping providing ``"filename"`` and ``"level"``
        (passed to ``scan_dataset.py`` as ``-f`` and ``-l``) and
        ``"num-processes"`` (converted with ``int()`` and passed as the
        second argument of ``util.run_tasks_in_lotus``).
    :raises KeyError: if ``config`` lacks one of the keys above.
    """

    filename = config["filename"]
    level = config["level"]

    # scan_dataset.py is addressed relative to the current working
    # directory.
    current_dir = os.getcwd()

    # Mapping whose keys are the dataset ids; only the keys are used here.
    dataset_ids = util.find_dataset(filename, "all")
    commands = []

    for dataset_id in dataset_ids:
        command = "python %s/scan_dataset.py -f  %s -d  %s  -l  %s" % (
            current_dir,
            filename,
            dataset_id,
            level,
        )

        # A parenthesised single argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("created command :" + command)
        commands.append(command)

    lotus_max_processes = config["num-processes"]

    # NOTE(review): run_tasks_in_lotus is defined elsewhere; presumably the
    # integer caps the number of concurrent tasks and user_wait_time is a
    # polling interval in seconds - confirm against util.
    util.run_tasks_in_lotus(commands, int(lotus_max_processes), user_wait_time=30)