Example #1
def write_dataset_files(dataset_files_info_filename, files_filename):
    files = []
    # datasets_files_info[dataset][filename] = {'number_events':int, 'check_sum':int, 'modification_date':int, 'file_size':int}
    dataset_files_info = nested_dict.load_json_file(
        dataset_files_info_filename)
    for dataset in dataset_files_info:
        for filename in dataset_files_info[dataset]:
            files.append(datasets.filename_to_parsed(filename))
    write_list(files, files_filename)
    print('Wrote to ' + files_filename)
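The nested layout documented in the comment above is plain JSON; below is a minimal sketch, assuming that layout, of walking it the same way. datasets.filename_to_parsed and write_list are project helpers not shown here, so the sketch (hypothetical input and all) just collects the raw filenames.

# Hypothetical input following the documented layout:
# dataset_files_info[dataset][filename] = {'number_events':int, 'check_sum':int,
#                                          'modification_date':int, 'file_size':int}
dataset_files_info = {
    '/DatasetA/RunIISummer16NanoAODv5/NANOAODSIM': {
        'file_1.root': {'number_events': 1000, 'check_sum': 7, 'modification_date': 0, 'file_size': 2048},
        'file_2.root': {'number_events': 500, 'check_sum': 9, 'modification_date': 0, 'file_size': 1024},
    },
}
files = []
for dataset in dataset_files_info:
    for filename in dataset_files_info[dataset]:
        # write_dataset_files would parse these with datasets.filename_to_parsed
        files.append(filename)
print(files)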
Example #2
def check_entries(job_argument_string):
    #try:
    print(job_argument_string)
    args = get_args(['dataset_files_info_filename', 'command'],
                    job_argument_string)
    args_command = get_args_command(args['command'])

    print(args)
    print(args_command)
    file_path = get_file_path(args_command['output_path'], "",
                              args_command['input_path'])
    #print(file_path)
    if not os.path.isfile(file_path):
        # Wait for file to appear on raid
        file_exists = False
        for iWait in range(10):
            time.sleep(10)
            if os.path.isfile(file_path):
                file_exists = True
                break
        if not file_exists:
            return '[For queue_system] fail: no file named ' + file_path

    root_file = ROOT.TFile.Open(file_path)
    if not root_file:
        return '[For queue_system] fail: Failed in opening ' + file_path
    root_tree = root_file.Get('Events')
    root_number_entries = root_tree.GetEntries()

    #print(args['dataset_files_info_filename'][1:-1])
    #datasets_files_info[dataset][filename] = {'number_events':number_events}
    dataset_files_info = nested_dict.load_json_file(
        args['dataset_files_info_filename'], False)
    path_to_keys_dataset_files_info = datasets.get_path_to_keys_dataset_files_info(
        dataset_files_info)
    keys = path_to_keys_dataset_files_info[args_command['input_path']]
    #print(keys)
    #print(nested_dict.get_item_nested_dict(dataset_files_info,keys))
    dataset_number_entries = nested_dict.get_item_nested_dict(
        dataset_files_info, keys)['number_events']

    #print(root_number_entries)
    #print(dataset_number_entries)
    if root_number_entries == dataset_number_entries:
        return '[For queue_system] success'
    else:
        return '[For queue_system] fail: root_number_entries: ' + str(
            root_number_entries) + ' and dataset_number_entries: ' + str(
                dataset_number_entries) + ' do not match'
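The decisive check in check_entries is comparing the entry count of the 'Events' tree in the copied ROOT file against the count recorded in the JSON. A minimal PyROOT sketch of just that comparison, assuming ROOT is available as above; the function name and arguments are hypothetical.

import ROOT  # PyROOT, assumed available as in check_entries

def entries_match(file_path, expected_entries, tree_name='Events'):
    # Open the ROOT file and compare the tree's entry count with the expected value.
    root_file = ROOT.TFile.Open(file_path)
    if not root_file or root_file.IsZombie():
        return False
    root_tree = root_file.Get(tree_name)
    if not root_tree:
        return False
    return root_tree.GetEntries() == expected_entries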
Example #3
def update_datasets_files_json(path_datasets_filename,
                               in_dataset_files_info_filename,
                               out_dataset_files_info_filename):
    list_dataset = dataset_files.get_list_dataset(path_datasets_filename)
    #dataset_files_info[dataset][filename] = {'number_events':number_events}
    in_dataset_files_info = nested_dict.load_json_file(
        in_dataset_files_info_filename)

    in_list_dataset = in_dataset_files_info.keys()
    append_list_dataset = list(set(list_dataset) - set(in_list_dataset))
    remove_list_dataset = list(set(in_list_dataset) - set(list_dataset))
    # Get files for each dataset
    append_dataset_file_commands = dataset_files.make_dataset_file_commands(
        append_list_dataset)
    #dataset_file_commands = [[dataset, commands]]
    append_dataset_file_results = dataset_files.run_list_command(
        append_dataset_file_commands)
    #datasets_files_info[dataset][filename] = {'number_events':number_events}
    append_dataset_files_info = dataset_files.parse_dataset_file_results(
        append_dataset_file_results)

    # Get meta for each file
    append_dataset_meta_commands = dataset_files.make_dataset_meta_commands(
        append_dataset_files_info)
    append_dataset_meta_results = dataset_files.run_list_command(
        append_dataset_meta_commands)
    dataset_files.parse_dataset_meta_results(append_dataset_meta_results,
                                             append_dataset_files_info)

    remove_dataset_files_info(in_dataset_files_info, remove_list_dataset)
    out_dataset_files_info = combine_dataset_files_info(
        in_dataset_files_info, append_dataset_files_info)

    print('appended list_dataset: ', str(append_list_dataset))
    print('removed list_dataset: ', str(remove_list_dataset))

    nested_dict.save_json_file(out_dataset_files_info,
                               out_dataset_files_info_filename)
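The append/remove bookkeeping above is plain set arithmetic on dataset names; a small illustration with hypothetical names:

# Hypothetical dataset lists, illustrating the set differences used above.
list_dataset = ['DatasetA', 'DatasetB', 'DatasetC']   # datasets listed in path_datasets_filename
in_list_dataset = ['DatasetA', 'DatasetD']            # datasets already in the input JSON
append_list_dataset = list(set(list_dataset) - set(in_list_dataset))
remove_list_dataset = list(set(in_list_dataset) - set(list_dataset))
print(sorted(append_list_dataset))  # ['DatasetB', 'DatasetC']
print(sorted(remove_list_dataset))  # ['DatasetD']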
Example #4
    #  [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
    #  [mc_dataset_2016_names_filename, ['2016']],
    #  [mc_dataset_2017_names_filename, ['2017']],
    #  [mc_dataset_2018_names_filename, ['2018']],
    #  ])
    #print ('dataset_names:', mc_dataset_names)
    # Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
    mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)

  if make_data_datasets:
    # Ex) data_tag_meta[2016][B][MET][miniaod] = 17Jul2018
    data_tag_meta = datasets.parse_data_tag_meta(data_tag_meta_filename)

  if make_mc_datasets:
    # mc_datasets[mc_dataset_name][year][data_tier][path] = {"parent_chain":[], "children":[], "creation time":string, "size":int, "files":int, "events":int}
    mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
    datasets.check_false_none_mc_datasets(mc_datasets)

    # Make meta data
    path_to_keys_mc_datasets = datasets.get_path_to_keys_mc_datasets(mc_datasets)
    search_string_to_keys_mc_datasets = datasets.get_search_string_to_keys_mc_datasets(mc_tag_meta, mc_datasets)
    same_parent_paths = datasets.get_same_parent_paths(mc_datasets)
    multiple_mc_datasets = datasets.get_multiple_mc_datasets(mc_datasets)
    mini_to_nanos_from_nanoaod = datasets.get_mini_to_nanos_from_nanoaod_mc_datasets(mc_datasets)
    nano_to_mini_from_miniaod = datasets.get_nano_to_mini_from_miniaod_mc_datasets(mc_datasets)

  if make_data_datasets:
    data_datasets = nested_dict.load_json_file(data_datasets_filename)
    datasets.check_false_none_data_datasets(data_datasets)
    datasets.print_multiple_data_datasets(data_datasets)
    nested_dict.save_json_file(data_datasets, selected_data_datasets_filename)
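A minimal sketch of the mc_datasets nesting documented in the comment above, with a hypothetical entry. The real dictionaries go through nested_dict.load_json_file / save_json_file; the standard json module stands in for them here (an assumption for illustration only).

import json

# Hypothetical entry following the documented nesting:
# mc_datasets[mc_dataset_name][year][data_tier][path] = {...}
mc_datasets = {
    'TTJets_SingleLeptFromT_Tune': {
        '2016': {
            'miniaod': {
                '/TTJets_SingleLeptFromT/RunIISummer16MiniAODv3/MINIAODSIM': {
                    'parent_chain': [], 'children': [],
                    'creation time': '2019-01-01', 'size': 123456789,
                    'files': 42, 'events': 1000000,
                },
            },
        },
    },
}
# JSON round trip, standing in for nested_dict.save_json_file (assumption).
with open('mc_datasets_example.json', 'w') as json_file:
    json.dump(mc_datasets, json_file, indent=2)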
Example #5
  initialize_arguments(args)
  valid, log = are_arguments_valid(args)
  if not valid:
    print('[Error] '+log)
    sys.exit()

  queue = ucsb_queue.ucsb_queue()

  #jobs_info_filename = 'jsons/submitted_test_mc_jobs_info.json'
  #output_json = 'jsons/checked_test_mc_jobs_info.json'
  #jobscript_check_filename = './copy_aods_check_entries.py'
  #statuses = ['submitted']

  jobs_info_filename = args['jobs_info_filename']
  output_json = args['output_json']
  jobscript_check_filename = args['jobscript_check_filename']
  statuses = args['statuses']

  # Checks the jobs
  # jobs_info = [{'command_script':command_script, 'other_global_key':other_global_key, 'ignore_keys':['job_id', 'job_status', ...]},{'key_for_job':key_for_job},{'key_for_job':key_for_job},...]
  jobs_info = nested_dict.load_json_file(jobs_info_filename)

  # Each job type should make job_script, and job_check_script
  # The ./job_check_script job_log_string should return 'success' or 'fail' for a job_log_string
  # statuses: [status], where status = 'submitted', 'done', 'fail', 'success', 'to_submit'
  queue.check_jobs(jobs_info, statuses, jobscript_check_filename, args['debug'])
  #queue.check_jobs(jobs_info, ['submitted', 'done', 'fail', 'success', 'to_submit'], jobscript_check_filename)
  queue.print_jobs_status(jobs_info)

  nested_dict.save_json_file(jobs_info, output_json)
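A hypothetical jobs_info following the layout documented in the comment above: the first dict carries the global settings, and each later dict describes one job. Selecting jobs by status, as queue.check_jobs is asked to do via statuses, then reduces to a simple list filter; the key names below other than 'job_id' and 'job_status' are made up for illustration.

# Hypothetical jobs_info following the documented layout: the first dict holds the
# global settings, the remaining dicts hold per-job keys such as 'job_status'.
jobs_info = [
    {'command_script': './copy_aods.py', 'ignore_keys': ['job_id', 'job_status']},
    {'input_path': '/store/a.root', 'job_id': '101', 'job_status': 'submitted'},
    {'input_path': '/store/b.root', 'job_id': '102', 'job_status': 'success'},
]
statuses = ['submitted']
# Jobs whose status is in the requested list are the ones that would be re-checked.
jobs_to_check = [job for job in jobs_info[1:] if job.get('job_status') in statuses]
print(len(jobs_to_check))  # 1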
Example #6
    mc_dataset_names = datasets.parse_multiple_mc_dataset_names([
      [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
      [mc_dataset_2016_names_filename, ['2016']],
      [mc_dataset_2017_names_filename, ['2017']],
      [mc_dataset_2018_names_filename, ['2018']],
      ])
    #print ('dataset_names:', mc_dataset_names)
    # Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
    mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)

  if make_data_datasets:
    # Ex) data_tag_meta[2016][B][MET][miniaod] = 17Jul2018
    data_tag_meta = datasets.parse_data_tag_meta(data_tag_meta_filename)

  if make_mc_datasets:
    # mc_datasets[mc_dataset_name][year][data_tier][path] = {"parent_chain":[], "children":[], "creation time":string, "size":int, "files":int, "events":int}
    mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
    datasets.check_overlapping_paths_mc_datasets(mc_datasets)

    #print(nested_dict.get_from_nested_dict(mc_datasets, '/DYJetsToLL_M-50_HT-70to100_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM'))
    #print(nested_dict.get_nested_dict(mc_datasets, ['ZJetsToNuNu_HT-600To800', '2017', 'miniaod']))

    # keys_mc_datasets = [ [mc_dataset_name, year, data_tier, search_string] ]
    keys_mc_datasets = datasets.get_keys_mc_datasets(mc_dataset_names, mc_tag_meta, data_tiers)
    #datasets.print_missing_mc_datasets(keys_mc_datasets, mc_datasets)
    ##nested_dict.fill_empty_nested_dict(mc_datasets, ['TTJets_SingleLeptFromT_Tune', '2016', 'miniaod'])
    ##datasets.check_false_none_mc_datasets(mc_datasets)
    #datasets.print_same_parent_mc_datasets(mc_datasets)
    #datasets.check_mini_nano_consistentcy_mc_datasets(mc_tag_meta, mc_datasets)

    filtered_mc_datasets = mc_datasets
    filtered_mc_datasets = filter_mc_datasets(filtered_mc_datasets, reject_string_ignore_case_mc_datasets, '_mtop1')
    #search_term = ""
    #search_term = "files<10"
    search_term = args['sql_search']

    database = sqlite3.connect(':memory:')
    cursor = database.cursor()

    files_to_download = []
    files_to_remove = []

    if do_mc or do_signal:
        dataset_files_info_filename = mc_dataset_files_info_filename
        # Make database
        # Load files
        # datasets_files_info[dataset][filename] = {'number_events':int, 'check_sum':int, 'modification_date':int, 'file_size':int}
        mc_dataset_files_info = nested_dict.load_json_file(
            mc_dataset_files_info_filename)

        # mc_dataset_names[year] = [(mc_dataset_name, mc_dir)]
        mc_dataset_names = datasets.parse_multiple_mc_dataset_names([
            [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
            [mc_dataset_2016_names_filename, ['2016']],
            [mc_dataset_2017_names_filename, ['2017']],
            [mc_dataset_2018_names_filename, ['2018']],
        ])
        #print ('dataset_names:', mc_dataset_names)
        # Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
        mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)
        # keys_mc_datasets = [ [mc_dataset_name, year, data_tier, search_string] ]
        keys_mc_datasets = datasets.get_keys_mc_datasets(
            mc_dataset_names, mc_tag_meta, data_tiers)
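The in-memory sqlite3 database above is presumably filled with per-file metadata and then filtered with the user-supplied sql_search term (e.g. "files<10"). A minimal sketch of that pattern, with a hypothetical table, columns, and search term; this is an assumption about how the database is used, not the project's actual schema.

import sqlite3

# Minimal sketch (assumption): load per-file metadata into an in-memory database and
# filter it with a search term applied as a WHERE clause, mirroring args['sql_search'].
database = sqlite3.connect(':memory:')
cursor = database.cursor()
cursor.execute('CREATE TABLE files (filename TEXT, number_events INT, file_size INT)')
cursor.executemany('INSERT INTO files VALUES (?, ?, ?)', [
    ('file_1.root', 1000, 2048),
    ('file_2.root', 5, 1024),
])
search_term = 'number_events<10'  # hypothetical search term, analogous to "files<10"
cursor.execute('SELECT filename FROM files WHERE ' + search_term)
print(cursor.fetchall())  # [('file_2.root',)]
database.close()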