Пример #1
0
def left_to_renumber_mmCIF(
        default_input_path_to_mmCIF=current_directory + "/mmCIF",
        default_output_path_to_mmCIF=current_directory + "/output_mmCIF"):
    """List input mmCIF entries that have no identically named output entry.

    Both directories are listed through ``look_what_is_inside``; an input
    entry counts as already renumbered when the very same name appears in
    the output listing.

    Args:
        default_input_path_to_mmCIF: directory holding the input mmCIF files.
        default_output_path_to_mmCIF: directory holding the renumbered files.

    Returns:
        A list of input entries still to be renumbered, in the order in
        which the input listing reported them.
    """
    candidates = look_what_is_inside(
        "mmCIF", default_input_path_to_mmCIF=default_input_path_to_mmCIF)
    finished = look_what_is_inside(
        "output_mmCIF",
        default_output_path_to_mmCIF=default_output_path_to_mmCIF)

    finished_names = set(finished)
    pending_names = set(candidates) - finished_names

    # Filter the listing itself (not the set) so the listing order is kept.
    return [name for name in candidates if name in pending_names]
Пример #2
0
def left_to_renumber_PDB(default_input_path_to_PDB=current_directory + "/PDB",
                         default_output_path_to_PDB=current_directory +
                         "/output_PDB"):
    """List input PDB file names whose 4-character ID has no output yet.

    IDs are taken from characters 3..6 of each input entry and from the
    first four characters of each output entry. Names are rebuilt as
    ``"pdb" + ID + ".ent.gz"`` rather than copied from the input listing.

    Args:
        default_input_path_to_PDB: directory holding the input PDB files.
        default_output_path_to_PDB: directory holding the renumbered files.

    Returns:
        A list of rebuilt file names, in set-iteration (arbitrary) order.
    """
    source_entries = look_what_is_inside(
        "PDB", default_input_path_to_PDB=default_input_path_to_PDB)
    renumbered_entries = look_what_is_inside(
        "output_PDB", default_output_path_to_PDB=default_output_path_to_PDB)

    done_ids = {entry[:4] for entry in renumbered_entries}
    source_ids = {entry[3:7] for entry in source_entries}

    return ["pdb" + pdb_id + ".ent.gz" for pdb_id in source_ids - done_ids]
Пример #3
0
def ProcessPool_run_renum_mmCIF(format_mmCIF, mmCIF_to_renumber,
                                default_input_path_to_mmCIF,
                                default_input_path_to_SIFTS,
                                default_output_path_to_mmCIF,
                                default_mmCIF_num, gzip_mode,
                                exception_AccessionIDs, nproc):
    """Renumber mmCIF files in worker processes, then check the outputs.

    Up to three rounds are made. Each round submits every entry of
    ``mmCIF_to_renumber`` to ``master_mmCIF_renumber_function``, lists the
    output directory, runs ``check_assemblies`` on every listed file and
    lists the directory again. The loop stops once the second listing is at
    least as long as the number of checks performed; otherwise the next
    round is restricted to inputs whose first four characters match no
    output file.

    Args:
        format_mmCIF: label used in progress bars; the value
            ``"mmCIF_assembly"`` switches the output listing to
            ``'output_mmCIF_assembly'``.
        mmCIF_to_renumber: iterable of input file names.
        default_input_path_to_mmCIF: forwarded to the renumber function.
        default_input_path_to_SIFTS: forwarded to the renumber function.
        default_output_path_to_mmCIF: output directory; created if missing.
        default_mmCIF_num: forwarded to the renumber function.
        gzip_mode: forwarded to the renumber function.
        exception_AccessionIDs: forwarded to the renumber function.
        nproc: ``max_workers`` for each process pool.

    Returns:
        The list of non-``None`` results of the first renumbering round.

    Raises:
        Whatever a worker raises, re-raised by ``Future.result()``.
    """
    def list_output_files():
        # The same directory is listed under a different kind label
        # depending on whether assemblies are being processed.
        if format_mmCIF == "mmCIF_assembly":
            return look_what_is_inside(
                'output_mmCIF_assembly',
                default_output_path_to_mmCIF_assembly=
                default_output_path_to_mmCIF)
        return look_what_is_inside(
            'output_mmCIF',
            default_output_path_to_mmCIF=default_output_path_to_mmCIF)

    # Loop-invariant callables, built once.
    partial_master_mmCIF_renumber_function = partial(
        master_mmCIF_renumber_function,
        default_input_path_to_mmCIF=default_input_path_to_mmCIF,
        default_input_path_to_SIFTS=default_input_path_to_SIFTS,
        default_output_path_to_mmCIF=default_output_path_to_mmCIF,
        default_mmCIF_num=default_mmCIF_num,
        gzip_mode=gzip_mode,
        exception_AccessionIDs=exception_AccessionIDs)
    partial_reform_assembly = partial(
        check_assemblies,
        default_output_path_to_mmCIF_assembly=default_output_path_to_mmCIF)

    first_res = 0

    for attempt in range(3):
        if not os.path.exists(default_output_path_to_mmCIF):
            os.makedirs(default_output_path_to_mmCIF)

        # renumber loop -- the context manager shuts the pool down so worker
        # processes do not pile up across rounds.
        resulting = list()
        with ProcessPoolExecutor(max_workers=nproc) as executor:
            jobs = [
                executor.submit(partial_master_mmCIF_renumber_function,
                                mmCIF_files)
                for mmCIF_files in mmCIF_to_renumber
            ]
            for job in tqdm.tqdm(as_completed(jobs),
                                 total=len(jobs),
                                 miniters=1,
                                 position=0,
                                 leave=True,
                                 desc="Renumbering " + format_mmCIF +
                                 " files"):
                result = job.result()
                if result is not None:
                    resulting.append(result)

        if attempt == 0:
            first_res = resulting

        output_mmCIF = list_output_files()

        # checker loop
        check_list = list()
        with ProcessPoolExecutor(max_workers=nproc) as executor:
            jobs = [
                executor.submit(partial_reform_assembly, assembly_files)
                for assembly_files in output_mmCIF
            ]
            for job in tqdm.tqdm(as_completed(jobs),
                                 total=len(jobs),
                                 miniters=1,
                                 position=0,
                                 leave=True,
                                 desc="Checking " + format_mmCIF + " files"):
                check_list.append(job.result())

        # NOTE(review): presumably check_assemblies can remove bad outputs,
        # so a shorter listing after the check means work is left -- confirm.
        output_mmCIF = list_output_files()
        if len(check_list) <= len(output_mmCIF):
            break

        output_mmCIF_4char = {name[:4] for name in output_mmCIF}
        mmCIF_to_renumber = {
            name for name in mmCIF_to_renumber
            if name[:4] not in output_mmCIF_4char
        }

    return first_res
Пример #4
0
def run_downloads_with_ThreadPool(
        format_to_download="mmCIF",
        urls_to_target=(),
        default_input_path_to_mmCIF=current_directory + "/mmCIF",
        default_input_path_to_PDB=current_directory + "/PDB",
        default_input_path_to_SIFTS=current_directory + "/SIFTS",
        default_input_path_to_mmCIF_assembly=current_directory +
        "/mmCIF_assembly",
        default_input_path_to_PDB_assembly=current_directory +
        "/PDB_assembly"):
    """Download URLs with a thread pool, retrying missing files up to 3 times.

    Each round submits every pending URL to ``download_with_pool``, waits
    for all of them, then lists the matching input directory. A URL is
    considered done when the first four characters of its file name match
    the first four characters of some listed file. Only URLs that are still
    missing are submitted in the next round.

    The directory to list is chosen from the three characters that precede
    the last ``/`` of the last URL ("CIF", "pdb", "xml", "all" or "try");
    any other marker is treated as "nothing found".

    Args:
        format_to_download: label shown in the progress bar.
        urls_to_target: iterable of URL strings.
        default_input_path_to_mmCIF: mmCIF download directory.
        default_input_path_to_PDB: PDB download directory.
        default_input_path_to_SIFTS: SIFTS download directory.
        default_input_path_to_mmCIF_assembly: mmCIF assembly directory.
        default_input_path_to_PDB_assembly: PDB assembly directory.

    Returns:
        None. Exceptions raised inside download jobs are not re-raised,
        because the futures' results are never read.
    """
    partial_download_with_pool = partial(
        download_with_pool,
        default_input_path_to_mmCIF=default_input_path_to_mmCIF,
        default_input_path_to_PDB=default_input_path_to_PDB,
        default_input_path_to_SIFTS=default_input_path_to_SIFTS,
        default_input_path_to_mmCIF_assembly=
        default_input_path_to_mmCIF_assembly,
        default_input_path_to_PDB_assembly=
        default_input_path_to_PDB_assembly)

    # URL marker -> (listing kind, keyword argument carrying the directory).
    listing_by_marker = {
        "CIF": ("mmCIF",
                {"default_input_path_to_mmCIF": default_input_path_to_mmCIF}),
        "pdb": ("PDB",
                {"default_input_path_to_PDB": default_input_path_to_PDB}),
        "xml": ("SIFTS",
                {"default_input_path_to_SIFTS": default_input_path_to_SIFTS}),
        "all": ("PDB_assembly",
                {"default_input_path_to_PDB_assembly":
                 default_input_path_to_PDB_assembly}),
        "try": ("mmCIF_assembly",
                {"default_input_path_to_mmCIF_assembly":
                 default_input_path_to_mmCIF_assembly}),
    }

    for _attempt in range(3):
        # The context manager releases the worker threads after each round.
        with ThreadPoolExecutor() as executor:
            jobs = [
                executor.submit(partial_download_with_pool, url)
                for url in urls_to_target
            ]
            for _done in tqdm.tqdm(as_completed(jobs),
                                   total=len(jobs),
                                   miniters=1,
                                   position=0,
                                   leave=True,
                                   desc="Downloading " + format_to_download +
                                   " files"):
                pass

        targeted = list()  # (url, file name) pairs
        format_of_db = 0
        for url in urls_to_target:
            file_name_start_pos = url.rfind("/") + 1
            targeted.append((url, url[file_name_start_pos:]))
            # Three characters just before the final "/"; the last URL wins.
            format_of_db = url[file_name_start_pos - 4:
                               file_name_start_pos - 1]

        listing = listing_by_marker.get(format_of_db)
        if listing is None:
            input_files = set()
        else:
            kind, path_kwarg = listing
            input_files = look_what_is_inside(kind, **path_kwarg)

        # NOTE(review): a 4-character prefix is coarse for names that start
        # with "pdb" (e.g. "pdb1abc.ent.gz" -> "pdb1") -- confirm intent.
        output_4char = {name[:4] for name in input_files}

        # Retry only what is still missing instead of resubmitting all URLs.
        urls_to_target = [
            url for url, file_name in targeted
            if file_name[:4] not in output_4char
        ]

        if not urls_to_target:
            break
Пример #5
0
            urls_to_target_mmCIF_assembly_files = url_formation_for_pool("mmCIF_assembly", parsed_input_text,
                                                                         default_input_path_to_mmCIF_assembly=default_input_path_to_mmCIF_assembly)
            urls_to_target_PDB_files = url_formation_for_pool("PDB", parsed_input_text, default_input_path_to_PDB=default_input_path_to_PDB)
            urls_to_target_SIFTS_files = url_formation_for_pool("SIFTS", parsed_input_text, default_input_path_to_SIFTS=default_input_path_to_SIFTS)

            run_downloads_with_ThreadPool("mmCIF", urls_to_target_mmCIF_files, default_input_path_to_mmCIF=default_input_path_to_mmCIF)
            run_downloads_with_ThreadPool("mmCIF_assembly", urls_to_target_mmCIF_assembly_files,
                                          default_input_path_to_mmCIF_assembly=default_input_path_to_mmCIF_assembly)
            run_downloads_with_ThreadPool("PDB", urls_to_target_PDB_files, default_input_path_to_PDB=default_input_path_to_PDB)
            run_downloads_with_ThreadPool("SIFTS", urls_to_target_SIFTS_files, default_input_path_to_SIFTS=default_input_path_to_SIFTS)

            # renum PDB
            passed_as_arg_file_4Char_PDB = list()
            for file_name in parsed_input_text:
                passed_as_arg_file_4Char_PDB.append(file_name[:4])
            input_PDB_files_were_found = look_what_is_inside("PDB", default_input_path_to_PDB=default_input_path_to_PDB)
            target_files_list_PDB = list()
            for file_name in input_PDB_files_were_found:
                if file_name[3:7] in passed_as_arg_file_4Char_PDB:
                    target_files_list_PDB.append(file_name)
            ProcessPool_run_renum_PDB("PDB", target_files_list_PDB, default_input_path_to_PDB, default_input_path_to_SIFTS,
                                      default_output_path_to_PDB, default_PDB_num, gzip_mode, exception_AccessionIDs, nproc)

            # renum mmCIF_assembly
            input_mmCIF_files_were_found = look_what_is_inside("mmCIF_assembly",
                                                               default_input_path_to_mmCIF_assembly=default_input_path_to_mmCIF_assembly)
            passed_as_arg_file_4Char_mmCIF = list()
            for file_name in parsed_input_text:
                passed_as_arg_file_4Char_mmCIF.append(file_name[:4])
            target_files_list_mmCIF = list()
            for file_name in input_mmCIF_files_were_found:
Пример #6
0
def supreme_download_master(format_of_db, job_type=None,
                            default_input_path_to_mmCIF=current_directory + "/mmCIF",
                            default_input_path_to_PDB=current_directory + "/PDB",
                            default_input_path_to_SIFTS=current_directory + "/SIFTS",
                            default_input_path_to_mmCIF_assembly=current_directory + "/mmCIF_assembly",
                            default_input_path_to_PDB_assembly=current_directory + "/PDB_assembly",

                            default_output_path_to_mmCIF=current_directory + "/output_mmCIF",
                            default_output_path_to_PDB=current_directory + "/output_PDB",
                            default_output_path_to_mmCIF_assemblies=current_directory + "/output_mmCIF_assembly",
                            default_output_path_to_PDB_assemblies=current_directory + "/output_PDB_assembly"):
    """Download whatever is missing locally for the requested database format.

    The catalog is always refreshed first via
    ``catalogdownloader.catalog_downloader()``. With ``job_type ==
    "refresh"`` the SIFTS directory plus the input and output directories of
    the requested format are deleted beforehand.

    Args:
        format_of_db: one of ``"mmCIF"``, ``"mmCIF_assembly"``, ``"PDB"``,
            ``"PDB_assembly"`` or ``"all"``; anything else only refreshes
            the catalog (and the SIFTS directory on refresh).
        job_type: ``"refresh"`` wipes the relevant directories first.
        default_input_path_to_*: download directories per format.
        default_output_path_to_*: output directories, used only for the
            refresh clean-up.

    Returns:
        * ``"mmCIF"`` / ``"PDB"``: the list returned by
          ``what_is_left_to_download`` for that format.
        * ``"mmCIF_assembly"``: list of ``<id>.cif.gz`` names requested.
        * ``"PDB_assembly"``: list of remote entries requested, or ``None``
          when the remote listing could not be obtained.
        * ``"all"``: ``[mmCIF, PDB, mmCIF assemblies, PDB assemblies]``, or
          ``None`` when the PDB assembly listing could not be obtained (the
          mmCIF assemblies are then not downloaded).
        * any other value: ``None``.
    """
    catalogdownloader.catalog_downloader()

    mmCIF_kw = {"default_input_path_to_mmCIF": default_input_path_to_mmCIF}
    PDB_kw = {"default_input_path_to_PDB": default_input_path_to_PDB}
    SIFTS_kw = {"default_input_path_to_SIFTS": default_input_path_to_SIFTS}
    mmCIF_assembly_kw = {"default_input_path_to_mmCIF_assembly": default_input_path_to_mmCIF_assembly}
    PDB_assembly_kw = {"default_input_path_to_PDB_assembly": default_input_path_to_PDB_assembly}

    if job_type == "refresh":
        refresh_targets = {
            "mmCIF": (default_input_path_to_mmCIF, default_output_path_to_mmCIF),
            "mmCIF_assembly": (default_input_path_to_mmCIF_assembly, default_output_path_to_mmCIF_assemblies),
            "PDB": (default_input_path_to_PDB, default_output_path_to_PDB),
            "PDB_assembly": (default_input_path_to_PDB_assembly, default_output_path_to_PDB_assemblies),
            "all": (default_input_path_to_PDB,
                    default_input_path_to_mmCIF,
                    default_input_path_to_PDB_assembly,
                    default_input_path_to_mmCIF_assembly,
                    default_output_path_to_mmCIF,
                    default_output_path_to_mmCIF_assemblies,
                    default_output_path_to_PDB,
                    default_output_path_to_PDB_assemblies),
        }
        # SIFTS is wiped on every refresh, whatever the format.
        for path in (default_input_path_to_SIFTS,) + refresh_targets.get(format_of_db, ()):
            if os.path.exists(path):
                shutil.rmtree(path)

    def fetch_missing(kind, catalog_files, path_kw):
        # List the local directory, diff against the catalog, download the rest.
        found = lookfilesinside.look_what_is_inside(kind, **path_kw)
        left_to_download = lefttodownload.what_is_left_to_download(found, catalog_files)
        urls = url_formation_for_pool(kind, left_to_download, **path_kw)
        run_downloads_with_ThreadPool(kind, urls, **path_kw)
        return left_to_download

    def fetch_missing_mmCIF_assemblies(all_mmCIF_files):
        # One "<id>.cif.gz" request per catalog ID with no local assembly file.
        found = lookfilesinside.look_what_is_inside("mmCIF_assembly", **mmCIF_assembly_kw)
        catalog_ids = {mmCIF_file[:4] for mmCIF_file in all_mmCIF_files}
        present_ids = {assembly_file[:4] for assembly_file in found}
        left_to_download = [mmCIF_id + ".cif.gz" for mmCIF_id in catalog_ids - present_ids]
        # The caller-supplied directory is passed here in every mode.
        urls = url_formation_for_pool("mmCIF_assembly", left_to_download, **mmCIF_assembly_kw)
        run_downloads_with_ThreadPool("mmCIF_assembly", urls, **mmCIF_assembly_kw)
        return left_to_download

    def fetch_missing_PDB_assemblies():
        # Returns None (after printing a notice) when the remote listing is unusable.
        remote_assemblies = download_pdb_assemblies_list_with_lxml()
        # The caller-supplied directory is listed in every mode.
        found = lookfilesinside.look_what_is_inside("PDB_assembly", **PDB_assembly_kw)
        try:
            len(remote_assemblies)
        except TypeError:
            print("Cannot reach https://ftp.wwpdb.org/pub/pdb/data/biounit/PDB/all/ maybe try again later")
            return None
        present = set(found)  # O(1) membership instead of scanning a list
        left_to_download = [assembly for assembly in remote_assemblies
                            if assembly.rsplit('/', 1)[-1] not in present]
        run_downloads_with_ThreadPool("PDB_assembly", left_to_download, **PDB_assembly_kw)
        return left_to_download

    if format_of_db not in ("mmCIF", "mmCIF_assembly", "PDB", "PDB_assembly", "all"):
        return None

    # Indices as used throughout this function: 0 = mmCIF, 1 = PDB, 2 = SIFTS.
    all_data_from_catreader = latestcatreader.latest_catalog_reader()
    all_SIFTS_files_from_latest_catalog = all_data_from_catreader[2]

    if format_of_db == "mmCIF":
        left_to_download_mmCIF = fetch_missing("mmCIF", all_data_from_catreader[0], mmCIF_kw)
        fetch_missing("SIFTS", all_SIFTS_files_from_latest_catalog, SIFTS_kw)
        return left_to_download_mmCIF

    if format_of_db == "mmCIF_assembly":
        lefttodownload_mmCIF_assemblies = fetch_missing_mmCIF_assemblies(all_data_from_catreader[0])
        fetch_missing("SIFTS", all_SIFTS_files_from_latest_catalog, SIFTS_kw)
        return lefttodownload_mmCIF_assemblies

    if format_of_db == "PDB":
        left_to_download_PDB = fetch_missing("PDB", all_data_from_catreader[1], PDB_kw)
        fetch_missing("SIFTS", all_SIFTS_files_from_latest_catalog, SIFTS_kw)
        return left_to_download_PDB

    if format_of_db == "PDB_assembly":
        lefttodownload_PDB_assemblies = fetch_missing_PDB_assemblies()
        if lefttodownload_PDB_assemblies is None:
            return None
        fetch_missing("SIFTS", all_SIFTS_files_from_latest_catalog, SIFTS_kw)
        return lefttodownload_PDB_assemblies

    # format_of_db == "all"
    all_mmCIF_files_from_latest_catalog = all_data_from_catreader[0]
    all_PDB_files_from_latest_catalog = all_data_from_catreader[1]

    left_to_download_mmCIF = fetch_missing("mmCIF", all_mmCIF_files_from_latest_catalog, mmCIF_kw)
    left_to_download_PDB = fetch_missing("PDB", all_PDB_files_from_latest_catalog, PDB_kw)
    fetch_missing("SIFTS", all_SIFTS_files_from_latest_catalog, SIFTS_kw)

    lefttodownload_PDB_assemblies = fetch_missing_PDB_assemblies()
    if lefttodownload_PDB_assemblies is None:
        return None

    lefttodownload_mmCIF_assemblies = fetch_missing_mmCIF_assemblies(all_mmCIF_files_from_latest_catalog)

    return [left_to_download_mmCIF, left_to_download_PDB, lefttodownload_mmCIF_assemblies, lefttodownload_PDB_assemblies]