def form_packet_totalbp(fastq_file, packet_size, fmt_func, max_seq_len):
    # Function reads lines from 'fastq_file' and composes a packet of 'packet_size' base pairs.
    #
    # :param fastq_file: file instance from which to read;
    # :type fastq_file: _io.TextIOWrapper or gzip.File;
    # :param packet_size: number of base pairs to retrieve from the file;
    # :type packet_size: int;
    # :param fmt_func: formatting function from the FORMATTING_FUNCS tuple;
    # :param max_seq_len: maximum length of a sequence processed;
    # :type max_seq_len: int (float("inf") if pruning is disabled);

    packet = ""
    qual_dict = dict()  # {<seq_id>: <read_quality>}
    eof = False

    totalbp = 0

    while totalbp < packet_size:

        try:
            read_id = fmt_func(fastq_file.readline())
        except UnicodeDecodeError as err:
            print()
            printlog_warning("Warning: current file is broken: {}."\
                .format(str(err)))
            printlog_warning("File: `{}`".format(os.path.abspath(fastq_file.name)))
            printlog_warning("Ceasing reading sequences from this file.")
            eof = True
            break
        # end try

        if read_id == "":  # if eof is reached, leave now
            eof = True
            break
        # end if

        read_id = fmt_read_id(read_id)
        seq = fmt_func(fastq_file.readline())
        fastq_file.readline()  # pass comment
        avg_qual = get_read_avg_qual(
            fmt_func(fastq_file.readline())
        )

        packet += read_id + '\n' + seq + '\n'
        qual_dict[read_id[1:]] = avg_qual

        totalbp += min(len(seq), max_seq_len)
    # end while

    if max_seq_len < float("inf"):  # prune sequences
        packet = prune_seqs(packet, max_seq_len)
    # end if

    return {"fasta": packet, "qual": qual_dict}, eof
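
# Usage sketch (illustrative only, not part of the original module): read a single
# ~100 kbp packet from a (possibly gzipped) FASTQ file with pruning disabled.
# The helper name `_example_read_one_packet` and the 100000 bp figure are assumptions.
def _example_read_one_packet(fastq_path):
    fastq_file = OPEN_FUNCS[is_gzipped(fastq_path)](fastq_path)
    fmt_func = FORMATTING_FUNCS[is_gzipped(fastq_path)]
    packet, eof = form_packet_totalbp(fastq_file, 100000, fmt_func, float("inf"))
    fastq_file.close()
    # packet["fasta"] holds fasta-formatted records, packet["qual"] maps read IDs
    # to average read quality; `eof` tells whether the file has been exhausted.
    return packet, eof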
def process_paral(fq_fa_list, packet_size, tax_annot_res_dir, blast_algorithm,
                  use_index, db_path, nfiles):
    # Function performs 'many_files'-parallel mode of barapost-local.py.
    #
    # :param fq_fa_list: list of paths to FASTA and FASTQ files meant to be processed;
    # :type fq_fa_list: list<str>;
    # :param packet_size: number of sequences processed by blast in a single launching;
    # :type packet_size: int;
    # :param tax_annot_res_dir: path to output directory that contains taxonomic annotation;
    # :type tax_annot_res_dir: str;
    # :param blast_algorithm: blast algorithm to use;
    # :type blast_algorithm: str;
    # :param use_index: logic value indicating whether to use index;
    # :type use_index: bool;
    # :param db_path: path to database;
    # :type db_path: str;
    # :param nfiles: total number of files;
    # :type nfiles: int;

    queries_tmp_dir = os.path.join(tax_annot_res_dir, "queries-tmp")

    # Iterate over source FASTQ and FASTA files
    for fq_fa_path in fq_fa_list:

        # Create the result directory with the name of the FASTQ or FASTA file being processed:
        new_dpath = create_result_directory(fq_fa_path, tax_annot_res_dir)

        # "hname" means human readable name (i.e. without file path and extension)
        infile_hname = os.path.basename(fq_fa_path)
        infile_hname = re.search(r"(.+)\.(m)?f(ast)?(a|q)(\.gz)?$", infile_hname).group(1)

        # Look around and check if there are results of previous runs of this script.
        # If 'look_around' returns None -- there is no data from a previous run.
        previous_data = look_around(new_dpath, fq_fa_path)

        if previous_data is None:  # If there is no data from previous run
            num_done_seqs = 0  # number of successfully processed sequences
            tsv_res_path = os.path.join(new_dpath, "classification.tsv")  # form result tsv file path
        else:  # if there is data from previous run
            num_done_seqs = previous_data["n_done_reads"]  # get number of successfully processed sequences
            tsv_res_path = previous_data["tsv_respath"]  # result tsv file should be the same as during previous run
        # end if

        how_to_open = OPEN_FUNCS[is_gzipped(fq_fa_path)]
        fmt_func = FORMATTING_FUNCS[is_gzipped(fq_fa_path)]

        if is_fastq(fq_fa_path):
            packet_generator = fastq_packets
            num_seqs = sum(1 for line in how_to_open(fq_fa_path)) // 4  # 4 lines per record
        else:
            packet_generator = fasta_packets
            try:
                num_seqs = len(tuple(filter(
                    lambda l: l.startswith('>'),
                    map(fmt_func, how_to_open(fq_fa_path).readlines()))))
            except UnicodeDecodeError as err:
                with print_lock:
                    print()
                    printlog_warning("Warning: current file is broken: {}."\
                        .format(str(err)))
                    printlog_warning("File: `{}`".format(os.path.abspath(fq_fa_path)))
                    printlog_warning("This file will not be processed.")
                    continue
                # end with
            # end try
        # end if

        if num_seqs == num_done_seqs:
            with counter_lock:
                file_counter.value += 1
                i = file_counter.value  # save to local var and release lock
            # end with
            with print_lock:
                sys.stdout.write('\r')
                printlog_info_time("File #{}/{} (`{}`) has been already completely processed.".\
                    format(i, nfiles, fq_fa_path))
                printlog_info("Omitting it.")
                printn("Working...")
            # end with
            continue
        # end if

        for packet in packet_generator(fq_fa_path, packet_size, num_done_seqs):

            # Blast the packet
            align_xml_text = launch_blastn(packet["fasta"], blast_algorithm,
                                           use_index, queries_tmp_dir, db_path)

            # Configure result TSV lines
            result_tsv_lines = parse_align_results_xml(align_xml_text, packet["qual"])

            # Write the result to tsv
            write_classification(result_tsv_lines, tsv_res_path)
        # end for

        with counter_lock:
            file_counter.value += 1
            i = file_counter.value  # save to local var and release lock
        # end with

        with print_lock:
            sys.stdout.write('\r')
            printlog_info_time("File #{}/{} (`{}`) is processed.".\
                format(i, nfiles, os.path.basename(fq_fa_path)))
            printn("Working...")
        # end with
    # end for

    query_fpath = os.path.join(queries_tmp_dir,
        "query{}_tmp.fasta".format(os.getpid()))
    remove_tmp_files(query_fpath)
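
# Illustrative sketch (not part of the original module): 'many_files' mode hands each
# worker process a slice of the input file list. The round-robin helper below is
# hypothetical; the real dispatch (and the `print_lock`/`counter_lock`/`file_counter`
# globals used above) is set up elsewhere in barapost-local.py.
def _example_split_file_list(fq_fa_list, n_processes):
    # Distribute files round-robin so each call to process_paral gets a similar share.
    return [fq_fa_list[i::n_processes] for i in range(n_processes)]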
def fasta_packets(fasta, packet_size, num_done_seqs, packet_mode=0,
                  saved_packet_size=None, saved_packet_mode=None,
                  max_seq_len=float("inf"), probing_batch_size=float("inf")):
    # Generator yields fasta-formatted packets of records from a fasta file.
    # This function passes 'num_done_seqs' sequences (i.e. they will not be processed)
    #   using 'pass_processed_seqs'.
    #
    # :param fasta: path to fasta file;
    # :type fasta: str;
    # :param packet_size: number of sequences to align in one request ('blastn' launching);
    # :type packet_size: int;
    # :param num_done_seqs: number of sequences in the current file that have already been processed;
    # :type num_done_seqs: int;
    # :param packet_mode: packet mode (see -c option);
    # :type packet_mode: int;
    # :param saved_packet_size: size of the last sent packet from a tmp file. Necessary for resumption.
    #   It will be None if no tmp file was in the classification directory;
    # :type saved_packet_size: int;
    # :param saved_packet_mode: mode used while forming the last sent packet from a tmp file.
    #   Necessary for resumption. It will be None if no tmp file was in the classification directory;
    # :type saved_packet_mode: int;
    # :param max_seq_len: maximum length of a sequence processed;
    # :type max_seq_len: int (float("inf") if pruning is disabled);

    how_to_open = OPEN_FUNCS[is_gzipped(fasta)]
    fmt_func = FORMATTING_FUNCS[is_gzipped(fasta)]

    with how_to_open(fasta) as fasta_file:
        # Next line retrieving is implemented as simple line-from-file reading.
        get_next_line = lambda: fmt_func(fasta_file.readline())

        # Variable that contains the ID of the next sequence in the current FASTA file.
        # If no (or all) sequences in the current FASTA file have already been processed,
        #   this variable is None.
        # There is no way to count sequences in a multi-FASTA file except by counting sequence IDs.
        # Therefore 'next_id_line' should be saved in memory right after the moment a packet is formed.
        next_id_line = pass_processed_seqs(fasta_file, num_done_seqs, fmt_func)

        if next_id_line == "":
            yield {"fasta": "", "qual": dict()}
        # end if

        packet = ""

        # We are resuming; the nucleotide sequence will be saved in the 'line' variable here:
        try:
            line = get_next_line()
        except UnicodeDecodeError as err:
            print()
            printlog_warning("Warning: current file is broken: {}."\
                .format(str(err)))
            printlog_warning("File: `{}`".format(fasta))
            printlog_warning("Ceasing reading sequences from this file.")
            return
        # end try

        if line.startswith('>'):
            line = fmt_read_id(line)  # format sequence ID
        # end if

        # If some sequences have been passed, this if-statement will be executed.
        # A new packet should start with a sequence ID line.
        if not next_id_line is None:
            packet += next_id_line + '\n'
        # end if
        packet += line + '\n'  # add recently read line

        # Here goes the check for saved packet size and mode:
        if not saved_packet_size is None:
            wrk_pack_size = saved_packet_size
        else:
            wrk_pack_size = packet_size
        # end if

        if not saved_packet_mode is None:
            wrk_pack_mode = saved_packet_mode
        else:
            wrk_pack_mode = packet_mode
        # end if

        eof = False
        while not eof:  # till the end of file

            counter = 0  # variable for counting sequences within packet
            seqlen = 0

            while counter < wrk_pack_size:

                try:
                    line = get_next_line()
                except UnicodeDecodeError as err:
                    print()
                    printlog_warning("Warning: current file is broken: {}."\
                        .format(str(err)))
                    printlog_warning("File: `{}`".format(fasta))
                    printlog_warning("Ceasing reading sequences from this file.")
                    line = ""
                    break
                # end try

                if line.startswith('>'):
                    line = fmt_read_id(line)
                    if packet_mode == 0:
                        counter += 1
                    else:
                        counter += min(seqlen, max_seq_len)
                        seqlen = 0
                    # end if
                # end if

                if line == "":  # if end of file (data) is reached
                    break
                # end if

                if not line.startswith('>'):
                    seqlen += len(line.strip())
                # end if

                packet += line + '\n'  # add line to packet
            # end while

            if line != "":
                next_id_line = packet.splitlines()[-1]  # save sequence ID next packet will start with
                packet = '\n'.join(packet.splitlines()[:-1])  # exclude 'next_id_line' from packet
            else:
                eof = True
                next_id_line = None
            # end if

            # Get list of sequence IDs:
            names = filter(lambda l: l.startswith('>'), packet.splitlines())
            names = map(lambda l: l.replace('>', ''), names)

            # {<seq_id>: '-'}, since it is a fasta file
            qual_dict = {name: '-' for name in names}

            if max_seq_len < float("inf"):  # prune sequences
                packet = prune_seqs(packet, max_seq_len)
            # end if

            if packet != "":
                yield {"fasta": packet, "qual": qual_dict}

                if packet_mode == 0:
                    probing_batch_size -= wrk_pack_size
                    wrk_pack_size = min(packet_size, probing_batch_size)
                else:
                    probing_batch_size -= len(qual_dict)
                # end if

                # Switch back to the standard packet size.
                # As Vorotos said, repeated assignment is the best check:
                if wrk_pack_mode != packet_mode:
                    wrk_pack_mode = packet_mode
                # end if

                if not next_id_line is None:
                    packet = next_id_line + '\n'
                # end if
            else:
                return
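
# Usage sketch (illustrative only, not part of the original module): iterate over
# packets of 100 records from a FASTA file, with nothing processed on previous runs.
def _example_iter_fasta_packets(fasta_path):
    for packet in fasta_packets(fasta_path, packet_size=100, num_done_seqs=0):
        # packet["fasta"] is a fasta-formatted string; packet["qual"] maps each
        # sequence ID to '-', since plain fasta records carry no quality values.
        print("{} sequences in this packet".format(len(packet["qual"])))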
def configure_resfile_lines(tsv_res_fpath, sens, taxonomy_path):
    # Function returns a dictionary where keys are IDs of query sequences (i.e. sequences
    #   meant to be binned) and values are lists holding the formatted hit taxonomy name
    #   and alignment statistics.
    #
    # :param tsv_res_fpath: path to the current TSV file. Binning will be performed according to this TSV file;
    # :type tsv_res_fpath: str;
    # :param sens: binning sensitivity;
    # :type sens: str;
    # :param taxonomy_path: path to taxonomy file;
    # :type taxonomy_path: str;

    resfile_lines = dict()

    tax_dict = src.taxonomy.get_tax_dict(taxonomy_path)

    with open(tsv_res_fpath, 'r') as brpst_resfile:

        brpst_resfile.readline()  # pass the head of the table
        line = brpst_resfile.readline().strip()  # get the first informative line

        while line != "":
            splt = line.split('\t')
            read_name = sys.intern(splt[0])
            hit_name = splt[1]
            hit_acc = splt[2]

            try:
                quality = float(splt[8])  # we will filter by quality
            except ValueError as verr:
                if splt[8] == '-':
                    # Keep the minus sign as quality if there is no quality information.
                    # No error will be raised.
                    quality = splt[8]
                else:
                    printlog_error_time("query quality parsing error")
                    printlog_error(str(verr))
                    printlog_error("Please, contact the developer.")
                    platf_depend_exit(1)
                # end if
            # end try

            try:
                query_len = int(splt[3])  # we will filter by length
            except ValueError as verr:
                printlog_error_time("query length parsing error")
                printlog_error(str(verr))
                printlog_error("Please, contact the developer.")
                platf_depend_exit(1)
            # end try

            try:
                pident = float(splt[5])  # we will filter by identity
            except ValueError as verr:
                if splt[5] == '-':
                    # Keep the minus sign as identity if there is no identity information.
                    # No error will be raised.
                    pident = splt[5]
                else:
                    printlog_error_time("Alignment percent of identity parsing error")
                    printlog_error(str(verr))
                    printlog_error("Please, contact the developer.")
                    platf_depend_exit(1)
                # end if
            # end try

            try:
                coverage = float(splt[4])  # we will filter by coverage
            except ValueError as verr:
                if splt[4] == '-':
                    # Keep the minus sign as coverage if there is no coverage information.
                    # No error will be raised.
                    coverage = splt[4]
                else:
                    printlog_error_time("alignment coverage parsing error")
                    printlog_error(str(verr))
                    printlog_error("Please, contact the developer.")
                    platf_depend_exit(1)
                # end if
            # end try

            try:
                resfile_lines[read_name] = [
                    format_taxonomy_name(hit_acc, hit_name, sens, tax_dict),
                    quality, query_len, pident, coverage
                ]
            except NoTaxonomyError:
                printlog_warning("Can't find taxonomy for reference sequence `{}`".format(hit_acc))
                printlog_warning("Trying to recover taxonomy.")

                # Recover
                src.taxonomy.recover_taxonomy(hit_acc, hit_name, taxonomy_path)
                printlog_info("Taxonomy for {} is recovered.".format(hit_acc))

                # Update tax_dict
                tax_dict = src.taxonomy.get_tax_dict(taxonomy_path)

                # Format again -- with the new tax_dict
                resfile_lines[read_name] = [
                    format_taxonomy_name(hit_acc, hit_name, sens, tax_dict),
                    quality, query_len, pident, coverage
                ]
            # end try

            line = brpst_resfile.readline().strip()  # get next line
        # end while
    # end with

    return resfile_lines
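
# Usage sketch (illustrative only; the file paths and the "genus" sensitivity value
# are assumptions): load the binning table produced by the classifier.
def _example_load_binning_table():
    resfile_lines = configure_resfile_lines("classification.tsv", "genus",
                                            "taxonomy/taxonomy.tsv")
    # Each value is [formatted taxonomy name, quality, query length, pident, coverage],
    # matching the TSV columns parsed above (indices 8, 3, 5 and 4 of each row).
    for read_name, (hit, quality, query_len, pident, coverage) in resfile_lines.items():
        print(read_name, hit, quality, query_len, pident, coverage)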
def process(fq_fa_list, n_thr, packet_size, tax_annot_res_dir,
            blast_algorithm, use_index, db_path):
    # Function performs "few_files"-parallel mode.
    #
    # :param fq_fa_list: list of paths to files meant to be processed;
    # :type fq_fa_list: list<str>;
    # :param n_thr: number of threads to launch;
    # :type n_thr: int;
    # :param packet_size: number of sequences processed by blast in a single launching;
    # :type packet_size: int;
    # :param tax_annot_res_dir: path to output directory that contains taxonomic annotation;
    # :type tax_annot_res_dir: str;
    # :param blast_algorithm: blast algorithm to use;
    # :type blast_algorithm: str;
    # :param use_index: logic value indicating whether to use index;
    # :type use_index: bool;
    # :param db_path: path to database;
    # :type db_path: str;

    nfiles = len(fq_fa_list)

    for i, fq_fa_path in enumerate(fq_fa_list):

        # Create the result directory with the name of the FASTQ or FASTA file being processed:
        new_dpath = create_result_directory(fq_fa_path, tax_annot_res_dir)

        # "hname" means human readable name (i.e. without file path and extension)
        infile_hname = os.path.basename(fq_fa_path)
        infile_hname = re.search(r"(.+)\.(m)?f(ast)?(a|q)(\.gz)?$", infile_hname).group(1)

        # Look around and check if there are results of previous runs of this script.
        # If 'look_around' returns None -- there is no data from a previous run.
        previous_data = look_around(new_dpath, fq_fa_path)

        if previous_data is None:  # If there is no data from previous run
            num_done_seqs = 0  # number of successfully processed sequences
            tsv_res_path = os.path.join(new_dpath, "classification.tsv")  # form result tsv file path
        else:  # if there is data from previous run
            num_done_seqs = previous_data["n_done_reads"]  # get number of successfully processed sequences
            tsv_res_path = previous_data["tsv_respath"]  # result tsv file should be the same as during previous run
        # end if

        how_to_open = OPEN_FUNCS[is_gzipped(fq_fa_path)]
        fmt_func = FORMATTING_FUNCS[is_gzipped(fq_fa_path)]

        if is_fastq(fq_fa_path):
            packet_generator = fastq_packets
            num_seqs = sum(1 for line in how_to_open(fq_fa_path)) // 4  # 4 lines per record
        else:
            packet_generator = fasta_packets
            try:
                num_seqs = len(tuple(filter(
                    lambda l: l.startswith('>'),
                    map(fmt_func, how_to_open(fq_fa_path).readlines()))))
            except UnicodeDecodeError as err:
                print()
                printlog_warning("Warning: current file is broken: {}."\
                    .format(str(err)))
                printlog_warning("File: `{}`".format(os.path.abspath(fq_fa_path)))
                printlog_warning("This file will not be processed.")
                continue
            # end try
        # end if

        packet_size = min(packet_size, num_seqs // n_thr)

        if num_seqs == num_done_seqs:
            sys.stdout.write('\r')
            printlog_info_time("File #{}/{} (`{}`) has been already completely processed."\
                .format(i+1, nfiles, fq_fa_path))
            printlog_info("Omitting it.")
            printn("Working...")
            return
        # end if

        # Get the number of sequences to pass to each thread
        file_part_size = num_seqs // n_thr
        if num_seqs % n_thr != 0:
            file_part_size += 1
        # end if

        pool = mp.Pool(n_thr, initializer=init_proc_single_file_in_paral,
                       initargs=(mp.Lock(), mp.Lock(),))

        pool.starmap(process_part_of_file,
                     [(file_part, tsv_res_path, packet_size, tax_annot_res_dir,
                       blast_algorithm, use_index, db_path)
                      for file_part in packet_generator(fq_fa_path, file_part_size, num_done_seqs)])

        # Reaping zombies
        pool.close()
        pool.join()

        sys.stdout.write('\r')
        printlog_info_time("File #{}/{} (`{}`) is processed.".\
            format(i+1, nfiles, os.path.basename(fq_fa_path)))
        printn("Working...")
    # end for
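
# Worked sketch of the partition arithmetic above (illustrative, not part of the
# original module): with num_seqs = 10 and n_thr = 4, file_part_size becomes
# 10 // 4 + 1 = 3, so the packet generator yields parts of 3, 3, 3 and 1 sequences.
def _example_file_part_size(num_seqs, n_thr):
    file_part_size = num_seqs // n_thr
    if num_seqs % n_thr != 0:
        file_part_size += 1
    # end if
    return file_part_size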
        printlog_error_time(str(err))
        platf_depend_exit(1)
    # end try
# end if

taxonomy_path = os.path.join(taxonomy_dir, "taxonomy.tsv")

# Check if there is a legacy taxonomy file and, if so, reformat it to the new (TSV) format
legacy_taxonomy_handling.check_deprecated_taxonomy(tax_annot_res_dir)

from src.barapost_local_modules.build_local_db import build_local_db

# Indexed discontiguous searches are not supported:
# https://www.ncbi.nlm.nih.gov/books/NBK279668/#usermanual.Megablast_indexed_searches
if use_index == "true" and blast_algorithm == "dc-megablast":
    printlog_warning("Warning: BLAST index cannot be used if the alignment algorithm is DiscoMegablast.")
    printlog_warning("Index will be created anyway.")
# end if

# Build a database
db_path = build_local_db(tax_annot_res_dir, acc_fpath, your_own_fasta_lst,
                         accs_to_download, use_index)

if blast_algorithm == "dc-megablast":
    use_index = "false"
# end if

if use_index == "true" and len(glob(os.path.join(tax_annot_res_dir,
                                                 "local_database", "*idx"))) == 0:
    printlog_warning(
def _get_related_replicons(acc, acc_dict):
    # Function finds replicons (other chromosomes or plasmids, sometimes even proviruses),
    #   which are related to a GenBank record "discovered" by barapost-prober.py.
    #
    # :param acc: accession of a record "discovered" by barapost-prober.py;
    # :type acc: str;
    # :param acc_dict: dictionary {<ACCESSION>: <HIT_DEFINITION>};
    # :type acc_dict: dict<str: tuple<str>>;
    #
    # Returns a list of tuples of the following structure:
    #   (<ACCESSION>, <RECORD_DEFINITION>)

    # We will save all titles in order not to duplicate records in our database
    repl_list = [(acc, acc_dict[acc])]

    # The elink utility returns links in DB_1 that are connected to a given ID in DB_2
    eutils_server = "eutils.ncbi.nlm.nih.gov"
    elink = "elink.fcgi"

    # = Find BioSample ID =

    # Configure URL
    nuc2biosmp_url = "/entrez/eutils/{}?dbfrom=nuccore&db=biosample&id={}".format(elink, acc)

    # Get XML with our links
    text_link_to_bsmp = lingering_https_get_request(eutils_server, nuc2biosmp_url,
                                                    "BioSample page", acc)

    # Parse this XML
    root = ElementTree.fromstring(text_link_to_bsmp)
    linkset = next(iter(root.getchildren())).find("LinkSetDb")

    # XML should contain the element "LinkSetDb"
    if linkset is None:
        printlog_warning("Cannot check replicons for `{}`: there is no BioSample page for this record."
            .format(acc))
        return list()
    # end if

    # Here we have the BioSample ID
    biosmp_id = linkset.find("Link").find("Id").text

    # = Find the assembly associated with this BioSample ID =

    # We will pass this BioSample ID through nuccore in order not to
    #   allow requesting over 7k transcripts, like for this fungus:
    #   https://www.ncbi.nlm.nih.gov/biosample/SAMN07457167
    # After this, only scaffolds (nearly 130 sequences) will be downloaded.

    # Configure URL
    biosmp2ass_url = "/entrez/eutils/{}?dbfrom=biosample&db=assembly&id={}".format(elink, biosmp_id)

    # Get XML with our links
    text_link_to_ass = lingering_https_get_request(eutils_server, biosmp2ass_url,
        "Assembly link associated with BioSample ID {}".format(biosmp_id))

    # Parse this XML
    root = ElementTree.fromstring(text_link_to_ass)
    linkset = next(iter(root.getchildren())).find("LinkSetDb")

    # XML should contain the element "LinkSetDb"
    if linkset is None:
        printlog_warning("Cannot check replicons for `{}`: there is no assembly page for this record."
            .format(acc))
        return list()
    # end if

    # Here we have the Assembly ID
    ass_id = linkset.find("Link").find("Id").text

    # = Find GIs in nuccore associated with this Assembly ID =

    # Configure URL
    ass2nuc_url = "/entrez/eutils/{}?dbfrom=assembly&db=nuccore&id={}".format(elink, ass_id)

    # Get XML with our links
    text_link_to_nuc = lingering_https_get_request(eutils_server, ass2nuc_url,
        "Nucleotide links associated with assembly {}".format(ass_id))

    # Parse this XML
    root = ElementTree.fromstring(text_link_to_nuc)
    linkset = next(iter(root.getchildren())).find("LinkSetDb")

    # XML should contain the element "LinkSetDb"
    if linkset is None:
        printlog_error_time("Cannot check replicons for `{}`: failed to find nuccore records for assembly {}."
            .format(acc, ass_id))
        printlog_error("Please, contact the developer.")
        platf_depend_exit(1)
    # end if

    # We will entertain the user -- show this spinning thing (like conda does),
    #   indicating that the script is actually working.
    krutiolka = ('|', '/', '-', '\\')
    krut_i = 0
    sys.stdout.write("\r {}".format(krutiolka[3]))
    sys.stdout.flush()

    # Collect links
    for elem in linkset.iter():

        if elem.tag == "Id":  # element "Id" contains our GI
            # Get GI, title and accession:
            rel_gi = elem.text
            rel_def, rel_acc = _get_record_title(rel_gi)

            # Print this spinning thing
            sys.stdout.write("\r {}".format(krutiolka[krut_i]))
            sys.stdout.flush()
            krut_i = krut_i + 1 if krut_i != 3 else 0

            # If accession is new -- update the list
            if not rel_acc in map(lambda x: x[0], repl_list):
                # acc_dict[rel_acc] = rel_def # update acc_dict
                repl_list.append((rel_acc, rel_def))
            # end if
        # end if
    # end for

    return repl_list
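
# Usage sketch (illustrative only; the accession and hit definition are made up):
def _example_collect_replicons():
    acc_dict = {"NZ_CP000000.1": ("Example bacterium chromosome",)}
    for rel_acc, rel_def in _get_related_replicons("NZ_CP000000.1", acc_dict):
        # Each item is an (<ACCESSION>, <RECORD_DEFINITION>) tuple, including the
        # record that the search started from.
        print(rel_acc, rel_def)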
def _ling_https_getreq_handl_301(server, url, request_for=None, acc=None):
    # The name stands for "Lingering Https Get Request Handling 301".
    # Function performs a "lingering" HTTPS request.
    # It means that the function tries to get the response
    #   again and again if the request fails.
    # It handles 301-redirection in order to search for replicons related to "NC-records".
    #
    # :param server: server address;
    # :type server: str;
    # :param url: the rest of the url;
    # :type url: str;
    # :param request_for: some comment for the error message;
    # :type request_for: str;
    # :param acc: GenBank accession;
    # :type acc: str;
    #
    # Returns the obtained response decoded from UTF-8 ('str').

    error = True
    while error:
        try:
            conn = http.client.HTTPSConnection(server, timeout=10)  # create connection
            conn.request("GET", url)  # ask whether there are results
            response = conn.getresponse()  # get the response

            # Handle redirection
            if response.code == 301:
                # Link to the identical GenBank record is in the "Location" header:
                redirect_url = response.getheader("Location") \
                    + "?report=accnlist&log$=seqview&format=text"
            else:
                raise _DoesNotRedirectError("NCBI does not redirect, although it must!")
            # end if
        except (OSError, http.client.RemoteDisconnected,
                socket.gaierror, http.client.CannotSendRequest) as err:
            comment_str = ""
            if not request_for is None:
                comment_str += " requesting for {}".format(request_for)
                if not acc is None:
                    comment_str += " (accession: '{}')".format(acc)
                # end if
                comment_str += '.'
            # end if
            print()
            printlog_warning("Can't connect to `{}`{}".format(server + url, comment_str))
            printlog_warning(str(err))
            printlog_warning("The program will sleep for 30 seconds and try to connect again.")
            sleep(30)
        except _DoesNotRedirectError as err:
            printlog_error_time(str(err))
            printlog_error("Please, contact the developer.")
            platf_depend_exit(1)
        else:
            error = False  # if no exception occurred, get out of the loop
        finally:
            conn.close()
        # end try
    # end while

    # And here goes a simple "lingering_https_get_request",
    #   which will retrieve content from the redirected location
    return lingering_https_get_request(server, redirect_url, request_for, acc)
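
# Usage sketch (illustrative only; both the server and the /nuccore/<accession> path
# are assumptions made for this example -- the real caller supplies its own URL):
def _example_follow_nc_redirect(acc):
    server = "www.ncbi.nlm.nih.gov"
    url = "/nuccore/{}".format(acc)
    # Follow the 301 redirect for an "NC-record" and return the redirected page text.
    return _ling_https_getreq_handl_301(server, url,
                                        request_for="related replicons", acc=acc)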