Exemplo n.º 1
0
def test_is_integer():
    """Check which values is_integer accepts and which it rejects."""
    # Zero, negative, positive, and a big integer must all be accepted.
    for accepted in (0, -1, 1, 10**30):
        assert is_integer(accepted)

    # Strings, lists, arbitrary objects and None must all be rejected.
    for rejected in ("", "a", [], [1], object(), None):
        assert not is_integer(rejected)
Exemplo n.º 2
0
def infer_genome(genome_object_string_or_int):
    """
    Resolve a Genome object, reference name, or release number to a genome.

    A PyEnsembl Genome is returned unchanged.

    An integer is treated as an Ensembl version and resolved to the
    associated human EnsemblRelease via cached_release.

    A string is first mapped to its canonical reference name
    (e.g. hg19 -> GRCh37) and then resolved to the latest EnsemblRelease
    which has a reference of that name.

    Raises TypeError for any other kind of argument.
    """
    spec = genome_object_string_or_int

    if isinstance(spec, Genome):
        return spec

    if is_integer(spec):
        return cached_release(spec)

    if is_string(spec):
        return genome_for_reference_name(infer_reference_name(spec))

    message = (
        "Expected genome to be an int, string, or pyensembl.Genome "
        "instance, got %s : %s") % (str(spec), type(spec))
    raise TypeError(message)
Exemplo n.º 3
0
def normalize_chromosome(c):
    """
    Normalize a chromosome name, memoizing the result per input value.

    The argument is converted to a string, a leading lowercase "chr" is
    removed, the remainder is upper-cased, and "M" is rewritten as "MT".
    The result is interned and stored in NORMALIZE_CHROMOSOME_CACHE under
    the original argument, so repeated calls return the cached value.

    Parameters
    ----------
    c : str or int
        Chromosome name or number.

    Returns
    -------
    str
        The normalized (and interned) chromosome name.

    Raises
    ------
    TypeError
        If `c` is neither a string nor an integer (as judged by
        is_string / is_integer).
    ValueError
        If str(c) is "0" or the empty string.
    """
    try:
        return NORMALIZE_CHROMOSOME_CACHE[c]
    except KeyError:
        pass
    except TypeError:
        # Unhashable arguments (e.g. a list or dict) cannot be looked up in
        # the cache; fall through so the type check below reports a
        # descriptive error rather than a bare "unhashable type" message.
        pass

    if not (is_string(c) or is_integer(c)):
        raise TypeError("Chromosome cannot be '%s' : %s" % (c, type(c)))

    result = str(c)
    if result == "0":
        raise ValueError("Chromosome name cannot be 0")
    elif result == "":
        raise ValueError("Chromosome name cannot be empty")

    # only strip off lowercase chr since some of the non-chromosomal
    # contigs start with "CHR"
    if result.startswith("chr"):
        result = result[3:]

    # just in case someone is being lazy, capitalize "M", "MT", "X", "Y"
    result = result.upper()

    # standardize mitochondrial genome to be "MT"
    if result == "M":
        result = "MT"

    # interning strings since the chromosome names probably get constructed
    # or parsed millions of times, can save memory in tight situations
    # (such as parsing GTF files)
    result = intern(result)

    NORMALIZE_CHROMOSOME_CACHE[c] = result

    return result
Exemplo n.º 4
0
def normalize_chromosome(c):
    """
    Normalize a chromosome name, memoizing the result per input value.

    The argument is converted to a string, a leading lowercase "chr" is
    removed, the remainder is upper-cased, and "M" is rewritten as "MT".
    The result is interned and stored in NORMALIZE_CHROMOSOME_CACHE under
    the original argument, so repeated calls return the cached value.

    Parameters
    ----------
    c : str or int
        Chromosome name or number.

    Returns
    -------
    str
        The normalized (and interned) chromosome name.

    Raises
    ------
    TypeError
        If `c` is neither a string nor an integer (as judged by
        is_string / is_integer).
    ValueError
        If str(c) is "0" or the empty string.
    """
    try:
        return NORMALIZE_CHROMOSOME_CACHE[c]
    except KeyError:
        pass
    except TypeError:
        # Unhashable arguments (e.g. a list or dict) cannot be looked up in
        # the cache; fall through so the type check below reports a
        # descriptive error rather than a bare "unhashable type" message.
        pass

    if not (is_string(c) or is_integer(c)):
        raise TypeError("Chromosome cannot be '%s' : %s" % (c, type(c)))

    result = str(c)
    if result == "0":
        raise ValueError("Chromosome name cannot be 0")
    elif result == "":
        raise ValueError("Chromosome name cannot be empty")

    # only strip off lowercase chr since some of the non-chromosomal
    # contigs start with "CHR"
    if result.startswith("chr"):
        result = result[3:]

    # Upper-case BEFORE the mitochondrial check so that lazily written
    # names ("m", "chrm", "x", "y") normalize the same way as their
    # upper-case spellings; otherwise "m" would become "M" while "M"
    # becomes "MT".
    result = result.upper()

    # standardize mitochondrial genome to be "MT"
    if result == "M":
        result = "MT"

    # interning strings since the chromosome names probably get constructed
    # or parsed millions of times, can save memory in tight situations
    # (such as parsing GTF files)
    result = intern(result)
    NORMALIZE_CHROMOSOME_CACHE[c] = result
    return result
Exemplo n.º 5
0
def normalize_chromosome(c):
    """
    Normalize a contig name, memoizing the result per input value.

    Integers are converted to their decimal string. Any other argument is
    validated with require_string(..., nonempty=True); then a leading
    lowercase "chr" is removed, the remainder is upper-cased, and "M" is
    rewritten as "MT". The result is stored in NORMALIZE_CHROMOSOME_CACHE
    under the original argument.

    Parameters
    ----------
    c : str or int
        Contig name or number.

    Returns
    -------
    str
        The normalized contig name.

    Raises
    ------
    ValueError
        If `c` is the integer 0.
    Whatever require_string raises for a rejected argument.
    """
    try:
        return NORMALIZE_CHROMOSOME_CACHE[c]
    except KeyError:
        pass

    result = c
    if is_integer(result):
        if result == 0:
            raise ValueError("Contig cannot be 0")
        result = str(result)
    else:
        require_string(result, "contig name", nonempty=True)

        # only strip off lowercase chr since some of the non-chromosomal
        # contigs start with "CHR"
        if result.startswith("chr"):
            result = result[3:]

        # Upper-case BEFORE the mitochondrial check so that lazily written
        # names ("m", "chrm", "x", "y") normalize the same way as their
        # upper-case spellings; otherwise "m" would become "M" while "M"
        # becomes "MT".
        result = result.upper()

        # standardize mitochondrial genome to be "MT"
        if result == "M":
            result = "MT"

    NORMALIZE_CHROMOSOME_CACHE[c] = result
    return result
Exemplo n.º 6
0
def infer_genome(genome_object_string_or_int):
    """
    Resolve a Genome object, reference name, or release number to a genome.

    A PyEnsembl Genome is used as-is.

    An integer is treated as an Ensembl version and resolved to the human
    EnsemblRelease object for that version.

    A string is resolved to the latest EnsemblRelease which has an
    equivalent reference. If the given name is a UCSC genome (e.g. hg19)
    it is converted to the equivalent Ensembl reference (e.g. GRCh37).

    Returns a pair of (Genome, bool) where the bool indicates whether the
    input requested a UCSC genome (e.g. "hg19") and an Ensembl genome
    (e.g. GRCh37) was returned as a substitute.

    Raises TypeError for any other kind of argument.
    """
    spec = genome_object_string_or_int

    # Genome objects and release numbers never involve a UCSC conversion.
    if isinstance(spec, Genome):
        return spec, False

    if is_integer(spec):
        return cached_ensembl_release(spec), False

    if is_string(spec):
        genome, converted_ucsc_to_ensembl = \
            infer_genome_for_reference_name(spec)
        return genome, converted_ucsc_to_ensembl

    message = (
        "Expected genome to be an int, string, or pyensembl.Genome "
        "instance, got %s : %s") % (str(spec), type(spec))
    raise TypeError(message)
Exemplo n.º 7
0
def infer_genome(genome_object_string_or_int):
    """
    Turn a genome specifier into a PyEnsembl Genome.

    Accepted specifiers:
      - a PyEnsembl Genome, which is returned as-is;
      - an integer Ensembl version, which yields the associated human
        EnsemblRelease;
      - a string, which yields the latest EnsemblRelease that has a
        reference of the same name.

    Anything else raises TypeError.
    """
    candidate = genome_object_string_or_int

    # Already a Genome: nothing to resolve.
    if isinstance(candidate, Genome):
        return candidate

    if is_integer(candidate):
        return cached_release(candidate)

    if not is_string(candidate):
        raise TypeError(
            ("Expected genome to be an int, string, or pyensembl.Genome "
             "instance, got %s : %s") % (str(candidate), type(candidate)))

    # first infer the canonical reference name, e.g. mapping hg19 -> GRCh37
    # and then get the associated PyEnsembl Genome object
    canonical_name = infer_reference_name(candidate)
    return genome_for_reference_name(canonical_name)
Exemplo n.º 8
0
def normalize_chromosome(c):
    """
    Normalize a chromosome name, memoizing the result per input value.

    The argument is converted to a string. Names that start with lowercase
    "chr" and contain no underscore keep the prefix and have the remainder
    upper-cased (e.g. "chrx" -> "chrX"); other purely alphabetic names are
    upper-cased (e.g. "x" -> "X"); everything else is left unchanged. The
    result is interned and stored in NORMALIZE_CHROMOSOME_CACHE under the
    original argument.

    Parameters
    ----------
    c : str or int
        Chromosome name or number.

    Returns
    -------
    str
        The normalized (and interned) chromosome name.

    Raises
    ------
    TypeError
        If `c` is neither a string nor an integer (as judged by
        is_string / is_integer).
    ValueError
        If str(c) is "0" or the empty string.
    """
    try:
        return NORMALIZE_CHROMOSOME_CACHE[c]
    except KeyError:
        pass
    except TypeError:
        # Unhashable arguments (e.g. a list or dict) cannot be looked up in
        # the cache; fall through so the type check below reports a
        # descriptive error rather than a bare "unhashable type" message.
        pass

    if not (is_string(c) or is_integer(c)):
        raise TypeError("Chromosome cannot be '%s' : %s" % (c, type(c)))

    result = str(c)

    if result == "0":
        raise ValueError("Chromosome name cannot be 0")
    elif result == "":
        raise ValueError("Chromosome name cannot be empty")

    if result.startswith("chr") and "_" not in result:
        # excluding "_" for names like "chrUn_gl000212"
        # capitalize "chrx" -> "chrX"
        result = "chr" + result[3:].upper()
    elif result.isalpha():
        # capitalize e.g. "x" -> "X"
        result = result.upper()

    # interning strings since the chromosome names probably get constructed
    # or parsed millions of times, can save memory in tight situations
    # (such as parsing GTF files)
    result = intern(result)

    NORMALIZE_CHROMOSOME_CACHE[c] = result

    return result