Пример #1
0
 def test_valid_no_headers(self):
     """Build a character matrix from valid sequences without headers.

     Two named sequences carrying continuous values are handed to
     ``dr.get_character_matrix_from_sequences_list`` with no variable
     headers, and the result is expected to be a ``Matrix``.
     """
     fixtures = (
         ('seq1', [1.0, 2.0, 3.0]),
         ('seq2', [3.0, 3.2, 4.1]),
     )
     sequences = []
     for label, values in fixtures:
         sequence = Sequence(name=label)
         sequence.set_cont_values(values)
         sequences.append(sequence)

     matrix = dr.get_character_matrix_from_sequences_list(sequences)
     assert isinstance(matrix, Matrix)
Пример #2
0
 def test_valid_with_headers(self):
     """Build a character matrix from valid sequences with headers.

     Two named sequences carrying continuous values are handed to
     ``dr.get_character_matrix_from_sequences_list`` together with a list
     of variable headers.  The result is expected to be a ``Matrix`` whose
     column headers equal the list that was supplied.
     """
     expected_headers = ['Val 1', 'Val 2', 'Val 3']
     fixtures = (
         ('seq1', [1.0, 2.0, 3.0]),
         ('seq2', [3.0, 3.2, 4.1]),
     )
     sequences = []
     for label, values in fixtures:
         sequence = Sequence(name=label)
         sequence.set_cont_values(values)
         sequences.append(sequence)

     matrix = dr.get_character_matrix_from_sequences_list(
         sequences, var_headers=expected_headers)
     assert isinstance(matrix, Matrix)
     assert matrix.get_column_headers() == expected_headers
Пример #3
0
def create_sequence_list_from_dict(values_dict):
    """Build Sequence objects from a mapping of taxon names to value lists.

    Args:
        values_dict (dict) : Maps each taxon name to the list of values for
            that taxon.

    Note:
        * The dictionary should have structure::

            {
                "{taxon_name}" : [{values}]
            }

    Returns:
        A two-item tuple: the list of Sequence objects (in dictionary
        iteration order) and None, since a dictionary carries no headers.

    Raises:
        AlignmentIOError: If a dictionary value is not a list.
    """
    sequences = []
    for taxon_name in values_dict:
        taxon_values = values_dict[taxon_name]
        # Only real lists are accepted; tuples and other iterables are
        # rejected as well.
        if not isinstance(taxon_values, list):
            raise AlignmentIOError('Values must be a list')
        sequence = Sequence(name=taxon_name)
        sequence.set_cont_values(taxon_values)
        sequences.append(sequence)
    # The headers slot of the returned pair is always None for this source.
    return sequences, None
Пример #4
0
def read_table_alignment_flo(table_flo):
    """Parse a tab-delimited table into a list of sequences.

    Each usable line holds a taxon name, a tab, and then the values for that
    taxon separated by single spaces.  Only the first two tab-separated
    columns are read.

    Args:
        table_flo (file-like): An iterable of text lines with table data.

    Returns:
        A list of Sequence objects, one per parsed line.

    Raises:
        AlignmentIOError: If any line cannot be turned into a sequence; the
            message is the text of the underlying exception.
    """
    sequences = []
    for line in table_flo:
        # Lines of two characters or fewer (blank lines, stray newlines)
        # are ignored.
        if len(line) <= 2:
            continue
        try:
            columns = line.strip().split("\t")
            taxon_name = columns[0].strip()
            tokens = columns[1].strip().split(" ")
            values = [float(token) for token in tokens]
            sequence = Sequence(name=taxon_name)
            sequence.set_cont_values(values)
            sequences.append(sequence)
        except Exception as err:
            raise AlignmentIOError(str(err))
    return sequences
Пример #5
0
def read_json_alignment_flo(json_flo):
    """Load sequences and optional headers from a JSON file-like object.

    Args:
        json_flo (file-like): A file-like object with JSON alignment data.

    Note:
        * File should have structure::

            {
                "headers" : [{header_names}],
                "values" : [
                    {
                        "name" : "{taxon_name}",
                        "values" : [{values}]
                    }
                ]
            }

    Returns:
        A two-item tuple: the list of Sequence objects and the headers list,
        or None in place of the headers when the "headers" key is absent.

    Raises:
        AlignmentIOError: If headers are provided but they are not a list.
    """
    document = json.load(json_flo)

    # Headers are optional, but when the key is present it must hold a list.
    headers = None
    if 'headers' in document.keys():
        headers = document['headers']
        if not isinstance(headers, list):
            raise AlignmentIOError(
                'If headers are provided, they must be a list')

    sequences = []
    for entry in document['values']:
        taxon_name = entry['name']
        # Every value is coerced to float before being stored.
        numeric_values = [float(raw) for raw in entry['values']]
        sequence = Sequence(name=taxon_name)
        sequence.set_cont_values(numeric_values)
        sequences.append(sequence)
    return sequences, headers
Пример #6
0
def read_phylip_alignment_flo(phylip_flo):
    """Parse a phylip alignment file-like object into a list of sequences.

    Args:
        phylip_flo (file-like): The phylip file-like object.

    Note:
        * We assume that the phylip files are extended and not strict (in terms
            of how many characters for taxon names).
        * The phylip file is in the format::
            numoftaxa numofsites
            seqlabel sequence
            seqlabel sequence

    Returns:
        A list of Sequence objects, one per parsed line.

    Raises:
        AlignmentIOError: If any line cannot be turned into a sequence; the
            message is the text of the underlying exception.
    """
    # The first line only carries the taxa and site counts, which are not
    # needed to parse the records, so it is consumed and discarded.
    phylip_flo.readline()

    sequences = []
    for line in phylip_flo:
        try:
            # Lines of two characters or fewer are ignored.
            if len(line) <= 2:
                continue
            fields = line.strip().split()
            label = fields[0].strip()
            residues = fields[1].strip()
            sequences.append(Sequence(name=label, seq=residues))
        except Exception as err:
            raise AlignmentIOError(str(err))
    return sequences
Пример #7
0
def read_csv_alignment_flo(csv_flo):
    """Parse a CSV file-like object into sequences and optional headers.

    Each data line holds a taxon name in the first column followed by the
    values for that taxon.  When a header row is detected, its first column
    is dropped and the remaining cells are returned as the headers.

    Args:
        csv_flo (file-like): A seekable file-like object with CSV alignment
            data.

    Returns:
        A two-item tuple: the list of Sequence objects and the headers list,
        or None in place of the headers when no header row was detected.

    Raises:
        AlignmentIOError: If the number of columns is inconsistent across the
            sequences.
    """
    sequences = []
    headers = None

    # NOTE(review): the sniffer is only shown the first line of the file;
    # confirm that is enough for it to tell a header row from a data row.
    first_line = csv_flo.readline()
    has_header = csv.Sniffer().has_header(first_line)
    # Rewind so the loop below sees the first line again.
    csv_flo.seek(0)

    expected_columns = None
    for row in csv_flo:
        cells = row.strip().split(',')
        # The first row fixes the column count every later row must match.
        if expected_columns is None:
            expected_columns = len(cells)
        elif len(cells) != expected_columns:
            raise AlignmentIOError('Number of columns is inconsistent')

        if has_header and headers is None:
            headers = cells[1:]
            continue

        taxon_name = cells[0]
        values = [float(cell) for cell in cells[1:]]
        sequence = Sequence(name=taxon_name)
        sequence.set_cont_values(values)
        sequences.append(sequence)
    return sequences, headers