Exemplo n.º 1
0
    def test_keep_columns_from_mapping_file(self):
        """Check correct selection of metadata is being done

        Exercises keep_columns_from_mapping_file in three ways: with an empty
        list of columns, with an explicit list of columns to keep, and with
        the fourth positional argument set to True, in which case the listed
        columns are expected to be removed instead of kept.
        """

        # an empty list of columns keeps nothing: every row and the headers
        # are expected to come back empty
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers, [])
        self.assertEqual(out_data, [[], [], [], [], [], [], [], [], []])
        self.assertEqual(out_headers, [])

        # test it can filter a list of columns
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers,
            ['SampleID', 'LinkerPrimerSequence', 'Description'])
        self.assertEqual(out_headers,
                         ['SampleID', 'LinkerPrimerSequence', 'Description'])
        self.assertEqual(out_data, PRE_PROCESS_B)

        # test correct negation of filtering: the listed columns are dropped
        # and only the remaining headers are expected back
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers,
            ['LinkerPrimerSequence', 'Description'], True)
        self.assertEqual(out_data, PRE_PROCESS_A)
        self.assertEqual(out_headers,
                         ['SampleID', 'BarcodeSequence', 'Treatment', 'DOB'])
Exemplo n.º 2
0
    def test_keep_columns_from_mapping_file(self):
        """Check correct selection of metadata is being done

        Exercises keep_columns_from_mapping_file in three ways: with an empty
        list of columns, with an explicit list of columns to keep, and with
        the fourth positional argument set to True, in which case the listed
        columns are expected to be removed instead of kept.
        """

        # an empty list of columns keeps nothing: every row and the headers
        # are expected to come back empty
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers, [])
        self.assertEqual(out_data, [[], [], [], [], [], [], [], [], []])
        self.assertEqual(out_headers, [])

        # test it can filter a list of columns
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers,
            ['SampleID', 'LinkerPrimerSequence', 'Description'])
        self.assertEqual(out_headers,
                         ['SampleID', 'LinkerPrimerSequence', 'Description'])
        self.assertEqual(out_data, PRE_PROCESS_B)

        # test correct negation of filtering: the listed columns are dropped
        # and only the remaining headers are expected back
        out_data, out_headers = keep_columns_from_mapping_file(
            self.mapping_file_data, self.mapping_file_headers,
            ['LinkerPrimerSequence', 'Description'], True)
        self.assertEqual(out_data, PRE_PROCESS_A)
        self.assertEqual(out_headers,
                         ['SampleID', 'BarcodeSequence', 'Treatment', 'DOB'])
Exemplo n.º 3
0
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None, no vectors are written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; if None or empty, the samples are not sorted

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object. When connected_by_header is None only the declaration
    of the (empty) g_vectorPositions array is returned.

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """

    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # nothing to connect the samples by, so only the declaration is emitted
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = set(line[category_index] for line in mapping_file_data)

    # the column used for sorting does not change between categories, so look
    # it up only once
    sorting_index = None
    if sorted_by_header:
        sorting_index = mapping_file_headers.index(sorted_by_header)

    for category in connected_by:
        # a new StringIO is needed for each iteration; else the object
        # won't be usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            # a set makes the membership test constant time per row
            wanted_ids = set(sample_ids)

            # the sample id is read from the first column of each row, which
            # assumes 'SampleID' is the first header of the reduced mapping
            # file -- TODO confirm against keep_columns_from_mapping_file
            to_sort = [line for line in mapping_file_data
                       if line[0] in wanted_ids]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of indexing the result of zip,
            # which is not subscriptable in Python 3 and which fails when
            # there are no rows to sort
            sample_ids = [line[0] for line in
                          sorted(to_sort,
                                 key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append("g_vectorPositions['%s'] = new Array();\n"
                                 % (category))

        for sample_id in sample_ids:
            coords_index = coords_headers.index(sample_id)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n"
                % (category, sample_id,
                   coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
Exemplo n.º 4
0
def format_vectors_to_js(mapping_file_data,
                         mapping_file_headers,
                         coords_data,
                         coords_headers,
                         connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None, no vectors are written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; if None or empty, the samples are not sorted

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object. When connected_by_header is None only the declaration
    of the (empty) g_vectorPositions array is returned.

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """

    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # nothing to connect the samples by, so only the declaration is emitted
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers,
                                    mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = set(line[category_index] for line in mapping_file_data)

    # the column used for sorting does not change between categories, so look
    # it up only once
    sorting_index = None
    if sorted_by_header:
        sorting_index = mapping_file_headers.index(sorted_by_header)

    for category in connected_by:
        # a new StringIO is needed for each iteration; else the object
        # won't be usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            # a set makes the membership test constant time per row
            wanted_ids = set(sample_ids)

            # the sample id is read from the first column of each row, which
            # assumes 'SampleID' is the first header of the reduced mapping
            # file -- TODO confirm against keep_columns_from_mapping_file
            to_sort = [line for line in mapping_file_data
                       if line[0] in wanted_ids]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of indexing the result of zip,
            # which is not subscriptable in Python 3 and which fails when
            # there are no rows to sort
            sample_ids = [
                line[0] for line in
                sorted(to_sort, key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % (category))

        for sample_id in sample_ids:
            coords_index = coords_headers.index(sample_id)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n" %
                (category, sample_id,
                 coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)