def test_keep_columns_from_mapping_file(self):
    """Check correct selection of metadata is being done"""

    # keeping an empty list of columns yields one empty row per sample and
    # an empty list of headers
    out_data, out_headers = keep_columns_from_mapping_file(
        self.mapping_file_data, self.mapping_file_headers, [])
    self.assertEqual(out_data, [[], [], [], [], [], [], [], [], []])
    self.assertEqual(out_headers, [])

    # test it can filter a list of columns
    out_data, out_headers = keep_columns_from_mapping_file(
        self.mapping_file_data, self.mapping_file_headers,
        ['SampleID', 'LinkerPrimerSequence', 'Description'])
    self.assertEqual(out_headers,
                     ['SampleID', 'LinkerPrimerSequence', 'Description'])
    self.assertEqual(out_data, PRE_PROCESS_B)

    # test correct negation of filtering: with the fourth argument set to
    # True the listed columns are removed and the remaining ones are kept
    out_data, out_headers = keep_columns_from_mapping_file(
        self.mapping_file_data, self.mapping_file_headers,
        ['LinkerPrimerSequence', 'Description'], True)
    self.assertEqual(out_data, PRE_PROCESS_A)
    self.assertEqual(out_headers,
                     ['SampleID', 'BarcodeSequence', 'Treatment', 'DOB'])
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the javascript
    array is written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; if None or empty the samples are not sorted

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.

    Category values and sample identifiers are interpolated as-is inside single
    quoted javascript strings, no escaping is performed.
    """
    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # without a header to connect the samples by, there are no vectors to write
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)

    # every distinct value found in the connecting column is a vector
    for category in set(line[category_index] for line in mapping_file_data):
        # create a new StringIO for each iteration; else the object won't be
        # usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string),
            '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)
            ids_in_category = set(sample_ids)

            # the first column of each row is used as the sample identifier
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because that
            # fails when there are no rows and zip is not subscriptable in
            # Python 3
            sample_ids = [line[0] for line in sorted(
                to_sort, key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % (category))

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n"
                % (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the javascript
    array is written
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors; if None or empty the samples are not sorted

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.

    Category values and sample identifiers are interpolated as-is inside single
    quoted javascript strings, no escaping is performed.
    """
    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # without a header to connect the samples by, there are no vectors to write
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)

    # every distinct value found in the connecting column is a vector
    for category in set(line[category_index] for line in mapping_file_data):
        # create a new StringIO for each iteration; else the object won't be
        # usable after the first iteration & you'll get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string),
            '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)
            ids_in_category = set(sample_ids)

            # the first column of each row is used as the sample identifier
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because that
            # fails when there are no rows and zip is not subscriptable in
            # Python 3
            sample_ids = [line[0] for line in sorted(
                to_sort, key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % (category))

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n"
                % (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)