def test_format_mapping_file(self):
    """format_mapping_file output should equal the expected mapping file"""
    header_row = ['SampleID', 'col1', 'col0', 'Description']
    sample_rows = [
        ['bsample', 'v1_3', 'v0_3', 'd1'],
        ['asample', 'aval', 'another', 'd2'],
    ]
    comment_lines = ['this goes after headers', 'this too']

    # headers, rows and comments are passed positionally, in that order
    observed = format_mapping_file(header_row, sample_rows, comment_lines)
    self.assertEqual(observed, example_mapping_file)
def filter_mapping_file_from_mapping_f(mapping_f, sample_ids_to_keep,
                                       negate=False):
    """ Filter rows from a metadata mapping file

    Inputs:
    mapping_f: mapping file input, handed unchanged to parse_mapping_file
    sample_ids_to_keep: iterable of hashable sample identifiers; each row is
    matched against it using the row's first element
    negate: if false (default) keep only the rows whose sample id is listed,
    if true keep only the rows whose sample id is NOT listed

    Output:
    the value returned by format_mapping_file for the parsed header and the
    retained rows (in their original order)

    Notes:
    The comments returned by parse_mapping_file are not forwarded to
    format_mapping_file.
    """
    mapping_data, header, _ = parse_mapping_file(mapping_f)

    # a set gives constant-time membership tests and states the intent
    sample_ids_to_keep = set(sample_ids_to_keep)

    # normalize so that any truthy/falsy value compares correctly below
    negate = bool(negate)

    # a row is kept exactly when its membership differs from negate:
    # listed & not negated, or not listed & negated
    filtered_mapping_data = [
        mapping_datum for mapping_datum in mapping_data
        if (mapping_datum[0] in sample_ids_to_keep) != negate]

    return format_mapping_file(header, filtered_mapping_data)
def filter_mapping_file_from_mapping_f(mapping_f, sample_ids_to_keep,
                                       negate=False):
    """ Filter rows from a metadata mapping file

    Inputs:
    mapping_f: mapping file input, handed unchanged to parse_mapping_file
    sample_ids_to_keep: iterable of hashable sample identifiers; each row is
    matched against it using the row's first element
    negate: if false (default) keep only the rows whose sample id is listed,
    if true keep only the rows whose sample id is NOT listed

    Output:
    the value returned by format_mapping_file for the parsed header and the
    retained rows (in their original order)

    Notes:
    The comments returned by parse_mapping_file are not forwarded to
    format_mapping_file.
    """
    mapping_data, header, _ = parse_mapping_file(mapping_f)

    # a set gives constant-time membership tests and states the intent
    sample_ids_to_keep = set(sample_ids_to_keep)

    # normalize so that any truthy/falsy value compares correctly below
    negate = bool(negate)

    # a row is kept exactly when its membership differs from negate:
    # listed & not negated, or not listed & negated
    filtered_mapping_data = [
        mapping_datum for mapping_datum in mapping_data
        if (mapping_datum[0] in sample_ids_to_keep) != negate]

    return format_mapping_file(header, filtered_mapping_data)
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the javascript
    array is returned
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used
    are the ones belonging to the master coords i.e. the first element.
    """
    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # nothing to connect, only the array declaration is emitted
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = set(line[category_index] for line in mapping_file_data)

    for category in connected_by:
        # build a new StringIO for each iteration; a single object would not
        # be usable after the first iteration and you would get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)

            # set lookup avoids rescanning the id list for every row
            ids_in_category = set(sample_ids)
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because a zip
            # object cannot be subscripted on Python 3
            sample_ids = [line[0] for line in
                          sorted(to_sort,
                                 key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % category)

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n" %
                (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected; if None only the declaration of the javascript
    array is returned
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used
    are the ones belonging to the master coords i.e. the first element.
    """
    js_vectors_string = ['\nvar g_vectorPositions = new Array();\n']

    # nothing to connect, only the array declaration is emitted
    if connected_by_header is None:
        return ''.join(js_vectors_string)

    # check if we are processing jackknifed input, if so just get the master
    if isinstance(coords_data, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    columns_to_keep = ['SampleID', connected_by_header]

    # do not add None if sorted_by_header is None or empty
    if sorted_by_header:
        columns_to_keep.append(sorted_by_header)

    # reduce the amount of data by keeping the required fields only
    mapping_file_data, mapping_file_headers = keep_columns_from_mapping_file(
        mapping_file_data, mapping_file_headers, columns_to_keep)

    # format the mapping file to use this with the filtering function
    mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

    category_index = mapping_file_headers.index(connected_by_header)
    connected_by = set(line[category_index] for line in mapping_file_data)

    for category in connected_by:
        # build a new StringIO for each iteration; a single object would not
        # be usable after the first iteration and you would get an error
        sample_ids = sample_ids_from_metadata_description(
            StringIO(mf_string), '%s:%s' % (connected_by_header, category))

        # if there is a sorting header, sort the coords using these values
        if sorted_by_header:
            sorting_index = mapping_file_headers.index(sorted_by_header)

            # set lookup avoids rescanning the id list for every row
            ids_in_category = set(sample_ids)
            to_sort = [line for line in mapping_file_data
                       if line[0] in ids_in_category]

            # get the sorted sample ids from the sorted-reduced mapping file;
            # a comprehension is used instead of zip(*rows)[0] because a zip
            # object cannot be subscripted on Python 3
            sample_ids = [line[0] for line in
                          sorted(to_sort,
                                 key=lambda x: float(x[sorting_index]))]

        # each category value is a new vector
        js_vectors_string.append(
            "g_vectorPositions['%s'] = new Array();\n" % category)

        for s in sample_ids:
            coords_index = coords_headers.index(s)

            # print the first three elements of each coord for each sample
            js_vectors_string.append(
                "g_vectorPositions['%s']['%s'] = %s;\n" %
                (category, s, coords_data[coords_index, :3].tolist()))

    return ''.join(js_vectors_string)
def write_mf(f, _df):
    """Write the rows of _df to the path f formatted as a mapping file

    The header is 'SampleID' followed by the column names of _df, the rows
    are the tuples yielded by _df.itertuples(), and a newline is appended to
    the text returned by format_mapping_file before writing it.
    """
    from emperor.qiime_backports.format import format_mapping_file

    with open(f, 'w') as handle:
        header = ['SampleID'] + _df.columns.tolist()
        rows = list(_df.itertuples())
        handle.write(format_mapping_file(header, rows) + '\n')
def write_mf(f, _df):
    """Write the rows of _df to the path f formatted as a mapping file

    The header is 'SampleID' followed by the column names of _df, the rows
    are the tuples yielded by _df.itertuples(), and a newline is appended to
    the text returned by format_mapping_file before writing it.
    """
    from emperor.qiime_backports.format import format_mapping_file

    with open(f, 'w') as handle:
        header = ['SampleID'] + _df.columns.tolist()
        rows = list(_df.itertuples())
        handle.write(format_mapping_file(header, rows) + '\n')