def test_from_file_error(self):
    """Check the exception type raised by OrdinationResults.from_file.

    Every file listed in ``self.fferror_test_paths`` must raise
    ``FileFormatError`` and every file listed in
    ``self.verror_test_paths`` must raise ``ValueError``.
    """
    # NOTE(review): the 'U' open mode is deprecated in Python 3 and is
    # rejected by open() from Python 3.11 on -- confirm which interpreters
    # this suite has to support before changing it.
    cases = ((self.fferror_test_paths, FileFormatError),
             (self.verror_test_paths, ValueError))
    for bad_paths, expected_error in cases:
        for bad_path in bad_paths:
            with open(get_data_path(bad_path), 'U') as handle:
                with npt.assert_raises(expected_error):
                    OrdinationResults.from_file(handle)
def test_from_file_error(self):
    """Check the exception type raised by OrdinationResults.from_file.

    Every file listed in ``self.fferror_test_paths`` must raise
    ``FileFormatError`` and every file listed in
    ``self.verror_test_paths`` must raise ``ValueError``.
    """
    # NOTE(review): the 'U' open mode is deprecated in Python 3 and is
    # rejected by open() from Python 3.11 on -- confirm which interpreters
    # this suite has to support before changing it.
    cases = ((self.fferror_test_paths, FileFormatError),
             (self.verror_test_paths, ValueError))
    for bad_paths, expected_error in cases:
        for bad_path in bad_paths:
            with open(get_data_path(bad_path), 'U') as handle:
                with npt.assert_raises(expected_error):
                    OrdinationResults.from_file(handle)
def test_from_file(self):
    """Yield one equality check per test file and per input flavour.

    Each path in ``self.test_paths`` is parsed twice with
    ``OrdinationResults.from_file`` -- once from an open file handle and
    once from the file name -- and each parsed result is compared with
    the matching entry of ``self.scores``.
    """
    for expected, rel_path in zip(self.scores, self.test_paths):
        for file_type in ('file like', 'file name'):
            path = get_data_path(rel_path)
            if file_type == 'file name':
                # Hand the path itself to the parser
                observed = OrdinationResults.from_file(path)
            elif file_type == 'file like':
                # Hand an already opened handle to the parser
                with open(path) as handle:
                    observed = OrdinationResults.from_file(handle)
            yield self.check_OrdinationResults_equal, observed, expected
def test_from_file(self):
    """Yield one equality check per test file and per input flavour.

    Each path in ``self.test_paths`` is parsed twice with
    ``OrdinationResults.from_file`` -- once from an open file handle and
    once from the file name -- and each parsed result is compared with
    the matching entry of ``self.scores``.
    """
    for expected, rel_path in zip(self.scores, self.test_paths):
        for file_type in ('file like', 'file name'):
            path = get_data_path(rel_path)
            if file_type == 'file name':
                # Hand the path itself to the parser
                observed = OrdinationResults.from_file(path)
            elif file_type == 'file like':
                # Hand an already opened handle to the parser
                with open(path) as handle:
                    observed = OrdinationResults.from_file(handle)
            yield self.check_OrdinationResults_equal, observed, expected
def setUp(self):
    """Build the fixtures shared by the tests.

    Sets ``self.ord_res`` (ordination results parsed from PCOA_STRING),
    ``self.headers`` (four column names) and ``self.data`` (one
    four-field row per sample, in the same column order as the headers).
    """
    self.ord_res = OrdinationResults.from_file(StringIO(PCOA_STRING))

    self.headers = ['SampleID', 'Treatment', 'DOB', 'Description']

    self.data = [
        ['PC.354', 'Control', '20061218', 'Ctrol_mouse_I.D._354'],
        ['PC.355', 'Control', '20061218', 'Control_mouse_I.D._355'],
        ['PC.356', 'Control', '20061126', 'Control_mouse_I.D._356'],
        ['PC.481', 'Control', '20070314', 'Control_mouse_I.D._481'],
        ['PC.593', 'Control', '20071210', 'Control_mouse_I.D._593'],
        ['PC.607', 'Fast', '20071112', 'Fasting_mouse_I.D._607'],
        ['PC.634', 'Fast', '20080116', 'Fasting_mouse_I.D._634'],
        ['PC.635', 'Fast', '20080116', 'Fasting_mouse_I.D._635'],
        ['PC.636', 'Fast', '20080116', 'Fasting_mouse_I.D._636'],
    ]
def parse_coords(lines):
    """Parse an skbio ordination results file.

    The parsing itself is delegated to skbio's
    ``OrdinationResults.from_file``; see
    skbio.math.stats.ordination.OrdinationResults.from_file for the file
    format.

    Returns a 4-tuple, in this order:
        - sample labels (``site_ids``)
        - coords (``site``; rows = samples, cols = axes in descending
          order)
        - eigenvalues (``eigvals``)
        - percent variance explained (``proportion_explained``)
    """
    ordination = OrdinationResults.from_file(lines)

    sample_labels = ordination.site_ids
    coords = ordination.site
    eigenvalues = ordination.eigvals
    pct_explained = ordination.proportion_explained

    return (sample_labels, coords, eigenvalues, pct_explained)
def parse_coords(lines):
    """Parse an skbio ordination results file.

    The parsing itself is delegated to skbio's
    ``OrdinationResults.from_file``; see
    skbio.math.stats.ordination.OrdinationResults.from_file for the file
    format.

    Returns a 4-tuple, in this order:
        - sample labels (``site_ids``)
        - coords (``site``; rows = samples, cols = axes in descending
          order)
        - eigenvalues (``eigvals``)
        - percent variance explained (``proportion_explained``)
    """
    ordination = OrdinationResults.from_file(lines)

    sample_labels = ordination.site_ids
    coords = ordination.site
    eigenvalues = ordination.eigvals
    pct_explained = ordination.proportion_explained

    return (sample_labels, coords, eigenvalues, pct_explained)
if __name__ == '__main__':
    # Script entry point: read the command-line options, load the
    # ordination results and the mapping file, and validate the requested
    # categories against the mapping file columns.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    ord_fp = opts.input_fp
    mapping_fp = opts.map_fp
    categories = opts.categories.split(',')
    output_dir = opts.output_dir
    sort_by = opts.sort_by
    algorithm = opts.algorithm
    axes = opts.axes
    weighted = opts.weight_by_vector
    window_size = opts.window_size

    # Parse the ordination results
    # NOTE(review): the 'U' open mode is deprecated in Python 3 and is
    # rejected by open() from Python 3.11 on -- confirm which interpreters
    # this script has to support before changing it.
    with open(ord_fp, 'U') as f:
        ord_res = OrdinationResults.from_file(f)

    # Parse the mapping file
    with open(mapping_fp, 'U') as f:
        map_dict = parse_mapping_file_to_dict(f)[0]
    metamap = pd.DataFrame.from_dict(map_dict, orient='index')

    for category in categories:
        if category not in metamap.keys():
            # Name the single category that is missing, not the whole
            # list of requested categories.
            option_parser.error("Category %s does not exist in the mapping "
                                "file" % category)

    sort_category = None
    if sort_by:
        if sort_by == 'SampleID':
            sort_category = None
if __name__ == '__main__':
    # Script entry point: read the command-line options, load the
    # ordination results and the mapping file, and validate the requested
    # categories against the mapping file columns.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    ord_fp = opts.input_fp
    mapping_fp = opts.map_fp
    categories = opts.categories.split(',')
    output_dir = opts.output_dir
    sort_by = opts.sort_by
    algorithm = opts.algorithm
    axes = opts.axes
    weighted = opts.weight_by_vector
    window_size = opts.window_size

    # Parse the ordination results
    # NOTE(review): the 'U' open mode is deprecated in Python 3 and is
    # rejected by open() from Python 3.11 on -- confirm which interpreters
    # this script has to support before changing it.
    with open(ord_fp, 'U') as f:
        ord_res = OrdinationResults.from_file(f)

    # Parse the mapping file
    with open(mapping_fp, 'U') as f:
        map_dict = parse_mapping_file_to_dict(f)[0]
    metamap = pd.DataFrame.from_dict(map_dict, orient='index')

    for category in categories:
        if category not in metamap.keys():
            # Name the single category that is missing, not the whole
            # list of requested categories.
            option_parser.error("Category %s does not exist in the mapping "
                                "file" % category)

    sort_category = None
    if sort_by:
        if sort_by == 'SampleID':
            sort_category = None
def get_procrustes_results(
        coords_f1, coords_f2, sample_id_map=None, randomize=None,
        max_dimensions=None, get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two ordination results files.

    Parameters
    ----------
    coords_f1, coords_f2
        Inputs handed to ``OrdinationResults.from_file``; the first is the
        reference, the second is the one that may be randomized.
    sample_id_map : optional
        When truthy, the sample ids of both inputs are translated with
        ``map_sample_ids(sample_ids, sample_id_map)`` before matching.
    randomize : callable, optional
        When truthy, called as ``randomize(coords2)`` on the reordered
        coordinates of the second input; its return value replaces them.
    max_dimensions : int, optional
        When truthy, both coordinate matrices are passed through
        ``filter_coords_matrix`` and the eigenvalues / proportions
        explained are truncated to their first ``max_dimensions`` entries.
        Otherwise the shorter eigenvalue / proportion vectors are padded
        with zeros to the length of the longer ones.
    get_eigenvalues : callable
        Called as ``get_eigenvalues(eigvals1, eigvals2)`` to produce the
        eigenvalues stored in both transformed results.
    get_percent_variation_explained : callable
        Called as ``get_percent_variation_explained(pct_var1, pct_var2)``
        to produce the proportions explained stored in both transformed
        results.

    Returns
    -------
    tuple
        ``(transformed_coords1, transformed_coords2, m_squared,
        randomized_coords2)``. The first two are ``OrdinationResults``
        built from the output of ``procrustes``; ``m_squared`` is the
        third value returned by ``procrustes``; ``randomized_coords2`` is
        an ``OrdinationResults`` holding the randomized coordinates, or
        ``None`` when ``randomize`` is not given.

    Raises
    ------
    ValueError
        If the two inputs share no sample ids.
    """
    # Parse the PCoA files
    ord_res_1 = OrdinationResults.from_file(coords_f1)
    ord_res_2 = OrdinationResults.from_file(coords_f2)

    sample_ids1 = ord_res_1.site_ids
    coords1 = ord_res_1.site
    eigvals1 = ord_res_1.eigvals
    pct_var1 = ord_res_1.proportion_explained

    sample_ids2 = ord_res_2.site_ids
    coords2 = ord_res_2.site
    eigvals2 = ord_res_2.eigvals
    pct_var2 = ord_res_2.proportion_explained

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only the samples present in both files can be compared. Check for
    # an empty overlap before reordering anything so the caller always
    # gets this error rather than whatever an empty ordering triggers.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')

    # Put the rows of both matrices in the same, shared sample order
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = OrdinationResults(
            eigvals=eigvals2,
            proportion_explained=pct_var2,
            site=coords2,
            site_ids=order)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]
    else:
        # Zero-pad the shorter set of vectors so both have equal length
        if len(pct_var1) > len(pct_var2):
            pct_var2 = append(pct_var2,
                              zeros(len(pct_var1) - len(pct_var2)))
            eigvals2 = append(eigvals2,
                              zeros(len(eigvals1) - len(eigvals2)))
        elif len(pct_var1) < len(pct_var2):
            pct_var1 = append(pct_var1,
                              zeros(len(pct_var2) - len(pct_var1)))
            eigvals1 = append(eigvals1,
                              zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m1),
        site_ids=order)

    transformed_coords2 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m2),
        site_ids=order)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)
def get_procrustes_results(
        coords_f1, coords_f2, sample_id_map=None, randomize=None,
        max_dimensions=None, get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two ordination results files.

    Parameters
    ----------
    coords_f1, coords_f2
        Inputs handed to ``OrdinationResults.from_file``; the first is the
        reference, the second is the one that may be randomized.
    sample_id_map : optional
        When truthy, the sample ids of both inputs are translated with
        ``map_sample_ids(sample_ids, sample_id_map)`` before matching.
    randomize : callable, optional
        When truthy, called as ``randomize(coords2)`` on the reordered
        coordinates of the second input; its return value replaces them.
    max_dimensions : int, optional
        When truthy, both coordinate matrices are passed through
        ``filter_coords_matrix`` and the eigenvalues / proportions
        explained are truncated to their first ``max_dimensions`` entries.
        Otherwise the shorter eigenvalue / proportion vectors are padded
        with zeros to the length of the longer ones.
    get_eigenvalues : callable
        Called as ``get_eigenvalues(eigvals1, eigvals2)`` to produce the
        eigenvalues stored in both transformed results.
    get_percent_variation_explained : callable
        Called as ``get_percent_variation_explained(pct_var1, pct_var2)``
        to produce the proportions explained stored in both transformed
        results.

    Returns
    -------
    tuple
        ``(transformed_coords1, transformed_coords2, m_squared,
        randomized_coords2)``. The first two are ``OrdinationResults``
        built from the output of ``procrustes``; ``m_squared`` is the
        third value returned by ``procrustes``; ``randomized_coords2`` is
        an ``OrdinationResults`` holding the randomized coordinates, or
        ``None`` when ``randomize`` is not given.

    Raises
    ------
    ValueError
        If the two inputs share no sample ids.
    """
    # Parse the PCoA files
    ord_res_1 = OrdinationResults.from_file(coords_f1)
    ord_res_2 = OrdinationResults.from_file(coords_f2)

    sample_ids1 = ord_res_1.site_ids
    coords1 = ord_res_1.site
    eigvals1 = ord_res_1.eigvals
    pct_var1 = ord_res_1.proportion_explained

    sample_ids2 = ord_res_2.site_ids
    coords2 = ord_res_2.site
    eigvals2 = ord_res_2.eigvals
    pct_var2 = ord_res_2.proportion_explained

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only the samples present in both files can be compared. Check for
    # an empty overlap before reordering anything so the caller always
    # gets this error rather than whatever an empty ordering triggers.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')

    # Put the rows of both matrices in the same, shared sample order
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = OrdinationResults(
            eigvals=eigvals2,
            proportion_explained=pct_var2,
            site=coords2,
            site_ids=order)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]
    else:
        # Zero-pad the shorter set of vectors so both have equal length
        if len(pct_var1) > len(pct_var2):
            pct_var2 = append(pct_var2,
                              zeros(len(pct_var1) - len(pct_var2)))
            eigvals2 = append(eigvals2,
                              zeros(len(eigvals1) - len(eigvals2)))
        elif len(pct_var1) < len(pct_var2):
            pct_var1 = append(pct_var1,
                              zeros(len(pct_var2) - len(pct_var1)))
            eigvals1 = append(eigvals1,
                              zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m1),
        site_ids=order)

    transformed_coords2 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m2),
        site_ids=order)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)