def test_compute_and_overwrite_existing_column_integer_index(self):
    """Overwrite an existing column addressed by its integer index.

    Calls computeColumn with column index 3 and checks that the column
    count and header are unchanged, while the 'data' value on every line
    becomes ``end - start``.
    """
    expected_header = ['chr', 'start', 'end', 'data']
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items before modifying anything
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), expected_header)
    # Compute new values for the column at index 3 ('data' in the header)
    tabfile.computeColumn(3, lambda line: line['end'] - line['start'])
    # Overwriting must neither add a column nor alter the header
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), expected_header)
    results = [233, 323, 4444]
    # Pin the row count first: an index loop on its own would pass
    # vacuously if the file held fewer lines than expected results
    self.assertEqual(len(tabfile), len(results))
    for i, expected in enumerate(results):
        self.assertEqual(tabfile[i]['data'], expected)
def test_compute_midpoint(self):
    """Add a 'midpoint' column computed from 'start' and 'end'.

    Calls computeColumn with a name that is not in the header and checks
    that a fifth column is appended holding ``(end + start) / 2.0`` for
    every line.
    """
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    # Compute midpoint of start and end
    tabfile.computeColumn(
        'midpoint',
        lambda line: (line['end'] + line['start']) / 2.0)
    self.assertEqual(tabfile.nColumns(), 5)
    self.assertEqual(tabfile.header(),
                     ['chr', 'start', 'end', 'data', 'midpoint'])
    results = [117.5, 728.5, 3456]
    # Pin the row count first: an index loop on its own would pass
    # vacuously if the file held fewer lines than expected results
    self.assertEqual(len(tabfile), len(results))
    # The expected values are whole or half numbers, so exact equality
    # is safe here despite the floating-point division
    for i, expected in enumerate(results):
        self.assertEqual(tabfile[i]['midpoint'], expected)
def test_compute_and_overwrite_existing_column_integer_index(self):
    """Recompute an existing column that is referenced by integer index.

    The column at index 3 is replaced with ``end - start``; the test
    verifies that no column is added, the header is untouched, and each
    line's 'data' value matches the expected difference.
    """
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    # Compute new values for data column
    tabfile.computeColumn(3, lambda line: line['end'] - line['start'])
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    results = [233, 323, 4444]
    # Assert the number of lines explicitly so that a short file fails
    # the test instead of silently skipping some expected values
    self.assertEqual(len(tabfile), len(results))
    for row, expected in enumerate(results):
        self.assertEqual(tabfile[row]['data'], expected)
def test_reorder_columns(self):
    """Check that reorderColumns yields the columns in the requested order.
    """
    original = TabFile('test', self.fp, first_line_is_header=True)
    # Layout before reordering
    self.assertEqual(original.nColumns(), 4)
    self.assertEqual(original.header(), ['chr', 'start', 'end', 'data'])
    # Request 'data' directly after 'chr'
    new_columns = ['chr', 'data', 'start', 'end']
    reordered = original.reorderColumns(new_columns)
    self.assertEqual(reordered.nColumns(), 4)
    self.assertEqual(reordered.header(), new_columns)
    # Each line's string form must follow the new column order
    expected_lines = (
        "chr1\t4.6\t1\t234",
        "chr1\t5.7\t567\t890",
        "chr2\t6.8\t1234\t5678",
    )
    for index, expected in enumerate(expected_lines):
        self.assertEqual(str(reordered[index]), expected)
def test_reorder_columns(self):
    """Reorder the columns of a TabFile and verify header and line content.
    """
    initial_header = ['chr', 'start', 'end', 'data']
    tab = TabFile('test', self.fp, first_line_is_header=True)
    # Confirm the starting column count and header
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), initial_header)
    # Apply the new ordering; the result of reorderColumns is what gets
    # inspected from here on
    new_columns = ['chr', 'data', 'start', 'end']
    tab = tab.reorderColumns(new_columns)
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), new_columns)
    first, second, third = tab[0], tab[1], tab[2]
    self.assertEqual(str(first), "chr1\t4.6\t1\t234")
    self.assertEqual(str(second), "chr1\t5.7\t567\t890")
    self.assertEqual(str(third), "chr2\t6.8\t1234\t5678")
def test_set_column_to_constant_value(self):
    """Fill a newly appended column with one constant via transformColumn.
    """
    def constant_plus(value):
        # Ignore the existing cell value and always produce '+'
        return '+'

    tab = TabFile('test', self.fp, first_line_is_header=True)
    # Starting layout
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), ['chr', 'start', 'end', 'data'])
    # Appending 'strand' should add a fifth column to the header
    tab.appendColumn('strand')
    self.assertEqual(tab.nColumns(), 5)
    self.assertEqual(tab.header(),
                     ['chr', 'start', 'end', 'data', 'strand'])
    # After the transform every line must report '+' for 'strand'
    tab.transformColumn('strand', constant_plus)
    for row in tab:
        self.assertEqual(row['strand'], '+')
def test_compute_midpoint(self):
    """Compute a new 'midpoint' column as the mean of 'start' and 'end'.

    Verifies that computeColumn appends the new column to the header and
    that each line's 'midpoint' equals ``(end + start) / 2.0``.
    """
    base_header = ['chr', 'start', 'end', 'data']
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), base_header)
    # Compute midpoint of start and end
    tabfile.computeColumn(
        'midpoint',
        lambda line: (line['end'] + line['start']) / 2.0)
    self.assertEqual(tabfile.nColumns(), 5)
    self.assertEqual(tabfile.header(), base_header + ['midpoint'])
    results = [117.5, 728.5, 3456]
    # Assert the number of lines explicitly so that a short file fails
    # the test instead of silently skipping some expected values
    self.assertEqual(len(tabfile), len(results))
    for row, expected in enumerate(results):
        self.assertEqual(tabfile[row]['midpoint'], expected)
def test_transpose_tab_file(self):
    """Check that transposing swaps the line count and the column count.
    """
    source = TabFile('test', self.fp, first_line_is_header=False)
    transposed = source.transpose()
    # Lines of the source become columns of the transpose ...
    self.assertEqual(len(source), transposed.nColumns())
    # ... and columns of the source become lines of the transpose
    self.assertEqual(len(transposed), source.nColumns())
def test_set_column_to_constant_value(self):
    """Append a 'strand' column and set every value in it to '+'.
    """
    original_header = ['chr', 'start', 'end', 'data']
    extended_header = ['chr', 'start', 'end', 'data', 'strand']
    tab = TabFile('test', self.fp, first_line_is_header=True)
    # Column count and header as loaded
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), original_header)
    # Column count and header once 'strand' has been appended
    tab.appendColumn('strand')
    self.assertEqual(tab.nColumns(), 5)
    self.assertEqual(tab.header(), extended_header)
    # The transform ignores its input, so each line should hold '+'
    tab.transformColumn('strand', lambda _value: '+')
    for entry in tab:
        self.assertEqual(entry['strand'], '+')
def test_transpose_tab_file(self):
    """Transpose a TabFile and compare its dimensions with the original's.
    """
    before = TabFile('test', self.fp, first_line_is_header=False)
    after = before.transpose()
    # Number of lines before must equal number of columns after
    self.assertEqual(len(before), after.nColumns())
    # Number of lines after must equal number of columns before
    self.assertEqual(len(after), before.nColumns())
def test_unexpected_uncommented_header(self):
    """Read a file whose uncommented header line was not declared.

    The TabFile is created without first_line_is_header, and the test
    expects the header text to come back as the first line of data.
    """
    tab = TabFile('test', self.fp)
    self.assertEqual(len(tab), 4, "Input has 4 lines of data")
    self.assertEqual(tab.header(), [], "Wrong header")
    first_line = tab[0]
    self.assertEqual(str(first_line), "chr\tstart\tend\tdata",
                     "Incorrect string representation")
    # With an empty header, looking a line up by column name must fail
    fourth_line = tab[3]
    self.assertRaises(KeyError, fourth_line.__getitem__, 'chr')
    self.assertEqual(tab.nColumns(), 4)
def test_expected_uncommented_header(self):
    """Read a file whose uncommented first line is declared as the header.
    """
    tab = TabFile('test', self.fp, first_line_is_header=True)
    # The header line must not be counted as data
    self.assertEqual(len(tab), 3, "Input has 3 lines of data")
    expected_header = ['chr', 'start', 'end', 'data']
    self.assertEqual(tab.header(), expected_header, "Wrong header")
    # The first data line and a by-name lookup on the last line
    first_line = str(tab[0])
    self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    last_chrom = tab[2]['chr']
    self.assertEqual(last_chrom, 'chr2', "Incorrect data")
    self.assertEqual(tab.nColumns(), 4)
def test_load_data_with_header(self):
    """Load a TabFile taking the column names from the first line.
    """
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    # Column names come from the first line of the input
    self.assertEqual(loaded.header(),
                     ['chr', 'start', 'end', 'data'],
                     "Wrong header")
    # Lines render as tab-separated text and can be indexed by name
    self.assertEqual(str(loaded[0]),
                     "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    third_line = loaded[2]
    self.assertEqual(third_line['chr'], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
def test_reorder_columns_empty_cells(self):
    """Reorder columns when some lines start with an empty cell.
    """
    original = TabFile('test', self.fp, first_line_is_header=True)
    # Layout before any change
    self.assertEqual(original.nColumns(), 4)
    self.assertEqual(original.header(), ['chr', 'start', 'end', 'data'])
    # Blank the 'chr' cell on the first and third lines
    for index in (0, 2):
        original[index]['chr'] = ''
    # Reorder and inspect the result
    new_columns = ['chr', 'data', 'start', 'end']
    reordered = original.reorderColumns(new_columns)
    self.assertEqual(reordered.nColumns(), 4)
    self.assertEqual(reordered.header(), new_columns)
    # Blanked cells must survive as leading empty fields
    expected_lines = (
        "\t4.6\t1\t234",
        "chr1\t5.7\t567\t890",
        "\t6.8\t1234\t5678",
    )
    for index, expected in enumerate(expected_lines):
        self.assertEqual(str(reordered[index]), expected)
def test_reorder_columns_empty_cells(self):
    """Verify reorderColumns keeps empty leading cells in place.
    """
    start_header = ['chr', 'start', 'end', 'data']
    tab = TabFile('test', self.fp, first_line_is_header=True)
    # Confirm the starting column count and header
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), start_header)
    # Empty the 'chr' value on lines 0 and 2; line 1 is left as loaded
    first_line = tab[0]
    first_line['chr'] = ''
    third_line = tab[2]
    third_line['chr'] = ''
    # Apply the new ordering; the result of reorderColumns is what gets
    # inspected from here on
    new_columns = ['chr', 'data', 'start', 'end']
    tab = tab.reorderColumns(new_columns)
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.header(), new_columns)
    # Emptied cells appear as a leading tab in the string form
    self.assertEqual(str(tab[0]), "\t4.6\t1\t234")
    self.assertEqual(str(tab[1]), "chr1\t5.7\t567\t890")
    self.assertEqual(str(tab[2]), "\t6.8\t1234\t5678")
def test_load_data(self):
    """Create a TabFile with a comma delimiter and inspect what was loaded.
    """
    loaded = TabFile('test', self.fp, delimiter=',')
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    # No header was requested, so none should be reported
    self.assertEqual(loaded.header(), [], "Header should be empty")
    # Lines render with the delimiter given at construction
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr1,1,234,4.6",
                     "Incorrect string representation")
    # Without a header, cells are addressed by integer position
    third_line = loaded[2]
    self.assertEqual(third_line[0], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
    # filename() reports the name passed as the first argument
    self.assertEqual(loaded.filename(), 'test')
def test_unexpected_uncommented_header(self):
    """Load a file with an uncommented header that TabFile was not told about.

    No first_line_is_header argument is given; the assertions expect four
    data lines, an empty header, and the header text as line 0.
    """
    loaded = TabFile('test', self.fp)
    self.assertEqual(len(loaded), 4, "Input has 4 lines of data")
    self.assertEqual(loaded.header(), [], "Wrong header")
    self.assertEqual(
        str(loaded[0]),
        "chr\tstart\tend\tdata",
        "Incorrect string representation",
    )
    # Fetch the line outside the context manager so that only the
    # by-name lookup itself is expected to raise KeyError
    last_line = loaded[3]
    with self.assertRaises(KeyError):
        last_line['chr']
    self.assertEqual(loaded.nColumns(), 4)
def test_load_data_setting_explicit_header(self):
    """Load a TabFile while supplying the column names explicitly.

    Both first_line_is_header and column_names are given; the assertions
    expect the supplied names to be the ones reported and usable as keys.
    """
    explicit_names = ('CHROM', 'START', 'STOP', 'VALUES')
    loaded = TabFile('test', self.fp,
                     first_line_is_header=True,
                     column_names=explicit_names)
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    # header() is compared against a list holding the supplied names
    self.assertEqual(loaded.header(), list(explicit_names), "Wrong header")
    first_line = str(loaded[0])
    self.assertEqual(first_line, "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    # Lookups by name use the explicit names
    third_line = loaded[2]
    self.assertEqual(third_line['CHROM'], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
def test_load_data(self):
    """Load comma-delimited data into a new TabFile and check its contents.
    """
    source_name = 'test'
    tab = TabFile(source_name, self.fp, delimiter=',')
    self.assertEqual(len(tab), 3, "Input has 3 lines of data")
    # Created without a header, so header() should be an empty list
    self.assertEqual(tab.header(), [], "Header should be empty")
    # String form of a line uses ',' between values
    self.assertEqual(
        str(tab[0]),
        "chr1,1,234,4.6",
        "Incorrect string representation",
    )
    # Positional access to the first cell of the third line
    self.assertEqual(tab[2][0], 'chr2', "Incorrect data")
    self.assertEqual(tab.nColumns(), 4)
    self.assertEqual(tab.filename(), source_name)
def test_load_data_with_header(self):
    """Create a TabFile that uses the first input line as its header.
    """
    header_names = ['chr', 'start', 'end', 'data']
    tab = TabFile('test', self.fp, first_line_is_header=True)
    # Only the lines after the header count as data
    self.assertEqual(len(tab), 3, "Input has 3 lines of data")
    self.assertEqual(tab.header(), header_names, "Wrong header")
    first_line = tab[0]
    self.assertEqual(
        str(first_line),
        "chr1\t1\t234\t4.6",
        "Incorrect string representation",
    )
    # Header names act as keys into each line
    chrom_of_third = tab[2]['chr']
    self.assertEqual(chrom_of_third, 'chr2', "Incorrect data")
    self.assertEqual(tab.nColumns(), 4)
def test_expected_uncommented_header(self):
    """Load a tab file whose uncommented header line is expected.

    first_line_is_header=True is passed, and the assertions expect three
    data lines with the header names available as keys.
    """
    loaded = TabFile('test', self.fp, first_line_is_header=True)
    self.assertEqual(len(loaded), 3, "Input has 3 lines of data")
    self.assertEqual(
        loaded.header(),
        ['chr', 'start', 'end', 'data'],
        "Wrong header",
    )
    # Line 0 is the first data line, not the header text
    self.assertEqual(
        str(loaded[0]),
        "chr1\t1\t234\t4.6",
        "Incorrect string representation",
    )
    final_line = loaded[2]
    self.assertEqual(final_line['chr'], 'chr2', "Incorrect data")
    self.assertEqual(loaded.nColumns(), 4)
def test_apply_operation_to_column(self):
    """Divide the values in the 'data' column by 10 using transformColumn.

    The transformed values are compared with assertAlmostEqual because
    the division is done in binary floating point, where e.g. 4.6 / 10
    evaluates to 0.45999999999999996 rather than exactly 0.46.
    """
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    # Divide data column by 10 (float divisor, so never integer division)
    tabfile.transformColumn('data', lambda x: x / 10.0)
    results = [0.46, 0.57, 0.68]
    # Pin the row count first: an index loop on its own would pass
    # vacuously if the file held fewer lines than expected results
    self.assertEqual(len(tabfile), len(results))
    for i, expected in enumerate(results):
        # Tolerant comparison avoids spurious rounding-error failures
        self.assertAlmostEqual(tabfile[i]['data'], expected)
def test_apply_operation_to_column(self):
    """Apply a divide-by-10 operation to every value in the 'data' column.

    Results are checked with assertAlmostEqual: exact equality is
    unreliable for the output of floating-point division
    (4.6 / 10 gives 0.45999999999999996, not 0.46).
    """
    expected_header = ['chr', 'start', 'end', 'data']
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), expected_header)
    # Divide data column by 10; the float divisor rules out integer
    # division whatever type the loaded values have
    tabfile.transformColumn('data', lambda x: x / 10.0)
    results = [0.46, 0.57, 0.68]
    # Assert the number of lines explicitly so that a short file fails
    # the test instead of silently skipping some expected values
    self.assertEqual(len(tabfile), len(results))
    for row, expected in enumerate(results):
        self.assertAlmostEqual(tabfile[row]['data'], expected)
def test_load_data_setting_explicit_header(self):
    """Create a TabFile whose header is given through column_names.
    """
    tab = TabFile(
        'test',
        self.fp,
        first_line_is_header=True,
        column_names=('CHROM', 'START', 'STOP', 'VALUES'),
    )
    self.assertEqual(len(tab), 3, "Input has 3 lines of data")
    # The names passed in are expected back from header() as a list
    reported_header = tab.header()
    self.assertEqual(reported_header,
                     ['CHROM', 'START', 'STOP', 'VALUES'],
                     "Wrong header")
    self.assertEqual(str(tab[0]),
                     "chr1\t1\t234\t4.6",
                     "Incorrect string representation")
    # The explicit names are the keys used for lookups
    chrom_of_third = tab[2]['CHROM']
    self.assertEqual(chrom_of_third, 'chr2', "Incorrect data")
    self.assertEqual(tab.nColumns(), 4)
def test_apply_operation_to_column(self):
    """Divide values in the 'data' column by 10 and check the results.

    The transformed values are compared to two decimal places, since the
    floating-point division does not reproduce the expected literals
    exactly (e.g. 0.45999999999999996 != 0.46).
    """
    tabfile = TabFile('test', self.fp, first_line_is_header=True)
    # Check number of columns and header items
    self.assertEqual(tabfile.nColumns(), 4)
    self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
    # Divide data column by 10
    tabfile.transformColumn('data', lambda x: x / 10.0)
    results = [0.46, 0.57, 0.68]
    # Pin the row count first: an index loop on its own would pass
    # vacuously if the file held fewer lines than expected results
    self.assertEqual(len(tabfile), len(results))
    for i, expected in enumerate(results):
        # places=2 keeps the two-decimal tolerance without round-tripping
        # the value through a formatted string
        self.assertAlmostEqual(tabfile[i]['data'], expected, places=2)