Пример #1
0
 def test_compute_and_overwrite_existing_column_integer_index(self):
     """Overwrite an existing column, addressed by integer index, with computed values

     Column index 3 ('data') is recomputed as ``end - start`` for every line.
     The number of columns and the header must be unchanged afterwards.
     """
     expected_header = ['chr','start','end','data']
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items before modifying anything
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),expected_header)
     # Compute new values for the data column, referenced by its index
     tabfile.computeColumn(3,lambda line: line['end'] - line['start'])
     # Overwriting must neither add a column nor alter the header
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),expected_header)
     results = [233,323,4444]
     # Require one line per expected value, otherwise the loop below could
     # pass vacuously if no (or too few) lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         self.assertEqual(tabfile[i]['data'],expected)
Пример #2
0
 def test_compute_midpoint(self):
     """Append a 'midpoint' column computed from the start and end columns

     The new column holds ``(end + start)/2.0`` for each line and is expected
     to appear after the four existing columns.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items before the computation
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # 'midpoint' is not an existing column, so a fifth column should appear
     tabfile.computeColumn('midpoint',lambda line: (line['end'] + line['start'])/2.0)
     self.assertEqual(tabfile.nColumns(),5)
     self.assertEqual(tabfile.header(),['chr','start','end','data','midpoint'])
     results = [117.5,728.5,3456]
     # Require one line per expected value, otherwise the loop below could
     # pass vacuously if no (or too few) lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         self.assertEqual(tabfile[i]['midpoint'],expected)
Пример #3
0
 def test_compute_and_overwrite_existing_column_integer_index(self):
     """Recompute an existing column that is referenced by its integer index

     The 'data' column (index 3) is replaced with ``end - start``; the
     column count and header are expected to stay exactly as they were.
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])

     def interval_length(line):
         # New value for the data column: distance between start and end
         return line['end'] - line['start']

     # Compute new values for data column
     tabfile.computeColumn(3, interval_length)
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
     results = [233, 323, 4444]
     # Fail explicitly on a line-count mismatch instead of silently
     # checking fewer values than expected
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         self.assertEqual(tabfile[i]['data'], expected)
Пример #4
0
 def test_reorder_columns(self):
     """Check the header and line contents of the object returned by reorderColumns
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Layout before reordering
     n_columns_before = tabfile.nColumns()
     self.assertEqual(n_columns_before,4)
     header_before = tabfile.header()
     self.assertEqual(header_before,['chr','start','end','data'])
     # Request 'data' directly after 'chr'
     new_columns = ['chr','data','start','end']
     tabfile = tabfile.reorderColumns(new_columns)
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),new_columns)
     # Each line should be rendered with its values in the new order
     expected_lines = ("chr1\t4.6\t1\t234",
                       "chr1\t5.7\t567\t890",
                       "chr2\t6.8\t1234\t5678")
     for index,expected in enumerate(expected_lines):
         self.assertEqual(str(tabfile[index]),expected)
Пример #5
0
 def test_reorder_columns(self):
     """Reorder the columns of a TabFile and verify header and line output
     """
     old_order = ['chr', 'start', 'end', 'data']
     new_columns = ['chr', 'data', 'start', 'end']
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Confirm the starting layout
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), old_order)
     # Replace the working object with the reordered result
     tabfile = tabfile.reorderColumns(new_columns)
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), new_columns)
     # String form of each line must follow the new column order
     for row, text in ((0, "chr1\t4.6\t1\t234"),
                       (1, "chr1\t5.7\t567\t890"),
                       (2, "chr2\t6.8\t1234\t5678")):
         self.assertEqual(str(tabfile[row]), text)
Пример #6
0
 def test_set_column_to_constant_value(self):
     """Fill an appended column with one constant value via transformColumn
     """
     base_header = ['chr','start','end','data']
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Starting layout
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),base_header)
     # Appending 'strand' should add a fifth column at the end of the header
     tabfile.appendColumn('strand')
     self.assertEqual(tabfile.nColumns(),5)
     self.assertEqual(tabfile.header(),base_header + ['strand'])

     def always_plus(x):
         # Ignore the current cell value and return the constant
         return '+'

     tabfile.transformColumn('strand',always_plus)
     # Every line must now carry '+' in the strand column
     for row in tabfile:
         self.assertEqual(row['strand'],'+')
Пример #7
0
 def test_compute_midpoint(self):
     """Compute the midpoint of the start and end columns into a new column

     A 'midpoint' column holding ``(end + start) / 2.0`` is expected to be
     appended after the four existing columns.
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])

     def midpoint(line):
         # Dividing by 2.0 keeps the result a float
         return (line['end'] + line['start']) / 2.0

     # Compute midpoint of start and end
     tabfile.computeColumn('midpoint', midpoint)
     self.assertEqual(tabfile.nColumns(), 5)
     self.assertEqual(tabfile.header(),
                      ['chr', 'start', 'end', 'data', 'midpoint'])
     results = [117.5, 728.5, 3456]
     # Fail explicitly on a line-count mismatch instead of silently
     # checking fewer values than expected
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         self.assertEqual(tabfile[i]['midpoint'], expected)
Пример #8
0
 def test_transpose_tab_file(self):
     """Check that transposing a TabFile exchanges its line and column counts
     """
     source = TabFile('test',self.fp,first_line_is_header=False)
     transposed = source.transpose()
     # Number of lines in the source must equal columns in the transpose ...
     self.assertEqual(len(source),transposed.nColumns())
     # ... and number of lines in the transpose must equal columns in the source
     self.assertEqual(len(transposed),source.nColumns())
Пример #9
0
 def test_set_column_to_constant_value(self):
     """Use transformColumn to set every value in a column to the same constant
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Verify the initial four-column layout
     n_columns = tabfile.nColumns()
     self.assertEqual(n_columns, 4)
     header = tabfile.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data'])
     # Append the column that will receive the constant
     tabfile.appendColumn('strand')
     n_columns = tabfile.nColumns()
     self.assertEqual(n_columns, 5)
     header = tabfile.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data', 'strand'])
     # The transformation ignores its input and always yields '+'
     tabfile.transformColumn('strand', lambda x: '+')
     for entry in tabfile:
         strand = entry['strand']
         self.assertEqual(strand, '+')
Пример #10
0
 def test_transpose_tab_file(self):
     """Transpose a TabFile and compare dimensions of the original and the result
     """
     tabfile1 = TabFile('test', self.fp, first_line_is_header=False)
     tabfile2 = tabfile1.transpose()
     # Lines of the original versus columns of the transpose
     original_lines = len(tabfile1)
     transposed_columns = tabfile2.nColumns()
     self.assertEqual(original_lines, transposed_columns)
     # Lines of the transpose versus columns of the original
     transposed_lines = len(tabfile2)
     original_columns = tabfile1.nColumns()
     self.assertEqual(transposed_lines, original_columns)
Пример #11
0
 def test_unexpected_uncommented_header(self):
     """Load a file whose uncommented first line is a header TabFile was not told about

     Without first_line_is_header the header line is expected to be kept as
     the first line of data, and lookups by column name are expected to fail.
     """
     tabfile = TabFile('test',self.fp)
     n_lines = len(tabfile)
     self.assertEqual(n_lines,4,"Input has 4 lines of data")
     header = tabfile.header()
     self.assertEqual(header,[],"Wrong header")
     first_line = str(tabfile[0])
     self.assertEqual(first_line,"chr\tstart\tend\tdata","Incorrect string representation")
     # No column names were set, so a lookup by name must raise KeyError
     lookup_by_name = tabfile[3].__getitem__
     self.assertRaises(KeyError,lookup_by_name,'chr')
     self.assertEqual(tabfile.nColumns(),4)
Пример #12
0
 def test_expected_uncommented_header(self):
     """Load a file whose uncommented first line is declared to be the header

     With first_line_is_header=True the first line should supply the column
     names and not be counted as data.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     n_lines = len(tabfile)
     self.assertEqual(n_lines,3,"Input has 3 lines of data")
     column_names = tabfile.header()
     self.assertEqual(column_names,['chr','start','end','data'],"Wrong header")
     # The first data line is the one that follows the header
     first_line = str(tabfile[0])
     self.assertEqual(first_line,"chr1\t1\t234\t4.6","Incorrect string representation")
     # Values can be fetched by column name
     chrom = tabfile[2]['chr']
     self.assertEqual(chrom,'chr2',"Incorrect data")
     self.assertEqual(tabfile.nColumns(),4)
Пример #13
0
 def test_load_data_with_header(self):
     """Create a TabFile that takes its column names from the first line
     """
     expected_header = ['chr','start','end','data']
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # The header line must not be counted as data
     self.assertEqual(len(tabfile),3,"Input has 3 lines of data")
     self.assertEqual(tabfile.header(),expected_header,"Wrong header")
     # First data line, rendered as a string
     line0 = tabfile[0]
     self.assertEqual(str(line0),"chr1\t1\t234\t4.6","Incorrect string representation")
     # Lookup by column name on the last line
     line2 = tabfile[2]
     self.assertEqual(line2['chr'],'chr2',"Incorrect data")
     self.assertEqual(tabfile.nColumns(),4)
Пример #14
0
 def test_reorder_columns_empty_cells(self):
     """Reorder columns when some lines have an empty first cell

     The empty 'chr' cells must be preserved in the reordered output, which
     shows up as a leading tab in the string form of those lines.
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Starting layout
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # Blank out the 'chr' cell on the first and last lines
     for index in (0,2):
         tabfile[index]['chr'] = ''
     # Reorder so that 'data' follows 'chr'
     new_columns = ['chr','data','start','end']
     tabfile = tabfile.reorderColumns(new_columns)
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),new_columns)
     # Lines 0 and 2 start with an empty cell
     expected_lines = ("\t4.6\t1\t234",
                       "chr1\t5.7\t567\t890",
                       "\t6.8\t1234\t5678")
     for index,expected in enumerate(expected_lines):
         self.assertEqual(str(tabfile[index]),expected)
Пример #15
0
 def test_reorder_columns_empty_cells(self):
     """Check reorderColumns output when leading cells have been emptied
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Verify the layout before touching any cells
     n_columns = tabfile.nColumns()
     self.assertEqual(n_columns, 4)
     header = tabfile.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data'])
     # Empty the 'chr' value on the first and third lines
     first_line = tabfile[0]
     first_line['chr'] = ''
     third_line = tabfile[2]
     third_line['chr'] = ''
     # Reorder
     new_columns = ['chr', 'data', 'start', 'end']
     tabfile = tabfile.reorderColumns(new_columns)
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), new_columns)
     # Emptied cells appear as a leading tab in the rendered line
     for row, text in ((0, "\t4.6\t1\t234"),
                       (1, "chr1\t5.7\t567\t890"),
                       (2, "\t6.8\t1234\t5678")):
         self.assertEqual(str(tabfile[row]), text)
Пример #16
0
 def test_load_data(self):
     """Load comma-delimited data into a new TabFile without a header
     """
     tabfile = TabFile('test',self.fp,delimiter=',')
     n_lines = len(tabfile)
     self.assertEqual(n_lines,3,"Input has 3 lines of data")
     # No header was requested, so none should be reported
     header = tabfile.header()
     self.assertEqual(header,[],"Header should be empty")
     # Lines are rendered using the supplied delimiter
     first_line = str(tabfile[0])
     self.assertEqual(first_line,"chr1,1,234,4.6","Incorrect string representation")
     # Without column names, values are fetched by integer index
     first_value = tabfile[2][0]
     self.assertEqual(first_value,'chr2',"Incorrect data")
     self.assertEqual(tabfile.nColumns(),4)
     # The name passed to the constructor is reported back by filename()
     self.assertEqual(tabfile.filename(),'test')
Пример #17
0
 def test_unexpected_uncommented_header(self):
     """Read a file with an uncommented header line that was not declared as one

     The header line is expected to be loaded as ordinary data, leaving the
     TabFile without column names.
     """
     tabfile = TabFile('test', self.fp)
     # Header line plus the data lines are all counted
     line_count = len(tabfile)
     self.assertEqual(line_count, 4, "Input has 4 lines of data")
     reported_header = tabfile.header()
     self.assertEqual(reported_header, [], "Wrong header")
     # The would-be header is the first data line
     rendered = str(tabfile[0])
     self.assertEqual(rendered, "chr\tstart\tend\tdata",
                      "Incorrect string representation")
     # Name-based lookup has no column names to resolve against
     getter = tabfile[3].__getitem__
     self.assertRaises(KeyError, getter, 'chr')
     self.assertEqual(tabfile.nColumns(), 4)
Пример #18
0
 def test_load_data_setting_explicit_header(self):
     """Load a TabFile while supplying explicit column names

     The names given via column_names are expected to be reported as the
     header even though first_line_is_header is also set.
     """
     explicit_names = ('CHROM','START','STOP','VALUES')
     tabfile = TabFile('test',self.fp,first_line_is_header=True,
                       column_names=explicit_names)
     self.assertEqual(len(tabfile),3,"Input has 3 lines of data")
     # header() is compared against a list built from the supplied names
     self.assertEqual(tabfile.header(),list(explicit_names),"Wrong header")
     first_line = str(tabfile[0])
     self.assertEqual(first_line,"chr1\t1\t234\t4.6","Incorrect string representation")
     # Data must be reachable through the explicit names
     chrom = tabfile[2]['CHROM']
     self.assertEqual(chrom,'chr2',"Incorrect data")
     self.assertEqual(tabfile.nColumns(),4)
Пример #19
0
 def test_load_data(self):
     """Create a TabFile from comma-separated input and check what was loaded
     """
     name = 'test'
     tabfile = TabFile(name, self.fp, delimiter=',')
     self.assertEqual(len(tabfile), 3, "Input has 3 lines of data")
     # No header is expected when none was requested
     self.assertEqual(tabfile.header(), [], "Header should be empty")
     # String form of a line uses the comma delimiter
     line0 = tabfile[0]
     self.assertEqual(str(line0), "chr1,1,234,4.6",
                      "Incorrect string representation")
     # Positional access to the first value of the last line
     line2 = tabfile[2]
     self.assertEqual(line2[0], 'chr2', "Incorrect data")
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.filename(), name)
Пример #20
0
 def test_load_data_with_header(self):
     """Load a TabFile treating the first line of input as the header
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Only the lines after the header count as data
     line_count = len(tabfile)
     self.assertEqual(line_count, 3, "Input has 3 lines of data")
     header = tabfile.header()
     self.assertEqual(header, ['chr', 'start', 'end', 'data'],
                      "Wrong header")
     rendered = str(tabfile[0])
     self.assertEqual(rendered, "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Column names from the header can be used as keys
     value = tabfile[2]['chr']
     self.assertEqual(value, 'chr2', "Incorrect data")
     column_count = tabfile.nColumns()
     self.assertEqual(column_count, 4)
Пример #21
0
 def test_expected_uncommented_header(self):
     """Read a file whose uncommented header line is declared via first_line_is_header
     """
     expected_header = ['chr', 'start', 'end', 'data']
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # The header line is consumed, leaving three data lines
     self.assertEqual(len(tabfile), 3, "Input has 3 lines of data")
     self.assertEqual(tabfile.header(), expected_header,
                      "Wrong header")
     # First data line in its tab-delimited string form
     first = tabfile[0]
     self.assertEqual(str(first), "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Key-based access using a header name
     last = tabfile[2]
     self.assertEqual(last['chr'], 'chr2', "Incorrect data")
     self.assertEqual(tabfile.nColumns(), 4)
Пример #22
0
 def test_apply_operation_to_column(self):
     """Divide the values in the 'data' column by 10 using transformColumn

     The transformed values are compared approximately because floating
     point division is inexact (e.g. 4.6/10.0 evaluates to
     0.45999999999999996, which is not equal to 0.46).
     """
     tabfile = TabFile('test',self.fp,first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(),4)
     self.assertEqual(tabfile.header(),['chr','start','end','data'])
     # Divide data column by 10; the float divisor rules out integer division
     tabfile.transformColumn('data',lambda x: x/10.0)
     results = [0.46,0.57,0.68]
     # Require one line per expected value, otherwise the loop below could
     # pass vacuously if no (or too few) lines were loaded
     self.assertEqual(len(tabfile),len(results))
     for i,expected in enumerate(results):
         # Exact equality would fail on floating point rounding error
         self.assertAlmostEqual(tabfile[i]['data'],expected)
Пример #23
0
 def test_apply_operation_to_column(self):
     """Apply a divide-by-10 transformation to the 'data' column

     Results are checked with an approximate comparison, since the
     floating point quotients are not exactly representable (e.g.
     4.6 / 10.0 evaluates to 0.45999999999999996 rather than 0.46).
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])

     def divide_by_ten(x):
         # Float divisor so the result never depends on integer division
         return x / 10.0

     # Divide data column by 10
     tabfile.transformColumn('data', divide_by_ten)
     results = [0.46, 0.57, 0.68]
     # Fail explicitly on a line-count mismatch instead of silently
     # checking fewer values than expected
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         # assertEqual would fail here on floating point rounding error
         self.assertAlmostEqual(tabfile[i]['data'], expected)
Пример #24
0
 def test_load_data_setting_explicit_header(self):
     """Create a TabFile whose header is given explicitly through column_names
     """
     names = ('CHROM', 'START', 'STOP', 'VALUES')
     tabfile = TabFile('test', self.fp, first_line_is_header=True,
                       column_names=names)
     # The first line of input is still treated as a header, not as data
     line_count = len(tabfile)
     self.assertEqual(line_count, 3, "Input has 3 lines of data")
     # The supplied names are what header() should report
     header = tabfile.header()
     self.assertEqual(header, list(names), "Wrong header")
     rendered = str(tabfile[0])
     self.assertEqual(rendered, "chr1\t1\t234\t4.6",
                      "Incorrect string representation")
     # Lookups use the explicit names
     value = tabfile[2]['CHROM']
     self.assertEqual(value, 'chr2', "Incorrect data")
     self.assertEqual(tabfile.nColumns(), 4)
0
 def test_apply_operation_to_column(self):
     """Divide values in the 'data' column by 10 and check them to two decimal places

     The comparison is limited to two decimal places so that the test does
     not fail on floating point rounding errors (e.g.
     0.45999999999999996 != 0.46).
     """
     tabfile = TabFile('test', self.fp, first_line_is_header=True)
     # Check number of columns and header items
     self.assertEqual(tabfile.nColumns(), 4)
     self.assertEqual(tabfile.header(), ['chr', 'start', 'end', 'data'])
     # Divide data column by 10
     tabfile.transformColumn('data', lambda x: x / 10.0)
     results = [0.46, 0.57, 0.68]
     # Require one line per expected value, otherwise the loop below could
     # pass vacuously if no (or too few) lines were loaded
     self.assertEqual(len(tabfile), len(results))
     for i, expected in enumerate(results):
         # Numeric comparison to two places replaces the previous
         # round trip through a "%.2f" formatted string
         self.assertAlmostEqual(tabfile[i]['data'], expected, places=2)