def _splitFile(self): totalSize = os.path.getsize(self._inFile.name) if totalSize <= self._splitSize: # do not split file, the file isn't so big. return None fileNames = [] with open(self._inFile.name, 'r+b') as f: size = 0 lines = [] for line in f: if not is_number(line.rstrip().split('\t')[-1]): print 'hej',line size += len(line) lines.append(line) if size >= self._splitSize: tmpFile = NamedTemporaryFile(delete=False) fileNames.append(tmpFile) tmpFile.write(''.join(lines)) tmpFile.close() del lines[:] size = 0 if size > 0: tmpFile = NamedTemporaryFile(delete=False) fileNames.append(tmpFile) tmpFile.write(''.join(lines)) tmpFile.close() for tmp_file in fileNames: for line in open(tmp_file.name, 'rb'): if not is_number(line.rstrip().split('\t')[-1]): print line return fileNames
def _splitFile(self): totalSize = os.path.getsize(self._inFile.name) if totalSize <= self._splitSize: # do not split file, the file isn't so big. return None fileNames = [] with open(self._inFile.name, 'r+b') as f: size = 0 lines = [] for line in f: if not is_number(line.rstrip().split('\t')[-1]): print('hej', line) size += len(line) lines.append(line) if size >= self._splitSize: tmpFile = NamedTemporaryFile(delete=False) fileNames.append(tmpFile) tmpFile.write(''.join(lines)) tmpFile.close() del lines[:] size = 0 if size > 0: tmpFile = NamedTemporaryFile(delete=False) fileNames.append(tmpFile) tmpFile.write(''.join(lines)) tmpFile.close() for tmp_file in fileNames: for line in open(tmp_file.name, 'rb'): if not is_number(line.rstrip().split('\t')[-1]): print(line) return fileNames
def test_negative():
    """Check that is_number rejects values that are not numbers."""
    # Checked in order: None, the empty string, a letter, a lone minus sign.
    non_numbers = (None, '', 'g', '-')
    for candidate in non_numbers:
        assert not is_number(candidate)
def check_predictions(mutation_taster = None, avsift = None, poly_phen = None):
    """Count how many prediction scores pass their cutoff.

    A score only takes part when ``is_number.is_number`` accepts it; it is
    then converted with ``float`` and compared to its cutoff:

    * ``avsift`` counts when it is at most 0.05
    * ``mutation_taster`` counts when it is at least 0.05
    * ``poly_phen`` counts when it is at least 0.85

    Returns:
        The number of scores that passed, an int from 0 to 3.
    """
    passed = 0
    if is_number.is_number(avsift) and float(avsift) <= 0.05:
        passed += 1
    if is_number.is_number(mutation_taster) and float(mutation_taster) >= 0.05:
        passed += 1
    if is_number.is_number(poly_phen) and float(poly_phen) >= 0.85:
        passed += 1
    return passed
def check_predictions(mutation_taster=None, avsift=None, poly_phen=None):
    """Return one point for every prediction score that meets its threshold.

    Scores rejected by ``is_number.is_number`` are ignored. Accepted scores
    are converted to ``float`` and tested in this order: ``avsift`` (<= 0.05),
    ``mutation_taster`` (>= 0.05), ``poly_phen`` (>= 0.85).

    Returns:
        The sum of the points, an int between 0 and 3.
    """
    # (raw score, threshold test) pairs, evaluated in the listed order.
    rules = (
        (avsift, lambda value: value <= 0.05),
        (mutation_taster, lambda value: value >= 0.05),
        (poly_phen, lambda value: value >= 0.85),
    )
    return sum(
        1
        for raw, meets_threshold in rules
        if is_number.is_number(raw) and meets_threshold(float(raw))
    )
def ref_gene_parser(self, line, info, line_count):
    """Fill ``info`` from one tab-separated refGene line and return it.

    Columns used (0-based): 1 -> ``transcript_id``, 2 -> ``chrom``,
    4 and 5 -> ``start`` / ``stop``, 12 -> ``gene_id`` and ``feature_id``.
    ``start`` and ``stop`` are only set when ``is_number.is_number`` accepts
    both columns. ``line_count`` is not used.

    Returns:
        The same ``info`` mapping that was passed in, updated in place.
    """
    columns = line.split('\t')
    chrom_field = columns[2]
    # When the field contains 'hr' (as in a 'chr' prefix) the first three
    # characters are dropped.
    info['chrom'] = chrom_field[3:] if 'hr' in chrom_field else chrom_field
    span_is_numeric = (is_number.is_number(columns[4])
                       and is_number.is_number(columns[5]))
    if span_is_numeric:
        info['start'] = int(columns[4])
        info['stop'] = int(columns[5])
    # The original author marked this column choice with '???' (unsure).
    info['transcript_id'] = columns[1]
    info['gene_id'] = columns[12]
    info['feature_id'] = info['gene_id']
    return info
def ccds_parser(self, line, info, line_count):
    """Fill ``info`` from one tab-separated ccds line and return it.

    Columns used (0-based): 0 -> ``chrom``, 1 -> ``transcript_id``,
    2 -> ``gene_id`` and ``feature_id``, 7 and 8 -> ``start`` / ``stop``.
    ``start`` and ``stop`` are only set when ``is_number.is_number`` accepts
    both columns. ``line_count`` is not used.

    Returns:
        The same ``info`` mapping that was passed in, updated in place.
    """
    columns = line.split('\t')
    chrom_field = columns[0]
    # When the field contains 'hr' (as in a 'chr' prefix) the first three
    # characters are dropped.
    info['chrom'] = chrom_field[3:] if 'hr' in chrom_field else chrom_field
    info['transcript_id'] = columns[1]
    info['gene_id'] = columns[2]
    info['feature_id'] = info['gene_id']
    span_is_numeric = (is_number.is_number(columns[7])
                       and is_number.is_number(columns[8]))
    if span_is_numeric:
        info['start'] = int(columns[7])
        info['stop'] = int(columns[8])
    return info
def main():
    """Command-line entry point: drop comment lines, check the rest, then sort.

    Command-line arguments:
        infile: path of the file to process.
        -out / --outfile: optional output path (``None`` when omitted).

    Every line of ``infile`` that does not start with ``#`` is copied into a
    ``NamedTemporaryFile(delete=False)``. The copy is then read back and each
    line whose last tab-separated field fails ``is_number`` is printed with
    the prefix ``du``. ``no errors`` is printed afterwards regardless of the
    check result. Finally the temporary file and the output path are handed
    to ``FileSort`` and its ``sort()`` method is called.
    """
    parser = argparse.ArgumentParser(description="Check files.")
    parser.add_argument('infile', type=str, nargs=1,
                        help='Specify the path to the file of interest.')
    parser.add_argument('-out', '--outfile', type=str, nargs=1, default=[None],
                        help='Specify the path to the outfile.')
    args = parser.parse_args()
    infile = args.infile[0]

    new_file = NamedTemporaryFile(delete=False)
    with open(infile, 'rb') as f:
        for line in f:
            # Binary-mode lines are bytes, so the prefix must be bytes too.
            if not line.startswith(b'#'):
                new_file.write(line)

    # Push the buffered data to disk (the file is later accessed by name) and
    # rewind; without the seek the read below starts at end-of-file and the
    # check silently sees no lines at all.
    new_file.flush()
    new_file.seek(0)
    for line in new_file:
        # NOTE(review): on Python 3 the field is bytes -- confirm that
        # is_number accepts bytes input.
        if not is_number(line.rstrip().split(b'\t')[-1]):
            print('du', line)
    print('no errors')

    fs = FileSort(new_file, args.outfile[0])
    fs.sort()
def check_base_conservation(gerp_base_score=None):
    """Turn a base-level conservation (GERP) score into 0, 1 or 2 points.

    Returns:
        0 when ``is_number.is_number`` rejects the score or it is below 2,
        1 when it is at least 2 but below 4, and 2 when it is at least 4.
    """
    if not is_number.is_number(gerp_base_score):
        return 0
    gerp = float(gerp_base_score)
    if gerp >= 4:
        return 2
    if gerp >= 2:
        return 1
    return 0
def check_phylop_score(phylop=None):
    """Turn a Phylop score into 0, 1 or 2 points.

    Returns:
        0 when ``is_number.is_number`` rejects the score or it is below 0.95,
        1 when it is at least 0.95 but below 0.9984188612, and 2 when it is
        at least 0.9984188612.
    """
    if not is_number.is_number(phylop):
        return 0
    value = float(phylop)
    if value >= 0.9984188612:
        return 2
    if value >= 0.95:
        return 1
    return 0
def check_base_conservation(gerp_base_score = None):
    """Score a variant from its base-level conservation value.

    The value is only considered when ``is_number.is_number`` accepts it.

    Returns:
        2 for a value of 4 or more, 1 for a value of 2 or more, otherwise 0.
    """
    points = 0
    if is_number.is_number(gerp_base_score):
        conservation = float(gerp_base_score)
        # Test the higher cutoff first so only one tier applies.
        points = 2 if conservation >= 4 else (1 if conservation >= 2 else 0)
    return points
def check_phylop_score(phylop = None):
    """Score a variant from its Phylop value.

    The value is only considered when ``is_number.is_number`` accepts it.

    Returns:
        2 for a value of 0.9984188612 or more, 1 for a value of 0.95 or
        more, otherwise 0.
    """
    points = 0
    if is_number.is_number(phylop):
        conservation = float(phylop)
        # Test the higher cutoff first so only one tier applies.
        points = 2 if conservation >= 0.9984188612 else (1 if conservation >= 0.95 else 0)
    return points
def gtf_parser(self, line, info, line_count):
    """Fill ``info`` from one tab-separated gtf line and return it.

    Columns used (0-based): 0 -> ``chrom``, 3 and 4 -> ``start`` / ``stop``
    (only set when ``is_number.is_number`` accepts both), 8 -> the
    ``;``-separated attribute list, from which ``transcript_id`` and
    ``gene_id`` are taken. ``feature_id`` is set to ``info['gene_id']``.
    ``line_count`` is not used.

    Returns:
        The same ``info`` mapping that was passed in, updated in place.
    """
    columns = line.split('\t')
    seqname = columns[0]
    # When the field contains 'hr' (as in a 'chr' prefix) the first three
    # characters are dropped.
    info['chrom'] = seqname[3:] if 'hr' in seqname else seqname
    if is_number.is_number(columns[3]) and is_number.is_number(columns[4]):
        info['start'] = int(columns[3])
        info['stop'] = int(columns[4])
    # The piece after the final ';' is discarded.
    attributes = columns[8].split(';')[:-1]
    for attribute in attributes:
        tokens = attribute.split()
        key = tokens[0]
        if key in ('transcript_id', 'gene_id'):
            # Drop the first and last character of the value (the
            # surrounding quote characters, presumably).
            info[key] = tokens[1][1:-1]
    info['feature_id'] = info['gene_id']
    return info
def get_freq_score(frequency):
    """Map a variant frequency to a score.

    Returns:
        3 when ``is_number.is_number`` rejects ``frequency`` (not existing in
        the database), 2 when it is at most 0.005, 1 when it is at most
        0.02, and -12 otherwise (common variant).
    """
    if not is_number.is_number(frequency):
        # If not existing in database
        return 3
    freq = float(frequency)
    if freq <= 0.005:
        return 2
    if freq <= 0.02:
        return 1
    # If common variant:
    return -12
def get_freq_score(frequency):
    """Return the score that belongs to a variant frequency.

    Returns:
        2 for a frequency of at most 0.005, 1 for a frequency of at most
        0.02, -12 for anything higher (common variant), and 3 when
        ``is_number.is_number`` does not accept ``frequency`` (not existing
        in the database).
    """
    known = is_number.is_number(frequency)
    if known:
        observed = float(frequency)
        # The last alternative is the common-variant penalty.
        return 2 if observed <= 0.005 else (1 if observed <= 0.02 else -12)
    # If not existing in database
    return 3
def main():
    """Command-line entry point: filter out comment lines, check, then sort.

    Command-line arguments:
        infile: path of the file to process.
        -out / --outfile: optional output path (``None`` when omitted).

    Lines of ``infile`` that do not start with ``#`` are written to a
    ``NamedTemporaryFile(delete=False)``. That file is then re-read from the
    beginning and every line whose last tab-separated field fails
    ``is_number`` is printed with the prefix ``du``. ``no errors`` is printed
    afterwards in every case. The temporary file and the output path are
    then passed to ``FileSort`` and ``sort()`` is called on the result.
    """
    parser = argparse.ArgumentParser(description="Check files.")
    parser.add_argument('infile', type=str, nargs=1,
                        help='Specify the path to the file of interest.')
    parser.add_argument('-out', '--outfile', type=str, nargs=1, default=[None],
                        help='Specify the path to the outfile.')
    args = parser.parse_args()
    infile = args.infile[0]

    new_file = NamedTemporaryFile(delete=False)
    with open(infile, 'rb') as f:
        for line in f:
            # Bytes prefix: the file is read in binary mode.
            if not line.startswith(b'#'):
                new_file.write(line)

    # Flush so the data is on disk for code that reopens the file by name,
    # and rewind so the check below actually sees the lines just written
    # (reading right after writing starts at end-of-file).
    new_file.flush()
    new_file.seek(0)
    for line in new_file:
        # NOTE(review): on Python 3 the field is bytes -- confirm that
        # is_number accepts bytes input.
        if not is_number(line.rstrip().split(b'\t')[-1]):
            # Single-argument call form prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('du %s' % (line,))
    print('no errors')

    fs = FileSort(new_file, args.outfile[0])
    fs.sort()
def test_integer():
    """Check that is_number accepts an integer, as a string and as an int."""
    for candidate in ('1', 1):
        assert is_number(candidate)
def test_zero():
    """Check that is_number accepts zero, as an int and as a string."""
    for candidate in (0, '0'):
        assert is_number(candidate)
def test_float():
    """Check that is_number accepts a float, as a string and as a float."""
    for candidate in ('0.22', 0.22):
        assert is_number(candidate)