def tiers2xls(tier_dirs, output_dir):
    """Collect the tier files of every tier directory into one XLS workbook.

    One sheet is added per entry of ``tier_dirs``. Each sheet receives the
    tab-separated contents of that directory's ``tiering_allvars.tier0.txt``
    through ``tiering_allvars.tier4.txt``, appended in tier order. The first
    line (header) of tier 0 is kept; the first line of every later tier file
    is dropped so the header appears only once per sheet.

    Args:
        tier_dirs: iterable of directory paths, each containing the five
            tier files.
        output_dir: directory in which ``tiered_output.xls`` is saved.

    Returns:
        The path of the saved workbook.
    """
    # make new excel output worksheet
    wb = xlwt.Workbook()

    for tier_dir in tier_dirs:
        # The sheet is named by whatever general_utils.get_file_or_dir_name
        # returns for the directory (presumably its base name -- confirm in
        # general_utils).
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        row_num = 0  # next row to write to within the excel sheet

        tier_files = [
            os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier)
            for tier in range(5)
        ]

        for idx, tier_file in enumerate(tier_files):
            # 'with' guarantees the handle is closed, even if a write fails.
            with open(tier_file, 'r') as tierh:
                if idx != 0:
                    # skip header for all files except the first one
                    tierh.readline()
                for line in tierh:
                    cells = line.rstrip("\n").split("\t")
                    for col, value in enumerate(cells):
                        sheet.write(row_num, col, value)
                    row_num += 1

    # save the excel file
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path
def tiers2xls(tier_dirs, output_dir, yaml_commands):
    """Collect the tier files of every tier directory into one XLS workbook.

    One sheet is added per entry of ``tier_dirs``. Each sheet receives the
    tab-separated contents of that directory's ``tiering_allvars.tier0.txt``
    through ``tiering_allvars.tier4.txt``, appended in tier order. The first
    line (header) of tier 0 is kept; the first line of every later tier file
    is dropped so the header appears only once per sheet.

    At most the configured number of variants is written per tier file; the
    rest of that file is omitted and a warning is printed once. Cell values
    longer than 32767 characters are truncated, also with a warning.

    Args:
        tier_dirs: iterable of directory paths, each containing the five
            tier files.
        output_dir: directory in which ``tiered_output.xls`` is saved.
        yaml_commands: nested mapping from which the per-tier variant limit
            is read at ``[yaml_keys.kModules][yaml_keys.kTiering]
            [yaml_keys.kTMaxNumVariantsPerTier]``.

    Returns:
        The path of the saved workbook.
    """
    print('Saving tiered variants to Excel worksheet')

    max_cell_chars = 32767  # longest string a single XLS cell can hold
    # Read the limit once instead of repeating the nested lookup per cell.
    max_variants = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering][
        yaml_keys.kTMaxNumVariantsPerTier]

    # make new excel output worksheet
    # latin-1 is needed since our SQLite database gives results using latin-1
    # encoding (so we'd otherwise get UnicodeDecode errors when attempting to
    # decode certain characters if we specified nothing or UTF-8 as the
    # encoding for the workbook).
    wb = xlwt.Workbook(encoding='latin-1')

    for tier_dir in tier_dirs:
        # The sheet is named by whatever general_utils.get_file_or_dir_name
        # returns for the directory (presumably its base name -- confirm in
        # general_utils).
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        row_num = 0  # next row to write to within the excel sheet

        tier_files = [
            os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier)
            for tier in range(5)
        ]

        for idx, tier_file in enumerate(tier_files):
            # 'with' guarantees the handle is closed, even if a write fails.
            with open(tier_file, 'r') as tierh:
                if idx != 0:
                    # skip header for all files except the first one
                    tierh.readline()

                # In tier 0 the header is still line 0, so variants start at
                # line 1; in later tiers the header was consumed above and
                # variants start at line 0. Accounting for this makes every
                # tier honour the same limit.
                first_variant_line = 1 if idx == 0 else 0

                for line_num, line in enumerate(tierh):
                    # Decide per row (not per cell) whether the limit is hit,
                    # so omitted rows neither advance row_num nor leave
                    # partially written rows behind. Every later line is over
                    # the limit too, so stop reading this file.
                    if line_num - first_variant_line >= max_variants:
                        print('warning: number of variants exceeded for ' + str(tier_file) + '. Omitting remaining variants in this tier from XLS file. Current max variants per tier: ' + str(max_variants) + '. \nTo output more variants per tier, modify this value in modules.yml.')
                        break

                    cells = line.rstrip("\n").split("\t")
                    for col, value in enumerate(cells):
                        if len(value) > max_cell_chars:
                            print('warning: tierFile ' + str(tier_file) + ' line ' + str(line_num) + ' col ' + str(col) + ' has length ' + str(len(value)) + ' longer than 32767 chars. Truncating.')
                            value = value[:max_cell_chars]
                        sheet.write(row_num, col, value)
                    row_num += 1

    # save the excel file
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path
def tiers2xls(tier_dirs, output_dir, yaml_commands):
    """Write all tiered variant files to a single Excel (XLS) workbook.

    For each directory in ``tier_dirs`` a sheet is created and filled with
    the tab-separated rows of ``tiering_allvars.tier0.txt`` through
    ``tiering_allvars.tier4.txt`` from that directory, in tier order. Only
    the tier 0 header line is kept; the first line of each later tier file
    is discarded.

    Rows beyond the configured maximum number of variants per tier are left
    out of the sheet (one warning per tier file). Any cell value longer than
    32767 characters is truncated to that length (one warning per cell).

    Args:
        tier_dirs: iterable of directory paths, each containing the five
            tier files.
        output_dir: directory in which ``tiered_output.xls`` is saved.
        yaml_commands: nested mapping from which the per-tier variant limit
            is read at ``[yaml_keys.kModules][yaml_keys.kTiering]
            [yaml_keys.kTMaxNumVariantsPerTier]``.

    Returns:
        The path of the saved workbook.
    """
    print('Saving tiered variants to Excel worksheet')

    cell_char_limit = 32767  # longest string a single XLS cell can hold
    # Looked up once; the original repeated this nested lookup for every cell.
    variant_limit = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering][
        yaml_keys.kTMaxNumVariantsPerTier]

    # make new excel output worksheet
    # latin-1 is needed since our SQLite database gives results using latin-1
    # encoding (so we'd otherwise get UnicodeDecode errors when attempting to
    # decode certain characters if we specified nothing or UTF-8 as the
    # encoding for the workbook).
    wb = xlwt.Workbook(encoding='latin-1')

    for tier_dir in tier_dirs:
        # Sheet name comes from general_utils.get_file_or_dir_name
        # (presumably the directory's base name -- confirm in general_utils).
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        next_row = 0  # number of row to write to within excel sheet

        tier_paths = [
            os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier)
            for tier in range(5)
        ]

        for idx, tier_path in enumerate(tier_paths):
            # Context manager closes the file; the original never closed it.
            with open(tier_path, 'r') as tier_handle:
                if idx != 0:
                    # skip header for all files except the first one
                    tier_handle.readline()

                # Line index of the first variant: 1 in tier 0 (line 0 is the
                # header, which is always written), 0 in later tiers (header
                # already consumed). Using it keeps the limit identical for
                # every tier.
                first_variant_line = 1 if idx == 0 else 0

                for line_num, line in enumerate(tier_handle):
                    variants_written = line_num - first_variant_line
                    if variants_written >= variant_limit:
                        # Checked once per row so a skipped row does not
                        # advance next_row or get partially written. All
                        # remaining lines would be skipped as well, so stop.
                        print('warning: number of variants exceeded for ' + str(tier_path) + '. Omitting remaining variants in this tier from XLS file. Current max variants per tier: ' + str(variant_limit) + '. \nTo output more variants per tier, modify this value in modules.yml.')
                        break

                    for col, value in enumerate(line.rstrip("\n").split("\t")):
                        if len(value) > cell_char_limit:
                            print('warning: tierFile ' + str(tier_path) + ' line ' + str(line_num) + ' col ' + str(col) + ' has length ' + str(len(value)) + ' longer than 32767 chars. Truncating.')
                            value = value[:cell_char_limit]
                        sheet.write(next_row, col, value)
                    next_row += 1

    # save the excel file
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path