def main(workspace, accessions_files):
    """Write ``files.csv``, a tab-separated listing of the files of every accession.

    The report is created in ``workspace``.  It starts with a header line and
    then has one line per ``(accession_id, item)`` pair produced by
    ``extract_files`` for each accession file.

    Args:
        workspace: Directory in which ``files.csv`` is created.
        accessions_files: Iterable of paths to accession files.  Each one is
            opened for reading and handed to ``parse_accession_file``.

    Notes:
        * ``project_id`` is the name of the directory containing the
          accession file.
        * ``item['file_location']`` is looked up directly before the row is
          built, so an item without that key raises instead of producing an
          empty column (``KeyError`` if items are plain dicts).
        * Both the report and the accession files are closed even when
          processing raises.
    """
    # Columns copied straight from each item; a missing key is written as ''.
    item_fields = [
        "species", "cell", "label", "readType", "qualities", "file_location",
        "dataType", "rnaExtract", "localization", "lab", "view", "type",
        "replicate",
    ]
    headers = (["project_id", "accession_id"] + item_fields +
               ["file_not_found", "file_size", "configuration_file"])
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    with open(os.path.join(workspace, "files.csv"), "w") as output_file:
        output_file.write('\t'.join(headers) + '\n')
        for input_file in accessions_files:
            # Keep the accession file open for the whole inner loop: the
            # parsed accessions may still read from it while being iterated.
            with open(input_file, 'r') as accession_file:
                accessions = parse_accession_file(accession_file)
                project_id = os.path.basename(os.path.dirname(input_file))
                files = extract_files(accessions)
                for accession_id, item in files:
                    file_info = utils.file_info(item['file_location'])
                    row = [project_id, accession_id]
                    row.extend(item.get(field, '') for field in item_fields)
                    row.extend([
                        file_info['file_not_found'],
                        file_info['file_size'],
                        input_file,
                    ])
                    output_file.write(template % tuple(row))
def main(workspace, accessions_files):
    """Dump the files described by the accession files into ``files.csv``.

    A tab-separated file named ``files.csv`` is written to ``workspace``:
    first a header line, then one line for each ``(accession_id, item)``
    pair that ``extract_files`` returns for the parsed accession file.

    Args:
        workspace: Directory that receives ``files.csv``.
        accessions_files: Iterable of accession file paths; every path is
            opened in read mode and passed to ``parse_accession_file``.

    Notes:
        * The ``project_id`` column is the name of the accession file's
          parent directory; ``configuration_file`` is the path itself.
        * Item attributes that are absent are written as empty strings,
          except ``file_location``, which is also read with ``item[...]``
          for ``utils.file_info`` and therefore must be present.
        * Files are managed with ``with`` so that they are closed when an
          error interrupts processing.
    """
    # Attributes taken from each item via .get(); order matches the header.
    item_keys = (
        "species", "cell", "label", "readType", "qualities", "file_location",
        "dataType", "rnaExtract", "localization", "lab", "view", "type",
        "replicate",
    )
    headers = (("project_id", "accession_id") + item_keys +
               ("file_not_found", "file_size", "configuration_file"))
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    report_path = os.path.join(workspace, "files.csv")
    with open(report_path, "w") as output_file:
        output_file.write('\t'.join(headers) + '\n')
        for input_file in accessions_files:
            # The handle stays open until every row of this accession file
            # has been written, exactly as long as it was kept open before.
            with open(input_file, 'r') as accession_file:
                accessions = parse_accession_file(accession_file)
                project_id = os.path.basename(os.path.dirname(input_file))
                for accession_id, item in extract_files(accessions):
                    file_info = utils.file_info(item['file_location'])
                    values = tuple(item.get(key, '') for key in item_keys)
                    output_file.write(template % (
                        (project_id, accession_id)
                        + values
                        + (file_info['file_not_found'],
                           file_info['file_size'],
                           input_file)))
def main(workspace, annotations_file):
    """Write ``annotations.csv``, a tab-separated table of the annotation sections.

    ``annotations_file`` is parsed as an INI-style configuration.  Each
    section becomes one line of ``annotations.csv`` (created in
    ``workspace``), preceded by a header line.

    Args:
        workspace: Directory in which ``annotations.csv`` is created.
        annotations_file: Open file-like object holding the configuration.
            It is closed by this function, also when an error occurs.

    Raises:
        KeyError: If a section has no ``file_location`` option, or if one of
            the header columns is found neither in the section nor in the
            result of ``utils.file_info``.
    """
    headers = ("species", "version", "url", "file_location",
               "file_not_found", "file_size")
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    try:
        with open(os.path.join(workspace, "annotations.csv"), "w") as output_file:
            output_file.write(template % headers)

            parser = ConfigParser.RawConfigParser()
            # Identity transform: keep option names case-sensitive instead of
            # the parser's default lower-casing.
            parser.optionxform = lambda s: s
            parser.readfp(annotations_file)

            for section in parser.sections():
                data = dict(parser.items(section))
                # Merge in the file details; presumably this is where
                # file_not_found and file_size come from (values returned by
                # utils.file_info override same-named section options).
                data.update(utils.file_info(data['file_location']))
                output_file.write(template % tuple(data[h] for h in headers))
    finally:
        # The function has always closed the handle it was given; do so on
        # the error path too.
        annotations_file.close()
def main(workspace, genomes_file):
    """Write ``genomes.csv``, a tab-separated table of the genome sections.

    ``genomes_file`` is parsed as an INI-style configuration.  Each section
    becomes one line of ``genomes.csv`` (created in ``workspace``), preceded
    by a header line.

    Args:
        workspace: Directory in which ``genomes.csv`` is created.
        genomes_file: Open file-like object holding the configuration.  It
            is closed by this function, also when an error occurs.

    Raises:
        KeyError: If a section has no ``file_location`` option, or if one of
            the header columns is found neither in the section nor in the
            result of ``utils.file_info``.
    """
    headers = ("species", "version", "url", "file_location",
               "file_not_found", "file_size")
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    try:
        with open(os.path.join(workspace, "genomes.csv"), "w") as output_file:
            output_file.write(template % headers)

            parser = ConfigParser.RawConfigParser()
            # Identity transform: keep option names case-sensitive instead of
            # the parser's default lower-casing.
            parser.optionxform = lambda s: s
            parser.readfp(genomes_file)

            for section in parser.sections():
                data = dict(parser.items(section))
                # Merge in the file details; presumably this is where
                # file_not_found and file_size come from (values returned by
                # utils.file_info override same-named section options).
                data.update(utils.file_info(data['file_location']))
                output_file.write(template % tuple(data[h] for h in headers))
    finally:
        # The function has always closed the handle it was given; do so on
        # the error path too.
        genomes_file.close()