def create_mapped_reads_count(self, parent=None, name=None, url=None,
                              reference_genome=None, metainfo=None):
    """
    Import a local or remote mapped reads count file as a Mapped Reads Count file.

    Arguments that are left empty are not written to the metainfo, so the
    values already present in ``metainfo`` are kept as they are.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param url: URL of the file
    :param reference_genome: reference genome accession
    :type reference_genome: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if reference_genome:
        metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME, reference_genome)
    if url:
        metainfo.add_external_link(BioMetainfo.DATA_LINK, url)

    return self.__invoke_loader(
        'genestack/mappedReadsCountLoader', 'importFile', parent, metainfo)
def create_microarray_assay(self, parent, name=None, urls=None,
                            method=None, organism=None, metainfo=None):
    """
    Add a Genestack Microarray Assay to an Experiment folder.

    An exception will be raised if ``parent`` is not an Experiment.
    ``name`` and ``urls`` are required fields; each can be passed as an
    argument or provided through the metainfo object.

    :param parent: accession of the parent experiment
    :type parent: str
    :param name: name of the file
    :type name: str
    :param urls: list of urls
    :type urls: list
    :param method: method
    :type method: str
    :param organism: organism
    :type organism: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if method:
        metainfo.add_string(BioMetainfo.METHOD, method)

    # an empty or missing ``urls`` adds no links at all
    for link in urls or ():
        metainfo.add_external_link(BioMetainfo.READS_LINK, link)

    return self.__invoke_loader(
        'genestack/experimentLoader', 'addMicroarrayAssay', parent, metainfo)
def create_report_file(self, parent=None, name=None, urls=None, metainfo=None):
    """
    Create a Genestack Report File from a local or remote data file.

    ``name`` and ``urls`` are required fields.
    They can be specified through the arguments or via a metainfo object.

    :param parent: accession of parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param urls: a single URL / local file path, or a list of them
    :type urls: list or str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    metainfo = metainfo or BioMetainfo()
    if name:
        metainfo.add_string(BioMetainfo.NAME, name)

    if urls:
        # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        # A bare string is ONE url: iterating it directly would register
        # a separate data link for every single character.
        if isinstance(urls, string_types):
            urls = [urls]
        for url in urls:
            metainfo.add_external_link(BioMetainfo.DATA_LINK, url)

    return self.__invoke_loader('genestack/reportLoader', 'importFile', parent, metainfo)
def create_dbnsfp(self, parent=None, url=None, name=None, organism=None, metainfo=None):
    """
    Create a Genestack Variation Database file marked as ``dbNSFP``.

    ``name`` and ``url`` are required fields; each can be passed as an
    argument or provided through the metainfo object. The database id
    ``'dbNSFP'`` is always added to the metainfo, including one supplied
    by the caller.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param url: URL or local path
    :type url: str
    :param name: name of the file
    :type name: str
    :param organism: organism
    :type organism: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    metainfo.add_string(BioMetainfo.DATABASE_ID, 'dbNSFP')
    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if url:
        metainfo.add_external_link(BioMetainfo.DATA_LINK, url)

    return self.__invoke_loader(
        'genestack/variationDatabaseLoader', 'addDbNSFP', parent, metainfo)
def create_experiment(self, parent=None, name=None, description=None, metainfo=None):
    """
    Create a Genestack Experiment.

    ``name`` is a required field; it can be passed as an argument or
    provided through the metainfo object.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param description: experiment description
    :type description: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if description:
        metainfo.add_string(BioMetainfo.DESCRIPTION, description)

    return self.__invoke_loader(
        'genestack/experimentLoader', 'addExperiment', parent, metainfo)
def create_wig(self, parent=None, name=None, reference_genome=None, url=None, metainfo=None):
    """
    Import a local or remote WIG file as a Genestack Wiggle Track.

    ``name`` and ``url`` are required fields; each can be passed as an
    argument or provided through the metainfo object.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param reference_genome: accession of reference genome
    :type reference_genome: str
    :param url: URL or local path to file
    :type url: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if reference_genome:
        metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME, reference_genome)
    if url:
        metainfo.add_external_link(BioMetainfo.DATA_LINK, url)

    return self.__invoke_loader('genestack/wigLoader', 'importFile', parent, metainfo)
def create_reference_genome(self, parent=None, name=None, description='',
                            sequence_urls=None, annotation_url=None,
                            organism=None, assembly=None, release=None,
                            strain=None, metainfo=None):
    """
    Create a Genestack Reference Genome from a collection of local or remote
    FASTA sequence files, and a GTF or GFF annotation file.

    Arguments that are left empty are not written to the metainfo, except
    for the description, which is always added (as an empty string when
    nothing is given).

    :param parent: accession of parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param description: description of the reference genome
    :type description: str
    :param sequence_urls: list of urls or local paths to sequence files;
        may be left empty, in which case no sequence link is added
    :type sequence_urls: list
    :param annotation_url: url to annotation file
    :type annotation_url: str
    :param organism: organism
    :type organism: str
    :param assembly: assembly
    :type assembly: str
    :param release: release
    :type release: str
    :param strain: strain
    :type strain: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    metainfo = metainfo or BioMetainfo()
    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if strain:
        metainfo.add_string(BioMetainfo.STRAIN, strain)
    if assembly:
        metainfo.add_string(BioMetainfo.REFERENCE_GENOME_ASSEMBLY, assembly)
    if release:
        metainfo.add_string(BioMetainfo.REFERENCE_GENOME_RELEASE, release)
    if annotation_url:
        metainfo.add_external_link(ANNOTATION_KEY, annotation_url,
                                   text='Annotations data link')
    metainfo.add_string(metainfo.DESCRIPTION, description or '')

    # ``sequence_urls`` defaults to None; iterating None would raise TypeError,
    # so an absent value is treated as "no links to add".
    for seq_link in sequence_urls or ():
        metainfo.add_external_link(SEQUENCE_KEY, seq_link, text='Sequence data link')

    return self.__invoke_loader(
        'genestack/referenceGenomeLoader', 'importFile', parent, metainfo)
def create_genome_annotation(self, parent=None, url=None, name=None, organism=None,
                             reference_genome=None, strain=None, metainfo=None):
    """
    Import a local or remote file as a Genestack Genome Annotation file.

    ``name`` and ``url`` are required fields; each can be passed as an
    argument or provided through the metainfo object.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param url: URL or local path
    :type url: str
    :param name: name of the file
    :type name: str
    :param organism: organism
    :type organism: str
    :param reference_genome: reference genome accession
    :type reference_genome: str
    :param strain: strain
    :type strain: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if strain:
        metainfo.add_string(BioMetainfo.STRAIN, strain)
    if reference_genome:
        metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME, reference_genome)
    if url:
        metainfo.add_external_link(BioMetainfo.DATA_LINK, url)

    return self.__invoke_loader(
        'genestack/genome-annotation-loader', 'addGOAnnotationFile', parent, metainfo)
def create_unaligned_read(self, parent=None, name=None, urls=None,
                          method=None, organism=None, metainfo=None):
    """
    Import one or several local or remote files as a Genestack Unaligned Reads file.

    Most common file formats encoding sequencing reads with quality scores
    are accepted (FASTQ 33/64, SRA, FASTA+QUAL, SFF, FAST5).
    ``name`` and ``urls`` are required fields; each can be passed as an
    argument or provided through the metainfo object.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param urls: list of urls
    :type urls: list
    :param method: method
    :type method: str
    :param organism: organism
    :type organism: str
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if method:
        metainfo.add_string(BioMetainfo.METHOD, method)

    # an empty or missing ``urls`` adds no links at all
    for link in urls or ():
        metainfo.add_external_link(BioMetainfo.READS_LINK, link)

    return self.__invoke_loader(
        'genestack/unalignedReadsLoader', 'importFile', parent, metainfo)
def create_bam(self, parent=None, name=None, url=None, metainfo=None,
               organism=None, strain=None, reference_genome=None):
    """
    Import a local or remote BAM file as a Genestack Aligned Reads file.

    Arguments that are left empty are not written to the metainfo, so the
    values already present in ``metainfo`` are kept as they are.

    :param parent: accession of the parent folder;
        leave empty for ``Imported files``
    :type parent: str
    :param name: name of the file
    :type name: str
    :param url: URL of a BAM file; the index will be created at initialization
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :param organism: organism
    :type organism: str
    :param strain: strain
    :type strain: str
    :param reference_genome: reference genome accession
    :type reference_genome: str
    :return: file accession
    :rtype: str
    """
    if not metainfo:
        metainfo = BioMetainfo()

    if name:
        metainfo.add_string(BioMetainfo.NAME, name)
    if organism:
        metainfo.add_organism(BioMetainfo.ORGANISM, organism)
    if strain:
        metainfo.add_string(BioMetainfo.STRAIN, strain)
    if reference_genome:
        metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME, reference_genome)
    if url:
        metainfo.add_external_link(BioMetainfo.BAM_FILE_LINK, url)

    return self.__invoke_loader(
        'genestack/alignedReadsLoader', 'importFile', parent, metainfo)
# read the CSV file; its header row supplies the column names
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # stop at the first mandatory key that is absent from the CSV columns
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key in field_names:
            continue
        raise GenestackException(
            "The key '%s' must be supplied in the CSV file" % mandatory_key)

    for file_data in reader:
        # every CSV row gets its own BioMetainfo object
        metainfo = BioMetainfo()
        for key in field_names:
            # 'organism' and 'link' go through dedicated metainfo methods;
            # every other column is stored as a plain string
            if key == "organism":
                metainfo.add_organism(BioMetainfo.ORGANISM, file_data[key])
            elif key == "link":
                url = file_data[key]
                metainfo.add_external_link(key=BioMetainfo.READS_LINK, text="link",
                                           url=url, fmt=file_format)
            else:
                # column names listed in SPECIAL_KEYS (looked up lower-cased)
                # are replaced by the key stored there
                metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                metainfo.add_string(metainfo_key, file_data[key])
reader = csv.DictReader(the_file, delimiter=",")
field_names = reader.fieldnames

# the column used to match CSV rows to remote files must be present
if args.local_key not in field_names:
    raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key)

for file_data in reader:
    # find the corresponding file
    # NOTE(review): ``local_key`` is not assigned in this fragment -- presumably
    # an alias of ``args.local_key`` defined earlier; confirm.
    local_identifier = file_data[local_key]
    remote_file = identifier_map.get(local_identifier)
    if not remote_file:
        # rows without a matching remote file are reported and skipped
        print("Warning: no match found for file name '%s'" % local_identifier)
        continue

    # prepare a BioMetainfo object
    metainfo = BioMetainfo()
    for key in field_names:
        value = file_data[key]
        # empty cells carry no information, and the matching column itself
        # is not copied into the metainfo
        if value == "" or value is None:
            continue
        if key == args.local_key:
            continue

        if key == "organism":
            metainfo.add_organism(BioMetainfo.ORGANISM, value)
            continue

        # column names listed in SPECIAL_KEYS (looked up lower-cased)
        # are replaced by the key stored there
        metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
        # parse once and reuse the result instead of parsing the value twice
        boolean_value = parse_as_boolean(value)
        if boolean_value is not None:
            metainfo.add_boolean(metainfo_key, boolean_value)
        else:
            metainfo.add_string(metainfo_key, value)
# read the CSV file; its header row supplies the column names
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # stop at the first mandatory key that is absent from the CSV columns
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key in field_names:
            continue
        raise GenestackException("The key '%s' must be supplied in the CSV file" % mandatory_key)

    for file_data in reader:
        # every CSV row gets its own BioMetainfo object
        metainfo = BioMetainfo()
        for key in field_names:
            # 'organism' and 'link' go through dedicated metainfo methods;
            # every other column is stored as a plain string
            if key == "organism":
                metainfo.add_organism(BioMetainfo.ORGANISM, file_data[key])
            elif key == "link":
                url = file_data[key]
                metainfo.add_external_link(key=BioMetainfo.READS_LINK, text="link",
                                           url=url, fmt=file_format)
            else:
                # column names listed in SPECIAL_KEYS (looked up lower-cased)
                # are replaced by the key stored there
                metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                metainfo.add_string(metainfo_key, file_data[key])

        # one sequencing assay is created on Genestack per CSV row
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)
def create_codon_table(self, parent=None, metainfo=None):
    """
    Call ``addCodonTable`` on the ``genestack/codonTableLoader`` loader.

    :param parent: passed to the loader unchanged
    :param metainfo: metainfo object; a new ``BioMetainfo`` is used when omitted
    :type metainfo: BioMetainfo
    :return: whatever the loader invocation returns
    """
    if not metainfo:
        metainfo = BioMetainfo()
    return self.__invoke_loader(
        'genestack/codonTableLoader', 'addCodonTable', parent, metainfo)
if args.local_key not in field_names: raise GenestackException( "Error: the local key %s is not present in the supplied CSV file" % args.local_key) for file_data in reader: # find the corresponding file local_identifier = file_data[local_key] remote_file = identifier_map.get(local_identifier) if not remote_file: print "Warning: no match found for file name '%s'" % local_identifier continue # prepare a BioMetainfo object metainfo = BioMetainfo() for key in field_names: # key parsing logic value = file_data[key] if value == "" or value is None: continue if key == args.local_key: continue if key == "organism": metainfo.add_organism(BioMetainfo.ORGANISM, value) else: metainfo_key = SPECIAL_KEYS.get(key.lower(), key) if parse_as_boolean(value) is not None: metainfo.add_boolean(metainfo_key, parse_as_boolean(value)) else: