Exemplo n.º 1
0
    def mark_obsolete(self, accession):
        """
        Flag a Genestack file as obsolete.

        A metainfo object holding the boolean key ``genestack:obsolete`` set to
        ``True`` is built and attached to the file via ``add_metainfo_values``.

        :param accession: accession of the file to flag
        :return: None
        """
        obsolete_flag = Metainfo()
        obsolete_flag.add_boolean('genestack:obsolete', True)
        self.add_metainfo_values(accession, obsolete_flag)
    def mark_obsolete(self, accession):
        """
        Flag a Genestack file as obsolete.

        A metainfo object holding the boolean key ``genestack:obsolete`` set to
        ``True`` is built and attached to the file via ``add_metainfo_values``.

        :param accession: accession of the file to flag
        :return: None
        """
        obsolete_flag = Metainfo()
        obsolete_flag.add_boolean('genestack:obsolete', True)
        self.add_metainfo_values(accession, obsolete_flag)
Exemplo n.º 3
0
    def mark_for_tests(self, app_file):
        """
        Flag a Genestack file as a test file.

        The boolean key ``CALCULATE_CHECKSUMS_KEY`` is set to ``True`` in a new
        metainfo object, which is then attached to the file via
        ``add_metainfo_values``. A file flagged this way will calculate md5
        checksums of its encapsulated physical files during initialization.

        :param app_file: accession of the file to flag
        :return: None
        """
        checksum_flag = Metainfo()
        checksum_flag.add_boolean(CALCULATE_CHECKSUMS_KEY, True)
        self.add_metainfo_values(app_file, checksum_flag)
    def mark_for_tests(self, app_file):
        """
        Flag a Genestack file as a test file.

        The boolean key ``CALCULATE_CHECKSUMS_KEY`` is set to ``True`` in a new
        metainfo object, which is then attached to the file via
        ``add_metainfo_values``. A file flagged this way will calculate md5
        checksums of its encapsulated physical files during initialization.

        :param app_file: accession of the file to flag
        :return: None
        """
        checksum_flag = Metainfo()
        checksum_flag.add_boolean(CALCULATE_CHECKSUMS_KEY, True)
        self.add_metainfo_values(app_file, checksum_flag)
    def _fill_dataset_metainfo(dataset_metainfo, name):
        """
        Return a metainfo object for a dataset with its name filled in.

        :param dataset_metainfo: metainfo to extend; a falsy value is replaced
            by a new ``Metainfo``. A truthy object is modified in place.
        :param name: value stored under ``Metainfo.NAME``
        :return: the metainfo object with the name added
        :raises GenestackException: if ``Metainfo.NAME`` is already present
        """
        if not dataset_metainfo:
            dataset_metainfo = Metainfo()

        # The name is supplied separately, so it must not be preset.
        name_already_set = Metainfo.NAME in dataset_metainfo
        if name_already_set:
            message = 'Provided metainfo must not have "%s" field set' % Metainfo.NAME
            raise GenestackException(message)

        dataset_metainfo.add_string(Metainfo.NAME, name)
        return dataset_metainfo
Exemplo n.º 6
0
    def add_checksums(self, app_file, expected_checksums):
        """
        Store expected MD5 checksums in the metainfo of a CLA file.

        Each item of ``expected_checksums`` is written as a string value whose
        metainfo key is ``EXPECTED_CHECKSUM_PREFIX`` followed by the item key.

        Expected checksums are calculated in the following way:

            - The number of checksums equals number of entries in storage.
              For instance, a Reference Genome file has 2 entries (annotation and sequence files).
            - If there are multiple files in one entry, they will be concatenated in the same order
              as they were ``PUT`` to storage by the initialization script.
            - If a file is marked for testing, then after initialization its metainfo
              will contain both expected and actual checksum values.

        :param app_file: accession of application file
        :param expected_checksums: mapping (must provide ``items()``) of key to MD5 checksum
        :return: None
        """
        checksum_metainfo = Metainfo()
        for entry, md5 in expected_checksums.items():
            metainfo_key = '%s%s' % (EXPECTED_CHECKSUM_PREFIX, entry)
            checksum_metainfo.add_string(metainfo_key, md5)
        self.add_metainfo_values(app_file, checksum_metainfo)
    def add_checksums(self, app_file, expected_checksums):
        """
        Store expected MD5 checksums in the metainfo of a CLA file.

        Each item of ``expected_checksums`` is written as a string value whose
        metainfo key is ``EXPECTED_CHECKSUM_PREFIX`` followed by the item key.

        Expected checksums are calculated in the following way:

            - The number of checksums equals number of entries in storage.
              For instance, a Reference Genome file has 2 entries (annotation and sequence files).
            - If there are multiple files in one entry, they will be concatenated in the same order
              as they were ``PUT`` to storage by the initialization script.
            - If a file is marked for testing, then after initialization its metainfo
              will contain both expected and actual checksum values.

        :param app_file: accession of application file
        :param expected_checksums: mapping (must provide ``items()``) of key to MD5 checksum
        :return: None
        """
        checksum_metainfo = Metainfo()
        for entry, md5 in expected_checksums.items():
            metainfo_key = '%s%s' % (EXPECTED_CHECKSUM_PREFIX, entry)
            checksum_metainfo.add_string(metainfo_key, md5)
        self.add_metainfo_values(app_file, checksum_metainfo)
Exemplo n.º 8
0
    def collect_metainfos(self, accessions):
        """
        Fetch the complete metainfo of several files.

        Calls the ``getMetainfo`` method with the accessions and converts each
        returned item with ``Metainfo.parse_metainfo_from_dict``.

        :param accessions: list of accessions
        :type accessions: list[str]
        :return: list of metainfo objects, in the order returned by the call
        :rtype: list[Metainfo]
        """
        raw_metainfos = self.invoke('getMetainfo', accessions)
        parsed = []
        for raw in raw_metainfos:
            parsed.append(Metainfo.parse_metainfo_from_dict(raw))
        return parsed
Exemplo n.º 9
0
    def create_folder(self,
                      name,
                      parent=None,
                      description=None,
                      metainfo=None):
        """
        Create a folder by invoking ``createFolder``.

        The folder name, and the description when one is given, are added as
        string values to the metainfo sent with the request.

        :param name: name of the folder
        :type name: str
        :param parent: if not specified, create folder in the user's private folder
        :type parent: str
        :param description: description of the folder (goes into the metainfo);
            skipped when ``None``
        :type description: str
        :param metainfo: additional Metainfo. A truthy object is used as-is and
            modified in place; a falsy value is replaced by a new ``Metainfo``.
            Name and description should be specified either via arguments or
            in the metainfo object (but not in both).
        :type metainfo: Metainfo
        :return: accession of created folder
        """
        folder_metainfo = metainfo if metainfo else Metainfo()
        folder_metainfo.add_string(Metainfo.NAME, name)

        has_description = description is not None
        if has_description:
            folder_metainfo.add_string(Metainfo.DESCRIPTION, description)

        return self.invoke('createFolder', parent, folder_metainfo)
Exemplo n.º 10
0
 def _copy_metainfo(metainfo):
     """Return a new Metainfo built from ``metainfo``, or a new no-argument Metainfo when it is falsy."""
     if metainfo:
         return Metainfo(metainfo)
     return Metainfo()
Exemplo n.º 11
0
# accessions already used as the second mate of a paired-end assay
created_pairs = set()

# parse the TSV file
with open(tsv_input, 'r') as the_file:
    # The csv module registers its tab-delimited dialect as 'excel-tab';
    # 'excel_tab' is not a registered name and raises csv.Error.
    reader = csv.DictReader(the_file, dialect='excel-tab')
    field_names = reader.fieldnames

    for file_data in reader:

        # skip the entry if the file was already included in a previously created paired-end assay
        if file_data[FILE_ACCESSION] in created_pairs:
            continue

        # for each entry, prepare a Metainfo object; a falsy mapped name in
        # VALID_FIELDS means the column name itself is used as metainfo key
        metainfo = Metainfo()
        for key, mapped_key in VALID_FIELDS.items():
            metainfo.add_string(mapped_key or key, file_data[key])
        metainfo.add_external_link(BioMetaKeys.READS_LINK, ENCODE_URL_PATTERN.format(file_data[FILE_ACCESSION]))

        if file_data.get(PAIRED_ACCESSION):
            # record the accession and URL of the second mate if the reads are
            # paired-end (the row's value, not the column-name constant)
            metainfo.add_string(FILE_ACCESSION, file_data[PAIRED_ACCESSION])
            metainfo.add_external_link(BioMetaKeys.READS_LINK, ENCODE_URL_PATTERN.format(file_data[PAIRED_ACCESSION]))
            created_pairs.add(file_data[PAIRED_ACCESSION])

        # create the sequencing assay on Genestack
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)

        print('Created file "%s" (%s)' % (file_data[FILE_ACCESSION], created_file))

# read the sample descriptions from the CSV file
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # every mandatory column has to appear in the CSV header
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key in field_names:
            continue
        raise GenestackException("The key '%s' must be supplied in the CSV file" % mandatory_key)

    for file_data in reader:

        # build one Metainfo object per CSV row
        metainfo = Metainfo()
        for key in field_names:
            if key == "link":
                # the 'link' column is stored as an external link to the reads
                url = file_data[key]
                metainfo.add_external_link(key=BioMetaKeys.READS_LINK, text="link", url=url, fmt=file_format)
                continue
            if key == "organism":
                # the 'organism' column goes under the dedicated organism key
                metainfo.add_string(BioMetaKeys.ORGANISM, file_data[key])
                continue
            # any other column is stored as a string, under the key from
            # SPECIAL_KEYS when the lower-cased column name is listed there
            metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
            metainfo.add_string(metainfo_key, file_data[key])

        # create the sequencing assay on Genestack
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)
        if args.local_key not in field_names:
            raise GenestackException(
                "Error: the local key %s is not present in the supplied CSV file"
                % args.local_key)

        for file_data in reader:
            # find the corresponding file
            local_identifier = file_data[local_key]
            remote_file = identifier_map.get(local_identifier)
            if not remote_file:
                print('Warning: no match found for file name "%s"' %
                      local_identifier)
                continue

            # prepare a Metainfo object
            metainfo = Metainfo()
            for key in field_names:
                # key parsing logic
                value = file_data[key]
                if value == "" or value is None:
                    continue
                if key == args.local_key:
                    continue
                if key == "organism":
                    metainfo.add_string(BioMetaKeys.ORGANISM, value)
                else:
                    metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                    if parse_as_boolean(value) is not None:
                        metainfo.add_boolean(metainfo_key,
                                             parse_as_boolean(value))
                    else:
Exemplo n.º 14
0
def test_metainfo_io(conn):
    """
    Round-trip a Metainfo with several value kinds through a report file.

    A report file is created from the metainfo, its metainfo is read back
    with ``collect_metainfos`` and every value is compared with what was
    written. The report file is unlinked from the CREATED folder afterwards,
    whether or not the assertions pass.

    :param conn: connection passed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    # values written to the server
    outgoing = Metainfo()
    outgoing.add_boolean("a", True)
    outgoing.add_file_reference("b", created_folder)
    outgoing.add_date_time("c", "2015-12-13")
    outgoing.add_integer("d", 239)
    outgoing.add_decimal("e", 238.583)
    outgoing.add_decimal("e", -13.4)
    outgoing.add_string("f", "hello")
    outgoing.add_memory_size("g", 2847633)
    outgoing.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                        "*****@*****.**")
    outgoing.add_publication("j",
                             "My Publication",
                             "Myself",
                             "Journal of Me",
                             "23/12/2014",
                             pages="12-23")
    outgoing.add_value(Metainfo.NAME, StringValue("Test report file"))

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=outgoing,
                                                  urls=[TEST_URL],
                                                  parent=created_folder)
        stored = next(iter(files_util.collect_metainfos([report_file])))

        def first(key):
            # first value stored under the given key
            return stored.get(key)[0]

        assert first('a').get_boolean()
        assert isinstance(first('b').get_accession(), str)
        assert first('c').get_date() == _strptime_local(
            '2015-12-13', '%Y-%m-%d')
        assert first('d').get_int() == 239
        assert first('e').get_decimal() == 238.583
        assert stored.get('e')[1].get_decimal() == -13.4
        assert first('f').get_string() == "hello"
        assert first('g').get_int() == 2847633
        assert first('i').get_person() == {
            'name': 'Rosalind Franklin',
            'phone': '+1-202-555-0123',
            'email': '*****@*****.**'
        }
        assert first('j').get_publication() == {
            'title': 'My Publication',
            'authors': 'Myself',
            'journalName': 'Journal of Me',
            'issueDate': '23/12/2014',
            'pages': '12-23',
            'issueNumber': None,
            'identifiers': {}
        }
        assert first(Metainfo.NAME).get_string() == "Test report file"
        assert first(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        # clean up only if the report file was actually created
        if report_file is not None:
            files_util.unlink_file(report_file, created_folder)
Exemplo n.º 15
0
def test_metainfo_io(conn):
    """
    Round-trip a Metainfo with several value kinds through a report file.

    A report file is created from the metainfo, its metainfo is read back
    with ``collect_metainfos`` and every value is compared with what was
    written. The report file is unlinked from the CREATED folder afterwards,
    whether or not the assertions pass.

    :param conn: connection passed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    # values written to the server
    outgoing = Metainfo()
    outgoing.add_boolean("a", True)
    outgoing.add_file_reference("b", created_folder)
    outgoing.add_date_time("c", "2015-12-13")
    outgoing.add_integer("d", 239)
    outgoing.add_decimal("e", 238.583)
    outgoing.add_decimal("e", -13.4)
    outgoing.add_string("f", "hello")
    outgoing.add_memory_size("g", 2847633)
    outgoing.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    outgoing.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014", pages="12-23")
    outgoing.add_value(Metainfo.NAME, StringValue("Test report file"))

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=outgoing, urls=[TEST_URL], parent=created_folder)
        stored = next(iter(files_util.collect_metainfos([report_file])))

        def first(key):
            # first value stored under the given key
            return stored.get(key)[0]

        assert first('a').get_boolean()
        assert isinstance(first('b').get_accession(), str)
        assert first('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert first('d').get_int() == 239
        assert first('e').get_decimal() == 238.583
        assert stored.get('e')[1].get_decimal() == -13.4
        assert first('f').get_string() == "hello"
        assert first('g').get_int() == 2847633
        assert first('i').get_person() == {'name': 'Rosalind Franklin', 'phone': '+1-202-555-0123',
                                           'email': '*****@*****.**'}
        assert first('j').get_publication() == {'title': 'My Publication', 'authors': 'Myself',
                                                'journalName': 'Journal of Me', 'issueDate': '23/12/2014',
                                                'pages': '12-23', 'issueNumber': None, 'identifiers': {}}
        assert first(Metainfo.NAME).get_string() == "Test report file"
        assert first(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        # clean up only if the report file was actually created
        if report_file is not None:
            files_util.unlink_file(report_file, created_folder)
Exemplo n.º 16
0
def metainfo():
    """Return a newly constructed ``Metainfo`` (built with no arguments)."""
    fresh_metainfo = Metainfo()
    return fresh_metainfo
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        if args.local_key not in field_names:
            raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key)

        for file_data in reader:
            # find the corresponding file
            local_identifier = file_data[local_key]
            remote_file = identifier_map.get(local_identifier)
            if not remote_file:
                print('Warning: no match found for file name "%s"' % local_identifier)
                continue

            # prepare a Metainfo object
            metainfo = Metainfo()
            for key in field_names:
                # key parsing logic
                value = file_data[key]
                if value == "" or value is None:
                    continue
                if key == args.local_key:
                    continue
                if key == "organism":
                    metainfo.add_string(BioMetaKeys.ORGANISM, value)
                else:
                    metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                    if parse_as_boolean(value) is not None:
                        metainfo.add_boolean(metainfo_key, parse_as_boolean(value))
                    else:
                        metainfo.add_string(metainfo_key, value)
Exemplo n.º 18
0

# read the sample descriptions from the CSV file
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # every mandatory column has to appear in the CSV header
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key in field_names:
            continue
        raise GenestackException("The key '%s' must be supplied in the CSV file" % mandatory_key)

    for file_data in reader:

        # build one Metainfo object per CSV row
        metainfo = Metainfo()
        for key in field_names:
            if key == "link":
                # the 'link' column is stored as an external link to the reads
                url = file_data[key]
                metainfo.add_external_link(key=BioMetaKeys.READS_LINK, text="link", url=url, fmt=file_format)
                continue
            if key == "organism":
                # the 'organism' column goes under the dedicated organism key
                metainfo.add_string(BioMetaKeys.ORGANISM, file_data[key])
                continue
            # any other column is stored as a string, under the key from
            # SPECIAL_KEYS when the lower-cased column name is listed there
            metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
            metainfo.add_string(metainfo_key, file_data[key])

        # create the sequencing assay on Genestack
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)