示例#1
0
    def create_dataflow(self, accession, name=None):
        """
        Create a data flow based on the file provenance of the specified file.
        The nodes of the data flow can be accessed by the accession of the corresponding files in the file provenance.

        The data flow accession is taken from the application response: from ``fileInfo`` when a new page was
        created, or from the last entry of ``fileInfos`` when pages already exist.

        :param accession: file accession
        :type accession: str
        :param name: data flow name; when non-empty it is written to the ``Metainfo.NAME`` key of the data flow file
        :type name: str
        :return: accession of the created data flow file
        :rtype: str
        :raise GenestackException: if the response type is neither ``newPage`` nor ``existingPages``
        """
        response = self.invoke('initializeApplicationState',
                               'createFromSources', accession)
        response_type = response['type']

        if response_type == 'newPage':
            dataflow_accession = response['fileInfo']['accession']
        elif response_type == 'existingPages':
            # The data flow already exists, so take the most recently created one.
            # Existing pages are listed from first to last (up to MAX QUERY entries).
            # TODO: if there are more files than MAX QUERY (100 ATM), the last entry
            # of the response is not really the last created file
            # (an almost impossible use case, though)
            dataflow_accession = response['fileInfos'][-1]['accession']
        else:
            raise GenestackException("Unknown response type: %s" % response_type)

        if name:
            files_util = FilesUtil(self.connection)
            files_util.replace_metainfo_string_value(
                [dataflow_accession], Metainfo.NAME, name)
        return dataflow_accession
def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and gather them in a single folder.

    When ``folder_accession`` is empty, a new folder is created inside the special ``UPLOADED`` folder;
    it is named ``folder_name`` or, if that is empty as well, after the current date and time.
    Otherwise the given folder is used and its name is looked up.

    Every entry of ``files`` is loaded with ``DataImporter.load_raw``, linked to the target folder
    and unlinked from the ``UPLOADED`` folder.

    :param genestack_client.Connection connection:
    :param list[str] files:
    :param str folder_name:
    :param str folder_accession:
    :return: target folder accession, target folder name and a dict mapping
        each loaded accession to the corresponding entry of ``files``
    :rtype: tuple
    """
    importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Reuse the existing folder and report its actual name
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    loaded = {}
    for source_file in files:
        raw_accession = importer.load_raw(source_file)
        # Move the raw file: link it to the target folder first, then drop it from the uploaded folder
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        loaded[raw_accession] = source_file
    return folder_accession, folder_name, loaded
示例#3
0
def test_metainfo_io(conn):
    """
    Check that metainfo values written to a report file are read back unchanged.

    A ``Metainfo`` holding values of several kinds (including two decimals under the same key)
    is attached to a newly created report file; the metainfo collected for that file is then
    compared with what was written. The report file is unlinked from the ``CREATED`` folder
    afterwards, even when creation or an assertion fails.

    :param conn: connection passed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    # Values expected back for the structured (person / publication) entries
    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    written = Metainfo()
    written.add_boolean("a", True)
    written.add_file_reference("b", created_folder)
    written.add_date_time("c", "2015-12-13")
    written.add_integer("d", 239)
    # Two values under the same key: both must come back, in order
    written.add_decimal("e", 238.583)
    written.add_decimal("e", -13.4)
    written.add_string("f", "hello")
    written.add_memory_size("g", 2847633)
    written.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                       "*****@*****.**")
    written.add_publication("j",
                            "My Publication",
                            "Myself",
                            "Journal of Me",
                            "23/12/2014",
                            pages="12-23")
    written.add_value(Metainfo.NAME, StringValue("Test report file"))

    report_accession = None
    try:
        report_accession = importer.create_report_file(metainfo=written,
                                                       urls=[TEST_URL],
                                                       parent=created_folder)
        stored = next(iter(files_util.collect_metainfos([report_accession])))

        assert stored.get('a')[0].get_boolean()
        assert isinstance(stored.get('b')[0].get_accession(), str)
        expected_date = _strptime_local('2015-12-13', '%Y-%m-%d')
        assert stored.get('c')[0].get_date() == expected_date
        assert stored.get('d')[0].get_int() == 239
        assert stored.get('e')[0].get_decimal() == 238.583
        assert stored.get('e')[1].get_decimal() == -13.4
        assert stored.get('f')[0].get_string() == "hello"
        assert stored.get('g')[0].get_int() == 2847633
        assert stored.get('i')[0].get_person() == expected_person
        assert stored.get('j')[0].get_publication() == expected_publication
        assert stored.get(Metainfo.NAME)[0].get_string() == "Test report file"
        assert stored.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # Clean up only if the report file was actually created
        if report_accession is not None:
            files_util.unlink_file(report_accession, created_folder)
    def __get_mydatasets_folder(self):
        """
        Look up the special folder ``SpecialFolders.MY_DATASETS`` through ``FilesUtil``.

        :return: default dataset folder accession
        :rtype: str
        """
        files_util = FilesUtil(self.connection)
        return files_util.get_special_folder(SpecialFolders.MY_DATASETS)
示例#5
0
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform expression navigator file from ``ISOFORM_GROUPS``.

    The test passes if file creation does not raise. Unless ``keep_files`` is set,
    the created file is unlinked from the ``CREATED`` folder afterwards.

    :param conn: connection passed to ``FilesUtil`` and ``ExpressionNavigatorforIsoforms``
    :param keep_files: when true, the created file is left in place
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    created_file = None
    try:
        groups = []
        for accessions in ISOFORM_GROUPS:
            groups.append({'accessions': accessions})
        created_file = navigator.create_file(groups, multi_mapping_corr=True)
    finally:
        # Nothing to clean up if creation failed or the caller wants to keep the file
        if not (keep_files or created_file is None):
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
示例#6
0
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene expression navigator file from ``RNA_SEQ_GROUPS`` using the ``PKG_DESEQ`` package.

    The test passes if file creation does not raise. Unless ``keep_files`` is set,
    the created file is unlinked from the ``CREATED`` folder afterwards.

    :param conn: connection passed to ``FilesUtil`` and ``ExpressionNavigatorforGenes``
    :param keep_files: when true, the created file is left in place
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    created_file = None
    try:
        groups = []
        for accessions in RNA_SEQ_GROUPS:
            groups.append({'accessions': accessions})
        created_file = navigator.create_file(groups,
                                             r_package=navigator.PKG_DESEQ,
                                             organism="new organism")
    finally:
        # Nothing to clean up if creation failed or the caller wants to keep the file
        if not (keep_files or created_file is None):
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
示例#7
0
    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Set up a general batch files creator, used to create multiple files from a CLA.

        A ``FilesUtil`` is built from the connection of ``cla``; all other arguments are stored as given.

        :param cla: a ``CLApplication`` object, wrapper for the corresponding CLA
        :param base_folder: accession of the base folder where the pipeline files will be organised into subfolders
        :param friendly_name: user-friendly name of the files produced by the app; used in the on-screen statements
            and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files. Default is ``None``
        """
        # Plain configuration values
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args

        # Application wrapper and the file helper sharing its connection
        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
示例#8
0
def test_en_microarrays(conn, keep_files):
    """
    Create a normalisation file and a microarray expression navigator file from ``MICROARRAY_GROUPS``.

    All accessions of all groups are normalised together; the first group is flagged as control
    for the expression navigator. The test passes if both creations succeed without raising.
    Unless ``keep_files`` is set, whatever was created is unlinked from the ``CREATED`` folder afterwards.

    :param conn: connection passed to ``FilesUtil`` and to the application wrappers
    :param keep_files: when true, the created files are left in place
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        groups = [{'accessions': accessions} for accessions in MICROARRAY_GROUPS]
        groups[0]['is_control'] = True

        # Flatten the groups: normalisation takes every microarray at once
        all_accessions = []
        for group in MICROARRAY_GROUPS:
            all_accessions.extend(group)
        normalized_file = normalizer.create_file(all_accessions)

        navigator_file = navigator.create_file(groups, normalized_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            # Either file may be missing if its creation step failed
            leftovers = [f for f in (normalized_file, navigator_file) if f is not None]
            for leftover in leftovers:
                files_util.unlink_file(leftover, created_folder)
示例#9
0
    def create_file(self,
                    source_files,
                    name=None,
                    params=None,
                    calculate_checksums=False,
                    expected_checksums=None,
                    initialize=False):
        """
        Create a native Genestack file with the application and return its accession.
        If a source file is not found or is not of the expected type, an exception will be thrown.

        After creation the optional steps are applied in this order: rename, mark for tests,
        add expected checksums, start initialization.

        :param source_files: list of source files accessions
        :type source_files: list
        :param name: if a name is provided, the created file will be renamed
        :type name: str
        :param params: custom command-line arguments strings; if None,
            the application defaults will be used.
        :type params: list
        :param calculate_checksums: a flag used in the initialization script
            to compute checksums for the created files
        :type calculate_checksums: bool
        :param expected_checksums: Dict of expected checksums (``{metainfo_key: expected_checksum}``)
        :type expected_checksums: dict
        :param initialize: should initialization be started immediately
            after the file is created?
        :type initialize: bool
        :return: accession of created file
        :rtype: str
        """
        created_accession = self.__create_file(source_files, params)
        files_util = FilesUtil(self.connection)

        # Optional post-processing of the freshly created file
        if name:
            files_util.rename_file(created_accession, name)
        if calculate_checksums:
            files_util.mark_for_tests(created_accession)
        if expected_checksums:
            files_util.add_checksums(created_accession, expected_checksums)

        if initialize:
            self.start(created_accession)
        return created_accession
def recognize_files(connection, accession_file_map, new_folder):
    """
    Run file recognition on uploaded raw files, create the recognised files and print a report.

    The ``genestack/upload`` application is asked to recognise groups among the given raw file
    accessions and then to create files from them. Created files are printed grouped by their
    ``kind``. Raw files that do not appear in any recognised group are printed and moved from
    ``new_folder`` to a new "Unrecognized files" subfolder of it.

    :param connection: connection providing ``application()`` and used to build ``FilesUtil``
    :param accession_file_map: mapping of raw file accession to the name of the uploaded file
    :type accession_file_map: dict
    :param new_folder: accession of the folder currently holding the raw files
    :type new_folder: str
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Pass a real list: on Python 3 ``dict.keys()`` is a view, not a list
    # (the standard JSON encoder, for one, rejects views).
    recognised_files = application.invoke('recognizeGroupsByAccession',
                                          list(accession_file_map))

    recognized_accessions = set()
    for recognised in recognised_files:
        for sources in recognised['sourceFileInfos'].values():
            for info in sources:
                recognized_accessions.add(info['accession'])

    created_files = application.invoke('createFiles', recognised_files, [],
                                       None)
    # groupby only merges adjacent items, so sort by the same key first
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_accessions = set(accession_file_map) - recognized_accessions

    if unrecognized_accessions:
        print('Unrecognized Raw Files')
        for accession in unrecognized_accessions:
            file_name = accession_file_map[accession]
            # File names are byte strings on Python 2 but already text on Python 3,
            # where ``str`` has no ``decode`` method; decode only when needed.
            if isinstance(file_name, bytes):
                file_name = file_name.decode('utf-8')
            print('\t%s / %s' % (accession, file_name))
        # move unrecognized files to new folder
        unrecognized_folder = fu.create_folder("Unrecognized files",
                                               parent=new_folder)
        for accession in unrecognized_accessions:
            fu.link_file(accession, unrecognized_folder)
            fu.unlink_file(accession, new_folder)
        print('Unrecognized files moved to %s / %s' %
              (unrecognized_folder, 'Unrecognized files'))
        'local_key',
        help=
        'Name of the local key to match CSV records and Genestack files names')
    parser.add_argument(
        'folder',
        help='Accession of the Genestack folder containing the files')

    args = parser.parse_args()
    csv_input = args.csv_file
    local_key = args.local_key

    print('Connecting to Genestack...')

    # get connection and application handlers
    connection = get_connection(args)
    files_util = FilesUtil(connection)

    print('Collecting files...')
    files = files_util.get_file_children(args.folder)
    print('Found %d files. Collecting metadata...' % len(files))
    infos = files_util.get_infos(files)

    identifier_map = {info['name']: info['accession'] for info in infos}

    # parse the CSV file
    with open(csv_input, 'r') as the_file:
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        if args.local_key not in field_names:
            raise GenestackException(
示例#12
0
def files_utils():
    """
    Build a ``FilesUtil`` on a connection obtained from an empty command-line argument list.

    :return: ``FilesUtil`` instance bound to that connection
    """
    default_args = make_connection_parser().parse_args([])
    return FilesUtil(get_connection(default_args))
示例#13
0
 def rename_file(self, accession, name):
     """
     Deprecated wrapper around ``FilesUtil.rename_file``.

     Writes a deprecation notice to standard error, then delegates the rename.

     :param accession: file accession
     :param name: new file name
     """
     sys.stderr.write('Deprecated: use FilesUtil.rename_file instead\n')
     files_util = FilesUtil(self.connection)
     files_util.rename_file(accession, name)