Пример #1
0
class BatchFilesCreator(object):
    """
    Create one output file per source with a CLA and link them all into a dedicated subfolder.
    """

    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Constructor of the general batch files creator, to create multiple files from a CLA.

        :param cla: a ``CLApplication`` object, wrapper for the corresponding CLA
        :param base_folder: accession of the base folder where the pipeline files will be organised into subfolders
        :param friendly_name: user-friendly name of the files produced by the app ; used in the on-screen statements
        and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files. Default is ``None``
        """

        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args

    def create_files(self, sources):
        """
        Create one output file per source and link each of them into a new subfolder of the base folder.

        The subfolder is named after ``friendly_name``. Progress is printed after each created file.

        :param sources: iterable of sources, each passed as-is to ``_create_output_file``
        :return: list of the created outputs, in the order of ``sources``
        :rtype: list
        """
        print('Creating %s files...' % self._friendly_name)
        output_folder = self._files_util.create_folder(self._friendly_name, parent=self._base_folder)
        # Materialise the sources so the total is known even when a generator is passed in.
        sources = list(sources)
        total = len(sources)
        output_files = []
        for i, source in enumerate(sources, 1):
            output = self._create_output_file(source)
            self._files_util.link_file(output, output_folder)
            # The denominator is the number of sources, not the length of the created accession.
            print('Created %s file %s (%d/%d)' % (self._friendly_name, output, i, total))
            output_files.append(output)
        return output_files

    # this method can be overridden in child classes to allow for more complex file creation logic
    def _create_output_file(self, source):
        """
        Create a single output file from ``source`` with the CLA.

        The custom command-line arguments are applied to the new file only when some were supplied.

        :param source: source passed to ``cla.create_file``
        :return: the value returned by ``cla.create_file``
        """
        output = self._cla.create_file(source)
        if self._custom_args:
            self._cla.change_command_line_arguments(output, self._custom_args)
        return output
Пример #2
0
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene expression navigator file from ``RNA_SEQ_GROUPS`` and clean it up afterwards.

    :param conn: connection handed to ``FilesUtil`` and ``ExpressionNavigatorforGenes``
    :param keep_files: when true, the created file is left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    created_file = None
    try:
        created_file = navigator.create_file(
            [{'accessions': accessions} for accessions in RNA_SEQ_GROUPS],
            r_package=navigator.PKG_DESEQ,
            organism="new organism")
    finally:
        # Runs even if creation raised; in that case there is nothing to unlink.
        must_clean = (not keep_files) and (created_file is not None)
        if must_clean:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
Пример #3
0
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform expression navigator file from ``ISOFORM_GROUPS`` and clean it up afterwards.

    :param conn: connection handed to ``FilesUtil`` and ``ExpressionNavigatorforIsoforms``
    :param keep_files: when true, the created file is left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    created_file = None
    try:
        created_file = navigator.create_file(
            [{'accessions': accessions} for accessions in ISOFORM_GROUPS],
            multi_mapping_corr=True)
    finally:
        # Runs even if creation raised; in that case there is nothing to unlink.
        must_clean = (not keep_files) and (created_file is not None)
        if must_clean:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene expression navigator file from ``RNA_SEQ_GROUPS`` and clean it up afterwards.

    :param conn: connection handed to ``FilesUtil`` and ``ExpressionNavigatorforGenes``
    :param keep_files: when true, the created file is left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    created_file = None
    try:
        created_file = navigator.create_file(
            [{'accessions': accessions} for accessions in RNA_SEQ_GROUPS],
            r_package=navigator.PKG_DESEQ,
            organism="new organism")
    finally:
        # Runs even if creation raised; in that case there is nothing to unlink.
        must_clean = (not keep_files) and (created_file is not None)
        if must_clean:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform expression navigator file from ``ISOFORM_GROUPS`` and clean it up afterwards.

    :param conn: connection handed to ``FilesUtil`` and ``ExpressionNavigatorforIsoforms``
    :param keep_files: when true, the created file is left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    created_file = None
    try:
        created_file = navigator.create_file(
            [{'accessions': accessions} for accessions in ISOFORM_GROUPS],
            multi_mapping_corr=True)
    finally:
        # Runs even if creation raised; in that case there is nothing to unlink.
        must_clean = (not keep_files) and (created_file is not None)
        if must_clean:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and move them from the uploaded-files special folder into a target folder.

    When ``folder_accession`` is empty, a new folder is created under the uploaded-files folder,
    named ``folder_name`` or a timestamp-based name. Otherwise the given folder is used and
    its name is looked up.

    :param genestack_client.Connection connection:
    :param list[str] files:
    :param str folder_name:
    :param str folder_accession:
    :return: tuple of (target folder accession, target folder name, dict mapping each loaded accession to its entry in ``files``)
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Existing target folder: report the name it actually has.
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    accession_file_map = {}
    for path in files:
        raw_accession = data_importer.load_raw(path)
        # Link into the target before unlinking from the uploaded-files folder.
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        accession_file_map[raw_accession] = path
    return folder_accession, folder_name, accession_file_map
Пример #7
0
    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Set up a batch files creator that produces multiple files with a single CLA.

        :param cla: ``CLApplication`` wrapper of the CLA used to create the files
        :param base_folder: accession of the base folder under which the pipeline files are organised into subfolders
        :param friendly_name: user-friendly name of the files produced by the app; used in the on-screen statements
        and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files, ``None`` by default
        """
        # Plain configuration values first, then the helper built from the CLA's connection.
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args
        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
Пример #8
0
    def create_dataflow(self, accession, name=None):
        """
        Build a data flow from the file provenance of the given file.
        The nodes of the data flow can be accessed by the accession of the corresponding files in the file provenance.

        :param accession: file accession
        :type accession: str
        :param name: data flow name; when given, the NAME metainfo value of the data flow file is replaced with it
        :type name: str
        :return: accession of the created data flow file
        :rtype: str
        :raise GenestackException: if the response type is neither ``newPage`` nor ``existingPages``
        """
        response = self.invoke('initializeApplicationState',
                               'createFromSources', accession)
        response_type = response['type']

        if response_type == 'newPage':
            dataflow_accession = response['fileInfo']['accession']
        elif response_type == 'existingPages':
            # If file already exists we expect to get the last created file.
            # Existing page contains files from first to last (or MAX QUERY)
            # TODO: in case there are more files then MAX QUERY (100 ATM),
            # the last file in response will not be really last
            # (it is almost impossible use case, though)
            dataflow_accession = response['fileInfos'][-1]['accession']
        else:
            raise GenestackException("Unknown response type: %s" %
                                     response_type)

        if name:
            files_util = FilesUtil(self.connection)
            files_util.replace_metainfo_string_value(
                [dataflow_accession], Metainfo.NAME, name)
        return dataflow_accession
Пример #9
0
def test_en_microarrays(conn, keep_files):
    """
    Normalise the ``MICROARRAY_GROUPS`` files, create a microarray expression navigator file, then clean up.

    :param conn: connection handed to ``FilesUtil`` and to both application wrappers
    :param keep_files: when true, the created files are left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        groups = [{'accessions': accessions} for accessions in MICROARRAY_GROUPS]
        groups[0]['is_control'] = True
        # The normalisation step takes the accessions of every group as one flat list.
        all_accessions = []
        for group in MICROARRAY_GROUPS:
            all_accessions.extend(group)
        normalized_file = normalizer.create_file(all_accessions)
        navigator_file = navigator.create_file(groups, normalized_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created = files_util.get_special_folder(SpecialFolders.CREATED)
            # Either file may still be None if its creation step raised.
            for leftover in (normalized_file, navigator_file):
                if leftover is None:
                    continue
                files_util.unlink_file(leftover, created)
def test_en_microarrays(conn, keep_files):
    """
    Normalise the ``MICROARRAY_GROUPS`` files, create a microarray expression navigator file, then clean up.

    :param conn: connection handed to ``FilesUtil`` and to both application wrappers
    :param keep_files: when true, the created files are left linked in the created-files folder
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        groups = [{'accessions': accessions} for accessions in MICROARRAY_GROUPS]
        groups[0]['is_control'] = True
        # The normalisation step takes the accessions of every group as one flat list.
        all_accessions = []
        for group in MICROARRAY_GROUPS:
            all_accessions.extend(group)
        normalized_file = normalizer.create_file(all_accessions)
        navigator_file = navigator.create_file(groups, normalized_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created = files_util.get_special_folder(SpecialFolders.CREATED)
            # Either file may still be None if its creation step raised.
            for leftover in (normalized_file, navigator_file):
                if leftover is None:
                    continue
                files_util.unlink_file(leftover, created)
    def __get_mydatasets_folder(self):
        """
        Look up the default folder for datasets (``SpecialFolders.MY_DATASETS``).

        :return: default dataset folder accession
        :rtype: str
        """
        files_util = FilesUtil(self.connection)
        return files_util.get_special_folder(SpecialFolders.MY_DATASETS)
Пример #12
0
def recognize_files(connection, accession_file_map, new_folder):
    """
    Run file recognition on loaded raw files, create the recognised files and report the outcome.

    Created files are printed grouped by their ``kind``. Raw files that do not appear in any
    recognised group are listed and moved from ``new_folder`` into an "Unrecognized files" subfolder.

    :param connection: connection used for ``FilesUtil`` and for the ``genestack/upload`` application
    :param accession_file_map: dict mapping raw file accessions to file names (byte or text strings)
    :param new_folder: accession of the folder currently holding the raw files
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Pass a real list: on Python 3 ``dict.keys()`` returns a view object, not a list.
    recognised_files = application.invoke('recognizeGroupsByAccession', list(accession_file_map))

    recognized_accessions = set()
    for x in recognised_files:
        for sources in x['sourceFileInfos'].values():
            for info in sources:
                recognized_accessions.add(info['accession'])

    created_files = application.invoke('createFiles', recognised_files, [], None)
    # groupby only merges adjacent items, hence the sort on the same key first
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions

    if unrecognized_file_infos:
        print('Unrecognized Raw Files')
        for accession in unrecognized_file_infos:
            file_name = accession_file_map[accession]
            # Only byte strings need decoding; text strings have no ``decode`` on Python 3.
            if isinstance(file_name, bytes):
                file_name = file_name.decode('utf-8')
            print('\t%s / %s' % (accession, file_name))
        # move unrecognized files to new folder
        unrecognized_folder = fu.create_folder("Unrecognized files", parent=new_folder)
        for accession in unrecognized_file_infos:
            fu.link_file(accession, unrecognized_folder)
            fu.unlink_file(accession, new_folder)
        print('Unrecognized files moved to %s / %s' % (unrecognized_folder, 'Unrecognized files'))
Пример #13
0
def test_metainfo_io(conn):
    """
    Round-trip check: metainfo values attached to a new report file are read back unchanged.

    The report file is unlinked from the created-files folder afterwards, whether or not the checks pass.

    :param conn: connection handed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created = files_util.get_special_folder(SpecialFolders.CREATED)

    source_info = Metainfo()
    source_info.add_boolean("a", True)
    source_info.add_file_reference("b", created)
    source_info.add_date_time("c", "2015-12-13")
    source_info.add_integer("d", 239)
    # key "e" deliberately receives two values, checked by position below
    source_info.add_decimal("e", 238.583)
    source_info.add_decimal("e", -13.4)
    source_info.add_string("f", "hello")
    source_info.add_memory_size("g", 2847633)
    source_info.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    source_info.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014", pages="12-23")
    source_info.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=source_info, urls=[TEST_URL], parent=created)
        stored = next(iter(files_util.collect_metainfos([report_file])))

        def first(key):
            # first value stored under ``key``
            return stored.get(key)[0]

        assert first('a').get_boolean()
        assert isinstance(first('b').get_accession(), str)
        assert first('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert first('d').get_int() == 239
        assert first('e').get_decimal() == 238.583
        assert stored.get('e')[1].get_decimal() == -13.4
        assert first('f').get_string() == "hello"
        assert first('g').get_int() == 2847633
        assert first('i').get_person() == expected_person
        assert first('j').get_publication() == expected_publication
        assert first(Metainfo.NAME).get_string() == "Test report file"
        assert first(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        if report_file is not None:
            files_util.unlink_file(report_file, created)
Пример #14
0
def test_metainfo_io(conn):
    """
    Round-trip check: metainfo values attached to a new report file are read back unchanged.

    The report file is unlinked from the created-files folder afterwards, whether or not the checks pass.

    :param conn: connection handed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created = files_util.get_special_folder(SpecialFolders.CREATED)

    source_info = Metainfo()
    source_info.add_boolean("a", True)
    source_info.add_file_reference("b", created)
    source_info.add_date_time("c", "2015-12-13")
    source_info.add_integer("d", 239)
    # key "e" deliberately receives two values, checked by position below
    source_info.add_decimal("e", 238.583)
    source_info.add_decimal("e", -13.4)
    source_info.add_string("f", "hello")
    source_info.add_memory_size("g", 2847633)
    source_info.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    source_info.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014", pages="12-23")
    source_info.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=source_info, urls=[TEST_URL], parent=created)
        stored = next(iter(files_util.collect_metainfos([report_file])))

        def first(key):
            # first value stored under ``key``
            return stored.get(key)[0]

        assert first('a').get_boolean()
        assert isinstance(first('b').get_accession(), str)
        assert first('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert first('d').get_int() == 239
        assert first('e').get_decimal() == 238.583
        assert stored.get('e')[1].get_decimal() == -13.4
        assert first('f').get_string() == "hello"
        assert first('g').get_int() == 2847633
        assert first('i').get_person() == expected_person
        assert first('j').get_publication() == expected_publication
        assert first(Metainfo.NAME).get_string() == "Test report file"
        assert first(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        if report_file is not None:
            files_util.unlink_file(report_file, created)
Пример #15
0
class BatchFilesCreator(object):
    """
    Create one output file per source with a CLA and link them all into a dedicated subfolder.
    """

    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Constructor of the general batch files creator, to create multiple files from a CLA.

        :param cla: a ``CLApplication`` object, wrapper for the corresponding CLA
        :param base_folder: accession of the base folder where the pipeline files will be organised into subfolders
        :param friendly_name: user-friendly name of the files produced by the app ; used in the on-screen statements
        and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files. Default is ``None``
        """

        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args

    def create_files(self, sources):
        """
        Create one output file per source and link each of them into a new subfolder of the base folder.

        The subfolder is named after ``friendly_name``. Progress is printed after each created file.

        :param sources: iterable of sources, each passed as-is to ``_create_output_file``
        :return: list of the created outputs, in the order of ``sources``
        :rtype: list
        """
        # Single-argument print calls behave the same on Python 2 and Python 3.
        print("Creating %s files..." % self._friendly_name)
        output_folder = self._files_util.create_folder(
            self._friendly_name, parent=self._base_folder)
        # Materialise the sources so the total is known even when a generator is passed in.
        sources = list(sources)
        total = len(sources)
        output_files = []
        for i, source in enumerate(sources, 1):
            output = self._create_output_file(source)
            self._files_util.link_file(output, output_folder)
            # The denominator is the number of sources, not the length of the created accession.
            print("Created %s file %s (%d/%d)" % (self._friendly_name, output,
                                                  i, total))
            output_files.append(output)
        return output_files

    # this method can be overridden in child classes to allow for more complex file creation logic
    def _create_output_file(self, source):
        """
        Create a single output file from ``source`` with the CLA.

        The custom command-line arguments are applied to the new file only when some were supplied.

        :param source: source passed to ``cla.create_file``
        :return: the value returned by ``cla.create_file``
        """
        output = self._cla.create_file(source)
        if self._custom_args:
            self._cla.change_command_line_arguments(output, self._custom_args)
        return output
Пример #16
0
    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Set up a batch files creator that produces multiple files with a single CLA.

        :param cla: ``CLApplication`` wrapper of the CLA used to create the files
        :param base_folder: accession of the base folder under which the pipeline files are organised into subfolders
        :param friendly_name: user-friendly name of the files produced by the app; used in the on-screen statements
        and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files, ``None`` by default
        """
        # Plain configuration values first, then the helper built from the CLA's connection.
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args
        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
Пример #17
0
def upload_files(connection, files, folder_name):
    """
    Load raw files and move them from the uploaded-files special folder into a newly created subfolder.

    :param connection: connection handed to ``DataImporter`` and ``FilesUtil``
    :param files: iterable of files, each passed to ``DataImporter.load_raw``
    :param folder_name: name of the new folder; a timestamp-based name is used when empty
    :return: tuple of (new folder accession, folder name, dict mapping each loaded accession to its entry in ``files``)
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    new_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_folder,
        description='Files uploaded by genestack-uploader')

    accession_file_map = {}
    for path in files:
        raw_accession = data_importer.load_raw(path)
        # Link into the new folder before unlinking from the uploaded-files folder.
        files_util.link_file(raw_accession, new_folder)
        files_util.unlink_file(raw_accession, uploaded_folder)
        accession_file_map[raw_accession] = path
    return new_folder, folder_name, accession_file_map
Пример #18
0
    def create_file(self,
                    source_files,
                    name=None,
                    params=None,
                    calculate_checksums=False,
                    expected_checksums=None,
                    initialize=False):
        """
        Create a native Genestack file with the application and return its accession.
        If a source file is not found or is not of the expected type, an exception will be thrown.

        After creation, the optional steps are applied in this order: rename, mark for tests,
        add expected checksums, start initialization.

        :param source_files: list of source files accessions
        :type source_files: list
        :param name: if a name is provided, the created file will be renamed
        :type name: str
        :param params: custom command-line arguments strings; if None,
            the application defaults will be used.
        :type params: list
        :param calculate_checksums: a flag used in the initialization script
            to compute checksums for the created files
        :type calculate_checksums: bool
        :param expected_checksums: Dict of expected checksums (``{metainfo_key: expected_checksum}``)
        :type expected_checksums: dict
        :param initialize: should initialization be started immediately
            after the file is created?
        :type initialize: bool
        :return: accession of created file
        :rtype: str
        """
        created_accession = self.__create_file(source_files, params)
        files_util = FilesUtil(self.connection)

        if name:
            files_util.rename_file(created_accession, name)
        if calculate_checksums:
            files_util.mark_for_tests(created_accession)
        if expected_checksums:
            files_util.add_checksums(created_accession, expected_checksums)
        if initialize:
            self.start(created_accession)

        return created_accession
Пример #19
0
def recognize_files(connection, accession_file_map, new_folder):
    """
    Run file recognition on loaded raw files, create the recognised files and report the outcome.

    Created files are printed grouped by their ``kind``. Raw files that do not appear in any
    recognised group are listed and moved from ``new_folder`` into an "Unrecognized files" subfolder.

    :param connection: connection used for ``FilesUtil`` and for the ``genestack/upload`` application
    :param accession_file_map: dict mapping raw file accessions to file names (byte or text strings)
    :param new_folder: accession of the folder currently holding the raw files
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Pass a real list: on Python 3 ``dict.keys()`` returns a view object, not a list.
    recognised_files = application.invoke('recognizeGroupsByAccession',
                                          list(accession_file_map))

    recognized_accessions = set()
    for x in recognised_files:
        for sources in x['sourceFileInfos'].values():
            for info in sources:
                recognized_accessions.add(info['accession'])

    created_files = application.invoke('createFiles', recognised_files, [],
                                       None)
    # groupby only merges adjacent items, hence the sort on the same key first
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions

    if unrecognized_file_infos:
        print('Unrecognized Raw Files')
        for accession in unrecognized_file_infos:
            file_name = accession_file_map[accession]
            # Only byte strings need decoding; text strings have no ``decode`` on Python 3.
            if isinstance(file_name, bytes):
                file_name = file_name.decode('utf-8')
            print('\t%s / %s' % (accession, file_name))
        # move unrecognized files to new folder
        unrecognized_folder = fu.create_folder("Unrecognized files",
                                               parent=new_folder)
        for accession in unrecognized_file_infos:
            fu.link_file(accession, unrecognized_folder)
            fu.unlink_file(accession, new_folder)
        print('Unrecognized files moved to %s / %s' %
              (unrecognized_folder, 'Unrecognized files'))
Пример #20
0
    'Accession of the Genestack folder storing the files to group by application'
)
parser.add_argument(
    '--move-files',
    action='store_true',
    help=
    'If present, the original files will be unlinked from the source folder')
args = parser.parse_args()
source_folder = args.folder
move_files = args.move_files

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Connecting to Genestack...")

# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

# all grouped files end up under this new subfolder of the source folder
output_folder = files_util.create_folder("Organized files",
                                         parent=source_folder)
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))
from genestack_client import FilesUtil, make_connection_parser, get_connection

# parse script arguments
parser = make_connection_parser()
parser.add_argument('folder', help='Accession of the Genestack folder storing the files to group by application')
parser.add_argument('--move-files', action='store_true',
                    help='If present, the original files will be unlinked from the source folder')
args = parser.parse_args()
source_folder = args.folder
move_files = args.move_files

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Connecting to Genestack...")

# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

# all grouped files end up under this new subfolder of the source folder
output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference)
Пример #22
0
 def rename_file(self, accession, name):
     """
     Deprecated pass-through to ``FilesUtil.rename_file``.

     Writes a deprecation notice to standard error, then delegates the call.

     :param accession: forwarded unchanged to ``FilesUtil.rename_file``
     :param name: forwarded unchanged to ``FilesUtil.rename_file``
     """
     sys.stderr.write('Deprecated: use FilesUtil.rename_file instead\n')
     files_util = FilesUtil(self.connection)
     files_util.rename_file(accession, name)
Пример #23
0
    parser.add_argument(
        '--name',
        default="New Project",
        help='Name of the Genestack folder where to put the output files')
    parser.add_argument(
        '--ref-genome',
        help='Accession of the reference genome to use for the mapping step')

    args = parser.parse_args()
    project_name = args.name

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Connecting to Genestack...")

    # get connection and create output folder
    connection = get_connection(args)
    files_util = FilesUtil(connection)
    created_files_folder = files_util.get_special_folder(
        SpecialFolders.CREATED)
    project_folder = files_util.create_folder(project_name,
                                              parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    bowtie_creator = BowtieBatchFilesCreator(bowtie_app,
                                             project_folder,
                                             "Mapped Reads",
                                             ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder,
if __name__ == "__main__":
    # parse script arguments
    parser = make_connection_parser()
    parser.add_argument('csv_file', help='Path to the local comma-delimited CSV file containing the data')
    parser.add_argument('local_key', help='Name of the local key to match CSV records and Genestack files names')
    parser.add_argument('folder', help='Accession of the Genestack folder containing the files')

    args = parser.parse_args()
    csv_input = args.csv_file
    local_key = args.local_key

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Connecting to Genestack...")

    # get connection and application handlers
    connection = get_connection(args)
    files_util = FilesUtil(connection)

    print("Collecting files...")
    files = files_util.get_file_children(args.folder)
    print("Found %d files. Collecting metadata..." % len(files))
    infos = files_util.get_infos(files)

    # file name -> accession; if two files share a name, the later entry wins
    identifier_map = {info['name']: info['accession'] for info in infos}

    # parse the CSV file
    with open(csv_input, 'r') as the_file:
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        # the matching column must exist in the CSV header
        if args.local_key not in field_names:
            raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key)
Пример #25
0
    # parse script arguments
    parser = make_connection_parser()
    parser.add_argument('raw_reads_folder',
                        help='Genestack accession of the folder containing the raw reads files to process')
    parser.add_argument('--name', default="New Project",
                        help='Name of the Genestack folder where to put the output files')
    parser.add_argument('--ref-genome', help='Accession of the reference genome to use for the mapping step')

    args = parser.parse_args()
    project_name = args.name

    print('Connecting to Genestack...')

    # get connection and create output folder
    # (the project folder is created inside the CREATED special folder)
    connection = get_connection(args)
    files_util = FilesUtil(connection)
    created_files_folder = files_util.get_special_folder(SpecialFolders.CREATED)
    project_folder = files_util.create_folder(project_name, parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    # One creator per pipeline step, all sharing the same project folder.
    # NOTE(review): BowtieBatchFilesCreator is not visible here; presumably a BatchFilesCreator
    # subclass that also takes the reference genome - confirm.
    bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC")
    vc_creator = BatchFilesCreator(variant_calling_app, project_folder, "Variants", custom_args=VC_ARGUMENTS_NO_INDELS)

    # collect files
    print('Collecting raw reads...')
    raw_reads = files_util.get_file_children(args.raw_reads_folder)
        'local_key',
        help=
        'Name of the local key to match CSV records and Genestack files names')
    parser.add_argument(
        'folder',
        help='Accession of the Genestack folder containing the files')

    args = parser.parse_args()
    csv_input = args.csv_file
    local_key = args.local_key

    print('Connecting to Genestack...')

    # get connection and application handlers
    connection = get_connection(args)
    files_util = FilesUtil(connection)

    print('Collecting files...')
    files = files_util.get_file_children(args.folder)
    print('Found %d files. Collecting metadata...' % len(files))
    infos = files_util.get_infos(files)

    # file name -> accession; if two files share a name, the later entry wins
    identifier_map = {info['name']: info['accession'] for info in infos}

    # parse the CSV file
    with open(csv_input, 'r') as the_file:
        reader = csv.DictReader(the_file, delimiter=",")
        # column names taken from the CSV header row
        field_names = reader.fieldnames

        if args.local_key not in field_names:
            raise GenestackException(
Пример #27
0
def files_utils():
    """
    Build a ``FilesUtil`` on a connection obtained from the connection parser run with no arguments.

    :return: ``FilesUtil`` instance bound to that connection
    """
    parsed_args = make_connection_parser().parse_args([])
    return FilesUtil(get_connection(parsed_args))