Python CellProfilerImages примеры использования

Язык программирования: Python

Пространство имен/Пакет: parcp.cpimages

Класс/Тип: CellProfilerImages

Примеров на hotexamples.com: 2

Python CellProfilerImages - 2 примера найдено. Это лучшие примеры Python кода для parcp.cpimages.CellProfilerImages, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

parse_settings(1)

split_images(1)

Пример #1

Показать файл

Файл: __init__.py Проект: ewiger/parcp

 def __init__(self, project_path):
     '''
     Wrap `project_path` in a Project, create a CellProfilerImages helper
     and start with an empty list of result indexes.
     '''
     self.project = Project(project_path)
     self.cpimages = CellProfilerImages()
     # No result folders are known until results are merged.
     self.result_indexes = []

Пример #2

Показать файл

Файл: __init__.py Проект: ewiger/parcp

class ParallelCellProfiler(object):
    '''
    Run CellProfiler 2 (CP2) over the images of a project in independent
    batches and merge the per-batch CSV measurements back together.
    '''

    def __init__(self, project_path):
        '''
        Wrap `project_path` in a Project and create the image grouping helper.
        '''
        self.project = Project(project_path)
        self.cpimages = CellProfilerImages()
        # Numeric names of the per-batch result folders; filled in by
        # merge_results().
        self.result_indexes = list()

    def get_cp2_call(self):
        '''
        Return the command prefix used to start CP2.

        Requires a symlink pointing from a home folder to the actual
        location of CP2. E.g.:

            cd && ln -s ~/dev/pelkmanslab/CellProfiler-2.1.1 CellProfiler2

        '''
        return 'python ' + os.path.expanduser(
            '~/CellProfiler2/CellProfiler.py')

    def load_image_setting(self, image_list_settings_filename):
        '''
        Load JSON file with settings on how to group images into sets -
        batches of files that can be independently processed in parallel.

        A relative filename that does not exist as given is looked up inside
        the project folder. An absolute filename that does not exist is
        silently ignored.

        Raises IOError if a relative filename is found neither as given nor
        inside the project folder.
        '''
        if not image_list_settings_filename.startswith('/') \
                and not os.path.exists(image_list_settings_filename):
            logger.info('Setting file to group images not found: %s',
                        image_list_settings_filename)
            image_list_settings_filename = os.path.join(
                self.project.path,
                image_list_settings_filename)
            if not os.path.exists(image_list_settings_filename):
                # Still missing. The filename has to be interpolated here:
                # passed as a second argument it would be treated as a
                # separate exception argument and never reach the message.
                raise IOError('Setting file to group images not found: %s'
                              % image_list_settings_filename)
        if os.path.exists(image_list_settings_filename):
            # Load custom JSON settings file
            logger.info('Parsing grouping settings for images: %s',
                        image_list_settings_filename)
            self.cpimages.parse_settings(image_list_settings_filename)

    def split_images(self):
        '''
        Make CSV lists of images for each CP2 batch. Such CSV files are an
        "input argument", telling which images to process.

        Exits the process with status 1 if no image set was generated.
        '''
        logger.info('Splitting images in: %s', self.project.path)

        images_path = self.project.images_path
        output_path = self.project.image_groups_path
        if not os.path.exists(output_path):
            logger.info('Create missing output path: %s',  output_path)
            os.makedirs(output_path)

        logger.info('Splitting images into filenames')
        self.cpimages.split_images(images_path, output_path)
        num_of_image_sets = self.cpimages.set_num

        if num_of_image_sets == 0:
            # Parenthesised single-argument form prints the same text on
            # both Python 2 and Python 3.
            print('Error, splitting of images failed. No image set were '
                  'generated.')
            # Same effect as exit(1) without depending on the `site` builtin.
            raise SystemExit(1)
        logger.info('Done. Split input images into %d lists',
                    num_of_image_sets)

    def get_cp2_batch_command(self, cp_pipeline_file, input_csv_filepath,
                              output_path):
        '''
        Build the command line that runs a CellProfiler2 process. Pass
        arguments sufficient to process a single batch of images.

        Returns the command as a single string; arguments are separated by
        one or more spaces.
        '''
        images_path = self.project.images_path
        command_lines = textwrap.wrap('''
        %(cp2_call)s -b -c -i %(images_path)s -o %(output_path)s \
            --do-not-build --do-not-fetch --pipeline=%(cp_pipeline_file)s \
            --data-file=%(csv_filepath)s -L INFO
        ''' % {
            'cp2_call': self.get_cp2_call(),
            'images_path': images_path,
            'output_path': output_path,
            'cp_pipeline_file': cp_pipeline_file,
            'csv_filepath': input_csv_filepath,
        }, width=210, break_on_hyphens=False, break_long_words=False)
        # return ' \\\n'.join(command_lines)
        return ' '.join(command_lines)

    def exec_command(self, command_code, stdoutlog, stdouterr):
        '''
        Run `command_code` through `sh`, sending stdout and stderr to the
        given log targets, and return the finished command object.
        '''
        # NOTE(review): splitting on spaces breaks any path that contains a
        # space; the command string carries no quoting to recover from that.
        args = [arg for arg in command_code.split(' ') if arg]
        print(args)
        command = sh.Command(args[0])
        result = command(*args[1:], _out=stdoutlog, _err=stdouterr)
        return result

    def run_batch(self, pipeline_filepath, group_index, image_group):
        '''
        Run CP2 on one image group, writing output and logs into the result
        folder named after `group_index`.

        Raises Exception if the command reports a non-zero exit code.
        '''
        logger.info('Running cp2 with image group: %s', image_group)
        output_path = os.path.join(self.project.results_path,
                                   str(group_index))
        command_code = self.get_cp2_batch_command(pipeline_filepath,
                                                  image_group, output_path)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        stdoutlog = os.path.join(output_path, 'stdout.log')
        stdouterr = os.path.join(output_path, 'stderr.log')
        result = self.exec_command(command_code, stdoutlog, stdouterr)
        if result.exit_code != 0:
            raise Exception('Failed (exit_code %d) to run: %s' %
                            (result.exit_code, command_code))

    def run_batches(self, pipeline_filename):
        '''
        For each input CSV file found run a CP2 job and produce output.
        Method is not parallel, but useful for debugging and testing
        of next step - merging of results.
        '''
        pipeline_filepath = os.path.join(self.project.path, pipeline_filename)
        image_groups = glob(os.path.join(self.project.image_groups_path,
                            'image_set_*.csv'))
        image_groups = sorted(image_groups)
        for group_index, image_group in enumerate(image_groups):
            # group index is appended to output path of each batch to help
            # differentiate outputs per job in merging of results after the
            # parallel step.
            self.run_batch(pipeline_filepath, group_index, image_group)

    def merge_image_results(self):
        '''
        Special case - merge measurements of image for all results.
        In this case there is no number of the image. The line itself
        represents the number, so the order must be preserved.
        '''
        # TODO: implement me

    def merge_object_results(self, object_name):
        '''
        Merge objects measurements gathered across all result folders into one
        file.

        Batches are processed in ascending index order. The first column
        (image number) and the second column (object number) of every row are
        replaced by counters running across all batches; the remaining
        columns are copied untouched.

        Raises ValueError if a batch file has a header different from the
        header of the first batch.
        '''
        logger.info('Merging results for: %s', object_name)
        if object_name == 'Image':
            return self.merge_image_results()
        csv_path_tpl = os.path.join(self.project.results_path, '%d',
                                    object_name + '.csv')
        merged_csv_path = os.path.join(self.project.results_path,
                                       object_name + '.csv')
        logger.info('Writing %s measurements into: %s',
                    object_name, merged_csv_path)
        merged_header = None
        image_count = 0
        object_count = 0
        # The context manager closes the merged file even when a batch file
        # turns out to be missing or malformed half way through.
        with open(merged_csv_path, 'w') as merged_csv:
            # Renumbering is sequential, so batches have to be visited in a
            # fixed order rather than in whatever order they were listed.
            for index in sorted(self.result_indexes):
                csv_path = csv_path_tpl % index
                with open(csv_path) as batch_csv:
                    header = batch_csv.readline().rstrip()
                    if merged_header is None:
                        merged_header = header
                        merged_csv.write(header + "\n")
                    elif header != merged_header:
                        raise ValueError(
                            'Header of %s differs from the header of the '
                            'first batch' % csv_path)
                    # First column is ImageNumber and second is ObjectNumber
                    # - take only those to avoid manipulating floats and
                    # introducing rounding problems.
                    prev_img_count = 0
                    for line in batch_csv:
                        line = line.rstrip()
                        if not line:
                            # Tolerate blank lines, e.g. at the end of file.
                            continue
                        # Assume comma-sep syntax
                        fields = line.split(',', 2)
                        image_index = int(fields[0])
                        object_index = int(fields[1])
                        # Merging is sequential, so this a new index
                        object_count += 1
                        if image_index > prev_img_count:
                            prev_img_count = image_index
                            image_count += 1
                        logger.debug('Writing image %d (global %d):'
                                     ' object %d (global %d)',
                                     image_index, image_count,
                                     object_index, object_count)
                        # fields[2:] is empty for rows that only carry the
                        # two index columns.
                        merged_line = ','.join(
                            [str(image_count), str(object_count)]
                            + fields[2:])
                        merged_csv.write(merged_line + "\n")
        logger.info('Done merging of: %s', merged_csv_path)

    def merge_results(self):
        '''
        Each job produces output stored as CSV (ExportToSpreadSheet module).
        I.e. we can run only those CP2 pipelines that contain export to CSV
        module.

        Raises ValueError if there are no numbered result folders or if
        their numbers are not exactly 0, 1, ..., N-1.
        '''
        # Assume there is always at least one batch#0. All the CSV files are
        # object names. All CSV in all batches get merged per object.
        # Expected number of objects can be computed as a sum of number of
        # lines in CSV minus one (header line).
        results_path = self.project.results_path
        self.result_indexes = sorted(
            int(result_index) for result_index in os.listdir(results_path)
            if result_index.isdigit())
        if not self.result_indexes:
            raise ValueError('No batch result folders found in: %s'
                             % results_path)
        # They all are unique and consequent - no value in between
        if self.result_indexes != list(range(len(self.result_indexes))):
            raise ValueError('Batch result folders are not numbered '
                             'consecutively from 0: %s'
                             % self.result_indexes)

        # Learn object names from 'results/0/*.csv'
        object_names = [os.path.splitext(os.path.basename(filename))[0]
                        for filename
                        in sorted(glob(os.path.join(results_path,
                                                    '0', '*.csv')))]
        for object_name in object_names:
            self.merge_object_results(object_name)