示例#1
0
def process_method(method, properties, body) -> dict:
    """Decode a queued job message and run the analysis it describes.

    Args:
        method: Not used by this function (presumably part of the queue
            callback signature - confirm with the caller).
        properties: Not used by this function (same caveat as ``method``).
        body: Bytes holding a UTF-8 encoded JSON document. The keys
            ``job_id``, ``file_meta``, ``file_counts`` and ``iterations``
            are required; ``log`` and ``num_pc`` are required when
            ``subsampling`` is truthy. ``num_cells`` and
            ``database_version`` are optional.

    Returns:
        Whatever ``statistical_analysis`` (truthy ``iterations``) or
        ``non_statistical_analysis`` (falsy ``iterations``) returns.
    """
    job_spec = json.loads(body.decode('utf-8'))
    job_id = job_spec['job_id']
    app_logger.info('New Job Queued: {}'.format(job_id))

    # Both inputs come from the same bucket; only the index handling differs.
    meta = read_data_from_s3(job_spec['file_meta'], s3_bucket_name, index_column_first=False)
    counts = read_data_from_s3(job_spec['file_counts'], s3_bucket_name, index_column_first=True)

    # Subsampling is opt-in; its settings are only read when it is requested.
    subsampler = None
    if job_spec.get('subsampling', False):
        use_log = bool(job_spec['log'])
        num_pc = int(job_spec['num_pc'])
        # A missing or falsy 'num_cells' is passed on as None.
        num_cells = int(job_spec['num_cells']) if job_spec.get('num_cells', False) else None
        subsampler = Subsampler(use_log, num_pc, num_cells)

    # Unknown database versions silently fall back to 'latest'.
    requested_version = job_spec.get('database_version', 'latest')
    known_versions = list_local_versions() + ['latest']
    database_version = requested_version if requested_version in known_versions else 'latest'

    app = cpdb_app.create_app(verbose=False, database_file=find_database_for(database_version))

    # A truthy 'iterations' selects the statistical method.
    run_analysis = statistical_analysis if job_spec['iterations'] else non_statistical_analysis
    return run_analysis(app, meta, counts, job_id, job_spec, subsampler)
示例#2
0
 def test_non_statistical_method_subsampled_data_test__threshold__01__precision_3__num_pc_4__num_cells_4(self):
     """Run the non-statistical method on the 'test_subsampled' data.

     Uses project 'test_data', threshold 0.1, result precision 3 and a
     Subsampler built with a fixed ``debug_seed``.
     """
     # debug_seed=0 is presumably what makes the subsample reproducible -
     # confirm against the Subsampler implementation.
     seeded_subsampler = Subsampler(False, 4, 4, debug_seed=0)
     self._method_call('test_subsampled', 'test_data', 0.1, 3, seeded_subsampler)
    def cpdb_method_analysis_launcher(
        self,
        raw_meta: pd.DataFrame,
        counts: pd.DataFrame,
        counts_data: str,
        threshold: float,
        result_precision: int,
        subsampler: Subsampler = None,
    ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
        """Prepare inputs and database tables, then run the non-statistical method.

        Args:
            raw_meta: Meta table, passed through
                ``method_preprocessors.meta_preprocessor``.
            counts: Counts table, passed through ``self._counts_validations``.
            counts_data: Forwarded unchanged to ``cpdb_analysis_method.call``.
            threshold: Must lie in ``[0, 1]``.
            result_precision: Forwarded unchanged to the analysis method.
            subsampler: Optional; when given, ``counts`` is replaced by
                ``subsampler.subsample(counts)`` and ``meta`` is restricted
                to the rows labelled by the remaining counts columns.

        Returns:
            The tuple ``(means, significant_means, deconvoluted)``.

        Raises:
            ThresholdValueException: If ``threshold`` is below 0 or above 1.
        """
        threshold_out_of_range = threshold < 0 or threshold > 1
        if threshold_out_of_range:
            raise ThresholdValueException(threshold)

        meta = method_preprocessors.meta_preprocessor(raw_meta)
        counts = self._counts_validations(counts, meta)

        if subsampler is not None:
            counts = subsampler.subsample(counts)
            # Keep only the meta rows whose index label is still a counts column.
            kept_labels = list(counts)
            meta = meta.filter(items=kept_labels, axis=0)

        # Load the reference tables the analysis method needs.
        interaction_repository = self.database_manager.get_repository('interaction')
        interactions = interaction_repository.get_all_expanded(include_gene=False)

        gene_repository = self.database_manager.get_repository('gene')
        genes = gene_repository.get_all_expanded()

        composition_repository = self.database_manager.get_repository('complex')
        complex_composition = composition_repository.get_all_compositions()

        complex_repository = self.database_manager.get_repository('complex')
        complex_expanded = complex_repository.get_all_expanded()

        analysis_output = cpdb_analysis_method.call(
            meta,
            counts,
            counts_data,
            interactions,
            genes,
            complex_expanded,
            complex_composition,
            self.separator,
            threshold,
            result_precision,
        )
        means, significant_means, deconvoluted = analysis_output

        return means, significant_means, deconvoluted
def analysis(meta_filename: str,
             counts_filename: str,
             counts_data: str,
             project_name: str,
             threshold: float,
             result_precision: int,
             output_path: str,
             output_format: str,
             means_result_name: str,
             significant_means_result_name: str,
             deconvoluted_result_name: str,
             verbose: bool,
             database: Optional[str],
             subsampling: bool,
             subsampling_log: bool,
             subsampling_num_pc: int,
             subsampling_num_cells: Optional[int]
             ):
    """Run the non-statistical analysis through ``LocalMethodLauncher``.

    Known input/result exceptions are logged rather than raised: input
    problems at error level, an empty result at warning level. Any other
    ``Exception`` is logged as 'Unexpected error' (with a traceback on
    stdout when ``verbose`` is true). ``KeyboardInterrupt`` and
    ``SystemExit`` are not intercepted, so the user can still abort a run.

    Args:
        meta_filename, counts_filename, counts_data, project_name, threshold,
        output_path, output_format, means_result_name,
        significant_means_result_name, deconvoluted_result_name,
        result_precision: Forwarded unchanged to
            ``cpdb_analysis_local_method_launcher``.
        verbose: Passed to ``cpdb_app.create_app`` and to ``Subsampler``;
            also enables the traceback for unexpected errors.
        database: Passed to ``cpdb_app.create_app``.
        subsampling: When true, a ``Subsampler`` is built from the three
            ``subsampling_*`` values; otherwise no subsampler is used.
        subsampling_log, subsampling_num_pc, subsampling_num_cells:
            Positional arguments for ``Subsampler``.
    """
    try:
        subsampler = None
        if subsampling:
            subsampler = Subsampler(subsampling_log,
                                    subsampling_num_pc,
                                    subsampling_num_cells,
                                    verbose)

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        launcher.cpdb_analysis_local_method_launcher(
            meta_filename,
            counts_filename,
            counts_data,
            project_name,
            threshold,
            output_path,
            output_format,
            means_result_name,
            significant_means_result_name,
            deconvoluted_result_name,
            result_precision,
            subsampler,
        )
    except (ReadFileException, ParseMetaException, ParseCountsException, ThresholdValueException,
            AllCountsFilteredException) as e:
        # 'description' and 'hint' are optional attributes of these exceptions.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.error(str(e) +
                         (':' if description or hint else '') +
                         (' {}.'.format(description) if description else '') +
                         (' {}.'.format(hint) if hint else ''))

    except EmptyResultException as e:
        # An empty result is reported as a warning, not an error.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.warning(str(e) +
                           (':' if description or hint else '') +
                           (' {}.'.format(description) if description else '') +
                           (' {}.'.format(hint) if hint else ''))

    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C and sys.exit() must
        # propagate instead of being reported as an unexpected error.
        app_logger.error('Unexpected error')

        if verbose:
            traceback.print_exc(file=sys.stdout)
def analysis_scanpy(adata,
                    var_names,
                    obs_names,
                    obs_key,
                    var_key=None,
                    gene_id_format=None,
                    project_name='',
                    threshold=0.1,
                    result_precision='3',
                    output_path='',
                    output_format='csv',
                    means_result_name='means',
                    significant_means_result_name='significant_means',
                    deconvoluted_result_name='deconvoluted',
                    verbose=True,
                    database='latest',
                    subsampling=False,
                    subsampling_log=True,
                    subsampling_num_pc=100,
                    subsampling_num_cells=None,
                    write=False,
                    add_to_uns=True):
    """Run the non-statistical analysis on scanpy-style input.

    Known input/result exceptions are logged rather than raised: input
    problems at error level, an empty result at warning level. Any other
    ``Exception`` is logged as 'Unexpected error' (with a traceback on
    stdout when ``verbose`` is true). ``KeyboardInterrupt`` and
    ``SystemExit`` are not intercepted.

    Args:
        adata, var_names, obs_names, obs_key, var_key, gene_id_format,
        project_name, threshold, output_path, output_format,
        means_result_name, significant_means_result_name,
        deconvoluted_result_name, result_precision, write, add_to_uns:
            Forwarded unchanged to
            ``cpdb_analysis_local_method_launcher_scanpy``.
        verbose: Passed to ``cpdb_app.create_app`` and to ``Subsampler``;
            also enables the traceback for unexpected errors.
        database: Passed to ``cpdb_app.create_app``.
        subsampling: When true, a ``Subsampler`` is built from the three
            ``subsampling_*`` values; otherwise no subsampler is used.
        subsampling_log, subsampling_num_pc, subsampling_num_cells:
            Positional arguments for ``Subsampler``.

    Returns:
        The launcher's return value on success, or ``None`` when an
        exception was caught and logged.
    """
    try:
        subsampler = None
        if subsampling:
            subsampler = Subsampler(subsampling_log, subsampling_num_pc,
                                    subsampling_num_cells, verbose)

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        return launcher.cpdb_analysis_local_method_launcher_scanpy(
            adata, var_names, obs_names, obs_key, var_key, gene_id_format,
            project_name, threshold, output_path, output_format,
            means_result_name, significant_means_result_name,
            deconvoluted_result_name, result_precision, subsampler, write,
            add_to_uns)
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        # 'description' and 'hint' are optional attributes of these exceptions.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.error(
            str(e) +
            (':' if description or hint else '') +
            (' {}.'.format(description) if description else '') +
            (' {}.'.format(hint) if hint else ''))

    except EmptyResultException as e:
        # An empty result is reported as a warning, not an error.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.warning(
            str(e) +
            (':' if description or hint else '') +
            (' {}.'.format(description) if description else '') +
            (' {}.'.format(hint) if hint else ''))

    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C and sys.exit() must
        # propagate instead of being reported as an unexpected error.
        app_logger.error('Unexpected error')

        if verbose:
            traceback.print_exc(file=sys.stdout)
    def cpdb_statistical_analysis_launcher(
        self,
        raw_meta: pd.DataFrame,
        counts: pd.DataFrame,
        counts_data: str,
        iterations: int,
        threshold: float,
        threads: int,
        debug_seed: int,
        result_precision: int,
        pvalue: float,
        subsampler: Subsampler = None,
    ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
        """Prepare inputs and database tables, then run the statistical method.

        Args:
            raw_meta: Meta table, passed through
                ``method_preprocessors.meta_preprocessor``.
            counts: Counts table, passed through ``self._counts_validations``.
            counts_data, iterations, debug_seed, result_precision, pvalue:
                Forwarded unchanged to
                ``cpdb_statistical_analysis_method.call``.
            threshold: Must lie in ``[0, 1]``.
            threads: Values below 1 are replaced by ``self.default_threads``.
            subsampler: Optional; when given, ``counts`` is replaced by
                ``subsampler.subsample(counts)`` and ``meta`` is restricted
                to the rows labelled by the remaining counts columns.

        Returns:
            The tuple ``(pvalues, means, significant_means, deconvoluted)``.

        Raises:
            ThresholdValueException: If ``threshold`` is below 0 or above 1.
        """
        if threads < 1:
            fallback_notice = 'Using Default thread number: %s' % self.default_threads
            core_logger.info(fallback_notice)
            threads = self.default_threads

        threshold_out_of_range = threshold < 0 or threshold > 1
        if threshold_out_of_range:
            raise ThresholdValueException(threshold)

        meta = method_preprocessors.meta_preprocessor(raw_meta)
        counts = self._counts_validations(counts, meta)

        if subsampler is not None:
            counts = subsampler.subsample(counts)
            # Keep only the meta rows whose index label is still a counts column.
            kept_labels = list(counts)
            meta = meta.filter(items=kept_labels, axis=0)

        # Load the reference tables the analysis method needs.
        interaction_repository = self.database_manager.get_repository('interaction')
        interactions = interaction_repository.get_all_expanded(include_gene=False)

        gene_repository = self.database_manager.get_repository('gene')
        genes = gene_repository.get_all_expanded()

        composition_repository = self.database_manager.get_repository('complex')
        complex_composition = composition_repository.get_all_compositions()

        complex_repository = self.database_manager.get_repository('complex')
        complex_expanded = complex_repository.get_all_expanded()

        analysis_output = cpdb_statistical_analysis_method.call(
            meta,
            counts,
            counts_data,
            interactions,
            genes,
            complex_expanded,
            complex_composition,
            iterations,
            threshold,
            threads,
            debug_seed,
            result_precision,
            pvalue,
            self.separator,
        )
        # The method yields the frames in a different order than this
        # launcher returns them.
        deconvoluted, means, pvalues, significant_means = analysis_output

        return pvalues, means, significant_means, deconvoluted
def statistical_analysis(meta_filename: str,
                         counts_filename: str,
                         counts_data='ensembl',
                         project_name='',
                         threshold=0.1,
                         result_precision='3',
                         output_path='',
                         output_format='csv',
                         means_result_name='means',
                         significant_means_result_name='significant_means',
                         deconvoluted_result_name='deconvoluted',
                         verbose=True,
                         database='latest',
                         subsampling=False,
                         subsampling_log=True,
                         subsampling_num_pc=100,
                         subsampling_num_cells=None,
                         debug_seed='-1',
                         pvalue=0.05,
                         pvalues_result_name='pvalues',
                         iterations=1000,
                         threads=4) -> None:
    """Run the statistical analysis through ``LocalMethodLauncher``.

    The database is resolved with ``choose_database`` before the guarded
    section, so an exception raised there propagates to the caller. Inside
    the guarded section, known input/result exceptions are logged rather
    than raised: input problems at error level, an empty result at warning
    level. Any other ``Exception`` is logged as 'Unexpected error' (with a
    traceback on stdout when ``verbose`` is true). ``KeyboardInterrupt``
    and ``SystemExit`` are not intercepted.

    Args:
        meta_filename, counts_filename, counts_data, project_name,
        iterations, threshold, output_path, output_format,
        means_result_name, pvalues_result_name,
        significant_means_result_name, deconvoluted_result_name, debug_seed,
        threads, result_precision, pvalue: Forwarded unchanged to
            ``cpdb_statistical_analysis_local_method_launcher``.
        verbose: Passed to ``cpdb_app.create_app`` and to ``Subsampler``;
            also enables the traceback for unexpected errors.
        database: Passed to ``choose_database`` as ``value``; the result is
            what ``cpdb_app.create_app`` receives.
        subsampling: When true, a ``Subsampler`` is built from the three
            ``subsampling_*`` values; otherwise no subsampler is used.
        subsampling_log, subsampling_num_pc, subsampling_num_cells:
            Positional arguments for ``Subsampler``.
    """
    database = choose_database(None, None, value=database)
    try:
        subsampler = None
        if subsampling:
            subsampler = Subsampler(subsampling_log, subsampling_num_pc,
                                    subsampling_num_cells, verbose)

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        launcher.cpdb_statistical_analysis_local_method_launcher(
            meta_filename,
            counts_filename,
            counts_data,
            project_name,
            iterations,
            threshold,
            output_path,
            output_format,
            means_result_name,
            pvalues_result_name,
            significant_means_result_name,
            deconvoluted_result_name,
            debug_seed,
            threads,
            result_precision,
            pvalue,
            subsampler,
        )
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        # 'description' and 'hint' are optional attributes of these exceptions.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.error(
            str(e) +
            (':' if description or hint else '') +
            (' {}.'.format(description) if description else '') +
            (' {}.'.format(hint) if hint else ''))

    except EmptyResultException as e:
        # An empty result is reported as a warning, not an error.
        description = getattr(e, 'description', None)
        hint = getattr(e, 'hint', None)
        app_logger.warning(
            str(e) +
            (':' if description or hint else '') +
            (' {}.'.format(description) if description else '') +
            (' {}.'.format(hint) if hint else ''))

    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C and sys.exit() must
        # propagate instead of being reported as an unexpected error.
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)