Example #1
def lisa_deseq(args):

    log = Log(target=sys.stderr, verbose=args.verbose)
    lisa = FromGenes(args.species,
                     **extract_kwargs(args, INSTANTIATION_KWARGS),
                     log=log)

    up_genes, down_genes = parse_deseq_file(args.deseq_file,
                                            lfc_cutoff=args.lfc_cutoff,
                                            pval_cutoff=args.pval_cutoff,
                                            sep=args.sep)

    results_summary = []
    all_passed = True
    for prefix, query_list in zip(['up-regulated', 'down-regulated'],
                                  [up_genes, down_genes]):

        with log.section('Modeling {}:'.format(str(prefix))):
            try:
                results, metadata = lisa.predict(
                    query_list, **extract_kwargs(args, PREDICTION_KWARGS))

                top_TFs_unique = save_and_get_top_TFs(args, prefix, results,
                                                      metadata)

                results_summary.append((prefix, top_TFs_unique))

            except AssertionError as err:
                all_passed = False
                log.append('ERROR: ' + str(err))

    print_results_multi(results_summary)

    if not all_passed:
        raise MultiError('One or more genelists raised an error')
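
# Usage sketch (not from the original source): the core calls lisa_deseq
# wraps, via the package's public FromGenes entry point. The gene list is a
# hypothetical placeholder; real queries need roughly 20-500 genes.
from lisa import FromGenes

lisa = FromGenes('hg38')
results, metadata = lisa.predict(['GATA3', 'FOXA1', 'ESR1'])  # placeholder query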
Example #2
    def __init__(self,
                 species,
                 window_size=1000,
                 download_if_not_exists=True,
                 make_new=False,
                 log=None,
                 path=None,
                 load_genes=True):

        self.species = species
        self.window_size = int(window_size)

        if log is None:
            self.log = Log()
        else:
            self.log = log

        if path is None:
            self.path = self.get_dataset_path(self.species, self.window_size)
        else:
            self.path = path

        if make_new:
            h5.File(self.path, 'w').close()
        elif not os.path.isfile(self.path):
            if download_if_not_exists and path is None:
                self.download_data()
            else:
                h5.File(self.path, 'w').close()

        #___ LOAD GENE DATA FROM PACKAGE _____
        self.genome = self.load_genome(self.species, self.window_size)

        if load_genes:
            self.load_genes()
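
# Instantiation sketch (not from the original source): with the defaults
# above, DataInterface resolves the packaged h5 dataset path for the species
# and downloads the data if it is missing; load_genes=True also loads gene
# annotations.
interface = DataInterface('hg38', window_size=1000)
print(interface.path)          # resolved dataset location
print(len(interface.genome))   # number of genome windows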
Example #3
def lisa_multi(args):

    log = Log(target=sys.stderr, verbose=args.verbose)
    lisa = FromGenes(args.species,
                     **extract_kwargs(args, INSTANTIATION_KWARGS),
                     log=log)

    query_dict = {
        os.path.basename(query.name): query.readlines()
        for query in args.query_lists
    }

    results_summary = []
    all_passed = True
    for query_name, query_list in query_dict.items():

        with log.section('Modeling {}:'.format(str(query_name))):
            try:
                results, metadata = lisa.predict(
                    query_list, **extract_kwargs(args, PREDICTION_KWARGS))

                top_TFs_unique = save_and_get_top_TFs(args, query_name,
                                                      results, metadata)

                results_summary.append((query_name, top_TFs_unique))

            except AssertionError as err:
                all_passed = False
                log.append('ERROR: ' + str(err))

    print_results_multi(results_summary)

    if not all_passed:
        raise MultiError('One or more genelists raised an error')
Example #4
    @classmethod
    def convert_bigwig(cls, bigwig, species, log=None):

        if log is None:
            log = Log()

        genome = DataInterface.load_genome(species, cls.window_size)
        coverage_array = np.zeros(len(genome))

        log.append('Converting BigWig file to coverage array ...')

        bar = LoadingBar('Progress', len(genome) // 1000 + 1, cold_start=True)

        # Open the bigwig before entering the try block so the finally
        # clause can always close it.
        coverage_bw = bw.open(bigwig)

        try:
            log.append(bar, update_line=True)

            for i, window in enumerate(genome.list_windows()):

                if window.chromosome in coverage_bw.chroms():
                    mean_coverage = coverage_bw.stats(*window.to_tuple())[0]
                    if mean_coverage is not None:  # stats() returns None over empty intervals
                        coverage_array[i] = mean_coverage

                if i % 1000 == 0:
                    log.append(bar, update_line=True)

            return np.nan_to_num(coverage_array)

        finally:
            coverage_bw.close()
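
# Standalone sketch (not from the original source) of the pyBigWig call the
# loop above relies on, assuming `bw` is the pyBigWig module: stats() with
# default arguments returns the mean signal over an interval, or None where
# the file has no data (hence the None guard and np.nan_to_num above). The
# file name is a hypothetical placeholder.
import pyBigWig

bwf = pyBigWig.open('sample.bw')
mean_cov = bwf.stats('chr1', 0, 1000)[0]  # mean coverage over chr1:0-1,000
bwf.close()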
Example #5
def main(species, motif_bed, window_size, gamma_threshold=0.95):

    genome = DataInterface.load_genome(species, window_size)

    log = Log(target=stderr)

    factor_name = None
    window_nums, scores = [], []

    with gzip.open(motif_bed, 'rb') as f:

        bed = f.readlines()

        bar = LoadingBar('Binning {} motif hits'.format(str(len(bed))),
                         len(bed),
                         cold_start=True)

        for i, line in enumerate(bed):

            chrom, start, end, factor, relscore, log_pval, strand = line.decode(
                'utf-8').strip().split('\t')

            if i == 0:
                factor_name = factor

            try:
                hit_windows = genome.get_region_windows(
                    Region(chrom, int(start), int(end)))
                window_nums.extend(hit_windows)

                scores.extend([float(log_pval) / 100] * len(hit_windows))

            except BadRegionError:
                pass

            log.append(bar, update_line=True)

    log.append('')

    log.append('Done')

    hits = sparse.csc_matrix((scores, window_nums, [0, len(window_nums)]),
                             shape=(len(genome), 1)).tocoo().tocsc()

    sample_hit_scores = np.random.choice(np.array(hits.todense()).reshape(-1),
                                         size=10000)

    min_bin_score = gamma(*gamma.fit(sample_hit_scores)).ppf(gamma_threshold)

    hit_indices = hits.indices[(hits.data >= min_bin_score) & (hits.data > 0)]

    return hit_indices, factor_name
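
# Self-contained sketch (not from the original source) of the thresholding
# step above, on synthetic scores: fit a gamma distribution to a sample of
# hit scores, then keep only hits above the gamma_threshold quantile.
import numpy as np
from scipy.stats import gamma

synthetic_scores = np.random.gamma(shape=2.0, scale=0.1, size=10000)
a, loc, scale = gamma.fit(synthetic_scores)
cutoff = gamma(a, loc, scale).ppf(0.95)        # 95th-percentile threshold
strong_hits = np.where(synthetic_scores >= cutoff)[0]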
Example #6
    @classmethod
    def convert_bigwig(cls, bigwig, species, bigwig_cmd_path):

        log = Log()

        genome = DataInterface.load_genome(species, cls.window_size)
        coverage_array = np.zeros(len(genome))

        log.append('Converting BigWig file to coverage array ...')

        if not os.path.exists(cls._get_genome_bin_path(species)):
            log.append('Writing bins ...')
            cls._write_genome_bins(species)

        # Create the temp file before entering the try block so the finally
        # clause can always unlink it.
        temp = tempfile.NamedTemporaryFile('w', delete=False)
        temp.close()

        try:
            process = subprocess.run(
                [bigwig_cmd_path, bigwig,
                 cls._get_genome_bin_path(species), temp.name],
                capture_output=True)

            if process.returncode == 0:

                with open(temp.name, 'r') as cmd_output:
                    for line in cmd_output:
                        fields = line.strip().split('\t')
                        # fields[0] is the window index from the genome bin
                        # file; fields[4] is the mean coverage for that window
                        coverage_array[int(fields[0])] = float(fields[4])

                return coverage_array

            else:
                raise AssertionError(process.stderr.decode('utf-8'))
        finally:
            os.remove(temp.name)
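
# Hypothetical invocation (not from the original source), assuming this
# classmethod lives on a coverage class (here called CoverageInterface as a
# placeholder) and that bigwig_cmd_path points at UCSC's bigWigAverageOverBed
# binary, whose tab output (name, size, covered, sum, mean0, mean) matches
# the fields parsed above.
coverage = CoverageInterface.convert_bigwig(
    'sample.bw', 'hg38', '/usr/local/bin/bigWigAverageOverBed')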
Example #7
    @classmethod
    def using_bigwig(cls,
                     species,
                     query_genes,
                     bigwig_path,
                     rp_map='enhanced_10K',
                     isd_method='chipseq',
                     background_list=[],
                     background_strategy='all',
                     num_background_genes=3000,
                     seed=2556,
                     verbose=4,
                     log=None):
        '''
*classmethod*
**lisa.FromCoverage.using_bigwig** (species, query_genes, bigwig_path, rp_map = 'enhanced_10K', isd_method = 'chipseq', background_list = [], background_strategy = 'all', num_background_genes = 3000, seed = 2556, verbose = 4, log = None)

    Run LISA FromCoverage test using a bigwig coverage file.

    Parameters:
        species: {'hg38', 'mm10'}

        query_genes (list):
            Genes-of-interest, in either Symbol or RefSeqID format. Must provide between 20 and 500 genes.

        bigwig_path (str):
            Path to bigwig file

    Returns:
        results (lisa.core.utils.LISA_Results): 
            With each key representing a table column, sorted by "summary_p_value" field. The dictionary can be passed directly to the pandas DataFrame constructor: ``results_df = pd.DataFrame(results.to_dict())``.
        metadata (dict): 
            Test metadata. Includes query genes provided and background genes that were selected.
        '''

        if log is None:
            log = Log()

        coverage_array = cls.convert_bigwig(bigwig_path, species, log=log)

        return cls(species, coverage_array, rp_map=rp_map, isd_method=isd_method,
                   verbose=verbose, log=log)\
            .predict(query_genes,
                     background_list=background_list,
                     background_strategy=background_strategy,
                     num_background_genes=num_background_genes,
                     seed=seed)
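
# Usage sketch based on the docstring above; the bigwig path and gene list
# are hypothetical placeholders (20 to 500 query genes required).
import pandas as pd
from lisa import FromCoverage

query_genes = ['GATA3', 'FOXA1', 'ESR1']  # placeholder; supply 20-500 genes
results, metadata = FromCoverage.using_bigwig('hg38', query_genes, 'sample.bw')
results_df = pd.DataFrame(results.to_dict())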
Example #8
class DataInterface:

    _config = h5_config
    data_path = os.path.join(PACKAGE_PATH, 'data')

    @classmethod
    def get_window_bedfile_str(cls, species, window_size):

        genome = cls.load_genome(species, window_size)

        window_strs = []
        for i, window in enumerate(genome.list_windows()):
            window_strs.append(str(window) + '\t' + str(i))

        return '\n'.join(window_strs)

    @classmethod
    def get_metadata_headers(cls, technology):
        return cls._config.get('metadata', technology + '_headers').split(',')

    @classmethod
    def get_dataset_url(cls, species, window_size):
        return cls._config.get('lisa_params', 'h5_path').format(
            path=cls._config.get('cistrome', 'data_url'),
            species=species,
            version=REQURED_DATASET_VERSION,
            window=str(window_size))

    @classmethod
    def get_dataset_path(cls, species, window_size):
        return cls._config.get('lisa_params', 'h5_path').format(
            path=cls.data_path,
            species=species,
            version=REQURED_DATASET_VERSION,
            window=str(window_size))

    #___ DATASET DOWNLOADING ____
    @classmethod
    def fetch_from_cistrome(cls, species, window_size):

        dataset_url = cls.get_dataset_url(species, window_size)

        if not os.path.isdir(cls.data_path):
            os.mkdir(cls.data_path)

        filename, _ = request.urlretrieve(dataset_url)
        os.rename(filename, cls.get_dataset_path(species, window_size))

    @classmethod
    def load_genome(cls, species, window_size):
        return genome_tools.Genome.from_file(
            cls._config.get('genome', 'genome').format(
                package_path=PACKAGE_PATH, species=species),
            window_size=window_size)

    def __init__(self,
                 species,
                 window_size=1000,
                 download_if_not_exists=True,
                 make_new=False,
                 log=None,
                 path=None,
                 load_genes=True):

        self.species = species
        self.window_size = int(window_size)

        if log is None:
            self.log = Log()
        else:
            self.log = log

        if path is None:
            self.path = self.get_dataset_path(self.species, self.window_size)
        else:
            self.path = path

        if make_new:
            h5.File(self.path, 'w').close()
        elif not os.path.isfile(self.path):
            if download_if_not_exists and path is None:
                self.download_data()
            else:
                h5.File(self.path, 'w').close()

        #___ LOAD GENE DATA FROM PACKAGE _____
        self.genome = self.load_genome(self.species, self.window_size)

        if load_genes:
            self.load_genes()

    def load_genes(self):
        self.log.append('Loading gene info ...')
        self.genes = gene_selection.GeneSet.from_refseq(
            self._config.get('genome', 'genes').format(
                package_path=PACKAGE_PATH, species=self.species), self.genome)

        self.gene_loc_set = genome_tools.RegionSet(
            [gene.get_tss_region() for gene in self.genes], self.genome)

        self.rp_map_locs = np.array(
            [r.annotation.get_location() for r in self.gene_loc_set.regions])

    def get_install_path(self):
        return self.data_path

    def get_windows(self):
        return '\n'.join(str(r) for r in self.genome.list_windows())

    # ____ RP MAP DATA _____

    @staticmethod
    def _make_basic_rp_map(gene_loc_set, region_set, decay):

        distance_matrix = gene_loc_set.map_intersects(
            region_set,
            lambda x, y: x.get_genomic_distance(y),
            slop_distance=5 * decay)

        distance_matrix.data = np.power(2, -distance_matrix.data / decay)

        return distance_matrix.tocsr()
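
    # Illustration (not from the original source): the decay weight applied
    # above halves a region's contribution for every `decay` bp of genomic
    # distance, e.g. with decay=10000:
    #   >>> np.power(2.0, -np.array([0, 10000, 20000, 50000]) / 10000)
    #   array([1.     , 0.5    , 0.25   , 0.03125])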

    def _make_enhanced_rp_map(self, gene_loc_set, region_set, decay):

        #make regions x exons map and exons x genes map
        try:
            indptr, indices, exons = [0], [], []
            for locus in gene_loc_set.regions:
                new_exons = locus.annotation.get_exon_regions()
                exons.extend(new_exons)
                indices.extend(range(indptr[-1], indptr[-1] + len(new_exons)))
                indptr.append(indptr[-1] + len(new_exons))

            exon_gene_map = sparse.csc_matrix(
                (np.ones(len(exons)), indices, indptr),
                shape=(len(exons), len(gene_loc_set.regions)))

            exons = genome_tools.RegionSet(exons, self.genome)
            region_exon_map = region_set.map_intersects(
                exons,
                distance_function=lambda x, y: x.overlaps(
                    y, min_overlap_proportion=0.4),
                slop_distance=0)  #REGIONS X EXONS

            region_exon_map = region_exon_map.dot(exon_gene_map).astype(bool)

            not_exon_promoter = 1 - region_exon_map.sum(axis=1).astype(bool)

            basic_rp_map = self._make_basic_rp_map(gene_loc_set, region_set,
                                                   decay)

            enhanced_rp_map = basic_rp_map.transpose().multiply(
                not_exon_promoter) + region_exon_map

            return enhanced_rp_map.transpose()

        except Exception as err:
            # Surface the failure rather than returning partially-built maps,
            # which callers cannot use in place of an RP map.
            print(repr(err))
            raise

    def build_binned_rp_map(self, style, rp_decay):

        region_set = genome_tools.RegionSet(list(self.genome.list_windows()),
                                            self.genome)

        if style == 'basic':
            return self._make_basic_rp_map(self.gene_loc_set, region_set,
                                           rp_decay)
        elif style == 'enhanced':
            return self._make_enhanced_rp_map(self.gene_loc_set, region_set,
                                              rp_decay)
        else:
            raise NotImplementedError(style)

    @staticmethod
    def set_attributes(dataset, attr_dict):
        for key, value in attr_dict.items():
            dataset.attrs[key] = value

    def get_rp_map_shape(self):
        return (len(self.genes), len(self.genome))

    def add_rp_map(self, style, rp_map):

        assert(rp_map.shape == self.get_rp_map_shape()), \
            'RP map must be of shape (num genes, num bins): ' + str(self.get_rp_map_shape())

        rp_map_path = self._config.get('rp_map', 'rp_map').format(style=style)

        rp_map = rp_map.tocsr()

        with h5.File(self.path, 'a') as data:

            if rp_map_path in data:
                del data[rp_map_path]

            group = data.create_group(rp_map_path)

            group.create_dataset('indptr',
                                 data=rp_map.indptr,
                                 dtype=np.int32,
                                 compression=COMPRESSION)
            group.create_dataset('indices',
                                 data=rp_map.indices,
                                 dtype=np.int32,
                                 compression=COMPRESSION)
            group.create_dataset('data',
                                 data=rp_map.data,
                                 dtype=np.float32,
                                 compression=COMPRESSION)

            self.set_attributes(group, dict(shape=rp_map.shape))

    def get_rp_maps(self):

        try:
            with h5.File(self.path, 'a') as data:
                return list(data['rp_maps'].keys())
        except KeyError:
            return []

    def get_rp_map(self, style):

        rp_map_path = self._config.get('rp_map', 'rp_map').format(style=style)

        with h5.File(self.path, 'r') as data:

            try:
                group = data[rp_map_path]

                rp_map = sparse.csr_matrix(
                    (group['data'][...], group['indices'][...],
                     group['indptr'][...]),
                    shape=group.attrs['shape'])
            except KeyError:
                raise DatasetNotFoundError(rp_map_path)

        return rp_map
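
    # Round-trip sketch (not from the original source) for the two methods
    # above, on an instance such as `interface = DataInterface('hg38')`:
    #   >>> rp_map = interface.build_binned_rp_map('basic', rp_decay=10000)
    #   >>> interface.add_rp_map('basic', rp_map)
    #   >>> restored = interface.get_rp_map('basic')  # same CSR matrix back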

    #___ BIN PROJECTION _____

    def check_bin_map_unique(self, bin_map):
        return len(np.unique(bin_map)) == len(bin_map)

    def project_indices(self, indices, bin_map):

        input_hits = sparse.csc_matrix(
            (np.ones_like(indices), indices, [0, len(indices)]))

        input_hits = self.project_sparse_matrix(input_hits, bin_map, None)

        return input_hits.tocoo().row

    @staticmethod
    def project_array(arr, bin_map, num_bins):
        #assert(check_bin_map_unique(bin_map[:,0]) and check_bin_map_unique(bin_map[:,1])), 'To project array, bin_map must have all one-to-one mappings'
        new_arr = np.zeros(num_bins)

        new_arr[bin_map[:, 1]] = arr[bin_map[:, 0]]

        return new_arr

    @staticmethod
    def project_sparse_matrix(input_hits, bin_map, num_bins, binarize=False):

        index_converted = input_hits.tocsc()[bin_map[:, 0], :].tocoo()

        input_hits = sparse.coo_matrix(
            (index_converted.data,
             (bin_map[index_converted.row, 1], index_converted.col)),
            shape=((num_bins, input_hits.shape[1])
                   if num_bins is not None else None)).tocsr()

        if binarize:
            input_hits.data = np.ones_like(input_hits.data)

        return input_hits
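
    # Worked example (not from the original source): bin_map rows are
    # (source_bin, destination_bin) pairs, so hits indexed against one
    # binning are re-indexed into another:
    #   >>> hits = sparse.csc_matrix(np.array([[1], [0], [2]]))  # 3 bins, 1 col
    #   >>> bin_map = np.array([[0, 4], [2, 1]])   # bin 0 -> 4, bin 2 -> 1
    #   >>> DataInterface.project_sparse_matrix(hits, bin_map, 5).toarray().ravel()
    #   array([0, 2, 0, 0, 1])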

    #___ BINDING FACTOR DATA _____
    def get_factor_hit_path(self, technology, dataset_id):
        return self._config.get('factor_binding',
                                'hits').format(technology=technology,
                                               dataset_id=dataset_id)

    def get_factor_score_path(self, technology, dataset_id):
        return self._config.get('factor_binding',
                                'scores').format(technology=technology,
                                                 dataset_id=dataset_id)

    def get_metadata(self, attributes, technology, dataset_id):
        return {
            dataset_id: {
                key: attributes[key]
                for key in self.get_metadata_headers(technology)
            }
        }

    def transpose_metadata(self, metadata, technology):

        headers = self.get_metadata_headers(technology)
        sample_ids = list(metadata.keys())

        return {
            'sample_id': sample_ids,
            **{
                key: [metadata[sample][key] for sample in sample_ids]
                for key in headers
            }
        }
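
    # Shape illustration (not from the original source), with hypothetical
    # headers ['factor', 'cell_line']:
    #   {'101': {'factor': 'ESR1', 'cell_line': 'MCF-7'},
    #    '102': {'factor': 'FOXA1', 'cell_line': 'T47D'}}
    # becomes
    #   {'sample_id': ['101', '102'],
    #    'factor': ['ESR1', 'FOXA1'],
    #    'cell_line': ['MCF-7', 'T47D']}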

    def add_binding_data(self,
                         technology,
                         dataset_id,
                         hit_bins,
                         hit_scores=None,
                         **metadata):

        hits_path = self.get_factor_hit_path(technology, dataset_id)
        scores_path = self.get_factor_score_path(technology, dataset_id)

        with h5.File(self.path, 'a') as data:
            if hits_path in data:
                del data[hits_path]

            hits = data.create_dataset(hits_path,
                                       data=np.array(hit_bins),
                                       dtype=np.int32,
                                       compression=COMPRESSION)

            if hit_scores is not None:
                assert len(hit_bins) == len(hit_scores)
                data.create_dataset(scores_path,
                                    data=np.array(hit_scores),
                                    dtype=np.float64,
                                    compression=COMPRESSION)

            self.set_attributes(hits, metadata)

    def get_binding_dataset(self, technology, dataset_id):

        metadata_headers = self.get_metadata_headers(technology)

        with h5.File(self.path, 'r') as data:

            factor_dataset_path = self.get_factor_hit_path(
                technology, dataset_id)
            scores_path = self.get_factor_score_path(technology, dataset_id)

            try:
                hit_bins = np.array(data[factor_dataset_path][...])

                attributes = data[factor_dataset_path].attrs

                if scores_path in data:
                    scores = np.array(data[scores_path][...])
                else:
                    scores = np.ones_like(hit_bins)

            except KeyError:
                raise DatasetNotFoundError(factor_dataset_path)

            metadata = self.get_metadata(attributes, technology, dataset_id)

        return hit_bins, scores, metadata

    def get_binding_data(self, technology):

        with h5.File(self.path, 'r') as data:

            dataset_ids = list(data[self._config.get(
                'factor_binding',
                'root').format(technology=technology)].keys())

            indices = []
            scores = []
            metadata = dict()
            for dataset_id in dataset_ids:
                hit_bins, hit_scores, sample_meta = self.get_binding_dataset(
                    technology, dataset_id)

                metadata.update(sample_meta)
                indices.append(hit_bins)
                scores.append(hit_scores)

        hits_matrix = indices_list_to_sparse_array(indices, len(self.genome),
                                                   scores)

        return hits_matrix.transpose(), np.array(
            dataset_ids), self.transpose_metadata(metadata, technology)

    def remove_binding_dataset(self, technology, dataset_id):

        factor_dataset_path = self.get_factor_hit_path(technology, dataset_id)

        with h5.File(self.path, 'a') as data:
            del data[factor_dataset_path]

    def list_binding_datasets(self, technology):

        try:
            with h5.File(self.path, 'r') as data:

                dataset_ids = list(data[self._config.get(
                    'factor_binding',
                    'root').format(technology=technology)].keys())

            return dataset_ids

        except KeyError:
            return []

    #____ PROFILE DATA _____
    def add_profile_data(self,
                         technology,
                         dataset_id,
                         profile,
                         rp_maps,
                         rp_map_styles,
                         norm_depth=1e5,
                         **metadata):

        assert len(rp_maps) == len(rp_map_styles)

        profile_path = self._config.get('profiles', 'profile').format(
            technology=technology, dataset_id=dataset_id)

        profile = np.array(profile)
        if len(profile.shape) == 1:
            profile = profile[:, np.newaxis]
        assert len(profile.shape) == 2
        assert profile.shape[0] == self.genome.num_windows_in_genome()

        if norm_depth is not None:
            profile = profile / profile.sum() * norm_depth

        with h5.File(self.path, 'a') as data:

            if profile_path in data:
                del data[profile_path]

            hits = data.create_dataset(profile_path,
                                       data=profile,
                                       dtype=np.float16,
                                       compression=COMPRESSION)
            self.set_attributes(hits, metadata)

            for rp_map, style in zip(rp_maps, rp_map_styles):

                rp_matrix_path = self._config.get(
                    'profiles', 'rp_matrix_col').format(technology=technology,
                                                        style=style,
                                                        dataset_id=dataset_id)

                if rp_matrix_path in data:
                    del data[rp_matrix_path]

                rp_matrix_col = data.create_dataset(rp_matrix_path,
                                                    data=rp_map.dot(profile),
                                                    dtype=np.float32,
                                                    compression=COMPRESSION)
                self.set_attributes(rp_matrix_col, metadata)

    def remove_profile(self, technology, dataset_id):

        profile_path = self._config.get('profiles', 'profile').format(
            technology=technology, dataset_id=dataset_id)

        with h5.File(self.path, 'a') as data:
            del data[profile_path]

            for style in self.get_rp_maps():
                rp_matrix_col_path = self._config.get(
                    'profiles', 'rp_matrix_col').format(technology=technology,
                                                        style=style,
                                                        dataset_id=dataset_id)
                del data[rp_matrix_col_path]

    def get_profile(self, technology, dataset_id):

        profile_path = self._config.get('profiles', 'profile').format(
            technology=technology, dataset_id=dataset_id)

        with h5.File(self.path, 'r') as data:

            try:
                profile = np.array(data[profile_path][...])

                attributes = data[profile_path].attrs
            except KeyError:
                raise DatasetNotFoundError(profile_path)

            metadata = self.get_metadata(attributes, technology, dataset_id)

        return profile, metadata

    def list_profiles(self, technology):

        profiles_dir = self._config.get('profiles',
                                        'root').format(technology=technology)

        try:
            with h5.File(self.path, 'r') as data:

                dataset_ids = list(data[profiles_dir].keys())

            return dataset_ids
        except KeyError:
            return []

    def get_rp_matrix(self, technology, style):

        with h5.File(self.path, 'r') as data:

            rp_matrix_dir = self._config.get('profiles', 'rp_matrix').format(
                technology=technology, style=style)

            dataset_ids = list(data[rp_matrix_dir].keys())

            slices = []
            for _id in dataset_ids:
                slices.append(np.array(data[rp_matrix_dir][_id][...]))

        return np.concatenate(slices, axis=1), np.array(dataset_ids)

    def download_data(self):

        with self.log.section('Grabbing {} data (~15 minutes):'.format(
                self.species)):

            self.log.append('Downloading from database ...')

            try:
                self.fetch_from_cistrome(self.species, self.window_size)
            except error.URLError as err:
                raise AssertionError(
                    'ERROR: Cannot connect to cistrome.org for data (usually '
                    'due to security settings on some servers)!\n'
                    'See the github pages for manual dataset install instructions.'
                ) from err

            self.log.append('Done')