示例#1
0
文件: counts.py 项目: aashish24/lisc
    def run_collection(self,
                       db='pubmed',
                       field='TIAB',
                       api_key=None,
                       logging=None,
                       directory=None,
                       verbose=False):
        """Collect co-occurrence data for the currently defined terms.

        Parameters
        ----------
        db : str, optional, default: 'pubmed'
            Which database to access from EUtils.
        field : str, optional, default: 'TIAB'
            Field to search for term in.
            Defaults to 'TIAB', which is Title/Abstract.
        api_key : str, optional
            An API key for a NCBI account.
        logging : {None, 'print', 'store', 'file'}, optional
            What kind of logging, if any, to do for requested URLs.
        directory : str or SCDB object, optional
            Folder or database object specifying the save location.
        verbose : bool, optional, default: False
            Whether to print out updates.
        """

        # Collection settings shared by both the square and rectangular runs
        settings = {'db': db, 'field': field, 'api_key': api_key,
                    'logging': logging, 'directory': directory,
                    'verbose': verbose}

        # 'Square' mode: no B terms available, so the single list of
        #   A terms is run against itself
        self.square = not self.terms['B'].has_data

        if self.square:
            self.counts, self.terms['A'].counts, self.meta_data = \
                collect_counts(terms_a=self.terms['A'].terms,
                               inclusions_a=self.terms['A'].inclusions,
                               exclusions_a=self.terms['A'].exclusions,
                               **settings)

        # 'Rectangular' mode: run the A terms against the B terms
        else:
            self.counts, term_counts, self.meta_data = \
                collect_counts(terms_a=self.terms['A'].terms,
                               inclusions_a=self.terms['A'].inclusions,
                               exclusions_a=self.terms['A'].exclusions,
                               terms_b=self.terms['B'].terms,
                               inclusions_b=self.terms['B'].inclusions,
                               exclusions_b=self.terms['B'].exclusions,
                               **settings)
            # Per-term counts come back as a pair: (A counts, B counts)
            self.terms['A'].counts, self.terms['B'].counts = term_counts
示例#2
0
###################################################################################################

# Import function to collect data, and helper functions to analyze co-occurrence data
from lisc.collect import collect_counts
from lisc.analysis.counts import compute_normalization, compute_association_index

###################################################################################################

# Set some terms to search for
# Each term is itself a list, so that synonyms can be grouped together
terms_a = [['protein'], ['gene']]
# NOTE(review): terms_b is defined here but not used in this section;
#   presumably it is used by a later section of the tutorial - verify
terms_b = [['heart'], ['lung']]

###################################################################################################

# Collect co-occurrence data across a single list of terms
# Returns: co-occurrence counts, per-term article counts, and collection meta data
coocs, term_counts, meta_dat = collect_counts(terms_a, db='pubmed', verbose=True)

###################################################################################################

# Check how many articles were found for each combination
print(coocs)

###################################################################################################

# Print out how many articles found for each term
# Each `term` is a synonym list, so term[0] is the term's primary label
for term, count in zip(terms_a, term_counts):
    print('{:12} : {}'.format(term[0], count))

###################################################################################################
#
# When given a single set of terms, the function collects counts of each term
# co-occurring with every other term in the list.
示例#3
0
# Import function to collect data, and helper functions to analyze co-occurrence data
import lisc
from lisc.collect import collect_counts
from lisc.analysis.counts import compute_normalization, compute_association_index

# Set some terms to search for - species names, each wrapped in a list
#   so that synonyms could be grouped with them if desired
# Fix: 'Homo sapiens' was profanity-masked to 'H**o sapiens' in the original,
#   which as a literal search term would never match anything
terms_a = [['Salmonella enterica'], ['Escherichia coli'], ['Sus scrofa'],
           ['Homo sapiens'], ['Mus musculus']]

# Collect 'counts' (co-occurrence data) - across a single list of terms
coocs, term_counts, meta_dat = collect_counts(terms_a, db='nucleotide', verbose=True)

# Check how many articles were found for each combination
print(coocs)

# Print out how many articles found for each term
# term[0] is the primary label of each synonym list
for term, count in zip(terms_a, term_counts):
    print('{:12} : {}'.format(term[0], count))
示例#4
0
文件: counts.py 项目: lisc-tools/lisc
    def run_collection(self,
                       db='pubmed',
                       field='TIAB',
                       api_key=None,
                       logging=None,
                       directory=None,
                       verbose=False,
                       **eutils_kwargs):
        """Collect co-occurrence data for the currently defined terms.

        Parameters
        ----------
        db : str, optional, default: 'pubmed'
            Which database to access from EUtils.
        field : str, optional, default: 'TIAB'
            Field to search for term in.
            Defaults to 'TIAB', which is Title/Abstract.
        api_key : str, optional
            An API key for a NCBI account.
        logging : {None, 'print', 'store', 'file'}, optional
            What kind of logging, if any, to do for requested URLs.
        directory : str or SCDB, optional
            Folder or database object specifying the save location.
        verbose : bool, optional, default: False
            Whether to print out updates.
        **eutils_kwargs
            Additional settings for the EUtils API.

        Examples
        --------
        Collect co-occurrence data across a single set of added terms:

        >>> counts = Counts()
        >>> counts.add_terms(['frontal lobe', 'temporal lobe', 'parietal lobe', 'occipital lobe'])
        >>> counts.run_collection() # doctest: +SKIP

        Collect co-occurrence data across two different sets of added terms:

        >>> counts = Counts()
        >>> counts.add_terms(['frontal lobe', 'temporal lobe', 'parietal lobe', 'occipital lobe'])
        >>> counts.add_terms(['attention', 'perception', 'cognition'], dim='B')
        >>> counts.run_collection() # doctest: +SKIP
        """

        # Collection settings shared by both modes, including any extra EUtils options
        settings = dict(db=db, field=field, api_key=api_key, logging=logging,
                        directory=directory, verbose=verbose, **eutils_kwargs)

        # 'Square' mode: no B terms defined, so the single list of
        #   A terms is run against itself
        self.square = not self.terms['B'].has_terms

        if self.square:
            self.counts, self.terms['A'].counts, self.meta_data = \
                collect_counts(terms_a=self.terms['A'].terms,
                               inclusions_a=self.terms['A'].inclusions,
                               exclusions_a=self.terms['A'].exclusions,
                               labels_a=self.terms['A'].labels,
                               **settings)

        # 'Rectangular' mode: run the A terms against the B terms
        else:
            self.counts, term_counts, self.meta_data = \
                collect_counts(terms_a=self.terms['A'].terms,
                               inclusions_a=self.terms['A'].inclusions,
                               exclusions_a=self.terms['A'].exclusions,
                               labels_a=self.terms['A'].labels,
                               terms_b=self.terms['B'].terms,
                               inclusions_b=self.terms['B'].inclusions,
                               exclusions_b=self.terms['B'].exclusions,
                               labels_b=self.terms['B'].labels,
                               **settings)
            # Per-term counts come back as a pair: (A counts, B counts)
            self.terms['A'].counts, self.terms['B'].counts = term_counts