Example #1
0
 def setUp(self) -> None:
     """Build the processed-corpus reader, a 12-fold loader, and one test fold."""
     corpus_path = "Corpus/Processed_corpus/"
     self.corpus = Elsevier_Corpus_Reader.ScopusProcessedCorpusReader(corpus_path)
     self.loader = Elsevier_Corpus_Reader.CorpuKfoldLoader(
         self.corpus, n_folds=12, shuffle=False)
     # grab the first test-fold file id subset as the fixture subset
     self.subset = next(self.loader.fileids(test=True))
 def setUp(self) -> None:
     """Build reader, k-fold loader, a test fold, and the clustering pipeline."""
     corpus_path = "Corpus/Processed_corpus/"
     self.corpus = Elsevier_Corpus_Reader.ScopusProcessedCorpusReader(corpus_path)
     self.loader = Elsevier_Corpus_Reader.CorpuKfoldLoader(
         self.corpus, n_folds=12, shuffle=False)
     # first test-fold subset of file ids
     self.subset = next(self.loader.fileids(test=True))
     # normalise titles -> one-hot vectors -> hierarchical clustering
     steps = [
         ("norm", Corpus_Vectorizer.TitleNormalizer()),
         ("vect", Corpus_Vectorizer.OneHotVectorizer()),
         ("clusters", Corpus_Cluster.HierarchicalClustering()),
     ]
     self.model = Pipeline(steps)
def process_corpus():
    """Run the pickled-corpus pre-processor over the raw corpus in place."""
    reader = Elsevier_Corpus_Reader.ScopusRawCorpusReader(
        "Corpus/Processed_corpus/")
    # the pre-processor rewrites each pickled document via transform()
    preprocessor = Elsivier_Corpus_Pre_Processor.PickledCorpusPreProcessor(reader)
    preprocessor.transform()
    def __init__(self, path):
        """
        Initialise the author network.

        Parameters
        ----------
        path : str
            Path to the corpus on disk; a raw Scopus corpus reader is
            opened on it immediately.
        """
        self.path = path
        self.corpus = Elsevier_Corpus_Reader.ScopusRawCorpusReader(self.path)
Example #5
0
def document_feature_counter(path,
                             feature='pub_date',
                             sort=False,
                             how='count',
                             **kwargs) -> dict:
    """
    Utility for counting the number of instances observed for a given feature
    in the document meta data.

    Parameters
    ----------
    path: str
        path to the corpus
    feature: str
        feature to be counted
            'pub_date' - date of publication
            'publication' - journal in which the document is published
            'author_count' - number of authors on the document
        (NOTE: 'pub_type' is currently disabled and will raise KeyError.)
    sort: bool
        should the output dictionary be sorted or not
    how: str
        if the output should be sorted, how should it be sorted
        'class' - sorted by the class, requires a sortable class, eg. dates
        'count' - sorted by the number of counts of a class
    kwargs:
        optional arguments piped through to the underlying corpus
        reader method.

    Returns
    -------
        dict like object: a Counter when unsorted, an OrderedDict when sorted

    Raises
    ------
    KeyError
        if `feature` is not a supported feature name, or `sort` is True and
        `how` is neither 'class' nor 'count'.
    """
    corp = Elsevier_Corpus_Reader.ScopusRawCorpusReader(path)
    feature_map = {'pub_date': corp.pub_date,
                   # 'pub_type': corp.pub_type,  # disabled upstream
                   'publication': corp.publication,
                   'author_count': corp.author_count}
    # index into the (class, count) item tuple to sort by
    sort_how_map = {'class': 0,
                    'count': 1}
    # **{} is a no-op, so no need to special-case empty kwargs
    data = Counter(feature_map[feature](**kwargs))
    if not sort:
        return data
    key_index = sort_how_map[how]
    sorted_data = sorted(data.items(), key=lambda kv: kv[key_index])
    return OrderedDict(sorted_data)
def plot_clusters(X, y, **kwargs) -> None:
    """
    Scatter-plot 2-D points coloured by cluster label.

    Parameters
    ----------
    X : array-like, shape (n_samples, 2)
        2-D coordinates of the samples (only columns 0 and 1 are used).
    y : array-like, shape (n_samples,)
        Cluster label per sample, mapped to point hue.
    kwargs:
        extra keyword arguments forwarded to seaborn.scatterplot
        (previously declared but silently ignored).
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    # draw explicitly on the created axes instead of relying on the implicit
    # "current axes" (the original rebound `ax` without ever using it)
    sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, ax=ax, **kwargs)
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    from CorpusReader import Elsevier_Corpus_Reader
    from CorpusProcessingTools import Corpus_Vectorizer
    from CorpusProcessingTools import Corpus_Cluster


    corpus = Elsevier_Corpus_Reader.ScopusProcessedCorpusReader(
        "Corpus/Processed_corpus/")

    loader = Elsevier_Corpus_Reader.CorpuKfoldLoader(corpus, 100, shuffle=False)
    subset = next(loader.fileids(test=True))

    docs = list(corpus.title_tagged(fileids=subset))

    # # Plot hierarchical clustering
    # model = Pipeline([
    #     ("norm", Corpus_Vectorizer.TitleNormalizer()),
    #     ("vect", Corpus_Vectorizer.OneHotVectorizer()),
    #     ('clusters', Corpus_Cluster.HierarchicalClustering())
    # ])
    #
    # clusters = model.fit_transform(docs)
    # labels = model.named_steps['clusters'].labels

    # BUG FIX: the original referenced an undefined name `corp`; the reader
    # above is bound as `corpus`.
    # NOTE(review): Elsivier_Corpus_Pre_Processor is not imported in this
    # block — confirm it is imported at module level.
    formatter = Elsivier_Corpus_Pre_Processor.PickledCorpusPreProcessor(corpus)

    formatter.transform()


def plot_features():
    """Render the co-author network for the 2000-2002 soft-robot categories."""
    network = Author_Networks.AuthorNetworks("Corpus/Processed_corpus/")
    categories = ['soft robot/2000',
                  'soft robot/2001',
                  'soft robot/2002']
    # bokeh-based rendering of the co-author graph
    network.co_author_network_bokeh_better(categories=categories)


if __name__ == '__main__':
    # Corpus build pipeline; earlier steps stay disabled once their
    # output already exists on disk.

    # step 1: download the raw corpus from elsivier
    # download_corpus()

    # step 2: reformat the corpus for faster manipulation
    # reformat_corpus()

    # step 3: reformat the corpus for faster manipulation
    process_corpus()

    # step 4: load the corpus reader
    corpus_path = "Corpus/Processed_corpus/"
    corp = Elsevier_Corpus_Reader.ScopusProcessedCorpusReader(corpus_path)

    # step 5: plot author connectivity
    # plot_features()
 def setUp(self) -> None:
     """Open the raw Scopus corpus reader used by these tests."""
     corpus_path = "Corpus/Processed_corpus/"
     self.corp = Elsevier_Corpus_Reader.ScopusRawCorpusReader(corpus_path)
 def setUp(self) -> None:
     """Open the processed corpus reader and an unshuffled 12-fold loader."""
     corpus_path = "Corpus/Processed_corpus/"
     self.corp = Elsevier_Corpus_Reader.ScopusProcessedCorpusReader(corpus_path)
     self.loader = Elsevier_Corpus_Reader.CorpuKfoldLoader(
         self.corp, n_folds=12, shuffle=False)