Python MapReduce примеры использования

Язык программирования: Python

Пространство имен/Пакет: model.resource

Класс/Тип: MapReduce

Примеров на hotexamples.com: 4

Python MapReduce — найдено 4 примера. Это лучшие примеры кода на Python для model.resource.MapReduce, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

all(3)

delete(1)

put(1)

Пример #1

Показать файл

Файл: search.py Проект: Zarmakuizz/articleanalysis

def getPaperByWords(words, docNumber = 10):
    '''Rank the stored articles by how often they contain the query words.

    For every article, the MapReduce entity matching each query word is
    looked up and its ``count`` is recorded. Articles whose total count over
    all the words is zero are left out of the result.

    :param words: list of words for the query.
    :param docNumber: maximum number of articles to return; ``None`` returns
        every matching article.
    :returns: an OrderedDict of [articleName => list of counts], one count per
        entry of ``words`` (0 when the word has no MapReduce entity for the
        article), ordered by decreasing total count.
        Example of use: data["Article"] == [42, 2]
    '''
    data = {}
    for oneArticle in Article.all():
        nbOccur = []
        for strWord in words:
            mapReduce = MapReduce.all()
            mapReduce.filter('keyArticle = ', oneArticle)
            mapReduce.filter('keyWord =', strWord)
            # get() returns None when nothing matches, so a separate count()
            # query before it is not needed.
            mR = mapReduce.get()
            nbOccur.append(mR.count if mR is not None else 0)
        if sum(nbOccur) != 0:
            data[oneArticle.name] = nbOccur

    # Sort the results based on the sum of each word's occurrences.
    # sorted() is used instead of items().sort() because items() is only a
    # list on Python 2.
    sortedList = sorted(data.items(), key=lambda x: sum(x[1]), reverse=True)
    # Honour the documented limit on the number of results.
    return OrderedDict(sortedList[:docNumber])

Пример #2

Показать файл

Файл: search.py Проект: Zarmakuizz/articleanalysis

def getWordsMostFreqByAuthor(authorName, wordNumber = 10):
    '''Get the keyword stats for a given author.

    The counts of every MapReduce entity attached to the author's articles
    (found through the ArtiAuth links) are summed per keyword.

    :param authorName: the requested author's name.
    :param wordNumber: the max number of results; ``None`` returns every word.
    :returns: an OrderedDict of [word => count], ordered by decreasing count.
        It is empty when no author with that name exists.
        Example of use: data["lol"] == 42
    '''
    authorQuery = Author.all()
    authorQuery.filter('name = ', authorName)
    author = authorQuery.get()
    if author is None:
        # Unknown author: do not filter the ArtiAuth links on None.
        return OrderedDict()

    artiAuths = ArtiAuth.all()
    artiAuths.filter('keyAuthor = ', author)

    data = {}
    for artiAuth in artiAuths:
        mapReduces = MapReduce.all()
        mapReduces.filter('keyArticle = ', artiAuth.keyArticle)
        for mR in mapReduces:
            try:
                word = mR.keyWord
                count = mR.count
            except ReferencePropertyResolveError:
                # The entity's keyword reference cannot be resolved; report
                # it and skip this entity.
                print('Pas de reference word')
                continue
            data[word] = data.get(word, 0) + count

    # Sort the results based on each word's total number of occurrences.
    sortedList = sorted(data.items(), key=lambda x: x[1], reverse=True)
    # Keep the first wordNumber entries (the former wordNumber-1 bound
    # dropped one result).
    return OrderedDict(sortedList[:wordNumber])

Пример #3

Показать файл

Файл: storeData.py Проект: Zarmakuizz/articleanalysis

def saveMapReduce(namefic):
    """
        Convert the pdf file to text and store what is extracted from it.

        Runs the mapper and the reducer on the text, passes the text and the
        new article to getReferences, and saves the Author, Article, ArtiAuth,
        MapReduce and Master entities.

        The author is read from the fifth line of the text and the title from
        its first two lines, so a text with fewer than five lines raises
        IndexError.

        :param namefic : the name file
    """
    # save_pdf(namefic)
    fic = convert_pdf_to_txt(namefic)

    dataDict = reducer(mapper(fic))

    lines = fic.split("\n")

    # Everything that is neither an ASCII letter nor whitespace becomes a space.
    authorStr = re.sub(r"[^a-zA-Z\s]", " ", lines[4])
    author = Author(name=authorStr)
    author.put()

    # NOTE(review): the two title lines are joined without a separator, so the
    # last word of line 1 and the first word of line 2 may be fused - confirm
    # this is intended.
    titre = re.sub(r"[^a-zA-Z\s]", " ", lines[0] + lines[1]).strip()
    article = Article(name=titre, fileName=namefic)
    article.put()

    getReferences(fic, article)

    # Link the author to the article.
    ArtiAuth(keyAuthor=author, keyArticle=article).put()

    for cle in dataDict.keys():
        count = dataDict[cle]

        # Per-article count of the keyword.
        MapReduce(keyWord=cle, keyArticle=article, count=count).put()

        # Running total of the keyword in Master. get() returns None when no
        # Master entity exists yet, so no separate count() query is needed.
        checkMaster = Master.all()
        checkMaster.filter("keyWord =", cle)
        master = checkMaster.get()
        if master is None:
            master = Master(keyWord=cle, count=count)
        else:
            master.count = master.count + count
        master.put()

Пример #4

Показать файл

Файл: storeData.py Проект: Zarmakuizz/articleanalysis

def deleteData():
    """
        Delete all the data

        Removes every stored Master, MapReduce, ArtiAuth, Article, Author and
        ArtCitedBib entity, one entity at a time.
    """
    # ArtiAuth is included so that no author/article link is left pointing at
    # a deleted Author or Article.
    models = (Master, MapReduce, ArtiAuth, Article, Author, ArtCitedBib)
    for model in models:
        for entity in model.all():
            entity.delete()