Exemplo n.º 1
0
def getWordsMostFreqByAuthor(authorName, wordNumber = 10):
    '''Get the keyword stats for a given author.

    For every ArtiAuth entry linked to the author, the MapReduce rows of the
    linked article are read and their counts are summed per keyword.

    :param authorName: the name of the requested author.
    :param wordNumber: the maximum number of results.
    :returns: an OrderedDict of [word=>count] ordered by decreasing count,
        holding at most ``wordNumber`` entries.
        Example of use: data["lol"] = 42
    '''
    author = Author.all()
    author.filter('name = ', authorName)
    artiAuths = ArtiAuth.all()
    artiAuths.filter('keyAuthor = ', author.get())

    # int factory: an unseen word starts at 0, so counts accumulate directly.
    data = collections.defaultdict(int)
    for article in artiAuths:
        mapReduces = MapReduce.all()
        mapReduces.filter('keyArticle = ', article.keyArticle)
        for mR in mapReduces:
            try:
                data[mR.keyWord] += mR.count
            except ReferencePropertyResolveError:
                # The reference could not be resolved: report it and skip
                # this row. The parenthesised print emits the same text
                # under both Python 2 and Python 3.
                print('Pas de reference word')

    # Sort the results based on the sum of each word's occurrences.
    sortedList = sorted(data.items(), key=lambda x: x[1], reverse=True)
    # Keep exactly wordNumber entries (the previous slice stopped one short);
    # a negative limit is clamped to "no results".
    return OrderedDict(sortedList[:max(wordNumber, 0)])
Exemplo n.º 2
0
def saveMapReduce(namefic):
    """
        Convert a PDF article to text and store what is extracted from it.

        Steps, in order:
          - convert the file to text with convert_pdf_to_txt;
          - run mapper then reducer on the text to get a keyword => count
            mapping;
          - save an Author (name taken from text line 4) and an Article
            (title taken from text lines 0 and 1), keeping only letters and
            whitespace in both;
          - call getReferences(fic, article) for the cited references;
          - save the ArtiAuth link between the author and the article;
          - save one MapReduce row per keyword and add its count to the
            matching Master row, creating that row when it does not exist.

        :param namefic: the name of the PDF file
        :raises IndexError: if the converted text has fewer than five lines
    """
    # save_pdf(namefic)
    fic = convert_pdf_to_txt(namefic)

    dataDict = reducer(mapper(fic))

    lines = fic.split("\n")

    # Anything that is not an ASCII letter or whitespace becomes a space.
    # The author string is intentionally left unstripped.
    authorStr = re.sub(r"[^a-zA-Z\s]", " ", lines[4])
    author = Author(name=authorStr)
    author.put()

    titre = re.sub(r"[^a-zA-Z\s]", " ", lines[0] + lines[1]).strip()
    article = Article(name=titre, fileName=namefic)
    article.put()

    getReferences(fic, article)

    artiAuth = ArtiAuth(keyAuthor=author, keyArticle=article)
    artiAuth.put()

    for cle in dataDict.keys():
        count = dataDict[cle]

        mapReduce = MapReduce(keyWord=cle, keyArticle=article, count=count)
        mapReduce.put()

        # A single get() replaces the former count()-then-get() pair: one
        # datastore query instead of two, and no window in which the row can
        # vanish between the existence check and the fetch.
        checkMaster = Master.all()
        checkMaster.filter("keyWord =", cle)
        master = checkMaster.get()
        if master is None:
            master = Master(keyWord=cle, count=count)
        else:
            master.count = master.count + count
        master.put()
Exemplo n.º 3
0
def listAuthor():
    '''Return the names of all authors, skipping blank names.

    Every author name is written to the info log before it is filtered.

    :returns: a list of strings being the authors' names
    '''
    data = []
    logging.info("scout authors")
    for aut in Author.all():
        name = aut.name
        logging.info(name)
        # Filter out empty authors: a missing name, an empty string or a
        # whitespace-only string (previously only a single space was caught).
        if not name or not name.strip():
            continue
        data.append(name)
    return data
Exemplo n.º 4
0
def getAuthorsByWords(words):
    '''Get the authors whose name contains at least one of the given words.

    Matching is a case-sensitive substring test of each word against the
    author's name.

    :param words: a string of whitespace-separated words.
    :returns: a list of Authors, each appearing once, in query order. The
        list is empty when ``words`` holds no word at all.
    '''
    # split() without an argument drops the empty tokens that split(' ')
    # produced for empty input or repeated spaces; an empty token is a
    # substring of every name and therefore matched every author.
    dico = words.split()
    if not dico:
        return []

    # any() adds an author once however many words match, so no set-based
    # de-duplication is needed afterwards.
    return [author for author in Author.all()
            if any(w in author.name for w in dico)]
Exemplo n.º 5
0
def deleteData():
    """
        Delete all the data.

        Removes every stored Master, MapReduce, ArtiAuth, Article, Author and
        ArtCitedBib entity, one entity at a time.
    """
    # ArtiAuth is included so that no author/article link is left pointing at
    # entities that have just been deleted.
    models = (Master, MapReduce, ArtiAuth, Article, Author, ArtCitedBib)
    for model in models:
        for entity in model.all():
            entity.delete()
Exemplo n.º 6
0
def getArticleByAuthor(authorName, artNumber=10):
    '''List the names of the articles linked to an author.

    :param authorName: the author's name, as a string.
    :param artNumber: the maximum number of ArtiAuth links to fetch, and so
        the maximum length of the returned list.
    :returns: a list of strings, one article name per fetched link.
    '''
    authorQuery = Author.all()
    authorQuery.filter('name = ', authorName)

    # Restrict the author/article links to the first author matching the name.
    linkQuery = ArtiAuth.all()
    linkQuery.filter('keyAuthor = ', authorQuery.get())

    return [link.keyArticle.name for link in linkQuery.fetch(limit=artNumber)]