def getArticleCited():
    """Give all cited articles.

    :returns: a list holding every ``ArtCitedBib`` entity yielded by
        iterating ``ArtCitedBib.all()``.
    """
    # A comprehension (rather than list(query)) keeps this a plain
    # iteration over the query, exactly like the explicit loop it replaces.
    return [cited for cited in ArtCitedBib.all()]
def getArtCitedBiblio(wordNumber=10):
    """Give the names of the most frequently cited articles.

    :param wordNumber: the max number of results.
    :returns: a list of article names (``nameArticle``), ordered by
        decreasing number of occurrences (``count``).
    """
    citedQuery = ArtCitedBib.all()
    # The documented contract is "ordered by number of occurrences".
    # Without an explicit sort the limit below would keep an arbitrary
    # subset of the cited articles instead of the most cited ones.
    citedQuery.order('-count')
    return [cited.nameArticle for cited in citedQuery.fetch(limit=wordNumber)]
def getArtMostFreqCited(wordNumber = 10):
    """Give the most frequently cited articles.

    :param wordNumber: the max number of results.
    :returns: a list of ``ArtCitedBib`` entities, fetched with the query
        sorted on ``-count``.
    """
    mostCitedQuery = ArtCitedBib.all()
    mostCitedQuery.order('-count')
    topCited = mostCitedQuery.fetch(limit=wordNumber)
    # Hand back a fresh list rather than the fetched object itself,
    # as the original element-by-element copy did.
    return [cited for cited in topCited]
def getArtCitedFromArt(nameArt):
    """Give the articles cited in an article.

    :param nameArt: name of the citing article (matched against
        ``Article.name``).
    :returns: list of the names (``nameArticle``) of the articles cited by
        the first ``Article`` whose name equals ``nameArt``; an empty list
        when no such article exists.
    """
    articleQuery = Article.all()
    articleQuery.filter('name =', nameArt)
    article = articleQuery.get()
    if article is None:
        # Unknown article: filtering citations on a None reference would
        # return records that have no parent article at all, which are not
        # citations of ``nameArt``.
        return []

    citedQuery = ArtCitedBib.all()
    citedQuery.filter('keyArticle = ', article)
    return [cited.nameArticle for cited in citedQuery]
def getReferences(fic, articlePrinc):
    """Extract the references of an article and record each cited article.

    The text following the last ``REFERENCES`` marker is split on ``[n]``
    markers. Each piece is split on commas: leading fields that look like
    an author (``X. Name`` or ``and ...``) are collected, and the first
    field that is not an author is taken as the title of the cited article.
    A cited article already stored under the same title gets its ``count``
    incremented; otherwise a new ``ArtCitedBib`` is stored with count 1.

    :param fic: full text of the article.
    :param articlePrinc: the citing article, stored as ``keyArticle`` of
        newly created ``ArtCitedBib`` records.
    :returns: None. Prints ``pas de references`` when the text contains no
        ``REFERENCES`` marker.
    """
    # "." does not match newlines, so flatten the text before matching.
    fic = fic.replace("\n", " ")
    objFic = re.match(r"(.*)REFERENCES(.*)", fic)
    if not objFic:
        print("pas de references")
        return

    # The capturing group keeps the "[n]" markers in the result; they
    # reduce to an empty title below and are therefore skipped.
    references = re.split(r"(\[[0-9]+\])", objFic.group(2))
    for oneRef in references:
        authorsArt = []
        for field in oneRef.split(","):
            field = field.strip()

            matchAuth = re.match(r"([A-Z]\..*)", field)
            if matchAuth is None:
                # Last author of a list ("..., and X. Name"). The field is
                # already stripped, so the pattern must not require a
                # leading space or it can never match.
                matchAuth = re.match(r"and\s+(.*)", field)
            if matchAuth:
                author = re.sub(r"[^a-zA-Z\s\.]", "", matchAuth.group(1))
                authorsArt.append(author)
                continue

            # First non-author field: the title, cut at its last period.
            nameMatch = re.match(r"(.*)\..*", field)
            nameArt = nameMatch.group(1) if nameMatch else field
            nameArt = re.sub(r"[^a-zA-Z\s]", "", nameArt)

            if nameArt and authorsArt:
                articleCitedBib = ArtCitedBib.all()
                articleCitedBib.filter("nameArticle =", nameArt)
                # One get() is enough: it yields None when nothing matches,
                # so a separate count() round-trip is unnecessary.
                artCitedBib = articleCitedBib.get()
                if artCitedBib is not None:
                    # Only the counter changes; the stored keyArticle and
                    # authors of the existing record are left untouched.
                    artCitedBib.count = artCitedBib.count + 1
                else:
                    artCitedBib = ArtCitedBib(
                        keyArticle=articlePrinc,
                        nameArticle=nameArt,
                        authors=authorsArt,
                        count=1
                    )
                artCitedBib.put()
            # Whatever follows the title in this reference is ignored.
            break
def deleteData():
    """Delete all the data.

    Walks the ``Master``, ``MapReduce``, ``Article``, ``Author`` and
    ``ArtCitedBib`` models in that order and deletes every entity of each,
    one entity at a time.
    """
    for model in (Master, MapReduce, Article, Author, ArtCitedBib):
        for entity in model.all():
            # Call delete through the model class, as the original did.
            model.delete(entity)