def mendeleyDisciplines():
    """Build a per-discipline table of document counts and mean tweets.

    Considers every document from ``SimpleDoc.getall()`` that was published
    in June-August 2012 or in any later year and whose
    ``mendeleyDisciplines`` attribute is set.  For each discipline found on
    those documents one row is produced with: the discipline, the number of
    documents carrying it, that number as a percentage of all considered
    documents, and the mean of ``numTweets()`` over those documents.

    Rows are sorted by document count, largest first.  The resulting
    ``simpleTabular`` is handed to ``compileTex`` together with
    ``figurePath("mendeleyDisciplines2.pdf")``.
    """

    def generalCondition(doc):
        """Return True if *doc* is inside the time window and has disciplines."""
        published = doc.publicationDatetime()
        inWindow = (
            (published.year == 2012 and 6 <= published.month <= 8)
            or published.year > 2012
        )
        return inWindow and doc.mendeleyDisciplines is not None

    consideredDocs = [doc for doc in SimpleDoc.getall() if generalCondition(doc)]
    totalDocs = len(consideredDocs)

    # Group the documents by discipline in a single pass instead of
    # rescanning (and re-filtering) the whole list once per discipline.
    # dict.fromkeys() de-duplicates the disciplines of one document, so a
    # document is counted at most once per discipline, and keeps the
    # first-seen order on interpreters whose dicts preserve insertion order.
    docsByDomain = {}
    for doc in consideredDocs:
        for domain in dict.fromkeys(doc.mendeleyDisciplines):
            docsByDomain.setdefault(domain, []).append(doc)

    domainData = []
    for domain, docsInDomain in docsByDomain.items():
        numDocs = len(docsInDomain)
        meanTweets = numpy.mean([doc.numTweets() for doc in docsInDomain])
        # A document is counted under each of its disciplines, so the
        # percentages of all rows may add up to more than 100.
        share = ("%2.2f" % (float(numDocs) * 100 / totalDocs)) + "\\%"
        domainData.append((domain, numDocs, share, "%2.2f" % meanTweets))

    # Stable sort: disciplines with equal counts keep their grouping order.
    domainDataSorted = sorted(domainData, key=lambda row: row[1], reverse=True)

    compileTex(
        simpleTabular(
            ["Disziplin", "\\#Dokumente", "Anteil", "AvgTweets"],
            domainDataSorted,
            orientation="lrrr",
        ),
        figurePath("mendeleyDisciplines2.pdf"),
    )
def groupByJournalAndVolume():
    """Build a table of tweet and crossref statistics per journal volume.

    All documents from ``SimpleDoc.getall()`` are grouped by
    ``(issn, volume)``.  Only ISSNs that are not ``None`` and occur on more
    than five documents are considered, and of their groups only those with
    more than five documents are reported.  Each reported group yields one
    row: ISSN, volume, number of documents, mean ``numTweets()``, mean
    ``numCrossrefs()``, the ratio of total tweets to total crossrefs
    ("T/C"), and the range of publication years in the group.

    Rows are sorted by ISSN.  The resulting ``simpleTabular`` is handed to
    ``compileTex`` together with
    ``figurePath("correlationsInJournals2.pdf")``.
    """
    docs = list(SimpleDoc.getall())

    # Number of documents per ISSN.
    issnCounts = {}
    for doc in docs:
        issnCounts[doc.issn] = issnCounts.get(doc.issn, 0) + 1

    # Materialised as a set: this is membership-tested once per document
    # below.  A lazy ``map`` object (what Python 3 returns) would be consumed
    # by the first tests and silently reject later documents.
    validIssns = {
        issn
        for issn, count in issnCounts.items()
        if count > 5 and issn is not None
    }

    groups = {}
    for doc in docs:
        if doc.issn in validIssns:
            groups.setdefault((doc.issn, doc.volume), []).append(doc)

    correlationValues = []
    for (issn, volume), groupDocs in groups.items():
        if len(groupDocs) <= 5:
            continue  # too few documents in this volume to report

        # Real lists, because each is reduced more than once (mean and sum)
        # and numpy cannot reduce over a one-shot ``map`` iterator.
        docTweets = [doc.numTweets() for doc in groupDocs]
        docCrossrefs = [doc.numCrossrefs() for doc in groupDocs]

        years = [doc.publicationDatetime().year for doc in groupDocs]
        minYear, maxYear = min(years), max(years)
        if minYear == maxYear:
            yearRange = str(minYear)
        else:
            yearRange = str(minYear) + "-" + str(maxYear)

        # The ratio is undefined when the group has no crossrefs at all;
        # report it as "NaN" instead of dividing by zero.
        totalCrossrefs = numpy.sum(docCrossrefs)
        if totalCrossrefs == 0:
            tweetCrossrefRatio = "NaN"
        else:
            tweetCrossrefRatio = "%2.2f" % (
                float(numpy.sum(docTweets)) / totalCrossrefs
            )

        correlationValues.append([
            issn,
            volume,
            len(groupDocs),
            "%2.2f" % numpy.mean(docTweets),
            "%2.2f" % numpy.mean(docCrossrefs),
            tweetCrossrefRatio,
            yearRange,
        ])

    # Stable sort by ISSN; volumes of one journal keep their grouping order.
    correlationValues = sorted(correlationValues, key=lambda row: row[0])

    compileTex(
        simpleTabular(
            ["ISSN", "Volume", "\\#Docs", "AVG T", "AVG C", "T/C", "Years"],
            correlationValues,
            orientation="llrrrrl",
        ),
        figurePath("correlationsInJournals2.pdf"),
    )