else: retweetUser = None text = rawText tweetData = [text, user, retweetUser, timestr2timestamp(date)] tweets.append(tweetData) return tweets def timestr2timestamp(timestr): return calendar.timegm(parse(timestr).timetuple()) def findRelevantData2(doc): for source in doc['sources']: if source['name'] == 'mendeley': events = source['events'] if len(events) != 0: print "publication_outlet: " + str(events.get('publication_outlet', None)) print "issue: " + str(events.get('issue', None)) print "type: " + str(events.get('type', None)) print "volume: " + str(events.get('volume', None)) print "year: " + str(events.get('year', None)) print "" <<<<<<< HEAD doForEachPlosDoc(findRelevantData, verbose=True) ======= doForEachDocInPath("/home/simon/data", findRelevantData, verbose=True) >>>>>>> c1d847f9933d66a47795fd212c3631dbcb27ee29 file.close()
import json
from os import listdir
from os.path import isfile, join
from os.path import basename

from main.util.common import (
    plosDataFiles,
    plosDataBaseDir,
    readAsJson,
    writeJsonToData,
    doForEachPlosDoc,
)

# Filled by getRelevantData(): the 'user' value of every twitter event seen.
users = []


def getRelevantData(plosDoc):
    """Append the user of each twitter event in *plosDoc* to ``users``.

    Args:
        plosDoc: Mapping with a ``'sources'`` list.  Sources whose ``'name'``
            is ``'twitter'`` contribute one entry per item of their
            ``'events'`` list, taken from ``event['event']['user']``.
    """
    for src in plosDoc['sources']:
        if src['name'] != 'twitter':
            continue
        for tweetEvent in src['events']:
            users.append(tweetEvent['event']['user'])


# Run the collector via doForEachPlosDoc, then write the result out.
doForEachPlosDoc(getRelevantData)
writeJsonToData(users, "users.json")