Example #1
def storeCommentsInMongoDB(comments):
    mongoClient = getMongoClient()
    collection = mongoClient.reddit_data.comments
    # bulkPostUniqueToDatabase returns the number of newly inserted documents,
    # or None if nothing was written.
    inserted = bulkPostUniqueToDatabase(collection, comments)
    if inserted is not None:
        print("Posted {:d} new comments to MongoDB".format(inserted))
    mongoClient.close()
Example #2
def buildCryptoChangepointEvents(fromTime, toTime, currency):
    validateMongoEnvironment()
    client = getMongoClient()
    collection = client.cryptoposts.changepoints
    query = {
        "coin": currency,
        "changepoint": {
            "$gte": fromTime,
            "$lte": toTime
        }
    }

    returnedData = queryDatabase(collection, query)

    # Deduplicate on the changepoint timestamp and blank out the ObjectId so
    # the documents serialise cleanly to JSON.
    changepointSet = set()
    filteredData = []
    for data in returnedData:
        data["_id"] = "null"
        if data["changepoint"] not in changepointSet:
            changepointSet.add(data["changepoint"])
            filteredData.append(data)

    with open('cryptoApp/timelinePlotter/static/cryptoChangepoints.json',
              'w+') as outputFile:
        json.dump(filteredData, outputFile)

    client.close()
Example #3
def storeSubmissionsInMongoDB(submissions):
    mongoClient = getMongoClient()
    collection = mongoClient.reddit_data.submissions
    inserted = bulkPostUniqueToDatabase(collection, submissions)
    if inserted is not None:
        print("Posted {:d} new submissions to MongoDB".format(inserted))
    mongoClient.close()
Example #4
def runEventDetector(timelineId, peakWindowSize, sensitivity, startTime,
                     endTime):
    validateMongoEnvironment()
    client = getMongoClient()
    timeline = getTimeline(timelineId, client, startTime, endTime)
    events = findEvents(timeline, peakWindowSize, sensitivity)
    postEventsToDatabase(client, timelineId, events, peakWindowSize,
                         sensitivity)
    client.close()
Example #5
def runAggregator(startTime,
                  endTime,
                  tag,
                  granularity=HOUR,
                  submissionWeight=3,
                  submissionScoreWeight=1,
                  commentWeight=2,
                  commentScoreWeight=1):
    """ Gets data from mongoDB, aggregates it, and posts the results back to mongodb. Returns an unique identifier for the timeseries.
    The aggregation values are calculated based on the provided weights.

    Arguments:
      startTime {float} -- Unix time, start of range to query.
      endTime {float} -- Unix time, end of range to query.
      tag {string} -- The tag of the data. E.g 'bitcoin'

    Keyword Arguments:
      granularity {int} -- The step size in the time dimension to aggregate on (default: {HOUR})
      submissionWeight {int} -- The weight given to the existence of a submission (default: {3})
      submissionScoreWeight {int} -- The weight given to the score of a submission (default: {1})
      commentWeight {int} -- The weight given to the existence of a comment (default: {2})
      commentScoreWeight {int} -- The weight given to the score of a comment (default: {1})

    Returns:
      string -- uuid used to tag the timeseries. Corresponds to the seriesId field in mongoDB.
    """

    validateMongoEnvironment()
    client = getMongoClient()
    print("Aggregating data about {} in the time interval [{},{}]".format(
        tag, startTime, endTime))

    (aggregationId,
     aggregation) = getAggregation(client,
                                   startTime,
                                   endTime,
                                   tag,
                                   granularity=granularity,
                                   submissionWeight=submissionWeight,
                                   submissionScoreWeight=submissionScoreWeight,
                                   commentWeight=commentWeight,
                                   commentScoreWeight=commentScoreWeight)
    aggregationCollection = client.reddit_data.aggregation
    bulkPostToDatabase(aggregationCollection, aggregation)
    postTimelineIndexToDatabase(client, aggregationId, startTime, endTime, tag,
                                granularity, submissionWeight,
                                submissionScoreWeight, commentWeight,
                                commentScoreWeight)

    client.close()
    return aggregationId
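The seriesId returned above is what the other helpers key on. Below is a minimal usage sketch (not part of the original source) of how the pieces could be chained, assuming these functions are available in the same module. The time range, tag, peakWindowSize, and sensitivity values are illustrative, and HOUR is assumed to live alongside DAY in cryptoApp.constants.unixTime as in Example #8.

from time import mktime
from datetime import datetime
from cryptoApp.constants.unixTime import DAY, HOUR  # HOUR location is assumed

# Aggregate ten days of Reddit activity about bitcoin (illustrative values).
startTime = int(mktime(datetime(2017, 12, 11).timetuple()))
endTime = startTime + 10 * DAY
seriesId = runAggregator(startTime, endTime, 'bitcoin', granularity=HOUR)

# Detect peaks in the aggregated timeline and build the plotter's JSON output
# (see Examples #4 and #7); peakWindowSize and sensitivity are illustrative.
runEventDetector(seriesId, 5, 2, startTime, endTime)
buildSocialMediaSeries(seriesId)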
Example #6
def buildCryptoDataSeries(fromTime, toTime, currency):
    validateMongoEnvironment()
    client = getMongoClient()
    collection = client.cryptoposts.crypto
    query = {"coin": currency, "time": {"$gte": fromTime, "$lt": toTime}}

    returnedData = queryDatabase(collection, query)
    # Blank out the ObjectId so the documents serialise cleanly to JSON.
    for data in returnedData:
        data["_id"] = "null"

    with open('cryptoApp/timelinePlotter/static/crypto.json',
              'w+') as outputFile:
        json.dump(returnedData, outputFile)

    client.close()
Example #7
def buildSocialMediaSeries(seriesId):
    validateMongoEnvironment()
    client = getMongoClient()
    buildTimeline(client, seriesId)
    buildSocialMediaEvents(client, seriesId)
    client.close()
Example #8
from cryptoApp.mongoService.setup import validateMongoEnvironment, getMongoClient
from cryptoApp.constants.unixTime import DAY
from time import mktime
from datetime import datetime


def deleteInRange(collection, startTime, endTime):
    """Deletes every document in the collection whose timestamp falls in [startTime, endTime]."""
    query = {"timestamp": {"$gte": startTime, "$lte": endTime}}

    return collection.delete_many(query)


# Delete everything in a 10-day window starting at 2017-12-11 00:00:00.
days = 10
startTime = int(mktime(datetime(2017, 12, 11, 0, 0, 0).timetuple()))
endTime = startTime + days * DAY

validateMongoEnvironment()
client = getMongoClient()

print("Deleted {} comments".format(
    deleteInRange(client.reddit_data.comments, startTime, endTime).deleted_count))
print("Deleted {} submissions".format(
    deleteInRange(client.reddit_data.submissions, startTime, endTime).deleted_count))

client.close()