def buildCryptoChangepointEvents(fromTime, toTime, currency):
    """ Exports the de-duplicated changepoints of a coin to a static JSON file.

    Queries the cryptoposts.changepoints collection for documents whose
    "coin" equals the given currency and whose "changepoint" lies within
    [fromTime, toTime] (both bounds inclusive). Only the first document seen
    for each distinct "changepoint" value is kept, and the result is written
    to cryptoApp/timelinePlotter/static/cryptoChangepoints.json.

    Arguments:
      fromTime -- Lower bound (inclusive) of the "changepoint" field.
      toTime -- Upper bound (inclusive) of the "changepoint" field.
      currency -- The value to match against the "coin" field.
    """
    validateMongoEnvironment()
    client = getMongoClient()
    # try/finally guarantees the client is closed even when the query or the
    # file write raises.
    try:
        collection = client.cryptoposts.changepoints
        query = {
            "coin": currency,
            "changepoint": {
                "$gte": fromTime,
                "$lte": toTime
            }
        }

        returnedData = queryDatabase(collection, query)
        seenChangepoints = set()
        filteredData = []
        for data in returnedData:
            # Replace the database id with a placeholder string; presumably
            # the raw id is not JSON serializable -- TODO confirm.
            data["_id"] = "null"
            if data["changepoint"] not in seenChangepoints:
                seenChangepoints.add(data["changepoint"])
                filteredData.append(data)

        with open('cryptoApp/timelinePlotter/static/cryptoChangepoints.json',
                  'w+') as outputFile:
            json.dump(filteredData, outputFile)
    finally:
        client.close()
def buildTimeline(client, seriesId):
    """ Dumps the aggregation documents of one series to timeline.json.

    Arguments:
      client -- A mongoDB client; its reddit_data.aggregation collection is queried.
      seriesId -- The value to match against the "seriesId" field.
    """
    documents = queryDatabase(client.reddit_data.aggregation,
                              {"seriesId": seriesId})

    # Every "_id" is overwritten with the placeholder string "null" before
    # dumping; presumably the raw id is not JSON serializable -- TODO confirm.
    for document in documents:
        document["_id"] = "null"

    targetPath = 'cryptoApp/timelinePlotter/static/timeline.json'
    with open(targetPath, 'w+') as handle:
        json.dump(documents, handle)
示例#3
0
def getChangepoints(client, startTime, endTime, currency):
    """ Fetches the changepoints of a coin within an inclusive time window.

    Arguments:
      client -- A mongoDB client; its cryptoposts.changepoints collection is queried.
      startTime -- Lower bound (inclusive) of the "changepoint" field.
      endTime -- Upper bound (inclusive) of the "changepoint" field.
      currency -- The value to match against the "coin" field.

    Returns:
      Whatever queryDatabase returns for the built query.
    """
    timeWindow = {"$gte": startTime, "$lte": endTime}
    changepointFilter = {"coin": currency, "changepoint": timeWindow}
    return queryDatabase(client.cryptoposts.changepoints, changepointFilter)
def buildSocialMediaEvents(client, seriesId):
    """ Dumps the timeline events of one series to media_events.json.

    Arguments:
      client -- A mongoDB client; its reddit_data.timeline_events collection is queried.
      seriesId -- The value to match against the "seriesId" field.
    """
    events = queryDatabase(client.reddit_data.timeline_events,
                           {"seriesId": seriesId})

    # Every "_id" is overwritten with the placeholder string "null" before
    # dumping; presumably the raw id is not JSON serializable -- TODO confirm.
    for event in events:
        event["_id"] = "null"

    targetPath = 'cryptoApp/timelinePlotter/static/media_events.json'
    with open(targetPath, 'w+') as handle:
        json.dump(events, handle)
示例#5
0
def getMediaEvents(client, seriesId, startTime, endTime):
    """ Fetches the timeline events of a series within an inclusive time window.

    Arguments:
      client -- A mongoDB client; its reddit_data.timeline_events collection is queried.
      seriesId -- The value to match against the "seriesId" field.
      startTime -- Lower bound (inclusive) of the "time" field.
      endTime -- Upper bound (inclusive) of the "time" field.

    Returns:
      Whatever queryDatabase returns for the built query.
    """
    timeWindow = {"$gte": startTime, "$lte": endTime}
    eventFilter = {"seriesId": seriesId, "time": timeWindow}
    return queryDatabase(client.reddit_data.timeline_events, eventFilter)
示例#6
0
def castChangepointValuesToNumbers(client):
    """ Converts string-typed changepoint values in mongoDB to integers.

    Selects every document of cryptoposts.changepoints whose "changepoint"
    field is stored as a string and rewrites its "changepoint", "end" and
    "start" fields as integers.

    Arguments:
      client -- A mongoDB client; its cryptoposts.changepoints collection is updated.

    Raises:
      KeyError -- If a matched document lacks one of the three fields.
      ValueError -- If one of the three values cannot be parsed by int().
    """
    collection = client.cryptoposts.changepoints
    query = {
        "changepoint": {"$type": "string"}
    }

    numericFields = ("changepoint", "end", "start")
    for changepoint in queryDatabase(collection, query):
        # Only $set the converted fields. Sending the whole fetched document
        # back (including "_id") could overwrite concurrent changes to
        # unrelated fields with stale values.
        converted = {field: int(changepoint[field]) for field in numericFields}
        collection.update_one({'_id': changepoint["_id"]},
                              {"$set": converted},
                              upsert=False)
示例#7
0
def getTimeline(timelineId, client, startTime, endTime):
    """ Fetches the aggregation slots of a series that lie inside a time window.

    A document matches when its "startTime" is at or after startTime and its
    "endTime" is at or before endTime.

    Arguments:
      timelineId -- The value to match against the "seriesId" field.
      client -- A mongoDB client; its reddit_data.aggregation collection is queried.
      startTime -- Lower bound (inclusive) of the "startTime" field.
      endTime -- Upper bound (inclusive) of the "endTime" field.

    Returns:
      Whatever queryDatabase returns for the built query.
    """
    slotFilter = {
        "seriesId": timelineId,
        "startTime": {"$gte": startTime},
        "endTime": {"$lte": endTime},
    }
    return queryDatabase(client.reddit_data.aggregation, slotFilter)
def buildCryptoDataSeries(fromTime, toTime, currency):
    """ Exports the data series of a coin to a static JSON file.

    Queries the cryptoposts.crypto collection for documents whose "coin"
    equals the given currency and whose "time" lies within [fromTime, toTime)
    (lower bound inclusive, upper bound exclusive), and writes them to
    cryptoApp/timelinePlotter/static/crypto.json.

    Arguments:
      fromTime -- Lower bound (inclusive) of the "time" field.
      toTime -- Upper bound (exclusive) of the "time" field.
      currency -- The value to match against the "coin" field.
    """
    validateMongoEnvironment()
    client = getMongoClient()
    # try/finally guarantees the client is closed even when the query or the
    # file write raises.
    try:
        collection = client.cryptoposts.crypto
        query = {"coin": currency, "time": {"$gte": fromTime, "$lt": toTime}}

        returnedData = queryDatabase(collection, query)
        for data in returnedData:
            # Replace the database id with a placeholder string; presumably
            # the raw id is not JSON serializable -- TODO confirm.
            data["_id"] = "null"

        with open('cryptoApp/timelinePlotter/static/crypto.json',
                  'w+') as outputFile:
            json.dump(returnedData, outputFile)
    finally:
        client.close()
示例#9
0
def getAggregation(mongoClient,
                   startTime,
                   endTime,
                   tag,
                   granularity=HOUR,
                   submissionWeight=3,
                   submissionScoreWeight=1,
                   commentWeight=2,
                   commentScoreWeight=1):
    """ Gets data from mongoDB and aggregates it based on the provided weights and granularity.

    The range [startTime, endTime) is walked in steps of `granularity`. The
    last slot is not clamped, so its "endTime" may lie past `endTime` when the
    range is not a multiple of `granularity`.

    Arguments:
      mongoClient {pymongo.client} -- A mongoDB client to use for querying
      startTime {float} -- Unix time, start of range to query.
      endTime {float} -- Unix time, end of range to query.
      tag {string} -- The tag of the data. E.g 'bitcoin'

    Keyword Arguments:
      granularity {int} -- The step size in the time dimension to aggregate on (default: {HOUR})
      submissionWeight {int} -- The weight given to the existence of a submission (default: {3})
      submissionScoreWeight {int} -- The weight given to the score of a submission (default: {1})
      commentWeight {int} -- The weight given to the existence of a comment (default: {2})
      commentScoreWeight {int} -- The weight given to the score of a comment (default: {1})

    Returns:
      (string, list)
      string -- The generated uuid used to tag the timeline, corresponding to the seriesId in mongoDB
      list -- The produced aggregated data, one dict per time slot

    Raises:
      ValueError -- If granularity is not positive (the loop could never terminate).
    """
    if granularity <= 0:
        raise ValueError(
            "granularity must be positive, got {}".format(granularity))

    aggregations = []
    aggregationId = str(uuid.uuid1())
    # Ceiling division without importing math: the loop below also runs for a
    # trailing partial slot, so the progress total has to round up.
    slots = int(-((startTime - endTime) // granularity))
    currentSlot = 1

    while startTime < endTime:
        # Report progress on every tenth slot only.
        if currentSlot % 10 == 0:
            print("Aggregating slot {} of {}".format(currentSlot, slots))
        currentSlot += 1
        queryFromTime = startTime
        queryToTime = startTime + granularity

        # Two separate query objects are built on purpose, one per collection,
        # exactly as before (queryDatabase may or may not touch its argument).
        submissionQuery = buildQuery(queryFromTime, queryToTime, tag)
        commentQuery = buildQuery(queryFromTime, queryToTime, tag)

        submissions = queryDatabase(mongoClient.reddit_data.submissions,
                                    submissionQuery)
        comments = queryDatabase(mongoClient.reddit_data.comments,
                                 commentQuery)

        commentAggregation = getCommentAggregation(comments, commentWeight)
        commentScoreAggregation = getCommentScoreAggregation(
            comments, commentScoreWeight)
        submissionAggregation = getSubmissionAggregation(
            submissions, submissionWeight)
        submissionScoreAggregation = getSubmissionScoreAggregation(
            submissions, submissionScoreWeight)
        weightedSum = (commentAggregation + commentScoreAggregation +
                       submissionAggregation + submissionScoreAggregation)

        aggregations.append({
            "startTime": queryFromTime,
            "endTime": queryToTime,
            "timeGranularity": granularity,
            "tag": tag,
            "seriesId": aggregationId,
            "comments": commentAggregation,
            "commentScores": commentScoreAggregation,
            "submissions": submissionAggregation,
            "submissionScores": submissionScoreAggregation,
            "sum": weightedSum
        })
        startTime = queryToTime

    return aggregationId, aggregations
示例#10
0
import pymongo
import sys

# Script: optionally scrape, then print the submissions of the "Bitcoin"
# subreddit for one day, ordered by descending score.

# Query window. Argument order: YYYY MM DD HH MM SS
# NOTE(review): mktime presumably interprets the tuple as local time -- confirm.
startTime = int(mktime(datetime(2017, 10, 1, 0, 0, 0).timetuple()))
endTime = int(mktime(datetime(2017, 10, 1, 23, 59, 59).timetuple()))
currency = BITCOIN
tag = cryptocurrencies[currency]["tag"]

# Scraping only runs when explicitly requested on the command line.
if "--scrape" in sys.argv:
    runScraper(currency, startTime, endTime, 200, 2)

client = getMongoClient()
# try/finally guarantees the client is closed even when the query, the sort
# (e.g. a document without "score") or the printing raises.
try:
    collection = client.reddit_data.submissions

    query = {
        "timestamp": {
            "$gte": startTime,
            "$lte": endTime
        },
        "subreddit": "Bitcoin"
    }

    results = queryDatabase(collection, query)

    # Sort on score, highest first:
    results = sorted(results, key=lambda k: k["score"], reverse=True)
    pprint(results)
finally:
    client.close()