Пример #1
0
def find(db: Database, match_time: bool = False):
    """Return groups of records that share the same ``hash`` value.

    Runs a three-stage aggregation on *db*: records are grouped by their
    ``hash`` field, groups with a single member are discarded, and the
    remaining groups are ordered by their largest ``file_size``, biggest
    first.

    Each returned group is a dict with the keys ``_id`` (the shared hash),
    ``total`` (member count), ``file_size`` (largest size in the group) and
    ``items`` (one entry per member holding ``file_name``, ``file_size``,
    ``image_size`` and ``capture_time``; ``file_name`` is taken from the
    record's ``_id``).

    Args:
        db: Object exposing ``aggregate(pipeline)``.
        match_time: When true, keep only the groups for which
            ``same_time(group)`` is truthy (``same_time`` is defined
            elsewhere in this module).

    Returns:
        A list of group dicts as described above.
    """
    # Per-member details collected into each group's "items" array.
    member_fields = {
        "file_name": "$_id",
        "file_size": "$file_size",
        "image_size": "$image_size",
        "capture_time": "$capture_time",
    }
    group_by_hash = {
        "$group": {
            "_id": "$hash",
            "total": {"$sum": 1},
            "file_size": {"$max": "$file_size"},
            "items": {"$push": member_fields},
        }
    }
    # A hash seen only once is not a duplicate.
    only_repeated = {"$match": {"total": {"$gt": 1}}}
    largest_first = {"$sort": {"file_size": -1}}

    groups = db.aggregate([group_by_hash, only_repeated, largest_first])

    if not match_time:
        return list(groups)
    return [group for group in groups if same_time(group)]
    def getTweetsTimeSeries(self, mongoDbCollection: Database, stockTicker, fromDate, toDate):
        """Return the number of tweets per day for one stock ticker.

        Aggregates the documents of *mongoDbCollection* whose ``stockTicker``
        equals *stockTicker* and whose ``date`` lies in the half-open range
        ``[fromDate, toDate)``, counting them per day/month/year of ``date``.

        Args:
            mongoDbCollection: Object exposing ``aggregate(pipeline)``.
            stockTicker: Value matched against the ``stockTicker`` field.
            fromDate: Inclusive lower bound for the ``date`` field.
            toDate: Exclusive upper bound for the ``date`` field.

        Returns:
            A list of ``{'date': datetime, 'tweets': int}`` dicts sorted by
            ascending date. Only days that have at least one matching
            document appear; each ``date`` is a naive ``datetime`` at
            midnight of that day.
        """
        ticker_in_range = {
            "$match": {
                "stockTicker": stockTicker,
                "date": {"$gte": fromDate, "$lt": toDate},
            }
        }
        count_per_day = {
            "$group": {
                "_id": {
                    "day": {"$dayOfMonth": "$date"},
                    "month": {"$month": "$date"},
                    "year": {"$year": "$date"},
                },
                "count": {"$sum": 1},
            }
        }
        daily_buckets = list(mongoDbCollection.aggregate([ticker_in_range, count_per_day]))

        # Rebuild a datetime from the day/month/year parts of each bucket key.
        # TODO upgrade mongo DB to 3.6 to do a projection with $dateFromParts
        series = [
            {
                'date': datetime(
                    year=bucket["_id"]["year"],
                    month=bucket["_id"]["month"],
                    day=bucket["_id"]["day"],
                ),
                'tweets': bucket['count'],
            }
            for bucket in daily_buckets
        ]
        # The aggregation does not order its groups, so sort chronologically here.
        series.sort(key=lambda point: point['date'])

        return series