Пример #1
0
    def get_rsvp_by_event(self, urls, filename="rsvp_events"):
        '''
        Sum the "yes" RSVP counts of past events per group, for the groups in "urls".

        Only documents of the current batch (self._batchID) are matched. If a start
        and/or end date is set, a range match on "event.time" is added as well.
        The grouped rows ("_id" = group urlname, "rsvp_count") are written through a
        CursorFormatter, and "filename" is remembered in self._files unless it is "-".
        '''
        pipeline = Agg(self._mdb.pastEventsCollection())

        # Restrict to the current batch and the requested groups.
        batch_and_groups = {
            "batchID": self._batchID,
            "event.group.urlname": {"$in": urls},
        }
        pipeline.addMatch(batch_and_groups)

        date_bounds = (self._start_date, self._end_date)
        if any(date_bounds):
            pipeline.addRangeMatch("event.time", *date_bounds)

        # One output row per group, totalling the yes-RSVPs of its events.
        per_group_total = {
            "_id": "$event.group.urlname",
            "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
        }
        pipeline.addGroup(per_group_total)

        if self._sorter:
            pipeline.addSort(self._sorter)

        if self._view:
            pipeline.create_view(self._mdb.database(), "rsvps_by_event_view")

        cursor = pipeline.aggregate()
        writer = CursorFormatter(cursor, filename, self._format)
        writer.output(fieldNames=["_id", "rsvp_count"], limit=self._limit)

        # "-" is not recorded as a file (presumably it means stdout -- confirm
        # against CursorFormatter).
        if filename == "-":
            return
        self._files.append(filename)
Пример #2
0
    def get_member_history(self, urls, filename=None):
        '''
        Go into every valid batch and see what the member count was for each group (URL).
        This uses all the valid batches to get a history of a group.
        Range is used to select batches in this case via the "timestamp" field.

        urls     : group urlnames matched against "group.urlname".
        filename : handed to CursorFormatter as given. It is recorded in self._files
                   only when a name was actually supplied and it is not "-".
        '''
        audit = Audit(self._mdb)

        validBatches = list(audit.get_valid_batch_ids())

        agg = Agg(self._mdb.groupsCollection())

        if self._start_date or self._end_date:
            agg.addRangeMatch("timestamp", self._start_date, self._end_date)

        agg.addMatch({
            "batchID": {
                "$in": validBatches
            },
            "group.urlname": {
                "$in": urls
            }
        })

        # NOTE(review): the count is read from "group.members"; confirm that field
        # name against the stored group documents.
        agg.addProject({
            "_id": 0,
            "timestamp": 1,
            "batchID": 1,
            "urlname": "$group.urlname",
            "count": "$group.members"
        })

        # No $group stage is applied: the output keeps one row per matched
        # group document (i.e. per group per batch).

        if self._sorter:
            agg.addSort(self._sorter)

        if self._view:
            agg.create_view(self._mdb.database(), "groups_view")

        formatter = CursorFormatter(agg.aggregate(), filename, self._format)
        formatter.output(
            fieldNames=["timestamp", "batchID", "urlname", "count"],
            datemap=["timestamp"],
            limit=self._limit)

        # The default filename is None; previously that None was appended to
        # self._files because None != "-". Only record real file names.
        if filename is not None and filename != "-":
            self._files.append(filename)
Пример #3
0
    def joined_by_year(self):
        '''
        Count the members of the last valid batch by the year of their join_time.

        Returns the result of Agg.aggregate(); each grouped document carries the
        year as "_id" and the number of members as "total_registered".
        '''
        pipeline = Agg(self._collection)

        # Only look at the most recent valid batch.
        latest_batch = {"batchID": self._audit.get_last_valid_batch_id()}
        pipeline.addMatch(latest_batch)

        # Reduce each member to id, name and the year extracted from join_time.
        member_with_year = {
            "_id": 0,
            "member_id": "$member.member_id",
            "member_name": "$member.member_name",
            "year": {"$year": "$member.join_time"},
        }
        pipeline.addProject(member_with_year)

        registrations_per_year = {
            "_id": "$year",
            "total_registered": {"$sum": 1},
        }
        pipeline.addGroup(registrations_per_year)

        return pipeline.aggregate()
Пример #4
0
    def get_RSVP_history(self, urls, filename=None):
        '''
        Report, for the past events of the groups in "urls", the event time and
        the total number of "yes" RSVPs at that time.

        If a start and/or end date is set, events are additionally range-matched on
        "event.time". A truthy "filename" replaces self._filename before output.
        The value returned by the formatter's output() is appended to self._files
        unless it is "-".
        '''
        pipeline = Agg(self._mdb.pastEventsCollection())

        pipeline.addMatch({"event.group.urlname": {"$in": urls}})

        date_bounds = (self._start_date, self._end_date)
        if any(date_bounds):
            pipeline.addRangeMatch("event.time", *date_bounds)

        # Flatten the nested event document into the fields used below.
        flattened_event = {
            "timestamp": "$event.time",
            "event": "$event.name",
            "country": "$event.venue.country",
            "rsvp_count": "$event.yes_rsvp_count",
        }
        pipeline.addProject(flattened_event)

        # Keep only rows whose timestamp is a real date before grouping on it.
        pipeline.addMatch({"timestamp": {"$type": "date"}})

        rsvps_per_timestamp = {
            "_id": "$timestamp",
            "rsvp_count": {"$sum": "$rsvp_count"},
        }
        pipeline.addGroup(rsvps_per_timestamp)

        if self._sorter:
            pipeline.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            pipeline.create_view(self._mdb.database(), "rsvps_view")

        cursor = pipeline.aggregate()
        writer = CursorFormatter(cursor, self._filename, self._format)
        written_to = writer.output(fieldNames=["_id", "rsvp_count"],
                                   datemap=["_id"],
                                   limit=self._limit)

        if written_to == "-":
            return
        self._files.append(written_to)
Пример #5
0
    def getMembers(self, urls, filename=None):
        '''
        Get the member count for each group in "urls" from the current batch.

        The start/end date range is not applied here. A truthy "filename"
        replaces self._filename before the rows are written through a
        CursorFormatter.
        '''
        pipeline = Agg(self._mdb.groupsCollection())

        # Current batch only, limited to the requested groups.
        current_batch_groups = {
            "batchID": {"$in": [self._batchID]},
            "group.urlname": {"$in": urls},
        }
        pipeline.addMatch(current_batch_groups)

        member_columns = {
            "_id": 0,
            "urlname": "$group.urlname",
            "country": "$group.country",
            "batchID": 1,
            "member_count": "$group.member_count",
        }
        pipeline.addProject(member_columns)

        if self._sorter:
            pipeline.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            pipeline.create_view(self._mdb.database(), "members_view")

        cursor = pipeline.aggregate()
        writer = CursorFormatter(cursor, self._filename, self._format)
        columns = ["urlname", "country", "batchID", "member_count"]
        writer.output(fieldNames=columns)
Пример #6
0
class Test(unittest.TestCase):
    '''
    Tests for CursorFormatter (file output, fieldMapper, dateMapField) and
    Nested_Dict, using an Agg pipeline over the members collection.
    '''
    def setUp(self):
        # Every test starts with a fresh pipeline on the members collection.
        self._mdb = MUGAlyserMongoDB()
        self._agg = Agg(self._mdb.membersCollection())

    def tearDown(self):
        pass

    def testFormatter(self):
        '''Write one member through CursorFormatter and check the file exists.'''
        self._agg.addMatch({"member.member_name": "Joe Drumgoole"})
        projection = {
            "member.member_name": 1,
            "_id": 0,
            "member.join_time": 1,
            "member.city": 1,
        }
        self._agg.addProject(projection)
        results = self._agg.aggregate()

        prefix, name, ext = "agg_test_", "JoeDrumgoole", "json"
        expected_path = prefix + name + "." + ext

        self._formatter = CursorFormatter(results,
                                          prefix=prefix,
                                          name=name,
                                          ext=ext)
        columns = ["member.member_name", "member.join_time", "member.city"]
        self._formatter.output(fieldNames=columns,
                               datemap=["member.join_time"])

        self.assertTrue(os.path.isfile(expected_path))
        # Clean up the file the formatter produced.
        os.unlink(expected_path)

    def testFieldMapper(self):
        '''fieldMapper keeps only the listed (possibly dotted) fields.'''
        # NOTE(review): has_key() is kept as in the original; on plain dicts it
        # only exists in Python 2.

        # Single field.
        mapped = CursorFormatter.fieldMapper({"a": "b"}, ['a'])
        self.assertTrue(mapped.has_key("a"))

        # Subset of top-level fields.
        source = {"a": "b", "c": "d", "e": "f"}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c'])
        for key in ("a", "c"):
            self.assertTrue(mapped.has_key(key))
        self.assertFalse(mapped.has_key("e"))

        # Dotted path into a nested document with a single key.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for key in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(key))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))

        # Dotted path must not drag along sibling keys of the nested document.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x", "y": "p"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for key in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(key))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))

        # Two nested documents, one selected key from each.
        source = {
            "a": "b",
            "c": "d",
            "e": "f",
            "z": {"w": "x", "y": "p"},
            "g": {"h": "i", "j": "k"},
        }
        mapped = CursorFormatter.fieldMapper(source,
                                             ['a', 'c', "z.w", "g.j"])
        for key in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(key))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))
        self.assertTrue(mapped.has_key("g"))
        self.assertTrue(mapped['g'].has_key("j"))
        self.assertFalse(mapped['g'].has_key("h"))

    def testNestedDict(self):
        '''Nested_Dict supports plain keys and dotted keys via set_value/get_value.'''
        nested = Nested_Dict({})
        self.assertFalse(nested.has_key("hello"))

        nested.set_value("hello", "world")
        self.assertTrue(nested.has_key("hello"))
        self.assertEqual(nested.get_value("hello"), "world")
        self.assertRaises(KeyError, nested.get_value, "Bingo")

        # A dotted key creates a nested document under its first component.
        nested.set_value("member.name", "Joe Drumgoole")
        self.assertEqual(nested.get_value("member"),
                         {"name": "Joe Drumgoole"})

    def test_dateMapField(self):
        '''dateMapField runs on a datetime field; the result is not inspected.'''
        sample = {"a": 1, "b": datetime.datetime.now()}
        CursorFormatter.dateMapField(sample, "b")
Пример #7
0
class Members(MUGData):
    '''
    Queries and aggregations over the member documents of a collection
    ("members" unless another collection name is given).
    '''
    def __init__(self, mdb, collection_name=None):
        '''
        Initialise the MUGData base with the chosen collection and prepare the
        pipeline used by get_members(): count occurrences of each
        "member.name", largest count first.
        '''
        chosen = "members" if collection_name is None else collection_name
        super(Members, self).__init__(mdb, chosen)

        # self._collection is presumably set by MUGData.__init__ -- confirm there.
        name_counts = self._membersAgg = Agg(self._collection)
        name_counts.addMatch({"member.name": {"$exists": 1}})
        name_counts.addProject({"_id": 0, "name": "$member.name"})
        name_counts.addGroup({"_id": "$name", "occurences": {"$sum": 1}})
        largest_first = Sorter(occurences=pymongo.DESCENDING)
        name_counts.addSort(largest_first)

        self._memberCount = 0

    def get_all_members(self, query=None):
        '''
        Return the result of self.find(query).
        '''
        return self.find(query)

    def get_by_name(self, name):
        '''
        Return the "member" sub-document of the first document whose
        "member.member_name" equals name, or None if there is no match.
        '''
        found = self.find_one({"member.member_name": name})
        return None if found is None else found["member"]

    def get_by_ID(self, member_id):
        '''
        Return the "member" sub-document of the first document whose
        "member.member_id" equals member_id, or None if there is no match.
        '''
        found = self.find_one({"member.member_id": member_id})
        return None if found is None else found["member"]

    def get_by_join_date(self, start, end):
        '''
        Find documents whose "member.join_time" lies between start and end, inclusive.
        '''
        join_window = {"$gte": start, "$lte": end}
        return self.find({"member.join_time": join_window})

    def joined_by_year(self):
        '''
        Count the members of the last valid batch by the year of their join_time.

        Each grouped document carries the year as "_id" and the count as
        "total_registered".
        '''
        pipeline = Agg(self._collection)

        latest_batch = {"batchID": self._audit.get_last_valid_batch_id()}
        pipeline.addMatch(latest_batch)

        member_with_year = {
            "_id": 0,
            "member_id": "$member.member_id",
            "member_name": "$member.member_name",
            "year": {"$year": "$member.join_time"},
        }
        pipeline.addProject(member_with_year)

        pipeline.addGroup({"_id": "$year", "total_registered": {"$sum": 1}})

        return pipeline.aggregate()

    def distinct_members(self):
        '''
        Return the distinct values of "member.name" in the collection.
        '''
        return self._collection.distinct("member.name")

    def get_members(self):
        '''
        Run the name-occurrence pipeline prepared in __init__.
        '''
        return self._membersAgg.aggregate()

    def summary(self, doc):
        '''
        Format name, id and country of doc["member"] on one line.
        '''
        member = doc["member"]
        fields = (member["member_name"], member["member_id"],
                  member["country"])
        return "name: %s, id: %s, country: %s" % fields

    def one_line(self, doc):
        '''
        Format name and id of doc["member"] on one line.
        '''
        member = doc["member"]
        return "name : %s, id: %s" % (member["member_name"],
                                      member["member_id"])