Exemplo n.º 1
0
    def get_new_members(self, urls, filename=None):
        '''
        Output one row per member/chapter pair for the chapters in urls.

        The pipeline runs on the pro members collection for the current
        batch. When a start or end date is set, "member.join_time" is
        range-matched against them. Each output row carries the chapter
        urlname ("group"), the member name ("name") and the join time
        ("join_date").

        urls     : chapter urlnames, matched with "$in".
        filename : handed unchanged to CursorFormatter and afterwards
                   recorded in self._files unless it equals "-".
        '''
        # NOTE(review): with the default filename=None, None itself is
        # appended to self._files at the end -- confirm that is intended.
        members = Agg(self._mdb.proMembersCollection())
        members.addMatch({"batchID": self._batchID})

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            members.addRangeMatch("member.join_time",
                                  self._start_date,
                                  self._end_date)

        # Unwind the chapters array, then keep only the requested chapters.
        members.addUnwind("$member.chapters")
        members.addMatch({"member.chapters.urlname": {"$in": urls}})

        projection = {
            "_id": 0,
            "group": "$member.chapters.urlname",
            "name": "$member.member_name",
            "join_date": "$member.join_time",
        }
        members.addProject(projection)

        if self._sorter:
            members.addSort(self._sorter)

        if self._view:
            members.create_view(self._mdb.database(), "new_members_view")

        columns = ["group", "name", "join_date"]
        writer = CursorFormatter(members, filename, self._format)
        writer.output(fieldNames=columns,
                      datemap=['join_date'],
                      limit=self._limit)

        if filename != "-":
            self._files.append(filename)
Exemplo n.º 2
0
def get_batches(mdb, start, end, limit=None):
    '''
    Output the batches returned by Audit.getCurrentValidBatches(start, end).

    mdb   : handed to the Audit constructor.
    start : lower bound forwarded to getCurrentValidBatches.
    end   : upper bound forwarded to getCurrentValidBatches.
    limit : forwarded to CursorFormatter.output.

    The "batchID", "end" and "start" fields are output, with "start" and
    "end" passed as the date-mapped fields.
    '''
    batches = Audit(mdb).getCurrentValidBatches(start, end)
    writer = CursorFormatter(batches)
    columns = ["batchID", "end", "start"]
    writer.output(columns, datemap=["start", "end"], limit=limit)
Exemplo n.º 3
0
    def get_group_totals(self, urls, filename=None):
        '''
        Output event and RSVP totals per group and per year.

        Past events of the current batch whose group urlname is in urls are
        grouped by (urlname, year of "event.time"). Each group yields the
        number of events and the sum of "event.yes_rsvp_count".

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        totals = Agg(self._mdb.pastEventsCollection())

        selector = {
            "batchID": self._batchID,
            "event.status": "past",
            "event.group.urlname": {"$in": urls},
        }
        totals.addMatch(selector)

        # NOTE(review): the range is applied to "groups.founded_date" while
        # every other stage here reads "event.*" fields -- confirm the field.
        if self._start_date or self._end_date:
            totals.addRangeMatch("groups.founded_date",
                                 self._start_date,
                                 self._end_date)

        group_key = {
            "urlname": "$event.group.urlname",
            "year": {"$year": "$event.time"},
        }
        totals.addGroup({
            "_id": group_key,
            "event_count": {"$sum": 1},
            "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
        })

        # Flatten the compound _id back into plain columns.
        totals.addProject({
            "_id": 0,
            "group": "$_id.urlname",
            "year": "$_id.year",
            "event_count": 1,
            "rsvp_count": 1,
        })

        if self._sorter:
            totals.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            totals.create_view(self._mdb.database(), "group_totals_view")

        columns = ["year", "group", "event_count", "rsvp_count"]
        writer = CursorFormatter(totals, self._filename, self._format)
        writer.output(fieldNames=columns, limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 4
0
    def testFieldMapper(self):
        '''
        Check that CursorFormatter.fieldMapper keeps the requested fields,
        including dotted paths into nested documents, and drops the rest.
        '''
        # NOTE(review): the assertions call has_key on the mapped result and
        # on its nested values; a plain Python 3 dict has no such method, so
        # this presumably relies on what fieldMapper returns -- confirm.
        source = {"a": "b"}
        mapped = CursorFormatter.fieldMapper(source, ['a'])
        self.assertTrue(mapped.has_key("a"))

        # Top-level keys that were not requested are expected to be absent.
        source = {"a": "b", "c": "d", "e": "f"}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c'])
        for kept in ("a", "c"):
            self.assertTrue(mapped.has_key(kept))
        self.assertFalse(mapped.has_key("e"))

        # A dotted path is expected to select a key inside a nested document.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))

        # Siblings of the selected nested key are expected to be absent.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x", "y": "p"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))

        # Two nested documents, one selected key in each.
        source = {
            "a": "b",
            "c": "d",
            "e": "f",
            "z": {"w": "x", "y": "p"},
            "g": {"h": "i", "j": "k"},
        }
        wanted = ['a', 'c', "z.w", "g.j"]
        mapped = CursorFormatter.fieldMapper(source, wanted)
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))
        self.assertTrue(mapped.has_key("g"))
        self.assertTrue(mapped['g'].has_key("j"))
        self.assertFalse(mapped['g'].has_key("h"))
Exemplo n.º 5
0
    def get_rsvps(self, urls, filename=None):
        '''
        Output, per attendee and group, how many attendee records exist.

        Attendee records of the current batch whose event group urlname is
        in urls are projected down to attendee name, group and event name,
        then counted per (attendee, group) pair.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        rsvps = Agg(self._mdb.attendeesCollection())

        selector = {
            "batchID": self._batchID,
            "info.event.group.urlname": {"$in": urls},
        }
        rsvps.addMatch(selector)

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            rsvps.addRangeMatch("info.event_time",
                                self._start_date,
                                self._end_date)

        rsvps.addProject({
            "_id": 0,
            "attendee": "$info.attendee.member.name",
            "group": "$info.event.group.urlname",
            "event_name": "$info.event.name",
        })

        pair = {"attendee": "$attendee", "group": "$group"}
        rsvps.addGroup({"_id": pair, "event_count": {"$sum": 1}})

        # Flatten the compound _id back into plain columns.
        rsvps.addProject({
            "_id": 0,
            "attendee": "$_id.attendee",
            "group": "$_id.group",
            "event_count": 1,
        })

        if self._sorter:
            rsvps.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            rsvps.create_view(self._mdb.database(), "rsvps_view")

        columns = ["attendee", "group", "event_count"]
        writer = CursorFormatter(rsvps, self._filename, self._format)
        writer.output(fieldNames=columns, limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 6
0
    def get_groups(self, urls, filename=None):
        '''
        Output urlname, member count and founding date for the groups in urls.

        When self._pro_account is truthy the member count and date come from
        "group.member_count" and "group.founded_date"; otherwise they come
        from "group.members" and "group.created". The chosen date field is
        also the one used for the optional start/end range match. A line
        naming the chosen mode is printed.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        groups = Agg(self._mdb.groupsCollection())
        groups.addMatch({
            "batchID": self._batchID,
            "group.urlname": {"$in": urls},
        })

        # The two account types differ only in which fields hold the data.
        if self._pro_account:
            date_field = "group.founded_date"
            members_field = "$group.member_count"
            banner = "Using pro search"
        else:
            date_field = "group.created"
            members_field = "$group.members"
            banner = "Using nopro search"

        if self._start_date or self._end_date:
            groups.addRangeMatch(date_field, self._start_date, self._end_date)

        groups.addProject({
            "_id": 0,
            "urlname": "$group.urlname",
            "members": members_field,
            "founded": "$" + date_field,
        })
        print(banner)

        if self._sorter:
            groups.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            groups.create_view(self._mdb.database(), "groups_view")

        columns = ["urlname", "members", "founded"]
        writer = CursorFormatter(groups, self._filename, self._format)
        writer.output(fieldNames=columns,
                      datemap=["founded"],
                      limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 7
0
    def get_active_users(self, urls, filename=None):
        '''
        Output, per attendee name, the number of "yes" RSVPs and the set of
        groups those RSVPs belong to.

        Only attendee records of the current batch whose event group urlname
        is in urls and whose RSVP response is "yes" are counted. The time
        window is whatever self._start_date / self._end_date select on
        "info.event_time"; no window is applied when both are unset.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        active = Agg(self._mdb.attendeesCollection())

        selector = {
            "batchID": self._batchID,
            "info.event.group.urlname": {"$in": urls},
            "info.attendee.rsvp.response": "yes",
        }
        active.addMatch(selector)

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            active.addRangeMatch("info.event_time",
                                 self._start_date,
                                 self._end_date)

        # No projection stage: the group stage reads the info.* fields directly.
        active.addGroup({
            "_id": "$info.attendee.member.name",
            "count": {"$sum": 1},
            "groups": {"$addToSet": "$info.event.group.urlname"},
        })

        if self._sorter:
            active.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            active.create_view(self._mdb.database(), "active_users_view")

        columns = ["_id", "count", "groups"]
        writer = CursorFormatter(active, self._filename, self._format)
        writer.output(fieldNames=columns, limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 8
0
    def get_total_events(self, urls, when="past", filename=None):
        '''
        Output the number of events per (month, year) for the groups in urls.

        urls     : group urlnames, matched with "$in" against
                   "event.group.urlname".
        when     : "past" or "upcoming". Selects the events collection and
                   is also matched against "event.status".
        filename : when truthy, replaces self._filename before output.

        The value returned by CursorFormatter.output is recorded in
        self._files unless it equals "-".

        Raises ValueError if when is neither "past" nor "upcoming".
        '''
        if when == "past":
            agg = Agg(self._mdb.pastEventsCollection())
        elif when == "upcoming":
            agg = Agg(self._mdb.upcomingEventsCollection())
        else:
            # An unknown value used to leave agg as None and fail later with
            # an AttributeError on the first pipeline call.
            raise ValueError(
                "when must be 'past' or 'upcoming', got %r" % (when,))

        if self._start_date or self._end_date:
            agg.addRangeMatch("event.time", self._start_date, self._end_date)

        agg.addMatch({
            "batchID": self._batchID,
            "event.status": when,
            "event.group.urlname": {"$in": urls},
        })

        # NOTE(review): "rsvp" reads "$event.group.yes_rsvp_count" and is not
        # used by any later stage -- confirm the path if it is ever needed.
        agg.addProject({
            "_id": 0,
            "group": "$event.group.urlname",
            "rsvp": "$event.group.yes_rsvp_count",
            "month": {"$month": "$event.time"},
            "year": {"$year": "$event.time"},
        })

        agg.addGroup({
            "_id": {"month": "$month", "year": "$year"},
            "count": {"$sum": 1},
        })

        # Flatten the compound _id back into plain columns.
        agg.addProject({
            "month": "$_id.month",
            "year": "$_id.year",
            "count": "$count",
        })

        if self._sorter:
            agg.addSort(self._sorter)

        if self._view:
            agg.create_view(self._mdb.database(), "total_events")

        if filename:
            self._filename = filename

        formatter = CursorFormatter(agg, self._filename, self._format)
        filename = formatter.output(fieldNames=["month", "year", "count"],
                                    limit=self._limit)

        if filename != "-":
            self._files.append(filename)
Exemplo n.º 9
0
    def get_RSVP_history(self, urls, filename=None):
        '''
        Output the summed RSVP count per event timestamp for the groups in
        urls.

        Past events whose group urlname is in urls are projected to
        timestamp, event name, country and RSVP count. Only documents whose
        timestamp is of type "date" are kept, and RSVP counts are summed per
        timestamp.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        The value returned by CursorFormatter.output is recorded in
        self._files unless it equals "-".
        '''
        # NOTE(review): unlike sibling reports there is no batchID match
        # here, and the view name "rsvps_view" is also used by get_rsvps --
        # confirm both are intended.
        history = Agg(self._mdb.pastEventsCollection())
        history.addMatch({"event.group.urlname": {"$in": urls}})

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            history.addRangeMatch("event.time",
                                  self._start_date,
                                  self._end_date)

        history.addProject({
            "timestamp": "$event.time",
            "event": "$event.name",
            "country": "$event.venue.country",
            "rsvp_count": "$event.yes_rsvp_count",
        })

        history.addMatch({"timestamp": {"$type": "date"}})
        history.addGroup({
            "_id": "$timestamp",
            "rsvp_count": {"$sum": "$rsvp_count"},
        })

        if self._sorter:
            history.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            history.create_view(self._mdb.database(), "rsvps_view")

        writer = CursorFormatter(history.aggregate(),
                                 self._filename,
                                 self._format)
        filename = writer.output(fieldNames=["_id", "rsvp_count"],
                                 datemap=["_id"],
                                 limit=self._limit)

        if filename != "-":
            self._files.append(filename)
Exemplo n.º 10
0
    def get_events(self, urls, when="past", filename=None):
        '''
        Output group, name, RSVP count and date for the events of the groups
        in urls.

        urls     : group urlnames, matched with "$in" against
                   "event.group.urlname".
        when     : "past" or "upcoming". Selects the events collection and
                   is also matched against "event.status".
        filename : handed unchanged to CursorFormatter and afterwards
                   recorded in self._files unless it equals "-".

        Raises ValueError if when is neither "past" nor "upcoming".
        '''
        if when == "past":
            agg = Agg(self._mdb.pastEventsCollection())
        elif when == "upcoming":
            agg = Agg(self._mdb.upcomingEventsCollection())
        else:
            # An unknown value used to leave agg as None and fail later with
            # an AttributeError on the first pipeline call.
            raise ValueError(
                "when must be 'past' or 'upcoming', got %r" % (when,))

        agg.addMatch({
            "batchID": self._batchID,
            "event.status": when,
            "event.group.urlname": {"$in": urls},
        })

        if self._start_date or self._end_date:
            agg.addRangeMatch("event.time", self._start_date, self._end_date)

        # "country" is projected but is not among the output columns below.
        agg.addProject({
            "_id": 0,
            "group": u"$event.group.urlname",
            "name": u"$event.name",
            "country": "$event.venue.country",
            "rsvp_count": "$event.yes_rsvp_count",
            "date": "$event.time",
        })

        if self._sorter:
            agg.addSort(self._sorter)

        if self._view:
            agg.create_view(self._mdb.database(), "events_view")

        formatter = CursorFormatter(agg, filename, self._format)
        formatter.output(fieldNames=["group", "name", "rsvp_count", "date"],
                         datemap=["date"],
                         limit=self._limit)

        # NOTE(review): with the default filename=None, None itself is
        # appended here -- confirm that is intended.
        if filename != "-":
            self._files.append(filename)
Exemplo n.º 11
0
    def get_events(self, urls, filename=None):
        '''
        Output group, name, RSVP count and date for the past events of the
        groups in urls.

        Events of the current batch with status "past" are selected, and
        optionally range-matched on "event.time" when a start or end date is
        set.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        events = Agg(self._mdb.pastEventsCollection())

        selector = {
            "batchID": self._batchID,
            "event.status": "past",
            "event.group.urlname": {"$in": urls},
        }
        events.addMatch(selector)

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            events.addRangeMatch("event.time",
                                 self._start_date,
                                 self._end_date)

        events.addProject({
            "_id": 0,
            "group": u"$event.group.urlname",
            "name": u"$event.name",
            "rsvp_count": "$event.yes_rsvp_count",
            "date": "$event.time",
        })

        if self._sorter:
            events.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            events.create_view(self._mdb.database(), "events_view")

        columns = ["group", "name", "rsvp_count", "date"]
        writer = CursorFormatter(events, self._filename, self._format)
        writer.output(fieldNames=columns,
                      datemap=["date"],
                      limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 12
0
    def get_job_groups(self, filename=None):
        '''
        Output the number of documents per "Job Function" value in
        self._collection.

        filename : handed unchanged to CursorFormatter and afterwards
                   recorded in self._files unless it equals "-".
        '''
        # Pipeline: [ { "$group" : { "_id" : "$Job Function",
        #                            "count" : { "$sum" : 1 } } } ]
        jobs = Agg(self._collection)
        jobs.addGroup({"_id": "$Job Function", "count": {"$sum": 1}})

        if self._sorter:
            jobs.addSort(self._sorter)

        # NOTE(review): the pipeline is printed and iterated once here before
        # the formatter runs; this looks like leftover debugging -- confirm.
        print(jobs)
        for row in jobs():
            print(row)

        writer = CursorFormatter(jobs, filename, self._format)
        writer.output(fieldNames=["_id", "count"], limit=self._limit)

        # NOTE(review): with the default filename=None, None itself is
        # appended here -- confirm that is intended.
        if filename != "-":
            self._files.append(filename)
Exemplo n.º 13
0
    def getMembers(self, urls, filename=None):
        '''
        Output urlname, country, batchID and member count for each group in
        urls within the current batch.

        The start/end date range is not consulted by this report.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.
        '''
        members = Agg(self._mdb.groupsCollection())

        selector = {
            "batchID": {"$in": [self._batchID]},
            "group.urlname": {"$in": urls},
        }
        members.addMatch(selector)

        members.addProject({
            "_id": 0,
            "urlname": "$group.urlname",
            "country": "$group.country",
            "batchID": 1,
            "member_count": "$group.member_count",
        })

        if self._sorter:
            members.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            members.create_view(self._mdb.database(), "members_view")

        # Unlike sibling reports, no limit is passed and self._files is left
        # untouched.
        columns = ["urlname", "country", "batchID", "member_count"]
        writer = CursorFormatter(members.aggregate(),
                                 self._filename,
                                 self._format)
        writer.output(fieldNames=columns)
Exemplo n.º 14
0
 def testFormatter(self):
     '''
     Run a match/project pipeline for one member name through
     CursorFormatter and check that the expected output file exists, then
     remove it.
     '''
     self._agg.addMatch({"member.member_name": "Joe Drumgoole"})
     projection = {
         "member.member_name": 1,
         "_id": 0,
         "member.join_time": 1,
         "member.city": 1,
     }
     self._agg.addProject(projection)
     results = self._agg.aggregate()

     prefix = "agg_test_"
     filename = "JoeDrumgoole"
     ext = "json"
     # The test expects the output at prefix + name + "." + ext.
     expected_path = prefix + filename + "." + ext

     self._formatter = CursorFormatter(results,
                                       prefix=prefix,
                                       name=filename,
                                       ext=ext)
     columns = ["member.member_name", "member.join_time", "member.city"]
     self._formatter.output(fieldNames=columns,
                            datemap=["member.join_time"])

     self.assertTrue(os.path.isfile(expected_path))
     os.unlink(expected_path)
Exemplo n.º 15
0
    def get_organisers(self, urls, filename=None):
        '''
        Output each organiser of the groups in urls together with the set of
        groups they organise.

        Groups of the current batch whose urlname is in urls are selected
        from the pro groups collection, optionally range-matched on
        "group.created". The organizers array is unwound and the group
        urlnames are collected per organizer document.

        urls     : group urlnames, matched with "$in".
        filename : handed unchanged to CursorFormatter and afterwards
                   recorded in self._files unless it equals "-".
        '''
        agg = Agg(self._mdb.proGroupsCollection())

        agg.addMatch({"batchID": self._batchID})
        agg.addMatch({"group.urlname": {"$in": urls}})

        if self._start_date or self._end_date:
            agg.addRangeMatch("group.created", self._start_date, self._end_date)

        agg.addUnwind("$group.organizers")

        # Group on the whole organizer sub-document; its name and permission
        # fields are pulled out of _id in the projection below.
        agg.addGroup({
            "_id": "$group.organizers",
            "groups": {"$addToSet": "$group.urlname"},
        })

        # The original dict ended with a stray "" that was implicitly
        # concatenated onto "$groups"; the value is unchanged without it.
        agg.addProject({
            "_id": 0,
            "name": "$_id.name",
            "role": "$_id.permission",
            "groups": "$groups",
        })

        print(agg)
        if self._sorter:
            agg.addSort(self._sorter)

        # NOTE(review): the view name "new_members_view" is the same one
        # get_new_members uses -- confirm whether a distinct name is wanted.
        if self._view:
            agg.create_view(self._mdb.database(), "new_members_view")

        formatter = CursorFormatter(agg, filename, self._format)
        formatter.output(fieldNames=["name", "role", "groups"],
                         limit=self._limit)

        if filename != "-":
            self._files.append(filename)
Exemplo n.º 16
0
    def get_rsvp_by_event(self, urls, filename="rsvp_events"):
        '''
        Output the summed "event.yes_rsvp_count" per group urlname for the
        past events of the groups in urls.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output;
                   defaults to "rsvp_events".

        self._filename is recorded in self._files unless it equals "-".
        '''
        rsvps = Agg(self._mdb.pastEventsCollection())

        selector = {
            "batchID": self._batchID,
            "event.group.urlname": {"$in": urls},
        }
        rsvps.addMatch(selector)

        date_bounded = self._start_date or self._end_date
        if date_bounded:
            rsvps.addRangeMatch("event.time",
                                self._start_date,
                                self._end_date)

        rsvps.addGroup({
            "_id": "$event.group.urlname",
            "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
        })

        if self._sorter:
            rsvps.addSort(self._sorter)

        if filename:
            self._filename = filename

        if self._view:
            rsvps.create_view(self._mdb.database(), "rsvps_by_event_view")

        writer = CursorFormatter(rsvps.aggregate(),
                                 self._filename,
                                 self._format)
        writer.output(fieldNames=["_id", "rsvp_count"], limit=self._limit)

        if self._filename != "-":
            self._files.append(self._filename)
Exemplo n.º 17
0
class Test(unittest.TestCase):
    '''
    Tests for CursorFormatter and Nested_Dict. setUp builds an Agg over the
    members collection of a MUGAlyserMongoDB instance.
    '''

    def setUp(self):
        self._mdb = MUGAlyserMongoDB()
        self._agg = Agg(self._mdb.membersCollection())

    def tearDown(self):
        pass

    def testFormatter(self):
        '''
        Run a match/project pipeline for one member name through
        CursorFormatter and check that the expected output file exists,
        then remove it.
        '''
        self._agg.addMatch({"member.member_name": "Joe Drumgoole"})
        projection = {
            "member.member_name": 1,
            "_id": 0,
            "member.join_time": 1,
            "member.city": 1,
        }
        self._agg.addProject(projection)
        results = self._agg.aggregate()

        prefix = "agg_test_"
        filename = "JoeDrumgoole"
        ext = "json"
        # The test expects the output at prefix + name + "." + ext.
        expected_path = prefix + filename + "." + ext

        self._formatter = CursorFormatter(results,
                                          prefix=prefix,
                                          name=filename,
                                          ext=ext)
        columns = ["member.member_name", "member.join_time", "member.city"]
        self._formatter.output(fieldNames=columns,
                               datemap=["member.join_time"])

        self.assertTrue(os.path.isfile(expected_path))
        os.unlink(expected_path)

    def testFieldMapper(self):
        '''
        Check that CursorFormatter.fieldMapper keeps the requested fields,
        including dotted paths into nested documents, and drops the rest.
        '''
        # NOTE(review): the assertions call has_key on the mapped result and
        # on its nested values; a plain Python 3 dict has no such method, so
        # this presumably relies on what fieldMapper returns -- confirm.
        source = {"a": "b"}
        mapped = CursorFormatter.fieldMapper(source, ['a'])
        self.assertTrue(mapped.has_key("a"))

        # Top-level keys that were not requested are expected to be absent.
        source = {"a": "b", "c": "d", "e": "f"}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c'])
        for kept in ("a", "c"):
            self.assertTrue(mapped.has_key(kept))
        self.assertFalse(mapped.has_key("e"))

        # A dotted path is expected to select a key inside a nested document.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))

        # Siblings of the selected nested key are expected to be absent.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x", "y": "p"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))

        # Two nested documents, one selected key in each.
        source = {
            "a": "b",
            "c": "d",
            "e": "f",
            "z": {"w": "x", "y": "p"},
            "g": {"h": "i", "j": "k"},
        }
        wanted = ['a', 'c', "z.w", "g.j"]
        mapped = CursorFormatter.fieldMapper(source, wanted)
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))
        self.assertTrue(mapped.has_key("g"))
        self.assertTrue(mapped['g'].has_key("j"))
        self.assertFalse(mapped['g'].has_key("h"))

    def testNestedDict(self):
        '''
        Check has_key / set_value / get_value on Nested_Dict, including a
        dotted key and a missing key.
        '''
        nested = Nested_Dict({})
        self.assertFalse(nested.has_key("hello"))

        nested.set_value("hello", "world")
        self.assertTrue(nested.has_key("hello"))
        self.assertEqual(nested.get_value("hello"), "world")

        # Reading a key that was never set is expected to raise KeyError.
        self.assertRaises(KeyError, nested.get_value, "Bingo")

        # Setting "member.name" is expected to create a nested "member" dict.
        nested.set_value("member.name", "Joe Drumgoole")
        expected_member = {"name": "Joe Drumgoole"}
        self.assertEqual(nested.get_value("member"), expected_member)

    def test_dateMapField(self):
        '''
        Smoke test: call dateMapField on a document with a datetime field.
        The result is not inspected.
        '''
        sample = {"a": 1, "b": datetime.datetime.now()}
        CursorFormatter.dateMapField(sample, "b")
Exemplo n.º 18
0
    def test_dateMapField(self):
        '''
        Smoke test: call dateMapField on a document with a datetime field.
        The result is not inspected.
        '''
        sample = {"a": 1, "b": datetime.datetime.now()}
        CursorFormatter.dateMapField(sample, "b")
Exemplo n.º 19
0
    def get_member_history(self, urls, filename=None):
        '''
        Go into every valid batch and output the summed member count of the
        groups in urls, giving a history across batches.

        The batch IDs come from Audit.getCurrentValidBatchIDs(). When a
        start or end date is set, the range is applied to the "timestamp"
        field, which selects batches rather than groups. Per (timestamp,
        batchID) the output carries the set of group urlnames and the sum of
        their member counts.

        urls     : group urlnames, matched with "$in".
        filename : when truthy, replaces self._filename before output.

        self._filename is recorded in self._files unless it equals "-".
        '''
        audit = Audit(self._mdb)
        validBatches = list(audit.getCurrentValidBatchIDs())

        agg = Agg(self._mdb.groupsCollection())

        if self._start_date or self._end_date:
            agg.addRangeMatch("timestamp", self._start_date, self._end_date)

        agg.addMatch({
            "batchID": {"$in": validBatches},
            "group.urlname": {"$in": urls},
        })

        # The member count is read from "group.member_count", with
        # "group.members" passed to Agg.ifNull as the alternative.
        agg.addProject({
            "_id": 0,
            "timestamp": 1,
            "batchID": 1,
            "urlname": "$group.urlname",
            "count": Agg.ifNull("$group.member_count", "$group.members"),
        })

        agg.addGroup({
            "_id": {"ts": "$timestamp", "batchID": "$batchID"},
            "groups": {"$addToSet": "$urlname"},
            "count": {"$sum": "$count"},
        })

        if self._sorter:
            agg.addSort(self._sorter)

        if filename:
            self._filename = filename

        # NOTE(review): the view name "groups_view" is also used by
        # get_groups -- confirm whether a distinct name is wanted.
        if self._view:
            agg.create_view(self._mdb.database(), "groups_view")

        formatter = CursorFormatter(agg.aggregate(), self._filename,
                                    self._format)
        formatter.output(fieldNames=["_id", "groups", "count"],
                         datemap=["_id.ts"],
                         limit=self._limit)

        # Record the name actually used for output. The raw parameter may be
        # None, which previously ended up in self._files.
        if self._filename != "-":
            self._files.append(self._filename)