def dehydrate(self, bundle):
    """Add category URIs and a detail thumbnail URL to ``bundle.data``.

    Writes ``concrete_parent_category`` and
    ``concrete_category_breadcrumbs`` and, when a thumbnail could be
    generated, ``thumbnail_detail``; then defers to the parent class.
    """
    event = bundle.obj
    payload = bundle.data
    related = self.concrete_category.get_related_resource(
        bundle.obj.concrete_category)
    tree = CachedCategoryTree()

    # URI of the surface parent of the event's concrete category.
    surface_parent = tree.surface_parent(event.concrete_category_id)
    payload.update(
        concrete_parent_category=related.get_resource_uri(surface_parent))

    # One URI per entry of tree.parents(), in the order it yields them.
    breadcrumb_uris = [
        related.get_resource_uri(ancestor)
        for ancestor in tree.parents(event.concrete_category_id)
    ]
    payload.update(concrete_category_breadcrumbs=breadcrumb_uris)

    # The thumbnail is best-effort: a failure just leaves the key out.
    try:
        source_image = event.image_chain(tree)
        thumb = get_thumbnail(source_image, **settings.IPHONE_THUMB_OPTIONS)
    except Exception:
        pass
    else:
        payload.update(thumbnail_detail=thumb.url)

    return super(EventResource, self).dehydrate(bundle)
def dehydrate(self, bundle):
    """Extend the serialized event with category links and a thumbnail.

    Keys added to ``bundle.data``: ``concrete_parent_category``,
    ``concrete_category_breadcrumbs`` and (only if thumbnail generation
    succeeds) ``thumbnail_detail``. The parent ``dehydrate`` runs last and
    its result is returned.
    """
    event, data = bundle.obj, bundle.data
    category_resource = self.concrete_category.get_related_resource(
        bundle.obj.concrete_category)
    ctree = CachedCategoryTree()
    uri_for = category_resource.get_resource_uri

    # Link to the surface parent of the concrete category.
    parent_category = ctree.surface_parent(event.concrete_category_id)
    data.update(concrete_parent_category=uri_for(parent_category))

    # Links for every category returned by ctree.parents().
    ancestors = ctree.parents(event.concrete_category_id)
    data.update(concrete_category_breadcrumbs=list(map(uri_for, ancestors)))

    # Thumbnail for the detail view; errors while building it are ignored.
    try:
        detail_thumb = get_thumbnail(
            event.image_chain(ctree), **settings.IPHONE_THUMB_OPTIONS)
    except Exception:
        pass
    else:
        data.update(thumbnail_detail=detail_thumb.url)

    return super(EventResource, self).dehydrate(bundle)
def __init__(self):
    """Create the empty lookup tables and the category tree."""
    # Missing keys of this mapping start out as an empty list.
    self.x_event_categories = defaultdict(list)

    # Plain dictionaries, all initially empty.
    self.x_categories_by_xid = {}
    self.places_by_xid = {}
    self.events_by_xid = {}
    self.occurrences_by_xid = {}
    self.prices_by_xid = {}

    self.ctree = CachedCategoryTree()
def __init__(self, userID, category=None, parent=None, ctree=None, eaa=None,
             score=None, dictionary=None, db=user_behavior.DJANGO_DB):
    """Build one node of a user's category tree, including its subtree.

    The node wraps ``category`` (or ``ctree.concrete_node`` when no
    category is given) and recursively creates one child node for every
    entry of ``ctree.children(...)`` of that category.

    Parameters:
      userID     -- passed to ``db.gvix_dict`` and to every child node.
      category   -- category for this node; falsy means "use the
                    concrete root of ``ctree``".
      parent     -- parent node, stored as ``self.parent``.
      ctree      -- category tree to walk; a ``CachedCategoryTree`` is
                    created when none is supplied.
      eaa        -- mapping indexed by category id that supplies the
                    node's score; loaded via ``db.gvix_dict(userID)``
                    when ``None``.
      score      -- accepted for interface compatibility; not used here.
      dictionary -- stored as ``self.dictionary``; a falsy value is
                    replaced by a fresh empty dict.
      db         -- object providing ``gvix_dict``.
    """
    self.parent = parent
    self.children = []

    if not ctree:
        ctree = CachedCategoryTree()
    if eaa is None:
        # Get from DB (whether Django or dictionary).
        eaa = db.gvix_dict(userID)

    # Without an explicit category this node is the concrete root.
    node_category = category if category else ctree.concrete_node
    self.children = [
        CategoryTree(userID, child, self, ctree, eaa)
        for child in ctree.children(node_category)
    ]
    self.category = node_category
    self.title = node_category.title

    self.dictionary = dictionary if dictionary else {}

    try:
        self.score = eaa[self.category.id]
    except Exception:
        # No stored value for this category (e.g. the root node): fall
        # back to an all-zero score. ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        self.score = (0, 0, 0, 0)
def test_convergence(self):
    """Reinforce one sampled category for 100 rounds and plot its share.

    Every round the picked category's ``g`` counter grows by the number of
    times it was recommended, every other recommended category that is not
    in ``parents`` gets ``x`` incremented, and the percentage of
    recommendations that hit the picked category is recorded. The series
    is saved to ``learning/test_results/test.pdf``.
    """
    initial_scores = ml.recommend_categories(self.user)
    picked_category = ml.sample_distribution(initial_scores.items())[0]
    picked_aggr = EventActionAggregate(user=self.user,
                                       category=picked_category)
    hit_rates = []
    tree = CachedCategoryTree()
    parents = tree.parents(picked_category)

    for round_number in range(1, 101):
        # Progress indicator; the trailing "\r" keeps it on one line.
        sys.stdout.write("Round: %d\r" % round_number)
        sys.stdout.flush()

        # Recommend a new set of categories.
        round_scores = ml.recommend_categories(self.user)
        sampled = ml.sample_category_distribution(round_scores.items(),
                                                  settings.N)
        found_count = sampled.count(picked_category)
        others = set(sampled)
        others.discard(picked_category.id)

        # G(oto) the picked category.
        picked_aggr.g += found_count
        picked_aggr.save()

        # X all other categories, except those found in ``parents``.
        for other in others:
            if other in parents:
                continue
            lookup = dict(user=self.user, category=other)
            try:
                aggregate = EventActionAggregate.objects.get(**lookup)
            except EventActionAggregate.DoesNotExist:
                aggregate = EventActionAggregate(**lookup)
            aggregate.x += 1
            aggregate.save()

        hit_rates.append(found_count * 100.0 / settings.N)

    plt.plot(hit_rates, color="blue")
    plt.title("Rate of learning one category")
    plt.xlabel("Trials")
    plt.ylabel("% of all Recommendations")
    plt.savefig("learning/test_results/test.pdf")
    plt.cla()
    self.assertTrue(True)
def get_event_score(user, event_ids, event_score):
    """
    Score each event by the best score among its abstract categories.

    Category scores come from ``random_tree_walk_algorithm`` run from the
    abstract root of the category tree. Results are written into
    ``event_score`` (replaced by a zero-defaulting dict when falsy), which
    is also the return value.
    """
    if not event_score:
        event_score = defaultdict(int)

    # Mapping between event ids and all their abstract category ids.
    categories_by_event = Category.objects.for_events(tuple(event_ids), 'A')

    tree = CachedCategoryTree()
    walk_scores = random_tree_walk_algorithm(user, tree.abstract_node, tree)
    # The walk is keyed by category objects; re-key it by category id.
    score_by_category_id = dict(
        (category.id, value) for category, value in walk_scores.items()
    )

    # FIXME: This can be optimized.
    # Only calculate scores for events that you sample inside the
    # for category in categories loop.
    for event_id, category_ids in categories_by_event.items():
        # FIXME: consider using a kernel function here instead of just max.
        event_score[event_id] = max(
            [score_by_category_id[cid] for cid in category_ids])
    return event_score
def __init__(self, user=None, db=user_behavior.DJANGO_DB):
    """Set up round storage, the DB backend, a category tree and a user.

    Parameters:
      user -- user to work with; when omitted a new ``User`` is created
              and ``self.delete_user`` is set to True.
      db   -- backend object; ``db.initialize_user`` is called with the
              user and the ``delete_user`` flag.
    """
    # Rounds are a 2D array: the first dimension is the round number, the
    # second holds the trials for that round (so they can be averaged
    # easily).
    self.rounds = []
    self.db = db
    # Keep a cached category tree.
    self.ctree = CachedCategoryTree()

    if user:
        self.user = user
        self.delete_user = False
    else:
        # Create and assign a new user, trying successive numeric
        # suffixes until one saves.
        self.delete_user = True
        count = 0
        while True:
            count += 1
            try:
                self.user = User(username="******" + str(count),
                                 password='******' + str(count))
                self.user.save()
            except Exception:
                # ``Exception`` instead of a bare ``except`` so that
                # Ctrl-C / SystemExit can still break out of the retry
                # loop.
                # NOTE(review): a persistent failure still retries
                # forever; consider capping the attempts.
                continue
            break

    # Initialize user.
    self.db.initialize_user(self.user, self.delete_user)
class CachedCategoryTreeTest(TestCase):
    """Check that CachedCategoryTree lookups agree with the database."""

    fixtures = ['categories']
    # NOTE(review): built once when the class body runs, i.e. before any
    # fixture loading done per test — confirm CachedCategoryTree tolerates
    # being created that early.
    ctree = CachedCategoryTree()

    def test_get_by_title(self):
        title = 'Math and Science'
        cmem = self.ctree.get(title=title)
        cdb = Category.objects.get(title=title)
        self.assertEqual(cdb, cmem)

    def test_get_by_slug(self):
        slug = 'concerts'
        cmem = self.ctree.get(slug=slug)
        # Look the row up by slug as well; the previous ``title=slug``
        # compared a slug lookup against a title lookup.
        cdb = Category.objects.get(slug=slug)
        self.assertEqual(cdb, cmem)

    def test_get_by_id(self):
        category_id = 10
        cmem = self.ctree.get(id=category_id)
        cdb = Category.objects.get(id=category_id)
        self.assertEqual(cdb, cmem)

    def test_convenience(self):
        # Both collections are non-empty and share no category.
        self.assertTrue(self.ctree.concretes)
        self.assertTrue(self.ctree.abstracts)
        self.assertFalse(
            set(self.ctree.concretes) & set(self.ctree.abstracts))
def handle(self, **options):
    """Generate the iPhone thumbnail for the image of every event.

    Any exception raised while doing so is re-raised as ``CommandError``.
    """
    try:
        ctree = CachedCategoryTree()
        for event in Event.objects.all():
            image = event.image_chain(ctree)
            get_thumbnail(image, **settings.IPHONE_THUMB_OPTIONS)
    except Exception as exc:
        # ``as`` form (valid on Python 2.6+ and 3) and a distinct name, so
        # the exception no longer reuses the loop variable's name.
        raise CommandError(exc)
def __init__(self, user=None, db=None,
             preference_file=DEFAULT_PREFERENCES_FILE):
    """Initialize this person from the preferences in ``preference_file``.

    The file is loaded into a ``PreferenceTransitionMatrix``, converted to
    a preference dictionary using a ``CachedCategoryTree``, and handed to
    ``DiscretePreferencePerson.__init__`` together with ``user`` and ``db``.
    """
    matrix = PreferenceTransitionMatrix(preference_file)
    preferences = matrix.get_preference_dictionary(CachedCategoryTree())
    DiscretePreferencePerson.__init__(self, preferences, user, db=db)
def run(self):
    """Create test events, abstract categories and occurrences.

    For every concrete category, ``self.n_events_per_concrete_category``
    events are saved. Each event gets a random selection of abstract leaf
    categories (at most one per distinct parent) and between 1 and
    ``self.max_occurrences_per_event`` occurrences dated today.
    """
    ctree = CachedCategoryTree()
    tester_api = User.objects.get(username='******')
    village_vanguard = Place.objects.get(slug='village-vanguard')

    # Abstract leaves, keeping the first leaf seen for each parent. This
    # does not depend on the event, so compute it once up front.
    divergent_leaves = list(unique_everseen(
        ctree.leaves(ctree.abstract_node), lambda c: c.parent))

    for concrete_category in ctree.concretes:
        for n_events in xrange(self.n_events_per_concrete_category):
            # Create an event.
            title = '%s event #%i' % (concrete_category.title, n_events)
            event = Event(
                xid='xid-%i' % n_events,
                title=title,
                slug=slugify(title),
                description='This is a test `%s`.\n\nSome Mitch:\n%s' % (title, self.description),
                submitted_by=tester_api,
                url='http://abextratech.com/',
                image_url='http://www3.pictures.fp.zimbio.com/Vicky+Cristina+Barcelona+Movie+Stills+-Zma0rlbU7Tl.jpg',
                video_url='http://www.youtube.com/watch?v=a1Y73sPHKxw'
            )
            event.concrete_category = concrete_category
            event.save()

            # Add some abstract categories to it. random.sample raises
            # ValueError when asked for more items than exist, so cap the
            # request at the number of available leaves.
            wanted = random.randint(1, self.max_abstract_categories)
            sample_size = min(wanted, len(divergent_leaves))
            for abstract_category in random.sample(divergent_leaves,
                                                   sample_size):
                event.categories.add(abstract_category)

            # Add some occurrences.
            n_occurrences = random.randint(1, self.max_occurrences_per_event)
            for _ in xrange(n_occurrences):
                today = datetime.date.today()
                Occurrence(
                    event=event,
                    place=village_vanguard,
                    one_off_place="AbextraTech @ 93 Leonard St., New York, NY 10013",
                    start_date=today,
                    start_time=datetime.datetime.now().time(),
                    end_date=today,
                    end_time=datetime.datetime.now().time(),
                    is_all_day=False
                ).save()
def save(self, commit=True):
    """Derive slug and category type, propagate the type, then save.

    The slug comes from the cleaned title. The category type is 'A' or 'C'
    when the parent is the abstract or concrete root, the parent's own
    type for any other parent, and 'O' when there is no parent. For an
    instance that already has an id, every category returned by
    ``children_recursive`` is given the same type and saved.
    """
    self.instance.slug = slugify(self.cleaned_data['title'])
    tree = CachedCategoryTree()

    # The category type is dictated by the parent.
    parent = self.cleaned_data.get('parent')
    if not parent:
        category_type = 'O'
    elif parent == tree.abstract_node:
        category_type = 'A'
    elif parent == tree.concrete_node:
        category_type = 'C'
    else:
        category_type = parent.category_type
    self.instance.category_type = category_type

    # Push the type down to all descendants of an existing category.
    if self.instance.id:
        for descendant in tree.children_recursive(self.instance):
            descendant.category_type = self.instance.category_type
            descendant.save()

    return super(CategoryAdminForm, self).save(commit=commit)
def __init__(self, userID, category=None, parent=None, ctree=None, eaa=None,
             score=None, dictionary=None, db=user_behavior.DJANGO_DB):
    """Create a tree node for one user and category, plus all descendants.

    A user's tree is represented recursively: each node holds a category,
    its title, a score and a list of child nodes built from
    ``ctree.children(...)``. When ``category`` is falsy the node stands
    for ``ctree.concrete_node``.

    Parameters:
      userID     -- forwarded to ``db.gvix_dict`` and to child nodes.
      category   -- category of this node, or falsy for the concrete root.
      parent     -- parent node, kept in ``self.parent``.
      ctree      -- category tree; created as ``CachedCategoryTree()``
                    when not supplied.
      eaa        -- mapping looked up by category id to obtain the score;
                    fetched with ``db.gvix_dict(userID)`` when ``None``.
      score      -- currently unused by this constructor.
      dictionary -- kept in ``self.dictionary``; falsy values become a new
                    empty dict.
      db         -- backend exposing ``gvix_dict``.
    """
    self.parent = parent
    self.children = []

    if not ctree:
        ctree = CachedCategoryTree()
    if eaa is None:
        # Get from DB (whether Django or dictionary).
        eaa = db.gvix_dict(userID)

    # Fall back to the concrete root when no category was given.
    own_category = category if category else ctree.concrete_node
    self.children = [CategoryTree(userID, sub, self, ctree, eaa)
                     for sub in ctree.children(own_category)]
    self.category = own_category
    self.title = own_category.title

    if dictionary:
        self.dictionary = dictionary
    else:
        self.dictionary = {}

    try:
        self.score = eaa[self.category.id]
    except Exception:
        # Nothing stored for this category (this is the case for the root
        # node): use a zero score. Catching ``Exception`` instead of using
        # a bare ``except`` lets KeyboardInterrupt/SystemExit through.
        self.score = (0, 0, 0, 0)
def save(self, *args, **kwargs):
    """Fill in the secret key, refresh the summary, then save the event.

    The summary is only rebuilt for events that already have a primary key
    and at least one occurrence.
    """
    # Secret key generation.
    if not self.secret_key:
        self.secret_key = self.random_secret_key()

    # FIXME haxord summarization
    has_occurrences = self.pk and self.occurrences.count()
    if has_occurrences:
        # Imported here rather than at module level, as in the original.
        from events.utils import CachedCategoryTree
        summary = EventSummary.objects.for_event(self, CachedCategoryTree())
        summary.save()

    super(Event, self).save(*args, **kwargs)
def summarize_events(events):
    """
    Summarize every event whose id is listed in 'events'.

    Argument:
      'events' : A list of event ids

    Wraps summarize_event: it is called once per matching Event, sharing a
    single CachedCategoryTree, and the results are returned as a list.
    """
    matching_events = Event.objects.filter(id__in=events)
    tree = CachedCategoryTree()
    return [summarize_event(event, tree, True) for event in matching_events]
class Arbiter(object):
    """Runs a list of classification rules over an event."""

    def __init__(self, rules):
        """Keep the rules and create the category tree used for ranking."""
        self.rules = rules
        self.cachedcategorytree = CachedCategoryTree()

    def apply_rules(self, event, source, external_categories):
        """
        Classify the event with every rule and return the raw categories.

        This function has no side effects. Returns a tuple
        ``(concretes, abstracts)``: the concretes are the non-empty result
        of the first rule that produced any, the abstracts are the
        results of all rules concatenated in rule order. These raw
        categories may need to be filtered down (using the
        concrete_categories and abstract_categories functions).
        """
        first_concretes = []
        all_abstracts = []
        for rule in self.rules:
            concretes, abstracts = rule.classify(
                event, source, external_categories=external_categories)

            # Special handling for event classification rules.
            # Could later put this into a special class and abstract out
            # the common theme into a BaseChain class.
            usable = list(filter(None, concretes))
            if not first_concretes and usable:
                first_concretes = usable

            all_abstracts.extend(abstracts)
        return (first_concretes, all_abstracts)

    def abstract_categories(self, event, source, ext_category_xids=None):
        """Return only the abstract part of ``apply_rules``."""
        _concretes, abstracts = self.apply_rules(
            event, source, ext_category_xids)
        return abstracts

    def concrete_categories(self, event, source, ext_category_xids=None):
        """Return ``deepest_category`` of the concrete rule results."""
        concretes, _abstracts = self.apply_rules(
            event, source, ext_category_xids)
        return self.cachedcategorytree.deepest_category(concretes)
def __init__(self, *args, **kwargs):
    """Run the parent initializer, then attach a category tree.

    All positional and keyword arguments are passed through unchanged.
    """
    super(EventAdapter, self).__init__(*args, **kwargs)
    category_tree = CachedCategoryTree()
    self.ctree = category_tree
def test_load_db(self):
    """Each child of the concrete root must have ``self.n`` events."""
    tree = CachedCategoryTree()
    top_level_concretes = tree.children(tree.concrete_node)
    for concrete in top_level_concretes:
        event_total = concrete.events_concrete.all().count()
        self.assertEqual(self.n, event_total)
class ImportScrapeData(object):
    """Copy scraped rows from ``x_models`` into the local models.

    Every ``add_*`` method records what it imported in a ``*_by_xid``
    dictionary keyed by the id of the scraped row, so that later steps can
    find the local object their row refers to.
    """

    def __init__(self):
        # Scraped event id -> list of scraped category rows.
        self.x_event_categories = defaultdict(list)
        self.places_by_xid = {}
        self.occurrences_by_xid = {}
        self.prices_by_xid = {}
        self.events_by_xid = {}
        self.x_categories_by_xid = {}
        self.ctree = CachedCategoryTree()

    def run(self):
        """Import all scraped tables; referenced rows go before referrers."""
        for x_category in x_models.Category.objects.all():
            self.x_categories_by_xid[x_category.id] = x_category

        steps = (
            (x_models.EventCategory, self.add_event_category),
            (x_models.Location, self.add_place),
            (x_models.Event, self.add_event),
            (x_models.Occurrence, self.add_occurrence),
            (x_models.Price, self.add_price),
        )
        for x_model, importer in steps:
            for x_row in x_model.objects.all():
                importer(x_row)

    def add_price(self, x_price):
        """Import a price, unless its occurrence was not imported."""
        occurrence = self.occurrences_by_xid.get(x_price.occurrence_id)
        if not occurrence:
            return
        price, created = Price.objects.get_or_create(
            quantity=x_price.quantity,
            units=x_price.units,
            remark=x_price.remark or '',
            occurrence=occurrence
        )
        self.prices_by_xid[x_price.id] = price

    def add_occurrence(self, x_occurrence):
        """Import an occurrence, unless its event was not imported."""
        event = self.events_by_xid.get(x_occurrence.event_id)
        if not event:
            return
        place = self.places_by_xid[x_occurrence.location_id]
        # one_off_place and is_all_day are not set from the scraped row.
        occurrence, created = Occurrence.objects.get_or_create(
            event=event,
            place=place,
            start_date=x_occurrence.start_date,
            start_time=x_occurrence.start_time,
            end_date=x_occurrence.end_date,
            end_time=x_occurrence.end_time,
        )
        self.occurrences_by_xid[x_occurrence.id] = occurrence

    def add_event(self, x_event):
        """Import an event that has at least one scraped category."""
        x_categories = self.x_event_categories.get(x_event.id)
        if not x_categories:
            # TODO will be lost
            return

        x_category_xids = [x_category.xid for x_category in x_categories]
        category_ids = ExternalCategory.objects \
            .values_list('category_id', flat=True) \
            .filter(xid__in=x_category_xids)
        concrete_category = self.ctree.deepest_category(
            self.ctree.get(id=category_id) for category_id in category_ids
        )

        # submitted_by, video_url and the abstract categories are not set
        # here; created/modified are left to the model.
        event, created = Event.objects.get_or_create(
            xid=x_event.guid,
            title=x_event.title,
            slug=slugify(x_event.title)[:50],
            description=x_event.description or '',
            url=x_event.url or '',
            image_url=x_event.image_url or '',
            concrete_category=concrete_category,
        )
        self.events_by_xid[x_event.id] = event

    def add_place(self, x_location):
        """Import a location as a city, a point and a place."""
        city_slug = slugify(u'-'.join((x_location.city, x_location.state)))
        city, created = City.objects.get_or_create(
            city=x_location.city,
            state=x_location.state,
            slug=city_slug
        )
        point, created = Point.objects.get_or_create(
            latitude=x_location.latitude,
            longitude=x_location.longitude,
            address=x_location.address,
            city=city,
            zip=x_location.zipcode,
            country='US'
        )

        # A missing phone, or one containing ':', is stored as ''.
        phone = x_location.phone
        if not phone or ':' in phone:
            phone = ''

        # prefix, nickname, unit, email, description, status and
        # place_types are not set from the scraped row.
        place, created = Place.objects.get_or_create(
            point=point,
            title=x_location.title,
            slug=slugify(x_location.title)[:50],
            phone=phone,
            url=x_location.url or x_location.guid,  # FIXME source (villagevoice) specific
            image_url=x_location.image_url or ''
        )
        self.places_by_xid[x_location.id] = place

    def add_event_category(self, x_event_category):
        """Remember the scraped category of a scraped event."""
        x_category = self.x_categories_by_xid[x_event_category.category_id]
        self.x_event_categories[x_event_category.event_id].append(x_category)
def __init__(self, rules):
    """Store ``rules`` and create the instance's ``CachedCategoryTree``."""
    self.rules = rules
    category_tree = CachedCategoryTree()
    self.cachedcategorytree = category_tree
class ImportScrapeData(object):
    """Import scraped ``x_models`` rows as local model instances.

    Imported objects are remembered in the ``*_by_xid`` dictionaries under
    the id of the scraped row they came from; rows whose referenced event
    or occurrence was not imported are skipped.
    """

    def __init__(self):
        self.x_categories_by_xid = {}
        # Lists of scraped categories, grouped by scraped event id.
        self.x_event_categories = defaultdict(list)
        self.places_by_xid = {}
        self.events_by_xid = {}
        self.occurrences_by_xid = {}
        self.prices_by_xid = {}
        self.ctree = CachedCategoryTree()

    def run(self):
        """Walk the scraped tables in dependency order and import them."""
        for scraped_category in x_models.Category.objects.all():
            self.x_categories_by_xid[scraped_category.id] = scraped_category
        for scraped_link in x_models.EventCategory.objects.all():
            self.add_event_category(scraped_link)
        for scraped_location in x_models.Location.objects.all():
            self.add_place(scraped_location)
        for scraped_event in x_models.Event.objects.all():
            self.add_event(scraped_event)
        for scraped_occurrence in x_models.Occurrence.objects.all():
            self.add_occurrence(scraped_occurrence)
        for scraped_price in x_models.Price.objects.all():
            self.add_price(scraped_price)

    def add_price(self, x_price):
        """Get or create the Price for a scraped price row."""
        parent_occurrence = self.occurrences_by_xid.get(x_price.occurrence_id)
        if not parent_occurrence:
            return
        fields = dict(
            quantity=x_price.quantity,
            units=x_price.units,
            remark=x_price.remark or '',
            occurrence=parent_occurrence,
        )
        price, created = Price.objects.get_or_create(**fields)
        self.prices_by_xid[x_price.id] = price

    def add_occurrence(self, x_occurrence):
        """Get or create the Occurrence for a scraped occurrence row."""
        parent_event = self.events_by_xid.get(x_occurrence.event_id)
        if not parent_event:
            return
        venue = self.places_by_xid[x_occurrence.location_id]
        # Neither one_off_place nor is_all_day is taken from the scrape.
        fields = dict(
            event=parent_event,
            place=venue,
            start_date=x_occurrence.start_date,
            start_time=x_occurrence.start_time,
            end_date=x_occurrence.end_date,
            end_time=x_occurrence.end_time,
        )
        occurrence, created = Occurrence.objects.get_or_create(**fields)
        self.occurrences_by_xid[x_occurrence.id] = occurrence

    def add_event(self, x_event):
        """Get or create the Event for a scraped event with categories."""
        scraped_categories = self.x_event_categories.get(x_event.id)
        if not scraped_categories:
            # TODO will be lost
            return

        xids = [scraped.xid for scraped in scraped_categories]
        category_ids = ExternalCategory.objects \
            .values_list('category_id', flat=True) \
            .filter(xid__in=xids)
        candidates = (self.ctree.get(id=category_id)
                      for category_id in category_ids)
        deepest = self.ctree.deepest_category(candidates)

        # Not populated here: submitted_by, video_url, abstract categories.
        fields = dict(
            xid=x_event.guid,
            title=x_event.title,
            slug=slugify(x_event.title)[:50],
            description=x_event.description or '',
            url=x_event.url or '',
            image_url=x_event.image_url or '',
            concrete_category=deepest,
        )
        event, created = Event.objects.get_or_create(**fields)
        self.events_by_xid[x_event.id] = event

    def add_place(self, x_location):
        """Get or create the City, Point and Place for a scraped location."""
        city, created = City.objects.get_or_create(
            city=x_location.city,
            state=x_location.state,
            slug=slugify(u'-'.join((x_location.city, x_location.state))))
        point, created = Point.objects.get_or_create(
            latitude=x_location.latitude,
            longitude=x_location.longitude,
            address=x_location.address,
            city=city,
            zip=x_location.zipcode,
            country='US')

        # Keep the phone only when it is present and has no ':' in it.
        if x_location.phone and ':' not in x_location.phone:
            phone = x_location.phone
        else:
            phone = ''

        # Not populated here: prefix, nickname, unit, email, description,
        # status, place_types.
        place, created = Place.objects.get_or_create(
            point=point,
            title=x_location.title,
            slug=slugify(x_location.title)[:50],
            phone=phone,
            url=x_location.url or x_location.guid,  # FIXME source (villagevoice) specific
            image_url=x_location.image_url or '')
        self.places_by_xid[x_location.id] = place

    def add_event_category(self, x_event_category):
        """Append the linked scraped category to its event's list."""
        scraped_category = self.x_categories_by_xid[
            x_event_category.category_id]
        self.x_event_categories[x_event_category.event_id].append(
            scraped_category)