def _do_new_item(self, desc, initial_tag, create_time=None, other=None):
    """Build the database document for a new item and insert it.

    This is the backend of :meth:`new_item`.

    :param desc: description of the item; must be an
        :class:`ItemDescBase` instance. A deep copy is placed in the
        document, so the caller's object is not the one handed to the
        prefilters.
    :param initial_tag: list of strings; every entry is passed through
        ``unicode`` and the result is announced via ``declare_tag``.
    :param create_time: time value accepted by :func:`time.mktime`; when
        None, the current local time is used.
    :param other: None or a dict, stored unchanged under ``'other'``.
    :return: the id obtained from ``global_counter('item')``, which is
        also used as the document's ``_id``.
    """
    # Argument checks are kept as plain asserts (same messages as before).
    assert isinstance(desc, ItemDescBase), \
        'bad desc: {!r}'.format(type(desc))
    assert isinstance(initial_tag, list) and \
        all(isinstance(entry, basestring) for entry in initial_tag), \
        'bad initial_tag: {!r}'.format(initial_tag)
    assert other is None or isinstance(other, dict), \
        'bad other arg: {!r}'.format(other)

    tags = map(unicode, initial_tag)
    declare_tag(tags)

    stamp = time.localtime() if create_time is None else create_time

    coll = get_mongo('item')
    item_id = global_counter('item')
    for field in ('fetcher_type', 'fetcher_name', 'tag', 'creation_time'):
        coll.ensure_index(field)

    record = {
        '_id': item_id,
        'fetcher_type': self.fetcher_type,
        'fetcher_name': self.fetcher_name,
        'desc': deepcopy(desc),
        'tag': tags,
        'other': other,
        'creation_time': datetime.fromtimestamp(time.mktime(stamp))}

    # Prefilters get the document while 'desc' is still the copied
    # description object; it is serialized only afterwards.
    prefilter.apply(self, record)
    record['desc'] = Binary(record['desc'].serialize())

    coll.insert(record)
    return item_id
def auto_tagging(ctx, doc):
    """Extend ``doc['tag']`` with tags predicted from the item's content.

    The tagger model is loaded from `ukconfig.tagger_path` the first time
    it is needed and kept in the module-level ``_tagger``; the model has to
    be trained beforehand. If loading raises :exc:`IOError`, that is logged
    and `doc` is left unmodified.

    :param ctx: not used by this function.
    :param doc: item document. ``doc['desc'].render_content()`` supplies the
        input for the tagger, and ``doc['tag']`` is replaced by a list
        holding the existing and the predicted tags without duplicates
        (built through ``set``, so the original ordering is not kept).
    """
    global _tagger
    if _tagger is None:
        try:
            log_info('loading tagger ...')
            _tagger = TextTagger.load(ukconfig.tagger_path)
        except IOError:
            log_info('tagger model not found.')
            return

    predicted = _tagger.predict_one(doc['desc'].render_content())
    declare_tag(predicted)

    current = doc['tag']
    log_info('original tag: ' + str(current))
    log_info('autotagging: ' + str(predicted))

    # auto tag: combine existing and predicted tags, dropping duplicates
    combined = current + predicted
    doc['tag'] = list(set(combined))