Пример #1
0
 def readdb(self, query, iterate=None):
     """Echo ``query`` to stdout and run it on a newly constructed ``DB``.

     Args:
         query: Query passed unchanged to the ``DB`` object.
         iterate: When truthy the result of ``iterget(query)`` is
             returned; otherwise the result of ``get(query)``.

     Returns:
         Whatever the selected ``DB`` method returns.
     """
     print(query)
     connection = DB(self.host, self.port, self.user, self.passwd, self.db)
     # Only the selected method is looked up, exactly as in an if/else.
     fetch = connection.iterget if iterate else connection.get
     return fetch(query)
Пример #2
0
 def save(self, name_hash, keyword_id, json, vertical, debug):
     """Insert one row into ``self.table``; a failed insert is non-fatal.

     Args:
         name_hash: Value stored in the ``name_hash`` column.
         keyword_id: Value stored in the ``keyword_id`` column.
         json: Value stored in the ``json`` column.
         vertical: Value stored in the ``vertical`` column.
         debug: When truthy, a failed insert is reported on stdout
             instead of being dropped silently.

     Returns:
         None.
     """
     db = DB(self.host, self.port, self.user, self.passwd, self.db, self.table)
     row = {
         'name_hash': name_hash,
         'keyword_id': keyword_id,
         'json': json,
         'vertical': vertical,
     }
     try:
         db.insert(row)
     except Exception as err:
         # The insert stays best-effort (callers never saw an exception
         # from here), but the failure is no longer invisible in debug mode.
         # NOTE(review): a commented-out update fallback keyed on
         # name_hash/vertical used to sit here; presumably duplicate rows
         # are the expected failure -- confirm before reinstating it.
         if debug:
             print("!save: insert failed for name_hash=%s vertical=%s: %s"
                   % (name_hash, vertical, err))
Пример #3
0
 def save(self, keyword, related_keywords, debug):
     """Remove the rows whose ``keyword_id`` equals ``keyword['ori_id']``.

     Any exception raised while building the condition or while removing
     is printed and suppressed. ``related_keywords`` and ``debug`` are not
     used by this body.

     Returns:
         None.
     """
     db = DB(self.host, self.port, self.user, self.passwd, self.db, self.table)
     # The hash is not used below; it is still computed so that a missing
     # or non-encodable name raises here exactly as it always did.
     lowered_name = keyword['name'].encode('utf-8').lower()
     name_hash = md5(lowered_name).hexdigest()
     try:
         # %d makes the formatting fail unless ori_id is numeric.
         db.remove('keyword_id=%d' % keyword['ori_id'])
     except Exception as err:
         print(err)
Пример #4
0
 def readdb(self, query, iterate=None):
     """Echo ``query`` to stdout and run it against ``self.table``.

     Args:
         query: Query passed unchanged to the ``DB`` object.
         iterate: When truthy ``iterget(query)`` is used, otherwise
             ``get(query)``.

     Returns:
         The result of the selected ``DB`` method, or None when creating
         the ``DB`` object or running the query raised; the exception is
         printed and suppressed in that case.
     """
     print(query)
     try:
         connection = DB(self.host, self.port, self.user, self.passwd,
                         self.db, self.table)
         fetch = connection.iterget if iterate else connection.get
         return fetch(query)
     except Exception as err:
         print(err)
     return None
Пример #5
0
 def run(self):
     """Insert ``self.data`` into ``self.table`` and then run the callback.

     The result of ``db.insert`` is stored on ``self.output``. Afterwards
     ``self.callback.run()`` is attempted; any error it raises (including
     a missing ``callback`` attribute) is ignored unless ``self.debug``
     exists and is truthy.

     Returns:
         The value returned by ``db.insert(self.data)``.

     Raises:
         InserterError: If the callback fails while ``self.debug`` is set.
     """
     self.iterate_callables(exceptions='callback')
     db = DB(self.host, self.port, self.user, self.passwd, self.db, self.table)
     self.output = db.insert(self.data)
     try:
         self.callback.run()
     except Exception:
         # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
         # SystemExit are no longer swallowed by the best-effort callback.
         if hasattr(self, 'debug') and self.debug:
             raise InserterError("!BaseInserter: failed during callback.\n%r"%Traceback())
     return self.output
Пример #6
0
 def save(self, keyword, vertical, column, based_on_id, duplicate_name_filter, debug):
     """Write ``keyword['idf']`` into the matching row(s) of ``self.table``.

     The rows to update are selected by the first rule that applies:

     1. ``duplicate_name_filter`` truthy: rows whose ``name`` equals
        ``keyword['name']`` and that satisfy the filter; nothing is
        updated when no such row exists.
     2. ``based_on_id`` truthy: the row whose ``id`` is
        ``keyword['ori_id']``.
     3. otherwise: rows whose ``name_hash`` is the MD5 hex digest of the
        lower-cased, UTF-8 encoded name.

     Args:
         keyword: Mapping providing ``name``, ``idf`` and, for rule 2,
             ``ori_id``.
         vertical: Integer used to build the column name ``idf<vertical>``
             when ``column`` is falsy.
         column: Explicit column to update; overrides ``vertical``.
         based_on_id: Selects rule 2 (see above).
         duplicate_name_filter: SQL condition text appended to the
             duplicate lookup; selects rule 1.
         debug: When truthy, a confirmation line is printed.

     Returns:
         None. ``self.output`` is incremented after each update.
     """
     db = DB(self.host, self.port, self.user, self.passwd, self.db, self.table)
     name_hash = md5(keyword['name'].encode('utf-8').lower()).hexdigest()
     if duplicate_name_filter:
         # The name travels as a query parameter (the literal '%s' survives
         # the formatting below). duplicate_name_filter is spliced in as raw
         # SQL, so it must come from trusted code, never from user input.
         lookup = 'select id from %s where name=%s and %s' % (
             self.table, '%s', duplicate_name_filter)
         # Reuse the connection opened above instead of creating a second,
         # identically configured one just for this lookup.
         ids = db.get(lookup, keyword['name'])
         if not ids:
             return
         where = 'id in (%s)' % ','.join([str(row['id']) for row in ids])
     elif based_on_id:
         where = 'id="%s"' % keyword['ori_id']
     else:
         where = 'name_hash="%s"' % name_hash
     # 'idf%d' is only formatted when no explicit column was given.
     target_column = column if column else 'idf%d' % vertical
     db.update({target_column: keyword['idf']}, where)
     self.output += 1
     if debug:
         print("!IDFUpdater: Updateded:%s" % name_hash)
Пример #7
0
 def readdb(self, iterate=None):
     """Run ``self.query`` on a newly constructed ``DB`` for ``self.table``.

     Args:
         iterate: When truthy the result of ``iterget(self.query)`` is
             returned; otherwise the result of ``get(self.query)``.

     Returns:
         Whatever the selected ``DB`` method returns.
     """
     connection = DB(self.host, self.port, self.user, self.passwd,
                     self.db, self.table)
     # Only the selected method is looked up, exactly as in an if/else.
     fetch = connection.iterget if iterate else connection.get
     return fetch(self.query)
Пример #8
0
 def run(self):
     """Copy pages matching ``self.query`` into ``self.table`` in batches.

     Pages are fetched up to 100 at a time. For each page the pickled
     ``wrapper`` is loaded, merged with a fixed set of page fields,
     optionally projected through ``self.keys``, extended with
     ``self.user_info`` / ``self.furthure_parser`` output, and inserted.
     If the insert raises, an update keyed on ``url_hash`` is tried
     instead. The outcome is recorded with ``self.save_page(page_id, id)``,
     using ``-1`` for pages without a wrapper and for pages where both
     insert and update raised. After the last batch ``self.callback.run()``
     is attempted.

     Returns:
         ``self.output``: the list of truthy ids returned by insert/update.

     Raises:
         InserterError: Only when ``self.debug`` is set -- if a page
             produced no id, or if the callback failed.
     """
     self.iterate_callables(exceptions='callback')
     if hasattr(self, 'logger'):
         self.setup_logger(self.logger['filename'])
     self.output = []
     max_pages = 100
     # Page fields copied verbatim into every record, in this order.
     page_fields = ('url', 'effective_url', 'inserted_at', 'last_updated_at',
                    'updated_times', 'rank', 'label', 'url_hash')
     while True:
         # NOTE(review): the same query is re-run on every pass, so the
         # loop presumably ends because save_page() makes a page stop
         # matching self.query. A page whose insert/update returns a falsy
         # id without raising is never passed to save_page -- confirm it
         # cannot be re-fetched forever.
         pages = list(Page().find(self.query).limit(max_pages))
         if not pages:
             break
         for page in pages:
             db = DB(self.host, self.port, self.user, self.passwd, self.db, self.table)
             if not page['wrapper']:
                 if hasattr(self, 'log'):
                     self.log.warn('No wrapper result provided')
                 self.save_page(page['_id'], -1)
                 continue
             # SECURITY: pickle.loads can execute arbitrary code; this is
             # only safe while the page store holds trusted data.
             wrapper = pickle.loads(page['wrapper'].encode('utf-8'))
             if isinstance(wrapper, dict):
                 data = wrapper
             else:
                 # Start a fresh record. The old code wrote into ``data``
                 # before assigning it: NameError on the first page, and the
                 # previous page's dict silently reused on later ones.
                 data = {'wrapper': wrapper}
             for field in page_fields:
                 data[field] = page[field]

             if hasattr(self, 'keys') and 'mongo_keys' in self.keys:
                 if 'db_keys' in self.keys:
                     # Rename: mongo key -> db column, paired by position.
                     key_pairs = zip(self.keys['mongo_keys'], self.keys['db_keys'])
                 else:
                     # No renaming: keep each selected key under its own
                     # name (the old code used an undefined/stale ``d`` as
                     # the target key for non-string values here).
                     key_pairs = [(k, k) for k in self.keys['mongo_keys']]
                 new_data = {}
                 for source_key, target_key in key_pairs:
                     if source_key not in data:
                         continue
                     value = data[source_key]
                     if isinstance(value, (str, unicode)):
                         new_data[target_key] = value
                     else:
                         # Non-string values are stored as JSON text.
                         new_data[target_key] = simplejson.dumps(value)
                 data = new_data

             if hasattr(self, 'user_info') and self.user_info:
                 data.update(self.user_info)
             if hasattr(self, 'furthure') and self.furthure:
                 data.update(self.furthure_parser(data))

             # ``except Exception`` replaces the former bare ``except:`` so
             # KeyboardInterrupt/SystemExit can still stop a long run.
             try:
                 last_id = db.insert(data)
             except Exception:
                 try:
                     # A missing 'url_hash' (dropped by the key projection)
                     # raises KeyError here and is handled like a failed
                     # update.
                     last_id = db.update(data, "url_hash='%s'"%data['url_hash'])
                 except Exception:
                     last_id = None
                     self.save_page(page['_id'], -1)
             if last_id:
                 self.output.append(last_id)
                 self.save_page(page['_id'], last_id)
             elif hasattr(self, 'debug') and self.debug:
                 raise InserterError("!IterateInserter: failed to insert.\n%r"%data)

     try:
         self.callback.run()
     except Exception:
         # Callback is best-effort; errors only surface in debug mode.
         if hasattr(self, 'debug') and self.debug:
             raise InserterError("!IterateInserter: failed during callback.\n%r"%Traceback())
     return self.output