def run(self):
    """Log the configured bind address and start the REST application.

    The host and port are read from ``self.conf`` and handed to
    ``self.app.run`` together with a literal ``True`` third argument.
    """
    host = self.conf.rest_server_address()
    port = self.conf.rest_server_port()
    LOG.info('rest server run at %s:%d' % (host, port))
    # NOTE(review): the positional True is presumably a debug flag (as in
    # Flask's app.run(host, port, debug)) -- confirm it is intended here.
    self.app.run(host, port, True)
def negative_word(self):
    """Return the ``word`` value of the first NegativeWord row.

    Returns:
        The ``word`` attribute of the first row, or None (after logging an
        error) when the query yields nothing.
    """
    row = self.session.query(NegativeWord.word).first()
    if row:
        return row.word
    LOG.error("get negative word failed.")
    return None
def user_email(self, username):
    """Look up the email of the user named *username*.

    Returns:
        The ``email`` attribute of the first matching row, or None (after
        logging an error) when no row matches.
    """
    query = self.session.query(User.email).filter_by(name=username)
    row = query.first()
    if row:
        return row.email
    LOG.error("user %s do not setting email" % (username))
    return None
def user_get(self, username):
    """Fetch the name and monitor keyword of the user named *username*.

    Returns:
        The first matching row (exposing ``name`` and ``monitor_keyword``),
        or None (after logging an error) when no row matches.
    """
    query = self.session.query(User.name, User.monitor_keyword)
    row = query.filter_by(name=username).first()
    if row:
        return row
    LOG.error("username %s not exist." % (username))
    return None
def schedule_users(self):
    """Schedule every user returned by ``self.db.user_list()``.

    Each user's ``name`` is passed to ``self.add_user``. When the list is
    missing or empty, only an info message is logged.
    """
    users = self.db.user_list()
    if users and len(users) != 0:
        for entry in users:
            self.add_user(entry.name)
        return
    LOG.info("not user in database.")
def __init__(self, conf_file):
    """Remember *conf_file* and load it into a new ConfigParser.

    Any exception raised while reading is logged as a warning and then
    re-raised to the caller.
    """
    self.conf_file = conf_file
    parser = ConfigParser.ConfigParser()
    self.parser = parser
    try:
        # NOTE(review): ConfigParser.read() skips files it cannot open
        # instead of raising, so a missing file may never reach the
        # except branch below -- confirm that is acceptable.
        parser.read(conf_file)
    except Exception:
        LOG.warn("parse config file %s failed." % (conf_file))
        raise
def add_result(self, user_name, url, source_url, keyword):
    """Store one Result row for the user named *user_name*.

    The row is stamped with the current local time and committed at once.
    When the user cannot be found an error is logged and nothing is stored.
    """
    owner = self.session.query(User.id).filter_by(name=user_name).first()
    if not owner:
        LOG.error("user_name: %s not found in db"%(user_name))
        return

    record = Result(
        user_id=owner.id,
        url=url,
        source_url=source_url,
        keyword=keyword,
        datetime=datetime.datetime.now(),
    )
    self.session.add(record)
    self.session.commit()
def fetch_page(self, url):
    """Download *url* and return the response body.

    Returns:
        The data read from the URL, or None when opening or reading it
        raises IOError (a warning is logged in that case).
    """
    page = None
    try:
        fd = urllib.urlopen(url)
        try:
            page = fd.read()
        finally:
            # Always release the handle, even when read() fails; the
            # previous code leaked it on a read error.
            fd.close()
    except IOError:
        LOG.warn("open url:%s failed." % (url))
    return page
def add_user(self, username):
    """Assign *username* to a search engine chosen by ``self.filter``.

    Nothing is done when no search engine stats exist, when the user is
    already known to ``self.se_mgr``, or when the filter selects no engine.
    """
    if not len(self.se_mgr.stats_get()):
        LOG.info("not search engine.")
        return
    if self.se_mgr.user_exist(username):
        return

    chosen_key = self.filter.select(self.se_mgr.stats_get())
    if chosen_key:
        self.se_mgr.add_user(chosen_key, username)
        return
    LOG.error("filter do not select useful search engine for user:%s" %
              (username))
def result_list_direct(self, username, start_time, end_time):
    """List the results of *username* within ``[start_time, end_time)``.

    A falsy *end_time* is replaced by the current local time.

    Returns:
        Rows of (url, source_url, datetime, keyword), or an empty list
        when the user does not exist.
    """
    user = self.session.query(User.id).filter_by(name=username).first()
    if not user:
        LOG.info('user %s not find' % (username))
        return []

    end_time = end_time or datetime.datetime.now()

    query = self.session.query(
        Result.url, Result.source_url, Result.datetime, Result.keyword)
    query = query.filter(Result.user_id == user.id)
    query = query.filter(Result.datetime >= start_time)
    query = query.filter(Result.datetime < end_time)
    return query.all()
def start(self):
    """Run the search loop forever.

    Each round reloads the user list. With no users the loop waits
    ``SearchEngineBase.USER_LIST_EMPTY_RESCHED_INTERVAL`` and retries;
    otherwise every user is searched, pausing ``search_interval`` after
    each user and once more at the end of the round.
    """
    LOG.info("search engine running...")
    while True:
        self.user_list_reload()
        if len(self.user_list) != 0:
            for user in self.user_list:
                self.search_user(user)
                time.sleep(self.conf.search_interval())
            time.sleep(self.conf.search_interval())
        else:
            time.sleep(SearchEngineBase.USER_LIST_EMPTY_RESCHED_INTERVAL)
def user_update(self, username, password, email, mobile_phone, permission,
                company, monitor_keyword):
    """Overwrite the stored fields of the user named *username*.

    Returns:
        True after the change is committed, False (after logging an
        error) when the user does not exist.
    """
    user = self.session.query(User).filter_by(name=username).first()
    if not user:
        LOG.error("user %s not exist" % (username))
        return False

    new_values = (
        ('password', password),
        ('email', email),
        ('mobile_phone', mobile_phone),
        ('permission', permission),
        ('company', company),
        ('monitor_keyword', monitor_keyword),
    )
    for attr, value in new_values:
        setattr(user, attr, value)
    self.session.commit()
    return True
def __init__(self, env, conf_file):
    """Set up logging, configuration, database and the engine module.

    The log id is the part of *conf_file* before its first dot. The engine
    implementation is imported from ``analyze.search_engine.<engine_name>``
    and its ``SearchEngine`` class is instantiated.
    """
    self.env = env
    log_id = conf_file.split('.')[0]
    self._log_init(log_id)

    conf_path = '/'.join([self.env.conf_dir(), conf_file])
    self.conf = SearchEngineConf(conf_path)
    LOG.set_log_level(self.conf.log_level())

    analyze_conf_path = '/'.join(
        [self.env.basic_conf_dir(), env_cons.ANALYZE_CONF_FILE])
    self.analyze_conf = AnalyzeConf(analyze_conf_path)
    self.db = SearchEngineDB(self.analyze_conf)

    engine_module = importlib.import_module(
        "analyze.search_engine.%s" % (self.conf.engine_name()))
    self.engine = engine_module.SearchEngine(self.conf, self.env, self.db)
def user_add(self, username, password, email, mobile_phone, permission,
             company, monitor_keyword):
    """Create a new user row unless *username* is already taken.

    Returns:
        True after the new user is committed, False (after logging an
        error) when a user with that name already exists.
    """
    existing = self.session.query(User.id).filter_by(name=username).first()
    if existing:
        LOG.error("user %s already exist." % (username))
        return False

    new_user = User(
        name=username,
        password=password,
        email=email,
        mobile_phone=mobile_phone,
        permission=permission,
        company=company,
        monitor_keyword=monitor_keyword,
    )
    self.session.add(new_user)
    self.session.commit()
    return True
def _aging_check(self):
    """Age out search engines whose timer has reached zero, forever.

    Every ``SE_AGING_TIMER_INTERVAL`` seconds each entry of ``self.stats``
    has its ``SE_STATS_TIMER`` counter decremented. Entries whose counter
    is already zero are passed to ``self._aging_timer_fire`` and removed
    from ``self.stats``.
    """
    while True:
        expired_keys = []
        for key, stat in self.stats.items():
            remaining = stat[self.SE_STATS_TIMER]
            if remaining == 0:
                # Collect first; removing while iterating would change
                # the dict's size mid-loop.
                expired_keys.append(key)
                continue
            stat[self.SE_STATS_TIMER] = remaining - 1

        # Use the collected key itself: the earlier code reused the stale
        # ``key`` left over from the loop above, so it fired and popped
        # the wrong entry (and raised KeyError on a second pop).
        for expired_key in expired_keys:
            self._aging_timer_fire(expired_key)
            self.stats.pop(expired_key)
            LOG.warn("search engine %s aging!" % (expired_key))

        time.sleep(self.SE_AGING_TIMER_INTERVAL)
def _build_msg(self, report, sendto, sendfrom, report_format):
    """Build a multipart mail carrying the *report* file as attachment.

    Args:
        report: Path of the report file to attach.
        sendto: Value for the ``to`` header.
        sendfrom: Value for the ``from`` header.
        report_format: Extension used in the attachment's file name.

    Returns:
        The MIMEMultipart message, or None (after logging an error) when
        the report file cannot be opened.
    """
    try:
        f = open(report, 'rb')
    except IOError as e:
        LOG.error("open report %s failed. except:%s" % (report, e))
        return None
    # Close the handle as soon as the content is read; it was previously
    # left open for the garbage collector.
    with f:
        payload = f.read()

    attach = MIMEText(payload, 'base64', 'utf-8')
    attach['Content-Type'] = 'application/octet-stream'
    attach['Content-Disposition'] = 'attachment; filename="报告.%s"' % (
        report_format)

    msg = MIMEMultipart()
    msg.attach(attach)
    msg['to'] = sendto
    msg['from'] = sendfrom
    msg['subject'] = '报告'
    return msg
def _search_negative_word(self, user, link):
    """Fetch *link* and check its content against the user's negative words.

    The page is fed to ``self.html_parser``. When the parser reports a
    redirect URL, that URL is fetched and checked instead. Nothing is
    checked when a page cannot be fetched or parsed.

    Args:
        user: Mapping that provides the ``'negative_word'`` iterable.
        link: URL of the search result to inspect.
    """
    page = self.fetch_page(link)
    if not page:
        return
    try:
        self.html_parser.reset_parser()
        self.html_parser.feed(page)
    except Exception:
        # Best-effort parsing, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` did.
        LOG.warn("parse link:%s failed." % (link))
        return

    url = link
    if self.html_parser.redirect_url:
        url = self.html_parser.redirect_url
        page = self.fetch_page(url)
        if not page:
            # The redirect target could not be fetched; there is no
            # content to match against.
            return

    for negative_word in user['negative_word']:
        self._search_one_negative_word(user, page, negative_word, url)
def _send_user_info(self, key, username_list):
    """Send the monitoring details of the given users via ``self.mq``.

    For every user found in the database an entry with the user name, the
    monitor keyword and the comma-split negative words is built. Users
    that cannot be loaded are skipped. Nothing is sent when no negative
    word is configured.
    """
    negative_word = self.db.negative_word()
    if not negative_word:
        LOG.error("no negative word. so no need dispatch user.")
        return

    user_info_list = []
    for username in username_list:
        user_info = self.db.user_get(username)
        if user_info:
            entry = {
                sec.USERNAME: user_info.name,
                sec.KEYWORD: user_info.monitor_keyword,
                sec.NEGATIVE_WORD: negative_word.split(','),
            }
            user_info_list.append(entry)

    self.mq.send_user_info(key, user_info_list)
def _search_keyword(self, user, keyword):
    """Walk the search result pages for *keyword* and inspect every link.

    Starting from the first result page, each result link is passed to
    ``self._search_negative_word``. Paging stops after ``self.max_page``
    pages, or when there is no next page, the next page cannot be
    fetched, or it holds no results.

    Args:
        user: User mapping forwarded to ``_search_negative_word``.
        keyword: Keyword passed to ``self._first_abs_page``.
    """
    abs_page = self._first_abs_page(keyword)
    if not abs_page:
        return
    self.html_parser.reset_parser()
    self.html_parser.feed(abs_page)
    if not self.html_parser.search_result_href:
        return

    # Fixed typo: this used to assign ``self.page_cout``, so the counter
    # tested below was never reset for a new keyword.
    self.page_count = 0
    while self.page_count < self.max_page:
        # Capture the paging data before visiting the links; presumably
        # _search_negative_word reuses self.html_parser and would
        # overwrite it (hence also the deep copy).
        nextpage_url = None
        if self.html_parser.nextpage_url:
            nextpage_url = self.conf.url() + self.html_parser.nextpage_url
        search_result_href = copy.deepcopy(
            self.html_parser.search_result_href)

        for link in search_result_href:
            self._search_negative_word(user, link)

        # Count the page just processed so the max_page limit is actually
        # enforced; nothing in this method advanced the counter before.
        self.page_count += 1

        if not nextpage_url:
            break
        time.sleep(self.conf.search_interval())
        page = self.fetch_page(nextpage_url)
        if not page:
            break
        LOG.debug("search next page:%s" % (nextpage_url))
        self.html_parser.reset_parser()
        self.html_parser.feed(page)
        if not self.html_parser.search_result_href:
            break
def _gen_result(self, username, result_list):
    """Generate the report for *username* and mail it to the user.

    The report is built with ``ready``/``add``/``finish``, sent through
    ``self.notify`` and then deleted from disk in every case.

    Returns:
        True when the mail was sent, False when the user's email could
        not be determined or sending failed.
    """
    self.ready(username)
    for result in result_list:
        self.add(result)
    file_name = self.finish(username)

    try:
        user_email = self.db.user_email(username)
        if not user_email:
            LOG.error("username %s get email failed."%(username))
            return False
        if not self.notify.send(file_name, self.format(), user_email):
            LOG.error("username %s send email failed."%(username))
            return False
        return True
    finally:
        # Single cleanup point: the report file is removed on every
        # outcome, including when the lookup or the send raises.
        os.unlink(file_name)
def send(self, report, report_format, sendto):
    """Mail the *report* file to *sendto* using the stored SMTP settings.

    Args:
        report: Path of the report file to attach.
        report_format: Extension used for the attachment name.
        sendto: Recipient address.

    Returns:
        True when the mail was handed to the SMTP server, False when the
        settings are missing, the message could not be built, or any
        SMTP step failed.
    """
    setting = self.db.global_setting()
    if not setting:
        LOG.error('email not set')
        return False

    msg = self._build_msg(report, sendto, setting.email, report_format)
    if not msg:
        return False

    try:
        smtp = smtplib.SMTP()
        try:
            smtp.connect(setting.smtp_server, setting.smtp_port)
            smtp.login(setting.smtp_username, setting.smtp_password)
            smtp.sendmail(setting.email, sendto, msg.as_string())
        finally:
            # Release the connection even when connect/login/sendmail
            # fails; it was previously closed only on success.
            smtp.close()
        return True
    except Exception as e:
        LOG.error("send mail to %s failed. exception: %s" % (sendto, e))
        return False
def search_user(self, user):
    """Search every keyword of *user*, pausing between keywords.

    Args:
        user: Mapping with the keys ``'username'``, ``'keyword'`` (an
            iterable of keywords) and ``'negative_word'``.
    """
    # The leftover debug ``print user`` was dropped: it wrote to stdout
    # and the same fields are logged right here.
    LOG.info("username:%s" % (user['username']))
    LOG.info('keyword:%s' % (user['keyword']))
    LOG.info('negative_word:%s' % (user['negative_word']))
    for keyword in user['keyword']:
        self._search_keyword(user, keyword)
        time.sleep(self.conf.search_interval())
def _log_init(self, id):
    """Configure the global logger for the component named *id*.

    Sets the log id, the level ``'info'`` and a log file named
    ``<id>.log`` inside ``self.env.log_dir()``.
    """
    log_file = self.env.log_dir() + "/" + id + ".log"
    LOG.set_log_id(id)
    LOG.set_log_level('info')
    LOG.set_log_file(log_file)
def _search_one_negative_word(self, user, page, word, url):
    """Record a result when *word* matches *page*.

    On a match reported by ``self.algorithm.match`` the hit is logged and
    stored through ``self.db.add_result`` with ``self.conf.url()`` as the
    source URL. Nothing happens otherwise.
    """
    if not self.algorithm.match(word, page):
        return
    LOG.info("find match: user: %s url:%s word:%s" % (user, url, word))
    self.db.add_result(user['username'], url, self.conf.url(), word)