Example #1
def speech_to_text(audio_file_path):
    data = None
    cnt = 0
    # data is None means the HTTP request failed. The cause is probably a
    # limit on how many requests can be sent, but since the failures look
    # random we simply retry when they happen...
    while data is None and cnt < 5:
        data = _send_request(audio_file_path)
        cnt += 1
        if data is None:
            log.warning(f'try number {cnt} failed for {audio_file_path}')
            time.sleep(0.5)

    if data is None:
        log.warning(f'{audio_file_path} had an HTTP Request failure')
        return
    if data.get('RecognitionStatus') != 'Success':
        logwarning = f'{audio_file_path} has an empty transcript'
        if data.get('RecognitionStatus') is not None:
            logwarning += ', RecognitionStatus = ' + data.get(
                'RecognitionStatus')
        log.warning(logwarning)
        return

    transcript = data.get('NBest')[0].get('Display')
    log.info("Transcript: %s" % transcript)
    return transcript
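The loop above retries at a fixed 0.5 s interval; if the failures really come from rate limiting, an exponential backoff usually recovers sooner. A minimal sketch of that variant (the retry_with_backoff helper is illustrative, not part of the original module, and assumes the same log and time imports as above):

    def retry_with_backoff(func, *args, attempts=5, base_delay=0.5):
        """Retry func(*args) until it returns a non-None value, doubling the wait each time."""
        for attempt in range(attempts):
            result = func(*args)
            if result is not None:
                return result
            log.warning(f'attempt {attempt + 1} failed, backing off')
            time.sleep(base_delay * 2 ** attempt)
        return None

    # e.g. data = retry_with_backoff(_send_request, audio_file_path)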
Example #2
 def start(self):
     """
     Start the scheduler and the config file watcher.
     """
     log.info('apollocrond start')
     self._ap_sched.start()
     self._notifier.loop()
Example #4
 def refreshJobs(self):
     """
     purpose: keep scheduled jobs in sync with changes to the crontab config
     """
     self.host = getHostname()
     cron_jobs, status = parseCrontab(self._cronfile)
     if status:
         running_jobs = self._ap_sched.get_jobs()
         # diff the parsed crontab against the currently scheduled jobs
         new_jobs = set(cron_jobs.keys()) - set(running_jobs.keys())
         old_jobs = set(running_jobs.keys()) - set(cron_jobs.keys())
         samename_jobs = set(cron_jobs.keys()) & set(running_jobs.keys())
         for key in old_jobs:
             self._ap_sched.unschedule_job(running_jobs[key])
         for key in new_jobs:
             self._ap_sched.add_job(key, cron_jobs[key]['name'], cron_jobs[key]['trigger'], self.execCmd, 
                                     args=cron_jobs[key]['args'], kwargs=None)
         for key in samename_jobs:
             trigger = None
             args = None
             new_name = None
             if str(cron_jobs[key]['trigger']) != str(running_jobs[key].trigger):
                 trigger = cron_jobs[key]['trigger']
             if cron_jobs[key]['args'] != running_jobs[key].args:
                 args = cron_jobs[key]['args']
             if cron_jobs[key]['name'] != running_jobs[key].name:
                 new_name = cron_jobs[key]['name']
             if trigger or args or new_name:
                 self._ap_sched.update_job(running_jobs[key], trigger, args, new_name)
                 log.info('job[%s] changed' % key)
Example #5
 def process_IN_MODIFY(self, event):
     """
     Reload jobs when the cron config file changes.
     """
     if os.path.realpath(os.path.abspath(self._crond._cronfile)) == event.pathname:
         log.info('crontab config file changed, reloading')
         self._crond.refreshJobs()
Example #6
def checkActiveTask():
    '''Check whether any task is due to run and dispatch its execution.
    '''
    global next_conf_check, task_list
    now = int(time())
    if now >= next_conf_check:
        global conf_last_change
        mtime = int(os.stat(DEFAULT['task_file_config']).st_mtime) + int(os.stat(DEFAULT['host_file_config']).st_mtime)
        if mtime != conf_last_change:
            log.info('task and host config files changed, reloading')
            parseConf()
            conf_last_change = mtime
            log.info('current task count: %d, [%s]' % (len(task_list), ','.join([task['task_name'] for task in task_list])))
        next_conf_check += DEFAULT['config_check_delay']
    for task in task_list:
        try:
            if now >= task['nextcheck']:
                dt = threads.deferToThread(doTask, task)

                #if 256 == int(task.get('return_type', 256)):
                #    #dt.addCallback((lambda x: pass))
                #    continue
                #else:
                dt.addCallback(sender_manager.sendData)
                if 'task_interval' in task:
                    task['nextcheck'] += task['task_interval']
                elif 'task_time_specify' in task:
                    task['nextcheck'] += 3600 * 24
        except Exception:
            continue
    reactor.callLater(1, checkActiveTask)
Example #9
def find_5_results_per_percentage_step():
    accuracy_file_path = join(config.accuracy_dir(),
                              f'{config.provider()}_accuracy.txt')
    accuracy_info = load_accuracy_info(accuracy_file_path)

    interesting_results = {}

    log.info("Looking for some test data results...")

    for line in accuracy_info:
        file_name = line[:line.index('\t')]
        result = float(line[line.index('\t') + 1:].replace('\n', ''))

        original_text_file_path = join(config.clean_data_text_dir(), file_name)
        custom_text_file_path = join(config.provider_accuracy_dir(), file_name)

        with open(original_text_file_path, 'r') as f:
            original_words = extract_clean_words(f.read())
        with open(custom_text_file_path, 'r') as f:
            custom_words = extract_clean_words(f.read())

        for error_rate in range(0, 101, 5):
            if result == error_rate and len(
                [x for x in interesting_results.values() if x == result]) < 5:
                interesting_results.update({
                    f'{" ".join(original_words)}\n{" ".join(custom_words)}':
                    result
                })

    for k, v in sorted(interesting_results.items(), key=lambda x: x[1]):
        log.info(f'{v}% word error rate:\n{k}\n')
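Splitting each line at the first tab via two index() calls works, but str.partition does the same split in one step; an equivalent sketch for the per-line parsing above:

    file_name, _, raw_result = line.partition('\t')
    result = float(raw_result.strip())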
Example #10
    def __op_new(self, data_loaded, connection):
        item_id = int(data_loaded['id'])
        item = UrlItem.load_with_content(
            id=item_id, file_path=config.path_judge_inbox)
        feature = self.__fe.extract_item(item)

        if 'decision' not in data_loaded.keys():
            decision, confidence = self.__auto_judge(feature)
            log.info("[%s]: [%s] # %s # %s%%" % (item_id, FeatureExtract.str_feature(feature), decision, confidence))
        else:
            decision, confidence = data_loaded['decision'], 100
            log.info("[%s]: back from Extractor # %s # %s%%" % (item_id, decision, confidence))
            self.__relearn_clf(feature, decision)

        if confidence > config.const_CONFIDENCE_THRESHOLD:
            item['is_target'] = decision
            item.save()
            if int(item['is_target']) in [config.const_IS_TARGET_MULTIPLE, config.const_IS_TARGET_SIGNLE]:
                self.__send_to_extractor(item)
            else:
                os.remove(config.path_judge_inbox + "/%s" % item.filename())
        else:
            item['is_target'] = config.const_IS_TARGET_UNKNOW
            item.save()

            self.__judge_queue[item_id] = {
                "title": item['title'],
                "url": item['url'],
                "filename": item.filename(),
                "confidence": round(confidence, 2),
                "decision": decision,
                "feature": feature
            }
Example #11
    def __view_update(self, bot, job):
        channel_id = job if isinstance(job, str) else job.context
        result = self.__check_more_than_max_numbers_of_find_things()
        last_message = ''
        for find_thing in FIND_THINGS:
            if result[find_thing]:
                export_message = '--- {0} ---\n'.format(find_thing)
                count = 0
                for notice in result[find_thing]:
                    count += 1
                    export_message += '{title}(<a href="{url}">link</a>)\n'\
                        .format(title=notice['title'], url=notice['url'])
                bot.sendMessage(channel_id,
                                text=export_message,
                                disable_notification=True,
                                parse_mode=ParseMode.HTML)

                last_message += '{0}({1} items)/'.format(find_thing, count)

        if last_message:
            message_text = last_message[:-1]
            bot.sendMessage(channel_id,
                            text=NEW_NOTICE + message_text,
                            parse_mode=ParseMode.HTML)
        else:
            message_text = 'no new notices'

        log.info(message_text)
Example #12
def main():
    for audio_file_path in glob.glob(
            join(config.clean_data_audio_dir(), '*.wav')):
        spoken_text = speech_to_text(audio_file_path)
        clean_text = clean_up_text(spoken_text)
        flight_data = analyse_text(clean_text)
        log.info(flight_data)
Example #13
    def __list_of_files(self,
                        dir_name,
                        dir_name_as_string,
                        count,
                        root_dir=False):
        if count > ZIP_DIR_DEPTH:
            return [], []
        list_of_file = os.listdir(dir_name)
        all_files_as_string = list()
        all_files = list()
        for entry in list_of_file:
            full_path = os.path.join(dir_name, entry)
            current_dir_name_as_string = os.path.basename(dir_name)

            if current_dir_name_as_string == "__MACOSX":
                log.info('Skipping __MACOSX folder')
                continue

            if not root_dir:
                new_dir_as_string = f'{dir_name_as_string}/{current_dir_name_as_string}'
            else:
                new_dir_as_string = ""

            if os.path.isdir(full_path):
                files, strings = self.__list_of_files(full_path,
                                                      new_dir_as_string,
                                                      count + 1)
                all_files = all_files + files
                all_files_as_string = all_files_as_string + strings
            elif os.path.isfile(full_path) and self.__check_file_type(
                    full_path):
                all_files.append(full_path)
                all_files_as_string.append(new_dir_as_string)
        return all_files, all_files_as_string
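The hand-rolled recursion above mirrors what os.walk already provides; a simplified sketch of the same traversal (file-type filtering and the path-string bookkeeping omitted; list_files and max_depth are illustrative names):

    import os

    def list_files(root, max_depth):
        matches = []
        for dirpath, dirnames, filenames in os.walk(root):
            # prune __MACOSX and anything past the depth limit before walk descends
            dirnames[:] = [d for d in dirnames if d != '__MACOSX']
            if os.path.relpath(dirpath, root).count(os.sep) >= max_depth:
                dirnames[:] = []
            matches.extend(os.path.join(dirpath, f) for f in filenames)
        return matches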
Example #14
    def handle_thread_inner(self, thread_num):
        last_smpl = None
        # integer division: frame indices must be ints for range()
        begin = thread_num * self.num_frames // THREAD_NUM
        end = (thread_num + 1) * self.num_frames // THREAD_NUM
        for i in range(begin, end):
            if i == begin:
                current_frame = self.base_frame
            else:
                current_frame = self.create_frame(i, last_smpl)

            log.info('Fit frame {}'.format(i))
            # re-init if necessary
            self.reinit_frame(current_frame, self.prior_data['mean'],
                              self.nohands, self.debug_rn)
            # final fit
            self.fit_pose(current_frame, last_smpl, self.frustum, self.nohands,
                          self.debug_rn)

            self.temp_poses_dset[i] = current_frame.smpl.pose.r
            self.temp_trans_dset[i] = current_frame.smpl.trans.r

            if i == begin:
                self.temp_betas_dset[:] = current_frame.smpl.betas.r

            last_smpl = current_frame.smpl
Example #16
 def stringReceived(self, data):
     if data.startswith('PING'):
         info = data.split(":", 2)
         self.sendString('PINGBACK:%s' % info[1])
         #log.info('heartbeat from %s' % self._peer)
         return
     log.info('received data from %s' % self._peer)
     self.service.dealData(data)
Example #18
def convert_rml_and_determine_accuracy(suffix="", override=False):
    log.info(suffix)
    convert_rml(config.language_understanding_result_dir(suffix='_RML' +
                                                         suffix),
                suffix=suffix,
                override=override)

    # Run LUIS Accuracy Check
    determine_accuracy(suffix="_RML" + suffix)
Example #19
 def command_close(self, bot, update, args):
     if len(args) < 1:
         return bot.sendMessage(update.message.chat_id, text=TEXT_NOT_INPUT)
     elif args[0] in FIND_THINGS:
         self.db.set_enable(args[0], False)
         log.info('[/close] {0} {1} '.format(args[0],
                                             TEXT_DONE.format(False)))
         return bot.sendMessage(update.message.chat_id,
                                text=TEXT_DONE.format(False))
Example #20
def checkConfUpdate():
    global conf_last_change
    cur = int(os.stat(DEFAULT['realtime_task_config']).st_mtime)
    if cur != conf_last_change:
        log.info('listener configuration has changed, reloading...')
        parseConf()
        conf_last_change = cur

    reactor.callLater(DEFAULT['config_check_delay'], checkConfUpdate)
Example #21
def printWERThreshold(accuracy_map):
    h = sorted(accuracy_map.values())
    log.info("\nWER (%)" + "\n0        %d" % (len([x for x in h if x == 0])) +
             "\n>0-20    %d" % (len([x for x in h if x > 0 and x <= 20])) +
             "\n>20-40   %d" % (len([x for x in h if x > 20 and x <= 40])) +
             "\n>40-60   %d" % (len([x for x in h if x > 40 and x <= 60])) +
             "\n>60-80   %d" % (len([x for x in h if x > 60 and x <= 80])) +
             "\n>80-100  %d" % (len([x for x in h if x > 80 and x <= 100])) +
             "\n>80      %d" % (len([x for x in h if x > 80])))
Example #22
    def run(self):

        # get betas from 5 frames
        log.info('Initial fit')

        # num_init = 5
        # indices_init = np.ceil(np.arange(num_init) * self.num_frames * 1. / num_init).astype(np.int)
        #
        # init_frames = [self.base_frame]
        # for i in indices_init[1:]:
        #     init_frames.append(self.create_frame(i, self.base_smpl))
        #
        # init(init_frames, self.body_height, self.b2m, self.debug_rn)

        # fp = h5py.File(self.out, 'w')
        # self.poses_dset = fp.create_dataset("pose", (self.num_frames, 72), 'f', chunks=True, compression="lzf")
        # self.trans_dset = fp.create_dataset("trans", (self.num_frames, 3), 'f', chunks=True, compression="lzf")
        # self.betas_dset = fp.create_dataset("betas", (10,), 'f', chunks=True, compression="lzf")

        # pool = multiprocessing.Pool(THREAD_NUM)
        # pool.map(handle_thread, [(self, i) for i in range(THREAD_NUM)])
        # pool.join()
        # NB: handle_thread_inner mutates self.temp_*_dset from child processes;
        # that only works if those buffers are shared across processes
        # (e.g. memory-mapped), since each Process gets its own copy of self.
        ps = []
        for i in range(THREAD_NUM):
            p = Process(target=self.handle_thread_inner, args=(i, ))
            p.start()
            ps.append(p)
        for p in ps:
            p.join()
        log.info('Done.')

        log.info('Write hdf5 begin')
        with h5py.File(self.out, 'w') as fp:
            poses_dset = fp.create_dataset("pose", (self.num_frames, 72),
                                           'f',
                                           chunks=True,
                                           compression="lzf")
            trans_dset = fp.create_dataset("trans", (self.num_frames, 3),
                                           'f',
                                           chunks=True,
                                           compression="lzf")
            betas_dset = fp.create_dataset("betas", (10, ),
                                           'f',
                                           chunks=True,
                                           compression="lzf")
            for i in range(self.num_frames):
                log.info('writing frame {}'.format(i))
                # assert self.temp_poses_dset[i] is not None
                # assert self.temp_trans_dset[i] is not None
                # log.info('frame poses info: {}'.format(self.temp_poses_dset[i]))
                # log.info('frame trans info: {}'.format(self.temp_trans_dset[i]))
                poses_dset[i] = self.temp_poses_dset[i]
                trans_dset[i] = self.temp_trans_dset[i]
            betas_dset[:] = self.temp_betas_dset[:]

        log.info('Finally done!!')
Example #23
 def _create_window(self):
     if self.window:
         del self.window
     if self.is_login:
         log.info('Token {} found, show main window'.format(
             type(self.is_login)))
         return MainWindow(self)
     else:
         log.info('No token found, show login')
         return LoginWindow(self)
Example #24
 def test_insert(self):
     mon = StockMonitor(stock_id="sz000068",
                        monitoring_type="top",
                        threshold_value="top",
                        status="0",
                        user_id="default")
     db.session.add(mon)
     db.session.commit()
     mons = StockMonitor.query.all()
     log.info(json.dumps(mons, cls=AutoJSONEncoder, indent=2))
Example #25
 def command_setting(self, bot, update, args):
     if len(args) < 1:
         return bot.sendMessage(update.message.chat_id, text=TEXT_NOT_INPUT)
     elif args[0] in FIND_THINGS:
         enable = not self.db.get_enable(args[0])
         self.db.set_enable(args[0], enable)
         log.info('[/setting] {0} {1} '.format(args[0],
                                               TEXT_DONE.format(enable)))
         return bot.sendMessage(update.message.chat_id,
                                text=TEXT_DONE.format(enable))
Example #26
 def command_set(self, bot, update, job_queue):
     channel_id = CHANNEL_ID
     when = FREQUENCY.split(' ')
     self.set_alarms(channel_id, when, job_queue)
     text = ''
     for w in when:
         text += "{0} o'clock, ".format(w)
     log.info('[/set] {0}'.format(SET_ALARM.format(text[:-2])))
     return bot.sendMessage(update.message.chat_id,
                            text=SET_ALARM.format(text[:-2]))
Example #27
def start_cluster_generation_thread(data, settings) -> Tuple[str, List, List]:
    log.info('Starting request: {0}'.format(threading.active_count()))
    cluster_generation_lock.acquire()
    log.info('Enter cluster generation: ID {0}'.format(
        threading.current_thread().ident))
    try:
        c = cached_cluster(data, settings)
    finally:
        cluster_generation_lock.release()
    return c
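The acquire()/try/finally sequence above is exactly what the with statement does for a lock; the same critical section as a sketch (same names as above, illustrative only):

    def start_cluster_generation_thread(data, settings) -> Tuple[str, List, List]:
        log.info('Starting request: {0}'.format(threading.active_count()))
        with cluster_generation_lock:  # released on exit, even if cached_cluster raises
            log.info('Enter cluster generation: ID {0}'.format(
                threading.current_thread().ident))
            c = cached_cluster(data, settings)
        return c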
Example #28
 def __init__(self, language):
     log.info('Setup NLTK')
     nltk.download('punkt', quiet=True)
     nltk.download('averaged_perceptron_tagger', quiet=True)
     nltk.download('maxent_ne_chunker', quiet=True)
     nltk.download('words', quiet=True)
     nltk.download('wordnet', quiet=True)
     nltk.download('stopwords', quiet=True)
     self.language = language
     self.bigrams = False
Example #29
    def test_best_plan(self):
        grid = Grid()
        grid.init_generate_grid()

        grid.show_detail()
        result = grid.swap_by_strategy(StrategyType.HIGH_ORDER_FIRST)

        if len(result) == 0:
            log.error('no swap')
        for i in result:
            log.info('swap %s' % i)
Example #30
def prepare_clustered_data_structure(data_handler, k_cluster) -> List[RestDisplayStructure]:
    log.info('Generating Prediction')
    result = [RestDisplayStructure(label, meta_info, term, cluster_id)
              for label, meta_info, term, cluster_id in
              zip(data_handler.display_labels(),
                  data_handler.meta_info(),
                  remove_rare_terms(k_cluster.get_terms()),
                  k_cluster.get_cluster_id())]

    if data_handler.SHUFFLE_DATA:
        random.shuffle(result)
    return result
Example #31
 def stringReceived(self, data):
     """Handle heartbeat (PINGBACK) replies."""
     if data.startswith('PINGBACK'):
         suretime = data.split(":", 2)[1]
         deltatime = time() - self._heartsendtime
         if int(suretime) != self._htime or deltatime > self.factory._connectiontimeout:
             return
         log.info('heartbeat reply received, delta: %f' % deltatime)
         if self.factory._paused:
             self.factory._paused = False
             self.sendQueue()
         self._heartreceivetime = time()
Example #32
    def show_soldier(self):
        result = []
        for (ref_id, order, level) in self.soldiers:
            result.append(self.soldiers[(ref_id, order, level)])
            # log.info('%s' % self.soldiers[(ref_id, order, level)])

        result = sorted(result, key=lambda soldiers: soldiers.order.value, reverse=True)
        count = 0
        for soldier in result:
            log.info('%s' % soldier)
            count += soldier.count
        log.info('count=%s' % count)
Example #33
def generate_cluster(selected_data, settings) -> Tuple[str, List, List]:
    data_handler = initialize_data(selected_data, settings)

    pre_load_uuid = data_handler.PRE_LOAD_UUID

    if data_handler.HAS_MULTIPLE_DATA_SOURCES:
        selected_data = data_handler.DATA_SOURCE

    if pre_load_uuid and storage_io.is_model_present(pre_load_uuid, selected_data):
        log.info(f'Preload model: {pre_load_uuid}')
        return load_cluster(data_handler.PRE_LOAD_UUID, "", selected_data, settings, data_handler=data_handler)

    return generate_k_means(data_handler, selected_data)
Example #34
 def __get_var(self, var):
     if not self.__config_file:
         try:
             with open(self.__application_config_path, 'r') as stream:
                 self.__config_file = yaml.safe_load(stream)
                 log.info("Config Loaded")
         except FileNotFoundError:
             log.info("Config not found, using ENV Var")
             return os.environ.get(var)
     try:
         # the ENV var takes precedence; `or` also falls back when it is empty
         return os.environ.get(var) or self.__config_file[var]
     except KeyError:
         log.error('Cannot find ENV var: %s' % var)
Example #35
 def connectionMade(self):
     """当连接建立后启动心跳机制 """
     self._peer = self.transport.getPeer()
     log.info('connect to %s successfully' % self._peer)
     self.factory._paused = False
     self._connected = True
     self._heartsendtime = 0
     self._heartreceivetime = 0
     self.factory.resetDelay()
     self.startHeartbeat()
     self.factory._connectionMade.callback(self)
     self.factory._connectionMade = Deferred()
     self.resumeQueue()
Example #36
def _convert_audio_with_normalisation(audio_file_path, speed):
    suffix = f'_speed-{speed}-normal'
    file_name = _add_suffix(audio_file_path, suffix)
    output_path = os.path.join(AUDIO_DIR(suffix), file_name)

    if os.path.exists(output_path):
        log.info(
            f'already exists... skipping:\t {file_name}-{speed}-normalisation')
        return

    log.info(f'Convert {file_name} with {speed} normalisation')
    os.system(
        f'ffmpeg -i {audio_file_path}  -filter:a "atempo={speed}" {output_path}'
    )
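os.system builds a shell command by string interpolation, which breaks on paths containing spaces or shell metacharacters; subprocess.run with an argument list avoids the shell entirely. A sketch of the same ffmpeg invocation (flags copied from above):

    import subprocess

    subprocess.run(
        ['ffmpeg', '-i', audio_file_path,
         '-filter:a', f'atempo={speed}',
         output_path],
        check=True,  # raise if ffmpeg exits non-zero
    )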
Example #37
    def __op_new(self, data_loaded, connection):
        item_id = int(data_loaded['id'])
        item = UrlItem.load_with_content(
            id=item_id, file_path=config.path_extractor_inbox)

        count, maps = db.get_url_with_same_layout_hash(item['layout_hash'])
        log.info(str(maps))
        log.info(count)
        if len(maps) > 0:
            import operator

            tar_ext = max(maps.items(), key=operator.itemgetter(1))
            log.info(float(tar_ext[1]) / len(maps))
            if tar_ext[1] > config.extractor_same_layout_number:
                extractor = tool.str2extractor(tar_ext[0])
                self.__extract(item, extractor)
                return

        extractor = config.const_RULE_UNKNOW

        self.__ext_queue[item_id] = {
            "title": item['title'],
            "url": item['url'],
            "filename": item.filename(),
            "decision": item['is_target'],
            "extractor": extractor
        }

        log.info("[%s]: # %s " % (item_id, extractor))

Example #38
 def __op_preview(self, data_loaded, connection):
     log.info(data_loaded['extractor'])
     if data_loaded['extractor'] == config.const_RULE_UNKNOW:
         result = {x: "" for x in range(1, self.__ie.num_attr() + 1)}
     else:
         item_id = int(data_loaded['id'])
         item = UrlItem.load_with_content(
             item_id, file_path=config.path_extractor_inbox)
         extractor = data_loaded['extractor']
         result = self.__ie.extract(item, extractor)
     preview = list()
     # use `value`, not `str`, to avoid shadowing the built-in
     for att, value in result.items():
         preview.insert(att, dict(name=self.__ie.name(att), value=value))
     log.info(preview)
     tool.send_msg(connection, pickle.dumps(preview, -1))
Example #39
 def stringReceived(self, data):
     commands = data.split(None, 1)
     command_name = commands[0]
     args = ''
     if len(commands) > 1:
         args = commands[1]
     command = task_list.get(command_name.strip())
     do_cmd = '%s %s' % (command, args)
     # the realtime task is not in the task whitelist
     if command is None:
         result = {'s': -1, 'o': 'invalid command'}
         log.error('invalid remote request: %s' % data)
         self.connectionEnd(result)
     else:
         log.info('remote request: %s' % do_cmd)
         # run the realtime task asynchronously
         defer = self.factory.getStatusOutput(do_cmd)
         defer.addCallback(self.action)
         defer.addErrback(self.errorAction)
Example #40
    def __extract(self, item, extractor):
        result = self.__ie.extract(item, extractor)
        log.info(extractor)
        log.info(result)
        info = dict()
        for att, value in result.items():
            # truncate each attribute value to its column's max length
            info[self.__ie.db_col(att)] = value[0:self.__ie.max_len(att)]
        log.info(info)
        db.new_sem_with_map(item['id'], info)
        item['extractor'] = extractor
        item.save()

        os.remove(config.path_extractor_inbox + "/%s" % item.filename())
Example #41
 def execCmd(self, cmd, ctid):
     """The actual execution of a crontab job."""
     pscmd = r"ps -A -opid,ppid,state,user,cmd|grep '\-\-ctid %s'|grep -v 'grep '" % ctid
     rets, retv = getstatusoutput(pscmd)
     retv = retv.strip()
     log.info('ps result:\n%s' % retv)
     if retv:
         log.info('the process is still running')
         return {'seq': -1,
                 'exec_time': 0,
                 'stats': 0,
                 'output': 'an identical task is already running, *not scheduled*',
                 }
     start = time.time()
     status, output = getstatusoutput(cmd)
     log.info('EXEC %s: %d' % (cmd, status))
     if len(output) > 60000:
         # getstatusoutput returns str on Python 3, so slice the text directly
         output = '===large output===\n...%s' % output[-10240:]
     end = time.time()
     exec_time = end - start
     return {'seq': 3, 'exec_time': '%.3f' % exec_time, 'stats': status, 'output': output}
Example #42
 def __op_refresh(self, data_loaded, connection):
     delete_ids = []
     for key, ent in self.__ext_queue.items():
         item_id = int(key)
         if not os.path.isfile(
                 config.path_extractor_inbox + "/" + str(item_id) + ".html"):
             delete_ids.append(item_id)
             continue
         item = UrlItem.load_with_content(
             id=item_id, file_path=config.path_extractor_inbox)
         count, maps = db.get_url_with_same_layout_hash(item['layout_hash'])
         log.info(str(maps))
         log.info(count)
         if len(maps) > 0:
             import operator
             tar_ext = max(maps.items(), key=operator.itemgetter(1))
             log.info(float(tar_ext[1]) / len(maps))
             if tar_ext[1] > config.extractor_same_layout_number:
                 extractor = tool.str2extractor(tar_ext[0])
                 self.__extract(item, extractor)
                 delete_ids.append(item_id)
     for ent_id in delete_ids:
         del self.__ext_queue[ent_id]
Example #43
log.setOwn('apollocollecter')
log.info('apollocollecter starting, reading task and host config files')
parseConf()
conf_last_change = int(os.stat(DEFAULT['task_file_config']).st_mtime) + int(os.stat(DEFAULT['host_file_config']).st_mtime)
log.info('current task count: %d, [%s]' % (len(task_list), ','.join([task['task_name'] for task in task_list])))
reactor.callLater(1, checkActiveTask)
log.info('apollo agent starting...')
reactor.run()
Example #44
__author__ = 'LeoDong'

import socket

import sys

from util import config
from judge.SAEJudge import SAEJudge
from util.logger import log

#TODO unique id in queue, store to file and reload.
# Create a TCP/IP socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
log.info('start listening on %s' % str(config.socket_addr_judge))
sock.bind(config.socket_addr_judge)
# Listen for incoming connections
sock.listen(10)
judge = SAEJudge(config.path_judge_dtree, config.dtree_param)
try:
    while True:
        # Wait for a connection
        connection, client_address = sock.accept()
        judge.process(connection, client_address)
finally:
    log.info("Saving")
    judge.save()
Example #45
        log.error('config file format error [%s]' % str(e))
        return
    global task_list
    task_list = {}
    for sec in task_cfg.sections():
        items = task_cfg.items(sec)
        key = None
        value = None
        for k, v in items:
            # v.strip() must be called; a bare v.strip compares truthy every time
            if 'task_name' == k and v.strip() != '':
                key = v
            elif 'task_content' == k and v.strip() != '':
                value = v
        if key and value:
            task_list[key] = value
    log.info('realtime task:[%s]' % ','.join(task_list.keys()))

def checkConfUpdate():
    global conf_last_change
    cur = int(os.stat(DEFAULT['realtime_task_config']).st_mtime)
    if cur != conf_last_change:
        log.info('listener configuration has changed, reloading...')
        parseConf()
        conf_last_change = cur

    reactor.callLater(DEFAULT['config_check_delay'], checkConfUpdate)

class AgentListenerProtocol(NetstringReceiver):
    def connectionMade(self):
        self._peer = self.transport.getPeer()
        log.info('connection from %s' % self._peer)
Example #46
 def connectionLost(self, reason):
     log.info('%s disconnected' % self._peer)
Example #47
 def connectionMade(self):
     self._peer = self.transport.getPeer()
     log.info('connection from %s' % self._peer)
Example #48
    def dealJobEvent(self, event):
        """
        Handle events produced during scheduling.
         - event: object describing the event
        """

        def_ret = {
            'jn': self._ap_sched._jobs_num,
            'host_name': self.host,
        }
        if EVENT_JOB_STARTED == event.code or EVENT_JOB_MISSED == event.code:
            log.info('[%s] %s, job count %d' % (event.job.name, self.code_string[event.code], def_ret['jn']))
            ret = {
                'cid': event.job.key,
                'name': event.job.name,
                'seq': 1 if EVENT_JOB_STARTED == event.code else -1,
                'stats': 0,
                'exec_time': 0,
                'stime': str(event.scheduled_run_time),
                'output': self.code_string[event.code],
            }
            ret.update(def_ret)
            info = {
                'req_type': 'RESPONSE',
                'res_type': 'CRON',
                'retval': json.dumps(ret),
            }
            self.sendMessage(json.dumps(info))
        elif EVENT_JOB_EXECUTED == event.code:
            retval = event.retval
            if -1 == retval['seq']:
                log.info('[%s] abnormal, not scheduled' % event.job.name)
            else:
                log.info('[%s] finished, job count %d' % (event.job.name, def_ret['jn']))

            retval.update(def_ret)
            retval.update(name=event.job.name, cid=event.job.key, stime=str(event.scheduled_run_time))
            info = {
                'req_type': 'RESPONSE',
                'res_type': 'CRON',
                'retval': json.dumps(retval),
            }
            self.sendMessage(json.dumps(info))
        elif EVENT_JOB_MAXINSTANCE == event.code:
            log.info('job[%s] reached its max instances, not scheduled' % event.job.name)
        elif EVENT_JOBSTORE_JOB_ADDED == event.code:
            log.info('job[%s] added, job count %d' % (event.job.name, def_ret['jn']))
        elif EVENT_JOBSTORE_JOB_REMOVED == event.code:
            log.info('job[%s] removed, job count %d' % (event.job.name, def_ret['jn']))
        elif EVENT_JOB_ERROR == event.code:
            log.info('job[%s] error: %s' % (event.job.name, event.exception))
Example #49
    def updateBeat(self, hostname):
        self.factory.beats[hostname] = time()


class ApolloReceiverFactory(protocol.ServerFactory):
    """Implements heartbeat checking."""
    protocol = ApolloReceiverProtocol

    def __init__(self, serve):
        self._clientcount = 0
        self.beats = {}

    def checkBeats(self):
        """Check whether each connected client's heartbeat is still healthy."""
        now = time()
        # iterate items() to get (host, timestamp) pairs, not just the keys
        for host, last in self.beats.items():
            time_diff = now - last

from twisted.internet import epollreactor
epollreactor.install()
from twisted.internet import reactor

log.setOwn('apolloreceiver')
service = ApolloService()
factory = ApolloReceiverFactory(service)
reactor.listenTCP(DEFAULT['receiver_port'], factory)
log.info('apolloserver starting...')
reactor.run()
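checkBeats computes time_diff but the fragment ends before acting on it; one way such a sweep typically finishes, as a sketch (the timeout constant and the eviction policy are assumptions, not part of the original):

    def checkBeats(self):
        HEARTBEAT_TIMEOUT = 30  # seconds; illustrative value
        now = time()
        stale = [host for host, last in self.beats.items() if now - last > HEARTBEAT_TIMEOUT]
        for host in stale:
            log.info('%s heartbeat timed out' % host)
            del self.beats[host]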

Example #50
 def connectionMade(self):
     self.factory._clientcount += 1
     self._peer = self.transport.getPeer()
     log.info('%s connected, current connections: %d' % (self._peer, self.factory._clientcount))
Example #51
 def connectionLost(self, reason):
     self.factory._clientcount -= 1
     log.info('%s lost connection, current connections: %d' % (self._peer, self.factory._clientcount))