import datetime
import json

# NOTE: Log, Argument, Command, Snapshot, db / DB_SESSION, Result,
# crawl_result_path, _check_log_updated, _update_to_default_name and
# is_numberable come from the surrounding project and are not defined here.


def crawl_result(result, force=False, commit=True):
    """crawl_results."""
    if not result.crawlable:
        return result

    now = datetime.datetime.now()
    if (not force) and (now - result.updated_at).total_seconds() < 4:
        return result

    # if the log file is not updated, there is no need to read its contents
    is_updated = _check_log_updated(result)
    crawled_result = crawl_result_path(result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            # the log file shrank, so re-register logs and args from scratch
            current_log_idx = 0
            result.logs = []
            result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            result.logs.append(Log(log))

    if result.args is None:
        result.args = Argument(json.dumps(crawled_result['args']))

    if result.name is None:
        _update_to_default_name(result)

    # the crawled commands list contains both new and already registered
    # commands; registered commands may have received a response, so they
    # need to be updated in place
    current_cmd_idx = len(result.commands)
    if len(crawled_result['commands']) < current_cmd_idx:
        current_cmd_idx = 0
        result.commands = []
        result.snapshots = []
    for cmd in crawled_result['commands'][current_cmd_idx:]:
        result.commands.append(Command(**cmd))
    for i, cmd in enumerate(crawled_result['commands'][:current_cmd_idx]):
        result.commands[i].update(cmd.get('response', None))

    # the snapshot file list is sorted, but not in natural order: for
    # example, 'iter_900' sorts after 'iter_1000', so check whether each
    # file is already registered instead of relying on order
    registered_snapshot_keys = [ss.iteration for ss in result.snapshots]
    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if not is_numberable(number_str):
            continue
        number = int(number_str)
        if number in registered_snapshot_keys:
            continue
        result.snapshots.append(Snapshot(snapshot, number))

    result.updated_at = datetime.datetime.now()
    if commit:
        db.session.commit()

    return result
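# The version above decides whether to re-read the log file via
# _check_log_updated, which is not shown in this section. A minimal sketch,
# assuming the result caches the log file's modification time in a
# `log_modified_at` attribute and that the file is named 'log' (both
# assumptions, not taken from the source):
import os


def _check_log_updated(result):
    """Sketch: return True when the log file changed since the last crawl."""
    log_path = os.path.join(result.path_name, 'log')
    if not os.path.isfile(log_path):
        return False
    modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(log_path))
    if result.log_modified_at is None or result.log_modified_at < modified_at:
        result.log_modified_at = modified_at
        return True
    return False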
def crawl_result(result_id, force=False):
    """crawl_results."""
    current_result = DB_SESSION.query(Result).filter_by(id=result_id).first()

    now = datetime.datetime.now()
    if (not force) and (now - current_result.updated_at).total_seconds() < 4:
        return current_result

    # if the log file is not updated, there is no need to read its contents
    is_updated = _check_log_updated(current_result)
    crawled_result = crawl_result_path(current_result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(current_result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            # the log file shrank, so re-register logs and args from scratch
            current_log_idx = 0
            current_result.logs = []
            current_result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            current_result.logs.append(Log(log))

    if current_result.args is None:
        current_result.args = Argument(json.dumps(crawled_result['args']))

    # commands and snapshots are rebuilt from scratch on every crawl
    current_result.commands = []
    current_result.snapshots = []
    for cmd in crawled_result['commands']:
        current_result.commands.append(cmd.to_model())

    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            current_result.snapshots.append(
                Snapshot(snapshot, int(number_str)))

    current_result.updated_at = datetime.datetime.now()
    DB_SESSION.commit()

    return current_result
def crawl_result(result_id, force=None):
    """crawl_results."""
    current_result = DB_SESSION.query(Result).filter_by(id=result_id).first()

    now = datetime.datetime.now()
    # any non-None value for `force` skips the 4-second throttle below
    if force is None and (now - current_result.updated_at).total_seconds() < 4:
        return current_result

    crawled_result = crawl_result_path(current_result.path_name)

    # a crawled log list shorter than the registered one means the log file
    # was replaced, so re-register everything from scratch
    need_reset = len(crawled_result['logs']) < len(current_result.logs)
    if need_reset:
        current_result.logs = []
        current_result.args = None
        current_result.commands = []
        current_result.snapshots = []

    for log in crawled_result['logs'][len(current_result.logs):]:
        current_result.logs.append(Log(json.dumps(log)))

    if current_result.args is None:
        current_result.args = Argument(json.dumps(crawled_result['args']))

    for cmd in crawled_result['commands'][len(current_result.commands):]:
        current_result.commands.append(cmd.to_model())

    for snapshot in crawled_result['snapshots'][len(current_result.snapshots):]:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            current_result.snapshots.append(
                Snapshot(snapshot, int(number_str)))

    current_result.updated_at = datetime.datetime.now()
    DB_SESSION.commit()

    return current_result
def crawl_result(result, force=False, commit=True):
    """crawl_results."""
    now = datetime.datetime.now()
    if (not force) and (now - result.updated_at).total_seconds() < 4:
        return result

    # if the log file is not updated, there is no need to read its contents
    is_updated = _check_log_updated(result)
    crawled_result = crawl_result_path(result.path_name, is_updated)

    if is_updated:
        current_log_idx = len(result.logs)
        if len(crawled_result['logs']) < current_log_idx:
            # the log file shrank, so re-register logs and args from scratch
            current_log_idx = 0
            result.logs = []
            result.args = None
        for log in crawled_result['logs'][current_log_idx:]:
            result.logs.append(Log(log))

    if result.args is None:
        result.args = Argument(json.dumps(crawled_result['args']))

    # commands and snapshots are rebuilt from scratch on every crawl
    result.commands = []
    result.snapshots = []
    for cmd in crawled_result['commands']:
        result.commands.append(Command(**cmd))

    for snapshot in crawled_result['snapshots']:
        number_str = snapshot.split('snapshot_iter_')[1]
        if is_numberable(number_str):
            result.snapshots.append(Snapshot(snapshot, int(number_str)))

    result.updated_at = datetime.datetime.now()
    if commit:
        db.session.commit()

    return result
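# The `commit` flag in the two versions that take a Result object directly
# lets a caller batch several crawls into a single transaction instead of
# committing once per result. A hedged usage sketch; `crawl_all` and its
# `results` iterable are hypothetical, only `crawl_result` and `db` come
# from the code above:
def crawl_all(results):
    for result in results:
        crawl_result(result, commit=False)
    db.session.commit()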
def test_is_numberable(self):
    assert is_numberable('12345')
    assert not is_numberable('abcde')
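# is_numberable itself is not shown in this section. The test above is
# consistent with a minimal sketch that simply probes int() conversion;
# this is an assumption about the real helper, not its actual code:
def is_numberable(value):
    try:
        int(value)
        return True
    except (TypeError, ValueError):
        return False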