Пример #1
0
    def _archive(self):
        '''Move capture directories older than the configured interval into
        the archive tree, grouped as <archived_captures_dir>/<year>/<month>/,
        and drop their entries from the redis cache.'''
        # Cut-off date: first day of the month, `archive` days back from now.
        threshold = (datetime.now() - timedelta(days=get_config('generic', 'archive'))).date().replace(day=1)

        # Bucket capture directories by year then month:
        # { 2020: { 12: [(directory, uuid)] } }
        buckets: Dict[int, Dict[int, List[Path]]] = defaultdict(lambda: defaultdict(list))
        for uuid_file in get_captures_dir().glob('**/uuid'):
            # Directory names are capture timestamps, e.g. 2020-12-01T10:20:30.123456
            capture_time = datetime.strptime(uuid_file.parent.name, '%Y-%m-%dT%H:%M:%S.%f')
            if capture_time.date() < threshold:
                buckets[capture_time.year][capture_time.month].append(uuid_file.parent)
                self.logger.info(f'Archiving {uuid_file.parent}.')

        if not buckets:
            self.logger.info('Nothing to archive.')
            return

        pipeline = self.redis.pipeline()
        for year, by_month in buckets.items():
            for month, capture_dirs in by_month.items():
                destination = self.archived_captures_dir / str(year) / f'{month:02}'
                destination.mkdir(parents=True, exist_ok=True)
                for capture_dir in capture_dirs:
                    # Queue the cache-key deletion; the move itself happens now.
                    pipeline.delete(str(capture_dir))
                    capture_dir.rename(destination / capture_dir.name)
        pipeline.execute()

        # Clear empty (TODO in original: emptied source directories are not cleaned up)

        self.logger.info('Archiving done.')
Пример #2
0
 def __init__(self, loglevel: int = logging.INFO):
     '''Set up the async capture worker: Lookyloo instance, config flags,
     capture/splash endpoints, and the cache redis connection.'''
     super().__init__(loglevel)
     self.lookyloo = Lookyloo()
     self.script_name = 'async_capture'
     # NOTE(review): presumably controls whether lookups are shared across
     # all users rather than per-submitter — confirm against callers.
     self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
     self.capture_dir: Path = get_captures_dir()
     self.splash_url: str = get_splash_url()
     # decode_responses=True so redis returns str instead of bytes.
     self.redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
Пример #3
0
    def __init__(self, loglevel: int = logging.INFO):
        '''Set up the async capture worker: config flags, capture directory,
        user-agent list, and the optional FOX module.'''
        super().__init__(loglevel)
        self.script_name = 'async_capture'
        # NOTE(review): presumably controls whether lookups are shared across
        # all users rather than per-submitter — confirm against callers.
        self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
        self.capture_dir: Path = get_captures_dir()
        self.user_agents = UserAgents()

        # FOX is optional: warn and continue when it cannot be set up.
        self.fox = FOX(get_config('modules', 'FOX'))
        if not self.fox.available:
            self.logger.warning('Unable to setup the FOX module')
Пример #4
0
    def _update_all_capture_indexes(self):
        '''Run that after the captures are in the proper directories'''
        # A capture is marked by a `uuid` file; its grandparent is the
        # year/month directory whose index needs refreshing.

        # Recent captures
        recent_dirs = {uuid_file.parent.parent
                       for uuid_file in get_captures_dir().glob('**/uuid')}
        for directory in recent_dirs:
            self._update_index(directory)

        # Archived captures
        archived_dirs = {uuid_file.parent.parent
                         for uuid_file in self.archived_captures_dir.glob('**/uuid')}
        for directory in archived_dirs:
            self._update_index(directory)
Пример #5
0
def rename_captures():
    '''One-off migration: move flat capture directories into
    <captures>/<year>/<month>/ and invalidate their cached redis entries.'''
    cache = Redis(unix_socket_path=get_socket_path('cache'))
    captures_root: Path = get_captures_dir()
    for uuid_file in captures_root.glob('*/uuid'):
        with uuid_file.open() as f:
            capture_uuid = f.read()
        # Drop the cached lookup for this capture before it moves.
        dir_key = cache.hget('lookup_dirs', capture_uuid)
        if dir_key:
            cache.hdel('lookup_dirs', capture_uuid)
            cache.delete(dir_key)
        # Directory names are capture timestamps, e.g. 2020-12-01T10:20:30.123456
        ts = datetime.strptime(uuid_file.parent.name,
                               '%Y-%m-%dT%H:%M:%S.%f')
        dest_dir = captures_root / str(ts.year) / f'{ts.month:02}'
        safe_create_dir(dest_dir)
        uuid_file.parent.rename(dest_dir / uuid_file.parent.name)
Пример #6
0
    def _load_indexes(self):
        '''Populate the redis lookup hashes from the on-disk `index` files.

        Each `index` file is CSV rows of (uuid, dirname); rows whose directory
        no longer exists are skipped, and an index file that yields no valid
        entries is deleted.
        '''
        # Recent captures — original comment said "Initialize archives" here,
        # which was a copy/paste slip from the loop below.
        for index in get_captures_dir().glob('**/index'):
            with index.open('r') as _f:
                recent_uuids: Dict[str, str] = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
            if recent_uuids:
                self.redis.hmset('lookup_dirs', recent_uuids)  # type: ignore
            else:
                # No surviving entries: the index file is stale, remove it.
                index.unlink()

        # Initialize archives
        for index in self.archived_captures_dir.glob('**/index'):
            with index.open('r') as _f:
                archived_uuids: Dict[str, str] = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
            if archived_uuids:
                self.redis.hmset('lookup_dirs_archived', archived_uuids)  # type: ignore
            else:
                # No surviving entries: the index file is stale, remove it.
                index.unlink()