class Resource():
    """Controller that renders the resources overview page."""

    def __init__(self, db):
        """Keep the database handle and create a Findex helper on top of it."""
        self.db = db
        self.findex = Findex(db=db)

    @data_strap
    def overview(self, env):
        """Render 'main/resources_overview' with every resource object.

        The resource list is whatever ``Findex.get_resource_objects()``
        returns when called without arguments.
        """
        resources = self.findex.get_resource_objects()
        return jinja2_template(
            'main/resources_overview',
            env=env,
            data={"resources": resources},
        )
def __init__(self, cfg, db):
    """Store the configuration and database handle and build a Findex helper.

    The Findex helper is created on the same database handle that is
    stored on the instance.
    """
    self.cfg, self.db = cfg, db
    self.findex = Findex(db=db)
class Browse():
    """Controller for the host list, directory browsing and file 'goto' pages."""

    def __init__(self, cfg, db):
        """Store the configuration and database handle and build a Findex helper."""
        self.cfg = cfg
        self.db = db
        self.findex = Findex(db=self.db)

    @data_strap
    def hosts(self, env):
        """Render 'main/browse_hosts' with all known resources."""
        # Every other query in this module is consumed through .all()/.first();
        # the query object offers no .list(), so use .all() to fetch the rows.
        data = {'hosts': self.db.query(Resources).all()}
        return jinja2_template('main/browse_hosts', env=env, data=data)

    @data_strap
    def browse(self, path, env):
        """Render the directory listing for ``path``.

        ``env['time_pageload']`` first holds the start timestamp and, on
        success, is replaced by the elapsed time in seconds. An
        ``HTTPResponse`` raised while browsing is returned to the caller
        as-is; any other exception is printed and rendered as the generic
        'no files were found' error page.
        """
        env['time_pageload'] = datetime.now()

        try:
            browser = Browser(db=self.db, findex=self.findex, path=path)
            browser.fetch_files()
            browser.prepare_files(env=env)

            data = {
                'files': browser.files,
                'breadcrumbs': browser.breadcrumbs(),
                'action_fetches': browser.generate_action_fetches(),
                'env': browser.data
            }

            env['time_pageload'] = (datetime.now() - env['time_pageload']).total_seconds()
            return jinja2_template('main/browse_dir', env=env, data=data)
        except HTTPResponse as resp:
            return resp
        except Exception as ex:
            # Parenthesised form prints the same text under Python 2 and is
            # also valid syntax under Python 3.
            print(str(ex))
            return jinja2_template('main/error', env=env, data={'error': 'no files were found'})

    @data_strap
    def goto(self, path, env):
        """Render 'main/browse_goto' for the file whose numeric id is ``path``.

        Returns a plain error string when ``path`` is not an integer, when
        no such file or resource exists, or when any lookup raises.
        """
        error = 'error :( we could always stay here'

        try:
            uid = int(path)
            files = self.findex.get_files_objects(id=uid)
            if not files:
                return error

            f = files[0]
            h = self.db.query(Resources).filter_by(id=f.resource_id).first()
            if not (f and h):
                return error

            return jinja2_template('main/browse_goto', env=env, data={'file': f, 'host': h})
        except Exception:
            # Best-effort page: any failure degrades to the error string.
            return error
class Searcher:
    """Translate request variables into a filtered ``Files`` query and run it."""

    _SHORT_QUERY_MSG = "Search query must contain 4 characters or more"

    # Request token -> numeric id used in the database filters.
    _PROTOCOL_IDS = {"ftp": 0, "http": 1, "smb": 2}
    _CATEGORY_IDS = {"unknown": 0, "documents": 1, "movies": 2, "music": 3, "pictures": 4}

    # Exclusive (lower, upper) byte bounds for size buckets 1-5. Bucket 0
    # applies no size filter; bucket 6 is everything above _SIZE_BUCKET_MAX.
    _SIZE_BUCKETS = {
        1: (0, 8388600),
        2: (8388600, 134220000),
        3: (134220000, 536870912),
        4: (536870912, 2147483648),
        5: (2147483648, 8589934592),
    }
    _SIZE_BUCKET_MAX = 8589934592

    def __init__(self, cfg, db, env):
        """Store config, database handle and env; build a Findex helper."""
        self.cfg = cfg
        self.db = db
        self.env = env
        self.findex = Findex(self.db)

    @staticmethod
    def _first(value):
        """Return the first item of a list (None when empty), else the value itself."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @staticmethod
    def _to_int(value, what):
        """Convert ``value`` to int or raise SearchException naming ``what``."""
        try:
            return int(value)
        except (TypeError, ValueError):
            raise SearchException("Invalid %s specified" % what)

    def _key_check(self, keyword):
        """Normalise a search keyword and enforce the minimum length.

        ``keyword`` is either a string or the request-variable dict, in
        which case the first element of its "key" entry is used. The
        characters ``- , + _ %`` are replaced by spaces.

        Raises:
            SearchException: if the key is missing/empty or the cleaned
                keyword is shorter than 4 characters.
        """
        if isinstance(keyword, dict):
            if "key" not in keyword or not keyword["key"]:
                raise SearchException(self._SHORT_QUERY_MSG)
            keyword = keyword["key"][0]

        for blocked in ("-", ",", "+", "_", "%"):
            keyword = keyword.replace(blocked, " ")

        if len(keyword) < 4:
            raise SearchException(self._SHORT_QUERY_MSG)

        return keyword

    def search(self, vars):
        """Run a file search described by the request variables ``vars``.

        Recognised keys: "key" (required), "protocols", "hosts", "cats",
        "exts", "size", "path" and "host". Values are expected to be lists
        of strings; scalar values for "size", "path" and "host" are
        accepted as well.

        Returns:
            dict with "sdata" (the filters that were applied), "results"
            ("data" rows and "load_dbtime" seconds) and the escaped "key".

        Raises:
            SearchException: on a too-short keyword, unknown hosts, or a
                non-numeric category, size or host value.
        """
        val = self._key_check(vars).lower()

        filtered = False
        start_dbtime = datetime.now()

        # to-do: move this to API (or make api.py use this class)
        q = self.db.query(Files)

        # Set when a size filter is requested; results are then ordered by
        # file size, largest first.
        sort_by_size = False

        sdata = {"protocols": [], "hosts": [], "exts": [], "cats": [], "fsize": 0}

        if "protocols" in vars:
            protocols_ids = []
            for proto in vars["protocols"]:
                proto_id = self._PROTOCOL_IDS.get(proto.lower())
                if proto_id is not None and proto_id not in protocols_ids:
                    protocols_ids.append(proto_id)

            if protocols_ids:
                sdata["protocols"] = protocols_ids
        else:
            sdata["protocols"] = [0, 1, 2]

        if "hosts" in vars:
            dhosts = vars["hosts"]

            # An empty list or a leading "*" means "do not restrict by host".
            if isinstance(dhosts, list) and dhosts and dhosts[0] != "*":
                host_ids = []
                for host in dhosts:
                    matches = (
                        self.db.query(Resources)
                        .filter(Resources.address == host)
                        .filter(Resources.protocol.in_(sdata["protocols"]))
                        .all()
                    )
                    host_ids.extend(match.id for match in matches)

                if not host_ids:
                    raise SearchException("Could not find any host entries for specified host(s)")
                sdata["hosts"] = host_ids

            if sdata["hosts"]:
                q = q.filter(Files.resource_id.in_(sdata["hosts"]))
                filtered = True

        if "cats" in vars:
            dformats = []
            for cat in vars["cats"]:
                cat = cat.lower()
                if cat in self._CATEGORY_IDS:
                    dformats.append(self._CATEGORY_IDS[cat])
                else:
                    # Anything that is not a known name must be a numeric id.
                    dformats.append(self._to_int(cat, "category"))

            q = q.filter(Files.file_format.in_(dformats))
            sdata["cats"].extend(dformats)
        else:
            sdata["cats"] = [0, 1, 2, 3, 4]

        # The search counts as filtered when any category is left out.
        if any(i not in sdata["cats"] for i in (0, 1, 2, 3, 4)):
            filtered = True

        if "exts" in vars:
            exts = vars["exts"]

            if isinstance(exts, list):
                exts = [z.replace(".", "") for z in exts if z]
                q = q.filter(Files.file_ext.in_(exts))
                filtered = True
                sdata["exts"].extend(exts)
        elif "." in val:
            # "name.ext" typed into the search box: split off the extension
            # and re-validate what remains as the keyword.
            name_part, ext = val.split(".", 1)
            ext = ext.replace(",", "").strip()

            q = q.filter(Files.file_ext == ext)
            sdata["exts"].append(ext)

            val = self._key_check(name_part)
            filtered = True

        if "size" in vars:
            fsize = self._first(vars["size"])

            if fsize is not None:
                fsize = self._to_int(fsize, "size")

                if 1 <= fsize <= 5:
                    lower, upper = self._SIZE_BUCKETS[fsize]
                    q = q.filter(Files.file_size > lower, Files.file_size < upper)
                    filtered = True
                elif fsize == 6:
                    q = q.filter(Files.file_size > self._SIZE_BUCKET_MAX)
                    filtered = True

                sdata["fsize"] = fsize
                sort_by_size = True

        if "path" in vars:
            path = self._first(vars["path"])

            if path and len(path) > 3:
                q = q.filter(Files.file_path.like(quote_plus(path) + "%"))
                filtered = True

        if "host" in vars:
            host = self._first(vars["host"])

            if host is not None:
                q = q.filter(Files.resource_id == self._to_int(host, "host"))
                filtered = True

        q = q.filter(Files.searchable.like("%" + val + "%")).limit(600)

        results = {"data": q.all()}
        results["load_dbtime"] = (datetime.now() - start_dbtime).total_seconds()

        if sort_by_size:
            # Sorting happens after the row limit, so it orders only the
            # rows that were fetched.
            results["data"] = sorted(results["data"], key=lambda k: k.file_size, reverse=True)

        # to-do: dont do this here
        for r in results["data"]:
            setattr(r, "resource", self.findex.get_resource_objects(r.resource_id))

        results["data"] = self.findex.set_humanize(results["data"])
        results["data"] = self.findex.set_icons(env=self.env, files=results["data"])

        sdata["filtered"] = filtered

        return {"sdata": sdata, "results": results, "key": jinja2.escape(vars["key"][0])}
def __init__(self, cfg, db, env):
    """Store the configuration, database handle and env; build a Findex helper.

    The Findex helper is created on the same database handle that is
    stored on the instance.
    """
    self.cfg, self.db, self.env = cfg, db, env
    self.findex = Findex(db)
def __init__(self, db):
    """Keep the database handle and create a Findex helper on top of it."""
    self.db = db
    self.findex = Findex(db=db)
def __init__(self, db):
    """Set up an empty browsing state on top of the given database handle.

    ``env`` starts as an empty dict and ``files`` as None.
    """
    self.files = None
    self.env = {}
    self.db = db
    self.findex = Findex(db)
class Searcher():
    """Translate request variables into a filtered ``Files`` query and run it."""

    _SHORT_QUERY_MSG = 'Search query must contain 4 characters or more'

    # Request token -> numeric id used in the database filters.
    _PROTOCOL_IDS = {'ftp': 0, 'http': 1, 'smb': 2}
    _CATEGORY_IDS = {
        'unknown': 0,
        'documents': 1,
        'movies': 2,
        'music': 3,
        'pictures': 4
    }

    # Exclusive (lower, upper) byte bounds for size buckets 1-5. Bucket 0
    # applies no size filter; bucket 6 is everything above _SIZE_BUCKET_MAX.
    _SIZE_BUCKETS = {
        1: (0, 8388600),
        2: (8388600, 134220000),
        3: (134220000, 536870912),
        4: (536870912, 2147483648),
        5: (2147483648, 8589934592)
    }
    _SIZE_BUCKET_MAX = 8589934592

    def __init__(self, cfg, db):
        """Store the configuration and database handle; build a Findex helper."""
        self.cfg = cfg
        self.db = db
        self.findex = Findex(self.db)

    @staticmethod
    def _first(value):
        """Return the first item of a list (None when empty), else the value itself."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @staticmethod
    def _to_int(value, what):
        """Convert ``value`` to int or raise SearchException naming ``what``."""
        try:
            return int(value)
        except (TypeError, ValueError):
            raise SearchException('Invalid %s specified' % what)

    def _key_check(self, keyword):
        """Normalise a search keyword and enforce the minimum length.

        ``keyword`` is either a string or the request-variable dict, in
        which case the first element of its 'key' entry is used. The
        characters ``- , + _ %`` are replaced by spaces.

        Raises:
            SearchException: if the key is missing/empty or the cleaned
                keyword is shorter than 4 characters.
        """
        if isinstance(keyword, dict):
            if 'key' not in keyword or not keyword['key']:
                raise SearchException(self._SHORT_QUERY_MSG)
            keyword = keyword['key'][0]

        for blocked in ('-', ',', '+', '_', '%'):
            keyword = keyword.replace(blocked, ' ')

        if len(keyword) < 4:
            raise SearchException(self._SHORT_QUERY_MSG)

        return keyword

    def search(self, vars):
        """Run a file search described by the request variables ``vars``.

        Recognised keys: 'key' (required), 'protocols', 'hosts', 'cats',
        'exts', 'size', 'path' and 'host'. Values are expected to be lists
        of strings; scalar values for 'size', 'path' and 'host' are
        accepted as well.

        Returns:
            dict with 'sdata' (the filters that were applied), 'results'
            ('data' rows and 'load_dbtime' seconds) and the escaped 'key'.

        Raises:
            SearchException: on a too-short keyword, unknown hosts, or a
                non-numeric category, size or host value.
        """
        val = self._key_check(vars).lower()

        filtered = False
        start_dbtime = datetime.now()

        # to-do: move this to API (or make api.py use this class)
        q = self.db.query(Files)

        # Set when a size filter is requested; results are then ordered by
        # file size, largest first.
        sort_by_size = False

        sdata = {
            'protocols': [],
            'hosts': [],
            'exts': [],
            'cats': [],
            'fsize': 0
        }

        if 'protocols' in vars:
            protocols_ids = []
            for proto in vars['protocols']:
                proto_id = self._PROTOCOL_IDS.get(proto.lower())
                if proto_id is not None and proto_id not in protocols_ids:
                    protocols_ids.append(proto_id)

            if protocols_ids:
                sdata['protocols'] = protocols_ids
        else:
            sdata['protocols'] = [0, 1, 2]

        if 'hosts' in vars:
            dhosts = vars['hosts']

            # An empty list or a leading '*' means "do not restrict by host".
            if isinstance(dhosts, list) and dhosts and dhosts[0] != '*':
                host_ids = []
                for host in dhosts:
                    matches = self.db.query(Hosts).filter(
                        Hosts.address == host).filter(
                            Hosts.protocol.in_(sdata['protocols'])).all()
                    host_ids.extend(match.id for match in matches)

                if not host_ids:
                    raise SearchException(
                        'Could not find any host entries for specified host(s)'
                    )
                sdata['hosts'] = host_ids

            if sdata['hosts']:
                q = q.filter(Files.host_id.in_(sdata['hosts']))
                filtered = True

        if 'cats' in vars:
            dformats = []
            for cat in vars['cats']:
                cat = cat.lower()
                if cat in self._CATEGORY_IDS:
                    dformats.append(self._CATEGORY_IDS[cat])
                else:
                    # Anything that is not a known name must be a numeric id.
                    dformats.append(self._to_int(cat, 'category'))

            q = q.filter(Files.file_format.in_(dformats))
            sdata['cats'].extend(dformats)
        else:
            sdata['cats'] = [0, 1, 2, 3, 4]

        # The search counts as filtered when any category is left out.
        if any(i not in sdata['cats'] for i in (0, 1, 2, 3, 4)):
            filtered = True

        if 'exts' in vars:
            exts = vars['exts']

            if isinstance(exts, list):
                exts = [z.replace('.', '') for z in exts if z]
                q = q.filter(Files.file_ext.in_(exts))
                filtered = True
                sdata['exts'].extend(exts)
        elif '.' in val:
            # "name.ext" typed into the search box: split off the extension
            # and re-validate what remains as the keyword.
            name_part, ext = val.split('.', 1)
            ext = ext.replace(',', '').strip()

            q = q.filter(Files.file_ext == ext)
            sdata['exts'].append(ext)

            val = self._key_check(name_part)
            filtered = True

        if 'size' in vars:
            fsize = self._first(vars['size'])

            if fsize is not None:
                fsize = self._to_int(fsize, 'size')

                if 1 <= fsize <= 5:
                    lower, upper = self._SIZE_BUCKETS[fsize]
                    q = q.filter(Files.file_size > lower,
                                 Files.file_size < upper)
                    filtered = True
                elif fsize == 6:
                    q = q.filter(Files.file_size > self._SIZE_BUCKET_MAX)
                    filtered = True

                sdata['fsize'] = fsize
                sort_by_size = True

        if 'path' in vars:
            path = self._first(vars['path'])

            if path and len(path) > 3:
                q = q.filter(Files.file_path.like(quote_plus(path) + '%'))
                filtered = True

        if 'host' in vars:
            host = self._first(vars['host'])

            if host is not None:
                q = q.filter(Files.host_id == self._to_int(host, 'host'))
                filtered = True

        q = q.filter(Files.searchable.like('%' + val + '%')).limit(600)

        results = {'data': q.all()}
        results['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

        if sort_by_size:
            # Sorting happens after the row limit, so it orders only the
            # rows that were fetched.
            results['data'] = sorted(results['data'],
                                     key=lambda k: k.file_size,
                                     reverse=True)

        # to-do: dont do this here
        for r in results['data']:
            setattr(r, 'host', self.findex.get_host_objects(r.host_id))

        results['data'] = self.findex.set_humanize(results['data'])
        results['data'] = self.findex.set_icons(results['data'])

        sdata['filtered'] = filtered

        return {
            'sdata': sdata,
            'results': results,
            'key': jinja2.escape(vars['key'][0])
        }
class Browser():
    """Resolve a 'host/path' string to stored files and prepare them for display."""

    def __init__(self, db):
        """Keep the database handle, build a Findex helper, start with empty state."""
        self.db = db
        self.findex = Findex(db)

        self.env = {}
        self.files = None

    def parse_incoming_path(self, path):
        """Split ``path`` ('host/dir/.../name') into the pieces stored in ``self.env``.

        Sets 'isdir', 'host', 'file_path' (always starting and ending with
        '/'), 'file_path_quoted' and, when the path does not end in '/',
        'file_name'.
        """
        parts = path.split('/')

        self.env['isdir'] = path.endswith('/')
        self.env['host'] = parts[0]
        # Everything between the host and the last component is the directory.
        self.env['file_path'] = '/' + '/'.join(parts[1:-1])

        if not self.env['isdir']:
            self.env['file_name'] = parts[-1]

        if self.env['file_path'] != '/':
            self.env['file_path'] += '/'

        self.env['file_path_quoted'] = quote_plus(self.env['file_path'])

    def fetch_files(self):
        """Load the files of the parsed host and directory into ``self.files``.

        Raises:
            BrowseException: when the host is unknown or no files are found.
        """
        host = self.db.query(Hosts).filter_by(address=self.env['host']).first()
        if not host:
            raise BrowseException('No host found')

        self.env['host_id'] = host.id

        files = self.findex.get_files_objects(
            host_id=host.id, file_path=self.env['file_path_quoted'])
        if not files:
            raise BrowseException('No files found')

        self.files = files

    def prepare_files(self, sort=None):
        """Order the fetched files for display and attach icons.

        Directories come first, each group alphabetical by name. Outside
        the root directory a '..' entry is inserted at the top. The
        ``sort`` argument is accepted but not used.
        """
        # Two stable passes: alphabetical first, then folders on top.
        by_name = sorted(self.files, key=lambda k: k.file_name)
        self.files = sorted(by_name, key=lambda k: k.file_isdir, reverse=True)

        if not self.env['file_path'] == '/':
            # add CDUP dirs
            x = Files(file_name='..',
                      file_path='../',
                      file_ext='',
                      file_format=-1,
                      file_isdir=True,
                      file_modified=datetime.now(),
                      file_perm=None,
                      searchable=None,
                      file_size=0,
                      host=self.env['host'])

            setattr(x, 'file_name_human', '..')
            self.files.insert(0, x)

        self.files = self.findex.set_icons(self.files)

    def sort(self):
        """Return the total size of the files in the listing (non-recursive).

        The total used to be computed and then thrown away; it is now
        returned so the calculation has an observable result.
        """
        return sum(f.file_size for f in self.files)

    def generate_action_fetches(self):
        """Build copy-and-paste wget and lftp commands for the current directory.

        Returns:
            dict with the keys 'wget' and 'lftp'.
        """
        url = 'ftp://%s' % self.env['host']
        target = url + self.env['file_path']

        # Placeholders for per-source credentials; authentication options
        # are not generated, so both stay empty.
        wget_extras = ''
        lftp_extras = ''

        wget = 'wget %s-r -nH --no-parent \'%s\'' % (wget_extras, target)
        lftp = 'lftp %s-e \'mirror\' \'%s\'' % (lftp_extras, target)

        return dict(wget=wget, lftp=lftp)

    def breadcrumbs(self):
        """Return the host followed by each non-empty directory component."""
        data = [self.env['host']]
        data += [z for z in self.env['file_path'].split('/')[1:] if z]

        return data

    def output_json(self):
        """Return one '[D|F] url+path+name' line per file, skipping '..'.

        NOTE(review): this reads ``self.folder`` and the attributes
        ``filename_human``, ``filepath_human`` and ``is_directory``, none
        of which are set anywhere in this class -- verify before use.
        """
        data = []
        for source_file in self.files:
            if source_file.filename_human == '..':
                continue

            data.append(
                '[%s] %s%s%s' %
                ('D' if source_file.is_directory else 'F',
                 self.folder.source.crawl_url, source_file.filepath_human,
                 source_file.filename_human))

        return '\n'.join(data)
class Browser():
    """Resolve a 'host/path' string to stored files and prepare them for display."""

    def __init__(self, db):
        """Keep the database handle, build a Findex helper, start with empty state."""
        self.db = db
        self.findex = Findex(db)

        self.env = {}
        self.files = None

    def parse_incoming_path(self, path):
        """Split ``path`` ('host/dir/.../name') into the pieces stored in ``self.env``.

        Sets 'isdir', 'host', 'file_path' (always starting and ending with
        '/'), 'file_path_quoted' and, when the path does not end in '/',
        'file_name'.
        """
        parts = path.split('/')

        self.env['isdir'] = path.endswith('/')
        self.env['host'] = parts[0]
        # Everything between the host and the last component is the directory.
        self.env['file_path'] = '/' + '/'.join(parts[1:-1])

        if not self.env['isdir']:
            self.env['file_name'] = parts[-1]

        if self.env['file_path'] != '/':
            self.env['file_path'] += '/'

        self.env['file_path_quoted'] = quote_plus(self.env['file_path'])

    def fetch_files(self):
        """Load the files of the parsed host and directory into ``self.files``.

        Raises:
            BrowseException: when the host is unknown or no files are found.
        """
        host = self.db.query(Hosts).filter_by(
            address=self.env['host']
        ).first()
        if not host:
            raise BrowseException('No host found')

        self.env['host_id'] = host.id

        files = self.findex.get_files_objects(host_id=host.id,
                                              file_path=self.env['file_path_quoted'])
        if not files:
            raise BrowseException('No files found')

        self.files = files

    def prepare_files(self, sort=None):
        """Order the fetched files for display and attach icons.

        Directories come first, each group alphabetical by name. Outside
        the root directory a '..' entry is inserted at the top. The
        ``sort`` argument is accepted but not used.
        """
        # Two stable passes: alphabetical first, then folders on top.
        by_name = sorted(self.files, key=lambda k: k.file_name)
        self.files = sorted(by_name, key=lambda k: k.file_isdir, reverse=True)

        if not self.env['file_path'] == '/':
            # add CDUP dirs
            x = Files(
                file_name='..',
                file_path='../',
                file_ext='',
                file_format=-1,
                file_isdir=True,
                file_modified=datetime.now(),
                file_perm=None,
                searchable=None,
                file_size=0,
                host=self.env['host']
            )

            setattr(x, 'file_name_human', '..')
            self.files.insert(0, x)

        self.files = self.findex.set_icons(self.files)

    def sort(self):
        """Return the total size of the files in the listing (non-recursive).

        The total used to be computed and then thrown away; it is now
        returned so the calculation has an observable result.
        """
        return sum(f.file_size for f in self.files)

    def generate_action_fetches(self):
        """Build copy-and-paste wget and lftp commands for the current directory.

        Returns:
            dict with the keys 'wget' and 'lftp'.
        """
        url = 'ftp://%s' % self.env['host']
        target = url + self.env['file_path']

        # Placeholders for per-source credentials; authentication options
        # are not generated, so both stay empty.
        wget_extras = ''
        lftp_extras = ''

        wget = 'wget %s-r -nH --no-parent \'%s\'' % (wget_extras, target)
        lftp = 'lftp %s-e \'mirror\' \'%s\'' % (lftp_extras, target)

        return dict(wget=wget, lftp=lftp)

    def breadcrumbs(self):
        """Return the host followed by each non-empty directory component."""
        data = [self.env['host']]
        data += [z for z in self.env['file_path'].split('/')[1:] if z]

        return data

    def output_json(self):
        """Return one '[D|F] url+path+name' line per file, skipping '..'.

        NOTE(review): this reads ``self.folder`` and the attributes
        ``filename_human``, ``filepath_human`` and ``is_directory``, none
        of which are set anywhere in this class -- verify before use.
        """
        data = []
        for source_file in self.files:
            if source_file.filename_human == '..':
                continue

            data.append('[%s] %s%s%s' % (
                'D' if source_file.is_directory else 'F',
                self.folder.source.crawl_url,
                source_file.filepath_human,
                source_file.filename_human))

        return '\n'.join(data)
class Browse():
    """Controller for the host list, directory browsing and file 'goto' pages."""

    def __init__(self, cfg, db):
        """Store the configuration and database handle and build a Findex helper."""
        self.cfg = cfg
        self.db = db
        self.findex = Findex(db=self.db)

    @data_strap
    def hosts(self, env):
        """Render 'main/browse_hosts' with all known hosts."""
        data = {'hosts': self.db.query(Hosts).all()}
        return jinja2_template('main/browse_hosts', env=env, data=data)

    @data_strap
    def browse(self, path, env):
        """Render the directory listing for ``path``.

        ``env['load_dbtime']`` is set to the number of seconds spent
        fetching the files (0 if fetching never completed). Any exception
        raised while parsing, fetching or rendering results in the
        generic 'no files were found' error page.
        """
        env['load_dbtime'] = 0
        browser = Browser(db=self.db)

        try:
            browser.parse_incoming_path(path)

            start_dbtime = datetime.now()
            browser.fetch_files()
            env['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

            browser.prepare_files()

            data = {
                'files': browser.files,
                'breadcrumbs': browser.breadcrumbs(),
                'action_fetches': browser.generate_action_fetches(),
                'env': browser.env
            }

            return jinja2_template('main/browse_dir', env=env, data=data)
        except Exception:
            # Best-effort page: every failure is shown as "nothing found".
            return jinja2_template('main/error', env=env, data={'error': 'no files were found'})

    @data_strap
    def goto(self, path, env):
        """Render 'main/browse_goto' for the file whose numeric id is ``path``.

        Returns a plain error string when ``path`` is not an integer, when
        no such file or host exists, or when any lookup raises.
        """
        error = 'error :( we could always stay here'

        try:
            uid = int(path)
            files = self.findex.get_files_objects(id=uid)
            if not files:
                return error

            f = files[0]
            h = self.db.query(Hosts).filter_by(id=f.host_id).first()
            if not (f and h):
                return error

            return jinja2_template('main/browse_goto', env=env, data={'file': f, 'host': h})
        except Exception:
            # Best-effort page: any failure degrades to the error string.
            return error
class Searcher():
    """Translate request variables into a filtered ``Files`` query and run it."""

    _SHORT_QUERY_MSG = 'Search query must contain 4 characters or more'

    # Request token -> numeric id used in the database filters.
    _PROTOCOL_IDS = {'ftp': 0, 'http': 1, 'smb': 2}
    _CATEGORY_IDS = {
        'unknown': 0,
        'documents': 1,
        'movies': 2,
        'music': 3,
        'pictures': 4
    }

    # Exclusive (lower, upper) byte bounds for size buckets 1-5. Bucket 0
    # applies no size filter; bucket 6 is everything above _SIZE_BUCKET_MAX.
    _SIZE_BUCKETS = {
        1: (0, 8388600),
        2: (8388600, 134220000),
        3: (134220000, 536870912),
        4: (536870912, 2147483648),
        5: (2147483648, 8589934592)
    }
    _SIZE_BUCKET_MAX = 8589934592

    def __init__(self, cfg, db):
        """Store the configuration and database handle; build a Findex helper."""
        self.cfg = cfg
        self.db = db
        self.findex = Findex(self.db)

    @staticmethod
    def _first(value):
        """Return the first item of a list (None when empty), else the value itself."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @staticmethod
    def _to_int(value, what):
        """Convert ``value`` to int or raise SearchException naming ``what``."""
        try:
            return int(value)
        except (TypeError, ValueError):
            raise SearchException('Invalid %s specified' % what)

    def _key_check(self, keyword):
        """Normalise a search keyword and enforce the minimum length.

        ``keyword`` is either a string or the request-variable dict, in
        which case the first element of its 'key' entry is used. The
        characters ``- , + _ %`` are replaced by spaces.

        Raises:
            SearchException: if the key is missing/empty or the cleaned
                keyword is shorter than 4 characters.
        """
        if isinstance(keyword, dict):
            if 'key' not in keyword or not keyword['key']:
                raise SearchException(self._SHORT_QUERY_MSG)
            keyword = keyword['key'][0]

        for blocked in ('-', ',', '+', '_', '%'):
            keyword = keyword.replace(blocked, ' ')

        if len(keyword) < 4:
            raise SearchException(self._SHORT_QUERY_MSG)

        return keyword

    def search(self, vars):
        """Run a file search described by the request variables ``vars``.

        Recognised keys: 'key' (required), 'protocols', 'hosts', 'cats',
        'exts', 'size', 'path' and 'host'. Values are expected to be lists
        of strings; scalar values for 'size', 'path' and 'host' are
        accepted as well.

        Returns:
            dict with 'sdata' (the filters that were applied), 'results'
            ('data' rows and 'load_dbtime' seconds) and the escaped 'key'.

        Raises:
            SearchException: on a too-short keyword, unknown hosts, or a
                non-numeric category, size or host value.
        """
        val = self._key_check(vars).lower()

        filtered = False
        start_dbtime = datetime.now()

        # to-do: move this to API (or make api.py use this class)
        q = self.db.query(Files)

        # Set when a size filter is requested; results are then ordered by
        # file size, largest first.
        sort_by_size = False

        sdata = {
            'protocols': [],
            'hosts': [],
            'exts': [],
            'cats': [],
            'fsize': 0
        }

        if 'protocols' in vars:
            protocols_ids = []
            for proto in vars['protocols']:
                proto_id = self._PROTOCOL_IDS.get(proto.lower())
                if proto_id is not None and proto_id not in protocols_ids:
                    protocols_ids.append(proto_id)

            if protocols_ids:
                sdata['protocols'] = protocols_ids
        else:
            sdata['protocols'] = [0, 1, 2]

        if 'hosts' in vars:
            dhosts = vars['hosts']

            # An empty list or a leading '*' means "do not restrict by host".
            if isinstance(dhosts, list) and dhosts and dhosts[0] != '*':
                host_ids = []
                for host in dhosts:
                    matches = self.db.query(Hosts).filter(Hosts.address == host).filter(Hosts.protocol.in_(sdata['protocols'])).all()
                    host_ids.extend(match.id for match in matches)

                if not host_ids:
                    raise SearchException('Could not find any host entries for specified host(s)')
                sdata['hosts'] = host_ids

            if sdata['hosts']:
                q = q.filter(Files.host_id.in_(sdata['hosts']))
                filtered = True

        if 'cats' in vars:
            dformats = []
            for cat in vars['cats']:
                cat = cat.lower()
                if cat in self._CATEGORY_IDS:
                    dformats.append(self._CATEGORY_IDS[cat])
                else:
                    # Anything that is not a known name must be a numeric id.
                    dformats.append(self._to_int(cat, 'category'))

            q = q.filter(Files.file_format.in_(dformats))
            sdata['cats'].extend(dformats)
        else:
            sdata['cats'] = [0, 1, 2, 3, 4]

        # The search counts as filtered when any category is left out.
        if any(i not in sdata['cats'] for i in (0, 1, 2, 3, 4)):
            filtered = True

        if 'exts' in vars:
            exts = vars['exts']

            if isinstance(exts, list):
                exts = [z.replace('.', '') for z in exts if z]
                q = q.filter(Files.file_ext.in_(exts))
                filtered = True
                sdata['exts'].extend(exts)
        elif '.' in val:
            # "name.ext" typed into the search box: split off the extension
            # and re-validate what remains as the keyword.
            name_part, ext = val.split('.', 1)
            ext = ext.replace(',', '').strip()

            q = q.filter(Files.file_ext == ext)
            sdata['exts'].append(ext)

            val = self._key_check(name_part)
            filtered = True

        if 'size' in vars:
            fsize = self._first(vars['size'])

            if fsize is not None:
                fsize = self._to_int(fsize, 'size')

                if 1 <= fsize <= 5:
                    lower, upper = self._SIZE_BUCKETS[fsize]
                    q = q.filter(Files.file_size > lower, Files.file_size < upper)
                    filtered = True
                elif fsize == 6:
                    q = q.filter(Files.file_size > self._SIZE_BUCKET_MAX)
                    filtered = True

                sdata['fsize'] = fsize
                sort_by_size = True

        if 'path' in vars:
            path = self._first(vars['path'])

            if path and len(path) > 3:
                q = q.filter(Files.file_path.like(quote_plus(path) + '%'))
                filtered = True

        if 'host' in vars:
            host = self._first(vars['host'])

            if host is not None:
                q = q.filter(Files.host_id == self._to_int(host, 'host'))
                filtered = True

        q = q.filter(Files.searchable.like('%' + val + '%')).limit(600)

        results = {'data': q.all()}
        results['load_dbtime'] = (datetime.now() - start_dbtime).total_seconds()

        if sort_by_size:
            # Sorting happens after the row limit, so it orders only the
            # rows that were fetched.
            results['data'] = sorted(results['data'], key=lambda k: k.file_size, reverse=True)

        # to-do: dont do this here
        for r in results['data']:
            setattr(r, 'host', self.findex.get_host_objects(r.host_id))

        results['data'] = self.findex.set_humanize(results['data'])
        results['data'] = self.findex.set_icons(results['data'])

        sdata['filtered'] = filtered

        return {'sdata': sdata, 'results': results, 'key': jinja2.escape(vars['key'][0])}