def get_article(self, title, version=-1):
    LOG.debug('Get article')
    if version != -1:
        partial_ipfs_address = self.history[version]['ID']
    else:
        partial_ipfs_address = self.blockchain_db.get_article_ID(title)
    full_ipfs_address = self.ipfs.get_article(partial_ipfs_address, 20)
    title = title + "_" + str(version)
    # TODO - this could be checked before downloading
    # Check whether a local copy exists and is up to date; keep the freshly
    # downloaded file only if the local copy is missing or differs from it.
    if (os.path.exists(title)
            and utils.file_hash(full_ipfs_address) != utils.file_hash(title)):
        os.remove(title)
        os.rename(full_ipfs_address, title)
    elif not os.path.exists(title):
        os.rename(full_ipfs_address, title)
    else:
        os.remove(full_ipfs_address)
    path = os.path.join(utils.get_prefix_path(), title)
    with open(path) as article_content_file:
        article_content = article_content_file.read()
    return article_content
def check_uploaded_file(mime_type, fname):
    size = os.stat(fname).st_size
    hash = file_hash(fname)
    extension = os.path.splitext(fname)[1]
    if extension:
        extension = extension[1:]
    return check_file(mime_type, size, hash, extension)
def _clean_seed_corpus(seed_corpus_dir):
    """Moves seed corpus files from sub-directories into the corpus directory
    root. Also deletes any files that exceed the 1 MB limit."""
    if not os.path.exists(seed_corpus_dir):
        return

    failed_to_move_files = []
    for root, _, files in os.walk(seed_corpus_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(file_path)
                logs.warning('Removed seed file %s as it exceeds the 1 MB limit.',
                             file_path)
                continue

            sha1sum = utils.file_hash(file_path)
            new_file_path = os.path.join(seed_corpus_dir, sha1sum)
            try:
                shutil.move(file_path, new_file_path)
            except OSError:
                failed_to_move_files.append((file_path, new_file_path))

    if failed_to_move_files:
        logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
def _clean_seed_corpus(seed_corpus_dir):
    """Prepares |seed_corpus_dir| for the trial. This ensures that it can be
    used by AFL, which is picky about the seed corpus. Moves seed corpus files
    from sub-directories into the corpus directory root and deletes any files
    that exceed the 1 MB limit. If the NO_SEEDS env var is specified, then the
    seed corpus files are deleted."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    failed_to_move_files = []
    for root, _, files in os.walk(seed_corpus_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(file_path)
                logs.warning('Removed seed file %s as it exceeds the 1 MB limit.',
                             file_path)
                continue

            sha1sum = utils.file_hash(file_path)
            new_file_path = os.path.join(seed_corpus_dir, sha1sum)
            try:
                shutil.move(file_path, new_file_path)
            except OSError:
                failed_to_move_files.append((file_path, new_file_path))

    if failed_to_move_files:
        logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
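All of the snippets above rely on a shared file_hash() helper (also referenced as utils.file_hash) whose implementation is not shown. Judging by _clean_seed_corpus() storing its result in a variable named sha1sum and using it directly as a file name, a minimal sketch of such a helper might look like the following; this is an assumed implementation for illustration, not the original code.

import hashlib


def file_hash(file_path, chunk_size=65536):
    # Assumed sketch: compute the SHA-1 hex digest of a file, reading it in
    # fixed-size chunks so large files are not loaded into memory at once.
    digest = hashlib.sha1()
    with open(file_path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()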
def do(self):
    fname = self.opts.file
    alt_name = self.opts.file_name or fname
    if not (os.access(fname, os.R_OK) and os.path.isfile(fname)):
        raise ActionError('File %s does not exist or is not readable' % fname)
    file_info = {
        'size': os.stat(fname).st_size,
        'hash': file_hash(fname),
        'mime_type': guess_type(alt_name)[0] or '',
        'extension': os.path.splitext(alt_name)[1].lower()[1:] or ''
    }
    res = self.http.post('/api/upload/check', json=file_info)
    try:
        f = open(fname, 'rb')
        res = self.http.post(
            '/api/upload',
            files={'file': (os.path.basename(alt_name), f, file_info['mime_type'])})
    finally:
        f.close()
    uploaded_file = res['file']
    log.debug('File uploaded as %s', uploaded_file)

    proposed_meta = self._get_meta()
    res = self.client.call('metadata', uploaded_file, proposed_meta)
    upload_meta_id = res['result']
    res = self.http.get('/api/uploads-meta/%d' % upload_meta_id)
    meta = res['meta']
    log.debug('Metadata #%d for ebook - %s', upload_meta_id, meta)
    if not ('title' in meta and meta['title'] and 'language' in meta
            and meta['language'].get('id')):
        raise ActionError('We need at least title and language')

    search = []
    if meta['authors']:
        search.extend(
            map(
                lambda x: x['first_name'] + ' ' + x['last_name']
                if 'first_name' in x else x['last_name'], meta['authors']))
    search.append(meta['title'].replace('/', ''))
    if 'series' in meta:
        search.append(meta['series']['title'])
    search = ' '.join(search)

    def get_ignore_404(*args, **kwargs):
        try:
            res = self.http.get(*args, **kwargs)
        except HTTPError as e:
            if hasattr(e, 'response') and e.response.status_code == 404:
                res = {}
            else:
                raise e
        return res

    res = get_ignore_404('/api/search/' + quote_plus(search),
                         params={'page': 1, 'page_size': 5})
    log.debug('search results %s', res)
    book_id = None

    def same_authors(ebook, authors):
        # trivial case - the ebook and/or the upload may have no authors at all
        if not authors or 'authors' not in ebook or not ebook['authors']:
            if not authors and ('authors' not in ebook or not ebook['authors']):
                return 1
            return 0
        last_names = set(map(lambda a: a['last_name'], authors))
        count = 0
        for a in ebook['authors']:
            if a['last_name'] in last_names:
                count += 1
        return count

    if 'items' in res and res['items']:
        # basic sanity check that what we found is OK:
        # the title is almost the same and at least one author last name matches
        for ebook in res['items']:
            # for now set conservatively to 0 - so as not to mix up books like
            # Volume I with Volume II
            if damlev(ebook['title'], meta['title']) <= 0 and \
                    same_authors(ebook, meta.get('authors')) >= 1:
                book_id = ebook['id']
                break

    if not book_id:
        res = get_ignore_404('/api/ebooks/index/' + quote_plus(meta['title']))
        log.debug('Alternative search by full title - result %s', res)
        if 'items' in res and res['items']:
            for ebook in res['items']:
                if same_authors(ebook, meta.get('authors')) >= 1:
                    book_id = ebook['id']
                    break

    if book_id:
        res = self.http.post('/api/ebooks/%d/add-upload' % (book_id, ),
                             json={'upload_id': upload_meta_id,
                                   'quality': self.opts.quality})
        log.info('Added file to existing ebook #%d', book_id)
    else:
        res = self.http.post('/api/ebooks', json=meta)
        book_id = res['id']
        res = self.http.post('/api/ebooks/%d/add-upload' % (book_id, ),
                             json={'upload_id': upload_meta_id,
                                   'quality': self.opts.quality})
        log.info('Added file to new ebook #%d', book_id)

    if self.opts.json:
        res = self.http.get('/api/ebooks/%d' % book_id)
        print(json.dumps(res))
def test_logic(self):
    b1 = model.Ebook.query.get(33837)
    self.assertEqual(b1.authors_str, 'Crichton Michael')
    b2 = model.Ebook.query.get(37157)
    self.assertEqual(b2.authors_str, 'Strugackij A N, Strugackij B N')
    b3 = model.Ebook.query.get(62546)
    self.assertEqual(b3.authors_str, 'Wilkins G, Dalton M, Young K')
    b3.authors.append(b1.authors[0])
    self.assertEqual(b3.authors_str, 'Wilkins G, Dalton M, Young K and others')
    b1.authors = []
    self.assertEqual(b1.authors_str, 'No Authors')

    source = model.Source.query.get(46519)
    name = logic.norm_file_name(source)
    self.assertEqual(
        name,
        'Strugackij A N, Strugackij B N/Noc na Marse(sk)/Strugackij A N, Strugackij B N - Noc na Marse.doc')

    source = model.Source.query.get(63546)
    name = logic.norm_file_name(source)
    self.assertEqual(
        name,
        'Monroe Lucy/Nevesty od Stredozemniho more/Nevesty od Stredozemniho more 2 - Spanelova milenka(cs)/Monroe Lucy - Nevesty od Stredozemniho more 2 - Spanelova milenka.doc')

    res = logic.check_uploaded_file('application/epub+zip', ebook_file)
    self.assertEqual(res['error'], 'file already exists')

    size = os.stat(downloaded_file).st_size
    hash = utils.file_hash(downloaded_file)
    s = model.Source.query.get(86060)
    new_loc = logic.create_new_location(s, downloaded_file)
    self.assertEqual(
        new_loc,
        'Kissinger Henry/Roky v Bilem dome(cs)/Kissinger Henry - Roky v Bilem dome.epub')
    shutil.copy(ebook_file, downloaded_file)
    new_loc = logic.create_new_location(s, downloaded_file)
    self.assertEqual(
        new_loc,
        'Kissinger Henry/Roky v Bilem dome(cs)/Kissinger Henry - Roky v Bilem dome(1).epub')

    admin = model.User.query.get(1)
    conv = model.Conversion(
        source=source,
        format=model.Format.query.filter_by(extension='epub').one(),
        location='bla.epub',
        created_by=admin,
        modified_by=admin)
    db.session.add(conv)
    db.session.commit()
    res = logic.query_converted_sources_for_ebook(source.ebook.id, admin).all()
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0].location, 'bla.epub')

    b1 = model.Ebook.query.get(33837)
    b2 = model.Ebook.query.get(37157)
    tot = len(b1.sources) + len(b2.sources)
    logic.merge_ebooks(b1, b2)
    self.assertEqual(len(b1.sources), tot)

    s = model.Series(title='Series/Neserie')
    a = model.Author(first_name='Jan', last_name='Kocian/Koci')
    b = model.Ebook(title='Neco / Nekde',
                    language=model.Language(code='cs', name='Czech'),
                    series_index=1)
    b.series = s
    b.authors.append(a)
    self.assertEqual(
        logic.norm_file_name(b, 'doc'),
        'Kocian-Koci Jan/Series-Neserie/Series-Neserie 1 - Neco - Nekde(cs)/Kocian-Koci Jan - Series-Neserie 1 - Neco - Nekde.doc')