Example #1
def delete_unfinished_book(book):
    '''Delete a book without checking for remote state.

    Called from a worker thread.
    '''
    try:
        book.logger.debug('Deleting unfinished book')

        payload = {
            'function': 'delete_unfinished_book',
            'local_id': book.uuid,
            'status': book.status
        }

        shutil.rmtree(book.path)
        book.delete_callback(book)

        push_event('tts-book-deleted', payload, 'book', book['path'])
    except ScribeException:
        raise
    except OSError:
        # Ignore filesystem errors (e.g. the path was already removed).
        return
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book!')
Example #2
    def bt_server_register():
        message = ''
        #config = Scribe3Configuration()
        Logger.info('bt_server_register: Registering scribe')
        try:
            dd = {k: v
                  for k, v in get_metadata(scribe_globals.CONFIG_DIR).items()
                  if v}
            dd['books'] = '[]'
            tts = TTSServices(dd)
            success, tts_id = tts.register_tts(tts_id=dd['scanner'],
                                               metadata=dd)
            if success:
                #config.set('identifier', str(tts_id))
                push_event('tts-register', dd, 'tts', tts_id)
                Logger.info(
                    'bt_server_register: Registered scribe: {}'.format(tts_id))

            else:
                message = ('bt_server_register: Could not register this '
                           'scribe with Archive.org or scribe already '
                           'registered')
                Logger.info(message)
        except Exception:
            message = 'bt_server_register: Failed to register'
            Logger.exception(message)
        return message
Example #3
    def _update_set_book_repub_state(self):
        state = -2
        errors = []
        self.dispatch_progress('Setting republisher state to {}'.format(state))
        for attempt in range(scribe_globals.TASK_DEFAULT_MAX_RETRIES):
            try:
                self.dispatch_progress(
                    '[{}/{}] Setting republisher state to {}'.format(
                        attempt + 1, scribe_globals.TASK_DEFAULT_MAX_RETRIES,
                        state))
                ia_item = get_ia_session().get_item(self.identifier)
                resp = ia_item.modify_metadata({'repub_state': state})
                self.logger.info('Response from cluster: {} | '
                                 'Headers {}'.format(resp.text, resp.headers))
            except Exception as e:
                self.logger.error(
                    '[{}/{}] Transient error {} while setting repub_state '
                    'to {}.'.format(attempt + 1,
                                    scribe_globals.TASK_DEFAULT_MAX_RETRIES,
                                    e, state))
                errors.append(e)
                continue
            else:
                break
        else:
            self.logger.error(
                'Could not set repub_state to {} because {}'.format(
                    state, errors[-1]))
            payload = {
                'task_type': 'MARCMetadataViaDWWITask',
                'selector': self.isbn,
                'type': 'isbn',
                'errors': json.dumps([str(x) for x in errors]),
                'attempts': len(errors),
            }

            push_event('tts-task-exception', payload)
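Example #3 depends on Python's for/else: the else suite runs only when the loop finishes without hitting break, i.e. only when every retry attempt failed. A minimal, self-contained sketch of the same pattern follows; the names retry_call and MAX_RETRIES are illustrative and not taken from the source.

MAX_RETRIES = 3  # illustrative constant, not from the source


def retry_call(func):
    """Call func up to MAX_RETRIES times, mirroring the for/else retry above."""
    errors = []
    for attempt in range(MAX_RETRIES):
        try:
            result = func()
        except Exception as e:
            # Transient failure: remember it and try the next attempt.
            errors.append(e)
            continue
        else:
            break  # success: leave the loop, so the for-else does not run
    else:
        # Reached only when the loop never hit `break`, i.e. every attempt failed.
        raise RuntimeError('all {} attempts failed: {}'.format(
            len(errors), errors[-1]))
    return result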
Example #4
    def on_book_rejected(self, task):
        try:
            slip_metadata = task._slip_metadata
            # Coerce every metadata value to text before shipping it
            cleaned_md = {
                k: '{}'.format(v).encode('utf-8').decode('utf-8')
                for k, v in self.book_obj.metadata.items()
            }
            payload = {
                'reason': slip_metadata['reason'],
                'error': slip_metadata.get('error', ''),
                'comment': slip_metadata.get('comment', ''),
                'book_metadata': cleaned_md,
            }

            push_event('tts-book-dwwi-reject', payload)
            self.logger.info('CaptureScreen: Pushed event tts-book-dwwi-reject '
                             'to iabdash with payload: {}'.format(payload))
        except Exception as e:
            self.logger.exception(
                'CaptureScreen: Failed to push tts-book-dwwi-reject '
                'event because: {}'.format(e))
Example #5
def create_preimage_zip(book):
    logger = book.logger

    if book['status'] >= UploadStatus.uploaded.value:
        return

    logger.info('Package book: Creating preimage.zip')
    #Clock.schedule_once(partial(self.set_status_callback,
    #                            'Now creating book upload bundle for {}'.format(book.get('identifier', ''))))
    try:
        zip_path = join(book['path'],
                        '{id}_preimage.zip'.format(id=book['identifier']))

        compression = zipfile.ZIP_STORED
        allow_zip64 = True
        target = book.get_imagestack()

        if not target:
            raise ScribeException('Could not find jpegs to compress.')

        with zipfile.ZipFile(zip_path, 'w', compression,
                             allow_zip64) as preimage_zip:
            for jpeg in target:
                logger.debug('adding ' + jpeg + ' to ' + zip_path)
                arcname = ('{id}_preimage/{j}'.format(
                    id=book['identifier'], j=os.path.basename(jpeg)))
                preimage_zip.write(jpeg, arcname)

            scandata = join(book['path'], 'scandata.json')
            if os.path.exists(scandata):
                arcname = ('{id}_preimage/scandata.json'.format(
                    id=book['identifier']))
                preimage_zip.write(scandata, arcname)

        book.do_finish_preimage_zip()

    except Exception as e:
        book.error = e
        book.logger.error(traceback.format_exc())
        book.do_error_preimage_zip()

        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }

        push_event('tts-book-packaging-exception', payload, 'book',
                   book['identifier'])
        raise ScribeException('Could not create preimage.zip - {}'.format(
            str(e)))
Example #6
    def _send_stats(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'files': self._files,
            'total_time': self.total_time,
        }
        push_event('tts-cd-downloaded', payload, 'cd', self.identifier,
                   os.path.join(self._cd.path, "iabdash.log"))

        self.notifications_manager.add_notification(
            title='Downloaded',
            message="CD {} has been downloaded.".format(self.identifier),
            show_system_tile=False,
            book=self._cd)
Example #7
    def _notify_iabdash(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'task_type': 'MetadataViaIdentifierTask',
            'selector': self.identifier,
            'type': 'identifier',
        }
        try:
            push_event('tts-task-success', payload)
            self.logger.info(
                'MetadataViaIdentifierTask: Pushed event tts-task-success to '
                'iabdash with payload: {}'.format(payload))
        except Exception:
            self.logger.exception(
                'MetadataViaIdentifierTask: Failed to push tts-task-success '
                'event with payload: {}'.format(payload))
Example #8
    def _send_stats(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'repub_state': self._book_final_repub_state,
            'checkout_info': self._checkout_info,
            'claimer': self._claimer,
            'files': self._files,
        }
        push_event('tts-book-downloaded', payload, 'book', self.identifier,
                   os.path.join(self._book.path, "iabdash.log"))
        self.notifications_manager.add_notification(
            title='Downloaded',
            message="{} has been downloaded and is ready for {}.".format(
                self.identifier, self._download_type),
            show_system_tile=False,
            book=self._book)
Example #9
    def _notify_iabdash(self):
        self.dispatch_progress('Notifying iabdash')
        payload = {
            'task_type': 'MARCMetadataViaDWWITask',
            'search_id': self.metadata['scribe3_search_id'],
            'catalog': self.metadata['scribe3_search_catalog'],
            'identifier': self.book.identifier,
        }
        try:
            push_event('tts-task-success', payload)
            put_metric('scribe3.tasks.metadata.dwwi', '1', payload)
            self.logger.info(
                'MARCMetadataViaDWWITask: Pushed event tts-task-success to '
                'iabdash with payload: {}'.format(payload))
        except Exception:
            self.logger.exception(
                'MARCMetadataViaDWWITask: Failed to push tts-task-success '
                'event with payload: {}'.format(payload))
Example #10
def _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                  responses, responses_dict, book_upload_phase_start,
                  book_upload_phase_end, book_upload_total_start,
                  book_preimage_upload_start, book_preimage_upload_end):
    if os.path.exists(join(book['path'], 'time.log')):
        with open(join(book['path'], 'time.log'), 'r') as fp:
            global_time_open = float(fp.readline())
    else:
        global_time_open = None

    for item in responses:
        try:
            for r in item:
                responses_dict[str(r.request.url)] = r.status_code
        except Exception:
            # `item` was not an iterable of responses; record it as an error
            responses_dict[item] = 'Error'
    book_upload_total_end = time.time()

    times_dict = {
        'upload_phase': book_upload_phase_end - book_upload_phase_start,
        'total': book_upload_total_end - book_upload_total_start,
        'preimage': book_preimage_upload_end - book_preimage_upload_start,
    }

    payload = {
        'local_id': book['uuid'],
        'status': book['status'],
        'activeTime': global_time_open,
        'leafNum': scandata.count_pages(),
        'metadata': encoded_md,
        'responses': responses_dict,
        'times': times_dict,
        'sizes': sizes_dict,
        'foldouts': doing_foldouts,
    }
    push_event('tts-book-uploaded', payload, 'book', book['identifier'])
Example #11
def upload_book(book):
    Logger = book.logger
    Logger.debug('Starting upload of ' + book['identifier'])

    _check_preconditons(book)

    #book.do_book_upload_begin()

    _set_upload_lock_file(book, Logger)

    responses_dict = {}
    book_upload_total_start = time.time()
    try:
        scandata = ScanData(book['path'])

        zip_path = _check_preimage_is_valid(book)

        ia_session = get_ia_session()
        item = ia_session.get_item(book['identifier'])
        Logger.info('Got item {}'.format(item.identifier))

        if not book.force_upload:
            _check_remote_preconditons(item, Logger)

        encoded_md = _prepare_metadata(book, item, Logger)

        metasource_file_location, metasource_file_upload_name = _generate_metasource(
            book, Logger)

        responses = []
        book_upload_phase_start = time.time()

        needs_metadata_pushed = item.exists

        doing_foldouts = os.path.exists(
            os.path.join(book['path'], 'send_to_station'))

        (book_preimage_upload_start,
         book_preimage_upload_end,
         sizes_dict) = _upload_book_files(zip_path, book, encoded_md, item,
                                          responses,
                                          metasource_file_location,
                                          metasource_file_upload_name,
                                          Logger)

        if needs_metadata_pushed:
            _only_push_metadata(encoded_md, book, item, responses, Logger)

        book_upload_phase_end = time.time()

        _upload_logs(book=book, item=item, responses=responses)

        _verify_responses(responses, Logger)

        Logger.debug('OK! Finished uploads to {} | Took {}s'.format(
            book['identifier'],
            book_upload_phase_end - book_upload_phase_start))

        book.do_upload_book_end()

        _push_metrics(book, scandata, encoded_md, sizes_dict, doing_foldouts,
                      responses, responses_dict, book_upload_phase_start,
                      book_upload_phase_end, book_upload_total_start,
                      book_preimage_upload_start, book_preimage_upload_end)

        if config.is_true('show_book_notifications'):
            notifications_manager.add_notification(
                title='Uploaded',
                message="{} has been successfully uploaded.".format(
                    book['identifier']),
                book=book)

        Logger.debug('Finished upload for ' + book['identifier'])

        # Clock.schedule_once(partial(self.update_status_callback, book))
        time.sleep(10)  # Wait for book to be added to metadata api
    except requests.ConnectionError as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())
        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'exception': str(e)
        }

        push_event('tts-book-failed-upload', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed. '
                              'Please check network and S3 Keys')
    except Exception as e:
        book.do_upload_book_error()
        Logger.error(traceback.format_exc())

        payload = {
            'local_id': book['uuid'],
            'status': book['status'],
            'responses': responses_dict,
            'exception': str(e)
        }

        push_event('tts-book-upload-exception', payload, 'book',
                   book['identifier'])

        raise ScribeException('Upload Failed! - {}'.format(str(e)))
    finally:
        book.force_upload = False
        Logger.info("Removing upload lock file at {}".format(
            join(book['path'], "upload_lock")))
        os.remove(join(book['path'], "upload_lock"))
Example #12
def delete_finished_book(book):
    '''Delete a book whose upload has finished, after checking its remote
    repub_state.

    Called from a worker thread.
    '''
    # if book['status'] < UploadStatus.done.value:
    #     return
    book.logger.debug('Checking repub_state for {}'.format(book))
    repub_state = None
    try:
        md_url = ('https://archive.org/metadata/{id}/metadata'.format(
            id=book['identifier']))
        md = json.load(urllib.request.urlopen(md_url))
    except Exception:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not query archive.org for '
                              'repub_state!')
    try:
        if md is None or 'result' not in md:
            book.logger.info(
                'No repub_state or MDAPI unavailable; continuing with deletion.')

        else:
            repub_state = md['result'].get('repub_state')
            if repub_state is None:
                book.logger.warning('Repub state not found for {}'.format(
                    book['identifier']))
                return
        if repub_state:
            # Delete only when the remote item is in one of these repub states.
            if int(repub_state) in (RepubState.done.value,
                                    RepubState.uploaded.value,
                                    RepubState.post_autocropped.value):
                if os.path.exists(book.path):
                    # User may have already deleted local copy of this book
                    book.logger.info('Deleting {}'.format(book))
                    payload = {
                        'function': 'delete_finished_book',
                        'local_id': book.uuid,
                        'status': book.status
                    }
                    push_event('tts-book-deleted', payload, 'book',
                               book['identifier'])
                    shutil.rmtree(book['path'])
                    book.delete_callback(book)
                else:
                    book.logger.error('UploadWidget: Book not found '
                                      '(could be deleted): {}'.format(
                                          book['path']))
            else:
                book.logger.info('Not deleting {} | repub_state={}'.format(
                    book['path'], repub_state))
        else:
            if os.path.exists(book.path):
                # User may have already deleted local copy of this book
                book.logger.info('Deleting {}'.format(book))
                payload = {
                    'function': 'delete_finished_book',
                    'local_id': book.uuid,
                    'status': book.status
                }
                push_event('tts-book-deleted', payload, 'book',
                           book['identifier'])
                shutil.rmtree(book['path'])
                book.delete_callback(book)
    except ScribeException:
        raise
    except Exception as e:
        book.logger.error(traceback.format_exc())
        raise ScribeException('Could not delete book! {}'.format(e))
Example #13
    def _send_telemetry(self):
        self.dispatch_progress('Sending telemetry')
        payload = {
            'book': self.book.as_dict(),
        }
        push_event('tts-book-reset', payload, 'book', self.book.uuid)
Example #14
def upload_book_foldouts(book):
    try:
        Logger = book.logger
        Logger.info('upload_book_foldouts: Uploading foldouts for book '
                    '{}'.format(book))

        ia_session = get_ia_session()
        book_item = ia_session.get_item(book['identifier'])

        _check_preconditions(book, book_item, Logger)

        book_folder = 'foldouts'

        cdic, tdic, rdic, rtdic = _create_scandata(book, book_folder, True, Logger)

        responses = []
        # Upload the pictures
        Logger.debug('upload_book_foldouts: Uploading pics')
        book.update_message('Foldouts upload | Images')
        if cdic != {}:
            res = book_item.upload(cdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)

        if tdic != {}:
            res = book_item.upload(tdic, retries=10, verify=True,
                                   retries_sleep=60, queue_derive=False)
            responses.append(res)

        try:
            if rdic != {}:
                res = book_item.upload(rdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)

            if rtdic != {}:
                res = book_item.upload(rtdic, retries=10, verify=True,
                                       retries_sleep=60, queue_derive=False)
                responses.append(res)
        except requests.exceptions.ConnectionError as e:
            Logger.error(
                'upload_book_foldouts: Connection exception {} '
                'has occurred at rdic upload time; aborting!'.format(str(e)))
            raise e
        except Exception as e:
            Logger.error('upload_book_foldouts: Error {} has occurred at rdic upload time'.format(e))
            raise e

        Logger.debug('upload_book_foldouts: Done. Uploading scandata...')
        # Upload the scandata

        target_scandata = 'scandata.json'
        book.update_message('Foldouts upload | Scandata')
        scandata = join(book['path'], 'scandata_rerepublished.json')
        upload_res = book_item.upload({target_scandata: scandata},
                                      retries=10,
                                      retries_sleep=60,
                                      queue_derive=False,
                                      verify=True,)

        if os.path.exists(os.path.join(book['path'], 'scanning.log')):
            book.update_message('Foldouts upload | Log')
            book.logger.debug(
                'Uploading Scanning log file'
            )
            remote_name = ('logs/' + book['identifier'] +
                           '_scanning_{:%Y-%m-%d%H:%M:%S}.log'.format(
                               datetime.now()))
            upload_name_mapping = {remote_name: join(book['path'],
                                                     'scanning.log')}
            response = book_item.upload(upload_name_mapping, retries=10,
                                        retries_sleep=60, queue_derive=False,
                                        verbose=True, verify=True)
            responses.append(response)
            url_to_status_code = \
                {r.request.url: r.status_code for r in response}
            book.logger.debug('Response from upload: {} | {}'
                              .format(response, url_to_status_code))

        responses.append(upload_res)
        # corrections_uploaded

        # flatten responses list:
        flat_responses = [y for x in responses for y in x]
        for response in flat_responses:
            Logger.info('{} returned {}'.format(response.url, response.status_code))
            if response.status_code != 200:
                raise Exception(
                    'upload_book_foldouts: Response code {} {} - {} from '
                    'cluster. URL was: {} | content: {}. This is an error. '
                    'Upload will be attempted again.'.format(
                        response.status_code,
                        response.reason,
                        getattr(response, 'text', ''),
                        response.url,
                        response.content))

        Logger.debug('Done. Changing repub state...')

        _change_repub_state(book_item, 43)

        _remove_book_from_btserver_item(book, Logger)

        book.do_upload_foldouts_done()

        payload = {
            'repub_state': 43,
            'responses': flat_responses,
        }
        push_event('tts-book-corrections-sent', payload,
                   'book', book['identifier'])
        Logger.debug('All done.')

        return

    except requests.ConnectionError as e:
        raise ScribeException('Upload Failed. Please check network and '
                              'S3 Keys (Error was: {})'.format(e))
    except Exception as e:
        book.do_upload_foldouts_fail()
        book.raise_exception(e)
Example #15
    def _upload_payload(self):
        self.dispatch_progress('Uploading')
        push_event('tts-heartbeat', self._payload.copy())
        self.logger.debug('{}: Payload:\n{}'.format(self.name,
                                                    pformat(self._payload)))
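Every snippet above calls push_event with two to five positional arguments. Its definition is not shown on this page; judging only from these call sites, a compatible stub might look like the sketch below. The parameter names and the no-op body are assumptions, not the real implementation.

def push_event(event_type, payload, target_type=None, target_id=None,
               log_path=None):
    """Hypothetical stand-in matching the call patterns above.

    The real function presumably forwards the event (and, when given, the
    target and log file) to iabdash; this no-op exists only so the snippets
    can be read or exercised in isolation.
    """
    # e.g. push_event('tts-heartbeat', payload)
    # e.g. push_event('tts-book-uploaded', payload, 'book', book['identifier'])
    pass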