Example #1: RC from a pre-RC manifest.json (Cebuano Psalms ULB)
    def test_ceb_psa_text_ulb_L3(self):
        """ Populates the ResourceContainer object and verifies the output."""
        # test with the Cebuano Psalms ULB (pre-RC manifest.json)
        zip_file = os.path.join(self.resources_dir, 'ceb_psa_text_ulb_L3.zip')
        self.out_dir = tempfile.mkdtemp(prefix='repo_')
        unzip(zip_file, self.out_dir)
        repo_dir = os.path.join(self.out_dir, 'ceb_psa_text_ulb_l3')
        rc = RC(directory=repo_dir)
        rc.as_dict()
        json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
        self.assertEqual(rc.resource.identifier, json['resource']['id'])
        self.assertEqual(rc.resource.type, 'book')
        self.assertEqual(rc.resource.format, 'text/{0}'.format(json['format']))
        self.assertEqual(rc.resource.file_ext, json['format'])
        self.assertEqual(rc.resource.conformsto, 'pre-rc')
        self.assertEqual(rc.resource.modified, datetime.utcnow().strftime("%Y-%m-%d"))
        chapters = rc.projects[0].chapters()
        idx = 1

        for chapter in chapters:
            if chapter.isnumeric():
                self.assertEqual(int(chapter), idx)
                idx += 1

        self.assertEqual(len(chapters), 151)
        chunks = rc.projects[0].chunks('01')
        self.assertEqual(len(chunks), 5)
Example #2: RC with multiple projects (English translationAcademy manifest.yaml)
 def test_multiple_projects(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the English translationAcademy (multiple projects)
     zip_file = os.path.join(self.resources_dir, 'en-ta-multiple-projects.zip')
     self.out_dir = tempfile.mkdtemp(prefix='repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_ta')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     yaml = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
     self.assertEqual(rc.resource.identifier, yaml['dublin_core']['identifier'])
     self.assertEqual(rc.resource.type, yaml['dublin_core']['type'])
     self.assertEqual(rc.resource.format, yaml['dublin_core']['format'])
     self.assertEqual(rc.resource.file_ext, 'md')
     self.assertEqual(rc.resource.conformsto, yaml['dublin_core']['conformsto'])
     self.assertEqual(rc.resource.modified, yaml['dublin_core']['modified'])
     self.assertEqual(len(rc.project_ids), 4)
     self.assertEqual(rc.project_count, 4)
     chapters = rc.project('checking').chapters()
     self.assertEqual(len(chapters), 44)
     chunks = rc.project('checking').chunks('level1')
     self.assertEqual(chunks, ['01.md', 'sub-title.md', 'title.md'])
     self.assertTrue('acceptable' in rc.project('checking').config())
     self.assertTrue('title' in rc.project('checking').toc())
     self.assertEqual(rc.project('checking').toc()['title'], 'Table of Contents')
Example #3: RC round-trip against manifest.yaml (English OBS)
 def test_en_obs_manifest_yaml(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the English OBS
     zip_file = os.path.join(self.resources_dir, 'en-obs-manifest-yaml.zip')
     self.out_dir = tempfile.mkdtemp(prefix='repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_obs')
     rc = RC(directory=repo_dir, repo_name='en_obs')
     rc_dic = rc.as_dict()
     yaml = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
     self.assertDictEqual(yaml, rc_dic)
     chapters = rc.projects[0].chapters()
     self.assertEqual(len(chapters), 2)
     chunks = rc.project().chunks('front')
     self.assertEqual(chunks, ['intro.md', 'title.md'])
Example #4: building a print_all.html page for a project on the CDN
    def print_project(self, project_id):
        """
        :param string project_id: 
        :return string: 
        """
        self.project_id = project_id
        if len(project_id.split('/')) != 3:
            raise Exception('Project not found.')
        user_name, repo_name, commit_id = project_id.split('/')
        source_path = 'u/{0}'.format(project_id)
        print_all_key = '{0}/print_all.html'.format(source_path)
        print_all_file = tempfile.mktemp(prefix='print_all_')
        if not App.cdn_s3_handler().key_exists(print_all_key):
            files_dir = tempfile.mkdtemp(prefix='files_')
            App.cdn_s3_handler().download_dir(source_path, files_dir)
            project_dir = os.path.join(files_dir,
                                       source_path.replace('/', os.path.sep))
            if not os.path.isdir(project_dir):
                raise Exception('Project not found.')
            rc = RC(project_dir, repo_name)
            with codecs.open(print_all_file, 'w', 'utf-8-sig') as print_all:
                print_all.write("""
<html lang="{0}" dir="{1}">
    <head>
        <meta charset="UTF-8"/>
        <title>{2}: {3}</title>
        <style type="text/css">
            body > div {{
                page-break-after: always;
            }}
        </style>
    </head>
    <body onLoad="window.print()">
        <h1>{2}: {3}</h1>
""".format(rc.resource.language.identifier, rc.resource.language.direction,
                rc.resource.language.title, rc.resource.title))
                for fname in sorted(glob(os.path.join(project_dir, '*.html')),
                                    key=self.front_to_back):
                    with codecs.open(fname, 'r', 'utf-8-sig') as f:
                        soup = BeautifulSoup(f, 'html.parser')
                        # get the body of the raw html file
                        content = soup.div
                        if not content:
                            content = BeautifulSoup(
                                '<div>No content</div>',
                                'html.parser').find('div').extract()
                        content['id'] = os.path.basename(fname)
                        print_all.write(unicode(content))
                print_all.write("""
    </body>
</html>
""")
                App.cdn_s3_handler().upload_file(print_all_file,
                                                 print_all_key,
                                                 cache_time=0)
            html = read_file(print_all_file)
        else:
            html = App.cdn_s3_handler().get_file_contents(print_all_key)
        return html
Example #5: linting a full English ULB repo with UsfmLinter
 def test_EnUlbValid(self):
     out_dir = self.unzip_resource('en_ulb.zip')
     expected_warnings = 0
     start = time.time()
     rc = RC(out_dir)
     linter = UsfmLinter(source_dir=out_dir, rc=rc)
     linter.run()
     elapsed_seconds = int(time.time() - start)
     App.logger.debug("Checking time was " + str(elapsed_seconds) +
                      " seconds")
     self.verify_results_counts(expected_warnings, linter)
Example #6: RC from a pre-RC package.json (English OBS)
 def test_en_obs_package_json(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the English OBS
     zip_file = os.path.join(self.resources_dir, 'en-obs-package-json.zip')
     self.out_dir = tempfile.mkdtemp(prefix='repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en-obs')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     json = load_json_object(os.path.join(repo_dir, 'package.json'))
     self.assertEqual(rc.resource.identifier, json['resource']['slug'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, json['content_mime_type'])
     self.assertEqual(rc.resource.file_ext, 'md')
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.issued, json['resource']['status']['pub_date'])
     chapters = rc.projects[0].chapters()
     self.assertEqual(len(chapters), 2)
     chunks = rc.project().chunks('_back')
     self.assertEqual(chunks, ['back-matter.md'])
Example #7: RC from a translationStudio pre-RC manifest.json (Indonesian Matthew)
 def test_bible_from_tx_pre_rc(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the Indonesian Matthew ULB (translationStudio pre-RC manifest.json)
     zip_file = os.path.join(self.resources_dir, 'id_mat_text_ulb-ts.zip')
     self.out_dir = tempfile.mkdtemp(prefix='repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'id_mat_text_ulb-ts')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
     self.assertEqual(rc.resource.identifier, json['resource']['id'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, 'text/{0}'.format(json['format']))
     self.assertEqual(rc.resource.file_ext, json['format'])
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.modified, datetime.utcnow().strftime("%Y-%m-%d"))
     chapters = rc.projects[0].chapters()
     self.assertEqual(len(chapters), 29)
     chunks = rc.projects[0].chunks('01')
     self.assertEqual(len(chunks), 11)
Example #8: applying an HTML template to an RC source directory
 def run(self):
     # get the resource container
     self.rc = RC(self.source_dir)
     with open(self.template_file) as template_file:
         self.template_html = template_file.read()
         soup = BeautifulSoup(self.template_html, 'html.parser')
         soup.body['class'] = soup.body.get('class',
                                            []) + [self.resource_type]
         if self.classes:
             soup.body['class'] = soup.body.get('class', []) + self.classes
         self.template_html = unicode(soup)
     self.apply_template()
     return True
Example #9: test helper that unzips a repo fixture and builds its RC
    def extractFiles(cls, file_name, repo_name):
        file_path = os.path.join(TestTnPreprocessor.resources_dir, file_name)

        # 1) unzip the repo files
        temp_dir = tempfile.mkdtemp(prefix='repo_')
        unzip(file_path, temp_dir)
        repo_dir = os.path.join(temp_dir, repo_name)
        if not os.path.isdir(repo_dir):
            repo_dir = file_path

        # 2) Get the resource container
        rc = RC(repo_dir)

        return rc, repo_dir, temp_dir
Example #10: linting a subset of English ULB books
 def test_EnUlbValidSubset(self):
     check_files = [
         '19-PSA.usfm', '22-SNG.usfm', '24-JER.usfm', '25-LAM.usfm',
         '35-HAB.usfm'
     ]
     out_dir = self.unzip_resource_only('en_ulb.zip', check_files)
     expected_warnings = 0
     start = time.time()
     rc = RC(out_dir)
     linter = UsfmLinter(source_dir=out_dir, rc=rc)
     linter.run()
     elapsed_seconds = int(time.time() - start)
     App.logger.debug("Checking time was " + str(elapsed_seconds) +
                      " seconds")
     self.verify_results_counts(expected_warnings, linter)
Example #11: common linter run() wrapper around lint()
    def run(self):
        """
        Run common handling for all linters,and then calls the lint() function
        """
        success = False
        try:
            # Download file if a source_zip_url was given
            if self.source_zip_url:
                App.logger.debug("Linting url: " + self.source_zip_url)
                self.download_archive()
            # unzip the input archive if a source_zip_file exists
            if self.source_zip_file:
                App.logger.debug("Linting zip: " + self.source_zip_file)
                self.unzip_archive()
            # lint files
            if self.source_dir:
                self.rc = RC(directory=self.source_dir)
                App.logger.debug("Linting '{0}' files...".format(
                    self.source_dir))
                success = self.lint()
                App.logger.debug("...finished.")
        except Exception as e:
            message = 'Linting process ended abnormally: {0}'.format(e.message)
            App.logger.error(message)
            self.log.warnings.append(message)
            App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))
        warnings = self.log.warnings
        if len(warnings) > 200:  # sanity check so we don't overflow callback size limits
            warnings = warnings[0:199]
            msg = 'Warnings truncated for {0}'.format(self.s3_results_key)
            App.logger.debug(msg)
            warnings.append(msg)
        results = {
            'identifier': self.identifier,
            'success': success,
            'warnings': warnings,
            's3_results_key': self.s3_results_key
        }

        if self.callback is not None:
            self.callback_results = results
            self.do_callback(self.callback, self.callback_results)

        App.logger.debug("Linter results: " + str(results))
        return results
Example #12: RC defaults for a repo without a manifest (English ULB)
 def test_bible_no_manifest(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the English ULB (no manifest)
     zip_file = os.path.join(self.resources_dir, 'bible-no-manifest.zip')
     self.out_dir = tempfile.mkdtemp(prefix='repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_ulb')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     self.assertEqual(rc.resource.identifier, 'ulb')
     self.assertEqual(rc.resource.type, 'bundle')
     self.assertEqual(rc.resource.format, 'text/usfm')
     self.assertEqual(rc.resource.file_ext, 'usfm')
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.modified, datetime.utcnow().strftime("%Y-%m-%d"))
     chapters = rc.project().chapters()
     self.assertEqual(len(chapters), 0)
     self.assertEqual(len(rc.project().usfm_files()), 8)
Example #13: webhook handler that builds the RC, preprocesses, and queues conversion/lint jobs
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']
        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if the manifest already exists in the DB and update it if it does
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.iteritems():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i in range(0, len(books)):
                    book = books[i]
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
Example #14: variant of Example #13 with region-qualified S3 output URLs
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']
        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if the manifest already exists in the DB and update it if it does
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.iteritems():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i in range(0, len(books)):
                    book = books[i]
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
Example #15: TwPreprocessor.fix_links() link-rewriting tests
    def test_fix_links(self):
        # given
        rc = RC(os.path.join(self.resources_dir, 'manifests', 'tw'))
        repo_name = 'Door43'
        current_category = 'names'
        tw = TwPreprocessor(rc, tempfile.gettempdir(), tempfile.gettempdir())
        tw.repo_name = repo_name
        content = "This has links to the same category: (See also: [titus](../names/titus.md), [timothy](../names/timothy.md)"
        expected = "This has links to the same category: (See also: [titus](#titus), [timothy](#timothy)"

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)

        # given
        content = """This has links to other categories:
        (See also:[lamb](../kt/lamb.md), [license](../other/license.md)"""
        expected = """This has links to other categories:
        (See also:[lamb](kt.html#lamb), [license](other.html#license)"""

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)

        # given
        content = """This has links to the same category and others:
        (See also: [titus](../names/titus.md), [timothy](../names/timothy.md), [lamb](../kt/lamb.md), 
        [license](../other/license.md)"""
        expected = """This has links to the same category and others:
        (See also: [titus](#titus), [timothy](#timothy), [lamb](kt.html#lamb), 
        [license](other.html#license)"""

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)

        # given
        content = """This link should NOT be converted: [webpage](http://example.com/somewhere/outthere) """
        expected = """This link should NOT be converted: [webpage](http://example.com/somewhere/outthere) """

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)

        # given
        content = """This [link](rc://en/tn/help/ezr/09/01) is a rc link that should go to 
            ezr/09/01.md in the en_tn repo"""
        expected = """This [link](https://git.door43.org/Door43/en_tn/src/master/ezr/09/01.md) is a rc link that should go to 
            ezr/09/01.md in the en_tn repo"""

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)

        # given
        content = """This url should be made into a link: http://example.com/somewhere/outthere and so should www.example.com/asdf.html?id=5&view=dashboard#report."""
        expected = """This url should be made into a link: [http://example.com/somewhere/outthere](http://example.com/somewhere/outthere) and so should [www.example.com/asdf.html?id=5&view=dashboard#report](http://www.example.com/asdf.html?id=5&view=dashboard#report)."""

        # when
        converted = tw.fix_links(content, current_category)

        # then
        self.assertEqual(converted, expected)
Example #16: TaPreprocessor.fix_links() link-rewriting tests
    def test_fix_links(self):
        rc = RC(os.path.join(self.resources_dir, 'manifests', 'ta'))
        ta = TaPreprocessor(rc, tempfile.gettempdir(), tempfile.gettempdir())
        content = "This has [links](../section1/01.md) to the same [manual](../section2/01.md)"
        expected = "This has [links](#section1) to the same [manual](#section2)"
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)

        content = """This has links to 
        [other](../../checking/section1/01.md) [manuals](../../translate/section2/01.md)"""
        expected = """This has links to 
        [other](04-checking.html#section1) [manuals](03-translate.html#section2)"""
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)

        content = """This has links to both this [manual](../section1/01.md),
         this [page](section2) and [another manual](../../process/section3/01.md)."""
        expected = """This has links to both this [manual](#section1),
         this [page](#section2) and [another manual](02-process.html#section3)."""
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)

        content = """This link should NOT be converted: [webpage](http://example.com/somewhere/outthere) """
        expected = """This link should NOT be converted: [webpage](http://example.com/somewhere/outthere) """
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)

        content = """This [link](rc://en/tw/dict/bible/other/dream) is a rc link that should go to 
            other/dream.md in the en_tw repo"""
        expected = """This [link](https://git.door43.org/Door43/en_tw/src/master/bible/other/dream.md) is a rc link that should go to 
            other/dream.md in the en_tw repo"""
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)

        content = """This url should be made into a link: http://example.com/somewhere/outthere and so should www.example.com/asdf.html?id=5&view=dashboard#report."""
        expected = """This url should be made into a link: [http://example.com/somewhere/outthere](http://example.com/somewhere/outthere) and so should [www.example.com/asdf.html?id=5&view=dashboard#report](http://www.example.com/asdf.html?id=5&view=dashboard#report)."""
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)
        # Tests https://git.door43.org/Door43/en_ta/raw/master/translate/translate-source-text/01.md
        content = """
### Factors to Consider for a Source Text

When choosing a source text, there are a number of factors that must be considered:

  * **[Statement of Faith](../../intro/statement-of-faith/01.md)** - Is the text in line with the Statement of Faith?
  * **[Translation Guidelines](../../intro/translation-guidelines/01.md)** - Is the text in line with the Translation Guidelines?
  * **Language** - Is the text in a suitable language that translators and checkers understand well?
  * **[Copyrights, Licensing, and Source Texts](../translate-source-licensing/01.md)** - Is the text released under a license that gives sufficient legal freedom?
  * **[Source Texts and Version Numbers](../translate-source-version/01.md)** - Is the text the latest, most updated version?
  * **[The Original and Source Languages](../translate-original/01.md)** - Does the translation team understand the difference between source languages and original languages?
  * **[Original Manuscripts](../translate-manuscripts/01.md)** - Does the translation team understand about Original Manuscripts and [Textual Variants](../translate-textvariants/01.md)?

It is important that the leaders of the churches in the language group agree that the source text is a good one. The Open Bible Stories are available in many source languages on http://ufw.io/stories/. There are also translations of the Bible there to be used as sources for translation in English, and soon other languages, as well.
"""
        expected = """
### Factors to Consider for a Source Text

When choosing a source text, there are a number of factors that must be considered:

  * **[Statement of Faith](01-intro.html#statement-of-faith)** - Is the text in line with the Statement of Faith?
  * **[Translation Guidelines](01-intro.html#translation-guidelines)** - Is the text in line with the Translation Guidelines?
  * **Language** - Is the text in a suitable language that translators and checkers understand well?
  * **[Copyrights, Licensing, and Source Texts](#translate-source-licensing)** - Is the text released under a license that gives sufficient legal freedom?
  * **[Source Texts and Version Numbers](#translate-source-version)** - Is the text the latest, most updated version?
  * **[The Original and Source Languages](#translate-original)** - Does the translation team understand the difference between source languages and original languages?
  * **[Original Manuscripts](#translate-manuscripts)** - Does the translation team understand about Original Manuscripts and [Textual Variants](#translate-textvariants)?

It is important that the leaders of the churches in the language group agree that the source text is a good one. The Open Bible Stories are available in many source languages on [http://ufw.io/stories/](http://ufw.io/stories/). There are also translations of the Bible there to be used as sources for translation in English, and soon other languages, as well.
"""
        converted = ta.fix_links(content)
        self.assertEqual(converted, expected)
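
Taken together, these examples exercise a small, consistent surface of the ResourceContainer API: construct an RC over a repo directory, read resource-level metadata, and walk projects, chapters, and chunks. Below is a minimal end-to-end sketch of that workflow, assuming a tx-manager-style project layout; the import paths and the local fixture path are assumptions for illustration, not part of the snippets above.

import os
import tempfile

# Assumed import paths (tx-manager-style layout); adjust to your project.
from libraries.resource_container.ResourceContainer import RC
from libraries.general_tools.file_utils import unzip

# Hypothetical fixture location; any zipped repo, with or without a
# manifest, works (Example #12 shows the no-manifest defaults).
zip_file = os.path.join('tests', 'resources', 'en-obs-manifest-yaml.zip')
out_dir = tempfile.mkdtemp(prefix='repo_')
unzip(zip_file, out_dir)
repo_dir = os.path.join(out_dir, 'en_obs')

# Build the container; RC reads manifest.yaml, manifest.json, or
# package.json, falling back to pre-rc defaults when none is present.
rc = RC(directory=repo_dir, repo_name='en_obs')

# Resource-level metadata, as asserted throughout the tests above.
resource = rc.resource
info = (resource.identifier, resource.type, resource.format,
        resource.file_ext, resource.conformsto, resource.modified)

# Project-level navigation: projects, chapters, chunks.
for project in rc.projects:
    chapters = project.chapters()         # e.g. ['front', '01', ...]
    for chapter in chapters:
        chunks = project.chunks(chapter)  # e.g. ['title.md', 'intro.md']

# The whole container as a dict (compared to manifest.yaml in Example #3).
manifest_dict = rc.as_dict()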