Пример #1
0
def minidom_pretty_print(content):
    """Pretty-print an XML string with ``xml.dom.minidom`` (best effort).

    The text is flattened onto a single line, passed through the style and
    whitespace helpers, parsed, and serialised again with ``toprettyxml()``.
    Blank lines are dropped and the prefix returned by ``split_prefix`` for
    the input is put back in front of the result.

    Args:
        content: the XML document as a string.

    Returns:
        The pretty-printed document, or ``None`` when any step fails. On
        failure the error is printed and the content being processed is
        written to ``./pretty_print.xml``.
    """
    pretty = None

    try:
        # Collapse the document onto one line before it is re-indented.
        content = content.replace('\r', '')
        content = ' '.join(content.split('\n'))
        content = preserve_styles(content)
        content = remove_exceeding_spaces_in_all_tags(content)
        prefix, content = split_prefix(content)

        # parseString is fed bytes, so unicode input is encoded first.
        if isinstance(content, unicode):
            content = content.encode('utf-8')

        doc = xml.dom.minidom.parseString(content)
        pretty = doc.toprettyxml().strip()
        if not isinstance(pretty, unicode):
            pretty = pretty.decode('utf-8')

        # The prefix minidom generates is discarded; the original one is kept.
        ign, pretty = split_prefix(pretty)
        pretty = '\n'.join([item for item in pretty.split('\n') if item.strip() != ''])

        pretty = remove_break_lines_off_element_content(pretty)

        pretty = restore_styles(pretty)
        pretty = prefix + remove_exceding_style_tags(pretty).strip()
    except Exception as e:
        # Deliberately best-effort: report, keep the offending content for
        # inspection, and return whatever value ``pretty`` holds.
        print('ERROR in pretty')
        print(e)
        fs_utils.write_file('./pretty_print.xml', content)
    return pretty
Пример #2
0
def java_xml_utils_style_validation(xml_filename, doctype, report_filename, xsl_prep_report, xsl_report):
    """Build the style checker report and tell whether it is clean.

    The XML is transformed with ``xsl_prep_report`` into an intermediate
    ``.xml`` report, which is then transformed with ``xsl_report`` into
    ``report_filename``. The XML file is rewritten with ``doctype`` for the
    duration of the run and restored from its backup afterwards.

    Returns:
        True when the report contains 'Total of errors = 0' and either
        'Total of warnings = 0' or no warnings total at all.
    """
    # STYLE CHECKER REPORT
    register_log('java_xml_utils_style_validation: inicio')
    xml_report = report_filename.replace('.html', '.xml')
    # Start from a clean slate: drop reports left by a previous run.
    for stale in (xml_report, report_filename):
        if os.path.exists(stale):
            os.unlink(stale)

    parameters = {}
    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    prepared = java_xml_utils.xml_transform(xml_filename, xsl_prep_report, xml_report, parameters)
    if prepared:
        java_xml_utils.xml_transform(xml_report, xsl_report, report_filename, parameters)
    else:
        message = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to create') + ' ' + report_filename
        fs_utils.write_file(report_filename, message)

    is_valid_style = False
    if os.path.isfile(report_filename):
        report = fs_utils.read_file(report_filename)
        no_errors = 'Total of errors = 0' in report
        no_warnings = ('Total of warnings = 0' in report) or ('Total of warnings =' not in report)
        is_valid_style = no_errors and no_warnings

    if os.path.isfile(bkp_xml_filename):
        xml_utils.restore_xml_file(xml_filename, bkp_xml_filename)

    # The intermediate report is only a stepping stone.
    if os.path.isfile(xml_report):
        os.unlink(xml_report)
    register_log('java_xml_utils_style_validation: fim')
    return is_valid_style
Пример #3
0
def minidom_pretty_print(content):
    """Return ``content`` re-indented through ``xml.dom.minidom``.

    The input is flattened to one line, cleaned by the style/whitespace
    helpers, parsed and serialised with ``toprettyxml()``. Blank lines are
    removed and the prefix split off the input is restored in front of the
    result.

    Raises:
        Any exception raised while processing; before it propagates, the
        error and the content are printed and the content is written to
        ``./pretty_print.xml``.
    """
    pretty = None

    try:
        content = " ".join(content.replace("\r", "").split("\n"))
        content = remove_exceeding_spaces_in_all_tags(preserve_styles(content))
        prefix, content = split_prefix(content)

        # The parser is fed bytes, so unicode input is encoded first.
        if isinstance(content, unicode):
            content = content.encode("utf-8")

        document = xml.dom.minidom.parseString(content)
        pretty = document.toprettyxml().strip()
        if not isinstance(pretty, unicode):
            pretty = pretty.decode("utf-8")

        # Only the body matters here; the generated prefix is thrown away.
        _generated_prefix, pretty = split_prefix(pretty)
        non_blank_lines = [line for line in pretty.split("\n") if line.strip()]
        pretty = "\n".join(non_blank_lines)

        pretty = remove_break_lines_off_element_content(pretty)
        pretty = restore_styles(pretty)
        pretty = prefix + remove_exceding_style_tags(pretty).strip()
    except Exception as error:
        print("ERROR in pretty")
        print(error)
        print(content)
        fs_utils.write_file("./pretty_print.xml", content)
        raise
    return pretty
Пример #4
0
 def validate(self, xml_filename, dtd_report_filename, style_report_filename):
     """Run the DTD and style validations for one XML file.

     Every step is registered in ``self.logger``. When the XML cannot be
     loaded, the style validation is skipped and a fatal-error message is
     written to ``style_report_filename`` instead.

     Returns:
         A tuple ``(xml, is_valid_dtd, (f, e, w))`` where the inner tuple is
         what ``style_checker_statistics`` returns for the style report
         content.
     """
     self.logger.register('XMLValidator.validate - inicio')
     self.logger.register('XMLValidator.validate - self.validator.setup()')
     self.validator.logger = self.logger
     self.validator.setup(xml_filename)
     self.logger.register('XMLValidator.validate - xml_utils.load_xml')
     xml, load_error = xml_utils.load_xml(self.validator.xml.content)
     self.logger.register('XMLValidator.validate - self.validator.dtd_validation')
     is_valid_dtd = self.validator.dtd_validation(dtd_report_filename)
     if load_error is not None:
         self.logger.register('XMLValidator.validate - e is not None')
         unable_to_load = _('Unable to load {xml}. ').format(xml=xml_filename)
         content = validation_status.STATUS_FATAL_ERROR + ': ' + unable_to_load + '\n' + load_error
         fs_utils.write_file(style_report_filename, content)
     else:
         self.logger.register('XMLValidator.validate - self.validator.style_validation')
         self.validator.style_validation(style_report_filename)
         self.logger.register('XMLValidator.validate - fs_utils.read_file')
         content = fs_utils.read_file(style_report_filename)
     self.logger.register('XMLValidator.validate - style_checker_statistics')
     statistics = style_checker_statistics(content)
     fatal_errors, errors, warnings = statistics
     self.logger.register('XMLValidator.validate - self.validator.finish()')
     self.validator.finish()
     self.logger.register('XMLValidator.validate - fim')
     return (xml, is_valid_dtd, (fatal_errors, errors, warnings))
Пример #5
0
def packtools_dtd_validation(xml_filename, report_filename):
    """Validate ``xml_filename`` with packtools and write the error report.

    The messages of the errors returned by the validator are written to
    ``report_filename``, one per line.

    Returns:
        The validity flag returned by ``XMLValidator.validate()``.
    """
    import packtools
    validator = packtools.stylechecker.XMLValidator(xml_filename)
    is_valid, errors = validator.validate()
    messages = []
    for error in errors:
        messages.append(error.message)
    fs_utils.write_file(report_filename, '\n'.join(messages))
    return is_valid
Пример #6
0
 def _change_doctype(self):
     """Replace, remove or insert the DOCTYPE of ``self.content`` and save it.

     Three cases are handled:

     * the content already has a ``<!DOCTYPE ...>``: it is replaced by
       ``self.doctype``, or removed (together with the line break after it)
       when ``self.doctype`` is empty;
     * the content has no DOCTYPE but starts with an XML declaration:
       ``self.doctype`` (if any) is inserted right after the declaration;
     * otherwise the content is left as it is, apart from the ``\\r\\n``
       normalisation.

     The resulting content is always written to ``self.xml_filename``.
     """
     if self.logger is not None:
         self.logger.register('XML._change_doctype - inicio')
     self.content = self.content.replace('\r\n', '\n')
     if '<!DOCTYPE' in self.content:
         # The declaration is taken as everything from '<!DOCTYPE' up to the
         # first '>' that follows it.
         find_text = self.content[self.content.find('<!DOCTYPE'):]
         find_text = find_text[0:find_text.find('>')+1]
         if len(find_text) > 0:
             if len(self.doctype) > 0:
                 self.content = self.content.replace(find_text, self.doctype)
             else:
                 if find_text + '\n' in self.content:
                     self.content = self.content.replace(find_text + '\n', self.doctype)
     elif self.content.startswith('<?xml '):
         end_of_declaration = self.content.find('?>')
         # An unterminated XML declaration used to raise NameError here
         # (xml_proc was never bound); such content is now left untouched.
         if end_of_declaration >= 0:
             xml_proc = self.content[0:end_of_declaration+2]
             # Skip the '<' that opens the declaration, then resume at the
             # next '<' found in the content.
             xml = self.content[1:]
             if '<' in xml:
                 xml = xml[xml.find('<'):]
             if len(self.doctype) > 0:
                 self.content = xml_proc + '\n' + self.doctype + '\n' + xml
             else:
                 self.content = xml_proc + '\n' + xml
     fs_utils.write_file(self.xml_filename, self.content)
     if self.logger is not None:
         self.logger.register('XML._change_doctype - fim')
def update_wayta_orgname_location_country(source, wayta_normalized_aff, wayta_orgname_location_country):
    """Rebuild the orgname/location/country list from the downloaded data.

    The downloaded text is converted to tab-separated rows. Rows with
    exactly six fields are reordered to
    ``correct, city, state, country_code, country_name``, de-duplicated,
    sorted and written to ``wayta_orgname_location_country``. Line counts
    are printed after each step.
    """
    text = fs_utils.get_downloaded_data(source, wayta_normalized_aff)
    print('wayta normalized aff')
    print(len(text.split('\n')))

    text = text.replace(';', '\t')
    print(1)
    print(len(text.split('\n')))

    text = remove_exceding_blank_spaces(text)
    print(2)
    print(len(text.split('\n')))

    rows = text.split('\n')
    print(3)
    print(len(rows))

    unique_rows = set()
    for row in rows:
        # Drop the quotes wrapping the first field, then unescape doubled quotes.
        if row.startswith('"') and '"\t' in row:
            row = row[1:].replace('"\t', '\t')
        fields = row.replace('""', '"').split('\t')
        if len(fields) != 6:
            continue
        _bad, correct, country_name, country_code, state, city = fields
        unique_rows.add('\t'.join([correct, city, state, country_code, country_name]))
    print('downloaded:')
    print(len(unique_rows))
    fs_utils.write_file(wayta_orgname_location_country, '\n'.join(sorted(unique_rows)))
Пример #8
0
 def temp_xml_filename(self):
     """Write the temporary XML wrapping the article data and return its path.

     The file is created under ``self.issue_stuff.temp_path`` and named
     after ``self.pubmed_filename`` with a ``pubmed_tmp_`` prefix. Its
     ``<root>`` element holds the filenames content followed by the pids
     content.
     """
     base_name = os.path.basename(self.pubmed_filename)
     temp_filename = self.issue_stuff.temp_path + '/pubmed_tmp_' + base_name
     xml_content = (
         '<?xml version="1.0" encoding="utf-8"?>\n'
         + '<root>'
         + self.articles_filenames_xml_content
         + self.articles_pids_xml_content
         + '</root>'
     )
     fs_utils.write_file(temp_filename, xml_content)
     return temp_filename
Пример #9
0
def xml_content_transform(content, xsl_filename):
    """Apply ``xsl_filename`` to an XML string and return the result.

    The content is written to a temporary file, transformed into a second
    temporary file, and read back.

    Args:
        content: the XML document text.
        xsl_filename: path of the stylesheet to apply.

    Returns:
        The transformed content, or the original ``content`` unchanged when
        the transformation fails.
    """
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()
    f2 = tempfile.NamedTemporaryFile(delete=False)
    f2.close()

    try:
        fs_utils.write_file(f.name, content)
        if xml_transform(f.name, xsl_filename, f2.name):
            content = fs_utils.read_file(f2.name)
    finally:
        # Both files were created with delete=False, so they must be removed
        # here; previously the output file was leaked whenever the
        # transformation failed, and both were leaked on an exception.
        for temp_name in (f.name, f2.name):
            if os.path.exists(temp_name):
                os.unlink(temp_name)
    return content
Пример #10
0
def validate_article_xml(xml_filename, dtd_files, dtd_report_filename, style_report_filename):
    """Validate an article XML against the DTD and the style checker.

    The DTD validation always runs. The style validation runs only when the
    XML could be loaded; otherwise a fatal-error message is written to
    ``style_report_filename``.

    Returns:
        A tuple ``(xml, is_valid_dtd, (f, e, w))`` where the inner tuple is
        what ``style_checker_statistics`` returns for the style report file.
    """
    register_log('validate_article_xml: inicio')
    register_log('validate_article_xml: inicio')
    xml, load_error = xml_utils.load_xml(xml_filename)
    is_valid_dtd = dtd_validation(
        xml_filename,
        dtd_report_filename,
        dtd_files.doctype_with_local_path,
        dtd_files.database_name)
    if load_error is not None:
        text = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to load') + ' ' + xml_filename + '\n' + load_error
        fs_utils.write_file(style_report_filename, text)
    else:
        # The style validity flag itself is not part of the returned value;
        # only the statistics read from the report are.
        style_validation(
            xml_filename,
            dtd_files.doctype_with_local_path,
            style_report_filename,
            dtd_files.xsl_prep_report,
            dtd_files.xsl_report,
            dtd_files.database_name)
    fatal_errors, errors, warnings = style_checker_statistics(style_report_filename)
    register_log('validate_article_xml: fim')
    return (xml, is_valid_dtd, (fatal_errors, errors, warnings))
Пример #11
0
def format_reports_for_web(report_path, pkg_path, issue_path):
    """Publish the reports of ``report_path`` under the local web application.

    ``.zip`` files and ``xml_converter.txt`` are deleted from
    ``report_path``. Every other file is copied to
    ``<local_web_app_path>/htdocs/reports/<issue_path>`` with its
    ``file:///`` references to ``pkg_path`` and ``report_path`` rewritten to
    web paths.
    """
    web_reports_path = converter_env.local_web_app_path + '/htdocs/reports/' + issue_path
    if not os.path.isdir(web_reports_path):
        os.makedirs(web_reports_path)

    for name in os.listdir(report_path):
        report_file = report_path + '/' + name
        if name.endswith('.zip') or name == 'xml_converter.txt':
            os.unlink(report_file)
            continue
        content = fs_utils.read_file(report_file)
        # Local file links become links served by the web application.
        content = content.replace('file:///' + pkg_path, '/img/revistas/' + issue_path)
        content = content.replace('file:///' + report_path, '/reports/' + issue_path)
        if isinstance(content, unicode):
            content = content.encode('utf-8')
        fs_utils.write_file(web_reports_path + '/' + name, content)
def report_differences(old, new, deleted_report, added_report, fixed_report, replaced_report):
    """Compare two line-based lists and write the difference reports.

    Lines present only in ``old`` are candidates for deletion and lines
    present only in ``new`` are candidates for addition. A deletion
    candidate for which a similar addition candidate is found is reported
    as replaced (old line plus its similar line) and the similar line as
    fixed; the remaining candidates are reported as deleted and added.

    Args:
        old: path of the current list.
        new: path of the new list.
        deleted_report, added_report, fixed_report, replaced_report: paths
            of the report files to write.

    Returns:
        ``[len(deleted), len(added), len(fixed)]``.
    """
    old_items = fs_utils.read_file(old).split('\n')
    print('current:')
    print(len(old_items))

    new_items = fs_utils.read_file(new).split('\n')
    print('new:')
    print(len(new_items))

    # Set lookups keep the comparisons linear; testing membership against
    # the lists themselves was quadratic in the number of lines.
    old_lookup = set(old_items)
    new_lookup = set(new_items)
    maybe_deleted = [item for item in old_items if item not in new_lookup]
    maybe_added = [item for item in new_items if item not in old_lookup]

    print('=>')
    print([len(maybe_deleted), len(maybe_added)])
    organized_items = classify_items_by_len(maybe_added)

    deleted = []
    replaced = []
    fixed = []
    total = '/' + str(len(maybe_deleted))
    for i, item in enumerate(maybe_deleted, 1):
        # Progress feedback every 500 items.
        if str(i).endswith('500') or str(i).endswith('000'):
            print(str(i) + total)

        similar = found_similar(item, maybe_added)
        if similar is None:
            # Second attempt, restricted to candidates of the same length.
            similar = found_similar_2(item, organized_items.get(len(item), []))
        if similar is None:
            deleted.append(item)
        else:
            replaced.append(item + '\n' + similar + '\n')
            fixed.append(similar)

    fixed_lookup = set(fixed)
    added = [item for item in maybe_added if item not in fixed_lookup]

    fs_utils.write_file(replaced_report, '\n'.join(replaced))
    fs_utils.write_file(fixed_report, '\n'.join(fixed))
    fs_utils.write_file(deleted_report, '\n'.join(deleted))
    fs_utils.write_file(added_report, '\n'.join(added))

    return [len(deleted), len(added), len(fixed)]
Пример #13
0
    def transform_content(self, xsl_filename):
        """Transform ``self.content`` with ``xsl_filename`` and return the result.

        The content is written to a temporary file and transformed into a
        second temporary file, which is then read back. Both temporary files
        are removed before returning.

        Returns:
            The transformed content, or ``''`` when the transformation fails.
        """
        if self.logger is not None:
            self.logger.register('XML.transform_content - inicio')
        f = tempfile.NamedTemporaryFile(delete=False)
        f.close()

        f2 = tempfile.NamedTemporaryFile(delete=False)
        f2.close()

        content = ''
        try:
            fs_utils.write_file(f.name, self.content)
            if self.transform_file(f.name, xsl_filename, f2.name):
                content = fs_utils.read_file(f2.name)
        finally:
            # Remove each temporary file. The loop used to unlink f.name on
            # every iteration, which raised on the second pass and never
            # removed f2.
            for item in [f.name, f2.name]:
                if os.path.exists(item):
                    os.unlink(item)
        if self.logger is not None:
            self.logger.register('XML.transform_content - fim')
        return content
Пример #14
0
def xml_validate(xml_filename, result_filename, doctype=None):
    """Validate ``xml_filename`` with the Java XMLCheck tool.

    The XML file is rewritten with ``doctype`` while the tool runs and is
    restored from its backup afterwards. The tool output is written to
    ``result_filename``; when it mentions an error, the numbered lines of
    the XML file are appended to help locate the problem.

    Args:
        xml_filename: path of the XML file to check.
        result_filename: path of the report to create.
        doctype: DOCTYPE to apply; when given, the tool is run with
            ``--validate``.

    Returns:
        True when the report does not contain 'ERROR' (case-insensitive).
    """
    validation_type = ''

    if doctype is None:
        doctype = ''
    else:
        validation_type = '--validate'

    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    temp_result_filename = TMP_DIR + '/' + os.path.basename(result_filename)
    if os.path.isfile(result_filename):
        os.unlink(result_filename)
    if not os.path.isdir(os.path.dirname(result_filename)):
        os.makedirs(os.path.dirname(result_filename))

    # NOTE(review): the command line is assembled by string concatenation and
    # run through the shell; file names containing double quotes would break it.
    cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)

    if os.path.exists(temp_result_filename):
        result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())

        if 'ERROR' in result.upper():
            # The file is now closed deterministically instead of being left
            # to the garbage collector. As before, the first line of the file
            # is not listed and numbering starts at 1 on the second line.
            with open(xml_filename, 'r') as xml_file:
                numbered = [str(n) + ':' + line for n, line in enumerate(xml_file) if n > 0]
            result += '\n' + ''.join(numbered).decode('utf-8')
            fs_utils.write_file(temp_result_filename, result)
    else:
        result = 'ERROR: Not valid. Unknown error.\n' + cmd
        fs_utils.write_file(temp_result_filename, result)

    shutil.move(temp_result_filename, result_filename)
    shutil.move(bkp_xml_filename, xml_filename)
    return not 'ERROR' in result.upper()
Пример #15
0
    def transform_file(self, xsl_filename, result_filename, parameters={}):
        """Transform ``self.xml_filename`` with ``xsl_filename``.

        The Java transformer writes to a temporary file which is then moved
        to ``result_filename``. When the transformer produces no output, an
        error message (including the command line) is written there instead.

        Returns:
            True when the transformer produced an output file.
        """
        def log(message):
            # Logging is optional: the instance may have no logger attached.
            if self.logger is not None:
                self.logger.register(message)

        log('XML.transform_file - inicio')
        temp_result_filename = self.prepare(result_filename)

        log('XML.transform_file - command - inicio')
        cmd = (
            JAVA_PATH + ' -jar "' + JAR_TRANSFORM + '" -novw -w0 -o "'
            + temp_result_filename + '" "' + self.xml_filename + '" "'
            + xsl_filename + '" ' + format_parameters(parameters)
        )
        cmd = cmd.encode(encoding=sys.getfilesystemencoding())
        os.system(cmd)
        log('XML.transform_file - command - fim')

        succeeded = os.path.exists(temp_result_filename)
        if not succeeded:
            fs_utils.write_file(temp_result_filename, 'ERROR: transformation error.\n' + cmd)
        shutil.move(temp_result_filename, result_filename)
        log('XML.transform_file - fim')

        return succeeded
Пример #16
0
    def style_validation(self, report_filename):
        """Create the style checker report and tell whether it is clean.

        ``self.xml`` is transformed with ``self.xsl_prep_report`` into an
        intermediate ``.xml`` report, which is transformed with
        ``self.xsl_report`` into ``report_filename``. When no report file
        exists at the end, an error message is written in its place.

        Returns:
            True when the report contains 'Total of errors = 0' and either
            'Total of warnings = 0' or no warnings total at all.
        """
        xml_report = report_filename.replace('.html', '.xml')

        # Remove whatever a previous run left behind.
        for stale in (xml_report, report_filename):
            if os.path.exists(stale):
                os.unlink(stale)

        parameters = {}
        prepared = self.xml.transform_file(self.xsl_prep_report, xml_report, parameters)
        if prepared:
            report_transformer = java_xml_utils.XML(xml_report, None)
            report_transformer.logger = self.logger
            report_transformer.transform_file(self.xsl_report, report_filename, parameters)
            result = fs_utils.read_file(report_filename)
            if os.path.isfile(xml_report):
                os.unlink(xml_report)

        if not os.path.isfile(report_filename):
            result = 'ERROR: ' + _('Unable to create') + ' ' + report_filename
            fs_utils.write_file(report_filename, result)

        no_errors = 'Total of errors = 0' in result
        no_warnings = ('Total of warnings = 0' in result) or ('Total of warnings =' not in result)
        return no_errors and no_warnings
Пример #17
0
def xml_transform(xml_filename, xsl_filename, result_filename, parameters={}):
    """Transform ``xml_filename`` with ``xsl_filename`` into ``result_filename``.

    The Java transformer runs on a temporary copy of the XML and writes to a
    file under ``TMP_DIR``, which is then moved to ``result_filename``. When
    the transformer produces no output, an error message (including the
    command line) is written there instead.

    Returns:
        True when the transformer produced an output file.
    """
    temp_result_filename = TMP_DIR + '/' + os.path.basename(result_filename)

    result_dir = os.path.dirname(result_filename)
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    # Neither the final nor the temporary result may survive from a previous run.
    for stale in (result_filename, temp_result_filename):
        if os.path.isfile(stale):
            os.unlink(stale)

    tmp_xml_filename = create_temp_xml_filename(xml_filename)
    cmd = (
        JAVA_PATH + ' -jar "' + JAR_TRANSFORM + '" -novw -w0 -o "'
        + temp_result_filename + '" "' + tmp_xml_filename + '" "'
        + xsl_filename + '" ' + format_parameters(parameters)
    )
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)

    succeeded = os.path.exists(temp_result_filename)
    if not succeeded:
        fs_utils.write_file(temp_result_filename, 'ERROR: transformation error.\n' + cmd)
    shutil.move(temp_result_filename, result_filename)

    fs_utils.delete_file_or_folder(tmp_xml_filename)

    return succeeded
Пример #18
0
    def xml_validate(self, result_filename):
        """Validate ``self.xml_filename`` with the Java XMLCheck tool.

        The tool output is stored in ``result_filename``. When it mentions an
        error, the numbered lines of the XML file are appended to help locate
        the problem. The tool is run with ``--validate`` unless
        ``self.doctype`` is empty.

        Returns:
            True when the report does not contain 'ERROR' (case-insensitive).
        """
        if self.logger is not None:
            self.logger.register('XML.xml_validate - inicio')
        validation_type = '' if self.doctype == '' else '--validate'
        temp_result_filename = self.prepare(result_filename)

        if self.logger is not None:
            self.logger.register('XML.transform_file - command - inicio')
        # NOTE(review): the command line is assembled by string concatenation
        # and run through the shell; file names containing double quotes
        # would break it.
        cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + self.xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
        cmd = cmd.encode(encoding=sys.getfilesystemencoding())
        os.system(cmd)
        if self.logger is not None:
            self.logger.register('XML.transform_file - command - fim')

        if os.path.exists(temp_result_filename):
            result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())
            if 'ERROR' in result.upper():
                # The file is now closed deterministically instead of being
                # left to the garbage collector. As before, the first line of
                # the file is not listed and numbering starts at 1 on the
                # second line.
                with open(self.xml_filename, 'r') as xml_file:
                    numbered = [str(n) + ':' + line for n, line in enumerate(xml_file) if n > 0]
                result += '\n' + ''.join(numbered).decode('utf-8')
                fs_utils.write_file(result_filename, result)
                os.unlink(temp_result_filename)
            else:
                shutil.move(temp_result_filename, result_filename)
        else:
            result = 'ERROR: Not valid. Unknown error.\n' + cmd
            fs_utils.write_file(result_filename, result)
        # NOTE(review): this message repeats the one registered right after
        # the command; it is kept so the log output does not change.
        if self.logger is not None:
            self.logger.register('XML.transform_file - command - fim')
        if self.logger is not None:
            self.logger.register('XML.xml_validate - fim')
        return not 'ERROR' in result.upper()
Пример #19
0
def apply_dtd(xml_filename, doctype):
    """Rewrite ``xml_filename`` with ``doctype`` and return a backup's path.

    A copy of the untouched file is stored in a newly created temporary
    directory before the file is overwritten with the result of
    ``replace_doctype``.

    Returns:
        The path of the backup copy.
    """
    backup_dir = tempfile.mkdtemp()
    temp_filename = backup_dir + '/' + os.path.basename(xml_filename)
    shutil.copyfile(xml_filename, temp_filename)
    original_content = fs_utils.read_file(xml_filename)
    fs_utils.write_file(xml_filename, replace_doctype(original_content, doctype))
    return temp_filename
Пример #20
0
 def dtd_validation(self, report_filename):
     """Write the stored DTD validation text to ``report_filename``.

     Returns:
         The value of ``self.is_valid``.
     """
     report_text = self._dtd_validation
     fs_utils.write_file(report_filename, report_text)
     return self.is_valid
        if len(parts) == 6:
            bad, correct, country_name, country_code, state, city = parts
            results.append('\t'.join([correct, city, state, country_code, country_name]))
    results = list(set(results))
    print('downloaded:')
    print(len(results))
    fs_utils.write_file(wayta_orgname_location_country, '\n'.join(sorted(results)))


# Script entry point, driven by the number of command-line arguments:
#   * no argument: rebuild the wayta list and report how it differs from the
#     local list;
#   * 'update': recreate the institutions database;
#   * 'fix_local': rewrite the local list through remove_exceding_blank_spaces.
execute_update = False
if len(sys.argv) == 1:
    update_wayta_orgname_location_country(source, wayta_normalized_aff, wayta_orgname_location_country)
    counts = report_differences(local_orgname_location_country, wayta_orgname_location_country, deleted_report, added_report, fixed_report, replaced_report)

    # counts is [deleted, added, fixed]; the sum is printed as well.
    print('->')
    print(counts)
    print(sum(counts))

elif len(sys.argv) == 2:
    execute_update = (sys.argv[1] == 'update')
    if sys.argv[1] == 'fix_local':
        fs_utils.write_file(local_orgname_location_country, remove_exceding_blank_spaces(fs_utils.read_file(local_orgname_location_country)))

# The database is recreated only when 'update' was requested explicitly.
if execute_update is True:
    import institutions_service
    a = institutions_service.OrgManager()
    a.create_db()
    print('db updated')
else:
    print('No update')
def fix_endoflines(filename, destination):
    """Write the stripped, sorted lines of ``filename`` to ``destination``.

    Each line is stripped of leading and trailing whitespace (which also
    removes a stray ``\\r`` left by Windows line endings); the lines are
    then sorted and joined with ``\\n``.

    Args:
        filename: path of the file to read.
        destination: path of the file to write.
    """
    content = fs_utils.read_file(filename)
    stripped_lines = [line.strip() for line in content.split('\n')]
    # The stripped lines are what must be written: sorting the raw content
    # string would sort its individual characters instead of its lines.
    fs_utils.write_file(destination, '\n'.join(sorted(stripped_lines)))