# Check if published date is after threshold:
        if is_beyond_threshold_date(threshold_date, fulltext_file):
            # The published date is beyond the threshold, we continue
            msg = "Warning: Article published beyond threshold: %s" % \
                  (record.doi,)
            write_message(msg)
            yield record, msg
            continue
        else:
            write_message("OK. Record is below the threshold.", verbose=3)

        if add_metadata:
            from harvestingkit.aps_package import (ApsPackage,
                                                   ApsPackageXMLError)
            # Generate Metadata,FFT and yield it
            aps = ApsPackage(journal_mappings)
            try:
                xml = aps.get_record(fulltext_file)
                record.add_metadata_by_string(xml)
            except ApsPackageXMLError, e:
                # This must be old-format XML
                write_message("Warning: old-style metadata detected for %s" %
                              (fulltext_file))
                # Remove any DTD info in the file before converting
                cleaned_fulltext_file = remove_dtd_information(fulltext_file)
                try:
                    convert_xml_using_saxon(cleaned_fulltext_file,
                                            CFG_APSHARVEST_XSLT)

                    # Conversion is a success. Let's derive location of converted file
                    source_directory = os.path.dirname(cleaned_fulltext_file)
示例#2
0
            # Check if published date is after threshold:
            if is_beyond_threshold_date(parameters.get("threshold_date"), fulltext_file):
                # The published date is beyond the threshold, we continue
                msg = "Warning: Article published beyond threshold: %s" % \
                      (record.doi,)
                write_message(msg)
                yield record, msg
                continue
            else:
                write_message("OK. Record is below the threshold.", verbose=3)

            if parameters.get("metadata"):
                from harvestingkit.aps_package import (ApsPackage,
                                                       ApsPackageXMLError)
                # Generate Metadata,FFT and yield it
                aps = ApsPackage(self.journal_mappings)
                try:
                    xml = aps.get_record(fulltext_file)
                    record.add_metadata_by_string(xml)
                except ApsPackageXMLError, e:
                    # This must be old-format XML
                    write_message("Warning: old-style metadata detected for %s" %
                                  (fulltext_file))
                    # Remove any DTD info in the file before converting
                    cleaned_fulltext_file = remove_dtd_information(fulltext_file)
                    try:
                        convert_xml_using_saxon(cleaned_fulltext_file,
                                                CFG_APSHARVEST_XSLT)

                        # Conversion is a success. Let's derive location of converted file
                        source_directory = os.path.dirname(cleaned_fulltext_file)
示例#3
0
 def setUp(self):
     """Prepare the ApsPackage fixture shared by the tests.

     Creates an ``ApsPackage`` from ``journal_mappings``, stores it on
     ``self.aps``, and sets its ``document`` attribute to the result of
     ``parse`` applied to ``aps_test_record`` joined onto
     ``dirname(folder)``.
     """
     package = ApsPackage(journal_mappings)
     self.aps = package
     # Resolve the sample record path only after the package exists, so the
     # order of calls matches the fixture's original behaviour.
     sample_path = join(dirname(folder), aps_test_record)
     package.document = parse(sample_path)