Exemplo n.º 1
0
    def check_records(self):
        """
        Split the harvested records into new ones and already existing ones.

        Every record in C{self.records_harvested} that carries no record id
        is looked up by its DOI with C{get_record_from_doi}. Records that end
        up with a record id are reported as existing, the others as new.

        If the lookup raises C{APSHarvesterSearchError}, the error is logged,
        an XML file for the record is generated in C{self.out_folder}, a
        problem mail is sent to C{CFG_APSHARVEST_EMAIL} and the record is
        left out of both returned lists.

        @return: a tuple of (new_records, existing_records)
        @rtype: tuple
        """
        new_records = []
        existing_records = []
        for record in self.records_harvested:
            # Only search by DOI when the record id is not known yet.
            if not record.recid:
                try:
                    record.recid = get_record_from_doi(record.doi)
                except APSHarvesterSearchError as e:
                    write_message("Error while getting recid from %s: %s" %
                                  (record.doi, str(e)))

                    # Problem detected, send mail immediately. The value
                    # returned by generate_xml_for_records is reported as the
                    # "Path" in the mail body.
                    problem_rec = generate_xml_for_records(
                        records=[record],
                        directory=self.out_folder,
                        suffix="problem.xml")
                    subject = "APS harvest problem: %s" % \
                              (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                    body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                           (record.doi, str(e), problem_rec)
                    submit_records_via_mail(subject, body,
                                            CFG_APSHARVEST_EMAIL)
                    # The record could not be classified: skip it entirely.
                    continue

            # After the optional lookup, a set record id means the record
            # already exists on the system.
            if record.recid:
                existing_records.append(record)
            else:
                new_records.append(record)

        # Hand the classification back to the caller, as the docstring
        # promises; without this the method would return None.
        return new_records, existing_records
Exemplo n.º 2
0
    def check_records(self):
        """
        Checks which of the harvested records already exist on the system
        and returns the records sorted into new and existing ones.

        A record without a record id is searched for by DOI using
        C{get_record_from_doi}; the result is stored in C{record.recid}.
        When that search raises C{APSHarvesterSearchError} the problem is
        logged and mailed to C{CFG_APSHARVEST_EMAIL}, and the record appears
        in neither of the returned lists.

        @return: a tuple of (new_records, existing_records)
        @rtype: tuple
        """
        # We check if any records already exist
        new_records = []
        existing_records = []
        for record in self.records_harvested:
            # Do we already have the record id perhaps?
            if not record.recid:
                try:
                    record.recid = get_record_from_doi(record.doi)
                except APSHarvesterSearchError as e:
                    write_message("Error while getting recid from %s: %s" %
                                  (record.doi, str(e)))

                    # Problem detected, send mail immediately. The generated
                    # XML (written with the "problem.xml" suffix) is
                    # referenced in the mail body under "Path".
                    problem_rec = generate_xml_for_records(records=[record],
                                                           directory=self.out_folder,
                                                           suffix="problem.xml")
                    subject = "APS harvest problem: %s" % \
                              (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                    body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                           (record.doi, str(e), problem_rec)
                    submit_records_via_mail(subject, body, CFG_APSHARVEST_EMAIL)
                    # Failed lookups are reported only, never classified.
                    continue

            # What about now? A record id means the record is already known.
            if record.recid:
                existing_records.append(record)
            else:
                new_records.append(record)

        # Return the two lists as documented; the loop alone would leave the
        # caller with None.
        return new_records, existing_records
Exemplo n.º 3
0
        # Turn dates back into strings (away from datetime object)
        harvest_from_date = harvest_from_date.strftime("%Y-%m-%d")
        harvest_until_date = harvest_until_date.strftime("%Y-%m-%d")
        final_record_list = harvest_aps(harvest_from_date,
                                        harvest_until_date,
                                        perpage)
    else:
        # We use any given IDs or records from the local Invenio instance.
        if len(parameters.get("dois")) > 0:
            write_message("Parsing DOIs...")

            # We are doing DOIs, we need to get record ids
            for doi in parameters.get("dois").split(','):
                doi = doi.strip()
                try:
                    recid = get_record_from_doi(doi)
                except APSHarvesterSearchError, e:
                    write_message("Error while getting recid from %s: %s" %
                                  (doi, str(e)))
                    continue
                if not recid:
                    # Record not found on the system, we harvest from APS
                    write_message("No recid found, we get record from APS")
                    recid = None
                final_record_list.append(APSRecord(recid, doi))

        if len(parameters.get("recids")) > 0:
            write_message("Parsing record IDs...")

            # We are doing rec ids
            recids = split_cli_ids_arg(parameters.get("recids"))