示例#1
0
文件: models.py 项目: doecode/scraper
    def from_doecode(klass, record):
        """
        Create CodeGovProject object from DOE CODE record

        Builds a new ``klass()`` instance and fills in the Code.gov project
        fields from the DOE CODE ``record``.

        Args:
            klass: Callable returning the empty project. The returned object
                must support item assignment and already hold mapping
                values under ``"permissions"`` and ``"contact"``.
            record (dict): DOE CODE record. ``software_title``,
                ``description``, ``licenses``, ``open_source`` and ``owner``
                are read unconditionally; ``proprietary_url`` is read when
                ``licenses`` contains ``"Other"``; ``ever_announced`` must
                be present and not ``None``. All other keys are optional.

        Returns:
            The populated project object.

        Raises:
            TypeError: If ``record`` is not a dict.
            KeyError: If one of the unconditionally-read keys is missing.
            ValueError: If ``ever_announced`` is missing or ``None``.
        """
        if not isinstance(record, dict):
            raise TypeError("`record` must be a dict")

        project = klass()

        # -- REQUIRED FIELDS --

        project["name"] = record["software_title"]
        logger.debug('DOE CODE: software_title="%s"', record["software_title"])

        link = record.get("repository_link", "")
        if not link:
            # Fall back to the landing page; this is None when the record
            # has no landing page either.
            link = record.get("landing_page")
            logger.debug("DOE CODE: No repositoryURL, using landing_page: %s",
                         link)

        project["repositoryURL"] = link

        project["description"] = record["description"]

        # De-duplicate while keeping the record's own ordering so the
        # generated license list is stable from run to run (iterating a set
        # of strings is not).
        seen = set()
        licenses = []
        for name in record["licenses"]:
            if name is None or name in seen:
                continue
            seen.add(name)
            licenses.append(name)
        logger.debug("DOE CODE: licenses=%s", licenses)

        # "Other" carries the record's proprietary URL and is listed first;
        # every remaining license goes through _license_obj.
        license_objects = []
        if "Other" in seen:
            license_objects.append({
                "name": "Other",
                "URL": record["proprietary_url"]
            })
        license_objects.extend(
            _license_obj(name) for name in licenses if name != "Other")

        project["permissions"]["licenses"] = license_objects

        if record["open_source"]:
            usage_type = "openSource"
        else:
            usage_type = "exemptByLaw"
            project["permissions"][
                "exemptionText"] = "This source code is restricted by patent and / or intellectual property law."

        project["permissions"]["usageType"] = usage_type

        # A missing or None labor_hours is reported as 0.
        labor_hours = record.get("labor_hours")
        project["laborHours"] = labor_hours if labor_hours is not None else 0

        project["tags"] = ["DOE CODE"]
        lab_name = record.get("lab_display_name")
        if lab_name is not None:
            project["tags"].append(lab_name)

        project["contact"]["email"] = record["owner"]
        # contact URL / name / phone are not populated from DOE CODE records.

        # -- OPTIONAL FIELDS --

        version = record.get("version_number")
        if version:
            project["version"] = version

        if lab_name is not None:
            project["organization"] = lab_name

        # Currently, can't be an empty string, see: https://github.com/GSA/code-gov-web/issues/370
        announced = record.get("ever_announced")
        if announced is None:
            raise ValueError(
                'DOE CODE: Unable to determine "ever_announced" value!')
        project["status"] = "Production" if announced else "Development"

        # Only GitHub links are recognised. `link` may be None (no repository
        # link and no landing page), so guard before the substring test,
        # which would otherwise raise TypeError.
        if link and "github.com" in link:
            project["vcs"] = "git"
        else:
            logger.debug(
                'DOE CODE: Unable to determine vcs for: name="%s", repositoryURL=%s',
                project["name"],
                link,
            )

        homepage = record.get("landing_page", "")
        if homepage:
            project["homepageURL"] = homepage

        # Not populated here: downloadURL, disclaimerText, disclaimerURL,
        # relatedCode, reusedCode.

        if "programming_languages" in record:
            project["languages"] = record["programming_languages"]

        # partners is not populated.
        # TODO: Look into using record['contributing_organizations']

        # date: [object] A date object describing the release.
        #   created: [string] The date the release was originally created, in YYYY-MM-DD or ISO 8601 format.
        #   lastModified: [string] The date the release was modified, in YYYY-MM-DD or ISO 8601 format.
        #   metadataLastUpdated: [string] The date the metadata of the release was last updated, in YYYY-MM-DD or ISO 8601 format.
        # lastModified is not set; the block is added only when both source
        # dates are present.
        if "date_record_added" in record and "date_record_updated" in record:
            project["date"] = {
                "created": record["date_record_added"],
                "metadataLastUpdated": record["date_record_updated"],
            }

        return project
示例#2
0
    def from_doecode(klass, record):
        """
        Create CodeGovProject object from DOE CODE record

        Builds a new ``klass()`` instance and fills in the Code.gov project
        fields from the DOE CODE ``record``.

        Args:
            klass: Callable returning the empty project. The returned object
                must support item assignment and already hold mapping
                values under ``'permissions'`` and ``'contact'``.
            record (dict): DOE CODE record. ``software_title``,
                ``description``, ``licenses``, ``open_source`` and ``owner``
                are read unconditionally; ``proprietary_url`` is read when
                ``licenses`` contains ``'Other'``; ``ever_announced`` must
                be present and not ``None``. All other keys are optional.

        Returns:
            The populated project object.

        Raises:
            TypeError: If ``record`` is not a dict.
            KeyError: If one of the unconditionally-read keys is missing.
            ValueError: If ``ever_announced`` is missing or ``None``.
        """
        if not isinstance(record, dict):
            raise TypeError('`record` must be a dict')

        project = klass()

        # -- REQUIRED FIELDS --

        project['name'] = record['software_title']
        logger.debug('DOE CODE: software_title="%s"', record['software_title'])

        link = record.get('repository_link', '')
        if not link:
            # Fall back to the landing page; this is None when the record
            # has no landing page either.
            link = record.get('landing_page')
            logger.warning(
                'DOE CODE: No repositoryURL, using landing_page: %s', link)

        project['repositoryURL'] = link

        project['description'] = record['description']

        # De-duplicate while keeping the record's own ordering so the
        # generated license list is stable from run to run (iterating a set
        # of strings is not).
        seen = set()
        licenses = []
        for name in record['licenses']:
            if name is None or name in seen:
                continue
            seen.add(name)
            licenses.append(name)
        logger.debug('DOE CODE: licenses=%s', licenses)

        # 'Other' carries the record's proprietary URL and is listed first;
        # every remaining license goes through _license_obj.
        license_objects = []
        if 'Other' in seen:
            license_objects.append({
                'name': 'Other',
                'URL': record['proprietary_url']
            })
        license_objects.extend(
            _license_obj(name) for name in licenses if name != 'Other')

        project['permissions']['licenses'] = license_objects

        if record['open_source']:
            usage_type = 'openSource'
        else:
            usage_type = 'exemptByLaw'
            project['permissions'][
                'exemptionText'] = 'This source code is restricted by patent and / or intellectual property law.'

        project['permissions']['usageType'] = usage_type

        # TODO: Compute from git repo
        project['laborHours'] = 0

        project['tags'] = ['DOE CODE']
        lab_name = record.get('lab_display_name')
        if lab_name is not None:
            project['tags'].append(lab_name)

        project['contact']['email'] = record['owner']
        # contact URL / name / phone are not populated from DOE CODE records.

        # -- OPTIONAL FIELDS --

        version = record.get('version_number')
        if version:
            project['version'] = version

        if lab_name is not None:
            project['organization'] = lab_name

        # Currently, can't be an empty string, see: https://github.com/GSA/code-gov-web/issues/370
        announced = record.get('ever_announced')
        if announced is None:
            raise ValueError(
                'DOE CODE: Unable to determine "ever_announced" value!')
        project['status'] = 'Production' if announced else 'Development'

        # Only GitHub links are recognised. `link` may be None (no repository
        # link and no landing page), so guard before the substring test,
        # which would otherwise raise TypeError.
        if link and 'github.com' in link:
            project['vcs'] = 'git'
        else:
            logger.debug(
                'DOE CODE: Unable to determine vcs for: name="%s", repositoryURL=%s',
                project['name'], link)

        homepage = record.get('landing_page', '')
        if homepage:
            project['homepageURL'] = homepage

        # Not populated here: downloadURL, disclaimerText, disclaimerURL,
        # relatedCode, reusedCode.

        if 'programming_languages' in record:
            project['languages'] = record['programming_languages']

        # partners is not populated.
        # TODO: Look into using record['contributing_organizations']

        # date: [object] A date object describing the release.
        #   created: [string] The date the release was originally created, in YYYY-MM-DD or ISO 8601 format.
        #   lastModified: [string] The date the release was modified, in YYYY-MM-DD or ISO 8601 format.
        #   metadataLastUpdated: [string] The date the metadata of the release was last updated, in YYYY-MM-DD or ISO 8601 format.
        # lastModified is not set; the block is added only when both source
        # dates are present.
        if 'date_record_added' in record and 'date_record_updated' in record:
            project['date'] = {
                'created': record['date_record_added'],
                'metadataLastUpdated': record['date_record_updated']
            }

        return project