def parse_scm_connection(scm_connection):
    """
    Return an SPDX vcs_url built from a Maven `scm_connection` string.
    The input string is returned unchanged when it does not split in three
    segments.

    The expected layout is described at
    https://maven.apache.org/scm/scm-url-format.html

        scm:<scm_provider><delimiter><provider_specific_part>

    for instance:
        scm:git:git://server_name[:port]/path_to_repository
        scm:git:http://server_name[:port]/path_to_repository
        scm:git:https://server_name[:port]/path_to_repository
        scm:git:ssh://server_name[:port]/path_to_repository
        scm:git:file://[hostname]/path_to_repository
    """
    # a pipe is used as the delimiter when present, otherwise a colon
    if '|' in scm_connection:
        separator = '|'
    else:
        separator = ':'

    parts = scm_connection.split(separator, 2)
    if len(parts) != 3:
        # not parseable: hand the string back untouched
        return scm_connection

    # the leading "scm" segment is not used
    tool, url = parts[1], parts[2]

    # TODO: vcs_tool is not yet supported
    # keep the raw URL when normalization yields nothing
    url = normalize_vcs_url(url, vcs_tool=tool) or url

    # only prefix with the tool when the URL carries neither a known VCS
    # prefix nor the tool name already
    if url.startswith(VCS_URLS) or url.startswith(tool):
        return url
    return '{}+{}'.format(tool, url)
def build_vcs_and_code_view_urls(scm):
    """
    Return a (vcs_url, code_view_url) tuple computed from a Maven `scm`
    mapping. Either item is None when it cannot be determined.

    The `connection` value is parsed as a Maven SCM connection string and the
    `url` value is normalized as the code view URL. Without a `connection`,
    the code view URL doubles as the vcs_url.

    For example:
    >>> scm = dict(connection='scm:git:[email protected]:histogrammar/histogrammar-scala.git', tag='HEAD', url='https://github.com/histogrammar/histogrammar-scala')

    NOTE(review): the `connection` value of this example looks like it had
    its `user@host` part obfuscated; confirm against the upstream source.
    """
    connection = scm.get('connection') or None
    browse_url = scm.get('url') or None

    if browse_url:
        # keep the raw URL if it cannot be normalized
        browse_url = normalize_vcs_url(browse_url) or browse_url

    if connection:
        # TODO: handle tag
        # vcs_tag = scm.get('tag')
        return parse_scm_connection(connection), browse_url

    # no connection: we can craft a vcs_url from the code view URL if any
    return browse_url, browse_url
def vcs_repository_mapper(repo, package, vcs_revision=None):
    """
    Set `package.vcs_url` from an npm-style `repo` value and return `package`.
    `package` is returned unchanged when no repository URL can be derived.

    `repo` may be a URL string, a mapping with "type" and "url" keys, or a
    list whose first item is one of these. When provided, `vcs_revision` is
    appended to the URL after an "@".

    See https://docs.npmjs.com/files/package.json#repository
        "repository" :
          { "type" : "git"
          , "url" : "https://github.com/npm/npm.git"
          }
        "repository" :
          { "type" : "svn"
          , "url" : "https://v8.googlecode.com/svn/trunk/"
          }
    """
    if not repo:
        return package

    if isinstance(repo, list):
        # There is a case where we can have a list with a single element
        repo = repo[0]

    tool_prefix = ''
    repository = ''

    if isinstance(repo, str):
        repository = normalize_vcs_url(repo)

    elif isinstance(repo, dict):
        normalized = normalize_vcs_url(repo.get('url'))
        if normalized:
            repository = normalized
            declared_tool = repo.get('type') or 'git'
            # only keep the tool as a prefix if the URL does not already
            # start with it
            if not normalized.startswith(declared_tool):
                tool_prefix = declared_tool

    if not repository:
        return package

    vcs_url = repository
    if tool_prefix:
        vcs_url = '{}+{}'.format(tool_prefix, repository)

    if vcs_revision:
        vcs_url += '@' + vcs_revision

    package.vcs_url = vcs_url
    return package
def build_vcs_and_code_view_urls(scm):
    """
    Return a mapping with `vcs_url` and `code_view_url` keys computed from a
    Maven `scm` mapping. A value is None when it cannot be determined.

    The `connection` value is parsed as a Maven SCM connection string and the
    `url` value is normalized as the code view URL. Without a `connection`,
    the code view URL doubles as the vcs_url.

    For example:
    >>> scm = dict(
    ...     connection='scm:git:[email protected]:histogrammar/histogrammar-scala.git',
    ...     tag='HEAD',
    ...     url='https://github.com/histogrammar/histogrammar-scala')
    >>> expected = {
    ...     'vcs_url': 'git+https://github.com/histogrammar/histogrammar-scala.git',
    ...     'code_view_url': 'https://github.com/histogrammar/histogrammar-scala'}
    >>> assert build_vcs_and_code_view_urls(scm) == expected

    NOTE(review): the `connection` value of this example looks like it had
    its `user@host` part obfuscated; confirm against the upstream source.
    """
    connection = scm.get('connection') or None
    code_view_url = scm.get('url') or None

    if code_view_url:
        # keep the raw URL if it cannot be normalized
        code_view_url = normalize_vcs_url(code_view_url) or code_view_url

    if connection:
        # TODO: handle tag
        # vcs_tag = scm.get('tag')
        vcs_url = parse_scm_connection(connection)
    else:
        # we can craft a vcs_url from the code view URL in some cases
        vcs_url = code_view_url

    return {'vcs_url': vcs_url, 'code_view_url': code_view_url}
def test_normalize_vcs_url_0(self):
    # a bare "owner/repo" shorthand is expected to become a GitHub https URL
    shorthand = 'npm/npm'
    assert 'https://github.com/npm/npm' == normalize_vcs_url(shorthand)
def test_normalize_vcs_url_does_not_fail_on_empty(self):
    # None, empty and blank inputs must all yield None without raising;
    # identity is the correct check for the None singleton
    assert normalize_vcs_url(None) is None
    assert normalize_vcs_url('') is None
    assert normalize_vcs_url(' ') is None
def test_normalize_vcs_url_11(self):
    # an https GitHub URL ending in .git is expected to come back unchanged
    url = 'https://github.com/christkv/kerberos.git'
    actual = normalize_vcs_url(url)
    assert 'https://github.com/christkv/kerberos.git' == actual
def test_normalize_vcs_url_9(self):
    # a plain http GitHub URL is expected to come back unchanged
    assert 'http://github.com/isaacs/nopt' == normalize_vcs_url(
        'http://github.com/isaacs/nopt')
def test_normalize_vcs_url_7(self):
    # the input is expected to be rewritten as an https GitHub URL
    # NOTE(review): the input literal looks like an obfuscated `user@host`
    # prefix -- confirm against the upstream test data
    actual = normalize_vcs_url('[email protected]:balderdashy/waterline-criteria.git')
    assert 'https://github.com/balderdashy/waterline-criteria.git' == actual
def test_normalize_vcs_url_5(self):
    # a git:// URL ending in .git is expected to come back unchanged
    git_url = 'git://github.com/angular/di.js.git'
    assert 'git://github.com/angular/di.js.git' == normalize_vcs_url(git_url)
def test_normalize_vcs_url_3(self):
    # the "gitlab:" shorthand is expected to expand to a gitlab.com https URL
    actual = normalize_vcs_url('gitlab:another/repo')
    assert 'https://gitlab.com/another/repo' == actual
def test_normalize_vcs_url_svn(self):
    # an http URL with a trailing slash is expected to come back unchanged
    assert 'http://svn.example.org/projectA/' == normalize_vcs_url(
        'http://svn.example.org/projectA/')
def test_normalize_vcs_url_basic(self):
    # a host-only https URL is expected to come back unchanged
    assert 'https://pear2.php.net' == normalize_vcs_url('https://pear2.php.net')
def get_normalized_package_data(manifest_main_section):
    """
    Return a mapping of package-like data normalized from a mapping of the
    `manifest_main_section` data or None.

    `manifest_main_section` is a mapping of JAR manifest main section
    attribute names to string values. None is returned when this mapping is
    empty or holds a single entry (e.g., only a manifest version).

    The returned mapping has these keys: type, namespace, name, version,
    description, declared_license, copyright, homepage_url,
    documentation_url, parties, vcs_url, code_view_url and notes.

    Maven Archiver does this:
        Manifest-Version: 1.0
        Created-By: Apache Maven ${maven.version}
        Built-By: ${user.name}
        Build-Jdk: ${java.version}
        Specification-Title: ${project.name}
        Specification-Vendor: ${project.organization.name}
        Implementation-Title: ${project.name}
        Implementation-Vendor-Id: ${project.groupId}
        Implementation-Version: ${project.version}
        Implementation-Vendor: ${project.organization.name}
        Implementation-URL: ${project.url}
    See https://maven.apache.org/shared/maven-archiver/examples/manifest.html
    """
    if not manifest_main_section or len(manifest_main_section) == 1:
        # only a manifest version
        return

    def dget(s):
        # Return the value for attribute `s`, or None when it is missing or
        # starts with '%', '$' or '{' (see the %pluginName and
        # ${project.groupId} examples below).
        v = manifest_main_section.get(s)
        if v and v.startswith(('%', '$', '{')):
            v = None
        return v

    # Name, namespace, version
    #########################
    # from Eclipse OSGi
    # Bundle-SymbolicName: org.eclipse.ui.workbench.compatibility
    # Bundle-SymbolicName: org.eclipse.ui.intro.universal;singleton:=true
    b_sym_name = dget('Bundle-SymbolicName')
    if b_sym_name and ';' in b_sym_name:
        # drop the directives that follow the symbolic name
        b_sym_name, _, _ = b_sym_name.partition(';')
    is_osgi_bundle = bool(b_sym_name)

    # Implementation-Title: org.apache.xerces.impl.Version
    # Implementation-Title: Apache Commons IO
    i_title = dget('Implementation-Title')
    i_title_is_id = is_id(i_title)

    # if present this is typically gid.aid (but with no clear split)
    # Extension-Name: org.apache.commons.logging
    ext_nm = dget('Extension-Name')
    if ext_nm == b_sym_name:
        ext_nm = None
    ext_nm_is_id = is_id(ext_nm)

    # Automatic-Module-Name: org.apache.commons.io
    am_nm = dget('Automatic-Module-Name')
    if am_nm == b_sym_name:
        am_nm = None
    am_nm_is_id = is_id(am_nm)

    # Name: Datalogic SDK
    nm = dget('Name')
    nm_is_id = is_id(nm)

    # this a namespace
    # Implementation-Vendor-Id: org.apache
    # Implementation-Vendor-Id: commons-io
    # Implementation-Vendor-Id: ${project.groupId}
    i_vendid = dget('Implementation-Vendor-Id')

    # Bundle-Version: 3.2.200.v20080610
    # Implementation-Version: 2.6.2
    # ImplementationVersion
    b_version = dget('Bundle-Version')
    i_version = dget('Implementation-Version') or dget('ImplementationVersion')

    # Descriptions
    #########################
    # the Bundle-Name is always a short description
    # Bundle-Name: DejaCode Toolkit
    # Bundle-Name: %pluginName
    # Bundle-Name: %fragmentName
    b_name = dget('Bundle-Name')

    # Bundle-Description: Apache Log4j 1.2
    b_desc = dget('Bundle-Description')

    s_title = dget('Specification-Title')
    if s_title in (i_title, b_name, b_desc,):
        # avoid repeating a title already available from another attribute
        s_title = None

    # Implementation-Title structured by Gradle if Gradle-Version: is present
    # Implementation-Title: com.netflix.hystrix#hystrix-rx-netty-metrics-stream;1.5.12
    it_namespace = it_name = it_version = None
    it_split = re.split('[#;]', i_title or '')
    if len(it_split) == 3:
        it_namespace, it_name, it_version = it_split
    has_gradle_structured_i_title = (
        i_title_is_id and it_namespace and it_name and it_version
    )

    # Set ns, name and version
    ##############################
    package_type = namespace = name = version = None
    descriptions = []

    # FIXME: may be we should then return each "personality"
    # we have several cases for names:

    # this is built with gradle and we have good id data
    if has_gradle_structured_i_title:
        package_type = 'maven'
        namespace = it_namespace
        name = it_name
        version = it_version
        descriptions = [nm, s_title, b_name, b_desc]

    # we have been created by maven archiver
    elif i_title and i_vendid and i_version:
        # TODO: improve name and namespace if ns is in name
        namespace = i_vendid
        name = i_title
        if i_title_is_id and not name.startswith(namespace):
            package_type = 'maven'
        else:
            package_type = 'jar'
        version = i_version
        descriptions = [b_name, b_desc]

    # TODO: add case with only title + version that can still be handled if title is dotted

    # this is an OSGi bundle and we have enough to build a bundle
    elif is_osgi_bundle:
        # no namespace
        name = b_sym_name
        version = b_version
        descriptions = [b_name, b_desc]
        package_type = 'osgi'

    # we have not much data
    else:
        package_type = 'jar'
        # no namespace
        version = i_version

        if i_title_is_id:
            name = i_title
            descriptions = [s_title, nm]
        elif am_nm_is_id:
            name = am_nm
            descriptions = [s_title, i_title, nm]
        elif ext_nm_is_id:
            name = ext_nm
            descriptions = [s_title, i_title, nm]
        elif nm_is_id:
            name = nm
            descriptions = [s_title, i_title]
        else:
            name = i_title or am_nm or ext_nm or nm
            descriptions = [s_title, i_title, nm]

    descriptions = unique(descriptions)
    # drop empty or blank items and anything that merely repeats the name
    descriptions = [d for d in descriptions if d and d.strip() and d != name]
    description = '\n'.join(descriptions)
    if description == name:
        description = None

    # create the mapping we will return
    package = {}
    package['type'] = package_type
    package['namespace'] = namespace
    package['name'] = name
    package['version'] = version
    package['description'] = description

    # licensing
    #########################
    # Bundle-License: http://www.apache.org/licenses/LICENSE-2.0.txt
    package['declared_license'] = dget('Bundle-License')
    # Bundle-Copyright: Apache 2.0
    package['copyright'] = dget('Bundle-Copyright')

    # URLs
    #########################
    # typically homepage or DOC
    # Implementation-Url
    # Implementation-URL: http://xml.apache.org/xerces2-j/
    package['homepage_url'] = dget('Implementation-URL') or dget(
        'Implementation-Url')

    # Bundle-DocURL: http://logging.apache.org/log4j/1.2
    package['documentation_url'] = dget('Bundle-DocURL')

    # vendor/owner/contact
    #########################
    package['parties'] = parties = []

    # Implementation-Vendor: Apache Software Foundation
    # Implementation-Vendor: The Apache Software Foundation
    i_vend = dget('Implementation-Vendor')
    if i_vend:
        parties.append(dict(role='vendor', name=i_vend))

    # Specification-Vendor: Sun Microsystems, Inc.
    s_vend = dget('Specification-Vendor')
    if s_vend == i_vend:
        s_vend = None
    if s_vend:
        parties.append(dict(role='spec-vendor', name=s_vend))

    # Bundle-Vendor: %providerName
    # Bundle-Vendor: %provider_name
    # Bundle-Vendor: Apache Software Foundation
    # Bundle-Vendor: http://supercsv.sourceforge.net/ and http://spiffyframe
    b_vend = dget('Bundle-Vendor') or dget('BundleVendor')
    if b_vend:
        v = dict(role='vendor', name=b_vend)
        # do not report the same vendor twice
        if v not in parties:
            parties.append(v)

    # Module-Email: [email protected]
    # Module-Owner: [email protected]
    m_email = dget('Module-Email')
    m_owner = dget('Module-Owner')
    if m_owner:
        o = dict(role='owner', name=m_owner)
        if m_email and m_email != m_owner:
            o['email'] = m_email
        parties.append(o)

    # VCS
    # the model is <vcs_tool>+<transport>://<host_name>[/<path_to_repository>][@<revision_tag_or_branch>][#<sub_path>]
    #########################
    vcs_url = None
    code_view_url = None

    m_vcs_url = dget('Module-Origin') or ''
    if m_vcs_url.strip():
        # this block comes from Gradle?
        # Module-Origin: [email protected]:Netflix/Hystrix.git
        # Module-Source: /hystrix-contrib/hystrix-rx-netty-metrics-stream
        # Branch: master
        # Change: a7b66ca

        # fall back to the raw value so that a failed normalization does not
        # end up as the literal text "None" in the URL
        m_vcs_url = normalize_vcs_url(m_vcs_url) or m_vcs_url.strip()

        m_vcs_rev = dget('Change') or dget('Branch') or ''
        m_vcs_rev = m_vcs_rev.strip()
        m_vcs_rev = m_vcs_rev and ('@' + m_vcs_rev)

        m_vcs_subpath = dget('Module-Source') or ''
        m_vcs_subpath = m_vcs_subpath.strip('/').strip()
        m_vcs_subpath = m_vcs_subpath and ('#' + m_vcs_subpath.strip('/'))

        vcs_url = '{}{}{}'.format(m_vcs_url, m_vcs_rev, m_vcs_subpath)
    else:
        # this block comes from Maven?
        # Scm-Url: http://github.com/fabric8io/kubernetes-model/kubernetes-model/
        # Scm-Connection: scm:git:https://github.com/fabric8io/zjsonpatch.git
        # Scm-Revision: ${buildNumber}
        # Scm-Revision: 4ec4abe2e7ac9e1a5e4be88e6dd09403592f9512
        s_vcs_url = dget('Scm-Url') or ''
        s_scm_connection = dget('Scm-Connection') or ''

        s_vcs_rev = dget('Scm-Revision') or ''
        s_vcs_rev = s_vcs_rev.strip()
        if s_vcs_rev:
            s_vcs_rev = '@' + s_vcs_rev

        if s_vcs_url.strip():
            code_view_url = s_vcs_url
            # same fallback as above when normalization yields nothing
            s_vcs_url = normalize_vcs_url(s_vcs_url) or s_vcs_url.strip()
            vcs_url = '{}{}'.format(s_vcs_url, s_vcs_rev)
        elif s_scm_connection.strip():
            # the revision is appended to the parsed connection URL; the
            # Scm-Url is empty in this branch and must not be used here
            parsed_connection = parse_scm_connection(s_scm_connection)
            vcs_url = '{}{}'.format(parsed_connection, s_vcs_rev)

    package['vcs_url'] = vcs_url
    package['code_view_url'] = code_view_url

    # Misc, unused for now
    #########################
    # Source:
    # Eclipse-SourceBundle: org.eclipse.jetty.websocket.api;version="9.4.12.v20180830";roots:="."
    # Deps:
    # Require-Bundle

    package['notes'] = dget('Comment')
    return package
def test_normalize_vcs_url_1(self):
    # the "gist:" shorthand is expected to expand to a gist.github.com URL
    actual = normalize_vcs_url('gist:11081aaa281')
    assert 'https://gist.github.com/11081aaa281' == actual
def test_normalize_vcs_url_github(self):
    # an https GitHub URL without .git is expected to come back unchanged
    assert 'https://github.com/igorw/monolog' == normalize_vcs_url(
        'https://github.com/igorw/monolog')
def test_normalize_vcs_url_2(self):
    # the "bitbucket:" shorthand is expected to expand to a bitbucket.org URL
    shorthand = 'bitbucket:example/repo'
    assert 'https://bitbucket.org/example/repo' == normalize_vcs_url(shorthand)
def test_normalize_vcs_url_bitbucket(self):
    # the input is expected to be rewritten as an https bitbucket.org URL
    # NOTE(review): the input literal looks like an obfuscated `user@host`
    # prefix -- confirm against the upstream test data
    assert 'https://bitbucket.org/vendor/my-private-repo.git' == normalize_vcs_url(
        '[email protected]:vendor/my-private-repo.git')
def test_normalize_vcs_url_4(self):
    # a bare "owner/repo" shorthand is expected to become a GitHub https URL
    actual = normalize_vcs_url('expressjs/serve-static')
    assert 'https://github.com/expressjs/serve-static' == actual
def test_normalize_vcs_url_does_not_pad_git_plus(self):
    # a URL already prefixed with "git+" is expected to come back unchanged
    git_plus_url = 'git+git://bitbucket.org/vendor/my-private-repo.git'
    assert git_plus_url == normalize_vcs_url(git_plus_url)
def test_normalize_vcs_url_6(self):
    # a git:// URL without .git is expected to come back unchanged
    assert 'git://github.com/hapijs/boom' == normalize_vcs_url(
        'git://github.com/hapijs/boom')
def test_normalize_vcs_url_does_not_pad_git_plus2(self):
    # a "git+https" URL is expected to come back unchanged
    actual = normalize_vcs_url(
        'git+https://github.com/stevepapa/angular2-autosize.git')
    assert 'git+https://github.com/stevepapa/angular2-autosize.git' == actual
def test_normalize_vcs_url_8(self):
    # an http GitHub URL ending in .git is expected to come back unchanged
    http_url = 'http://github.com/ariya/esprima.git'
    assert 'http://github.com/ariya/esprima.git' == normalize_vcs_url(http_url)
def test_normalize_vcs_url_git_repo_url_without_slash_slash(self):
    # the input carries no "//" and is expected to be rewritten as an
    # https GitHub URL
    # NOTE(review): the input literal looks like an obfuscated `user@host`
    # prefix -- confirm against the upstream test data
    actual = normalize_vcs_url('[email protected]/Filirom1/npm2aur.git')
    assert 'https://github.com/Filirom1/npm2aur.git' == actual
def test_normalize_vcs_url_10(self):
    # an https GitHub URL without .git is expected to come back unchanged
    assert 'https://github.com/chaijs/chai' == normalize_vcs_url(
        'https://github.com/chaijs/chai')
def test_normalize_vcs_url_does_not_fail_on_empty(self):
    # None, empty and blank inputs must all yield None without raising;
    # identity is the correct check for the None singleton
    assert normalize_vcs_url(None) is None
    assert normalize_vcs_url('') is None
    assert normalize_vcs_url(' ') is None
def test_normalize_vcs_url_13(self):
    # the input is expected to be rewritten as an https gitlab.com URL
    # NOTE(review): the input literal looks like an obfuscated `user@host`
    # prefix -- confirm against the upstream test data
    actual = normalize_vcs_url('[email protected]:foo/private.git')
    assert 'https://gitlab.com/foo/private.git' == actual
def test_normalize_vcs_url_12(self):
    # an https gitlab.com URL ending in .git is expected to come back unchanged
    actual = normalize_vcs_url('https://gitlab.com/foo/private.git')
    assert actual == 'https://gitlab.com/foo/private.git'