def __convert_findbugs_xml(url, findbugs_xml):
    """Parse a FindBugs XML report and fetch the POM that belongs to its jar.

    The report is parsed with ``xmldict``, round-tripped through JSON, and
    every ``"@`` sequence in the serialized JSON is replaced by ``"``.  The
    POM is then downloaded (its URL is ``url`` with '.jar' replaced by
    '.pom'), its dependency list is extracted, and the downloaded file is
    removed again.

    :param url: URL of the analysed jar.
    :param findbugs_xml: FindBugs XML handed straight to ``xmldict.parse``
        (presumably the report text -- confirm against the caller).
    """
    import xmldict
    import json

    # Stripping '"@' turns keys such as "@name" into "name" (presumably the
    # prefix xmldict gives to XML attributes -- confirm).
    result_json = json.loads(json.dumps(xmldict.parse(findbugs_xml)).replace('"@', '"'))

    # Fix: the lookup result used to be bound to `url_arr` while the lines
    # below read the undefined name `metadata`, raising NameError.
    metadata = get_metadata_from_url(url)
    _jar_filename = metadata['jar_filename']
    _version = metadata['version']
    _artifact_id = metadata['artifact_id']
    _group_id = metadata['group_id']

    # get pom information
    _pom_url = url.replace('.jar', '.pom')
    _pom_filename = _jar_filename.replace('.jar', '.pom')
    log.info('Downloading POM: %s -> %s' % (_pom_url, _pom_filename))
    urllib.urlretrieve(_pom_url, _pom_filename)

    _dependencies = []
    if os.path.exists(_pom_filename):
        try:
            # `with` closes the handle before the file is removed below.
            with open(_pom_filename, 'r') as pom_file:
                _pom_json = json.loads(json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get('project', {}).get('dependencies', {}).get('dependency', [])
        except Exception:
            # Best effort: a broken POM only means no dependency information.
            log.warn('Could not download/parse data from %s' % (_pom_filename,))
        os.remove(_pom_filename)

    # NOTE(review): the visible part of this function ends here without a
    # return; the locals above are kept because trailing code may use them.
def convert_findbugs_xml(findbugs_xml):
    """Load a FindBugs XML report from disk and locate the artefacts next to its jar.

    The report is parsed with ``xmldict`` and round-tripped through JSON,
    replacing every ``"@`` in the serialized JSON with ``"``.  The path of the
    analysed jar is read from ``BugCollection/Project/Jar``; the POM path
    (same path with '.jar' -> '.pom') and a ``maven-metadata.xml`` path two
    directory levels above the jar are derived from it.  If the POM exists,
    its dependency list is extracted.

    :param findbugs_xml: path of the FindBugs XML report file.
    :raises Exception: when the report has no ``Jar`` entry.
    """
    import xmldict
    import json

    # Read through a context manager so the report handle is always closed.
    with open(findbugs_xml, "r") as report_file:
        xml_data = report_file.read()
    result_json = json.loads(json.dumps(xmldict.parse(xml_data)).replace('"@', '"'))

    # -1: jar, -2: version, -3: artifact_id, -4: group_id
    _full_name = result_json.get("BugCollection", {}).get("Project", {}).get("Jar", None)
    if _full_name is None:
        raise Exception("Jar tag is not found (%s)" % (result_json,))

    print("Processing ... %s" % (_full_name,))

    _jar_filename = os.path.basename(_full_name)
    _pom_filename = _full_name.replace(".jar", ".pom")
    # Dropping the last two path components (version directory and jar name)
    # leaves the directory the metadata file is expected in.
    _metadata_filename = "%s/maven-metadata.xml" % ("/".join(_full_name.split("/")[:-2]),)

    # NOTE(review): several of these locals are not used in the visible part
    # of the function; they are kept because trailing code may rely on them.
    _dependencies = []
    _versions = []
    _version = None
    _artifact_id = None
    _group_id = None
    _version_order = 0

    if os.path.exists(_pom_filename):
        try:
            with open(_pom_filename, "r") as pom_file:
                _pom_json = json.loads(json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get("project", {}).get("dependencies", {}).get("dependency", [])
        except Exception as e:
            # Best effort: report the problem and continue without dependencies.
            print("Could not download/parse data from %s (%s)" % (_pom_filename, e))
def main():
    """Download every published jar of the projects listed in data/bugless_projects.json.

    Each input line has the form ``group_id||artifact_id``.  For every project
    the remote ``maven-metadata.xml`` is saved as ``work/maven-metadata.xml.<n>``
    and each version it lists is fetched to ``work/<artifact>-<version>.jar``.
    The URL of every downloaded jar is printed.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2/'
    c = 0

    # `with` closes the project list even when a download or parse fails.
    with open('data/bugless_projects.json', 'r') as fp:
        for l in fp:
            arr = l.strip().split('||')
            if len(arr) < 2:
                # Blank or malformed line: nothing to download (previously an IndexError).
                continue

            group_id = arr[0]
            artifact_id = arr[1]
            c += 1

            maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url, )
            local_maven_metadata_name = 'work/maven-metadata.xml.%d' % (c, )
            urllib.urlretrieve(maven_metadata_name, local_maven_metadata_name)

            with open(local_maven_metadata_name, 'r') as metadata_file:
                json_xml = xmldict.parse(metadata_file.read())
            versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

            # A single <version> arrives as a scalar, several as a list, and
            # none at all as None; the latter used to build a ".../None/..." URL.
            if versions is None:
                version_list = []
            elif isinstance(versions, list):
                version_list = list(versions)
            else:
                version_list = [versions]

            for v in version_list:
                vfile = '%s-%s.jar' % (artifact_id, v)
                vfile_url = '%s%s/%s' % (maven_base_url, v, vfile)
                urllib.urlretrieve(vfile_url, 'work/%s' % (vfile, ))
                print(vfile_url)
def main():
    """Fetch all jar versions for the projects named in data/bugless_projects.json.

    Input lines look like ``group_id||artifact_id``.  The project's
    ``maven-metadata.xml`` is downloaded to ``work/maven-metadata.xml.<n>``,
    parsed with ``xmldict``, and every listed version is downloaded to
    ``work/<artifact>-<version>.jar``; each jar URL is printed afterwards.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2/'
    c = 0

    # The context manager guarantees the list file is closed on any exit path.
    with open('data/bugless_projects.json', 'r') as fp:
        for l in fp:
            arr = l.strip().split('||')
            # Skip blank/malformed lines instead of failing on arr[1].
            if len(arr) < 2:
                continue
            group_id, artifact_id = arr[0], arr[1]
            c += 1

            maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)
            local_maven_metadata_name = 'work/maven-metadata.xml.%d' % (c,)
            urllib.urlretrieve(maven_metadata_name, local_maven_metadata_name)

            with open(local_maven_metadata_name, 'r') as metadata_file:
                metadata_text = metadata_file.read()
            json_xml = xmldict.parse(metadata_text)
            versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

            # Normalise to a list: several versions come as a list, one as a
            # scalar.  A missing entry (None) is dropped so no
            # "<artifact>-None.jar" is requested.
            version_list = []
            if isinstance(versions, list):
                version_list.extend(versions)
            elif versions is not None:
                version_list.append(versions)

            for v in version_list:
                vfile = '%s-%s.jar' % (artifact_id, v)
                vfile_url = '%s%s/%s' % (maven_base_url, v, vfile)
                urllib.urlretrieve(vfile_url, 'work/%s' % (vfile,))
                print(vfile_url)
def convert_findbugs_xml(findbugs_xml):
    """Read a FindBugs XML report file and gather data about the jar it describes.

    The report is parsed with ``xmldict``; the result is serialized to JSON,
    every ``"@`` is replaced by ``"``, and the text is loaded back.  The jar
    path comes from ``BugCollection/Project/Jar``.  From it the POM path
    ('.jar' replaced by '.pom') and a ``maven-metadata.xml`` path two levels
    above the jar are derived, and the POM's dependency list is read when the
    POM file exists.

    :param findbugs_xml: path of the FindBugs XML report file.
    :raises Exception: when no ``Jar`` entry is present in the report.
    """
    import xmldict
    import json

    # Close the report handle deterministically instead of leaking it.
    with open(findbugs_xml, 'r') as report_file:
        xml_data = report_file.read()
    result_json = json.loads(
        json.dumps(xmldict.parse(xml_data)).replace('"@', '"'))

    # -1: jar, -2: version, -3: artifact_id, -4: group_id
    _full_name = result_json.get('BugCollection', {}).get('Project', {}).get('Jar', None)
    if _full_name is None:
        raise Exception('Jar tag is not found (%s)' % (result_json, ))

    print('Processing ... %s' % (_full_name, ))

    _jar_filename = os.path.basename(_full_name)
    _pom_filename = _full_name.replace('.jar', '.pom')
    # Strip the jar name and its version directory to reach the metadata file.
    _metadata_filename = '%s/maven-metadata.xml' % ('/'.join(
        _full_name.split('/')[:-2]), )

    # NOTE(review): some of these locals are unused in the visible part of
    # the function; they are retained in case trailing code reads them.
    _dependencies = []
    _versions = []
    _version = None
    _artifact_id = None
    _group_id = None
    _version_order = 0

    if os.path.exists(_pom_filename):
        try:
            with open(_pom_filename, 'r') as pom_file:
                _pom_json = json.loads(
                    json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get('project', {}).get('dependencies', {}).get('dependency', [])
        except Exception as e:
            # Best effort: a bad POM is reported but does not abort processing.
            print('Could not download/parse data from %s (%s)' % (
                _pom_filename, e))
def main():
    """Report which versions in a local Maven mirror have no stored document.

    For every project from ``load_projects_json()`` the local
    ``maven-metadata.xml`` is parsed and each listed version is looked up via
    ``get_version``.  A version with no documents is counted as missing and
    logged to stderr.  If its jar is absent locally, or ``has_classes``
    rejects it, it is also counted as "really missing"; otherwise a
    ``findbugs`` command line for the jar is printed to stdout.  Totals are
    written to stderr at the end.
    """
    base_url = '/Users/bkarak/devel/repositories/maven/maven/'
    col_obj = get_mongo_connection()[MONGO_COL]
    projects = load_projects_json()

    total_jars = 0
    missing = 0
    really_missing = 0

    for proj in projects:
        group_id = proj.group_id().strip()
        artifact_id = proj.artifact_id().strip()
        maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
        maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

        if not os.path.exists(maven_metadata_name):
            continue

        # `with` closes the metadata handle, which used to leak per project.
        with open(maven_metadata_name, 'r') as metadata_file:
            json_xml = xmldict.parse(metadata_file.read())
        versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

        # One version arrives as a scalar, several as a list.
        version_list = versions if isinstance(versions, list) else [versions]

        for v in version_list:
            # NOTE(review): the guard is taken to cover only the strip(); the
            # collapsed source does not show how far it originally extended.
            if v is not None:
                v = v.strip()

            docs = get_version(col_obj, group_id, artifact_id, v)
            total_jars += 1

            if len(docs) != 0:
                continue

            missing += 1
            sys.stderr.write('[%d]: Missing %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
            local_jar_path = '%s%s/%s-%s.jar' % (maven_base_url, v, artifact_id, v)

            if not os.path.exists(local_jar_path):
                sys.stderr.write('[%d]: Invalid Jar: %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
                really_missing += 1
            elif has_classes(local_jar_path):
                sys.stderr.write('ADDED: Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
                # stdout carries only the generated commands so it can be redirected to a script.
                print("findbugs -textui -xml -output `basename %s`-findbugs.xml %s" % (local_jar_path, local_jar_path))
            else:
                really_missing += 1
                sys.stderr.write('HAS_NO_CLASSES: %s\n' % (local_jar_path,))

    sys.stderr.write('Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
def main():
    """Check the projects in data/missing_versions.txt against the remote mirror.

    Each input line is ``group_id||artifact_id``.  The project's remote
    ``maven-metadata.xml`` is fetched and every listed version is looked up
    via ``get_version``.  Versions with no documents are printed as missing;
    when ``url_exists`` reports that the jar is not on the mirror either, the
    jar location is printed and counted as really missing.  Any failure for a
    project is printed and the loop moves on to the next line.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2'
    col_obj = get_mongo_connection()[MONGO_COL]

    total_jars = 0
    missing = 0
    really_missing = 0

    # Fix: the list file and the HTTP response used to share the name `fp`,
    # so neither was ever closed.  They now have separate names and are
    # both closed deterministically.
    with open('data/missing_versions.txt', 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line:
                # A blank line used to raise ValueError outside the try below.
                continue
            (group_id, artifact_id) = line.split('||')

            maven_base_url = '%s/%s/%s/' % (base_url, group_id.replace(
                '.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url, )

            try:
                response = urlopen(maven_metadata_name)
                try:
                    json_xml = xmldict.parse(response.read())
                finally:
                    response.close()

                versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')
                # One version arrives as a scalar, several as a list.
                version_list = versions if isinstance(versions, list) else [versions]

                for v in version_list:
                    docs = get_version(col_obj, group_id, artifact_id, v)
                    total_jars += 1

                    if len(docs) == 0:
                        missing += 1
                        print('[%d]: Missing %s||%s||%s' % (total_jars, group_id, artifact_id, v))
                        url_jar_path = '/maven2/%s/%s/%s/%s-%s.jar' % (
                            group_id.replace('.', '/'), artifact_id, v, artifact_id, v)

                        # NOTE(review): the jar location is assumed to be printed
                        # only for jars absent from the mirror; the collapsed
                        # source does not show the original nesting.
                        if not url_exists('mirrors.ibiblio.org', url_jar_path):
                            really_missing += 1
                            print('%s%s' % ('mirrors.ibiblio.org', url_jar_path))
            except Exception as e:
                # Deliberate best effort: one bad project must not stop the scan.
                print('[%d] ERROR: %s (%s)' % (total_jars, maven_metadata_name, e))
                continue
def main():
    """Look up every remote version of the projects in data/missing_versions.txt.

    Input lines are ``group_id||artifact_id``.  For each project the remote
    ``maven-metadata.xml`` is read, each version is queried with
    ``get_version``, and versions without documents are reported.  If
    ``url_exists`` also says the jar is not served by the mirror, the jar
    location is printed and the "really missing" counter is increased.
    Errors for a single project are printed and skipped.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2'
    mirror_host = 'mirrors.ibiblio.org'
    col_obj = get_mongo_connection()[MONGO_COL]

    total_jars = 0
    missing = 0
    really_missing = 0

    # Fix: `fp` was rebound to the urlopen() response inside the loop, which
    # left both the list file and every response unclosed.
    with open('data/missing_versions.txt', 'r') as fp:
        for line in fp:
            entry = line.strip()
            # Skip blank lines; unpacking them used to raise outside the try.
            if not entry:
                continue
            (group_id, artifact_id) = entry.split('||')
            group_path = group_id.replace('.', '/')

            maven_base_url = '%s/%s/%s/' % (base_url, group_path, artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

            try:
                response = urlopen(maven_metadata_name)
                try:
                    metadata_text = response.read()
                finally:
                    response.close()
                json_xml = xmldict.parse(metadata_text)

                versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')
                version_list = []
                if isinstance(versions, list):
                    version_list.extend(versions)
                else:
                    version_list.append(versions)

                for v in version_list:
                    docs = get_version(col_obj, group_id, artifact_id, v)
                    total_jars += 1
                    if len(docs) != 0:
                        continue

                    missing += 1
                    print('[%d]: Missing %s||%s||%s' % (total_jars, group_id, artifact_id, v))
                    url_jar_path = '/maven2/%s/%s/%s/%s-%s.jar' % (group_path, artifact_id, v, artifact_id, v)

                    # NOTE(review): printing is assumed to belong to the
                    # "not on the mirror" branch; the collapsed source does
                    # not show the original nesting.
                    if not url_exists(mirror_host, url_jar_path):
                        really_missing += 1
                        print('%s%s' % (mirror_host, url_jar_path))
            except Exception as e:
                # Best effort per project: report and carry on with the next line.
                print('[%d] ERROR: %s (%s)' % (total_jars, maven_metadata_name, e))
                continue
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# Test Naive conversion of Findbugs XML to JSON (using the xmldict utility)
#
# Authors:
#   Vassilios Karakoidas ([email protected])

import xmldict
import json

# Read the sample report as text and close the handle right away; the other
# xmldict.parse() calls in this file are given text, not an open file object.
with open('acegi-security-0.5.jar-findbugs.xml', 'r') as findbugs_xml:
    report_text = findbugs_xml.read()

# Dump the parsed report as JSON with every '"@' replaced by '"'.
print(json.dumps(xmldict.parse(report_text)).replace('"@', '"'))
except Exception, e: log.warn('Could not download/parse data from %s' % (_pom_filename,)) os.remove(_pom_filename) # get xml information _metadata_url = url.replace('%s/%s' % (_version, _jar_filename), 'maven-metadata.xml') _metadata_filename = '%s-metadata.xml' % (_jar_filename,) log.info('Downloading %s -> %s' % (_metadata_url, _metadata_filename)) urllib.urlretrieve(_metadata_url, _metadata_filename) _version_order = 0 if os.path.exists(_metadata_filename): try: _metadata_json = json.loads(json.dumps(xmldict.parse(open(_metadata_filename, 'r').read()))) _versions = _metadata_json.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version', []) if not isinstance(_versions, list): _versions = [_versions] _versions = [x.strip() for x in _versions] try: _version_order = _versions.index(_version.strip()) + 1 except ValueError, ve: log.warn('Could not find version (%s in %s): %s' % (_version, _versions, ve)) except Exception, e: log.warn('Could not parse data from %s: %s' % (_metadata_filename, e)) os.remove(_metadata_filename)
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# Test Naive conversion of Findbugs XML to JSON (using the xmldict utility)
#
# Authors:
#   Vassilios Karakoidas ([email protected])

import xmldict
import json

# The report is read inside a context manager so its handle is closed before
# parsing; xmldict.parse() receives the text, as it does elsewhere in this file.
with open('acegi-security-0.5.jar-findbugs.xml', 'r') as findbugs_xml:
    xml_text = findbugs_xml.read()

# Serialize the parsed report to JSON, replacing every '"@' with '"'.
converted = json.dumps(xmldict.parse(xml_text)).replace('"@', '"')
print(converted)