def main():
    """Count the projects that have at least one version without bug instances.

    Every project returned by ``load_projects_json`` is looked up through
    ``MongoProjectIterator``. A project is counted as "bugless" as soon as one
    of its documents has an empty ``BugCollection.BugInstance`` list. A
    progress line is printed per project and a summary line at the end.
    """
    fields = (
        'JarMetadata.group_id',
        'JarMetadata.artifact_id',
        'JarMetadata.version',
        'JarMetadata.version_order',
        'BugCollection.BugInstance.category',
        'BugCollection.BugInstance.type',
    )
    projects = load_projects_json()
    total_projects = len(projects)
    bugless_count = 0

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        # Hand the iterator its own list so it can never alias the constant.
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(),
                                     fields=list(fields))
        doc_list = piter.documents_list()

        # One document without bug instances is enough; the project is
        # counted at most once.
        for d in doc_list:
            bug_instances = d.get('BugCollection', {}).get('BugInstance', [])
            if len(bug_instances) == 0:
                bugless_count += 1
                break

        print('[%d:%d:%d] %s||%s: %d versions' % (
            count, total_projects, bugless_count,
            p.group_id(), p.artifact_id(), len(doc_list)))

    # The summary used to reference an undefined name (`total`), which raised
    # NameError only after every project had been processed.
    print("bugless: %d, total: %d" % (bugless_count, total_projects))
def main():
    """Dump the per-project version-count tally to ``version_count.dat``.

    The version count of every loaded project is fed to an ``ArrayCount``;
    its series is then written as one ``<key>,<value>`` line per entry
    (presumably version count -> number of projects; confirm against
    ``ArrayCount``).
    """
    histogram = ArrayCount()
    for project in load_projects_json():
        histogram.incr(project.version_count())

    rows = [
        str(key) + "," + str(value) + "\n"
        for key, value in histogram.get_series().iteritems()
    ]
    save_to_file('version_count.dat', ''.join(rows))
def main():
    """Emit FindBugs commands for Maven jars that have no MongoDB document.

    For each project, the versions listed in its local ``maven-metadata.xml``
    are looked up with ``get_version``. A version without stored documents
    counts as missing. If its jar exists locally and ``has_classes`` accepts
    it, a ``findbugs`` command line is printed to stdout; otherwise it is
    counted as "really missing". All diagnostics and totals go to stderr, so
    stdout carries only the generated commands.
    """
    base_url = '/Users/bkarak/devel/repositories/maven/maven/'
    col_obj = get_mongo_connection()[MONGO_COL]
    projects = load_projects_json()
    total_jars = 0
    missing = 0
    really_missing = 0

    for proj in projects:
        group_id = proj.group_id().strip()
        artifact_id = proj.artifact_id().strip()
        maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
        maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

        if not os.path.exists(maven_metadata_name):
            continue

        # Close the metadata file deterministically; the bare
        # open(...).read() left one handle per project to the garbage
        # collector.
        with open(maven_metadata_name, 'r') as metadata_file:
            json_xml = xmldict.parse(metadata_file.read())

        versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')
        # `version` may be a list or a single value; normalise to a list.
        version_list = versions if isinstance(versions, list) else [versions]

        for v in version_list:
            # No version text (e.g. the key was absent): nothing to look up.
            if v is None:
                continue

            v = v.strip()
            docs = get_version(col_obj, group_id, artifact_id, v)
            total_jars += 1

            if len(docs) != 0:
                continue

            missing += 1
            sys.stderr.write('[%d]: Missing %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
            local_jar_path = '%s%s/%s-%s.jar' % (maven_base_url, v, artifact_id, v)

            if not os.path.exists(local_jar_path):
                sys.stderr.write('[%d]: Invalid Jar: %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
                really_missing += 1
            elif has_classes(local_jar_path):
                sys.stderr.write('ADDED: Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
                # NOTE(review): the path is interpolated unquoted into a shell
                # command line; paths with spaces or shell metacharacters
                # would need quoting before this output is executed.
                print("findbugs -textui -xml -output `basename %s`-findbugs.xml %s" % (local_jar_path, local_jar_path))
            else:
                really_missing += 1
                sys.stderr.write('HAS_NO_CLASSES: %s\n' % (local_jar_path,))

    sys.stderr.write('Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
def main():
    """Print summary statistics of the number of versions per project.

    Reports the project count, the total number of versions, and the
    max/min/mean/median/range/quartiles of the per-project version counts.
    ``statistics.median``, ``first_quartile`` and ``third_quartile`` are used
    as indexes into the sorted list of counts.
    """
    projects = load_projects_json()
    how_many_projects = len(projects)
    raw_counts = [project.version_count() for project in projects]
    all_versions = sum(raw_counts)
    ordered = sorted(raw_counts)

    print("Projects: %d" % (how_many_projects,))
    print("Versions (total): %d" % (all_versions,))
    print("Max. Version Count: %d" % (statistics.stat_max(ordered),))
    print("Min. Version Count: %d" % (statistics.stat_min(ordered),))
    print("Mean: %.2f" % (statistics.mean(ordered)))

    median_index = statistics.median(ordered)
    print("Median: %d" % (ordered[median_index]))

    print("Range: %d" % (statistics.stat_range(ordered)))

    lower_index = statistics.first_quartile(ordered)
    print("1st Qrt: %d" % (ordered[lower_index]))

    upper_index = statistics.third_quartile(ordered)
    print("3rd Qrt: %d" % (ordered[upper_index]))
def main():
    """Count the projects that have at least one version without bug instances.

    Every project returned by ``load_projects_json`` is looked up through
    ``MongoProjectIterator``. A project is counted as "bugless" as soon as one
    of its documents has an empty ``BugCollection.BugInstance`` list. A
    progress line is printed per project and a summary line at the end.
    """
    fields = (
        'JarMetadata.group_id',
        'JarMetadata.artifact_id',
        'JarMetadata.version',
        'JarMetadata.version_order',
        'BugCollection.BugInstance.category',
        'BugCollection.BugInstance.type',
    )
    projects = load_projects_json()
    total_projects = len(projects)
    bugless_count = 0

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        # Hand the iterator its own list so it can never alias the constant.
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(),
                                     fields=list(fields))
        doc_list = piter.documents_list()

        # One document without bug instances is enough; the project is
        # counted at most once.
        for d in doc_list:
            bug_instances = d.get('BugCollection', {}).get('BugInstance', [])
            if len(bug_instances) == 0:
                bugless_count += 1
                break

        print('[%d:%d:%d] %s||%s: %d versions' % (
            count, total_projects, bugless_count,
            p.group_id(), p.artifact_id(), len(doc_list)))

    # The summary used to reference an undefined name (`total`), which raised
    # NameError only after every project had been processed.
    print("bugless: %d, total: %d" % (bugless_count, total_projects))
def main():
    """Collect the keys of all valid projects into ``valid_projects.json``.

    Each project is identified by ``<group_id>||<artifact_id>``. The project's
    ``MongoProjectIterator`` is asked whether it is ``valid()``; keys of valid
    projects are collected and saved as a JSON list. One progress line per
    project and a final summary are printed.
    """
    projects = load_projects_json()
    total = len(projects)
    valid_projects = []

    for counter, p in enumerate(projects, 1):
        key = '%s||%s' % (p.group_id(), p.artifact_id())
        # A stray trailing backslash after this call used to splice the next
        # statement onto the same logical line.
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(),
                                     fields=['JarMetadata.version_order'])
        # Result intentionally discarded; presumably this loads the evolution
        # list before valid() is consulted -- confirm against
        # MongoProjectIterator.
        piter.evolution_list()

        # Python 2 print statement: the trailing comma suppresses the newline
        # so the verdict below lands on the same output line.
        print ('[%d:%d:%d] Checking ... %s' % (counter, len(valid_projects), total, key)),

        if piter.valid():
            valid_projects.append(key)
            verdict = 'Valid'
        else:
            verdict = 'Invalid'
        print(' ... %s (%d versions)' % (verdict, len(piter.evolution_list())))

    print('Total: %d, Valid: %d' % (total, len(valid_projects)))
    save_to_file('valid_projects.json', json.dumps(valid_projects))
def _names_of(node, key):
    """Return the ``key`` entries found in ``node``.

    ``node`` may be a list of dicts or a single dict; a missing ``key``
    yields ``'NotSet'``. Any other node type contributes no names.
    """
    if isinstance(node, list):
        return [item.get(key, 'NotSet') for item in node]
    if isinstance(node, dict):
        return [node.get(key, 'NotSet')]
    return []


def _bug_signature(bug_category, bug_type, bi):
    """Build the ``||``-joined signature used to de-duplicate a bug instance."""
    parts = (_names_of(bi.get('Class', {}), 'classname')
             + _names_of(bi.get('Method', {}), 'name')
             + _names_of(bi.get('Field', {}), 'name'))
    return '%s||%s||%s' % (bug_category, bug_type, '||'.join(parts))


def main():
    """Count bugs per category for every project and save them as JSON.

    SECURITY bugs are split into ``SECURITY_HIGH`` (type listed in
    ``security_bugs``) and ``SECURITY_LOW``; every other bug keeps its own
    category. For each project two counters are kept per category:
    ``<category>`` counts distinct signatures (category, type, class, method
    and field names) across all version documents, and ``TOTAL_<category>``
    counts every occurrence. The per-project series are written to
    ``bug_correlation_counters_full.json``.
    """
    projects = load_projects_json()
    results = {}
    security_bugs = ['HRS_REQUEST_PARAMETER_TO_COOKIE',
                     'HRS_REQUEST_PARAMETER_TO_HTTP_HEADER',
                     'PT_ABSOLUTE_PATH_TRAVERSAL',
                     'SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE',
                     'SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING',
                     'XSS_REQUEST_PARAMETER_TO_JSP_WRITER',
                     'XSS_REQUEST_PARAMETER_TO_SEND_ERROR',
                     'XSS_REQUEST_PARAMETER_TO_SERVLET_WRITER']
    fields = ('JarMetadata.group_id',
              'JarMetadata.artifact_id',
              'JarMetadata.version',
              'JarMetadata.version_order',
              'BugCollection.BugInstance.category',
              'BugCollection.BugInstance.type',
              'BugCollection.BugInstance.Class.classname',
              'BugCollection.BugInstance.Method.name',
              'BugCollection.BugInstance.Field.name')
    total_projects = len(projects)

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(),
                                     fields=list(fields))
        doc_list = piter.documents_list()
        proj_array_count = ArrayCount()
        # A set keeps the "already seen" test O(1); the previous list made
        # de-duplication quadratic in the number of bug instances.
        seen_signatures = set()

        print('[%d:%d] %s||%s: %d versions' % (count, total_projects, p.group_id(), p.artifact_id(), len(doc_list)))

        for d in doc_list:
            for bi in d.get('BugCollection', {}).get('BugInstance', []):
                # Skip malformed entries that are not bug-instance documents.
                if not isinstance(bi, dict):
                    continue

                # The type is part of every signature, so an instance without
                # one is reported and skipped whatever its category (this used
                # to raise KeyError for non-SECURITY bugs).
                bug_type = bi.get('type')
                if bug_type is None:
                    print('Invalid Type!')
                    continue

                bug_c = bi.get('category', '')
                if bug_c != 'SECURITY':
                    bug_category = bug_c
                elif bug_type in security_bugs:
                    bug_category = 'SECURITY_HIGH'
                else:
                    bug_category = 'SECURITY_LOW'

                signature = _bug_signature(bug_category, bug_type, bi)

                # Distinct bugs are counted once per project ...
                if signature not in seen_signatures:
                    seen_signatures.add(signature)
                    proj_array_count.incr(bug_category)
                # ... while the TOTAL_ counter records every occurrence.
                proj_array_count.incr('TOTAL_' + bug_category)

        series = proj_array_count.get_series()
        print(series)
        results['%s||%s' % (p.group_id(), p.artifact_id())] = series

    save_to_file('bug_correlation_counters_full.json', json.dumps(results))
def _names_of(node, key):
    """Return the ``key`` entries found in ``node``.

    ``node`` may be a list of dicts or a single dict; a missing ``key``
    yields ``'NotSet'``. Any other node type contributes no names.
    """
    if isinstance(node, list):
        return [item.get(key, 'NotSet') for item in node]
    if isinstance(node, dict):
        return [node.get(key, 'NotSet')]
    return []


def _bug_signature(bug_category, bug_type, bi):
    """Build the ``||``-joined signature used to de-duplicate a bug instance."""
    parts = (_names_of(bi.get('Class', {}), 'classname')
             + _names_of(bi.get('Method', {}), 'name')
             + _names_of(bi.get('Field', {}), 'name'))
    return '%s||%s||%s' % (bug_category, bug_type, '||'.join(parts))


def main():
    """Count bugs per category, isolating input-validation security bugs.

    SECURITY bugs whose type is listed in ``security_bugs`` are counted as
    ``INPUT_VALIDATION_BUGS`` when the project's artifact id appears in the
    SQL/XSS artifact sets, and are ignored otherwise. All remaining SECURITY
    bugs become ``SECURITY_REST``; every other bug keeps its own category.
    For each project, ``<category>`` counts distinct signatures across all
    version documents and ``TOTAL_<category>`` counts every occurrence. The
    per-project series are written to
    ``data/bug_correlation_counters_full.json``.
    """
    projects = load_projects_json()
    results = {}
    security_bugs = ['HRS_REQUEST_PARAMETER_TO_COOKIE',
                     'HRS_REQUEST_PARAMETER_TO_HTTP_HEADER',
                     'SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE',
                     'SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING',
                     'XSS_REQUEST_PARAMETER_TO_JSP_WRITER',
                     'XSS_REQUEST_PARAMETER_TO_SEND_ERROR',
                     'XSS_REQUEST_PARAMETER_TO_SERVLET_WRITER']
    sql_bugs = {'activemq-all', 'activemq', 'activeobjects', 'cas-workflow',
                'ebxmlms', 'efaps-kernel', 'fabric3-binding-ws',
                'geotk-metadata-sql', 'jackrabbit-standalone', 'james',
                'james-server-mailets', 'jcaptcha-all', 'jdatabaseimport',
                'jetty-webapp', 'jonas-jms-manager', 'joram', 'kernel',
                'makumba', 'MetaModel', 'nunaliit2-adhocQueries', 'openjms',
                'org.openl.rules.eclipse.ui.wizard', 'sandesha2-persistence',
                'servicemix-components', 'sesame', 'sonar-application',
                'sqltool', 'sqltool-j5', 'squirrel-sql', 'torque',
                'transactions-jta', 'ujo-orm', 'xmlui'}
    xss_bugs = {'activemq-all', 'activemq-web', 'makumba', 'netcdf', 'opendap',
                'org.talend.esb.job.console', 'rdfbean-sparql', 'tika-app',
                'tuscany-domain-manager', 'tuscany-sca-all', 'webmin',
                'WebProxyPortlet', 'whiteboard', 'activemq', 'apacheds',
                'avro-tools', 'css-validator', 'dspace-jspui-api',
                'dspace-lni-core', 'fabric3-binding-ws', 'force-oauth',
                'groovysoap-all-jsr06', 'jackrabbit-standalone',
                'jetty-webapp', 'jftp', 'makumba', 'MessAdmin-Core', 'myfaces',
                'myfaces-all', 'ocpsoft-pretty-faces',
                'org.apache.felix.webconsole', 'org.apache.sling.openidauth',
                'org.jbundle.util.webapp.redirect',
                'org.talend.esb.job.console', 'pustefix-webservices-jaxws',
                'sonar-application', 'vt-ldap'}
    # Artifacts for which listed security bug types count as input validation.
    input_bugs = sql_bugs | xss_bugs
    fields = ('JarMetadata.group_id',
              'JarMetadata.artifact_id',
              'JarMetadata.version',
              'JarMetadata.version_order',
              'BugCollection.BugInstance.category',
              'BugCollection.BugInstance.type',
              'BugCollection.BugInstance.Class.classname',
              'BugCollection.BugInstance.Method.name',
              'BugCollection.BugInstance.Field.name')
    total_projects = len(projects)

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(),
                                     fields=list(fields))
        doc_list = piter.documents_list()
        proj_array_count = ArrayCount()
        # A set keeps the "already seen" test O(1); the previous list made
        # de-duplication quadratic in the number of bug instances.
        seen_signatures = set()
        # Loop-invariant for the project: evaluate once, not per bug instance.
        is_input_artifact = p.artifact_id() in input_bugs

        print('[%d:%d] %s||%s: %d versions' % (count, total_projects, p.group_id(), p.artifact_id(), len(doc_list)))

        for d in doc_list:
            for bi in d.get('BugCollection', {}).get('BugInstance', []):
                # Skip malformed entries that are not bug-instance documents.
                if not isinstance(bi, dict):
                    continue

                # The type is part of every signature, so an instance without
                # one is reported and skipped whatever its category (this used
                # to raise KeyError for non-SECURITY bugs).
                bug_type = bi.get('type')
                if bug_type is None:
                    print('Invalid Type!')
                    continue

                bug_c = bi.get('category', '')
                if bug_c != 'SECURITY':
                    bug_category = bug_c
                elif bug_type not in security_bugs:
                    bug_category = 'SECURITY_REST'
                elif is_input_artifact:
                    bug_category = 'INPUT_VALIDATION_BUGS'
                else:
                    # Listed security type on an artifact outside the
                    # SQL/XSS sets: not counted at all.
                    continue

                signature = _bug_signature(bug_category, bug_type, bi)

                # Distinct bugs are counted once per project ...
                if signature not in seen_signatures:
                    seen_signatures.add(signature)
                    proj_array_count.incr(bug_category)
                # ... while the TOTAL_ counter records every occurrence.
                proj_array_count.incr('TOTAL_' + bug_category)

        series = proj_array_count.get_series()
        print(series)
        results['%s||%s' % (p.group_id(), p.artifact_id())] = series

    save_to_file('data/bug_correlation_counters_full.json', json.dumps(results))