Exemplo n.º 1
0
def main():
    """Report how many projects have at least one document without bugs.

    Every project returned by ``load_projects_json()`` is looked up through
    ``MongoProjectIterator``. A project is counted as "bugless" as soon as one
    of its documents has an empty (or missing) ``BugCollection.BugInstance``
    entry. A progress line is printed per project and a summary at the end.
    """
    projects = load_projects_json()
    total_projects = len(projects)
    bugless_count = 0

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        piter = MongoProjectIterator(p.group_id(),
                                     p.artifact_id(),
                                     fields=[
                                         'JarMetadata.group_id',
                                         'JarMetadata.artifact_id',
                                         'JarMetadata.version',
                                         'JarMetadata.version_order',
                                         'BugCollection.BugInstance.category',
                                         'BugCollection.BugInstance.type'
                                     ])
        doc_list = piter.documents_list()

        for d in doc_list:
            bug_instances = d.get('BugCollection', {}).get('BugInstance', [])
            if len(bug_instances) == 0:
                # One document without bug instances is enough; count the
                # project once and stop scanning its remaining documents.
                bugless_count += 1
                break

        print('[%d:%d:%d] %s||%s: %d versions' % (
            count, total_projects, bugless_count, p.group_id(),
            p.artifact_id(), len(doc_list)))

    # The summary previously referenced an undefined name (``total``) and
    # raised NameError after the whole scan had already run.
    print("bugless: %d, total: %d" % (bugless_count, total_projects))
def main():
    """Write a version-count histogram to ``version_count.dat``.

    Each project's ``version_count()`` is fed to an ``ArrayCount``; the
    resulting series is written as one ``key,value`` line per entry.
    """
    histogram = ArrayCount()
    for project in load_projects_json():
        histogram.incr(project.version_count())

    # One "key,value" line per series entry, in the series' iteration order.
    rows = [str(key) + "," + str(value) + "\n"
            for key, value in histogram.get_series().iteritems()]

    save_to_file('version_count.dat', "".join(rows))
def main():
    """Find Maven artifact versions that are missing from the Mongo collection.

    For every project, the versions listed in its local
    ``maven-metadata.xml`` are looked up with ``get_version``. For each
    version without stored documents:

    * if the local jar does not exist, or ``has_classes`` rejects it, the
      version is counted as "really missing" and a note goes to stderr;
    * otherwise a ``findbugs`` command line for the jar is printed to stdout.

    Progress and the final totals are written to stderr, so stdout only
    carries the generated commands.
    """
    base_url = '/Users/bkarak/devel/repositories/maven/maven/'
    col_obj = get_mongo_connection()[MONGO_COL]
    projects = load_projects_json()

    total_jars = 0
    missing = 0
    really_missing = 0

    for proj in projects:
        group_id = proj.group_id().strip()
        artifact_id = proj.artifact_id().strip()
        maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
        maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

        if not os.path.exists(maven_metadata_name):
            continue

        # Close the metadata file deterministically; one handle per project
        # was previously left for the garbage collector to reclaim.
        with open(maven_metadata_name, 'r') as metadata_file:
            json_xml = xmldict.parse(metadata_file.read())

        versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

        # A single <version> entry is not parsed as a list; normalise so the
        # loop below always sees a list (``[None]`` when there is no entry).
        version_list = versions if isinstance(versions, list) else [versions]

        for v in version_list:
            if v is not None:
                v = v.strip()
            docs = get_version(col_obj, group_id, artifact_id, v)
            total_jars += 1

            if len(docs) != 0:
                continue

            missing += 1
            sys.stderr.write('[%d]: Missing %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
            local_jar_path = '%s%s/%s-%s.jar' % (maven_base_url, v, artifact_id, v)

            if not os.path.exists(local_jar_path):
                sys.stderr.write('[%d]: Invalid Jar: %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
                really_missing += 1
                continue

            if not has_classes(local_jar_path):
                really_missing += 1
                sys.stderr.write('HAS_NO_CLASSES: %s\n' % (local_jar_path,))
                continue

            sys.stderr.write('ADDED: Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
            print("findbugs -textui -xml -output `basename %s`-findbugs.xml %s" % (local_jar_path, local_jar_path))

    sys.stderr.write('Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
def main():
    """Print summary statistics of the per-project version counts.

    The counts are sorted before being handed to the ``statistics`` helpers.
    The median and quartile helpers' results are used as indices into that
    sorted list.
    """
    projects = load_projects_json()
    n_projects = len(projects)

    raw_counts = [project.version_count() for project in projects]
    n_versions = sum(raw_counts)
    ordered = sorted(raw_counts)

    print("Projects: %d" % (n_projects,))
    print("Versions (total): %d" % (n_versions,))
    print("Max. Version Count: %d" % (statistics.stat_max(ordered),))
    print("Min. Version Count: %d" % (statistics.stat_min(ordered),))
    print("Mean: %.2f" % statistics.mean(ordered))
    print("Median: %d" % ordered[statistics.median(ordered)])
    print("Range: %d" % statistics.stat_range(ordered))
    print("1st Qrt: %d" % ordered[statistics.first_quartile(ordered)])
    print("3rd Qrt: %d" % ordered[statistics.third_quartile(ordered)])
def main():
    """Report how many projects have at least one document without bugs.

    Every project returned by ``load_projects_json()`` is looked up through
    ``MongoProjectIterator``. A project is counted as "bugless" as soon as one
    of its documents has an empty (or missing) ``BugCollection.BugInstance``
    entry. A progress line is printed per project and a summary at the end.
    """
    projects = load_projects_json()
    total_projects = len(projects)
    bugless_count = 0

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        piter = MongoProjectIterator(
            p.group_id(),
            p.artifact_id(),
            fields=['JarMetadata.group_id',
                    'JarMetadata.artifact_id',
                    'JarMetadata.version',
                    'JarMetadata.version_order',
                    'BugCollection.BugInstance.category',
                    'BugCollection.BugInstance.type'])
        doc_list = piter.documents_list()

        # Indented with spaces only; the loop body used to mix tabs and
        # spaces, which Python 3 rejects outright.
        for d in doc_list:
            bug_instances = d.get('BugCollection', {}).get('BugInstance', [])
            if len(bug_instances) == 0:
                # Count the project once, then stop scanning its documents.
                bugless_count += 1
                break

        print('[%d:%d:%d] %s||%s: %d versions' % (
            count, total_projects, bugless_count,
            p.group_id(), p.artifact_id(), len(doc_list)))

    # ``total`` was never defined in this function; the project total is
    # ``total_projects``.
    print("bugless: %d, total: %d" % (bugless_count, total_projects))
def main():
    projects = load_projects_json()
    valid_projects = []
    total = len(projects)
    valid = 0
    counter = 0

    for p in projects:
        counter += 1
        key = '%s||%s' % (p.group_id(), p.artifact_id())
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(), fields=['JarMetadata.version_order'])\

        piter.evolution_list()
        print '[%d:%d:%d] Checking ... %s' % (counter, valid, total, key),

        if piter.valid():
            valid_projects.append(key)
            print ' ... Valid (%d versions)' % (len(piter.evolution_list()))
            valid += 1
        else:
            print ' ... Invalid (%d versions)' % (len(piter.evolution_list()))

    print 'Total: %d, Valid: %d' % (total, valid)
    save_to_file('valid_projects.json', json.dumps(valid_projects))
def main():
    """Count unique and total bug signatures per project, grouped by category.

    ``SECURITY`` bugs are split into ``SECURITY_HIGH`` (type listed in
    ``security_bugs``) and ``SECURITY_LOW``; any other category is used as-is.
    A signature is built from the category, the bug type and the class /
    method / field names of the bug instance. Per project, ``<category>`` is
    incremented the first time a signature is seen and ``TOTAL_<category>``
    for every instance. The per-project series are saved as JSON to
    ``bug_correlation_counters_full.json``.

    Raises:
        KeyError: if a bug instance has no ``Class`` entry, or a non-SECURITY
            instance has no ``type`` entry.
    """
    def collect_names(node, key):
        """Return the ``key`` values of ``node`` (a dict or a list of dicts).

        Missing keys yield ``'NotSet'``; any other node type yields nothing.
        """
        if isinstance(node, list):
            return [item.get(key, 'NotSet') for item in node]
        if isinstance(node, dict):
            return [node.get(key, 'NotSet')]
        return []

    projects = load_projects_json()
    results = {}
    security_bugs = ('HRS_REQUEST_PARAMETER_TO_COOKIE',
                     'HRS_REQUEST_PARAMETER_TO_HTTP_HEADER',
                     'PT_ABSOLUTE_PATH_TRAVERSAL',
                     'SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE',
                     'SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING',
                     'XSS_REQUEST_PARAMETER_TO_JSP_WRITER',
                     'XSS_REQUEST_PARAMETER_TO_SEND_ERROR',
                     'XSS_REQUEST_PARAMETER_TO_SERVLET_WRITER')
    fields = ('JarMetadata.group_id',
              'JarMetadata.artifact_id',
              'JarMetadata.version',
              'JarMetadata.version_order',
              'BugCollection.BugInstance.category',
              'BugCollection.BugInstance.type',
              'BugCollection.BugInstance.Class.classname',
              'BugCollection.BugInstance.Method.name',
              'BugCollection.BugInstance.Field.name')
    total_projects = len(projects)

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        # A fresh list per iterator, as before, in case the iterator keeps it.
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(), fields=list(fields))
        doc_list = piter.documents_list()
        proj_array_count = ArrayCount()
        # A set gives constant-time duplicate checks; the list used before
        # made the scan quadratic in the number of distinct signatures.
        seen_signatures = set()

        print('[%d:%d] %s||%s: %d versions' % (count, total_projects, p.group_id(), p.artifact_id(), len(doc_list)))

        for d in doc_list:
            for bi in d.get('BugCollection', {}).get('BugInstance', []):
                if not isinstance(bi, dict):
                    continue

                bug_category = bi.get('category', '')
                if bug_category == 'SECURITY':
                    security_type = bi.get('type', None)

                    if security_type is None:
                        print('Invalid Type!')
                        continue

                    if security_type in security_bugs:
                        bug_category = 'SECURITY_HIGH'
                    else:
                        bug_category = 'SECURITY_LOW'

                # Signature parts: class names, then method names, then field
                # names. A missing Method/Field contributes one 'NotSet'.
                signature_ids = collect_names(bi['Class'], 'classname')
                signature_ids.extend(collect_names(bi.get('Method', {}), 'name'))
                signature_ids.extend(collect_names(bi.get('Field', {}), 'name'))

                bug_type = bi['type']
                signature = '%s||%s||%s' % (bug_category, bug_type, '||'.join(signature_ids))

                if signature not in seen_signatures:
                    seen_signatures.add(signature)
                    proj_array_count.incr(bug_category)

                proj_array_count.incr('TOTAL_' + bug_category)

        series = proj_array_count.get_series()
        print(series)
        results['%s||%s' % (p.group_id(), p.artifact_id())] = series

    save_to_file('bug_correlation_counters_full.json', json.dumps(results))
def main():
    """Count unique and total bug signatures per project, grouped by category.

    ``SECURITY`` bugs whose type is listed in ``security_bugs`` are counted as
    ``INPUT_VALIDATION_BUGS`` when the project's artifact id is in the
    hand-maintained ``sql_bugs`` / ``xss_bugs`` sets, and skipped otherwise.
    Remaining ``SECURITY`` bugs become ``SECURITY_REST``; any other category
    is used as-is. A signature is built from the category, the bug type and
    the class / method / field names of the bug instance. Per project,
    ``<category>`` is incremented the first time a signature is seen and
    ``TOTAL_<category>`` for every instance. The per-project series are saved
    as JSON to ``data/bug_correlation_counters_full.json``.

    Raises:
        KeyError: if a counted bug instance has no ``Class`` entry, or a
            non-SECURITY instance has no ``type`` entry.
    """
    def collect_names(node, key):
        """Return the ``key`` values of ``node`` (a dict or a list of dicts).

        Missing keys yield ``'NotSet'``; any other node type yields nothing.
        """
        if isinstance(node, list):
            return [item.get(key, 'NotSet') for item in node]
        if isinstance(node, dict):
            return [node.get(key, 'NotSet')]
        return []

    projects = load_projects_json()
    results = {}
    security_bugs = ('HRS_REQUEST_PARAMETER_TO_COOKIE',
                     'HRS_REQUEST_PARAMETER_TO_HTTP_HEADER',
                     'SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE',
                     'SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING',
                     'XSS_REQUEST_PARAMETER_TO_JSP_WRITER',
                     'XSS_REQUEST_PARAMETER_TO_SEND_ERROR',
                     'XSS_REQUEST_PARAMETER_TO_SERVLET_WRITER')

    sql_bugs = {'activemq-all', 'activemq', 'activeobjects', 'cas-workflow',
                'ebxmlms', 'efaps-kernel', 'fabric3-binding-ws', 'geotk-metadata-sql',
                'jackrabbit-standalone', 'james', 'james-server-mailets', 'jcaptcha-all',
                'jdatabaseimport', 'jetty-webapp', 'jonas-jms-manager', 'joram', 'kernel',
                'makumba', 'MetaModel', 'nunaliit2-adhocQueries', 'openjms',
                'org.openl.rules.eclipse.ui.wizard', 'sandesha2-persistence',
                'servicemix-components', 'sesame', 'sonar-application', 'sqltool',
                'sqltool-j5', 'squirrel-sql', 'torque', 'transactions-jta',
                'ujo-orm', 'xmlui'}

    xss_bugs = {'activemq-all', 'activemq-web', 'makumba', 'netcdf', 'opendap',
                'org.talend.esb.job.console', 'rdfbean-sparql', 'tika-app',
                'tuscany-domain-manager', 'tuscany-sca-all', 'webmin', 'WebProxyPortlet',
                'whiteboard', 'activemq', 'apacheds', 'avro-tools', 'css-validator',
                'dspace-jspui-api', 'dspace-lni-core', 'fabric3-binding-ws', 'force-oauth',
                'groovysoap-all-jsr06', 'jackrabbit-standalone', 'jetty-webapp', 'jftp',
                'makumba', 'MessAdmin-Core', 'myfaces', 'myfaces-all', 'ocpsoft-pretty-faces',
                'org.apache.felix.webconsole', 'org.apache.sling.openidauth',
                'org.jbundle.util.webapp.redirect', 'org.talend.esb.job.console',
                'pustefix-webservices-jaxws', 'sonar-application', 'vt-ldap'}

    input_bugs = sql_bugs | xss_bugs

    fields = ('JarMetadata.group_id',
              'JarMetadata.artifact_id',
              'JarMetadata.version',
              'JarMetadata.version_order',
              'BugCollection.BugInstance.category',
              'BugCollection.BugInstance.type',
              'BugCollection.BugInstance.Class.classname',
              'BugCollection.BugInstance.Method.name',
              'BugCollection.BugInstance.Field.name')
    total_projects = len(projects)

    print('Found %d Projects' % (total_projects,))

    for count, p in enumerate(projects, 1):
        # A fresh list per iterator, as before, in case the iterator keeps it.
        piter = MongoProjectIterator(p.group_id(), p.artifact_id(), fields=list(fields))
        doc_list = piter.documents_list()
        proj_array_count = ArrayCount()
        # A set gives constant-time duplicate checks; the list used before
        # made the scan quadratic in the number of distinct signatures.
        seen_signatures = set()

        print('[%d:%d] %s||%s: %d versions' % (count, total_projects, p.group_id(), p.artifact_id(), len(doc_list)))

        for d in doc_list:
            for bi in d.get('BugCollection', {}).get('BugInstance', []):
                if not isinstance(bi, dict):
                    continue

                bug_category = bi.get('category', '')
                if bug_category == 'SECURITY':
                    security_type = bi.get('type', None)

                    if security_type is None:
                        print('Invalid Type!')
                        continue

                    if security_type not in security_bugs:
                        bug_category = 'SECURITY_REST'
                    elif p.artifact_id() in input_bugs:
                        bug_category = 'INPUT_VALIDATION_BUGS'
                    else:
                        # Input-validation bug type on a project outside the
                        # curated sets: not counted at all.
                        continue

                # Signature parts: class names, then method names, then field
                # names. A missing Method/Field contributes one 'NotSet'.
                signature_ids = collect_names(bi['Class'], 'classname')
                signature_ids.extend(collect_names(bi.get('Method', {}), 'name'))
                signature_ids.extend(collect_names(bi.get('Field', {}), 'name'))

                bug_type = bi['type']
                signature = '%s||%s||%s' % (bug_category, bug_type, '||'.join(signature_ids))

                if signature not in seen_signatures:
                    seen_signatures.add(signature)
                    proj_array_count.incr(bug_category)

                proj_array_count.incr('TOTAL_' + bug_category)

        series = proj_array_count.get_series()
        print(series)
        results['%s||%s' % (p.group_id(), p.artifact_id())] = series

    save_to_file('data/bug_correlation_counters_full.json', json.dumps(results))