Example #1
import json
import os
import shutil

# These examples are excerpts from one module; constants such as RESULT_JSON,
# GS_JSON_NAME and CONFIG_JSON, the STATUS_* codes, and helpers like importJson
# are assumed to be defined at module level.
def updateGoldStandardJsonWithIds(gsCrawl):
	resultJson = os.path.join(gsCrawl, RESULT_JSON)
	if not os.path.exists(resultJson):
		print("Result JSON not found at : {0}".format(resultJson))
		return None
	resultJsonData = importJson(resultJson)

	gsJson = os.path.join(gsCrawl, 'gs', GS_JSON_NAME)
	if not os.path.exists(gsJson):
		print("GS JSON not found at : {0}".format(gsJson))
		return None
	gsJsonData = importJson(gsJson)
	if gsJsonData is None:
		print("Error importing Gold Standard JSON")
		return None
	states = resultJsonData['states']
	gsStates = gsJsonData['states']
	noId = False
	for state in states:
		if state in gsStates:
			if 'id' not in gsStates[state]:
				noId = True
				gsStates[state]['id'] = states[state]['id']

	if noId:
		shutil.copy2(gsJson, os.path.join(gsCrawl, 'gs', 'gsResults_noId.json'))
		with open(gsJson, "w") as write_file:
			json.dump(gsJsonData, write_file)
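
These examples call an importJson helper that is not shown in the listing. A minimal sketch consistent with the None check above, assuming importJson simply loads a JSON file and returns None on failure, might look like this:

import json

def importJson(jsonFile):
	# Hedged sketch: load a JSON file, returning None on any failure so that
	# callers such as updateGoldStandardJsonWithIds can test the result.
	try:
		with open(jsonFile) as read_file:
			return json.load(read_file)
	except (OSError, ValueError) as ex:
		print("Could not import JSON from {0}".format(jsonFile))
		print(ex)
		return None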
Example #2
def getNewDiscoveries(jsonData, oldGsCrawl, crawl):
	newDiscoveries = []
	gsJson = os.path.join(oldGsCrawl, 'gs', GS_JSON_NAME)
	if not os.path.exists(gsJson):
		print("GS JSON not found at : {0}".format(gsJson))
		return None
	gsJsonData = importJson(gsJson)
	allGsBins = getAllBins(gsJsonData['states'])
	allCrawlBins = getAllBins(jsonData['states'])
	binRepresentatives = getBinRepresentatives(jsonData['states'])
	for crawlBin in allCrawlBins:
		if crawlBin == "":
			continue
		if crawlBin not in allGsBins:
			newDiscoveries.append({crawlBin: binRepresentatives[crawlBin]})

	newDiscoveriesPath = os.path.join(crawl, 'comp_output', 'newDiscoveries.json')
	try:
		with open(newDiscoveriesPath, 'w') as write_file:
			json.dump(newDiscoveries, write_file)
	except Exception as ex:
		print("Could not wirte new Discoveries file to {0}".format(newDiscoveriesPath))
		print(ex)

	return len(newDiscoveries)
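
getAllBins and getBinRepresentatives are also not shown. Assuming each state dict carries a 'bin' label (as initialized in Example #7), minimal sketches might be:

def getAllBins(states):
	# Hedged sketch: collect the distinct bin labels across all states.
	bins = set()
	for state in states:
		bins.add(states[state].get('bin', ''))
	return bins

def getBinRepresentatives(states):
	# Hedged sketch: map each bin label to the first state that carries it,
	# so getNewDiscoveries can report one representative state per new bin.
	representatives = {}
	for state in states:
		binLabel = states[state].get('bin', '')
		if binLabel not in representatives:
			representatives[binLabel] = state
	return representatives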
Example #3
def testGetCoverage():
	gsJson = importJson("src/main/resources/GoldStandards/petclinic/crawl-petclinic-60min/gs/gsResults.json")
	states = gsJson['states']
	print(getNumBins(states))
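
getNumBins is likewise undefined here. A plausible sketch on top of getAllBins (sketched after Example #2); whether the empty label should count as a bin is an assumption:

def getNumBins(states):
	# Hedged sketch: count the distinct non-empty bin labels.
	return len([b for b in getAllBins(states) if b != ""])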
Example #4
def testGetSAF():
	testConfigData = importJson("../ALLCRAWLS/out/petclinic/petclinic_DEFAULT_-1.0_60mins/localhost/crawl0/config.json")
	getSAF(testConfigData)
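
getSAF is not part of the listing either. The sketch below is hypothetical: it assumes the state abstraction function (SAF) is recorded in config.json under a 'stateVertexFactory' key, a name not confirmed by the source.

def getSAF(configData):
	# Hypothetical sketch: read the state abstraction function setting from
	# the crawl config; the 'stateVertexFactory' key is an assumption.
	if configData is None:
		return None
	saf = configData.get('stateVertexFactory')
	print(saf)
	return saf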
Example #5
def analyze(gsCrawl, crawl, appName, thresholdEntry, preDefinedSaveJsonLocation=None):
	status = None
	gsJson = os.path.join(gsCrawl, 'gs', GS_JSON_NAME)
	if not os.path.exists(gsJson):
		print("GS JSON not found at : {0}".format(gsJson))
		status = STATUS_JSON_NOT_FOUND
		return status, None
	gsJsonData = importJson(gsJson)
	saveJsonName = getCrawlFolderName(crawl) + "_" + VERIFIED_CLASSIFICATION_JSON_NAME
	verifClassificationJson = os.path.join(crawl, 'comp_output', VERIFIED_CLASSIFICATION_JSON_NAME)
	HTML_OUTPUT_PATH = os.path.join(os.path.abspath(crawl), "html_classification")

	if (not os.path.exists(verifClassificationJson)) and (preDefinedSaveJsonLocation is not None) and (os.path.exists(HTML_OUTPUT_PATH)):
		saveJsons = find(saveJsonName, preDefinedSaveJsonLocation)
		if len(saveJsons) > 0:
			saveJson = saveJsons[0]
			try:
				shutil.copy2(saveJson, verifClassificationJson)
				shutil.move(saveJson, saveJson + "_backup")
				print("Moved classification_verified.json to {0}".format(verifClassificationJson))
			except Exception as ex:
				print(ex)
				print("Could not move classification_verified to the right location.")

	if not os.path.exists(verifClassificationJson):
		classificationJson = os.path.join(crawl, 'comp_output', GENERATED_CLASSIFICATION_JSON_NAME)
		if not os.path.exists(classificationJson):
			print("classification json not found {0}".format(classificationJson))
			done = createClassificationJson(gsCrawl, crawl, appName)
			status= STATUS_CREATED_CLASSIFICATION_HTML
			return status,None
		jsonData = importJson(classificationJson)
		OUTPUT_HTML_NAME = "classification.html"
		TITLE = "Classification HTML"
		
		preLoadedBins = getAllBins(gsJsonData['states'])
		singleCrawlOutput(crawl, HTML_OUTPUT_PATH, OUTPUT_HTML_NAME, TITLE, outputJson=jsonData, saveJsonName=saveJsonName, preLoadedBins=preLoadedBins)
		status = STATUS_CREATED_CLASSIFICATION_HTML
		return status, None

	else:
		jsonData = importJson(verifClassificationJson)

		resultJson = os.path.join(crawl, RESULT_JSON)
		if not os.path.exists(resultJson):
			print("Result JSON not found at: {0}".format(resultJson))
			status = STATUS_JSON_NOT_FOUND
			return status, None

		configJson = os.path.join(crawl, CONFIG_JSON)
		if not os.path.exists(configJson):
			print("Config JSON not found at: {0}".format(configJson))
			status = STATUS_JSON_NOT_FOUND
			return status, None

		resultJsonData = importJson(resultJson)
		configJsonData = importJson(configJson)
		
		if 'newBins' in jsonData:
			toAdd, errored = addToGoldStandard(jsonData['newBins'], gsCrawl, crawl, gsJsonData, jsonData)
			if len(errored) > 0:
				print("Some bins could not be added to the Gold Standard; investigate why.")

			toAddStates = toAdd['states']
			toAddPairs = toAdd['pairs']
			if len(toAddPairs) != 0:
				newStatesJson = gsJsonData['states']
				newPairsJson = gsJsonData['pairs']
				for state in toAddStates:
					newStatesJson[state] = toAddStates[state]

				for pair in toAddPairs:
					newPairsJson.append(pair)

				newGSJsonData = {'states' : newStatesJson, 'pairs': newPairsJson}
				writeNewGoldStandardJsonAndHtml(newGSJsonData, gsCrawl)
				status = STATUS_GS_UPDATED
				return status, None

		stats = getStats(jsonData, gsJsonData, resultJsonData, configJsonData, crawl, thresholdEntry)

		if os.path.exists(os.path.abspath(gsCrawl) + UNALTERED_GS_TAG):
			newDiscoveries = getNewDiscoveries(jsonData, os.path.abspath(gsCrawl) + UNALTERED_GS_TAG, crawl)
			stats["newDiscoveries"] = newDiscoveries
		else:
			stats["newDiscoveries"] = 0

		print(stats)
		status = STATUS_STATS_CREATED
		return status, stats
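
The find helper used above (and again in Example #6 with a glob pattern) is not shown; a minimal sketch based on os.walk and fnmatch:

import fnmatch
import os

def find(pattern, path):
	# Hedged sketch: recursively collect files under path whose names match
	# the glob-style pattern, e.g. find('*Hyst-raw.csv', '/Test/gt10/').
	matches = []
	for root, dirs, files in os.walk(path):
		for name in files:
			if fnmatch.fnmatch(name, pattern):
				matches.append(os.path.join(root, name))
	return matches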
Example #6
def testNormalizeHyst():
	pDiffRawCSVs = find('*Hyst-raw.csv', '/Test/gt10/')
	totalNormalized = 0
	totalUncalculated = 0
	totalupdated = 0
	totalSynced = 0
	appsToRerun = []
	appsNormalized = []
	allZeroApps = []
	for pDiffRawCSV in pDiffRawCSVs:
		# To test a single CSV instead of the whole set:
		# pDiffRawCSV = "/Test/gt10/windowsvc.com/crawl0/comp_output/crawl0-VISUAL-PDiff-raw.csv"
		print(pDiffRawCSV)
		path, file = os.path.split(pDiffRawCSV)
		folders = splitPathIntoFolders(pDiffRawCSV)
		#print(folders)
		appName = folders[2]
		crawl = folders[1]
		pixelSizeJson = os.path.join(os.path.abspath(path), 'pixelSizes.json')
		pixelSizes = importJson(pixelSizeJson)
		pDiffPairs = getAllPairsFromCSV(pDiffRawCSV)
		normalized, uncalculated, raw, needToRerun, allZero, pDiffPairsNormalized = normalizePDiff(pDiffPairs, pixelSizes)
		totalNormalized += normalized
		totalUncalculated += uncalculated
		if needToRerun:
			appsToRerun.append(appName)
		if raw:
			appsNormalized.append(appName)
			fieldnames = getFieldNames(pDiffRawCSV)
			dst = os.path.join(path, crawl + '-VISUAL-Hyst-normalized.csv')
			writeNormalizedCSV(pDiffPairsNormalized, fieldnames, dst)
			# try:
			# 	testdb = '/Test/DS.db'
			# 	connectToDB(testdb)
			# 	updatedPairs, ignoredPairs, sameValuePairs, errorPairs = updateDB(pDiffPairsNormalized, appName, crawl, str(ALGOS.VISUAL_PDIFF).split('.')[1])
			# 	totalupdated += updatedPairs
			# 	totalSynced += updatedPairs
			# 	totalSynced += sameValuePairs
			# 	print("Updated : {0}, Ignored not present in db: {1}, Ignored same Value : {2}, Errored : {3}  db records".format(updatedPairs, ignoredPairs, sameValuePairs, errorPairs))
			# except Exception as ex:
			# 	print(ex)
			# 	print("Encountered exception while updating records")
			# finally:
			# 	closeDBConnection()
		if allZero:
			allZeroApps.append(appName)


	print("Noramlized total {0} Hyst distances from {1} apps".format(totalNormalized, len(pDiffRawCSVs)))
	print("Found total {0} uncalculated Hyst distances from {1} apps".format(totalUncalculated, len(pDiffRawCSVs)))

	print("appsToRerun : {0}" + str(len(appsToRerun)))
	print("rawApps : " + str(len(appsNormalized)))
	print("allZeroApps : " + str(len(allZeroApps)))
	print("intersection of appsToRerun and allZero. : " + str(len(list(set(appsToRerun) & set(allZeroApps)))))
	
	# print("Updated total {0} db records from {1} csvs".format(totalupdated, len(updatedCSVs)))
	# print("Synced total {0} db records from {1} csvs".format(totalSynced, len(updatedCSVs)))

	with open("Output.txt", "w") as text_file:
		for app in appsToRerun:
			print(app, file=text_file)
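
splitPathIntoFolders is inferred from how the example indexes its result (folders[1] is the crawl folder and folders[2] the app name). A sketch matching that ordering, which drops the file name and returns directory components most specific first:

import os

def splitPathIntoFolders(path):
	# Hedged sketch: for .../<app>/<crawl>/comp_output/<file>.csv this yields
	# ['comp_output', '<crawl>', '<app>', ...], matching the indexing above.
	folders = []
	head = os.path.dirname(path)
	while head and head != os.path.sep:
		head, tail = os.path.split(head)
		if tail:
			folders.append(tail)
	return folders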
Example #7
def singleCrawlOutput(
        CRAWL_PATH,
        OUTPUT_PATH,
        OUTPUT_HTML_NAME,
        TITLE,
        outputJson=None,
        saveJsonName="gsResults.json",
        preLoadedBins=None,
        overwrite=None,
        RESOURCES=os.path.join(os.path.abspath("../HTMLStuff/resources/"), ''),
        TEMPLATE_DIR=os.path.join(os.path.abspath("../HTMLStuff/"), ''),
        TEMPLATE_HTML="gsTemplate.html"):

    TEMPLATE_HTML_PATH = os.path.abspath(
        os.path.join(TEMPLATE_DIR, TEMPLATE_HTML))
    IMAGES = "images"
    SCREENSHOTS = "screenshots"
    CRAWL_RESULT_JSON_FILE = os.path.abspath(
        os.path.join(CRAWL_PATH, "result.json"))

    outputPathConfirmed, OUTPUT_PATH = confirmOutputPath(OUTPUT_PATH,
                                                         RESOURCES,
                                                         overwrite=overwrite)
    if not outputPathConfirmed:
        # print("USAGE : program <DB_PATH> <DB_NAME> <CRAWL_PATH> <OUTPUT_PATH> <OUTPUT_HTML_NAME> <NUMBEROFPAIRS>")
        return

    if outputJson is not None:
        outputJsonString = json.dumps(outputJson)
        output_html_doc(TEMPLATE_DIR,
                        TEMPLATE_HTML,
                        os.path.join(OUTPUT_PATH, OUTPUT_HTML_NAME),
                        outputJsonString,
                        TITLE,
                        saveJsonName=saveJsonName,
                        preLoadedBins=preLoadedBins)

        return

    resultJson = importJson(CRAWL_RESULT_JSON_FILE)
    states = resultJson['states']
    outputJson = {'states': {}, 'pairs': []}
    sortedStates = sorted(states.items(), key=lambda x: x[1]['id'])

    for state1 in sortedStates:
        # print(state1[0])
        i = state1[1]['id']
        stateOutput = {}
        stateOutput['name'] = state1[1]['name']
        stateOutput['bin'] = ""
        stateOutput['clones'] = []
        stateOutput['id'] = i
        if 'timeAdded' in state1[1]:
            stateOutput['timeAdded'] = state1[1]['timeAdded']
        else:
            stateOutput['timeAdded'] = i

        if 'url' in state1[1]:
            stateOutput['url'] = state1[1]['url']

        # print(stateOutput)
        outputJson['states'][state1[0]] = stateOutput
        for state2 in sortedStates:
            j = state2[1]['id']
            if j <= i:
                continue
            pair = {}
            pair['state1'] = state1[0]
            pair['state2'] = state2[0]
            pair['response'] = -1
            pair['inferred'] = 0
            pair['tags'] = []
            pair['comments'] = ""
            outputJson['pairs'].append(pair)
            # print(pair)

    outputJsonString = json.dumps(outputJson)
    print(outputJsonString)
    output_html_doc(TEMPLATE_DIR,
                    TEMPLATE_HTML,
                    os.path.join(OUTPUT_PATH, OUTPUT_HTML_NAME),
                    outputJsonString,
                    TITLE,
                    saveJsonName=saveJsonName,
                    preLoadedBins=preLoadedBins)
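
A hedged usage sketch for singleCrawlOutput: the crawl and output paths below are hypothetical, and RESOURCES, TEMPLATE_DIR and TEMPLATE_HTML fall back to their defaults. With outputJson left as None, the function builds the full pairwise comparison JSON from the crawl's result.json before rendering the HTML.

# Hypothetical paths; point CRAWL_PATH at a crawl directory containing result.json.
singleCrawlOutput(
    "../ALLCRAWLS/out/petclinic/petclinic_DEFAULT_-1.0_60mins/localhost/crawl0",
    "output/petclinic_classification",
    "classification.html",
    "Classification HTML")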