Пример #1
0
def process_diff_srcml(data,type):
	"""Clean a diff snippet, run it through srcml and return the processed code.

	Every occurrence of the literal token "shuangyinhao" is removed
	(presumably a placeholder for a double quote -- confirm with the data
	producer). The first line of ``data`` is discarded, empty lines are
	dropped, and a single leading "+" or "-" is stripped from each
	remaining line.

	The cleaned text is written to ``Temp.cs`` when ``type == 1`` and to
	``Temp.java`` otherwise, in the current working directory. The ``srcml``
	command is then run through the shell with its output redirected to
	``Temp.xml``, which is parsed and flattened.

	Args:
		data: diff text whose first line is ignored.
		type: 1 selects the "cs" extension; any other value selects "java".

	Returns:
		The value produced by ``process_srcml_source_code`` for the
		flattened srcml tree.
	"""
	# Everything after the first line of the placeholder-free text.
	diff_lines = data.replace("shuangyinhao","").split("\n")[1:]

	# Skip empty lines; cut one leading diff marker where there is one.
	cleaned = [
		line[1:] if line[0] in ("+","-") else line
		for line in diff_lines
		if line
	]

	extension = "cs" if type == 1 else "java"
	name = "Temp." + extension
	with open(name,"w") as handle:
		handle.write("\n".join(cleaned))

	# srcml output goes to a fixed file name that is parsed right after.
	os.system("srcml " + name + " > Temp.xml")

	tree = parse_tree("Temp.xml")
	root = tree.getroot()
	parent_map = get_parent_map(tree)
	flattened = iterate_recursive(root,"",parent_map)

	return process_srcml_source_code(flattened)
Пример #2
0
def process_diff_srcml(data, type):
    """Strip diff markers from ``data``, convert it with srcml and post-process it.

    Steps, in order:
      1. Remove the literal token "shuangyinhao" everywhere (presumably a
         stand-in for a double quote -- TODO confirm).
      2. Drop the first line, skip empty lines, and remove one leading
         "+" or "-" from each remaining line.
      3. Write the result to ``Temp.cs`` (``type == 1``) or ``Temp.java``
         (any other value) in the current working directory.
      4. Run ``srcml`` through the shell, redirecting its output to
         ``Temp.xml``.
      5. Parse ``Temp.xml``, flatten the tree with ``iterate_recursive`` and
         pass the result to ``process_srcml_source_code``.

    Args:
        data: diff text; its first line is ignored.
        type: 1 for the "cs" extension, anything else for "java".

    Returns:
        Whatever ``process_srcml_source_code`` returns for the flattened tree.
    """
    without_token = data.replace("shuangyinhao", "")

    kept_lines = []
    for line in without_token.split("\n")[1:]:
        if not line:
            continue
        # A leading "+" / "-" is the diff marker; keep only the payload.
        kept_lines.append(line[1:] if line[0] in ("+", "-") else line)

    name = "Temp." + ("cs" if type == 1 else "java")
    with open(name, "w") as source_file:
        source_file.write("\n".join(kept_lines))

    os.system("srcml " + name + " > Temp.xml")

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    parent_map = get_parent_map(tree)

    return process_srcml_source_code(iterate_recursive(root, "", parent_map))
Пример #3
0
def process_expression(data, type):
    """Run ``data`` through srcml and return its flattened, space-normalised form.

    ``data`` is written unchanged to ``Temp.cs`` (``type == 1``) or
    ``Temp.java`` (any other value) in the current working directory. The
    ``srcml`` command is run through the shell with output redirected to
    ``Temp.xml``, which is then parsed and flattened with
    ``iterate_recursive``.

    Args:
        data: source text to convert.
        type: 1 selects the "cs" extension; anything else selects "java".

    Returns:
        The flattened text split on single spaces, with each piece stripped,
        empty pieces discarded, and the rest joined by one space.
    """
    name = "Temp." + ("cs" if type == 1 else "java")
    with open(name, "w") as source_file:
        source_file.write(data)

    os.system("srcml " + name + " > Temp.xml")

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    flattened = iterate_recursive(root, "", get_parent_map(tree))

    # Keep only pieces that still have content after stripping.
    tokens = []
    for piece in flattened.split(" "):
        stripped = piece.strip()
        if stripped:
            tokens.append(stripped)

    return " ".join(tokens)
Пример #4
0
def pre_process(file_path):
	"""Flatten one srcml file and append the result to the processed-data tree.

	``file_path`` is split on "/" and components 6, 7 and 8 are reused as
	the sub-directories and file name under
	``CURRENT_DIR/SRCML_PROCESSED_DATA_SPLIT_CAMEL_ALL_V3``. The destination
	directory is created when missing and the processed code is appended
	to the destination file (UTF-8, encoding errors ignored).

	Any exception raised after the path has been split is caught and
	printed; nothing is re-raised and nothing is returned.

	Args:
		file_path: "/"-separated path of the srcml file to process.
	"""
	split = file_path.split("/")
	try:
		tree = parse_tree(file_path)
		# 1 when path component 7 is "cs", otherwise 0. Only the disabled
		# C#-specific branch consumed this flag.
		file_type = 1 if split[7] == "cs" else 0

		parent_map = get_parent_map(tree)
		root = tree.getroot()

		# NOTE: a C#-only path (namespace -> biggest block) used to be
		# selected here; it is disabled, so every file is flattened from
		# the document root.
		flattened = iterate_recursive(root,"",parent_map)
		processed_code = process_srcml_source_code(flattened)

		output_dir = os.path.join(CURRENT_DIR,"SRCML_PROCESSED_DATA_SPLIT_CAMEL_ALL_V3",split[6],split[7])
		if not os.path.exists(output_dir):
			os.makedirs(output_dir)

		new_path = os.path.join(output_dir,split[8])

		# Append mode: repeated runs add to the existing file.
		with codecs.open(new_path,"a",encoding="utf-8", errors="ignore") as out_file:
			out_file.write(processed_code)

	except Exception as err:
		print(err)
Пример #5
0
def process_expression(data,type):
	"""Convert ``data`` with srcml and return the flattened text with tidy spacing.

	The input is written as-is to ``Temp.cs`` when ``type == 1`` and to
	``Temp.java`` otherwise, in the current working directory. ``srcml`` is
	invoked through the shell with its output redirected to ``Temp.xml``;
	that file is parsed and flattened with ``iterate_recursive``.

	Args:
		data: source text to convert.
		type: 1 for the "cs" extension, any other value for "java".

	Returns:
		The flattened text split on single spaces, each piece stripped,
		empty pieces removed, and the remainder joined with one space.
	"""
	extension = "cs" if type == 1 else "java"
	name = "Temp." + extension
	with open(name,"w") as handle:
		handle.write(data)

	os.system("srcml " + name + " > Temp.xml")

	tree = parse_tree("Temp.xml")
	root = tree.getroot()
	parent_map = get_parent_map(tree)
	pieces = iterate_recursive(root,"",parent_map).split(" ")

	# Strip every piece and drop the ones that end up empty.
	stripped_pieces = (piece.strip() for piece in pieces)
	return " ".join([piece for piece in stripped_pieces if piece])
Пример #6
0
                java_paths.append(file_path)

    for cs_path in cs_paths:

        for java_path in java_paths:

            cs_splits = cs_path.split("/")
            java_splits = java_path.split("/")
            cs_file = cs_splits[8]
            java_file = java_splits[8]
            if cs_file.split(".")[0] == java_file.split(".")[0]:
                print "###################"
                print cs_file
                try:
                    cs_tree = parse_tree(cs_path)
                    cs_parent_map = get_parent_map(cs_tree)
                    cs_tree_str = ""
                    cs_root = cs_tree.getroot()

                    java_tree = parse_tree(java_path)
                    java_parent_map = get_parent_map(java_tree)
                    java_tree_str = ""
                    java_root = java_tree.getroot()

                    cs_class_node = get_class_node_cs(cs_root)
                    java_class_node = get_class_node_java(java_root)

                    biggest_block_cs = None
                    biggest_block_java = None

                    for c in cs_class_node.getchildren():
            split = file_path.split("/")
            if split[7] == "cs":
                cs_paths.append(file_path)

    for cs_path in cs_paths:
        try:
            print(
                "---------------------------------------------------------------------------------"
            )
            print(cs_path)
            cs_splits = cs_path.split("/")
            cs_file = cs_splits[8]
            cs_parent_map, cs_global_vars_mapping, cs_package_object_mapping, cs_object_method_mapping, cs_third_party_package_object_mapping_list, cs_third_party_object_method_mapping_list = get_necessary_information_to_process_source_code(
                cs_path, "cs", project)
            cs_tree = parse_tree(cs_path)
            cs_parent_map = get_parent_map(cs_tree)
            cs_tree_str = ""
            cs_root = cs_tree.getroot()
            cs_class_node = get_class_node_cs(cs_root)
            biggest_block_cs = None
            for c in cs_class_node.getchildren():
                tag = c.tag.replace(STUPID_URL, "")
                if tag == "block":
                    biggest_block_cs = c

            biggest_block = find_biggest_block(cs_class_node)
            decl_stmts_global = get_all_decl_stmt_from_block_global(
                biggest_block)
            global_vars_mapping = get_information_of_decl_stmts(
                decl_stmts_global)
        print("Index : " + str(count) +
              "-------------------------------------------------------------")

        if count > 0:
            try:
                # java_paths.append(file_path)
                # print("---------------------------------------------------------------------------------")
                print(java_path)

                java_splits = java_path.split("\\")

                java_parent_map, java_global_vars_mapping = get_necessary_information_to_process_source_code(
                    java_path, lang, project)

                java_tree = parse_tree(java_path)
                java_parent_map = get_parent_map(java_tree)
                java_tree_str = ""
                java_root = java_tree.getroot()

                imports = extract_all_import(java_root, lang)
                # print(imports)

                candidate_sdk_packages = get_candidate_sdk_packages_from_import_list(
                    imports, lang)
                # print(candidate_sdk_packages)

                java_class_node = get_class_node_java(java_root)
                biggest_block_java = None
                for j in java_class_node.getchildren():
                    tag = j.tag.replace(STUPID_URL, "")
                    if tag == "block":
Пример #9
0
project = "openjdk-8_temp"
lang = "java"
java_signatures = list()
signature_path = "./SIGNATURE_DATA/" + project
print "Signature path : " + signature_path
for r, ds, files in os.walk(
        os.path.join(CURRENT_DIR, "SRCML_DATA_2", lang, project)):
    for file in files:

        file_path = os.path.join(r, file)
        print "Parsing file : " + file_path + " --------------------------"

        try:
            tree = parse_tree(file_path)
            root = tree.getroot()
            parent_map = get_parent_map(tree)

            decorations = ["class", "interface"]
            for decoration in decorations:
                java_class_nodes = list()
                java_class_nodes = iterate_to_get_node_with_type(
                    root, decoration, java_class_nodes)

                biggest_block_java = None
                class_name = None
                if len(java_class_nodes) != 0:
                    # print java_class_nodes[0].text
                    for c in java_class_nodes[0].getchildren():
                        tag = c.tag.replace(STUPID_URL, "")
                        if tag == "block":
                            biggest_block_java = c
	for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA_" + str(i),lang)):
		for file in files:
			cs_path = os.path.join(r,file)
			split = cs_path.split("/")
			count = count + 1
			print("Index : " + str(count) + "-------------------------------------------------------------")
		
			if count > 1791377:
				try:
					# print("---------------------------------------------------------------------------------")
					print(cs_path)
					cs_splits = cs_path.split("/")
					cs_file = cs_splits[8]
					cs_parent_map, cs_global_vars_mapping = get_necessary_information_to_process_source_code(cs_path, lang, project)
					cs_tree = parse_tree(cs_path)
					cs_parent_map = get_parent_map(cs_tree)
					cs_tree_str = ""
					cs_root  = cs_tree.getroot()

					imports = extract_all_import(cs_root,lang)
					# print(imports)
					candidate_sdk_packages = get_candidate_sdk_packages_from_import_list(imports,lang)
					# print(candidate_sdk_packages)

					cs_class_node = get_class_node_cs(cs_root)
					biggest_block_cs = None
					for c in cs_class_node.getchildren():
						tag = c.tag.replace(STUPID_URL,"")
						if tag == "block":
							biggest_block_cs = c
	for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA",project)):
		for file in files:
			file_path = os.path.join(r,file)
			split = file_path.split("/")
			if split[7] == "java":
				java_paths.append(file_path)

	for java_path in java_paths:
		try:
			print("--------------------")
			print(java_path)
			java_splits = java_path.split("/")
			java_file = java_splits[8]
			java_parent_map, java_global_vars_mapping, java_package_object_mapping, java_object_method_mapping, java_third_party_package_object_mapping_list, java_third_party_object_method_mapping_list = get_necessary_information_to_process_source_code(java_path, "java", project)
			java_tree = parse_tree(java_path)
			java_parent_map = get_parent_map(java_tree)
			java_tree_str = ""
			java_root  = java_tree.getroot()

			
			java_class_node = get_class_node_java(java_root)
			biggest_block_java = None
			for j in java_class_node.getchildren():
				tag = j.tag.replace(STUPID_URL,"")
				if tag == "block":
					biggest_block_java = j


			biggest_block = find_biggest_block(java_class_node)
			decl_stmts_global = get_all_decl_stmt_from_block_global(biggest_block)
			global_vars_mapping = get_information_of_decl_stmts(decl_stmts_global)
project = "corefx"
lang = "cs"
signature_path = "./SIGNATURE_DATA/" + project
print "Signature path : " + signature_path

for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA_2",lang,project)):
	for file in files:
	
		file_path = os.path.join(r, file)
		print "Parsing file : " + file_path + " --------------------------"
		
	
		try:
			tree = parse_tree(file_path)
			root  = tree.getroot()
			parent_map = get_parent_map(tree)
			
			decorations = ["class","interface","struct"]
			for decoration in decorations:
				cs_class_nodes = list()
				cs_class_nodes = iterate_to_get_node_with_type(root,decoration,cs_class_nodes)

				biggest_block_cs = None
				class_name = None
			
				if len(cs_class_nodes) != 0:
					for c in cs_class_nodes[0].getchildren():
						tag = c.tag.replace(STUPID_URL,"")
						if tag == "block":
							biggest_block_cs = c
						if tag == "name":

	for cs_path in cs_paths:
	
		for java_path in java_paths:

			cs_splits = cs_path.split("/")
			java_splits = java_path.split("/")
			cs_file = cs_splits[8]
			java_file = java_splits[8]
			if cs_file.split(".")[0] == java_file.split(".")[0]:
				print "###################"
				print cs_file
				try: 
					cs_tree = parse_tree(cs_path)
					cs_parent_map = get_parent_map(cs_tree)
					cs_tree_str = ""
					cs_root  = cs_tree.getroot()

					java_tree = parse_tree(java_path)
					java_parent_map = get_parent_map(java_tree)
					java_tree_str = ""
					java_root  = java_tree.getroot()

					cs_class_node = get_class_node_cs(cs_root)
					java_class_node = get_class_node_java(java_root)
					
					biggest_block_cs = None
					biggest_block_java = None

					for c in cs_class_node.getchildren():