Пример #1
0
def _splitTopLevelArgs(arg_string):
    """Split an argument list on the commas that are not nested in brackets."""
    pieces = []
    current = []
    depth = 0
    for char in arg_string:
        if char in "([{":
            depth += 1
        elif char in ")]}":
            depth -= 1
        if char == "," and depth == 0:
            pieces.append("".join(current))
            current = []
        else:
            current.append(char)
    pieces.append("".join(current))
    return pieces


def extractFunctionData(application_code):
    """
    Extract brace-bodied function definitions and their meta data.

    Every ``def name(args) ... {`` header found in ``application_code`` is
    inspected, except the ones ``opt.inComment`` flags.

    Returns a list with one entry per function. Each entry is the list
    ``[function_name, arg_with_type_array, function_span, returnRDDFlag,
    num_args, rdd_type_arg_index]`` where

    * ``arg_with_type_array`` holds ``(arg_name, arg_type, rdd_flag)`` tuples,
    * ``function_span`` is the value ``getEffectiveSpan`` produced for it,
    * ``returnRDDFlag`` is ``hasRDDType`` applied to the text between the
      closing parenthesis and the opening brace,
    * ``rdd_type_arg_index`` lists the positions of the arguments whose type
      ``hasRDDType`` accepts.
    """
    closure_matched_iter = re.finditer(r"""
        def\s+
        ([\w_]+) # function name
        \s*
        \(
        (.*) # arguments of function
        \)
        (.*?)
        {    # start position of the function span
        """, application_code, re.X)

    closure_function_list = []

    for matched_obj in closure_matched_iter:
        if opt.inComment(matched_obj, application_code):
            continue

        function_name = matched_obj.group(1)
        arg_string = matched_obj.group(2).strip()
        # A parameterless "def f()" has no arguments at all; splitting the
        # empty string would otherwise produce one bogus empty argument.
        if arg_string:
            arg_array = [arg.strip() for arg in _splitTopLevelArgs(arg_string)]
        else:
            arg_array = []

        arg_with_type_array = []
        rdd_type_arg_index = []
        for position, arg in enumerate(arg_array):
            # partition() tolerates a missing or repeated ":" instead of
            # failing to unpack.
            arg_name, _, arg_type = arg.partition(":")
            arg_name = arg_name.strip()
            arg_type = arg_type.strip()
            rdd_flag = hasRDDType(arg_type)
            # append arg name with its properties
            arg_with_type_array.append((arg_name, arg_type, rdd_flag))
            if rdd_flag:
                # remember the positions of the RDD typed arguments
                rdd_type_arg_index.append(position)

        return_type_regex = matched_obj.group(3).strip()
        returnRDDFlag = hasRDDType(return_type_regex)
        # The match ends right after the opening "{", so end() - 1 is the
        # index of that brace.
        function_span = getSpanFromStartPosition(matched_obj.end() - 1, application_code)
        num_args = len(arg_array)
        closure_function_list.append([
            function_name,
            arg_with_type_array,
            function_span,
            returnRDDFlag,
            num_args,
            rdd_type_arg_index,
        ])

    function_span_list = [func[2] for func in closure_function_list]
    effective_span_list = getEffectiveSpan(function_span_list)

    # reassign function_span to effective_span
    for index, effective_span in enumerate(effective_span_list):
        closure_function_list[index][2] = effective_span

    return closure_function_list
Пример #2
0
def getRDDsFromLoops(loop, rdd_actions, rdd_functions):
    """
    Find all RDD candidates in a loop and return their names as a set.

    Three kinds of usage are collected, each skipped when
    ``opt.inComment`` flags the match:

    * the name in front of an action call: ``<name>.<action>``,
    * the single argument of an action call: ``<action>(<name>)``,
    * the arguments of the functions in ``rdd_functions`` at the positions
      those entries list.

    ``rdd_actions`` is interpolated into the regular expressions as is.
    Each ``rdd_functions`` entry is indexed: ``[0]`` is the function name,
    ``[4]`` the number of arguments and ``[5]`` the zero-based positions of
    the arguments to collect.
    """
    comments_span_list = opt.findCommentSpans(loop)
    regex_flags = re.S | re.X | re.M
    rdd_set = set()

    non_arg_matched_iter = re.finditer(r'(\w+?)\.(%s)' % rdd_actions, loop, regex_flags)
    for matched_obj in non_arg_matched_iter:
        if not opt.inComment(matched_obj, loop, comments_span_list):
            rdd_set.add(matched_obj.group(1))

    arg_matched_iter = re.finditer(r'(%s)\(\s*(\w+?)\s*\)' % rdd_actions, loop, regex_flags)
    for matched_obj in arg_matched_iter:
        if not opt.inComment(matched_obj, loop, comments_span_list):
            rdd_set.add(matched_obj.group(2))

    # this is to capture functions defined that are not default RDD functions
    for rdd_func in rdd_functions:
        func_name = rdd_func[0]
        num_args = rdd_func[4]
        arg_pos_array = rdd_func[5]

        # One capturing group per argument, separated by commas.
        arg_regex_pattern = ",".join([r"\s*(\w+?)\s*"] * num_args)

        func_arg_matched_iter = re.finditer(
            r"{0}\s*\({1}\)".format(func_name, arg_regex_pattern), loop, regex_flags)

        for matched_obj in func_arg_matched_iter:
            if opt.inComment(matched_obj, loop, comments_span_list):
                continue
            # Group numbers are one-based, argument positions zero-based.
            for arg_pos in arg_pos_array:
                rdd_set.add(matched_obj.group(arg_pos + 1))

    return rdd_set
Пример #3
0
def getLoopPatternPosition(loop_patterns, application_code, func_spans):
    """
    Collect the spans at which the loop regexes occur in the code.

    Each pattern in ``loop_patterns`` is searched in ``application_code``
    with ``re.S``. A match is kept only when ``opt.inComment`` rejects it and
    ``opt.inFunctionDecl`` accepts it for ``func_spans``. The result is the
    list of ``(start, end)`` spans of the kept matches, in search order.
    """
    positions = []
    for pattern in loop_patterns:
        for hit in re.finditer(pattern, application_code, re.S):
            if opt.inComment(hit, application_code):
                continue
            if not opt.inFunctionDecl(hit, application_code, func_spans):
                continue
            positions.append(hit.span())
    return positions
Пример #4
0
def findReassignedRDD(body, pattern_list, comments_span_list):
    """
    Find the RDDs that are reassigned in a body of code.

    ``pattern_list`` is interpolated into the regular expression as is; every
    ``<name> = <word>`` occurrence whose name it matches is reported, unless
    ``opt.inComment`` flags the match.

    Returns the set of matched names.
    """
    # NOTE(review): comments_span_list is accepted but not used here; confirm
    # whether it should be forwarded to opt.inComment.
    reassigned_candidates = set()
    # No leading ".*": combined with re.S a greedy prefix swallows the body up
    # to the last assignment, so only that one would ever be reported.
    matched_iter = re.finditer(r'(%s)\s+=\s+\w+' % pattern_list, body, re.S)
    for matched_obj in matched_iter:
        if opt.inComment(matched_obj, body):
            continue
        reassigned_candidates.add(matched_obj.group(1))
    return reassigned_candidates
Пример #5
0
def initBeforeLoop(application_code, rdd, end_limit, func_spans, func_rdd_args):
    """
    Tell whether ``rdd`` is already available: passed in or declared.

    Returns True when ``rdd`` equals one of ``func_rdd_args``. Otherwise the
    region that ``opt.spansWithEndLimit`` and ``opt.extractSearchRegion``
    select from ``func_spans``, ``end_limit`` and ``application_code`` is
    searched for a ``val``/``var`` declaration of ``rdd``; the result is True
    when at least one such declaration is not flagged by ``opt.inComment``.
    """
    # Check if the args of the function was one of the candidate
    if any(rdd_arg == rdd for rdd_arg in func_rdd_args):
        return True

    span_with_limit = opt.spansWithEndLimit(func_spans, end_limit)
    search_region = opt.extractSearchRegion(span_with_limit, application_code)
    comments_span_list = opt.findCommentSpans(search_region)
    matched_iter = re.finditer(
        r'(val|var)\s*(%s)\s*?=' % rdd, search_region, re.S | re.X | re.M)
    # One declaration outside a comment is enough, so stop at the first one.
    return any(
        not opt.inComment(matched_obj, search_region, comments_span_list)
        for matched_obj in matched_iter
    )