def _parse_type_hierarchy(self, jtype, type_binding, type_code_element): supertypes = [type_code_element.fqn] super_class = type_binding.getSuperclass() if super_class != None: (_, fqn) = clean_java_name(super_class.getQualifiedName()) supertypes.append(fqn) for interface in type_binding.getInterfaces(): (_, fqn) = clean_java_name(interface.getQualifiedName()) supertypes.append(fqn) # Save hierarchy for further processing if len(supertypes) > 1: self.hierarchies.append(supertypes)
def _parse_method(self, method, container_code_element):
    """Persist a non-private method and one element per parameter.

    Nothing is saved when ``self._is_private`` reports the resolved binding
    as private. Otherwise a ``MethodElement`` is saved and attached to
    ``container_code_element``, then a ``ParameterElement`` is saved for
    each parameter type of the binding, named after the parameter
    declaration of ``method`` found at the same index.
    """
    binding = method.resolveBinding()
    if self._is_private(binding):
        return

    # method header
    handle = binding.getJavaElement().getHandleIdentifier()
    method_name = binding.getName()
    declaring_fqn = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())[1]
    parameter_types = binding.getParameterTypes()
    declarations = method.parameters()
    (return_simple_name, return_fqn) = clean_java_name(
        binding.getReturnType().getQualifiedName())

    method_element = MethodElement(
        codebase=self.codebase,
        kind=self.method_kind,
        simple_name=method_name,
        fqn=declaring_fqn + '.' + method_name,
        parameters_length=len(parameter_types),
        eclipse_handle=handle,
        return_simple_name=return_simple_name,
        return_fqn=return_fqn,
        parser=JAVA_PARSER,
        deprecated=binding.isDeprecated())

    # method container
    method_element.save()
    method_element.containers.add(container_code_element)

    # parse parameters
    for index, parameter_type in enumerate(parameter_types):
        (type_simple_name, type_fqn) = clean_java_name(
            parameter_type.getQualifiedName())
        # The declared identifier serves as both simple name and fqn.
        identifier = declarations[index].getName().getIdentifier()
        parameter_element = ParameterElement(
            codebase=self.codebase,
            kind=self.method_parameter_kind,
            simple_name=identifier,
            fqn=identifier,
            type_simple_name=type_simple_name,
            type_fqn=type_fqn,
            index=index,
            attcontainer=method_element,
            parser=JAVA_PARSER)
        parameter_element.save()
def _parse_type(self, jtype, container_code_element):
    """Persist a type declaration, then parse its members and hierarchy.

    A ``CodeElement`` is built from the resolved binding of ``jtype``. Its
    kind is the annotation or enumeration kind when the node type equals
    the corresponding ``self`` attribute, and ``self.class_kind``
    otherwise. The saved element is attached to
    ``container_code_element`` before members and hierarchy are parsed.
    """
    binding = jtype.resolveBinding()
    handle = binding.getJavaElement().getHandleIdentifier()
    (simple_name, fqn) = clean_java_name(binding.getQualifiedName())

    element = CodeElement(
        codebase=self.codebase,
        simple_name=simple_name,
        fqn=fqn,
        eclipse_handle=handle,
        parser=JAVA_PARSER,
        deprecated=binding.isDeprecated())

    # Fall back to the class kind for any other node type.
    node_type = jtype.getNodeType()
    kind = self.class_kind
    if node_type == self.annotation_type:
        kind = self.annotation_kind
    elif node_type == self.enumeration_type:
        kind = self.enumeration_kind
    element.kind = kind

    element.save()
    element.containers.add(container_code_element)

    self._parse_type_members(jtype, element)
    self._parse_type_hierarchy(jtype, binding, element)
def _parse_type(self, jtype, container_code_element):
    """Persist a type declaration, then parse its members and hierarchy.

    A ``CodeElement`` is built from the resolved binding of ``jtype``. Its
    kind is the annotation or enumeration kind when the node type equals
    the corresponding ``self`` attribute, and ``self.class_kind``
    otherwise. The saved element is attached to
    ``container_code_element`` before members and hierarchy are parsed.
    """
    binding = jtype.resolveBinding()
    handle = binding.getJavaElement().getHandleIdentifier()
    (simple_name, fqn) = clean_java_name(binding.getQualifiedName())

    element = CodeElement(
        codebase=self.codebase,
        simple_name=simple_name,
        fqn=fqn,
        eclipse_handle=handle,
        parser=JAVA_PARSER,
        deprecated=binding.isDeprecated())

    # Fall back to the class kind for any other node type.
    node_type = jtype.getNodeType()
    kind = self.class_kind
    if node_type == self.annotation_type:
        kind = self.annotation_kind
    elif node_type == self.enumeration_type:
        kind = self.enumeration_kind
    element.kind = kind

    element.save()
    element.containers.add(container_code_element)

    self._parse_type_members(jtype, element)
    self._parse_type_hierarchy(jtype, binding, element)
def _link_all_references(self, unknown_refs, ucount, progress_monitor):
    """Classify every unknown reference and hand the results on for linking.

    For each reference with non-empty content, the cleaned name is looked
    up through ``cu.get_value`` (under a prefix built from
    ``PREFIX_GENERIC_LINKER`` and the codebase key), the returned code
    elements are classified, and the tuple
    ``(reference, simple, fqn) + classified_elements`` is queued. The
    queue is then passed to ``self._process_tuples``.

    References whose stripped content is ``None`` or empty are reported to
    ``progress_monitor``, counted as skipped, and not queued.
    """
    skipped = 0
    class_tuples = []
    progress_monitor.start('Parsing all unknown refs', ucount)

    # The prefix only depends on the codebase, so build it once.
    prefix = '{0}{1}'.format(
        PREFIX_GENERIC_LINKER, cu.get_codebase_key(self.codebase))

    for reference in unknown_refs:
        content = su.safe_strip(reference.content)
        if content is None or content == '':
            message = 'Empty {0}'.format(reference.pk)
            progress_monitor.info(message)
            progress_monitor.work(message, 1)
            skipped += 1
            # Nothing to look up: a skipped reference must not be queued.
            continue

        (simple, fqn) = je.clean_java_name(je.get_clean_name(content))
        code_elements = cu.get_value(
            prefix, simple, gl.get_any_code_element,
            [simple, self.codebase])
        classified_elements = self._classify_code_elements(code_elements)
        class_tuples.append((reference, simple, fqn) + classified_elements)

    count = self._process_tuples(class_tuples, progress_monitor)
    progress_monitor.info(
        'Associated {0} elements, Skipped {1} elements'.format(
            count, skipped))
    progress_monitor.done()
def _compute_match(self, actual_params, formal_params): matches = 0 size = len(actual_params) if size != formal_params.count(): matches = 0 else: actuals = [ je.clean_java_name(actual_param)[0] for actual_param in actual_params ] formals = [ formal_param.type_simple_name for formal_param in formal_params ] for (actual, formal) in zip(actuals, formals): similarity = su.pairwise_simil(actual.lower(), formal.lower()) if similarity >= self.PARAM_SIMILARITY_THRESHOLD: matches += 1 # We don't want to far half-matches methods because this is too # fragile. if (float(matches) / float(size)) < self.PARAM_SIZE_THRESHOLD: matches = 0 return matches
def _link_all_references(self, unknown_refs, ucount, progress_monitor):
    """Classify every unknown reference and hand the results on for linking.

    For each reference with non-empty content, the cleaned name is looked
    up through ``cu.get_value`` (under a prefix built from
    ``PREFIX_GENERIC_LINKER`` and the codebase key), the returned code
    elements are classified, and the tuple
    ``(reference, simple, fqn) + classified_elements`` is queued. The
    queue is then passed to ``self._process_tuples``.

    References whose stripped content is ``None`` or empty are reported to
    ``progress_monitor``, counted as skipped, and not queued.
    """
    skipped = 0
    class_tuples = []
    progress_monitor.start('Parsing all unknown refs', ucount)

    # The prefix only depends on the codebase, so build it once.
    prefix = '{0}{1}'.format(
        PREFIX_GENERIC_LINKER, cu.get_codebase_key(self.codebase))

    for reference in unknown_refs:
        content = su.safe_strip(reference.content)
        if content is None or content == '':
            message = 'Empty {0}'.format(reference.pk)
            progress_monitor.info(message)
            progress_monitor.work(message, 1)
            skipped += 1
            # Nothing to look up: a skipped reference must not be queued.
            continue

        (simple, fqn) = je.clean_java_name(je.get_clean_name(content))
        code_elements = cu.get_value(
            prefix, simple, gl.get_any_code_element,
            [simple, self.codebase])
        classified_elements = self._classify_code_elements(code_elements)
        class_tuples.append((reference, simple, fqn) + classified_elements)

    count = self._process_tuples(class_tuples, progress_monitor)
    progress_monitor.info(
        'Associated {0} elements, Skipped {1} elements'.format(
            count, skipped))
    progress_monitor.done()
def _parse_method(self, method, container_code_element):
    """Persist a non-private method and one element per parameter.

    Nothing is saved when ``self._is_private`` reports the resolved binding
    as private. Otherwise a ``MethodElement`` is saved and attached to
    ``container_code_element``, then a ``ParameterElement`` is saved for
    each parameter type of the binding, named after the parameter
    declaration of ``method`` found at the same index.
    """
    binding = method.resolveBinding()
    if self._is_private(binding):
        return

    # method header
    handle = binding.getJavaElement().getHandleIdentifier()
    method_name = binding.getName()
    declaring_fqn = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())[1]
    parameter_types = binding.getParameterTypes()
    declarations = method.parameters()
    (return_simple_name, return_fqn) = clean_java_name(
        binding.getReturnType().getQualifiedName())

    method_element = MethodElement(
        codebase=self.codebase,
        kind=self.method_kind,
        simple_name=method_name,
        fqn=declaring_fqn + '.' + method_name,
        parameters_length=len(parameter_types),
        eclipse_handle=handle,
        return_simple_name=return_simple_name,
        return_fqn=return_fqn,
        parser=JAVA_PARSER,
        deprecated=binding.isDeprecated())

    # method container
    method_element.save()
    method_element.containers.add(container_code_element)

    # parse parameters
    for index, parameter_type in enumerate(parameter_types):
        (type_simple_name, type_fqn) = clean_java_name(
            parameter_type.getQualifiedName())
        # The declared identifier serves as both simple name and fqn.
        identifier = declarations[index].getName().getIdentifier()
        parameter_element = ParameterElement(
            codebase=self.codebase,
            kind=self.method_parameter_kind,
            simple_name=identifier,
            fqn=identifier,
            type_simple_name=type_simple_name,
            type_fqn=type_fqn,
            index=index,
            attcontainer=method_element,
            parser=JAVA_PARSER)
        parameter_element.save()
def _get_potentials(self, potentials, fqn_container):
    """Keep the potentials whose container matches ``fqn_container``.

    Comparison is case-insensitive. When the cleaned ``fqn_container`` is
    qualified (its simple name differs from its fully qualified name), a
    potential is kept only if its container has the same fully qualified
    name; otherwise the simple names are compared.
    """
    (wanted_simple, wanted_fqn) = je.clean_java_name(fqn_container)
    wanted_simple = wanted_simple.lower()
    wanted_fqn = wanted_fqn.lower()
    qualified = wanted_simple != wanted_fqn

    def has_matching_container(potential):
        (simple, fqn) = je.clean_java_name(get_container(potential).fqn)
        simple = simple.lower()
        fqn = fqn.lower()
        # A qualified target is only ever compared on the full name.
        if qualified:
            return fqn == wanted_fqn
        return simple == wanted_simple

    return [
        potential for potential in potentials
        if has_matching_container(potential)
    ]
def _get_method_header(self, match): fqn_container = None groupdict = match.groupdict() method_name = groupdict['method_name'] if 'target' in groupdict: target = groupdict['target'] if target is not None and len(target.strip()) > 0: fqn_container = je.clean_java_name(groupdict['target'])[1] return (method_name, fqn_container)
def filter(self, filter_input):
    """Filter the potentials of ``filter_input`` by its element name.

    The element name is cleaned with ``je.clean_java_name`` (called with
    ``True`` as second argument) and the resulting simple and fully
    qualified names are handed to ``custom_filter``, whose result is
    returned unchanged.
    """
    (simple, fqn) = je.clean_java_name(filter_input.element_name, True)
    return custom_filter(
        self,
        filter_input.potentials,
        filter_input.scode_reference,
        simple,
        fqn)
def compute_filters(codebase):
    """Index the code element filters of a codebase by name.

    Returns ``(simple_filters, fqn_filters)``. The first maps each
    lower-cased simple name to the list of filters sharing it; the second
    maps each lower-cased fully qualified name to a single filter, the
    last one encountered winning on duplicates.
    """
    filters = CodeElementFilter.objects.filter(codebase=codebase).all()

    simple_filters = defaultdict(list)
    fqn_filters = {}
    for code_filter in filters:
        simple_key = clean_java_name(code_filter.fqn)[0].lower()
        simple_filters[simple_key].append(code_filter)
    for code_filter in filters:
        fqn_filters[code_filter.fqn.lower()] = code_filter

    return (simple_filters, fqn_filters)
def test_java_name(self):
    """Check ``je.clean_java_name`` on a handful of representative names."""
    # (raw name, expected fully qualified name, expected simple name)
    cases = (
        ('java.lang.String', 'java.lang.String', 'String'),
        ('String', 'String', 'String'),
        ('p1.Foo$Fa', 'p1.Foo.Fa', 'Fa'),
        ('p1.Foo$Fa<p2.String,int>', 'p1.Foo.Fa', 'Fa'),
        ('p1.Bar[[]]', 'p1.Bar', 'Bar'),
    )
    for (raw, expected_fqn, expected_simple) in cases:
        (actual_simple, actual_fqn) = je.clean_java_name(raw)
        self.assertEqual(expected_simple, actual_simple)
        self.assertEqual(expected_fqn, actual_fqn)
def _parse_enumeration_value(self, value, container_code_element):
    """Persist a non-private enumeration value as a ``FieldElement``.

    Nothing is saved when ``self._is_private`` reports the resolved
    variable binding as private. Otherwise the element is named after the
    declaring class plus the value identifier, typed after the binding's
    type, saved, and attached to ``container_code_element``.
    """
    binding = value.resolveVariable()
    if self._is_private(binding):
        return

    handle = binding.getJavaElement().getHandleIdentifier()
    value_name = value.getName().getIdentifier()
    (_, declaring_fqn) = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())
    (type_simple_name, type_fqn) = clean_java_name(
        binding.getType().getQualifiedName())

    element = FieldElement(
        codebase=self.codebase,
        kind=self.enumeration_value_kind,
        simple_name=value_name,
        fqn=declaring_fqn + '.' + value_name,
        eclipse_handle=handle,
        type_simple_name=type_simple_name,
        type_fqn=type_fqn,
        parser=JAVA_PARSER)
    element.save()
    element.containers.add(container_code_element)
def _get_potentials_by_similarity(self, potentials, fqn_container):
    """Keep the potentials whose container name resembles ``fqn_container``.

    Each potential is scored with the larger of the common-token ratio and
    the ``su.pairwise_simil`` score between the lower-cased simple names.
    Potentials with no common token, or with a pairwise score below
    ``self.PAIRWISE_THRESHOLD``, are discarded. Of the rest, only those
    scoring at least the best score are returned; when the best score is
    below ``self.HIGH_SIMILARITY`` the bar is lowered by
    ``self.DIFFERENCE_THRESHOLD``.
    """
    (target_simple, _) = je.clean_java_name(fqn_container)
    target_tokens = [word.lower() for word in su.tokenize(target_simple)]
    target_lower = target_simple.lower()

    scored = []
    best = 0.0
    for candidate in potentials:
        (simple, _) = je.clean_java_name(get_container(candidate).fqn)
        candidate_tokens = [word.lower() for word in su.tokenize(simple)]
        token_ratio = self._get_common_token_ratio(
            target_tokens, candidate_tokens)
        pairwise = su.pairwise_simil(target_lower, simple.lower())

        # This is the minimum required by this filter:
        if token_ratio == 0.0 or pairwise < self.PAIRWISE_THRESHOLD:
            continue

        score = max(token_ratio, pairwise)
        if score > best:
            best = score
        scored.append((candidate, score))

    # Only keep the elements that match the threshold
    # Or accept elements that are fuzzily near the best score
    cutoff = best
    if best < self.HIGH_SIMILARITY:
        cutoff = best - self.DIFFERENCE_THRESHOLD

    return [candidate for (candidate, score) in scored if score >= cutoff]
def _get_potentials_by_similarity(self, potentials, fqn_container):
    """Keep the potentials whose container name resembles ``fqn_container``.

    Each potential is scored with the larger of the common-token ratio and
    the ``su.pairwise_simil`` score between the lower-cased simple names.
    Potentials with no common token, or with a pairwise score below
    ``self.PAIRWISE_THRESHOLD``, are discarded. Of the rest, only those
    scoring at least the best score are returned; when the best score is
    below ``self.HIGH_SIMILARITY`` the bar is lowered by
    ``self.DIFFERENCE_THRESHOLD``.
    """
    (target_simple, _) = je.clean_java_name(fqn_container)
    target_tokens = [word.lower() for word in su.tokenize(target_simple)]
    target_lower = target_simple.lower()

    scored = []
    best = 0.0
    for candidate in potentials:
        (simple, _) = je.clean_java_name(get_container(candidate).fqn)
        candidate_tokens = [word.lower() for word in su.tokenize(simple)]
        token_ratio = self._get_common_token_ratio(
            target_tokens, candidate_tokens)
        pairwise = su.pairwise_simil(target_lower, simple.lower())

        # This is the minimum required by this filter:
        if token_ratio == 0.0 or pairwise < self.PAIRWISE_THRESHOLD:
            continue

        score = max(token_ratio, pairwise)
        if score > best:
            best = score
        scored.append((candidate, score))

    # Only keep the elements that match the threshold
    # Or accept elements that are fuzzily near the best score
    cutoff = best
    if best < self.HIGH_SIMILARITY:
        cutoff = best - self.DIFFERENCE_THRESHOLD

    return [candidate for (candidate, score) in scored if score >= cutoff]
def _parse_annotation_field(self, field, container_code_element):
    """Persist a non-private annotation field as a ``FieldElement``.

    Nothing is saved when ``self._is_private`` reports the resolved binding
    as private. Otherwise the element is named after the declaring class
    plus the field identifier and typed after the binding's return type.
    ``container_code_element`` is set as its ``attcontainer`` and, after
    saving, also added to its ``containers``.
    """
    binding = field.resolveBinding()
    if self._is_private(binding):
        return

    handle = binding.getJavaElement().getHandleIdentifier()
    field_name = field.getName().getIdentifier()
    (_, declaring_fqn) = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())
    (type_simple_name, type_fqn) = clean_java_name(
        binding.getReturnType().getQualifiedName())

    element = FieldElement(
        codebase=self.codebase,
        kind=self.annotation_field_kind,
        simple_name=field_name,
        fqn=declaring_fqn + '.' + field_name,
        eclipse_handle=handle,
        type_simple_name=type_simple_name,
        type_fqn=type_fqn,
        attcontainer=container_code_element,
        parser=JAVA_PARSER)
    element.save()
    element.containers.add(container_code_element)
def _parse_enumeration_value(self, value, container_code_element):
    """Persist a non-private enumeration value as a ``FieldElement``.

    Nothing is saved when ``self._is_private`` reports the resolved
    variable binding as private. Otherwise the element is named after the
    declaring class plus the value identifier, typed after the binding's
    type, saved, and attached to ``container_code_element``.
    """
    binding = value.resolveVariable()
    if self._is_private(binding):
        return

    handle = binding.getJavaElement().getHandleIdentifier()
    value_name = value.getName().getIdentifier()
    (_, declaring_fqn) = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())
    (type_simple_name, type_fqn) = clean_java_name(
        binding.getType().getQualifiedName())

    element = FieldElement(
        codebase=self.codebase,
        kind=self.enumeration_value_kind,
        simple_name=value_name,
        fqn=declaring_fqn + '.' + value_name,
        eclipse_handle=handle,
        type_simple_name=type_simple_name,
        type_fqn=type_fqn,
        parser=JAVA_PARSER)
    element.save()
    element.containers.add(container_code_element)
def filter(self, filter_input):
    """Filter the potentials of ``filter_input`` by its container name.

    When ``filter_input.fqn_container`` is ``None`` or empty, returns
    ``FilterResult(self, False, potentials)``. Otherwise the container
    name is cleaned with ``je.clean_java_name`` (called with ``True`` as
    second argument) and the result of ``custom_filter`` is returned.
    """
    fqn_container = filter_input.fqn_container
    potentials = filter_input.potentials
    scode_reference = filter_input.scode_reference

    unfiltered = FilterResult(self, False, potentials)
    # Without a container there is nothing to filter on.
    if fqn_container is None or fqn_container == '':
        return unfiltered

    (simple, fqn) = je.clean_java_name(fqn_container, True)
    return custom_filter(self, potentials, scode_reference, simple, fqn)
def _parse_annotation_field(self, field, container_code_element):
    """Persist a non-private annotation field as a ``FieldElement``.

    Nothing is saved when ``self._is_private`` reports the resolved binding
    as private. Otherwise the element is named after the declaring class
    plus the field identifier and typed after the binding's return type.
    ``container_code_element`` is set as its ``attcontainer`` and, after
    saving, also added to its ``containers``.
    """
    binding = field.resolveBinding()
    if self._is_private(binding):
        return

    handle = binding.getJavaElement().getHandleIdentifier()
    field_name = field.getName().getIdentifier()
    (_, declaring_fqn) = clean_java_name(
        binding.getDeclaringClass().getQualifiedName())
    (type_simple_name, type_fqn) = clean_java_name(
        binding.getReturnType().getQualifiedName())

    element = FieldElement(
        codebase=self.codebase,
        kind=self.annotation_field_kind,
        simple_name=field_name,
        fqn=declaring_fqn + '.' + field_name,
        eclipse_handle=handle,
        type_simple_name=type_simple_name,
        type_fqn=type_fqn,
        attcontainer=container_code_element,
        parser=JAVA_PARSER)
    element.save()
    element.containers.add(container_code_element)
def _get_field_name(self, scode_reference):
    """Return ``(field_name, fqn_container)`` for a code reference.

    When the reference has a snippet, its content is split on
    ``HANDLE_SEPARATOR`` and the parts are resolved by
    ``self._get_field_name_from_snippet``. Otherwise the cleaned content
    is split by ``je.clean_java_name`` into a field name and a fully
    qualified name; when the two differ, the container is derived from
    the package part of the qualified name, and it stays ``None`` when
    they are equal.

    Both values go through ``su.safe_strip`` before being returned.
    """
    field_name = fqn_container = None

    # Identity test: only the absence of a snippet selects the other path.
    if scode_reference.snippet is not None:
        parts = scode_reference.content.split(HANDLE_SEPARATOR)
        (field_name, fqn_container) = \
            self._get_field_name_from_snippet(parts)
    else:
        content = je.get_clean_name(scode_reference.content)
        (field_name, fqn) = je.clean_java_name(content)
        # A qualified name carries its container in front of the field.
        if fqn != field_name:
            container = je.get_package_name(fqn)
            fqn_container = je.clean_potential_annotation(container)

    return (su.safe_strip(field_name), su.safe_strip(fqn_container))
def _get_potentials(self, potentials, fqn_container):
    """Keep the potentials with a matching element in their hierarchy.

    For each potential that has a container, the elements returned by
    ``ctx.get_hierarchy(container)`` are compared case-insensitively with
    the cleaned ``fqn_container``: on the fully qualified name when the
    target is qualified (its simple name differs from its fully qualified
    name), on the simple name otherwise. The first matching element keeps
    the potential. Potentials without a container are dropped.
    """
    (wanted_simple, wanted_fqn) = je.clean_java_name(fqn_container)
    wanted_simple = wanted_simple.lower()
    wanted_fqn = wanted_fqn.lower()
    qualified = wanted_simple != wanted_fqn

    def is_match(element):
        (simple, fqn) = je.clean_java_name(element.fqn)
        simple = simple.lower()
        fqn = fqn.lower()
        # A qualified target is only ever compared on the full name.
        if qualified:
            return fqn == wanted_fqn
        return simple == wanted_simple

    kept = []
    for potential in potentials:
        container = get_container(potential)
        if container is None:
            continue
        # any() stops at the first matching element of the hierarchy.
        if any(is_match(element)
               for element in ctx.get_hierarchy(container)):
            kept.append(potential)
    return kept
def _compute_match(self, actual_params, formal_params): matches = 0 size = len(actual_params) if size != formal_params.count(): matches = 0 else: actuals = [je.clean_java_name(actual_param)[0] for actual_param in actual_params] formals = [formal_param.type_simple_name for formal_param in formal_params] for (actual, formal) in zip(actuals, formals): similarity = su.pairwise_simil(actual.lower(), formal.lower()) if similarity >= self.PARAM_SIMILARITY_THRESHOLD: matches += 1 # We don't want to far half-matches methods because this is too # fragile. if (float(matches) / float(size)) < self.PARAM_SIZE_THRESHOLD: matches = 0 return matches