def url_to_resource_sequence(e: Operation, resources: list):
    """
    Convert an operation's resources into a space-separated token sequence.

    The sequence starts with ``e.verb``, is optionally followed by the literal
    ``OperationID`` token, and then holds one ``<type>_<n>`` token per resource
    (a singleton contributes a collection token followed by a singleton token).
    The token(s) generated for a resource are also stored in its ``ids``.

    Side effects: unknown-parameter resources at the tail of ``resources`` are
    removed from the list that was passed in, and ``e.operation_id`` is
    overwritten with its normalised (and possibly truncated) form.

    :param e: operation providing ``verb`` and ``operation_id``
    :param resources: list of resources; modified in place as described above
    :return: ``(sequence, resources)`` where ``resources`` is the filtered list
             and ``sequence`` is ``None`` when nothing but the verb was emitted
    """
    # Every resource type starts numbering at 1.
    counter = {resource_type: 1 for resource_type in RESOURCE_TYPES}

    # Walk the list from its last element and drop unknown parameters until
    # something worth keeping is met. The very first list element is never
    # dropped, and neither is a parameter that is considered necessary.
    if resources:
        total = len(resources)
        droppable = []
        for position, candidate in enumerate(reversed(resources), start=1):
            if candidate.resource_type != UNKNOWN_PARAM_RESOURCE or position == total:
                break
            if candidate.is_param and ParamUtils.is_necessary_param(candidate.name):
                break
            droppable.append(candidate)
        for candidate in droppable:
            resources.remove(candidate)

    # Version and "all" resources never take part in the sequence.
    resources = [
        candidate for candidate in resources
        if candidate.resource_type not in (VERSION_RESOURCE, ALL_RESOURCE)
    ]

    tokens = [e.verb]
    if e.operation_id:
        operation_id = ParamUtils.normalize(e.operation_id.replace("post", ""))
        cut = operation_id.find(" by ")
        if cut != -1:
            # Keep only what precedes the first " by ".
            operation_id = operation_id[:cut]
        e.operation_id = operation_id

        words = operation_id.split()
        if len(words) > 1 and is_verb(words[0]):
            tokens.append("OperationID")

    for rs in resources:
        kind = rs.resource_type
        if kind == SINGLETON:
            # A singleton is emitted as "<collection token> <singleton token>".
            ids = [
                '{}_{}'.format(COLLECTION, counter[COLLECTION]),
                '{}_{}'.format(SINGLETON, counter[SINGLETON]),
            ]
            counter[COLLECTION] += 1
        else:
            ids = ['{}_{}'.format(kind, counter[kind])]
        tokens.extend(ids)
        rs.ids = ids
        counter[kind] += 1

    if len(tokens) == 1:
        return None, resources
    return " ".join(str(token) for token in tokens), resources
def extract_resources(e: Operation):
    """
    Build the list of resources described by an operation's URL and base path.

    The URL is cleaned with ``PathUtils.remove_non_informative_segments`` and
    split into segments. Segments are visited from the last one to the first;
    each becomes a ``Resource`` whose type is decided by ``__resource_type``.
    When a segment is classified as a singleton, the resource takes the name of
    the preceding segment and that preceding segment is not visited on its own.
    Parameter segments have their first and last character (the delimiters)
    stripped from the name and get the matching entry looked up in ``e.params``.

    Base-path segments that are nouns or verbs are appended afterwards.

    :param e: operation whose ``url``, ``base_path`` and ``params`` are read
    :return: list of ``Resource`` objects: URL resources ordered from the last
             segment to the first, followed by the base-path resources
    """
    url, base_path = PathUtils.remove_non_informative_segments(e.url, e.base_path)
    segments = PathUtils.extract_segments(url)

    skip = False
    ret = []
    for i in reversed(range(len(segments))):
        if skip:
            # The previous iteration consumed this segment as a singleton's name.
            skip = False
            continue

        current = segments[i]
        previous = None if i == 0 else segments[i - 1]

        resource = Resource(name=current, resource_type=UNKNOWN_RESOURCE)
        tagged = nlp.pos_tag(resource.name)
        # Only the tag of the first tagged token is used for classification.
        resource.resource_type = __resource_type(previous, current, tagged[0][1], e.url)
        resource.is_param = ParamUtils.is_param(current)
        if resource.is_param:
            resource.param = __find_param(current[1:-1], e.params)

        if resource.resource_type == SINGLETON:
            resource.name = previous
            skip = True
        elif resource.is_param:
            resource.name = current[1:-1]

        ret.append(resource)

    # NOTE(review): a bare `reversed(ret)` used to sit here. Its result was
    # discarded, so it never changed anything and the list has always been
    # returned last-segment-first. The dead statement is removed and the order
    # is deliberately left as it was, since reversing would change what callers
    # receive.

    for seg in base_path.split('/'):
        if not seg:
            continue
        if is_noun(seg):
            ret.append(Resource(name=seg, resource_type=BASE_NOUN_RESOURCE))
        elif is_verb(seg):
            ret.append(Resource(name=seg, resource_type=BASE_VERB_RESOURCE))

    return ret
def to_expression(e: Operation, templates: list, post_editing=False):
    """
    Render an operation as an expression by filling in the best template.

    The operation's resources are extracted and tokenised, a template is picked
    for them, and every placeholder in the template (``OperationID`` and the
    per-resource identifiers stored in ``rs.ids``) is replaced by the concrete
    text. Tokens that are neither ``<<``-prefixed nor resource identifiers are
    normalised.

    :param e: operation to translate
    :param templates: candidate templates handed to the template selection
    :param post_editing: when true, the clean-up steps (extra parameters,
                         dangling words, grammar, parameter appending) are run
    :return: the resulting expression, with every " get " collapsed to a space
    """
    _, resources = Templatetizer.url_to_resource_sequence(e, extract_resources(e))
    template = Templatetizer.__best_template(templates, resources)

    if e.operation_id:
        operation_phrase = ParamUtils.normalize(e.operation_id)
        words = operation_phrase.split()
        if len(words) > 1 and is_verb(words[0]):
            template = template.replace("OperationID", operation_phrase)

    for rs in resources:
        if len(rs.ids) > 1:
            # Two identifiers: the collection placeholder becomes the resource
            # name, the singleton placeholder becomes the parameter name.
            collection_token, singleton_token = rs.ids[0], rs.ids[1]
            if collection_token:
                template = template.replace(collection_token, str(rs.name))
            if singleton_token:
                template = template.replace(singleton_token, str(rs.param.name))
        else:
            template = template.replace(rs.ids[0], rs.name)

    # Glue "<<" / ">>" to their neighbours so each marker travels with its token.
    glued = template.replace(" << ", " <<").replace(" >>", ">>")
    pieces = [
        token
        if token.startswith("<<") or Resource.is_resource_identifier(token)
        else ParamUtils.normalize(token)
        for token in glued.split(" ")
    ]
    expression = " ".join(pieces).replace("<<", "<< ").replace(">>", " >>").strip()

    if post_editing:
        for step in (remove_extra_params, remove_dangling_words, edit_grammar):
            expression = step(expression)
        expression = append_parameters(expression, resources)

    return expression.replace(" get ", " ")
def translate_collection(method, resources, sample_values):
    """
    Translate a request on a single action or collection resource into a phrase.

    Only ``get``, ``post`` and ``delete`` on exactly one resource are handled.
    An action resource yields its normalised name, provided every word passes
    ``ParamUtils.is_necessary_param``, the first word is a verb and the name
    ends in neither "ing" nor "s". A collection resource yields a
    "get the ...", "get the list of ...", "create a ..." or "delete all ..."
    phrase. Anything else yields ``None``.

    :param method: HTTP method name in lower case
    :param resources: list of resources of the operation
    :param sample_values: not used by this function
    :return: the phrase, or ``None`` when the request is not covered
    """
    if method not in ('get', 'post', 'delete') or len(resources) != 1:
        return None

    target = resources[0]

    if target.resource_type == ACTION_RESOURCE:
        phrase = ParamUtils.normalize(target.name)
        words = phrase.split()
        if not all(ParamUtils.is_necessary_param(word) for word in words):
            return None
        if not phrase or not is_verb(words[0]) or phrase.endswith(('ing', 's')):
            return None
        return phrase

    if target.resource_type != COLLECTION:
        return None

    noun = ParamUtils.normalize(target.name)
    # Strip leading verbs one after another, in this fixed order.
    for prefix in ("get ", "set ", "create ", "put ", "delete "):
        if noun.startswith(prefix):
            noun = noun[len(prefix):]

    pattern = 'get the {}' if is_singular(noun) or ' ' in noun else 'get the list of {}'
    if method == 'post':
        noun = singular(noun)
        pattern = 'create a {}'
    elif method == 'delete':
        pattern = 'delete all {}'

    return pattern.format(noun)
def generate_template(e: Operation, resources: dict):
    """
    Turn an operation's canonical expression into a template.

    Concrete phrases in the lower-cased canonical expression are replaced by
    placeholders: the normalised operation id becomes ``OperationID`` (only when
    it has more than one word and starts with a verb), and every pair produced
    by ``Templatetizer.__replacements`` for a resource is applied as
    ``(phrase, placeholder)``. Phrases are matched as whole, space-delimited
    chunks and the longest phrases are applied first.

    Side effect: ``e.operation_id`` is overwritten with its normalised form
    ("post" removed, cut at the first " by ", and "add ..." expanded to
    "add a new ...").

    :param e: operation providing ``canonical_expr`` and ``operation_id``
    :param resources: resources of the operation; iterated directly, each item
                      is handed to ``Templatetizer.__replacements``
    :return: ``e.canonical_expr`` untouched when ``resources`` is empty,
             otherwise the template with runs of whitespace collapsed
    """
    if not resources:
        return e.canonical_expr

    replst = []
    if e.operation_id:
        e.operation_id = e.operation_id.replace("post", "")
        e.operation_id = ParamUtils.normalize(e.operation_id)
        if " by " in e.operation_id:
            e.operation_id = e.operation_id[:e.operation_id.index(" by ")]

        words = e.operation_id.split()
        if len(words) > 1 and is_verb(words[0]):
            e.operation_id = ParamUtils.normalize(e.operation_id)
            # Expand only when "add" is the whole first word. A plain prefix
            # test also fires on ids such as "address ..." or "adding ...",
            # and slicing off three characters then yields "add a newress ...".
            if re.match(r'add(\s|$)', e.operation_id) and "new" not in e.operation_id:
                e.operation_id = "add a new" + e.operation_id[3:]
            replst.append((e.operation_id.strip(), "OperationID"))

    can_expr = e.canonical_expr.lower()

    for r in resources:
        for rpl in Templatetizer.__replacements(r):
            replst.append((rpl[0], rpl[1]))

    # Longest phrases first, so a short phrase cannot break up a longer one.
    replst.sort(key=lambda s: len(s[0]), reverse=True)

    # Pad with spaces so phrases at either end still match as whole chunks.
    can_expr = " {} ".format(can_expr)
    for phrase, placeholder in replst:
        can_expr = can_expr.replace(" {} ".format(phrase), " {} ".format(placeholder))

    return re.sub(r'\s+', ' ', can_expr).strip()  # Remove extra spaces
def __resource_type(previous, segment, current_tag, url):
    """
    Classify one URL segment into a resource type.

    The segment is normalised (parameters first lose their first and last
    character, i.e. the delimiters) and then run through an ordered list of
    rules; the first rule that matches decides the type:

    1. non-parameters only: filter ("by..."), search/query, count, all,
       authentication, swagger/yaml and file-extension keywords;
    2. a parameter called "format" is a file extension;
    3. a parameter with a preceding segment is a singleton when it resembles
       that segment (substring, identifier after a plural, "...name"/"...type",
       contains the singular of the previous segment, or a small relative
       edit distance);
    4. plural nouns are collections;
    5. adjectives, "...ed" words and "is..." verbs are attributes;
    6. non-parameter verbs are actions, non-parameter multi-word phrases that
       start with a verb are method names;
    7. remaining parameters are unknown parameters, version-like segments are
       versions, everything else is unknown.

    :param previous: the segment preceding this one, or ``None``/empty
    :param segment: the raw segment text
    :param current_tag: part-of-speech tag of the segment
    :param url: the full URL, used to detect "<segment>." occurrences
    :return: one of the ``*_RESOURCE`` / ``SINGLETON`` / ``COLLECTION`` constants
    """
    current = segment
    is_param = ParamUtils.is_param(current)
    if is_param:
        current = current[1:-1]
    current = ParamUtils.normalize(current)

    if not is_param:
        if current.startswith("by"):
            return FILTER_RESOURCE
        if current.startswith(("search", "query")) or current.endswith(("search", "query")):
            return SEARCH_RESOURCE
        if current == "count":
            return COUNT_RESOURCE
        if current == "all":
            return ALL_RESOURCE
        if ParamUtils.is_authentication(current):
            return AUTH_RESOURCE
        if current in {"swagger", "yaml"}:
            return SWAGGER_RESOURCE
        if current in {'pdf', 'json', 'xml', 'txt', 'doc', 'docx', 'jpeg', 'jpg', 'gif', 'png',
                       'xls', 'tsv', 'csv', 'fmw'}:
            return FILE_EXTENSION_RESOURCE

    if is_param and current in {"format"}:
        return FILE_EXTENSION_RESOURCE

    if is_param and previous:
        if current in previous:
            return SINGLETON
        if (current_tag.startswith('NNS') or is_plural(previous)) and ParamUtils.is_identifier(current):
            return SINGLETON
        # "<segment>." in the URL means the parameter is followed by a dot,
        # in which case it is not treated as a singleton by this rule.
        if current.endswith(('name', 'type')) and "{}.".format(segment) not in url:
            return SINGLETON
        if singular(previous) in current:
            return SINGLETON
        # Edit distance relative to the combined length of both strings.
        if editdistance.eval(current, previous) / (len(current) + len(previous)) < 0.4:
            return SINGLETON

    if current_tag.startswith('NNS') or is_plural(current):
        return COLLECTION

    # The adjective tag is compared in upper case, like 'NNS' and 'VB' in the
    # rest of this function. The former lower-case 'jj' could never match.
    if current_tag.startswith('JJ') or is_adjective(current) or \
            current.endswith('ed') or (current_tag.startswith('VB') and current.startswith('is')):
        return ATTRIBUTE_RESOURCE

    if (current_tag.startswith('VB') or is_verb(current)) and not is_param:
        return ACTION_RESOURCE

    words = current.split()
    if len(words) > 1 and is_verb(words[0]) and not is_param:
        return METHOD_NAME_RESOURCE

    if is_param:
        return UNKNOWN_PARAM_RESOURCE

    if ParamUtils.is_version(current):
        return VERSION_RESOURCE

    return UNKNOWN_RESOURCE