def _parse_return_sentences(paragraph):
    """
    Extracts the return description and the returned type names from one paragraph.

    The children of the paragraph are grouped into sentences (split at "."). Text nodes
    are kept as strings, ``<a>``/``<em>`` children are kept as tags, and every other tag
    is reduced to its text. Only sentences with a string part containing "return" are used;
    the text of every kept tag in such a sentence counts as a returned type.

    :param paragraph: The element (a ``<p>`` or ``<blockquote>``) whose children are inspected.
    :return: ``(text, types)``: the matching sentences joined with ". " and the type names
             joined with " or ". ``None`` if no sentence mentions "return".
    """
    # Step 1: one list of parts (str or tag) per sentence.
    sentences = []
    starts_sentence = True
    for child in paragraph.children:
        if isinstance(child, NavigableString):
            pieces = child.split(".")
            if starts_sentence:  # Start of a new sentence => new lists
                sentences.extend([piece.lstrip()] for piece in pieces)
            else:  # in the middle of a sentence => first piece continues it
                sentences[-1].append(pieces[0])
                sentences.extend([piece] for piece in pieces[1:])
            # end if
            # A text node ending with a dot means the next child opens a new sentence.
            starts_sentence = child.strip().endswith(".")
        else:
            part = child if child.name in ["a", "em"] else child.text
            if starts_sentence:
                sentences.append([part])
                starts_sentence = False
            else:
                sentences[-1].append(part)
            # end if
        # end if
    # end for

    # Step 2: keep the sentences talking about "return" and collect their types.
    type_names = []  # array of strings
    texts = []  # one item = one sentence. Not ending with a dot.
    is_array = False  # deliberately survives sentence boundaries, like the inline original
    for sentence in sentences:
        has_return = False
        sentence_types = []
        text_parts = []
        for part in sentence:
            if isinstance(part, str):
                text_parts.append(part)
                if part.strip().lower().endswith("array of"):
                    is_array = True
                # end if
                if "return" in part.lower():
                    has_return = True
                # end if
            else:  # a kept <a>/<em> tag
                text_parts.append(part.text)
                if is_array:
                    sentence_types.append("list of " + part.text)
                    is_array = False
                else:
                    sentence_types.append(part.text)
                # end if
            # end if
        # end for
        if has_return:  # append, so we can have multiple sentences.
            texts.append("".join(text_parts).strip())
            type_names.extend(sentence_types)
        # end if
    # end for
    if texts or type_names:
        return ". ".join(texts).strip(), " or ".join(type_names).strip()
    # end if
    return None
# end def


def main():
    """
    Scrapes the API documentation at ``BASE_URL`` and writes the generated code to disk.

    Every ``h4`` below ``#dev_page_content`` is treated as one documented item. Its following
    siblings are read until the next ``h4``/``h3``/``hr``: paragraphs become the description
    (and are searched for the return value), a table is handed to ``parse_table``.
    Items become ``func(...)`` or ``clazz(...)`` results, depending on the table type.
    Items without a table are skipped unless their title is in ``WHITELISTED_FUNCS``.

    Afterwards the results are written with ``safe_to_file``. The user is asked whether the
    target folder may be overwritten, and can repeat the writing step as often as wanted.

    :return: None
    """
    import os  # local import: only needed for the existence check before deleting

    # The returned template is not used in this function.
    # NOTE(review): call kept in case it has side effects (e.g. failing early) - confirm.
    get_template("bot.template")
    folder = get_folder_path()
    title_filter = get_filter()  # was named `filter`, which shadowed the builtin
    document = requests.get(BASE_URL)
    # NOTE(review): no parser is specified here, so bs4 chooses one itself.
    bs = BeautifulSoup(document.content)
    results = []
    for h in bs.select("#dev_page_content > h4"):
        print("------")
        anchor = h.find(lol1)
        if not anchor or not anchor.has_attr("name"):
            continue
        # end if
        link = "{base_url}#{anchor}".format(base_url=BASE_URL, anchor=anchor["name"])
        title = h.text
        descr = []
        table_type, param_strings = None, None
        print("title: " + title)
        print("link: " + link)
        if title_filter and title not in title_filter:
            print("Skipping {title}, filtered.".format(title=title))
            continue
        # end if
        type_strings = []  # never filled in this function, so the fallback type stays "Message"
        default_returns = []
        for sibling in h.next_siblings:
            if sibling == "\n":
                continue
            # end if
            if sibling.name in ["p", "blockquote"]:
                if "return" in sibling.text.lower():
                    parsed_returns = _parse_return_sentences(sibling)
                    if parsed_returns is not None:  # finally set it.
                        default_returns = parsed_returns
                    # end if
                # end if
                descr.append(sibling.text)
            elif sibling.name == "table":
                assert sibling.has_attr("class") and "table" in sibling["class"]
                table_type, param_strings = parse_table(sibling)
            elif sibling.name in ("h4", "h3", "hr"):  # next item, next section or end of page
                break
            else:
                # str(): the name may be None for non-tag nodes, which made the
                # string concatenation raise a TypeError.
                print("unknown: " + str(sibling.name))
            # end if
        # end for
        if not all([link, title, descr]):
            print("Skipped: Missing link, title or description")
            continue
        # end if
        if not all([table_type, param_strings]):
            if title not in WHITELISTED_FUNCS:
                print(
                    "Skipped. Has no table with Parameters or Fields.\n"
                    "Also isn't a whitelisted function in `code_generator_settings.WHITELISTED_FUNCS`."
                )
                continue
            # end if
            # -> else: is in WHITELISTED_FUNCS:
            table_type = "func"
        # end if
        descr = "\n".join(descr)
        print("descr: " + repr(descr))
        # WHITELISTED_FUNCS have no params
        params_string = "\n".join(param_strings) if param_strings else None
        if table_type == "func":
            seems_valid = False
            if len(default_returns) != 2:
                # Nothing usable was parsed from the paragraphs, guess from the description.
                if "return" in descr.lower():
                    default_returns = ["", "Message"]
                    default_returns[0] = [
                        x for x in descr.split(".") if "return" in x.lower()
                    ][0].strip()
                    seems_valid = len(default_returns[0].split(".")) == 1
                    default_returns[1] = " or ".join(type_strings) if type_strings else "Message"
                    default_returns[1] = as_types(default_returns[1], "returns")
                else:
                    default_returns = ("On success, True is returned", "True")
                # end if "return" in description
            else:
                # A single sentence (no dot inside) is trusted without asking.
                seems_valid = len(default_returns[0].split(".")) == 1
            # end if default set
            if not seems_valid:
                returns = answer("Textual description what the function returns", default_returns[0])
                return_type = answer("Return type", default_returns[1])
                if isinstance(return_type, str):
                    return_type = as_types(return_type, "return type")
                # end if
            else:
                returns = default_returns[0]
                return_type = default_returns[1]
            # end if
            logger.debug("\n")
            result = func(title, descr, link, params_string, returns=returns, return_type=return_type)
            results.append(result)
        elif table_type == "class":
            if title in CLASS_TYPE_PATHS:
                parent_clazz = CLASS_TYPE_PATHS[title][CLASS_TYPE_PATHS__PARENT]
                print("superclass: " + parent_clazz)
            else:
                parent_clazz = answer("Parent class name", "TgBotApiObject")
            # end if
            result = clazz(title, parent_clazz, descr, link, params_string)
            results.append(result)
        # end if
    # end for

    can_quit = False
    do_overwrite = confirm("Can the folder {path} be overwritten?".format(path=folder))
    print("vvvvvvvvv")
    while not can_quit:
        # Only delete what is actually there; a missing folder made the removal raise.
        if do_overwrite and os.path.exists(folder):
            try:
                # The module is called `send2trash` (lowercase). `import Send2Trash` always
                # failed, so the folder was always deleted permanently.
                from send2trash import send2trash
            except ImportError:
                import shutil
                shutil.rmtree(folder)
            else:
                send2trash(folder)
            # end try
        # end if
        try:
            safe_to_file(folder, results)
            print("Writen to file.")
        except TemplateError as e:
            if isinstance(e, TemplateSyntaxError):
                logger.exception("Template error at {file}:{line}".format(file=e.filename, line=e.lineno))
            else:
                logger.exception("Template error.")
            # end if
        # end try
        can_quit = not confirm("Write again after reloading templates?", default=True)
    # end while
    print("#########")
    print("Exit.")
# end def
def _parse_return_sentences(paragraph):
    """
    Extracts the return description and the returned type names from one paragraph.

    The children of the paragraph are grouped into sentences (split at "."). Text nodes
    are kept as strings, ``<a>``/``<em>`` children are kept as tags, and every other tag
    is reduced to its text. Only sentences with a string part containing "return" are used;
    the text of every kept tag in such a sentence counts as a returned type.

    :param paragraph: The element (a ``<p>`` or ``<blockquote>``) whose children are inspected.
    :return: ``(text, types)``: the matching sentences joined with ". " and the type names
             joined with " or ". ``None`` if no sentence mentions "return".
    """
    # Step 1: one list of parts (str or tag) per sentence.
    sentences = []
    starts_sentence = True
    for child in paragraph.children:
        if isinstance(child, NavigableString):
            pieces = child.split(".")
            if starts_sentence:  # Start of a new sentence => new lists
                sentences.extend([piece.lstrip()] for piece in pieces)
            else:  # in the middle of a sentence => first piece continues it
                sentences[-1].append(pieces[0])
                sentences.extend([piece] for piece in pieces[1:])
            # end if
            # A text node ending with a dot means the next child opens a new sentence.
            starts_sentence = child.strip().endswith(".")
        else:
            part = child if child.name in ["a", "em"] else child.text
            if starts_sentence:
                sentences.append([part])
                starts_sentence = False
            else:
                sentences[-1].append(part)
            # end if
        # end if
    # end for

    # Step 2: keep the sentences talking about "return" and collect their types.
    type_names = []  # array of strings
    texts = []  # one item = one sentence. Not ending with a dot.
    is_array = False  # deliberately survives sentence boundaries, like the inline original
    for sentence in sentences:
        has_return = False
        sentence_types = []
        text_parts = []
        for part in sentence:
            if isinstance(part, str):
                text_parts.append(part)
                if part.strip().lower().endswith("array of"):
                    is_array = True
                # end if
                if "return" in part.lower():
                    has_return = True
                # end if
            else:  # a kept <a>/<em> tag
                text_parts.append(part.text)
                if is_array:
                    sentence_types.append("list of " + part.text)
                    is_array = False
                else:
                    sentence_types.append(part.text)
                # end if
            # end if
        # end for
        if has_return:  # append, so we can have multiple sentences.
            texts.append("".join(text_parts).strip())
            type_names.extend(sentence_types)
        # end if
    # end for
    if texts or type_names:
        return ". ".join(texts).strip(), " or ".join(type_names).strip()
    # end if
    return None
# end def


def load_from_html(folder):
    """
    Scrapes the API documentation at ``BASE_URL`` and builds the func/class results from it.

    Every ``h4`` below ``#dev_page_content`` is treated as one documented item. Its following
    siblings are read until the next ``h4``/``h3``/``hr``: paragraphs become the description
    (and are searched for the return value), a table is handed to ``parse_table``.
    Items without a table are skipped unless their title is in ``WHITELISTED_FUNCS``.
    For whitelisted titles the parsed return text/type is compared with the ``'expected'``
    values of the whitelist entry and, if nothing mismatches, replaced by its ``'replace'`` values.

    :param folder: Not used by this function; kept so existing callers keep working.
    :return: Tuple ``(results, html)``: the list of generated results and the raw
             downloaded page content (``document.content``).
    """
    title_filter = get_filter()  # was named `filter`, which shadowed the builtin
    document = requests.get(BASE_URL)
    # NOTE(review): no parser is specified here, so bs4 chooses one itself.
    bs = BeautifulSoup(document.content)
    results = []
    for h in bs.select("#dev_page_content > h4"):
        print("------")
        anchor = h.find(lol1)
        if not anchor or not anchor.has_attr("name"):
            continue
        # end if
        link = "{base_url}#{anchor}".format(base_url=BASE_URL, anchor=anchor["name"])
        title = h.text
        descr = []
        table_type, param_strings = None, None
        print("title: " + title)
        print("link: " + link)
        if title_filter and title not in title_filter:
            print("Skipping {title}, filtered.".format(title=title))
            continue
        # end if
        type_strings = []  # never filled in this function, so the fallback type stays "Message"
        default_returns = []
        for sibling in h.next_siblings:
            if sibling == "\n":
                continue
            # end if
            if sibling.name in ["p", "blockquote"]:
                if "return" in sibling.text.lower():
                    parsed_returns = _parse_return_sentences(sibling)
                    if parsed_returns is not None:  # finally set it.
                        # Must be a list: the whitelist handling below assigns to its items.
                        default_returns = list(parsed_returns)
                    # end if
                # end if
                # Normalise typographic double quotes to plain ones.
                descr.append(sibling.text.replace('“', '"').replace('”', '"'))
            elif sibling.name == "table":
                assert sibling.has_attr("class") and "table" in sibling["class"]
                table_type, param_strings = parse_table(sibling)
            elif sibling.name in ("h4", "h3", "hr"):  # next item, next section or end of page
                break
            else:
                # str(): the name may be None for non-tag nodes, which made the
                # string concatenation raise a TypeError.
                print("unknown: " + str(sibling.name))
            # end if
        # end for
        if not all([link, title, descr]):
            print("Skipped: Missing link, title or description")
            continue
        # end if
        if not all([table_type, param_strings]):
            if title not in WHITELISTED_FUNCS:
                print(
                    "Skipped. Has no table with Parameters or Fields.\n"
                    "Also isn't a whitelisted function in `code_generator_settings.WHITELISTED_FUNCS`."
                )
                continue
            # end if
            # -> else: is in WHITELISTED_FUNCS:
            table_type = "func"
        # end if
        descr = "\n".join(descr)
        print("descr: " + repr(descr))
        # WHITELISTED_FUNCS have no params
        params_string = "\n".join(param_strings) if param_strings else None
        if table_type == "func":
            seems_valid = False
            if len(default_returns) != 2:
                # Nothing usable was parsed from the paragraphs, guess from the description.
                if "return" in descr.lower():
                    default_returns = ["", "Message"]
                    default_returns[0] = [
                        x for x in descr.split(".") if "return" in x.lower()
                    ][0].strip()
                    seems_valid = len(default_returns[0].split(".")) == 1
                    default_returns[1] = " or ".join(type_strings) if type_strings else "Message"
                    default_returns[1] = as_types(default_returns[1], "returns")
                else:
                    default_returns = ["On success, True is returned", "True"]
                # end if "return" in description
            else:
                # A single sentence (no dot inside) is trusted without asking.
                seems_valid = len(default_returns[0].split(".")) == 1
            # end if default set

            replaced_valid = None  # load replacements from WHITELISTED_FUNCS.
            if title in WHITELISTED_FUNCS:
                # "func": {'return': {'expected': '', 'replace': ''}, 'r_type': {'expected': '', 'replace': ''}},
                wlist_func = WHITELISTED_FUNCS[title]
                wlist_func_return = wlist_func.get('return')
                wlist_func_r_type = wlist_func.get('r_type')
                if wlist_func_return and default_returns[0] != wlist_func_return['expected']:
                    print(
                        f"whitelist: Mismatch in return. Expected {wlist_func_return['expected']!r}, got {default_returns[0]!r}."
                    )
                    replaced_valid = False
                # end if
                if wlist_func_r_type and default_returns[1] != wlist_func_r_type['expected']:
                    print(
                        f"whitelist: Mismatch in r_type. Expected {wlist_func_r_type['expected']!r}, got {default_returns[1]!r}"
                    )
                    replaced_valid = False
                # end if
                if replaced_valid is None:  # whitelist didn't fail
                    replaced_valid = True
                    print("the found return: " + repr(default_returns[0]) + '.')
                    print("the found r_type: " + repr(default_returns[1]) + '.')
                    # Only replace what the entry actually configures. Subscripting a
                    # missing 'return'/'r_type' (None) used to raise a TypeError.
                    if wlist_func_return:
                        print("whitelist return: " + repr(wlist_func_return['replace']) + '.')
                    # end if
                    if wlist_func_r_type:
                        print("whitelist r_type: " + repr(wlist_func_r_type['replace']) + '.')
                    # end if
                    if wlist_func_return:
                        default_returns[0] = wlist_func_return['replace']
                    # end if
                    if wlist_func_r_type:
                        default_returns[1] = wlist_func_r_type['replace']
                    # end if
                # end if
            # end if

            # Ask the user only if neither the parser nor the whitelist vouches for the values.
            if not seems_valid and not replaced_valid:
                returns = answer("Textual description what the function returns", default_returns[0])
                return_type = answer("Return type", default_returns[1])
                if isinstance(return_type, str):
                    return_type = as_types(return_type, "return type")
                # end if
            else:
                returns = default_returns[0]
                return_type = default_returns[1]
            # end if
            logger.debug("\n")
            result = func(title, descr, link, params_string, returns=returns, return_type=return_type)
            results.append(result)
        elif table_type == "class":
            if title in CLASS_TYPE_PATHS:
                parent_clazz = CLASS_TYPE_PATHS[title][CLASS_TYPE_PATHS__PARENT]
                print("superclass: " + parent_clazz)
            else:
                parent_clazz = answer("Parent class name", "TgBotApiObject")
            # end if
            result = clazz(title, parent_clazz, descr, link, params_string)
            results.append(result)
        # end if
    # end for
    return results, document.content
# end def
def _parse_return_sentences(paragraph):
    """
    Extracts the return description and the returned type names from one paragraph.

    The children of the paragraph are grouped into sentences (split at "."). Text nodes
    are kept as strings, ``<a>``/``<em>`` children are kept as tags, and every other tag
    is reduced to its text. Only sentences with a string part containing "return" are used;
    the text of every kept tag in such a sentence counts as a returned type.

    :param paragraph: The element (a ``<p>`` or ``<blockquote>``) whose children are inspected.
    :return: ``(text, types)``: the matching sentences joined with ". " and the type names
             joined with " or ". ``None`` if no sentence mentions "return".
    """
    # Step 1: one list of parts (str or tag) per sentence.
    sentences = []
    starts_sentence = True
    for child in paragraph.children:
        if isinstance(child, NavigableString):
            pieces = child.split(".")
            if starts_sentence:  # Start of a new sentence => new lists
                sentences.extend([piece.lstrip()] for piece in pieces)
            else:  # in the middle of a sentence => first piece continues it
                sentences[-1].append(pieces[0])
                sentences.extend([piece] for piece in pieces[1:])
            # end if
            # A text node ending with a dot means the next child opens a new sentence.
            starts_sentence = child.strip().endswith(".")
        else:
            part = child if child.name in ["a", "em"] else child.text
            if starts_sentence:
                sentences.append([part])
                starts_sentence = False
            else:
                sentences[-1].append(part)
            # end if
        # end if
    # end for

    # Step 2: keep the sentences talking about "return" and collect their types.
    type_names = []  # array of strings
    texts = []  # one item = one sentence. Not ending with a dot.
    is_array = False  # deliberately survives sentence boundaries, like the inline original
    for sentence in sentences:
        has_return = False
        sentence_types = []
        text_parts = []
        for part in sentence:
            if isinstance(part, str):
                text_parts.append(part)
                if part.strip().lower().endswith("array of"):
                    is_array = True
                # end if
                if "return" in part.lower():
                    has_return = True
                # end if
            else:  # a kept <a>/<em> tag
                text_parts.append(part.text)
                if is_array:
                    sentence_types.append("list of " + part.text)
                    is_array = False
                else:
                    sentence_types.append(part.text)
                # end if
            # end if
        # end for
        if has_return:  # append, so we can have multiple sentences.
            texts.append("".join(text_parts).strip())
            type_names.extend(sentence_types)
        # end if
    # end for
    if texts or type_names:
        return ". ".join(texts).strip(), " or ".join(type_names).strip()
    # end if
    return None
# end def


def main():
    """
    Scrapes the API documentation at ``BASE_URL`` and writes the generated code to disk.

    Every ``h4`` below ``#dev_page_content`` is treated as one documented item. Its following
    siblings are read until the next ``h4``/``h3``/``hr``: paragraphs become the description
    (and are searched for the return value), a table is handed to ``parse_table``.
    Items become ``func(...)`` or ``clazz(...)`` results, depending on the table type.
    Items without a table are skipped unless their title is in ``WHITELISTED_FUNCS``.

    Afterwards the results are written with ``safe_to_file`` and the downloaded page is stored
    as ``api.html`` inside the folder. The user is asked whether the target folder may be
    overwritten, and can repeat the writing step as often as wanted.

    :return: None
    """
    import os  # local import: only needed for the existence check before deleting

    # The returned template is not used in this function.
    # NOTE(review): call kept in case it has side effects (e.g. failing early) - confirm.
    get_template("bot.template")
    folder = get_folder_path()
    title_filter = get_filter()  # was named `filter`, which shadowed the builtin
    document = requests.get(BASE_URL)
    # NOTE(review): no parser is specified here, so bs4 chooses one itself.
    bs = BeautifulSoup(document.content)
    results = []
    for h in bs.select("#dev_page_content > h4"):
        print("------")
        anchor = h.find(lol1)
        if not anchor or not anchor.has_attr("name"):
            continue
        # end if
        link = "{base_url}#{anchor}".format(base_url=BASE_URL, anchor=anchor["name"])
        title = h.text
        descr = []
        table_type, param_strings = None, None
        print("title: " + title)
        print("link: " + link)
        if title_filter and title not in title_filter:
            print("Skipping {title}, filtered.".format(title=title))
            continue
        # end if
        type_strings = []  # never filled in this function, so the fallback type stays "Message"
        default_returns = []
        for sibling in h.next_siblings:
            if sibling == "\n":
                continue
            # end if
            if sibling.name in ["p", "blockquote"]:
                if "return" in sibling.text.lower():
                    parsed_returns = _parse_return_sentences(sibling)
                    if parsed_returns is not None:  # finally set it.
                        default_returns = parsed_returns
                    # end if
                # end if
                descr.append(sibling.text)
            elif sibling.name == "table":
                assert sibling.has_attr("class") and "table" in sibling["class"]
                table_type, param_strings = parse_table(sibling)
            elif sibling.name in ("h4", "h3", "hr"):  # next item, next section or end of page
                break
            else:
                # str(): the name may be None for non-tag nodes, which made the
                # string concatenation raise a TypeError.
                print("unknown: " + str(sibling.name))
            # end if
        # end for
        if not all([link, title, descr]):
            print("Skipped: Missing link, title or description")
            continue
        # end if
        if not all([table_type, param_strings]):
            if title not in WHITELISTED_FUNCS:
                print("Skipped. Has no table with Parameters or Fields.\n"
                      "Also isn't a whitelisted function in `code_generator_settings.WHITELISTED_FUNCS`.")
                continue
            # end if
            # -> else: is in WHITELISTED_FUNCS:
            table_type = "func"
        # end if
        descr = "\n".join(descr)
        print("descr: " + repr(descr))
        # WHITELISTED_FUNCS have no params
        params_string = "\n".join(param_strings) if param_strings else None
        if table_type == "func":
            seems_valid = False
            if len(default_returns) != 2:
                # Nothing usable was parsed from the paragraphs, guess from the description.
                if "return" in descr.lower():
                    default_returns = ["", "Message"]
                    default_returns[0] = [x for x in descr.split(".") if "return" in x.lower()][0].strip()
                    seems_valid = len(default_returns[0].split(".")) == 1
                    default_returns[1] = " or ".join(type_strings) if type_strings else "Message"
                    default_returns[1] = as_types(default_returns[1], "returns")
                else:
                    default_returns = ("On success, True is returned", "True")
                # end if "return" in description
            else:
                # A single sentence (no dot inside) is trusted without asking.
                seems_valid = len(default_returns[0].split(".")) == 1
            # end if default set
            if not seems_valid:
                returns = answer("Textual description what the function returns", default_returns[0])
                return_type = answer("Return type", default_returns[1])
                if isinstance(return_type, str):
                    return_type = as_types(return_type, "return type")
                # end if
            else:
                returns = default_returns[0]
                return_type = default_returns[1]
            # end if
            logger.debug("\n")
            result = func(title, descr, link, params_string, returns=returns, return_type=return_type)
            results.append(result)
        elif table_type == "class":
            if title in CLASS_TYPE_PATHS:
                parent_clazz = CLASS_TYPE_PATHS[title][CLASS_TYPE_PATHS__PARENT]
                print("superclass: " + parent_clazz)
            else:
                parent_clazz = answer("Parent class name", "TgBotApiObject")
            # end if
            result = clazz(title, parent_clazz, descr, link, params_string)
            results.append(result)
        # end if
    # end for

    can_quit = False
    do_overwrite = confirm("Can the folder {path} be overwritten?".format(path=folder))
    print("vvvvvvvvv")
    while not can_quit:
        # Only delete what is actually there; a missing folder made the removal raise.
        if do_overwrite and os.path.exists(folder):
            try:
                # The module is called `send2trash` (lowercase). `import Send2Trash` always
                # failed, so the folder was always deleted permanently.
                from send2trash import send2trash
            except ImportError:
                import shutil
                shutil.rmtree(folder)
            else:
                send2trash(folder)
            # end try
        # end if
        try:
            safe_to_file(folder, results)
            # Keep the page the code was generated from next to the generated files.
            with open(path_join(folder, "api.html"), "wb") as f:
                f.write(document.content)
            # end with
            print("Writen to file.")
        except TemplateError as e:
            if isinstance(e, TemplateSyntaxError):
                logger.exception("Template error at {file}:{line}".format(file=e.filename, line=e.lineno))
            else:
                logger.exception("Template error.")
            # end if
        # end try
        can_quit = not confirm("Write again after reloading templates?", default=True)
    # end while
    print("#########")
    print("Exit.")
# end def