def __init__(self):
    """Initialise the base HTMLParser and give every attribute its start value."""
    HTMLParser.__init__(self)
    # Two independent, initially empty lists.
    self.buf, self.last_text = [], []
    self.hide_output = False
    self.tag_count = 0
    self.current_tag = None
def __init__(self):
    """Initialise the base HTMLParser; start with an empty link list and title."""
    HTMLParser.__init__(self)
    self.links = []
    self.in_anchor, self.attrs = False, None
    self.title = ''
def __init__(self, baseURL):
    """Initialise the base HTMLParser and remember ``baseURL``.

    Arguments:
    baseURL -- stored unchanged on the instance as ``self.baseURL``.
    """
    HTMLParser.__init__(self)
    # Fresh, empty containers for every instance.
    self.stack, self.anchors, self.links = [], set(), []
    self.baseURL = baseURL
    self.printed = False
def __init__(self, **kwargs):
    """Initialise the base HTMLParser and keep the keyword arguments.

    Arguments:
    **kwargs -- stored as-is in ``self.kwargs``; not forwarded to HTMLParser.
    """
    HTMLParser.__init__(self)
    self.kwargs, self.active = kwargs, None
    self.last_content = ""
    self.rows = []
    self.found_first_valid_num = False
def __init__(self, builder=None, encoding=None):
    """Store the tree builder and encoding, then initialise the base parser.

    Arguments:
    builder -- object kept as the private builder; a new
        ``ElementTree.TreeBuilder()`` is created when it is None.
    encoding -- stored in ``self.encoding``; any falsy value is replaced
        by "iso-8859-1".
    """
    self.__stack = []
    self.__builder = ElementTree.TreeBuilder() if builder is None else builder
    self.encoding = encoding or "iso-8859-1"
    # The base initialiser runs last, after the attributes above exist.
    HTMLParser.__init__(self)
def __init__(self):
    """Emit a DeprecationWarning, then initialise the anchor list and base parser."""
    # stacklevel=2 attributes the warning to the code that instantiates the class.
    warnings.warn(
        "portage.getbinpkg.ParseLinks is deprecated",
        DeprecationWarning,
        stacklevel=2,
    )
    self.PL_anchors = []
    html_parser_HTMLParser.__init__(self)
def __init__(self):
    """Initialise the base HTMLParser and the default configuration values."""
    HTMLParser.__init__(self)
    self.collect_data = False
    self.bound = 20
    # Tag name and (attribute, value) pair stored as defaults.
    self.des_tag, self.des_attr = "div", ("id", "content")
    self.stations_info = None
def __init__(self):
    """Initialise the base HTMLParser and start in ``State.NOWHERE``."""
    HTMLParser.__init__(self)
    self.state = State.NOWHERE
    self.data = ""
    self.pnpid = self.company = None
    self.table = []
def __init__(self):
    """Initialise the base HTMLParser; no URL or params yet, method "GET"."""
    HTMLParser.__init__(self)
    self.url = None
    self.params = {}
    self.in_form = self.form_parsed = False
    self.method = "GET"
def __init__(self):
    """Initialise the base HTMLParser and create empty per-instance containers."""
    HTMLParser.__init__(self)
    self.state = []
    self.href = ""
    # Two separate dictionaries, not one shared object.
    self.obj, self.index = {}, {}
    self.done = False
def __init__(self):
    """Initialise the base HTMLParser, the result dict and the recording flags."""
    HTMLParser.__init__(self)
    # The result dictionary starts with an empty "body" entry.
    self.data = {"body": ""}
    self.recordingAuthor = False
    self.recordingBody = False
    self.save_tags = ['p', 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5']
def __init__(self, args):
    """Initialise the parser from ``args`` and load the item list.

    Arguments:
    args -- object providing the attributes URL, depth, time, db, coll,
        sub and verbose (all read below).

    Side effects: creates a ``MongoClient()`` and calls ``self.queryDB()``.
    """
    HTMLParser.__init__(self)

    # ---- Settings taken from the arguments ----
    self.root_url = args.URL        # Original URL passed.
    parsed_root = urllib.parse.urlparse(self.root_url)
    self.netloc = parsed_root.netloc    # Netloc of the URL.
    self.depth = args.depth         # Distance (pages) to travel.
    self.timer = args.time          # Amount of time per page.
    client = MongoClient()
    self.db = client[args.db][args.coll]    # Database that stores data.
    self.sub = args.sub             # Subdirectory to set as root of webpage.
    self.verbose = args.verbose     # Verbosity setting.

    # ---- Crawl bookkeeping ----
    self.key_terms = ["buy", "sell", "trade", "trading"]
    self.count = 0                  # Amount of pages processed.
    self.posts = 0                  # Amount of posts scanned.
    self.urlBlacklist = []          # Already completed URLs.
    self.urlDNU = []                # Do-not-use URLs, duplicates.
    self.urlList = [self.root_url]  # List of URLs to scan.
    self.items = []                 # Items to look for.
    self.discovered = {}            # Items discovered + [URLs].
    self.BigDict = {}               # Dictionary containing ThreadID + [URLs] <- urlDNU list?

    # ---- Parsing state flags ----
    self.li_main = False            # Start of play contribution.
    self.blockquote_main = False    # Start of the message.
    self.div_quote_main = False     # Start of quote container.
    self.div_quote_xpand = False    # Start of QuoteExpand.
    self.blockquote_quote = False   # Start of quote message.
    self.text_lock = True           # Locks the ability to print text or use it.
    self.li_name = None             # Name of original author.
    self.blockquote_name = None     # Name of person being quoted.

    self.queryDB()                  # Loads the self.items list.
def __init__(self, strict=False):
    """Initialise the base parser and reset every extraction variable.

    Arguments:
    strict -- forwarded to ``HTMLParser.__init__`` when the running Python
        still accepts it; otherwise ignored.
    """
    # Constructor call of parent class.  The ``strict`` argument was removed
    # from HTMLParser in Python 3.5, where passing it raises TypeError, so
    # fall back to the argument-less call in that case.
    try:
        HTMLParser.__init__(self, strict)
    except TypeError:
        HTMLParser.__init__(self)

    # Defining variables of this class.
    # There are 3 types of variables.
    # data_variables: these are the required information
    # data_check_variables: boolean values corresponding to each of the
    #     data_variables, to keep a check on whether the data has already
    #     been extracted or not.
    # tag_check_variables: used for matching the proper format.
    self.h1 = False
    self.desc = False
    self.description = ' '
    self.sol = False
    self.solution = ' '
    self.p = False
    self.li = False
    self.ref = False
    self.references = ' '
    self.cvss = False
    self.cvss_score = 0.0
    self.cve = False
    self.cve_id = ' '
    self.links = []
    self.prod = False
    self.products = []
    self.last_h6 = ' '
    self.h6 = False
    self.h7 = False
    self.clas = False
    self.attack_from = ' '
    self.attk = False
    self.impact = ' '
    self.impt = False
def __init__(self):
    """Initialise the base HTMLParser with ``convert_charrefs`` switched off."""
    HTMLParser.__init__(self)
    # Set after the base initialiser has run.
    self.convert_charrefs = False
    self.last = "starttag"
    self.in_pre = False
    self.output = self.last_tag = ""
def __init__(self, results, url, trackers):
    """Initialise the base HTMLParser and store the three arguments.

    Arguments:
    results, url, trackers -- stored unchanged under the same attribute names.
    """
    HTMLParser.__init__(self)
    self.results, self.url, self.trackers = results, url, trackers
    self.td_counter = self.current_item = None
def __init__(self, base_href):
    """Initialise the base HTMLParser, the group state, then ``_clear_info()``.

    Arguments:
    base_href -- stored unchanged as ``self.base_href``.
    """
    HTMLParser.__init__(self)
    self.base_href = base_href
    self.results = {}
    self.group_name = None
    self.group_desc = None
    self.in_group_name = 0
    self.in_group_desc = 0
    self.in_activity = 0
    self._clear_info()
def __init__(self, url):
    """Initialise the base HTMLParser, store ``url`` and pre-compile a pattern.

    Arguments:
    url -- stored unchanged as ``self.url``.
    """
    HTMLParser.__init__(self)
    self.url = url
    self.current_item = self.save_data = None
    self.seeds_leech = False
    # Compiled once here: a pattern matching a literal comma.
    self.size_repl = re_compile(",")
def __init__(self, zip_file):
    """Initialise the base HTMLParser with an empty output buffer.

    Arguments:
    zip_file -- stored unchanged as ``self._zip_file``.
    """
    HTMLParser.__init__(self)
    # Buffer for the processed HTML.
    self._html = StringIO()
    self._zip_file = zip_file
    # Used to exclude the contents of script and object tags.
    self._excl_nested_level = 0
def __init__(self):
    """Initialise the base parser (call style depends on ``py3``) and the title."""
    if not py3:
        HTMLParser.__init__(self)
    else:
        super().__init__()
    # use a list to store literal bytes and escaped Unicode
    self.title = []
def __init__(self, strict=False, reps=None, outs=None, sc=True):
    """Store the collaborators, link ``reps`` back to this parser, init the base.

    Arguments:
    strict -- forwarded to ``HTMLParser.__init__`` when the running Python
        still accepts it; otherwise ignored.
    reps -- stored as ``self.rep``; when not None its ``parser`` attribute
        is set to this instance.
    outs -- stored as ``self.outStream``.
    sc -- stored as ``self.stripComment``.
    """
    self.rep = reps
    self.outStream = outs
    self.stripComment = sc
    # The default reps=None has no ``parser`` attribute to assign to, so the
    # back-reference is only installed when an object was supplied.
    if reps is not None:
        reps.parser = self
    # ``strict`` was removed from HTMLParser in Python 3.5, where passing it
    # raises TypeError; fall back to the argument-less call in that case.
    try:
        HTMLParser.__init__(self, strict)
    except TypeError:
        HTMLParser.__init__(self)
def __init__(self):
    """Override the HTMLParser constructor so every variable starts flushed.

    Arguments:
    self -- the instance being initialised; supplied implicitly on
        instantiation.
    """
    # Initialize the HTML Parser.
    HTMLParser.__init__(self)

    # Recording flags, all off.
    self._record_name = False
    self._record_meal = False
    self._record_station = False
    self._record_attributes = False

    # Current day / meal / station, all empty.
    self._day = self._meal = self._station = EMPTY_STRING

    # Accumulators: three separate empty lists.
    self._name_text, self._station_text, self._attributes = [], [], []

    # Hold all the dining hall menus.
    self.menu = []
def __init__(self):
    """Initialise the base parser, asking for ``convert_charrefs`` when supported.

    ``self.html_parser_init_kwargs`` is only created on the branch that
    passes keyword arguments to ``HTMLParser.__init__``.
    """
    self.foundGo = False
    # Compare the whole (major, minor) pair: checking only ``minor`` treats
    # Python 2.7 as "new enough" and passes it an unsupported keyword.
    if sys.version_info < (3, 4):
        HTMLParser.__init__(self)
    else:
        self.html_parser_init_kwargs = {'convert_charrefs': True}
        HTMLParser.__init__(self, **self.html_parser_init_kwargs)
def __init__(self,
             remove_comments=False,
             remove_empty_space=False,
             remove_all_empty_space=False,
             reduce_empty_attributes=True,
             reduce_boolean_attributes=False,
             remove_optional_attribute_quotes=True,
             keep_pre=False,
             pre_tags=PRE_TAGS,
             pre_attr='pre'):
    """Initialise the base parser, store the options and reset parse state.

    Every keyword argument is stored unchanged on the instance under the
    same name.
    """
    # convert_charrefs is True by default in Python 3.5.0 and newer. It was
    # introduced in 3.4.  A tuple comparison is used so that the check stays
    # correct for any later major version whose minor number is below 4.
    if sys.version_info >= (3, 4):
        HTMLParser.__init__(self, convert_charrefs=False)
    else:
        HTMLParser.__init__(self)

    # Options.
    self.keep_pre = keep_pre
    self.pre_tags = pre_tags
    self.remove_comments = remove_comments
    self.remove_empty_space = remove_empty_space
    self.remove_all_empty_space = remove_all_empty_space
    self.reduce_empty_attributes = reduce_empty_attributes
    self.reduce_boolean_attributes = reduce_boolean_attributes
    self.remove_optional_attribute_quotes = remove_optional_attribute_quotes
    self.pre_attr = pre_attr

    # Parse state.
    self._data_buffer = []
    self._in_pre_tag = 0
    self._in_head = False
    self._in_title = False
    self._after_doctype = False
    self._tag_stack = []
    self._title_newly_opened = False
    self.__title_trailing_whitespace = False
def __init__(self):
    """Initialise the base HTMLParser with an empty item list and cleared flags."""
    HTMLParser.__init__(self)
    self.items = []
    self.foundItem = self.br_before = self.checkBr = False
    self.current_item_url = ""
def __init__(self,
             remove_comments=False,
             remove_empty_space=False,
             remove_all_empty_space=False,
             reduce_empty_attributes=True,
             reduce_boolean_attributes=False,
             remove_optional_attribute_quotes=True,
             keep_pre=False,
             pre_tags=PRE_TAGS,
             pre_attr='pre'):
    """Initialise the base parser, store the options and reset parse state.

    Every keyword argument is stored unchanged on the instance under the
    same name.
    """
    HTMLParser.__init__(self)

    # Options related to <pre> handling.
    self.keep_pre, self.pre_tags = keep_pre, pre_tags

    # Remaining options.
    self.remove_comments = remove_comments
    self.remove_empty_space = remove_empty_space
    self.remove_all_empty_space = remove_all_empty_space
    self.reduce_empty_attributes = reduce_empty_attributes
    self.reduce_boolean_attributes = reduce_boolean_attributes
    self.remove_optional_attribute_quotes = remove_optional_attribute_quotes
    self.pre_attr = pre_attr

    # Parse state: empty buffer, zero nesting depth, all flags off.
    self._data_buffer = []
    self._in_pre_tag = 0
    self._in_head = self._in_title = self._after_doctype = False
    self._tag_stack = []
    self._title_newly_opened = False
    self.__title_trailing_whitespace = False
def __init__(self, news):
    """Initialise the base HTMLParser and store ``news``.

    Arguments:
    news -- stored unchanged as ``self.news``.
    """
    HTMLParser.__init__(self)
    self.count_a = 0
    self.current_tag = ""
    self.looking_for_testata = False
    self.news = news
def __init__(self):
    """Initialise the base HTMLParser and create the empty working lists."""
    HTMLParser.__init__(self)
    self.links = []
    self.isNumber = 0
    # Three separate lists, none of them shared.
    self.stack, self.day, self.test = [], [], []
def __init__(self):
    """Initialise the base HTMLParser, two ``Stack`` objects and the rating dict."""
    HTMLParser.__init__(self)
    self.starParsing = self.ratingParsing = False
    # Each stack is its own Stack() instance.
    self.starStack = Stack()
    self.ratingStack = Stack()
    self.ratingDict = {}
def __init__(self):
    """Initialise the base HTMLParser and the empty template bookkeeping."""
    HTMLParser.__init__(self)
    self.stack, self.template_names = [], []
    self.templates = {}
    self.current_template = ""
    self.current_template_count = 0
def __init__(self):
    """Set the attribute defaults, then initialise the base HTMLParser."""
    self.urlList = []
    self.index = 0
    self.nextUrl = ''
    # Tag names and class names stored as parallel two-element lists.
    self.tagList, self.classList = ['li', 'a'], ['photo-list-padding', 'pic']
    # The base initialiser runs last, after the attributes above exist.
    HTMLParser.__init__(self)
def __init__(self):
    """Initialise the base HTMLParser; recording counter 0, no data, no link."""
    HTMLParser.__init__(self)
    self.recording, self.data, self.link = 0, [], ""
def __init__(self, allows=[]):
    """Initialise the base HTMLParser and choose the allowed-tag list.

    Arguments:
    allows -- used as ``self.allow_tags`` when truthy; otherwise the
        ``allow_tags`` value already reachable through ``self`` is kept.
    """
    HTMLParser.__init__(self)
    # An empty ``allows`` (including the default) is falsy, so it is never
    # the object that ends up stored.
    self.allow_tags = allows or self.allow_tags
    self.result, self.start, self.data = [], [], []
def __init__(self):
    """Initialise the base HTMLParser; ``title`` starts as None."""
    HTMLParser.__init__(self)
    # No title value stored yet.
    self.title = None
def __init__(self):
    """Initialise the base HTMLParser, reset it, and start an empty ``fed`` list."""
    HTMLParser.__init__(self)
    # Explicit reset() call after the base initialiser.
    self.reset()
    self.fed = []
def __init__(self, **kw):
    """Forward all keyword arguments to HTMLParser and start an empty ``_fed``.

    Arguments:
    **kw -- passed through unchanged to ``HTMLParser.__init__``.
    """
    HTMLParser.__init__(self, **kw)
    self._fed = []
def __init__(self):
    """Initialise the base HTMLParser, reset it, and start an empty ``HTMLDATA``."""
    HTMLParser.__init__(self)
    # Explicit reset() call after the base initialiser.
    self.reset()
    self.HTMLDATA = []
def __init__(self):
    """Initialise the base HTMLParser and start with an empty ``AllLinks`` list."""
    HTMLParser.__init__(self)
    # A new list per instance.
    self.AllLinks = []
def __init__(self):
    """Initialise the base HTMLParser, an empty text list and the ignore flag."""
    HTMLParser.__init__(self)
    self._ignore = False
    self._texts = []  # type: list
def __init__(self):
    """Initialise the base HTMLParser; ``first_row`` on, ``in_cell`` off, no links."""
    HTMLParser.__init__(self)
    self.first_row, self.in_cell = True, False
    self.links = []
def __init__(self):
    """Initialise the base HTMLParser; ``json_link`` starts as None."""
    HTMLParser.__init__(self)
    # No link value stored yet.
    self.json_link = None
def __init__(self):
    """Initialise the base parser with ``convert_charrefs`` explicitly disabled."""
    BaseHTMLParser.__init__(
        self,
        convert_charrefs=False,
    )
def __init__(self, ostream):
    """Initialise the base HTMLParser and remember the output stream.

    Arguments:
    ostream -- stored unchanged as ``self.ostream``.
    """
    HTMLParser.__init__(self)
    self.in_pre = self.in_code = False
    self.ostream = ostream
    self.pygments_fix = False
def __init__(self):
    """Initialise the base HTMLParser; empty title/content strings, flags off."""
    HTMLParser.__init__(self)
    # Each text field is paired with its own flag.
    self.title, self.is_title = "", False
    self.content, self.is_content = "", False
def __init__(self):
    """Initialise the base HTMLParser and start with an empty ``out_buffer`` list."""
    HTMLParser.__init__(self)
    # A new list per instance.
    self.out_buffer = []
def __init__(self):
    """Initialise the base HTMLParser and start with an empty private text list."""
    HTMLParser.__init__(self)
    # A new list per instance.
    self.__text = []
def __init__(self):
    """Initialise the base HTMLParser; empty content string and all flags off."""
    HTMLParser.__init__(self)
    self.content = ""
    self.is_content = self.other_content = self.skip = False
def __init__(self):
    """Initialise the base HTMLParser; both integer flags 0, ``divflag`` False."""
    HTMLParser.__init__(self)
    self.flag = self.endflag = 0
    self.divflag = False
def __init__(self):
    """Initialise the base HTMLParser; no content, type set to ``self.UNKNOWN``."""
    HTMLParser.__init__(self)
    self.content = None
    # UNKNOWN is looked up through the instance.
    self.content_type = self.UNKNOWN
def __init__(self):
    """Initialise the base HTMLParser and start with an empty ``strings`` list."""
    HTMLParser.__init__(self)
    # A new list per instance.
    self.strings = []
def __init__(self):
    """Initialise the base HTMLParser and start with an empty ``tag_results`` dict."""
    HTMLParser.__init__(self)
    # A new dictionary per instance.
    self.tag_results = {}
def __init__(self, tag="", attrs=None):
    """Initialise the parser, the tag/attribute filter and a temporary root.

    Arguments:
    tag -- stored as ``self.tag``; ``self.enabled`` is True only when it
        is empty/falsy.
    attrs -- optional mapping.  Entries whose value equals 0 have their key
        appended to ``self._unw_attrs``; all other entries are kept in
        ``self.attrs``.  The mapping passed in is not modified.
    """
    # Initiate HTMLParser
    HTMLParser.__init__(self)
    self.convert_charrefs = True
    self._root = None  # root element
    self._data = []  # data collector
    self._factory = Etree.Element
    self.enabled = not tag
    self._unw_attrs = []
    self.tag = tag

    # Split attributes into wanted and unwanted attributes.  The wanted ones
    # go into a private dict so the caller's mapping is left untouched.
    self.attrs = {}
    if attrs:
        for key, value in attrs.items():
            if value == 0:
                self._unw_attrs.append(key)
            else:
                self.attrs[key] = value

    # Some tags in html do not require closing tags so those tags will need
    # to be auto closed (void elements).
    # Refer to: https://www.w3.org/TR/html/syntax.html#void-elements
    self._voids = frozenset((
        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "param",
        # Only in HTML5
        "embed", "keygen", "source", "track",
        # Not supported in HTML5
        "basefont", "frame", "isindex",
        # SVG self closing tags
        "rect", "circle", "ellipse", "line", "polyline", "polygon",
        "path", "stop", "use", "image", "animatetransform"))

    # Create temporary root element to protect from badly written sites that
    # either have no html starting tag or multiple top level elements.
    elem = self._factory("html")
    self._elem = [elem]
    self._last = elem
    self._tail = 0
def __init__(self):
    """Initialise the base HTMLParser; two empty lists ``a``/``n`` and ``x`` = 0."""
    HTMLParser.__init__(self)
    self.a, self.n = [], []
    self.x = 0
def __init__(self, date_url_file):
    """Initialise the base HTMLParser and store ``date_url_file``.

    Arguments:
    date_url_file -- stored unchanged as ``self.date_url_file``.
    """
    HTMLParser.__init__(self)
    self.tag, self.date_url, self.href = None, False, None
    self.date_url_file = date_url_file
def __init__(self):
    """Initialise the base HTMLParser; empty content list, ``in_div`` off."""
    HTMLParser.__init__(self)
    self.content, self.in_div = [], False
def __init__(self, druid_module_name, compatible_license_names):
    """Initialise the base HTMLParser and store both arguments.

    Arguments:
    druid_module_name, compatible_license_names -- stored unchanged under
        the same attribute names.
    """
    HTMLParser.__init__(self)
    # The state starts as the literal string "none".
    self.state = "none"
    self.druid_module_name, self.compatible_license_names = (
        druid_module_name,
        compatible_license_names,
    )
def __init__(self):
    """Initialise the base HTMLParser; both div flags off, counter 0, empty result."""
    HTMLParser.__init__(self)
    self.div = self.div_2 = False
    self.cpt = 0
    self.resulte = ""
def __init__(self):
    """Initialise the base HTMLParser and set ``maxword`` to 150."""
    HTMLParser.__init__(self)
    # Fixed default; no constructor argument overrides it.
    self.maxword = 150
def __init__(self):
    """Initialise the base HTMLParser; this constructor sets no extra state."""
    HTMLParser.__init__(self)
def __init__(self, tag_to_analyse='div'):
    """Initialise the base HTMLParser and remember the tag name.

    Arguments:
    tag_to_analyse -- stored unchanged as ``self.tag_to_analyse``
        (default 'div').
    """
    HTMLParser.__init__(self)
    self.tag_to_analyse, self.classes = tag_to_analyse, {}
def __init__(self, base, output=None):
    """Initialise the base HTMLParser and store ``base`` and the output list.

    Arguments:
    base -- stored unchanged as ``self.base``.
    output -- object stored as ``self.output``; a new empty list is
        created when it is None.
    """
    HTMLParser.__init__(self)
    self.output = [] if output is None else output
    self.base = base