def get_async_node_data(self, async_data: IAsyncResult) -> str:
    """Resolve a pending Zookeeper read and return its payload as text.

    Blocks on *async_data*, decodes the node payload as UTF-8 and
    substitutes ``<br>`` for every newline.  Exits the process with
    ``ErrorCodes.NO_NODE`` when the node does not exist.
    """
    try:
        payload, _stat = async_data.get()
    except NoNodeError as err:
        ErrorCodes.make_graceful(err, "No node found")
        sys.exit(ErrorCodes.NO_NODE.value)
    decoded = payload.decode("utf-8")
    return decoded.replace("\n", "<br>")
def main():
    """Script entry point: export a Zookeeper subtree to a JSON file.

    Exits with ``WRONG_NUM_ARGUMENTS`` on bad argv and ``NOT_IMPLEMENTED``
    when an unimplemented code path is hit.
    """
    try:
        host, zk_root, credentials, destination_file = get_args()
        export = ZkTreeExport.new(host, zk_root, credentials, destination_file)
        export.to_json()
    except IndexError as err:
        # BUG FIX: sys.exit() accepts at most one argument; the old
        # sys.exit(code, "message") call raised TypeError.  The message now
        # goes through make_graceful, matching the other handlers here.
        ErrorCodes.make_graceful(err, "wrong number of arguments")
        sys.exit(ErrorCodes.WRONG_NUM_ARGUMENTS.value)
    except NotImplementedError as err:
        ErrorCodes.make_graceful(err, "some function was not implemented")
        sys.exit(ErrorCodes.NOT_IMPLEMENTED.value)
def start_kazoo(host: str, credentials: str) -> KazooClient:
    """Starts a connection to the Zookeeper client"""
    client = KazooClient(hosts=host)
    # Digest auth is queued before the connection attempt.
    client.add_auth_async("digest", credentials)
    try:
        started = client.start_async()
        started.wait(timeout=10)
        logger.info("Zookeeper connection established")
    except KazooTimeoutError as err:
        ErrorCodes.make_graceful(err, "Zookeeper server timed out")
        sys.exit(ErrorCodes.KAZOO_TIMEOUT.value)
    return client
def __processBaseURL(self):
    '''
    Private member function used for Processing the base URL.
    Process the base URL and extract the links from them and create URLLinks object for each
    links extracted. Each of these objects are pushed as the child entry of the Main URLLinks
    object created for the Base URL. These child objects are later accessed in a loop and
    processed further to check for their validity, depending on the depth to which they belong.
    '''
    # Time the initial fetch of the base URL so the download time can be
    # recorded on the resulting URLLinks object.
    ts = time.time()
    handle = self.__getDataFromURL(self.__dict__['_url'])
    ted = time.time()
    dlTime = ted - ts
    if ( self.__checkIfError(handle)):
        # handle is a (kind, detail) error pair; HTTP errors are mapped to a
        # human-readable description via the ErrorCodes table.
        if ( handle[0] == 'HTTPError'):
            eCode = ErrorCodes(int(handle[1]))
            einfo = eCode.getError()[1]
        else:
            einfo = handle[1]
        # Record the base URL itself as a broken link.
        # (The '<Unknwon>' spellings are preserved from the original source.)
        urlObject = URLLinks(self.__dict__['_url'], None, self.__dict__['_url'], None,
                             isProcessed=True, isBroken=True, size='<Unknown>',
                             dlTime=dlTime, checkTime=dlTime, lastModified='<Unknwon>',
                             info=einfo,status=handle[0] + ' : ' + handle[1],
                             lType='<Unknwon>')
        self.__printError(handle[0] + ' : ' + handle[1] + ' : ' + einfo)
        # NOTE(review): __raiseError raises, so the return below appears
        # unreachable when an error is propagated — confirm intended.
        self.__raiseError(handle, self.__dict__['_url'])
        return urlObject
    else:
        # Success path: download again for parsing and time that fetch.
        # NOTE(review): this re-downloads the page already fetched by
        # __getDataFromURL above — presumably deliberate, verify.
        ts = time.time()
        htmlData = urllib2.urlopen(self.__dict__['_url'])
        ted = time.time()
        data = etree.HTML(htmlData.read())
        dlTime = ted - ts
        title = self.__getURLTitle(data)
        links = self.__links(data)
        # Content type, last-modified stamp, and size come from the headers.
        (lTtype, lastChagned, size) = self.__getURLInfo(handle)
        status = 'OK'
        urlObj = URLLinks(self.__dict__['_url'], title, self.__dict__['_url'], title,
                          isProcessed=True, isBroken=False, size=size, dlTime=dlTime,
                          lastModified=lastChagned, info='Successfully Processed',
                          status=status, lType=lTtype)
        for link in links:
            cLink = str(link.attrib['href']).lstrip().rstrip()
            # Resolve fragment-only and relative hrefs against the base URL.
            if ( cLink.startswith('#') or cLink.startswith('.') or cLink.startswith('..') or self.__dict__['_url'] not in cLink):
                cLink = urlparse.urljoin(self.__dict__['_url'], cLink)
            # Only links still under the base URL are queued as children.
            if ( self.__dict__['_url'] in cLink):
                cTitle = link.text
                temp = URLLinks(self.__dict__['_url'], title, cLink, cTitle)
                urlObj.addChild(temp)
        te = time.time()
        cTime = te - ts
        urlObj.setCheckTime(cTime)
        # Level 0 of the class-wide level index holds only the base URL object.
        Deadcheck.__levelBasedLinks[0] = []
        Deadcheck.__levelBasedLinks[0].append(urlObj)
def get_args():
    """Parse sys.argv into (host, zk_path, credentials, destination).

    Expects exactly three CLI arguments: the ``host:port/root`` connection
    string, the digest credentials, and the export destination path.

    Raises:
        IndexError: wrong number of arguments (caller exits gracefully).
    """
    # TODO: Implement argument parsing for future options
    if len(sys.argv) != 4:
        # BUG FIX: the usage line omitted the credentials argument even
        # though four argv entries are required.
        print(f"\nUsage: python3 {basename(__file__)} "
              "host:port/root user:password /path/to/export")
        raise IndexError("Wrong number of arguments")
    try:
        host, zk_path = parse_zk_string(sys.argv[1])
    except NotADirectoryError as err:
        # BUG FIX: the message lacked its f-prefix (printed literal
        # "{zk_path}"), and zk_path is unbound when parse_zk_string raises —
        # report the raw argument instead.
        ErrorCodes.make_graceful(err, f"{sys.argv[1]} is not a directory")
        sys.exit(ErrorCodes.NOT_A_DIRECTORY.value)
    credentials = sys.argv[2]
    destination = sys.argv[3]
    return (host, zk_path, credentials, destination)
def __raiseError(self, value, *url):
    '''
    Private member function used for raising different types of Errors that one
    may encounter during the processign or downloading of the page. This value
    is used for setting the status of the URLLinks objects and the same is used
    for reporting purpose later on.
    '''
    kind = value[0]
    detail = value[1]
    if ( kind == 'HTTPError'):
        eCode = ErrorCodes(int(detail))
        raise urllib2.HTTPError(url[0], int(detail), eCode.getError(), None, None)
    if ( kind == 'URLError'):
        raise urllib2.URLError(detail)
    if ( kind == 'HTTPException'):
        raise httplib.HTTPException(detail)
    if ( kind == 'Generic Exception'):
        raise Exception(kind + ' : ' + detail)
def new(cls, host: str, zk_root: str, credentials: str, destination: str) -> "ZkTreeExport":
    """Initializes a ZkTreeObject, performing various tests.

    Connects to Zookeeper and verifies the destination file is writable,
    exiting gracefully with a specific error code on failure.
    """
    instance = cls(zk_root, destination)
    instance.zk_client = ZkTreeExport.start_kazoo(host, credentials)
    try:
        ZkTreeExport.test_write_permission(destination)
        logger.debug("Write permission successful.")
    except IsADirectoryError as err:
        # BUG FIX: messages were missing the f-prefix, so the literal text
        # "{destination}" was printed instead of the actual path.
        ErrorCodes.make_graceful(err, f"{destination} is a directory")
        sys.exit(ErrorCodes.IS_A_DIRECTORY.value)
    except PermissionError as err:
        ErrorCodes.make_graceful(err, f"no write permission in {destination}")
        sys.exit(ErrorCodes.NO_WRITE_PERMISSION.value)
    return instance
def AutoLogin():
    # Log in with the built-in test account and download every ticket page,
    # returning the tickets as a list of dicts.  Only the success branch
    # returns; a failed initial request falls through and returns None.
    # SECURITY NOTE(review): credentials are hard-coded in the source
    # (base64 is encoding, not encryption) — move them to configuration or
    # environment variables.
    # Decode the credentials
    credentials = base64.b64decode(
        b'YWJoaXNoZWtwYWh1amFAaG90bWFpbC5jb20=').decode(
            'utf-8'), base64.b64decode(b'UElSQVRFU29mVEhFY2FyaWJiZWFu').decode(
                'utf-8')
    # Create session
    session = requests.Session()
    # Authenticate test user
    session.auth = credentials
    zendesk = 'https://thedottedline.zendesk.com/api/v2/tickets.json?page='
    url = zendesk + '1'
    response = session.get(url)
    if response.status_code != 200:
        # Call the error code method from errorCode.py
        ErrorCodes(response.status_code)
        # Sleep for 5 seconds before clearing screen
        sleep(5)
        clear()
    else:
        clear()
        # Check the number of pages
        DATA = response.json()
        total_records = DATA['count']
        # Calculate the number of pages
        Total_Pages = int(total_records / 100) + 1
        # If more than 1 page(100 tickets)
        # Pre-size the arrays; entries are overwritten below.
        DataArray = [DATA] * (total_records)
        RecordArray = [DATA] * Total_Pages
        # Initialise counters
        PageCount = 1
        recordcounter = 0
        # Put all the data in arrays
        # NOTE(review): if the API returns more tickets than 'count'
        # reported, DataArray[recordcounter] would raise IndexError — the
        # loop assumes the count stays accurate while paging.
        while recordcounter < total_records:
            for records in RecordArray:
                zendesk = 'https://thedottedline.zendesk.com/api/v2/tickets.json?page='
                url = zendesk + str(PageCount)
                response = session.get(url)
                # Get all the ticket records
                records = response.json()
                # Get individual ticket records
                for data in records['tickets']:
                    DataArray[recordcounter] = data
                    # Increment the record counter
                    recordcounter += 1
                # Increment to next page
                PageCount += 1
        # display a message if no tickets were found
        if total_records == 0:
            print(
                'this account has zero tickets. requsting tickets dislay will result in blank output'
            )
        # Return the collected tickets
        return DataArray
def __analyze(self, url):
    # Fetch *url*, parse it, and return a URLLinks object describing it,
    # with one child URLLinks per same-site link found in the page.
    # On a download error, returns a URLLinks object flagged as broken.
    ts = time.time()
    handle = self.__getDataFromURL(url)
    ted = time.time()
    dlTime = ted - ts
    if ( self.__checkIfError(handle)):
        # handle is a (kind, detail) error pair; HTTP errors get a
        # human-readable description from the ErrorCodes table.
        if ( handle[0] == 'HTTPError'):
            eCode = ErrorCodes(int(handle[1]))
            einfo = eCode.getError()[1]
        else:
            einfo = handle[1]
        # ('<Unknwon>' spellings preserved from the original source.)
        urlObject = URLLinks(url, None, url, None, isProcessed=True, isBroken=True,
                             size='<Unknown>', dlTime=dlTime, checkTime=dlTime,
                             lastModified='<Unknwon>',
                             info=einfo,status=handle[0] + ' : ' + handle[1],
                             lType='<Unknwon>')
        return urlObject
    else:
        # Success path: download again for parsing and time that fetch.
        # NOTE(review): re-downloads the page fetched by __getDataFromURL
        # above — presumably deliberate, verify.
        ts = time.time()
        htmlData = urllib2.urlopen(url)
        ted = time.time()
        data = etree.HTML(htmlData.read())
        dlTime = ted - ts
        title = self.__getURLTitle(data)
        links = self.__links(data)
        # Content type, last-modified stamp, and size come from the headers.
        (lTtype, lastChagned, size) = self.__getURLInfo(handle)
        status = 'OK'
        urlObj = URLLinks(url, title, url, title, isProcessed=True, isBroken=False,
                          size=size, dlTime=dlTime, lastModified=lastChagned,
                          info='Successfully Processed', status=status, lType=lTtype)
        for link in links:
            cLink = str(link.attrib['href']).lstrip().rstrip()
            # Resolve fragment-only and relative hrefs against this page.
            if ( cLink.startswith('#') or cLink.startswith('.') or cLink.startswith('..') or url not in cLink):
                cLink = urlparse.urljoin(url, cLink)
            # Only links on the same host are queued as children.
            if ( urlparse.urlparse(url).netloc in cLink):
                cTitle = link.text
                temp = URLLinks(url, title, cLink, cTitle)
                urlObj.addChild(temp)
        te = time.time()
        cTime = te - ts
        urlObj.setCheckTime(cTime)
        return urlObj
def process(self):
    '''
    Method that will be called using the Deadcheck object from the Main script to instruct
    the module to process the links based on the depth to which they belong to. Each of the
    link is extracted from the childURL list that belongs to the parent URLLinks object and
    processed after checking for exemptions. Based on the processing, the apropriate
    parameters and values are set using the set method available in the URLLinks class.
    Each page being processed has its own list of the Child URL that are extracted and pushed
    into an array. These list of URLs are processed during the next depth / level value.
    '''
    self.__loadExempt()
    if ( self.get_depth() == 0 ):
        # Depth 0: only the base URL itself is analyzed.
        # NOTE(review): __analyze is called without a url argument here,
        # while the sibling definition takes (self, url) — confirm which
        # overload this class actually defines.
        self.__analyze()
    else:
        # Walk the links level by level; children found at this level are
        # queued under level+1 in the class-wide index for the next pass.
        for level in range(self.get_depth()+1):
            Deadcheck.__levelBasedLinks[level+1] = []
            for vobj in self.getAll()[level]:
                for obj in vobj.getChildren():
                    t1 = time.time()
                    (url, title) = obj.get()
                    #if ( not Deadcheck.__ProcessedLinks.has_key(url) and not self.__checkExempt(url) and 'javascript' not in url.lower()):
                    # Skip URLs already processed or exempted by the input file.
                    if ( not Deadcheck.__ProcessedLinks.has_key(url) and not self.__checkExempt(url) ):
                        Deadcheck.__ProcessedLinks[url] = 1
                        # Process javascript:openWindow type URL to extract necessary links.
                        self.__printMessage("Processing Link : " + url);
                        if ( 'javascript' in url.lower()):
                            url = self.__cleanupJavaScript(url)
                        # Probe the URL and time the download.
                        ts = time.time()
                        handle = self.__getDataFromURL(url)
                        ted = time.time()
                        if ( self.__checkIfError(handle)):
                            # Broken link: record the error details on the
                            # existing child object.
                            if ( handle[0] == 'HTTPError'):
                                eCode = ErrorCodes(int(handle[1]))
                                einfo = eCode.getError()[1]
                            else:
                                einfo = handle[1]
                            obj.setInfo(einfo)
                            obj.setProcessed(True)
                            obj.setBroken(True)
                            obj.setStatus(handle[0] + ' : ' + str(handle[1]))
                            obj.setDLTime(ted-ts)
                            obj.setSize('<Unknown>')
                            obj.setLastModified('<Unknown>')
                            obj.setType('<Unknown>')
                            obj.setCheckTime(ted-ts)
                            print 'Broken ' + str(obj.get())
                            self.__printError('Broken Link ' + str(obj.get()));
                        else:
                            # Live link: re-download for parsing, extract its
                            # own child links, and queue them for level+1.
                            ts = time.time()
                            htmlData = urllib2.urlopen(url)
                            ted = time.time()
                            data = etree.HTML(htmlData.read())
                            dlTime = ted - ts
                            title = self.__getURLTitle(data)
                            links = self.__links(data)
                            (lTtype, lastChagned, size) = self.__getURLInfo(htmlData)
                            status = 'OK'
                            urlObj = URLLinks(url, title, url, title, isProcessed=True,
                                              isBroken=False, size=size, dlTime=dlTime,
                                              lastModified=lastChagned,
                                              info='Successfully Processed',
                                              status=status, lType=lTtype)
                            for link in links:
                                cLink = str(link.attrib['href']).lstrip().rstrip()
                                # Resolve fragment-only / relative hrefs.
                                if ( cLink.startswith('#') or cLink.startswith('.') or cLink.startswith('..') or url not in cLink):
                                    cLink = urlparse.urljoin(url, cLink)
                                # Only same-host links (vs the base URL) are queued.
                                if ( urlparse.urlparse(self.__dict__['_url']).netloc in cLink):
                                    cTitle = link.text
                                    temp = URLLinks(url, title, cLink, cTitle, status='UNPROCESSED')
                                    urlObj.addChild(temp)
                            te = time.time()
                            cTime = te - ts
                            urlObj.setCheckTime(cTime)
                            Deadcheck.__levelBasedLinks[level+1].append(urlObj)
                            t2 = time.time()
                            # Mark the original child object as processed OK.
                            obj.setInfo('Successfully Processed.')
                            obj.setProcessed(True)
                            obj.setBroken(False)
                            obj.setStatus('OK')
                            obj.setDLTime(dlTime)
                            obj.setSize(size)
                            obj.setLastModified(lastChagned)
                            obj.setType(lTtype)
                            obj.setCheckTime(t2-t1)
                    else:
                        # Skipped URL: explain why (exempted, javascript, or
                        # already processed) and blank out the metrics.
                        if ( self.__checkExempt(url)):
                            obj.setInfo('Exempted based on the Input file : ' + self.__dict__['_exempt'])
                            obj.setStatus('EXEMPTED')
                            self.__printWarning("URL Exempted : " + url);
                        elif ( 'javascript' in url ):
                            obj.setInfo('Javascript Links are not processed. Implementation underway.')
                            obj.setStatus('WARNING')
                        else:
                            obj.setInfo('URL Already Processed. Will not be processed again.')
                            obj.setStatus('SKIPPED')
                            self.__printWarning("Skipping URL : " + url);
                        obj.setProcessed(True)
                        obj.setBroken(False)
                        obj.setDLTime(None)
                        obj.setSize(None)
                        obj.setLastModified(None)
                        obj.setType(None)
                        obj.setCheckTime(None)
def Login():
    # Interactively log in to Zendesk (up to 3 attempts) and download every
    # ticket, returning them as a list of dicts.
    # NOTE(review): the '******' runs below are secret-masking damage to the
    # original source — the code from the getpass() call through the
    # failed-login branch (including the 'if' matching the dangling 'else:')
    # has been destroyed and is NOT valid Python.  Restore from version
    # control; the damaged tokens are preserved verbatim here.
    # Set counter to 0
    counter = 0
    # If user does not make 3 incorrect attempts continue
    while counter < 3:
        print('https://{DomainName}.zendesk.com')
        # Get user's login details
        domain = input("Please enter the domain name (without curly braces): ")
        email = input("Please enter your email address: ")
        password = getpass("Please enter your password: "******"Please check the data you entered." "\nDomain Name: " + domain + "\nEmail ID: " + email + "\nPassword: "******"\nTotal attempts remaining: " + str(3 - counter))
        else:
            clear()
            # Check the number of pages
            DATA = response.json()
            total_records = DATA['count']
            # Calculate the number of pages
            Total_Pages = int(total_records / 100) + 1
            # If more than 1 page(100 tickets)
            # Pre-size the arrays; entries are overwritten below.
            DataArray = [DATA] * (total_records)
            RecordArray = [DATA] * Total_Pages
            PageCount = 1
            recordcounter = 0
            # Put all the data in arrays
            while recordcounter < total_records:
                for records in RecordArray:
                    zendesk = 'https://' + domain + '.zendesk.com/api/v2/tickets.json?page='
                    url = zendesk + str(PageCount)
                    response = session.get(url)
                    # get all the ticket records
                    records = response.json()
                    # Get individual ticket records
                    for data in records['tickets']:
                        DataArray[recordcounter] = data
                        # Increment the record counter
                        recordcounter += 1
                    # Increment the to next page
                    PageCount += 1
            # Set counter to 4 to break while loop
            counter = 4
            # display a message if no tickets were found
            if total_records == 0:
                print(
                    'this account has zero tickets. requsting tickets dislay will result in blank output'
                )
            # Return the ticket data
            return DataArray
    # If the user made 3 invalid attempts, close the program.
    InvalidAttempts(counter)