class Aggregate:
    # Variables
    __file = ''
    __delimiter = ''
    __col_name = ''
    log = cl.customLogs()

    def __init__(self):
        self.utils = Utils()

    def __initiate_vars(self, file_path, delimiter):
        """
        Creates the pandas and RZT Data Frame objects and returns them in a list.
        :param file_path:
        :param delimiter:
        :return:
        """
        df_pandas = pd.read_csv(file_path, delimiter=delimiter)
        rzt_data = RZTData(cntx.experiment)
        df_rzt = rzt_data.read({
            'path': file_path,
            'delimiter': delimiter,
            'encoding': 'utf-8'
        })
        return [df_pandas, df_rzt]

    def validate_count(self, file_path, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        count_pd = len(data_frames[0])
        count_rzt = data_frames[1].count()
        if self.utils.verifyArrayMatch(actual=count_rzt, expected=count_pd):
            self.log.info("Verified the row count matches in both Data Frames")
        else:
            self.log.error("Row count match failed")

    def validate_minima(self, file_path, delimiter=',', col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        min_pd = data_frames[0][col_name].min()
        min_rzt = data_frames[1][col_name].min()
        if self.utils.verifyArrayMatch(actual=min_rzt, expected=min_pd):
            self.log.info("Verified the Minima matches in both Data Frames on column name : {}".format(col_name).upper())
        else:
            self.log.error("Minima does not match")

    def validate_maxima(self, file_path, delimiter=',', col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        max_pd = data_frames[0][col_name].max()
        max_rzt = data_frames[1][col_name].max()
        if self.utils.verifyArrayMatch(actual=max_rzt, expected=max_pd):
            self.log.info("Verified the Maxima matches in both Data Frames on column name : {}".format(col_name).upper())
        else:
            self.log.error("Maxima does not match")
def get_scan_status(scan_id):
    scan_status_from_db = get_scan_status_by_id(scan_id)
    if scan_status_from_db:
        current_status = Utils.convert_scan_status(scan_status_from_db[0][0])
    else:
        # Check if task is cached for 20 minutes
        if is_cached(scan_id):
            current_status = Utils.convert_scan_status("PENDING")
        else:
            current_status = Utils.convert_scan_status("NOT_FOUND")
    return current_status
def _get_db_conn(self):
    '''
    Retrieve a database connection using environment variables
    :return: db_conn: connection client to the database
    '''
    # host='AQICAHhMz1Wf7tTGilIGlJZDb3LaCAXhS5a/i+jj8GeI1TQa6gHQuQO7vW3uvEq6M6eSJZRWAAAAljCBkwYJKoZIhvcNAQcGoIGFMIGCAgEAMH0GCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQM/5hbtKGk/bpYfcDpAgEQgFCGmwoxqJoKriZ3QRQLKH/OjtdJhQhzKN7AeRey2snliLdcN401RUu/WfN74QMOmC5dQPF/Qa6tp2MkJFj2nGG0sT7BPiCGvpZcI6/+yxO/7Q=='
    # user='******'
    # password='******'
    utils = Utils()
    db_conn = mysql.connector.connect(
        host=utils.decrypt(os.environ.get('db_url')),
        user=utils.decrypt(os.environ.get('db_user')),
        password=utils.decrypt(os.environ.get('db_password')),
        # host=utils.decrypt(host),
        # user=utils.decrypt(user),
        # password=utils.decrypt(password),
        database=self.database)
    return db_conn
def validate_usedecimals(usedecimals):
    """Validate and parse useDecimals"""
    try:
        decimals = ['false', 'true']
        if usedecimals:
            # Compare case-insensitively, matching validate_granularity below
            if usedecimals.lower() in decimals:
                usedecimals = usedecimals.lower()
            else:
                usedecimals = 'error'
        else:
            usedecimals = 'false'
    except:
        '''Get the stack trace and print it'''
        err = traceback.format_exc()
        logger.error(err)
        utils = Utils()
        utils.send_err_email_process(
            err, 'parameters do not satisfy the expected values')
    finally:
        return usedecimals
def validate_granularity(granularity):
    """Validate and parse granularity"""
    try:
        granular_opt = ['hourly', 'daily']
        if granularity:
            if granularity.lower() in granular_opt:
                granularity = granularity.upper()
            else:
                granularity = 'error'
        else:
            granularity = 'DAILY'
    except:
        '''Get the stack trace and print it'''
        err = traceback.format_exc()
        logger.error(err)
        utils = Utils()
        utils.send_err_email_process(
            err, 'parameters do not satisfy the expected values')
    finally:
        return granularity
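# Illustration of how the two validators above normalize their inputs.
# The expected return values are read directly off the branches; the 'error'
# sentinel is what lambda_handler later rejects through its asserts.
assert validate_granularity('Hourly') == 'HOURLY'   # case-insensitive match, upper-cased
assert validate_granularity(None) == 'DAILY'        # missing value falls back to the default
assert validate_granularity('weekly') == 'error'    # unsupported value is flagged

assert validate_usedecimals('TRUE') == 'true'       # normalized to lower case
assert validate_usedecimals(None) == 'false'        # missing value defaults to 'false'
assert validate_usedecimals('maybe') == 'error'     # unsupported value is flagged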
class ScreenDetails(BasePage):
    log = logging.getLogger('flipkartSearch.pdp')
    com_utils = Utils()

    _purchase_item_pdp_title = (By.XPATH, "//h1/span[starts-with(@class,'_')]")
    pdp_item_cost = (By.XPATH, "(//div[contains(text(),'₹')])[1]")
    _add_to_basket = (By.XPATH, "//div[@class='row']/button[contains(text(),'ADD TO BASKET')]")
    _add_to_cart = (By.XPATH, "//ul/li/button")
    _cart_icon = (By.XPATH, "//a/span[contains(text(),'Cart')]")

    def get_pdp_item_title_cost(self):
        """ To return the cost of the item from PDP """
        purchase_item_pdp_cost = self.getText(self.pdp_item_cost, info="Item Price value")
        print("PDP cost value :: " + str(purchase_item_pdp_cost))
        return purchase_item_pdp_cost

    def add_navigate_cart(self):
        """ Add to basket and Navigate to cart """
        basket_elem = self.getElement(self._add_to_basket, elementName="Add to Basket button")
        if basket_elem is None:
            # cart_elem = self.getElement(self._add_to_cart, elementName="Add to Cart button")
            # self.javascript_execute("arguments[0].scrollIntoView();", cart_elem)
            self.elementClick(self._add_to_cart, elementName="Add to Cart button")
            time.sleep(2)
        else:
            # self.javascript_execute("arguments[0].scrollIntoView();", basket_elem)
            self.elementClick(self._add_to_basket, elementName="Add to Basket button")
            time.sleep(2)
        # self.elementClick(self._add_to_basket, elementName="Add to Basket button")
        # time.sleep(2)
        # self.elementClick(self._add_to_cart, elementName="Add to Cart button")
        # time.sleep(2)
        self.elementClick(self._cart_icon, elementName="Cart icon")
        time.sleep(2)
class ZaiLaGan():
    # Initialize config, device, model, tokenizer, and utilities
    def __init__(self, config):
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.bert_wwm_model = BertForMaskedLM.from_pretrained(self.config["Model"]["bert_wwm_ext_chinese"])
        self.bert_wwm_model.eval()
        self.bert_wwm_model = self.bert_wwm_model.to(self.device)
        self.bert_wwm_tokenizer = BertTokenizer.from_pretrained(self.config["Model"]["bert_wwm_ext_chinese"])
        self.bert_base_model = BertForMaskedLM.from_pretrained(self.config["Model"]["bert_base_chinese"])
        self.bert_base_model.eval()
        self.bert_base_model = self.bert_base_model.to(self.device)
        self.bert_base_tokenizer = BertTokenizer.from_pretrained(self.config["Model"]["bert_base_chinese"])
        self.utils = Utils(self.config)
        self.dict_trie = self.utils.loadDictionaryTrie(self.config["Data"]["dictionary"], True)
        self.pinyin = self.utils.loadPinYin(self.config["Data"]["pinyin"])
        self.stroke = self.utils.loadStroke(self.config["Data"]["stroke"])
        self.place = self.utils.loadPlace(self.config["Data"]["place"])
        self.person = self.utils.loadPerson(self.config["Data"]["person"])
        self.ner_model = NER(self.config["Model"]["ner"], self.pinyin, self.stroke,
                             self.place, self.person, self.config["Data"]["ssc"])
        self.charSet = self.utils.loadCharSet(self.config['Data']['common_char_set'])
        self.customConfusionDict = self.utils.loadCustomConfusion(self.config['Data']['confusion'])
        self.ngram_model = NGRAM(self.config["Model"]["ngram"])
        # self.GEC = grammarErrorCorrector(self.config["Data"]["label_map"], self.config["Model"]["ngram"], self.config["Model"]["pos_model"])

    # Detect named-entities and return their corrections & positions
    def detectNamedEntity(self, sentences: List[str], task_name: str) -> List[Tuple[str, List[int]]]:
        return self.ner_model.check_ner(sentences, task_name)

    # Detect potential spelling errors in a given sentence/paragraph and return
    # detected error positions & top predictions from BERT
    def detectSpellingError(self, text: str, threshold: float, topk: int) -> Tuple[List[int], Dict[int, List[str]]]:
        positions = []
        predictions = {}
        # Mask each word and predict it
        for i in range(len(text)):
            # Check if current word is a chinese character
            if not self.utils.isChineseChar(text[i]):
                continue
            # Add mask
            masked_text = "[CLS]" + text[:i] + "[MASK]" + text[i + 1:] + "[SEP]"
            # Tokenize input text
            tokenized_masked_text = self.bert_wwm_tokenizer.tokenize(masked_text)
            masked_token_index = tokenized_masked_text.index("[MASK]")
            # Construct token ids and segment ids
            token_ids = torch.tensor([self.bert_wwm_tokenizer.convert_tokens_to_ids(tokenized_masked_text)])
            segment_ids = torch.tensor([[0] * token_ids.shape[1]])
            # Set up ids on GPU
            token_ids = token_ids.to(self.device)
            segment_ids = segment_ids.to(self.device)
            # Predict masked token
            with torch.no_grad():
                outputs = self.bert_wwm_model(token_ids, token_type_ids=segment_ids)
                scores = outputs[0][0, masked_token_index]
            # Classify the token as a potential spelling error if predicted probability is lower than given threshold
            token_probability = torch.nn.Softmax(0)(scores)[self.bert_wwm_tokenizer.convert_tokens_to_ids(text[i])]
            if token_probability < threshold:
                # Extract top predictions from BERT
                token_scores, token_indices = scores.topk(topk)
                top_predicted_tokens = self.bert_wwm_tokenizer.convert_ids_to_tokens(token_indices)
                positions.append(i)
                predictions[i] = top_predicted_tokens
        return (positions, predictions)

    # Give top n suggestions of spelling error correction
    def correctSpellingError(self, text: str, err_positions: Set[int],
                             predictions: Dict[int, List[str]], ne_positions: Set[int],
                             candidate_num: int, similar_bonus: float) -> List[Tuple[str, int, float]]:
        # Initialize a dictionary to record starting positions of potentially correct tokens/words
        starting_positions = {}
        # Add original tokens
        for i in range(len(text)):
            token = text[i]
            # Separate all tokens/words from tokens/words that are similar in stroke or pinyin
            starting_positions[i] = (set(token), set(token))
        # Add similar tokens in stroke or pinyin
        for err_position in err_positions:
            # Check if the error token is included in a named-entity
            if err_position in ne_positions:
                continue
            else:
                error_token = text[err_position]
                ssc_target = self.ner_model.ssc.getSoundCode(error_token)
                if error_token in self.stroke:
                    for similar_token in self.stroke[error_token][:3]:
                        starting_positions[err_position][0].add(similar_token)
                        starting_positions[err_position][1].add(similar_token)
                if error_token in self.pinyin:
                    for similar_token in self.pinyin[error_token][:7]:
                        starting_positions[err_position][0].add(similar_token)
                        starting_positions[err_position][1].add(similar_token)
                for predicted_token in predictions[err_position]:
                    # Check if BERT's prediction is a chinese character
                    if len(predicted_token) == 1 and self.utils.isChineseChar(predicted_token):
                        starting_positions[err_position][0].add(predicted_token)
                        ssc_pred = self.ner_model.ssc.getSoundCode(predicted_token)
                        ssc_score = self.ner_model.ssc.computeSoundCodeSimilarity(ssc_target, ssc_pred)
                        if ssc_score >= 0.7:
                            starting_positions[err_position][1].add(predicted_token)
        # Construct candidate sentences
        candidates = []
        prefixes = list(starting_positions[0][0])
        # Initialize counts of tokens/words that are similar in stroke or pinyin
        for i in range(len(prefixes)):
            if prefixes[i] in starting_positions[0][1]:
                prefixes[i] = (prefixes[i], 1)
            else:
                prefixes[i] = (prefixes[i], 0)
        while len(prefixes) > 0:
            prefix = prefixes.pop(0)
            if len(prefix[0]) == len(text):
                candidates.append((prefix[0], prefix[1], self.ngram_model.get_ppl(prefix[0])))
            else:
                for suffix in starting_positions[len(prefix[0])][0]:
                    if suffix in starting_positions[len(prefix[0])][1]:
                        prefixes.append((prefix[0] + suffix, prefix[1] + 1))
                    else:
                        prefixes.append((prefix[0] + suffix, prefix[1]))
        # Sort candidate sentences by perplexities from ngram model
        candidates.sort(key=lambda x: x[2])
        # Compute top candidate sentences' perplexities again with GPT2 and sort
        candidates = candidates[:50]
        for i in range(len(candidates)):
            candidates[i] = (candidates[i][0], candidates[i][1], self.utils.getSentencePpl(candidates[i][0]))
        candidates.sort(key=lambda x: x[2])
        # Extract top n suggestions
        recommendations = []
        for i in range(min(len(candidates), candidate_num)):
            recommendations.append(candidates[i])
        # Take counts of tokens/words that are similar in stroke or pinyin into consideration and sort again
        for i in range(len(recommendations)):
            recommendations[i] = (recommendations[i][0], recommendations[i][1],
                                  recommendations[i][2] / pow(similar_bonus, recommendations[i][1]))
        recommendations.sort(key=lambda x: x[2])
        return recommendations

    # Find spelling error correction candidates from dictionary
    def generate_correction_cand(self, word):
        correction_candidates = []
        if len(word) == 1:
            # Add similar tokens in pinyin
            confusion_word_set = set()
            for char in self.charSet:
                if lazy_pinyin(char) == lazy_pinyin(word):
                    confusion_word_set.add(char)
            confusion_word_set = confusion_word_set.union(set(self.pinyin[word]))
            correction_candidates.extend(confusion_word_set)
            # Add similar tokens in stroke
            correction_candidates.extend(self.stroke[word])
        if len(word) > 2:
            edit_cand = set()
            word_splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
            transposes = [L + R[1] + R[0] + R[2:] for L, R in word_splits if len(R) > 1]
            replaces = [L + c + R[1:] for L, R in word_splits if R for c in self.charSet]
            edit_set = set(transposes + replaces)
            for edit in edit_set:
                if self.dict_trie.getWordFreq(edit) > 0:
                    edit_cand.add(edit)
            correction_candidates.extend(edit_cand)
        confusion_word_set = set()
        if word in self.customConfusionDict:
            confusion_word_set = {self.customConfusionDict[word]}
        correction_candidates.extend(confusion_word_set)
        if len(word) == 2:
            # Add similar tokens in pinyin
            correction_candidates.extend(set(ele + word[1:] for ele in self.pinyin[word[0]] if ele))
            correction_candidates.extend(set(word[:-1] + ele for ele in self.pinyin[word[-1]] if ele))
        if len(word) > 2:
            correction_candidates.extend(set(word[0] + ele + word[2:] for ele in self.pinyin[word[1]] if ele))
            correction_candidates.extend(set(ele + word[-1] for ele in self.pinyin[word[1]] if ele))
            correction_candidates.extend(set(word[0] + ele for ele in self.pinyin[word[1]] if ele))
        return correction_candidates

    # Detect and correct spelling errors given input text
    def bertDetectAndCorrect(self, text: str, topk: int, ner_pos_list: List[int]) -> Tuple[str, List[int]]:
        positions = []
        text_list = list(text)
        # split input text into short texts
        re_punctuation = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&]+)", re.U)
        short_texts = []
        components = re_punctuation.split(text)
        components = list(filter(('').__ne__, components))
        start_idx = 0
        for comp in components:
            if re_punctuation.match(comp):
                short_texts.append((comp, start_idx))
                start_idx += len(comp)
                start_idx += 1
        # character-based detection and correction
        for (short_text, start_idx) in short_texts:
            for idx, single_word in enumerate(short_text):
                if self.utils.isChineseChar(single_word):
                    if start_idx + idx not in ner_pos_list:
                        # bert-based model generates topk candidates
                        masked_text = "[CLS]" + text[:idx] + "[MASK]" + text[idx + 1:] + "[SEP]"
                        tokenized_masked_text = self.bert_base_tokenizer.tokenize(masked_text)
                        token_ids = torch.tensor([self.bert_base_tokenizer.convert_tokens_to_ids(tokenized_masked_text)])
                        segment_ids = torch.tensor([[0] * token_ids.shape[1]])
                        token_ids = token_ids.to(self.device)
                        segment_ids = segment_ids.to(self.device)
                        with torch.no_grad():
                            outputs = self.bert_base_model(token_ids, token_type_ids=segment_ids)
                            scores = outputs[0][0, idx + 1]
                        token_probability = torch.nn.Softmax(0)(scores)[self.bert_base_tokenizer.convert_tokens_to_ids(text[idx])]
                        scores_list = torch.nn.Softmax(0)(scores)
                        _, pred = scores_list.topk(topk, 0, True, True)
                        topk_bert_candidates = [self.bert_base_tokenizer.convert_ids_to_tokens(ele.item()) for ele in pred]
                        if topk_bert_candidates and (single_word not in topk_bert_candidates):
                            candidates = self.generate_correction_cand(short_text[idx])
                            candidates_sorted = sorted(candidates, key=lambda k: self.dict_trie.getWordFreq(k), reverse=True)
                            if candidates_sorted:
                                for topk_bert_cand in topk_bert_candidates:
                                    if topk_bert_cand in candidates_sorted:
                                        # print(['- '+single_word, '+ '+topk_bert_cand + '_'+str(start_idx+idx)])
                                        text_list[start_idx + idx] = topk_bert_cand
                                        positions.append(start_idx + idx)
                                        single_word = topk_bert_cand
                                        break
        # word-based detection and correction
        for (short_text, start_idx) in short_texts:
            for n in [2, 3, 4, 5]:
                for idx in range(len(short_text) - n + 1):
                    if not ner_pos_list or (ner_pos_list and
                                            (start_idx + idx > ner_pos_list[-1] or
                                             start_idx + idx + n < ner_pos_list[0])):
                        if self.utils.isChineseChar(short_text[idx]):
                            word = short_text[idx:idx + n]
                            # bert-based model generates topk candidates
                            masked_text = "[CLS]" + text[:idx] + "[MASK]" + text[idx + 1:] + "[SEP]"
                            tokenized_masked_text = self.bert_base_tokenizer.tokenize(masked_text)
                            token_ids = torch.tensor([self.bert_base_tokenizer.convert_tokens_to_ids(tokenized_masked_text)])
                            segment_ids = torch.tensor([[0] * token_ids.shape[1]])
                            token_ids = token_ids.to(self.device)
                            segment_ids = segment_ids.to(self.device)
                            with torch.no_grad():
                                outputs = self.bert_base_model(token_ids, token_type_ids=segment_ids)
                                scores = outputs[0][0, idx + 1]
                            token_probability = torch.nn.Softmax(0)(scores)[self.bert_base_tokenizer.convert_tokens_to_ids(text[idx])]
                            scores_list = torch.nn.Softmax(0)(scores)
                            _, pred = scores_list.topk(topk, 0, True, True)
                            topk_bert_candidates = [self.bert_base_tokenizer.convert_ids_to_tokens(ele.item()) for ele in pred]
                            candidates = self.generate_correction_cand(word)
                            candidates = [ele for ele in candidates if self.dict_trie.getWordFreq(ele) > 0]
                            if candidates:
                                for topk_bert_cand in topk_bert_candidates:
                                    tmp_word = topk_bert_cand + word[1:]
                                    if tmp_word in candidates and tmp_word != word:
                                        # print(['- '+short_text[idx], '+ '+topk_bert_cand + '_'+str(start_idx+idx)])
                                        text_list[start_idx + idx] = topk_bert_cand
                                        positions.append(start_idx + idx)
                                        break
        # return corrected string and error position list
        return (''.join(text_list), sorted(list(set(positions))))

    # Divide a long text into multiple parts and correct spelling errors separately
    def divideAndCorrectSpellingError(self, text: str) -> Tuple[str, str]:
        # Perform named-entity recognition first
        ner_processed_text, ne_positions, _ = self.detectNamedEntity([text], 'correction')[0]
        ne_positions = set(ne_positions)
        # Detect spelling errors
        err_positions, bert_predictions = self.detectSpellingError(ner_processed_text, 1e-5, 3)
        err_positions = set(err_positions)
        # Split long text into multiple parts
        punctuations = {"。", "?", "!", ",", "、", ";", ":"}
        splitted_text = []
        sub_ne_positions, sub_err_positions, sub_bert_predictions = set(), set(), {}
        start = 0
        count = 0
        for i in range(len(ner_processed_text)):
            # Check if current character is included in a named-entity or is an error
            if i in ne_positions:
                sub_ne_positions.add(i - start)
            if i in err_positions:
                sub_err_positions.add(i - start)
                sub_bert_predictions[i - start] = bert_predictions[i]
            # Check if current character is a punctuation
            if ner_processed_text[i] in punctuations:
                count += 1
                # Check if a short text has been completed
                if count == 2:
                    splitted_text.append((ner_processed_text[start:i + 1], sub_err_positions,
                                          sub_bert_predictions, sub_ne_positions))
                    sub_ne_positions, sub_err_positions, sub_bert_predictions = set(), set(), {}
                    start = i + 1
                    count = 0
            elif i == len(ner_processed_text) - 1:
                splitted_text.append((ner_processed_text[start:], sub_err_positions,
                                      sub_bert_predictions, sub_ne_positions))
        # Correct spelling errors in each short text and combine corrected results
        corrections = []
        for short_text in splitted_text:
            correction = self.correctSpellingError(short_text[0], short_text[1], short_text[2],
                                                   short_text[3], 10, 1.5)[0][0]
            corrections.append(correction)
        return (ner_processed_text, "".join(corrections))

    # Get substitution words
    def getWordSub(self, text):
        res = os.popen("conda run -n wordSub python ./utilities/wordSubJob.py " + text).read()
        dic = ast.literal_eval(res)
        return dic
def run(self, tdids, granularity, usedecimals):
    '''
    Function that controls the extraction of data from the Aislelabs endpoint
    :return: No value to return
    '''
    logger.info('Initiating run')
    '''Initiate the Utils'''
    utils = Utils()
    '''Retrieve values from config file'''
    config = utils.get_yaml_config()
    baseurl = config['aislelabs']['baseurl']
    extension = config['aislelabs']['domain']
    bucket = config['aws']['bucket']
    database = config['status_table']['database']
    table = config['status_table']['table']
    '''Get the URL'''
    url = utils.concatenate_url(baseurl, extension)
    '''Retrieve values from environment vars'''
    sender = os.environ['sender']
    recipients = os.environ['recipients'].split(',')
    region = os.environ['aws_region']
    # sender = "*****@*****.**"
    # recipients = "*****@*****.**"
    # region = 'use-east-1'
    '''Get the api key stored in environment variables and decrypt it'''
    apikey = utils.decrypt(os.environ.get('apikey'))
    # apikey = '3c06767b873c483fc6295fbc7bc421e1'
    '''Get the request parameters and send the request to the Aislelabs endpoint'''
    try:
        """Set the query datetimes"""
        ts1 = int(self.lambda_start_time) / 1000
        ts2 = int(self.lambda_end_time) / 1000
        query_start_date = datetime.fromtimestamp(ts1).strftime('%Y-%m-%d %H:%M:%S')
        query_end_date = datetime.fromtimestamp(ts2).strftime('%Y-%m-%d %H:%M:%S')
        """Executing HTTP GET request"""
        request_parameters = self._craft_request(self.lambda_start_time, self.lambda_end_time,
                                                 apikey, tdids, granularity, usedecimals)
        response_json = self._make_request(url, request_parameters)
        filename_ts = self.lambda_start_time
        filename_end_ts = self.lambda_end_time
        '''Create the filename and upload the JSON response to S3 if possible'''
        if granularity.lower() == 'hourly':
            filename = '{}/{}/{}-{}-{}.json'.format('aislelabs', 'hourly-unfiltered-traffic',
                                                    'hourly', filename_ts, filename_end_ts)
        else:
            filename = '{}/{}/{}-{}-{}.json'.format('aislelabs', 'daily-unfiltered-traffic',
                                                    'daily', filename_ts, filename_end_ts)
        if response_json:
            logger.info('Uploading the file to S3 with the following filepath: {}/{}'.format(bucket, filename))
            utils.json_to_s3(bucket, filename, response_json)
            '''Create a dict to pass to the Load class to send the status to RDS'''
            dict_to_db = {
                'filename': filename,
                'tdid': tdids,
                'query_start_date': query_start_date,
                'query_end_date': query_end_date,
                'workflow_step_rds': 1,
                'date_created': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                'date_updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                'processing_complete_rds': False,
                'error_rds': False,
            }
            load = Load(database, table)
            load.send_status_to_db(dict_to_db)
        else:
            logger.warning("We did not receive a successful response back from the endpoint. "
                           "No file will be uploaded to S3")
            dict_to_db = {
                'filename': filename,
                'tdid': tdids,
                'query_start_date': query_start_date,
                'query_end_date': query_end_date,
                'workflow_step_rds': 1,
                'date_created': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                'date_updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                'processing_complete_rds': True,
                'error_rds': False,
            }
            load = Load(database, table)
            load.send_status_to_db(dict_to_db)
    except:
        '''Get the stack trace and print it'''
        err = traceback.format_exc()
        logger.error(err)
        utils.send_err_email(err, 'Data retrieval from Aislelabs', sender,
                             recipients, filename_ts, region)
        '''If we get an error, we still want to send a record to the DB for tracking'''
        dict_to_db = {
            'filename': filename,
            'tdid': tdids,
            'query_start_date': query_start_date,
            'query_end_date': query_end_date,
            'workflow_step_rds': 1,
            'date_created': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
            'date_updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
            'processing_complete_rds': False,
            'error_rds': True,
        }
        load = Load(database, table)
        load.send_status_to_db(dict_to_db)
def lambda_handler(event, context):
    '''
    Function called by the Lambda Trigger. It delegates tasks to other classes and
    functions in order to keep the main function lightweight.
    :param event: Event passed in by the lambda trigger
    :type event: dict
    :param context: Context passed in by the lambda trigger
    :return: No value to return
    '''
    try:
        """Initial process flow"""
        param = event['queryStringParameters']
        if param:
            """Validate parameters"""
            if "granularity" in param:
                granularity = Extract.validate_granularity(param['granularity'])
            else:
                granularity = 'DAILY'
            if "usedecimals" in param:
                usedecimals = Extract.validate_usedecimals(param['usedecimals'])
            else:
                usedecimals = 'false'
            assert granularity.lower() in ['hourly', 'daily'], "Please validate"
            assert usedecimals.lower() in ['false', 'true'], "Please validate"
            """Read the main parameters"""
            lambda_start_time = param['startEpoch']
            lambda_end_time = param['endEpoch']
            tdids = param['tdids']
            Extract(lambda_start_time, lambda_end_time).run(tdids, granularity, usedecimals)
            return {
                "statusCode": 200,
                "body": json.dumps('Data retrieved successfully from {} to {}!'.format(
                    lambda_start_time, lambda_end_time))
            }
        else:
            return {
                "statusCode": 400,
                "body": json.dumps('The request has been sent without parameters, please try again!')
            }
    except Exception as err:
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        utils = Utils()
        utils.send_err_email_process(err, 'parameters do not satisfy the expected values')
        logger.error('Process failed due to an initial parameters error')
        return {
            "statusCode": 400,
            "body": json.dumps('There is a request error on the query parameters, please validate the request and try again.')
        }
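# A minimal test event for the handler above, e.g. for local invocation.
# Only the parameter names come from the handler; the epoch values and TDID
# are made-up placeholders. Epochs are in milliseconds, since run() divides
# them by 1000 before calling datetime.fromtimestamp().
sample_event = {
    "queryStringParameters": {
        "startEpoch": "1609459200000",   # placeholder epoch milliseconds
        "endEpoch": "1609545600000",
        "tdids": "12345",                # placeholder TDID
        "granularity": "hourly",         # optional, defaults to 'DAILY'
        "usedecimals": "true",           # optional, defaults to 'false'
    }
}

response = lambda_handler(sample_event, context=None)
print(response["statusCode"], response["body"])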
class StringFunctions:
    log = cl.customLogs()

    def __init__(self):
        self.utils = Utils()

    def __initiate_vars(self, file_path, delimiter):
        """
        Creates the pandas and RZT Data Frame objects and returns them in a list.
        :param file_path:
        :param delimiter:
        :return:
        """
        df_pandas = pd.read_csv(file_path, delimiter=delimiter)
        rzt_data = RZTData(cntx.experiment)
        df_rzt = rzt_data.read({
            'path': file_path,
            'delimiter': delimiter,
            'encoding': 'utf-8'
        })
        return [df_pandas, df_rzt]

    def compareDataFrame(self, actual, expected, fnc='', index_col=1):
        actual.to_csv("Test.csv")
        actual_tmp = pd.read_csv("Test.csv", index_col=index_col)
        expected.to_csv("Test1.csv")
        expected_tmp = pd.read_csv("Test1.csv", index_col=index_col)
        if self.utils.verifyDFMatch(actual=actual_tmp, expected=expected_tmp) is None:
            self.log.info("Verified the DF Matches for {}() function".format(fnc).upper())
        else:
            self.log.error("DF match failed for {}() function".format(fnc).upper())

    def validate_asc_sort(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        asc_pd = data_frames[0].sort_values(by=col_name, ascending=True)
        asc_rzt = data_frames[1].asc_sort(col_name)
        self.compareDataFrame(actual=asc_rzt, expected=asc_pd, fnc="ascending")

    def validate_desc_sort(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        dsc_pd = data_frames[0].sort_values(by=col_name, ascending=False)
        dsc_rzt = data_frames[1].desc_sort(col_name)
        self.compareDataFrame(actual=dsc_rzt, expected=dsc_pd, fnc="descending")

    def validate_to_upper(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        to_upper_rzt = data_frames[1].to_upper(col_name)
        to_upper_pd = data_frames[0]
        to_upper_pd[col_name] = to_upper_pd[col_name].str.upper()
        self.compareDataFrame(actual=to_upper_rzt, expected=to_upper_pd, fnc="to_upper")

    def validate_to_lower(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        to_lower_rzt = data_frames[1].to_lower(col_name)
        to_lower_pd = data_frames[0]
        to_lower_pd[col_name] = to_lower_pd[col_name].str.lower()
        self.compareDataFrame(actual=to_lower_rzt, expected=to_lower_pd, fnc="to_lower")

    def validate_to_title(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        to_title_rzt = data_frames[1].to_titlecase(col_name)
        to_title_pd = data_frames[0]
        to_title_pd[col_name] = to_title_pd[col_name].str.title()
        self.compareDataFrame(actual=to_title_rzt, expected=to_title_pd, fnc="to_title")

    def validate_trim(self, file_path, col_name, delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        trim_rzt = data_frames[1].trim(col_name)
        trim_pd = data_frames[0]
        trim_pd[col_name] = trim_pd[col_name].str.strip()
        self.compareDataFrame(actual=trim_rzt, expected=trim_pd, fnc="trim")

    def validate_format_date(self, file_path, col_name, new_format='%d/%m/%Y', delimiter=','):
        data_frames = self.__initiate_vars(file_path, delimiter)
        format_rzt = data_frames[1].format_date(key=col_name, destinationformat=new_format)
        format_pd = data_frames[0]
        format_pd[col_name] = pd.to_datetime(format_pd[col_name], format=new_format)
        self.compareDataFrame(actual=format_rzt, expected=format_pd, fnc="format_date")
class Mathematical:
    # Variables
    __file = ''
    __delimiter = ''
    __col_name = ''
    log = cl.customLogs()

    def __init__(self):
        self.utils = Utils()

    def __initiate_vars(self, file_path, delimiter):
        """
        Creates the pandas and RZT Data Frame objects and returns them in a list.
        :param file_path:
        :param delimiter:
        :return:
        """
        df_pandas = pd.read_csv(file_path, delimiter=delimiter)
        rzt_data = RZTData(cntx.experiment)
        df_rzt = rzt_data.read({
            'path': file_path,
            'delimiter': delimiter,
            'encoding': 'utf-8'
        })
        return [df_pandas, df_rzt]

    def compareDataFrame(self, actual, expected, fnc='', index_col=1):
        actual.to_csv("Test.csv")
        actual_tmp = pd.read_csv("Test.csv", index_col=index_col)
        expected.to_csv("Test1.csv")
        expected_tmp = pd.read_csv("Test1.csv", index_col=index_col)
        if self.utils.verifyDFMatch(actual=actual_tmp, expected=expected_tmp) is None:
            self.log.info("Verified the DF Matches for {}() function".format(fnc).upper())
        else:
            self.log.error("DF match failed for {}() function".format(fnc).upper())

    def validate_sqrt(self, file_path, col_name, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        # Treat an empty new_col_name as "no new column requested"; the previous
        # `is None` check could never match the default value of ''
        if not new_col_name:
            sqrt_rzt = data_frames[1].sqrt(col_name)
            sqrt_pd = data_frames[0][col_name] ** 0.5
        else:
            sqrt_rzt = data_frames[1].sqrt(col_name, new_col_name)
            sqrt_pd = data_frames[0]
            sqrt_pd[new_col_name] = sqrt_pd[col_name] ** 0.5
        self.compareDataFrame(actual=sqrt_rzt, expected=sqrt_pd, fnc="sqrt")

    def validate_inverse(self, file_path, col_name, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        if not new_col_name:
            inverse_rzt = data_frames[1].inverse(col_name)
            inverse_pd = data_frames[0]
            inverse_pd[col_name] = 1 / inverse_pd[col_name]
        else:
            inverse_rzt = data_frames[1].inverse(col_name, new_col_name)
            inverse_pd = data_frames[0]
            inverse_pd[new_col_name] = 1 / inverse_pd[col_name]
        self.compareDataFrame(actual=inverse_rzt, expected=inverse_pd, fnc="inverse")

    def validate_power(self, file_path, col_name, exponent, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        if not new_col_name:
            power_rzt = data_frames[1].pow(col_name, exponent=exponent)
            power_pd = data_frames[0]
            power_pd[col_name] = power_pd[col_name] ** exponent
        else:
            power_rzt = data_frames[1].pow(col_name, new_col_name, exponent=exponent)
            power_pd = data_frames[0]
            power_pd[new_col_name] = power_pd[col_name] ** exponent
        self.compareDataFrame(actual=power_rzt, expected=power_pd, fnc="power")

    def validate_log2(self, file_path, col_name, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        if not new_col_name:
            log_rzt = data_frames[1].log2(col_name)
            log_pd = data_frames[0]
            log_pd[col_name] = np.log2(log_pd[col_name])
        else:
            log_rzt = data_frames[1].log2(col_name, new_col_name)
            log_pd = data_frames[0]
            log_pd[new_col_name] = np.log2(log_pd[col_name])
        self.compareDataFrame(actual=log_rzt, expected=log_pd, fnc="log2")

    def validate_log10(self, file_path, col_name, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        if not new_col_name:
            log_rzt = data_frames[1].log10(col_name)
            log_pd = data_frames[0]
            log_pd[col_name] = np.log10(log_pd[col_name])
        else:
            log_rzt = data_frames[1].log10(col_name, new_col_name)
            log_pd = data_frames[0]
            log_pd[new_col_name] = np.log10(log_pd[col_name])
        self.compareDataFrame(actual=log_rzt, expected=log_pd, fnc="log10")

    def validate_log(self, file_path, col_name, base=10, delimiter=',', new_col_name=''):
        data_frames = self.__initiate_vars(file_path, delimiter)
        if not new_col_name:
            log_rzt = data_frames[1].log(col_name, base=base)
            log_pd = data_frames[0]
            log_pd[col_name] = np.log(log_pd[col_name]) / np.log(base)
        else:
            log_rzt = data_frames[1].log(col_name, new_col_name, base=base)
            log_pd = data_frames[0]
            log_pd[new_col_name] = np.log(log_pd[col_name]) / np.log(base)
        self.compareDataFrame(actual=log_rzt, expected=log_pd, fnc="log")
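# A short usage sketch of the validation helpers above (Aggregate, Mathematical,
# StringFunctions). The CSV path and column names are hypothetical examples;
# only the public methods come from the classes themselves.
agg = Aggregate()
agg.validate_count("sales.csv")                                 # hypothetical file
agg.validate_minima("sales.csv", col_name="price")              # hypothetical column
agg.validate_maxima("sales.csv", col_name="price")

maths = Mathematical()
maths.validate_sqrt("sales.csv", col_name="price", new_col_name="price_sqrt")
maths.validate_log("sales.csv", col_name="price", base=2)

strings = StringFunctions()
strings.validate_to_upper("sales.csv", col_name="region")
strings.validate_asc_sort("sales.csv", col_name="region")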
class PageHome(BasePage):
    com_utils = Utils()
    email_value = os.environ.get("flip_username")
    password_value = os.environ.get("flip_password")

    _signIn_user_name = (By.XPATH, "(//input[@type='text'])[last()]")
    _signIn_password = (By.CSS_SELECTOR, "input[type='password']")
    _signIn_login = (By.XPATH, "(//button[@type='submit'])[last()]")
    _signIn_userName_val = '//div[contains(text(),"{0}")]'
    username_elem = None
    _search_results_index_path = None
    _signIn_searchBox = (By.CSS_SELECTOR, "input[type='text'][name='q']")
    _search_icon = (By.CSS_SELECTOR, "button[type='submit']")
    _results_breadcrumb = "//div[@class='bhgxx2']//following::span[contains(text(),'{}')]"
    _search_results_index = "(//div[contains(@class,'bhgxx2 col-12-12')]/div/child::div[@data-id])[{}]//following::a"
    _purchase_item_pdp_title = "//h1/span[contains(text(), '{}')]"

    def invoke_url(self, url):
        """ Open the given url and wait for the page to load """
        self.open(url)
        pageLoaded = self.page_loaded()
        return pageLoaded

    def signIn_enter_emailaddress(self):
        self.clearTextVals(self._signIn_user_name, "Email/Mobile Number field")
        self.sendKeys(self.email_value, self._signIn_user_name, "Email/Mobile Number text field")

    def signIn_enter_password(self):
        self.clearTextVals(self._signIn_password, "Password field")
        self.sendKeys(self.password_value, self._signIn_password, "Password text field")

    def signIn_click_login(self, userName_val):
        self.elementClick(self._signIn_login, elementName="Login button")
        self._signIn_userName_assert = self.com_utils.addTextValXpath(userName_val, self._signIn_userName_val)
        self.username_elem = self.waitForElement(self._signIn_userName_assert, elementName="User Name text")

    def signIn_validate_credentials(self, userName_val):
        self.signIn_enter_emailaddress()
        self.signIn_enter_password()
        self.signIn_click_login(userName_val)
        return self.username_elem

    def enter_value_search(self, searchValue):
        """ Enter values based on searchValue parameter """
        self.sendKeys(searchValue, self._signIn_searchBox, elementName="Search box")
        self.elementClick(self._search_icon, elementName="Search icon")
        _search_results_breadcrumb = self.com_utils.addTextValXpath(searchValue, self._results_breadcrumb)
        self.waitForElement(_search_results_breadcrumb, elementName="Breadcrumb text")
        breadcrumb_searchVal = self.isElementPresent(locator=_search_results_breadcrumb)
        return breadcrumb_searchVal

    def select_random_item_search_results(self, searchLabel):
        """ Select any random item from the search results """
        _search_results_breadcrumb = self.com_utils.addTextValXpath(searchLabel, self._results_breadcrumb)
        results_text = self.getText(_search_results_breadcrumb, info="Getting results count in page")
        # print("Breadcrumb for search results :: " + results_count)
        results_index = results_text.split(" ")
        # print("Results index :: " + str(results_index))
        results_start_index = results_index[1]
        # print("Results start index :: " + str(results_start_index))
        results_end_index = results_index[3]
        # print("Results End index :: " + str(results_end_index))
        random_index = random.randint(int(results_start_index) + 1, int(results_end_index) - 1)
        # print("Selecting the item in index :: " + str(random_index))
        self._search_results_index_path = self.com_utils.addTextValXpath(random_index, self._search_results_index)
        purchase_item_title = self.getText(self._search_results_index_path, info="Purchase Item Title")
        if purchase_item_title.endswith("..."):
            purchase_item_title = purchase_item_title.replace("...", "", 1)
        return purchase_item_title

    def click_item_search_results(self, purchase_item_title):
        self.elementClick(self._search_results_index_path, elementName="Purchase Item")
        time.sleep(3)
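# A possible end-to-end flow stitching the two page objects together (PageHome
# and ScreenDetails above). The URL, display name, and search term are
# placeholders, and passing `driver` to the constructors is an assumption about
# what BasePage expects; only the page-object methods come from the classes.
home = PageHome(driver)          # assumes BasePage takes a WebDriver instance
pdp = ScreenDetails(driver)

home.invoke_url("https://www.flipkart.com")                  # placeholder URL
home.signIn_validate_credentials("Test User")                # placeholder display name
home.enter_value_search("headphones")                        # placeholder search term
item_title = home.select_random_item_search_results("headphones")
home.click_item_search_results(item_title)

pdp_cost = pdp.get_pdp_item_title_cost()
pdp.add_navigate_cart()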