def __init__( self, clientId ):
    """
    creates an empty prompt stack for one client and pushes the default prompt

    clientId: int id of the client this prompt stack belongs to
    """
    # the bare isInt() call discarded its result; the other methods in this
    # file guard their ids with an assert, so do the same here
    assert isInt( clientId )

    self.prompts = []
    self.clientId = clientId # currently unused, satisfies ClientTracker signature

    # temp default, should really broadcast PROMPT_CREATED
    self.pushPrompt( lambda clientId: "\n{!{FB<client {FG%s{FB> " % clientId )
def __init__(self, clientId):
    """Create an empty prompt stack for ``clientId`` and push the default prompt.

    Args:
        clientId: int id of the client this prompt stack belongs to.
    """
    # Previously the result of isInt() was thrown away, so a non-int id was
    # accepted silently; sibling methods assert on it.
    assert isInt(clientId)

    self.prompts = []
    # currently unused, satisfies ClientTracker signature
    self.clientId = clientId

    # temp default, should really broadcast PROMPT_CREATED
    self.pushPrompt(
        lambda clientId: "\n{!{FB<client {FG%s{FB> " % clientId)
def reverse(info=True):
    """Prompt for a target folder, host ip and port, then build the command.

    Each invalid answer prints an error and ends the function with ``None``.

    Args:
        info: when true, print the generated command and return ``None``;
            when false, return the generated command without printing it.
    """
    pInfo("Write 0 in any input to exit the current tool.")

    target_dir = input(
        "Copy in current folder o other folder?(type . for current or type folder) "
    )
    if target_dir == '.':
        target_dir = os.getcwd()
    elif not os.path.isdir(target_dir):
        pError(f'Not a valid path.')
        return

    host = input("Enter host ip: ")
    if not util.isIpv4(host):
        pError(f'Not a valid ip.')
        return

    port_text = input("Select port number?(Enter for default [1969]) ")
    if port_text == '':
        # empty answer: let copyReverse use its own default port
        code = copyReverse(host, path=target_dir)
    else:
        if not util.isInt(port_text):
            pError(f'Not a valid number.')
            return
        port_number = int(port_text)
        if port_number < 1 or port_number > 65535:
            pError(f'Not in valid port range [1.65535].')
            return
        code = copyReverse(host, port_number, target_dir)

    if info:
        pGood("The reverse shell has been created and enconded.")
        pInfo(f'Console command to execute shell: \n')
        pTest(code)
        return None
    return code
def findProducts(labels, words):
    """Group labelled words into product dicts.

    Consecutive ``'product_name'`` words are lower-cased and joined into one
    name. At the first word after a name, the following words are scanned for
    that product's ``'product_amount'`` and ``'product_price'``. The scan ends
    once both are known or the next product name starts.

    Args:
        labels: one label string per entry of ``words``.
        words: the word strings.

    Returns:
        A list of dicts with ``'name'``, ``'amount'`` (defaults to 1) and,
        when one was found, ``'price'``. A name that runs to the very end of
        ``words`` is not emitted, because nothing follows it to close it.
    """
    products = []
    product_name = ''
    product = {}
    for i, v in enumerate(words):
        # an amount seen before the name is complete is kept as-is
        if labels[i] == 'product_amount':
            product['amount'] = v
        if labels[i] == 'product_name':
            product_name += v.lower() + ' '
        elif product_name != '':
            # first non-name word after a name: close the name (drop the
            # trailing space) and look ahead for amount / price
            product['name'] = product_name[:-1]
            for j in range(i, len(words)):
                if labels[j] == 'product_amount':
                    # fix: read the word at the scan position; the original
                    # used ``v`` (words[i]), which is wrong whenever j > i
                    candidate = words[j]
                    if util.isInt(candidate):
                        product['amount'] = int(candidate)
                    else:
                        product['amount'] = candidate
                    if 'price' in product:
                        break
                if labels[j] == 'product_price':
                    product['price'] = words[j]
                    if 'amount' in product:
                        break
                elif labels[j] == 'product_name':
                    # next product begins here
                    break
            if 'amount' not in product:
                product['amount'] = 1
            products.append(product)
            product = {}
            product_name = ''
    return products
def popPrompt(self, n=1):
    """Remove up to ``n`` prompts from the end of ``self.prompts``.

    Running out of prompts before ``n`` removals is not an error; the
    method simply stops.
    """
    assert isInt(n)
    assert n > 0

    remaining = n
    while remaining > 0:
        try:
            self.prompts.pop()
        except IndexError:
            # nothing left to pop
            return
        remaining -= 1
def getZoneTemplate( templateId ):
    """
    returns the first ZoneTemplate whose id equals templateId
    (whatever the query's .first() yields when nothing matches)

    templateId: int
    """
    assert isInt( templateId )

    session = getSession()
    try:
        return session.query(ZoneTemplate).filter(ZoneTemplate.id == templateId ).first()
    finally:
        # close the session even when the query raises, so it is not leaked
        session.close()
def popPrompt(self, n = 1):
    """
    pops up to n prompts off the end of self.prompts
    stops quietly if the list empties first
    """
    assert isInt( n )
    assert n > 0

    left = n
    try:
        while left > 0:
            self.prompts.pop()
            left = left - 1
    except IndexError:
        # fewer than n prompts were available
        pass
def getMobTemplate( templateId ):
    """
    returns the first MobTemplate whose id equals templateId
    (whatever the query's .first() yields when nothing matches)

    templateId: int
    """
    assert isInt( templateId )

    session = getSession()
    try:
        return session.query(MobTemplate).filter(MobTemplate.id == templateId ).first()
    finally:
        # close the session even when the query raises, so it is not leaked
        session.close()
def getOneLine( clientId, submitCallback ):
    """
    activates a text input widget for clientId, which returns one line of text

    pushes a CmdMap command handler for clientId that hands its input to
    _submitText together with submitCallback

    submitCallback: func( clientId, text )
    """
    assert isInt( clientId )
    assert isFunc( submitCallback)

    def forwardToSubmit( x, remaining ):
        return _submitText( x, submitCallback, remaining )

    handler = CmdMap( forwardToSubmit )
    pushCmdHandler( clientId, handler )
def getZoneTemplate(templateId):
    """Return the first ``ZoneTemplate`` whose id equals ``templateId``.

    The result is whatever the query's ``.first()`` yields when nothing
    matches.

    Args:
        templateId: int id of the template to load.
    """
    assert isInt(templateId)

    session = getSession()
    try:
        return session.query(ZoneTemplate).filter(
            ZoneTemplate.id == templateId).first()
    finally:
        # Close the session even when the query raises, so it is not leaked.
        session.close()
def _dispatch(self, commandName, *args):
    """Parse an option selection, store the chosen options, then exit.

    ``commandName`` is a comma-separated list of items, each either a single
    index (``"3"``) or an inclusive range (``"2-5"``). The selected entries of
    ``self._optionsCreateFun()`` are stored in ``self._context.result``.

    Raises:
        CommandValidateException: malformed item, more than one index when
            ``self._multiple`` is false, or an index outside the options.
        ExitRequest: always raised at the end, after the result is stored.
    """
    def badFormat():
        # built lazily so self.format is only read when an error is reported
        return CommandValidateException(
            'Input wrong format, input like this - %s' % self.format)

    rs = []
    for command in commandName.split(','):
        parts = command.split('-')
        if len(parts) == 1:
            if not util.isInt(parts[0]):
                raise badFormat()
            rs.append(int(parts[0]))
        elif len(parts) == 2:
            # fix: validate BOTH bounds; only the upper one was checked, so
            # "a-3" or "-1" escaped as a raw ValueError from int()
            if not (util.isInt(parts[0]) and util.isInt(parts[1])):
                raise badFormat()
            rs.extend(range(int(parts[0]), int(parts[1]) + 1))
        else:
            # fix: "1-2-3" used to be ignored without any feedback
            raise badFormat()

    if len(rs) > 1 and not self._multiple:
        raise CommandValidateException('Only one chose allowed.')

    options = self._optionsCreateFun()
    for c in rs:
        if not 0 <= c < len(options):
            raise CommandValidateException("'%s' is not in options." % c)

    self._context.result = [options[r] for r in rs]
    raise ExitRequest('')
def findProductAmounts(textResult, labelsResult, reciept):
    """Relabel words as 'product_amount' using the receipt's ground truth.

    For each run of 'product_name' words, the joined name is looked up in
    ``reciept.linesText``. Integers found in the text around that position
    that equal one of the ground-truth amounts of fuzzily matching products
    get their entry in ``labelsResult`` set to 'product_amount'.

    Args:
        textResult: sequence of word strings.
        labelsResult: labels parallel to ``textResult``; modified in place.
        reciept: object providing ``groundTruth['products']`` and ``linesText``.

    Returns:
        The tuple ``(textResult, labelsResult)``.
    """
    # NOTE(review): this function reached review flattened onto one line; the
    # nesting below is reconstructed from the statements - confirm against the
    # original layout before relying on it.
    products = reciept.groundTruth['products']  # not used below
    productName = ''
    lineText = reciept.linesText
    prices = []  # not used below
    foundRange = []  # (start, end) spans of lineText already matched to a name
    for index, (text, label) in enumerate(zip(textResult, labelsResult)):
        if label == 'product_name':
            productName+= text + ' '
        elif productName != '':
            # a name just ended: drop the trailing space added above
            productName = productName[:-1]
            ### Find product Amount
            # amounts of ground-truth products whose name scores above 75
            potentialAmounts = [p['amount'] for p in reciept.groundTruth['products'] if (fuzz.ratio(p['name'], productName) > 75 and 'amount' in p)]
            # skip when the amounts sum to no more than their count
            # (for example no candidates, or every candidate amount is 1)
            # NOTE(review): this skips the productName reset at the end of
            # the branch - confirm that is intended
            if sum(potentialAmounts) <= len(potentialAmounts):
                continue
            l = len(productName)
            for i in range(len(lineText) - l):
                sub = lineText[i:i+l]
                if sub == productName and (i, i+l) not in foundRange:
                    foundRange.append((i, i + l))
                    amount = None
                    rest = ''  # characters collected since the last newline
                    lines = 0  # newlines seen so far in this scan
                    # NOTE(review): i - 10 is negative when i < 10, which
                    # indexes from the end of lineText - confirm
                    for j in range(i - 10, len(lineText)):
                        if lines >= 3:
                            break
                        if lineText[j] == '\n':
                            lines +=1
                            restWords = rest.replace('\n', '').split(' ')
                            for word in restWords:
                                if util.isInt(word) and int(word) in potentialAmounts:
                                    amount = word
                                if not amount:
                                    continue
                                # relabel the first equal word at or after index - 10
                                for s in range(index - 10, len(textResult)):
                                    if textResult[s] == amount:
                                        labelsResult[s] = 'product_amount'
                                        break
                            rest = ''
                            if amount:
                                break
                        rest += lineText[j]
                    # only the first unseen occurrence of the name is used
                    break
            productName = ''
    return textResult, labelsResult
def __init__(self, root_path, filename, devices=None):
    # Build self.devices (ID -> OrientSD) from the entries under root_path.
    # An entry is used when its name contains filename[2:] and its first two
    # characters pass isInt; when devices is given, that two-character
    # prefix (as an int) must also be in devices.
    self.devices = dict()
    for entry in listdir(root_path):
        prefix = entry[0:2]
        if filename[2:] not in entry or not isInt(prefix):
            continue
        full_path = join(root_path, entry)
        print(full_path)
        if devices is not None and int(prefix) not in devices:
            continue
        sensor = OrientSD(full_path)
        sensor_id = sensor.ID
        # entries whose OrientSD reports no ID are left out
        if sensor_id is not None:
            self.devices[sensor_id] = sensor
    self.IDs = self.devices.keys()
def findTotalPrice(labels, words):
    """Assemble the total price from the words labelled ``'total_price'``.

    The first labelled word starts the price. A later ``'.'`` is appended
    once, and the first integer word below 100 after the dot is taken as the
    fraction and ends the scan. A trailing dot is dropped.

    Args:
        labels: one label string per entry of ``words``.
        words: the word strings.

    Returns:
        The price as ``float``, or ``None`` when no parsable price was found.
    """
    pot_price = [
        word for label, word in zip(labels, words) if label == 'total_price'
    ]

    price = ''
    for p in pot_price:
        if price == '':
            price += p
        elif p == '.' and '.' not in price:
            price += p
        elif '.' in price and util.isInt(p) and int(p) < 100:
            price += p
            break

    if price.endswith('.'):
        price = price[:-1]

    try:
        return float(price)
    except ValueError:
        # narrowed from a bare except: only an unparsable string is expected
        return None
def removeClient( self, clientId ):
    """
    drops clientId from self.clients; the id must currently be present
    """
    assert isInt( clientId )

    tracked = self.clients
    assert clientId in tracked
    del tracked[ clientId ]
def addClient( self, clientId ):
    """
    stores the value of self.addFunc( clientId ) in self.clients under
    clientId; the id must not be present yet
    """
    assert isInt( clientId )
    assert clientId not in self.clients

    entry = self.addFunc( clientId )
    self.clients[ clientId ] = entry
def isClient( self, clientId ):
    """
    returns True when clientId is a key of self.clients, else False
    """
    assert isInt( clientId )

    present = clientId in self.clients
    return present
def addClient(self, clientId):
    """Register ``clientId`` with the value produced by ``self.addFunc``.

    The id must be an int that is not yet in ``self.clients``.
    """
    assert isInt(clientId)
    assert not (clientId in self.clients)

    created = self.addFunc(clientId)
    self.clients[clientId] = created
def __init__( self, ID ):
    """
    stores the given id on the instance

    ID: int
    """
    # the bare isInt() call discarded its result; assert on it the way the
    # other id checks in this file do
    assert isInt( ID )

    self.ID = ID
def _readLossValues(path):
    """Return the floats stored one per line in the text file at ``path``."""
    with open(path, 'r') as lossFile:
        # float() tolerates the trailing newline, so a final line without one
        # no longer loses its last character (the old code sliced line[:-1])
        return [float(line) for line in lossFile]


def main(args):
    """Load all receipts, then run the sub-command named by ``args[1]``.

    Recognised sub-commands: ``plot_bert``, ``plot_lstm``,
    ``create_data_statistics``, ``generate_gcn_data``, ``create_result``,
    ``generate_word_data`` (optional ``args[2]``: number of synthetic
    samples), ``oracle``, ``rule_based``, ``create_lstm_result`` and
    ``create_char_data``.

    Receipts and test paths are appended to ``receipts`` and
    ``testFilePaths``, which are defined outside this function.
    """
    fileNames = [
        name for name in os.listdir(trainTextDir) if name.endswith('.json')
    ]
    for fileName in fileNames:
        with open(os.path.join(trainTextDir, fileName)) as text_json:
            text_data = json.load(text_json)
        text_data = tx.filterGarbage(text_data)
        tx.calculateAngles(text_data)
        tx.calculateCenterPoints(text_data)
        text_lines = tx.createLines(text_data)
        labelsPath = os.path.join(
            trainLabelsDir, fileName.split('_')[0] + '_labels.json')
        with open(labelsPath) as ground_truth_json:
            truth = json.load(ground_truth_json)
        truth = tx.removeSwedishLetters(truth)
        receipts.append(rc.Receipt(fileName, text_lines, truth))

    # fix: the list of test files is now closed after reading
    with open('./data/test/test.txt', "r") as testList:
        for line in testList:
            # strip only the newline; [:-1] also ate a real character when
            # the last line had no newline
            testFilePaths.append(line.rstrip('\n'))

    test_reciepts = [r for r in receipts if r.path in testFilePaths]

    if args[1] == 'plot_bert':
        d1 = pd.DataFrame(
            {
                'train synthetic 10000': plot.train_10000_v2,
                'validation synthetic 10000': plot.val_10000_v2
            },
            index=range(1, 31))
        d2 = pd.DataFrame(
            {
                'train synthetic 1000': plot.train_1000,
                'validation synthetic 1000': plot.val_1000
            },
            index=range(1, 31))
        d3 = pd.DataFrame(
            {
                'train real data': plot.train,
                'validation real data': plot.val
            },
            index=range(1, 31))
        data = pd.concat([d1, d2, d3], axis=1)
        sns.set_style("darkgrid")
        ax = sns.lineplot(data=data)
        ax.set(xlabel='epoch', ylabel='loss')
        plt.show()

    if args[1] == 'plot_lstm':
        # fix: the six loss files were opened and never closed
        train_loss = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/trainLoss.txt')
        val_loss = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/valLoss.txt')
        train_loss1000 = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/trainLoss1000.txt')
        val_loss1000 = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/valLoss1000.txt')
        train_loss10000 = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/trainLoss10000.txt')
        val_loss10000 = _readLossValues(
            '/Users/markolazic/Desktop/sroie-task3/data/valLoss10000.txt')
        d1 = pd.DataFrame(
            {
                'train synthetic 10000': train_loss10000,
                'validation synthetic 10000': val_loss10000
            },
            index=range(1, 2001))
        d2 = pd.DataFrame(
            {
                'train synthetic 1000': train_loss1000,
                'validation synthetic 1000': val_loss1000
            },
            index=range(1, 2001))
        d3 = pd.DataFrame(
            {
                'train real data': train_loss,
                'validation real data': val_loss
            },
            index=range(1, 2001))
        data = pd.concat([d1, d2, d3], axis=1)
        # smooth the curves with a 100-row rolling mean
        data = data.rolling(100).mean()
        sns.set_style("darkgrid")
        ax = sns.lineplot(data=data)
        ax.set(xlabel='epoch', ylabel='loss')
        plt.show()

    if args[1] == 'create_data_statistics':
        stats = util.create_data_statistics(receipts, 'vendor')
        for k, v in sorted(stats.items(),
                           reverse=True,
                           key=lambda item: item[1]):
            print(k, '---', v)

    if args[1] == 'generate_gcn_data':
        test_data_dict = {}
        train_data_dict = {}
        for i, receipt in enumerate(receipts):
            if receipt.path in testFilePaths:
                test_data_dict[i] = data_gen.generateWordClasses(receipt)
            else:
                train_data_dict[i] = data_gen.generateWordClasses(
                    receipt, correcting=False)
        gcn.create(receipts, testFilePaths)

    if args[1] == 'create_result':
        path = './data/results/10000_synt'
        test_dict_path = os.path.join(path, 'res_dict.pth')
        res_dict = torch.load(test_dict_path)
        res_list = [
            extract(labels, words) for _, (labels, words) in res_dict.items()
        ]
        calculateMetrics(test_reciepts, res_list, writeToFile=True, path=path)

    if args[1] == 'generate_word_data':
        generateSynthetic = False
        # fix: args[2] is optional; indexing it unguarded raised IndexError
        if len(args) > 2 and args[2] and util.isInt(args[2]):
            generateSynthetic = True
            number = int(args[2])
        train_data_dict = {}
        test_data_dict = {}
        for i, receipt in enumerate(receipts):
            if receipt.path in testFilePaths:
                test_data_dict[i] = data_gen.generateWordClasses(receipt)
            else:
                train_data_dict[i] = data_gen.generateWordClasses(
                    receipt, correcting=False)
        train_receipts = [r for r in receipts if r.path not in testFilePaths]
        if generateSynthetic:
            synthetic = generateSintheticData(train_receipts, number)
            for i, (words, labels) in enumerate(synthetic):
                train_data_dict[i + len(receipts)] = (words, labels)
        # Disabled vocabulary export, kept for reference:
        # vocab = data_gen.createVocabulary(receipts + synthetic)
        # f=open('./data/synt_vocab.txt',"w+")
        # for w in vocab:
        #     f.write(w + '\n')
        # f.write('[UNK]' + '\n')
        # f.write('[CLS]' + '\n')
        # f.write('[SEP]' + '\n')
        # f.write('[MASK]' + '\n')
        # f.close()
        torch.save(train_data_dict, "./data/synt_10000_train_data_dict.pth")
        torch.save(test_data_dict, "./data/synt_test_data_dict.pth")

    if args[1] == 'oracle':
        for receipt in test_reciepts:
            data_gen.generateWordClasses(receipt)
        oracle(test_reciepts)

    if args[1] == 'rule_based':
        for receipt in test_reciepts:
            predict(receipt)
        calculateRuleBasedAccuracy(test_reciepts)

    if args[1] == 'create_lstm_result':
        result_jsons = [
            name for name in os.listdir(lstmResultDir)
            if name.endswith('.json')
        ]
        # result files are named "<number>.json"; order them numerically
        result_jsons.sort(key=lambda r: int(r.split('.')[0]))
        results_dicts = []
        for fileName in result_jsons:
            with open(os.path.join(lstmResultDir, fileName)) as text_json:
                results_dicts.append(json.load(text_json))
        calculateLSTMaccuracy(test_reciepts, results_dicts)
    elif args[1] == 'create_char_data':
        generateSynthetic = True
        number = 10000
        train_data_dict = {}
        test_data_dict = {}
        for i, receipt in enumerate(receipts):
            if receipt.path in testFilePaths:
                test_data_dict[i] = data_gen.generateCharClasses(
                    receipt, includeProducts=True)
            else:
                train_data_dict[i] = data_gen.generateCharClasses(
                    receipt, includeProducts=True)
        if generateSynthetic:
            VOCAB = ascii_uppercase + digits + punctuation + " \t\n"
            for r in receipts:
                data_gen.generateWordClasses(r)
            synthetic = generateSintheticData(receipts, number)
            for i, (words, labels) in enumerate(synthetic):
                # expand word labels to one label per character, with a
                # 0-labelled space after every word
                t_new_words = ''
                t_new_labels = []
                for w, l in zip(words, labels):
                    t_new_words += w.upper() + ' '
                    t_new_labels += [
                        util.getClassInt(l) for _ in range(len(w))
                    ] + [0]
                # keep only characters that are part of the vocabulary
                new_words = ''
                new_labels = []
                for ch, chLabel in zip(t_new_words, t_new_labels):
                    if ch in VOCAB:
                        new_words += ch
                        new_labels.append(chLabel)
                # drop the trailing space appended after the last word
                new_words = new_words[0:-1]
                new_labels = new_labels[0:-1]
                # a 0 label between two equal labels joins their class.
                # fix: this loop reused ``i`` and clobbered the sample index,
                # so samples overwrote each other in train_data_dict
                for k in range(1, len(new_words) - 1):
                    if (new_labels[k] == 0
                            and new_labels[k - 1] == new_labels[k + 1]):
                        new_labels[k] = new_labels[k - 1]
                train_data_dict[len(receipts) + i] = (new_words, new_labels)
        print(train_data_dict)
        torch.save(
            train_data_dict,
            "/Users/markolazic/Desktop/sroie-task3/data/train_char_data_prod_synt10000.pth"
        )
        torch.save(
            test_data_dict,
            "/Users/markolazic/Desktop/sroie-task3/data/test_char_data_prod_synt10000.pth"
        )
def removeClient(self, clientId):
    """Forget ``clientId``.

    The id must be an int that is currently a key of ``self.clients``.
    """
    # one combined check; isInt is still evaluated first and short-circuits
    assert isInt(clientId) and clientId in self.clients

    registry = self.clients
    del registry[clientId]
def isClient( clientId ):
    """
    asserts that clientId is an int that is present in clients

    returns nothing; a failed check surfaces as AssertionError
    """
    # the bare isInt() call discarded its result; assert on it the way the
    # other id checks in this file do
    assert isInt( clientId )
    assert clientId in clients, "core.client.isClient received a clientId that matches no clients (%s)" % clientId
def isClient(self, clientId):
    """Return ``True`` when ``clientId`` is a key of ``self.clients``."""
    assert isInt(clientId)

    known_ids = self.clients
    return clientId in known_ids
def _normalizeTotalPrice(raw):
    """Reduce a predicted price string to digits and at most one dot."""
    price = ''.join(
        ch for ch in raw.replace(',', '.') if util.isInt(ch) or ch == '.')
    if price.count('.') == 2:
        # two dots: drop the first one (presumably a thousands separator)
        first = price.index('.')
        price = price[:first] + price[first + 1:]
    elif price.count('.') == 1 and len(price.split('.')[-1]) > 2:
        # more than two digits after the only dot: remove the dot
        price = price.replace('.', '')
    return price


def _lettersOnly(raw):
    """Keep only the alphabetic characters of a predicted currency string."""
    return ''.join(ch for ch in raw if ch.isalpha())


def _longerOfTwoTokens(raw):
    """For a two-token string return the longer token (first on ties)."""
    parts = raw.split(' ')
    if len(parts) != 2:
        return raw
    return parts[1] if len(parts[1]) > len(parts[0]) else parts[0]


def _firstOfTwoTokens(raw):
    """For a two-token string return the first token."""
    parts = raw.split(' ')
    return parts[0] if len(parts) == 2 else raw


def _reportMetrics(title, total, found, correct):
    """Print counts, precision, recall and F1; return (precision, recall)."""
    print('-----%s-----' % title)
    print(total, found, correct)
    precision = util.precision(correct, found)
    recall = util.recall(total, correct)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', util.fScore(precision, recall))
    return precision, recall


def calculateLSTMaccuracy(receipts, results):
    """Score LSTM predictions against ground truth and print the metrics.

    Args:
        receipts: objects exposing a ``groundTruth`` dict.
        results: prediction dicts, ``results[i]`` belonging to
            ``receipts[i]``. Predicted products get ``'amount'`` set to 1
            in place.

    Prints per-field counts with precision/recall/F1, then the micro and
    macro averages. Returns nothing.
    """
    # (field key, cleaner for the predicted string or None, comparison)
    scalarFields = (
        ('total_price', _normalizeTotalPrice, compare.totalPrice),
        ('currency', _lettersOnly, compare.currency),
        ('date', _longerOfTwoTokens, compare.date),
        ('vendor', None, compare.vendor),
        ('tax_rate', _firstOfTwoTokens, compare.taxRate),
        ('address', None, compare.address),
    )
    counts = {}
    for name, _, _ in scalarFields:
        counts[name] = {'total': 0, 'found': 0, 'correct': 0}
    counts['products'] = {'total': 0, 'found': 0, 'correct': 0}

    count = 0
    for i, receipt in enumerate(receipts):
        predicted = results[i]
        truth = receipt.groundTruth
        corr = True

        for name, clean, matches in scalarFields:
            if name in predicted:
                value = predicted[name]
                if clean is not None:
                    value = clean(value)
            else:
                value = None
            tally = counts[name]
            if value:
                tally['found'] += 1
            if name in truth:
                tally['total'] += 1
                if matches(truth[name], value):
                    tally['correct'] += 1
                else:
                    corr = False

        tally = counts['products']
        real_products = truth.get('products', [])
        tally['total'] += len(real_products)
        if 'products' in predicted:
            matched = set()  # ground-truth products already paired
            for product in predicted['products']:
                product['amount'] = 1
                tally['found'] += 1
                if 'name' not in product:
                    continue
                for j, real_product in enumerate(real_products):
                    if j in matched:
                        continue
                    if compare.products(product, real_product):
                        matched.add(j)
                        tally['correct'] += 1
                        break

        # fix: ``corr`` was computed but never used, so the receipt count
        # always printed 0. Products do not take part in this check, exactly
        # as they never cleared ``corr`` before.
        if corr:
            count += 1

    print('-----TOTAL CORRECT RECEIPTS-----')
    print(count, 'of', len(receipts))

    reportOrder = (
        ('VENDORS', 'vendor'),
        ('DATES', 'date'),
        ('ADDRESSES', 'address'),
        ('TAX RATES', 'tax_rate'),
        ('PRICE', 'total_price'),
        ('CURRENCY', 'currency'),
        ('PRODUCTS', 'products'),
    )
    total_precision = 0
    total_recall = 0
    for title, name in reportOrder:
        tally = counts[name]
        precision, recall = _reportMetrics(
            title, tally['total'], tally['found'], tally['correct'])
        total_precision += precision
        total_recall += recall

    totalDataPoints = sum(c['total'] for c in counts.values())
    totalDataPointsFound = sum(c['found'] for c in counts.values())
    totalCorrect = sum(c['correct'] for c in counts.values())

    # fix: the micro-average figures used to be added into the macro sums,
    # so eight values were divided by seven
    _reportMetrics('MICRO AVG', totalDataPoints, totalDataPointsFound,
                   totalCorrect)

    print('-----MACRO AVG-----')
    print(totalDataPoints, totalDataPointsFound, totalCorrect)
    precision = total_precision / float(len(reportOrder))
    recall = total_recall / float(len(reportOrder))
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1:', util.fScore(precision, recall))
def clientConnectedToServer( clientId ):
    """
    creates a Client for clientId, stores it in clients and sends
    signals.CONNECTED through the dispatcher

    clientId: int that is not yet a key of clients
    """
    # the bare isInt() call discarded its result; assert on it the way the
    # other id checks in this file do
    assert isInt( clientId )
    assert clientId not in clients, "client.newClient received a duplicate clientId (%s)" % clientId

    c = Client( clientId )
    clients[ clientId ] = c

    dispatcher.send( signals.CONNECTED, clientConnectedToServer, clientId )