def caption(img):
    """Return the predictions of a newly built ``ModelWrapper`` for ``img``.

    A fresh ``ModelWrapper`` is constructed on every call and its
    ``predict`` result is handed back to the caller unchanged.
    """
    return ModelWrapper().predict(img)
def extract_tokens(csv_file):
    """Extract pipe-delimited tokens from every cell of a CSV file.

    Args:
        csv_file: Path of a comma-delimited CSV file to read.

    Returns:
        A list with one entry per CSV row. Each entry is a list with one
        item per cell of that row, and each item is the list of tokens
        found in the cell (the text between ``|`` markers, markers
        stripped).
    """

    def extract_token(sent, regex=r'(\|.*?\|)+'):
        """Return the ``|...|`` tokens found in ``sent`` without the pipes."""
        # NOTE(review): the pattern has one repeated capturing group, so
        # ``findall`` yields only the LAST repetition of each match; directly
        # adjacent tokens such as ``|a||b|`` produce just ``b``. Kept as in
        # the original -- confirm this is the intended behaviour.
        matches = re.findall(regex, sent)
        # Drop the leading and trailing pipe character of every match.
        return [match[1:-1] for match in matches]

    token_docs = []
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are parsed correctly.
    with open(csv_file, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            token_docs.append([extract_token(cell) for cell in row])
    return token_docs


# Script section: tokenize the input file and run the model over all rows.
tokenlist = extract_tokens('en-50k-200.json_tokensOR.csv')
# Shallow copy of the row list (the rows themselves are shared).
each_doct = list(tokenlist)
# NOTE(review): ``model_wrapper`` is not defined in the visible part of this
# file (``caption`` only creates a local one) -- confirm it is bound at module
# level before this line runs.
entities, total_inftime = model_wrapper.predict(each_doct)
model_pred = {'tags': entities, 'total_inftime': total_inftime}
# NOTE(review): ``total_char`` is likewise not defined in the visible source;
# verify where it is computed.
print('throughput:', total_char / total_inftime)