def __init__(self, word_vectors, companies, styles, materials, items, probase_brands, probase_materials, patterns, top_category_items, deep_detectStartup, confFilePath, tfidf): self.conf = json.load(open(confFilePath)) self.tfidf = tfidf self.api_key = self.conf["google_api_key_path"] self.deep_detect_models = self.conf["deep_detect_models"] self.CAPTION_FACTOR = self.conf["caption_factor"] self.COMMENTS_FACTOR = self.conf["comments_factor"] self.USERTAG_FACTOR = self.conf["usertag_factor"] self.HASHTAG_FACTOR = self.conf["hashtag_factor"] if deep_detectStartup: self.dd = DD(self.conf["deep_detect_host"], port=self.conf["deep_detect_port"]) self.startup_deep_detect() self.wordvec_model = gensim.models.KeyedVectors.load_word2vec_format( word_vectors, binary=False) self.companies = companies self.styles = styles self.materials = materials self.items = items self.brands_keywords_google = [] self.materials_keywords_google = [] self.probase_brands = probase_brands self.probase_materials = probase_materials self.colors = [] self.patterns = patterns self.top_category_items = top_category_items self.lemmatize()
def predict(service, chart, image_filenames): # setting up DD client host = 'localhost' sname = config['REPO'][service]['NAME'] dd = DD(host) dd.set_return_format(dd.RETURN_PYTHON) parameters_input = {} parameters_mllib = {} parameters_output = { "best": 10, "template": "{{#body}}{{#predictions}} " "{ \"index\": {\"_index\": \"objects-10\", \"_type\": \"img\" } }\n " "{ \"uri\": \"{{uri}}\", " "\"chart\": \"" + chart + "\", " # "\"artist\": \"" + artist + "\", " "\"categories\": [ {{#classes}} " "{ \"category\": \"{{cat}}\", " "\"score\":{{prob}} } " "{{^last}},{{/last}}{{/classes}} ] }\n " "{{/predictions}}{{/body}} \n", "network": { "url": "host.docker.internal:9200/objects-10/_bulk", "http_method": "POST" } } predict = dd.post_predict(sname, image_filenames, parameters_input, parameters_mllib, parameters_output)
def __init__(self, dnnmodel, image_files, index_repo, batch_size=32, dd_host='localhost', dd_port=8080, dd_description='image classification', meta_in='', meta_out='', captions_in='', captions_out='', mapi_in='', mapi_out=''): self.dd_host = dd_host self.dd_port = dd_port self.dd_description = dd_description self.dd_mllib = 'caffe' self.meta_in = meta_in self.meta_out = meta_out self.captions_in = captions_in self.captions_out = captions_out self.mapi_in = mapi_in self.mapi_out = mapi_out self.gpuid = 0 self.dnnmodel = dnnmodel if self.dnnmodel.extract_layer: self.dd_mltype = 'unsupervised' else: self.dd_mltype = 'supervised' self.image_files = image_files self.batch_size = batch_size self.binarized = False self.dd = DD(self.dd_host, self.dd_port) self.dd.set_return_format(self.dd.RETURN_PYTHON) self.index_repo = index_repo + '/' + self.dnnmodel.name try: os.mkdir(self.index_repo) except: #logger.warning('directory ' + self.index_repo + ' may alreay exist') pass self.st = {} # shelve used for full tags storage self.stm = {} # in memory tmp storage if self.dd_mltype == 'supervised': self.st = shelve.open(self.index_repo + '/tags.bin') self.delete_dd_service()
def __init__(self,structure,logger,config): """ Instanciate a model trainer :param dic structure: Model Trainer specific settings eg: {"model-repo":"../models/mymodel","training-repo":"../training/mytraining","sname":"MyTrainer","test_split":0.01,"base-lr":0.01,"clevel":False,"sequence":140,"iterations":50000,"test_interval":1000,"stepsize":15000,"destroy":True,"resume":False,"finetune":False,"weights":"","nclasses":2,"documents":True,"batch-size":128,"test-batch-size":16,"gpuid":0,"mllib":"xgboost","lregression":False} *model-repo* location of the model *training-repo* location of the training files *sname* service name *test_plit* training split between 0 and < 1,type=float,default=0.01 *base_lr* initial learning rate,default=0.01,type=float *clevel* character-level convolutional net,type=boolean *sequence* sequence length for character level models,default=140,type=int *iterations* number of iterations,default=50000,type=int *test_interval* test interval',default=1000,type=int *stepsize* lr policy stepsize',default=15000,type=int *destroy* whether to destroy model',type=boolean *resume* whether to resume training,type=boolean *finetune* whether to finetune,type=boolean *weights* pre-trained weight file, when finetuning *nclasses* number of classes,type=int,default=2 *documents* whether to train from text documents (as opposed to sentences in one doc),type=boolean *batch_size* batch size,type=int,default=128 *test_batch_size* test batch size,type=int,default=16 *gpu* enable gpu usage is True, default=False *gpuid* specify gpu id,type=int,default=0 *mllib* caffe or xgboost,default='caffe' *lregression* whether to use logistic regression,type=boolean :param obj logger: DFM logger object :param obj storage: DFM storage object :param obj config: DFM global config object :returns: ModelTrainer object (instance of a modeltrainer class) """ self.config=config self.structure=structure self.logger=logger self.nclasses = self.structure['nclasses'] self.description = 'classifier' self.sname=self.structure['sname'] self.mllib = self.structure['mllib'] self.dd = DD(config['DEEP_DETECT_URI'],config['DEEP_DETECT_PORT']) self.dd.set_return_format(self.dd.RETURN_PYTHON)
def segment(image, nclasses=150, port=8080, host="localhost"): random.seed(134124) model_dir = '/home/model' sname = 'segserv' description = 'image segmentation' mllib = 'caffe' mltype = 'unsupervised' dd = DD(host, port) dd.set_return_format(dd.RETURN_PYTHON) def random_color(): ''' generate rgb using a list comprehension ''' r, g, b = [random.randint(0, 255) for i in range(3)] return [r, g, b] raw_img = plt.imread("/home/ubuntu/model/" + image).astype("float32") / 255 width, height = raw_img.shape[:2] #width = 480 #height = 480 # creating ML service model_repo = model_dir if not model_repo: model_repo = os.getcwd() + '/model/' model = {'repository': model_repo} parameters_input = {'connector': 'image', 'width': width, 'height': height} parameters_mllib = {'nclasses': nclasses} parameters_output = {} try: servput = dd.put_service(sname, model, description, mllib, parameters_input, parameters_mllib, parameters_output, mltype) except: # most likely the service already exists pass # prediction call parameters_input = {'segmentation': True} parameters_mllib = {'gpu': True, 'gpuid': 0} parameters_output = {} data = ["/home/model/" + image] detect = dd.post_predict(sname, data, parameters_input, parameters_mllib, parameters_output) pixels = np.array((map(int, detect['body']['predictions'][0]['vals']))) imgsize = detect['body']['predictions'][0]['imgsize'] # visual output label_colours = [] for c in range(nclasses): label_colours.append(random_color()) label_colours = np.array(label_colours) r = pixels.copy() g = pixels.copy() b = pixels.copy() for l in range(0, nclasses): r[pixels == l] = label_colours[l, 0] g[pixels == l] = label_colours[l, 1] b[pixels == l] = label_colours[l, 2] r = np.reshape(r, (imgsize['height'], imgsize['width'])) g = np.reshape(g, (imgsize['height'], imgsize['width'])) b = np.reshape(b, (imgsize['height'], imgsize['width'])) rgb = np.zeros((imgsize['height'], imgsize['width'], 3)) rgb[:, :, 0] = r / 255.0 rgb[:, :, 1] = g / 255.0 rgb[:, :, 2] = b / 255.0 print(rgb[0, 0]) body_mask = np.where(rgb * 255 == np.array([47, 197, 233]), 1, 0) result = body_mask * raw_img plt.imsave("result.png", result) return result
service_dict["test_split"] = test_split service_dict["min_count"] = min_count service_dict["min_word_length"] = min_word_length service_dict["batch_size"] = batch_size service_dict["test_interval"] = test_interval services_list.append(service_dict) #Create folders for all models for service in services_list: directory = root_repository+service['service_name'] if not os.path.exists(directory): os.makedirs(directory) #Connect to DD dd = DD(dede_server) dd.set_return_format(dd.RETURN_PYTHON) #Start the creation and training of services, pulling data every 10sec service_count = 1 for service in services_list: #Get start time value to avoid duplicate runs of the same service to overlap start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") log_file.write("service number "+str(service_count)+" of "+str(len(services_list))+"\n") log_file.flush() service_count += 1 #create the service service_name = service['service_name'] log_file.write("Starting test for "+service_name+"\n") log_file.flush() if service['template'] == 'mlp':
imgquery = cv2.imread(imgfile) r = width / imgquery.shape[1] dim = (int(width), int(imgquery.shape[0] * r)) small = cv2.resize(imgquery,dim) return small host = 'localhost' sname = 'imageserv' description = 'image classification' mllib = 'caffe' mltype = 'supervised' extract_layer = 'rois' nclasses = args.nclasses layer_size = 512 # auto anyways width = height = 300 dd = DD(host) dd.set_return_format(dd.RETURN_PYTHON) ntrees = 1000 metric = 'angular' # or 'euclidean' # creating ML service model_repo = os.getcwd() + '/' + args.model_dir model = {'repository':model_repo,'templates':'../templates/caffe/'} parameters_input = {'connector':'image','width':width,'height':height} parameters_mllib = {'nclasses':nclasses} parameters_output = {} try: dd.put_service(sname,model,description,mllib, parameters_input,parameters_mllib,parameters_output,mltype) except: pass
parser.add_argument('--max-batch-size', help='max batch size to be tested', type=int, default=256) parser.add_argument('--list-bench-files', help='file holding the list of bench files', default='list_bench_files.txt') parser.add_argument('--npasses', help='number of passes for every batch size', type=int, default=5) args = parser.parse_args() host = args.host port = args.port dd = DD(host, port) dd.set_return_format(dd.RETURN_PYTHON) list_bench_files = [] with open(args.list_bench_files) as f: for l in f: list_bench_files.append(args.remote_bench_data_dir + '/' + l.rstrip()) init_batch_size = 1 batch_sizes = [] l = init_batch_size while l <= args.max_batch_size: batch_sizes.append(l) if l < 32: l = l * 2 else: l += 16
class InformationExtractor(object): """ Module with functions for information Extraction """ wordnet_lemmatizer = WordNetLemmatizer() #External service URLs google_service_url = 'https://kgsearch.googleapis.com/v1/entities:search' probase_service_url = "https://concept.research.microsoft.com/api/Concept/ScoreByProb" #DD constants height = width = 224 nclasses_clothing = 304 nclasses_bags = 37 nclasses_footwear = 51 nclasses_fabric = 233 #setting up DD client mllib = 'caffe' def __init__(self, word_vectors, companies, styles, materials, items, probase_brands, probase_materials, patterns, top_category_items, deep_detectStartup, confFilePath, tfidf): self.conf = json.load(open(confFilePath)) self.tfidf = tfidf self.api_key = self.conf["google_api_key_path"] self.deep_detect_models = self.conf["deep_detect_models"] self.CAPTION_FACTOR = self.conf["caption_factor"] self.COMMENTS_FACTOR = self.conf["comments_factor"] self.USERTAG_FACTOR = self.conf["usertag_factor"] self.HASHTAG_FACTOR = self.conf["hashtag_factor"] if deep_detectStartup: self.dd = DD(self.conf["deep_detect_host"], port=self.conf["deep_detect_port"]) self.startup_deep_detect() self.wordvec_model = gensim.models.KeyedVectors.load_word2vec_format( word_vectors, binary=False) self.companies = companies self.styles = styles self.materials = materials self.items = items self.brands_keywords_google = [] self.materials_keywords_google = [] self.probase_brands = probase_brands self.probase_materials = probase_materials self.colors = [] self.patterns = patterns self.top_category_items = top_category_items self.lemmatize() def lemmatize(self): """ Lemmatize domain lists""" self.styles_lemmas = { self.wordnet_lemmatizer.lemmatize(style): style for style in self.styles } self.materials_lemmas = { self.wordnet_lemmatizer.lemmatize(material): material for material in self.materials } self.items_lemmas = { self.wordnet_lemmatizer.lemmatize(item): item for item in self.items } def find_closest_semantic(self, caption, comments, tags, hashtags, segmented_hashtags, num, topic, id): """ Finds num semantically closest candidates for a given topic""" topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"), topic) freq_scores = {} for x in topic: freq_scores[x] = 0.0 for token in caption: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.CAPTION_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in comments: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.COMMENTS_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in hashtags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in segmented_hashtags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = 
self.token_similarity(token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in tags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.USERTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] top = sorted([(k, v) for k, v in freq_scores.iteritems()], reverse=True, key=lambda x: x[1])[:num] return top def token_similarity(self, token, token2, token2Lemma, factor, tfidf): """ Returns similarity between two tokens using cosine similarity between embeddings, edit distance and TFIDF weighting""" similarity = 0.0 if isinstance(token, str): token = token.decode("utf-8", "ignore") tokenLemma = self.wordnet_lemmatizer.lemmatize(token) if tokenLemma in self.wordvec_model.wv.vocab and token2Lemma in self.wordvec_model.wv.vocab: if edit_distance(tokenLemma, token2Lemma) == 0: factor = factor * 10 similarity = factor * math.pow( float(self.wordvec_model.wv.similarity(tokenLemma, token2Lemma)), 2) else: dist = factor * edit_distance(tokenLemma, token2Lemma) similarity = float(1) / float(1 + math.pow(dist, 2)) tfidf_score = 0.0 if token in tfidf: tfidf_score = tfidf[token] if token.encode("utf-8") in tfidf: tfidf_score = tfidf[token.encode("utf-8")] tfidf_score = max(tfidf_score, 0.0001) similarity = similarity * tfidf_score return similarity def find_closest_syntactic(self, caption, comments, tags, hashtags, segmented_hashtags, num, topic, id): """ Finds num semantically closest candidates for a given topic""" topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"), topic) freq_scores = {} for x in topic: freq_scores[x] = 0.0 for token in caption: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.CAPTION_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in comments: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.COMMENTS_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in hashtags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for token in segmented_hashtags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] for 
token in tags: scores = [] for x in topic: token2 = x.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.USERTAG_FACTOR, self.tfidf[id]) scores.append((x, similarity)) top = sorted(scores, reverse=True, key=lambda x: x[1])[:num] for x in top: freq_scores[x[0]] = freq_scores[x[0]] + x[1] top = sorted([(k, v) for k, v in freq_scores.iteritems()], reverse=True, key=lambda x: x[1])[:num] return top def token_similarity_syntactic_only(self, token, token2, token2Lemma, factor, tfidf): """ Returns similarity between two tokens using edit distance and TFIDF weighting""" tokenLemma = self.wordnet_lemmatizer.lemmatize(token) similarity = 0.0 if edit_distance(tokenLemma, token2Lemma) == 0: factor = factor * 10 dist = edit_distance(tokenLemma, token2Lemma) similarity = factor * (float(1) / float(1 + dist)) tfidf_score = 0.0 if token in tfidf: tfidf_score = tfidf[token] if token.encode("utf-8") in tfidf: tfidf_score = tfidf[token.encode("utf-8")] tfidf_score = max(tfidf_score, 0.0001) similarity = similarity * tfidf_score return similarity def lookup_google(self, params): """ Lookup in Google Search""" #curl "https://kgsearch.googleapis.com/v1/entities:search?query=bebe&key=<key>&limit=2&indent=True&types=Organization" url = self.google_service_url + '?' + urllib.urlencode(params) #result score = an indicator of how well the entity matched the request constraints. response = json.loads(urllib.urlopen(url).read()) results = [] if "itemListElement" in response: for element in response['itemListElement']: dict_result = {} if "resultScore" in element: dict_result["resultScore"] = element['resultScore'] if "result" in element: if "detailedDescription" in element["result"]: dict_result["detailedDescription"] = element["result"][ 'detailedDescription'] if "description" in element["result"]: dict_result["description"] = element["result"][ 'description'] if "url" in element["result"]: dict_result["url"] = element["result"]["url"] results.append(dict_result) return results def rank_google_result_company(self, results): """ Binary rank of google search results""" for result in results: for keyword in self.brands_keywords_google: if "detailedDescription" in result: if keyword in result["detailedDescription"]: return 1 if "description" in result: if keyword in result["description"]: return 1 return 0.0 def rank_google_result_material(self, results): """ Binary rank of google search results""" for result in results: for keyword in self.materials_keywords_google: if keyword in result[ "detailedDescription"] or keyword in result[ "description"]: return 1 return 0.0 def rank_probase_result_company(self, result): """Probase probability ranking [0,1]""" keywords = filter(lambda x: x in result, self.probase_brands) keywords = map(lambda x: result[x], keywords) if len(keywords) > 0: return 1 + max(keywords) else: return 0.5 def rank_probase_result_material(self, result): """Probase probability ranking [0,1]""" keywords = filter(lambda x: x in result, self.probase_materials) keywords = map(lambda x: result[x], keywords) if len(keywords) > 0: return 1 + max(keywords) else: return 0.5 def lookup_probase(self, params): """Probase lookup""" #curl "https://concept.research.microsoft.com/api/Concept/ScoreByProb?instance=adidas&topK=10" url = self.probase_service_url + '?' 
+ urllib.urlencode(params) response = json.loads(urllib.urlopen(url).read()) return response def get_liketoknowitlinks(self, tokens): """ Extract liketoknowit links""" links = [] for token in tokens: match = re.search("http://liketk.it/([^\s]+)", token) if match is not None: link = match.group(0) links.append(link) return links def lda_topic_models(self, num_topics, num_iter, min_occ, docs): """ Extract LDA topic models """ cvectorizer = CountVectorizer(min_df=min_occ, stop_words="english") cvz = cvectorizer.fit_transform(docs) lda_model = lda.LDA(n_topics=num_topics, n_iter=num_iter) X_topics = lda_model.fit_transform(cvz) _lda_keys = [] for i in xrange(X_topics.shape[0]): _lda_keys.append(X_topics[i].argmax()) topic_summaries = [] topic_word = lda_model.topic_word_ # all topic words n_top_words = 5 vocab = cvectorizer.get_feature_names() for i, topic_dist in enumerate(topic_word): topic_words = np.array(vocab)[np.argsort( topic_dist)][:-(n_top_words + 1):-1] # get! topic_summaries.append(' '.join(topic_words)) return topic_summaries def get_top_num(self, coll, num): """ Extract top 10 ranked items""" top, counts = zip(*Counter(coll).most_common(num)) return list(top) def get_wikipedia_vote(self, query): """ Wikipedia lookup binary rank""" pages = wikipedia.search(query) for pageName in pages: try: page = wikipedia.page(pageName) content = page.content.lower() for keyword in self.brands_keywords_google: if keyword in content: return 1 except: return 0.0 return 0.0 def get_google_search_vote(self, query): """ Google search lookup binary rank""" try: response = GoogleSearch().search(query) for result in response.results: text = result.getText().lower() title = result.title.lower() for keyword in self.brands_keywords_google: if keyword in text or keyword in title: return 1 except: return 0 return 0 def emoji_classification(self, emojis, num): """ Emoji classification """ items = {} for item in self.items_lemmas.keys(): items[item] = 0.0 for emoji in emojis: item_matches = self.emoji_to_item(emoji) for item_m in item_matches: items[item_m] = items[item_m] + 1 top = sorted([(k, v) for k, v in items.iteritems()], reverse=True, key=lambda x: x[1])[:num] return top def emoji_to_item(self, token): """Classify item based on emojis""" if token == u"👕": return ["shirt", "top"] if token == u"👖": return ["jean", "trouser", "legging", "jogger"] if token == u"👗": return ["dress"] if token == u"👚": return ["blouse", "shirt"] if token == u"👛": ["purse", "bag", "handbag"] if token == u"👜": return ["bag", "handbag"] if token == u"👝" or token == u"🎒 ": return ["bag"] if token == u"👞": return ["shoe", "boot"] if token == u"👟": return ["trainer", "shoe", "boot"] if token == u"👠" or token == u"👡 " or token == u"👢": return ["heel", "shoe"] if token == u"👒" or token == u"🎩": return ["hat"] return [] def map_candidates_to_ontology(self, candidates): """ Map candidates from external APIs to our classes""" topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"), self.top_category_items) freq_scores = {} for x in topic: parts = x.split(",") label = parts[0] freq_scores[label] = 0.0 for token in candidates: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token[0], token2, token2Lemma, self.CAPTION_FACTOR) scores.append(similarity * math.pow(token[1], 2)) acc_sim = acc_sim + max(scores) freq_scores[label] = 
freq_scores[label] + acc_sim return freq_scores def liketkit_classification(self, url): """ Liketkit link scraping """ text = [] try: driver = webdriver.PhantomJS() driver.get(url) p_element = driver.find_element_by_class_name("ltk-products") products = p_element.find_elements_by_xpath(".//*") urls = [] for prod in products: urls.append(prod.get_attribute("href")) for url in urls: driver.get(url) html = driver.page_source soup = BeautifulSoup(html, "lxml") data = soup.findAll(text=True, recursive=True) text.extend(list(data)) return text except: print("error in liketkit classification") return text def google_vision_lookup(self, imagePath): """ Google vision API lookup """ item_candidates = [] try: # Instantiates a client client = vision.ImageAnnotatorClient() # The name of the image file to annotate file_name = os.path.join(os.path.dirname(__file__), imagePath) # Loads the image into memory with io.open(file_name, 'rb') as image_file: content = image_file.read() image = types.Image(content=content) # Performs label detection on the image file response = client.label_detection(image=image) labels = response.label_annotations for label in labels: item_candidates.append((label.description, label.score)) return item_candidates except: print("error in google_vision_LF") return item_candidates def deep_detect_lookup(self, link): """ Deep detect local lookup""" items_and_fabrics = {} items_and_fabrics["items"] = [] items_and_fabrics["fabrics"] = [] try: parameters_input = {} parameters_mllib = {} parameters_output = {'best': 10} data = [link] clothing_res = self.dd.post_predict(self.sname_clothing, data, parameters_input, parameters_mllib, parameters_output) body = clothing_res[u"body"] predictions = body[u"predictions"] classes = predictions[0][u"classes"] for c in classes: items = c[u"cat"].strip(" ").split(",") prob = c[u"prob"] for item in items: items_and_fabrics["items"].append((item, prob)) bags_res = self.dd.post_predict(self.sname_bags, data, parameters_input, parameters_mllib, parameters_output) body = bags_res[u"body"] predictions = body[u"predictions"] classes = predictions[0][u"classes"] for c in classes: items = c[u"cat"].strip(" ").split(",") prob = c[u"prob"] for item in items: items_and_fabrics["items"].append((item, 0.5 * prob)) footwear_res = self.dd.post_predict(self.sname_footwear, data, parameters_input, parameters_mllib, parameters_output) body = footwear_res[u"body"] predictions = body[u"predictions"] classes = predictions[0][u"classes"] for c in classes: items = c[u"cat"].strip(" ").split(",") prob = c[u"prob"] for item in items: items_and_fabrics["items"].append((item, 0.5 * prob)) fabric_res = self.dd.post_predict(self.sname_fabric, data, parameters_input, parameters_mllib, parameters_output) body = fabric_res[u"body"] predictions = body[u"predictions"] classes = predictions[0][u"classes"] for c in classes: items = c[u"cat"].strip(" ").split(",") prob = c[u"prob"] for item in items: items_and_fabrics["fabrics"].append((item, prob)) return items_and_fabrics except: print("error in deep_detect_LF") return items_and_fabrics def startup_deep_detect(self): """ Startup services for deep detect classification """ self.dd.set_return_format(self.dd.RETURN_PYTHON) for model in self.deep_detect_models: m = {"repository": model["path"]} parameters_input = { 'connector': 'image', 'width': self.width, 'height': self.height } parameters_mllib = {'nclasses': self.nclasses_clothing} parameters_output = {} self.dd.put_service(model["name"], model, model["description"], self.mllib, 
parameters_input, parameters_mllib, parameters_output) def deepomatic_lookup(self, link): """ Deepomatic API lookup """ item_candidates = [] try: client = Client(529372386976, self.conf["deepomatic_api_key"]) task = client.helper.get("/detect/fashion/?url=" + link) taskid = task[u"task_id"] i = 0 while i < 10: sleep(0.1) #100ms res = client.helper.get("/tasks/" + str(taskid) + "/") task = res[u"task"] status = task[u"status"] if status == u"success" or status == "success": data = task[u"data"] boxes = data[u"boxes"] for item in boxes.keys(): info = boxes[item] probability = 0.0 for inf in info: probability = probability + inf[u"proba"] item_candidates.append( (item.encode("utf-8"), probability)) i = 10 else: i += 1 return item_candidates except: print("error in deepomaticLF") return item_candidates def clarifai_lookup(self, link): """ Clarifai API lookup""" item_candidates = [] try: app = ClarifaiApp(api_key=self.conf["clarifai_api_key"]) model = app.models.get('apparel') image = ClImage(url=link) res = model.predict([image]) outputs = res[u"outputs"] for output in outputs: data = output[u"data"] concepts = data[u"concepts"] for concept in concepts: concept_parts = concept[u"name"].encode("utf-8").split(" ") val = concept[u"value"] for part in concept_parts: item_candidates.append((part, val)) return item_candidates except: print("error in clarifai LF") return item_candidates def find_closest_semantic_hierarchy(self, caption, comments, tags, hashtags, topic, id, num): """ Finds num semantically closest candidates for a given topic with multiple words per topic""" topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"), topic) freq_scores = {} for x in topic: parts = x.split(",") label = parts[0] freq_scores[label] = 0.0 for token in caption: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.CAPTION_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in comments: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.COMMENTS_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in hashtags: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in tags: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity(token, token2, token2Lemma, self.USERTAG_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + similarity acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim top = sorted([(k, v) for k, v 
in freq_scores.iteritems()], reverse=True, key=lambda x: x[1])[:num] return top def find_closest_syntactic_hierarchy(self, caption, comments, tags, hashtags, topic, id, num): """ Finds num syntactically closest candidates for a given topic, with multiple words per topic""" topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"), topic) freq_scores = {} for x in topic: parts = x.split(",") label = parts[0] freq_scores[label] = 0.0 for token in caption: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.CAPTION_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in comments: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.COMMENTS_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in hashtags: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.HASHTAG_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim for token in tags: for x in topic: parts = x.split(",") label = parts[0] words = parts[1].split(" ") acc_sim = 0 scores = [] for word in words: token2 = word.lower() token2Lemma = self.wordnet_lemmatizer.lemmatize(token2) similarity = self.token_similarity_syntactic_only( token, token2, token2Lemma, self.USERTAG_FACTOR, self.tfidf[id]) scores.append(similarity) acc_sim = acc_sim + similarity acc_sim = acc_sim + max(scores) freq_scores[label] = freq_scores[label] + acc_sim top = sorted([(k, v) for k, v in freq_scores.iteritems()], reverse=True, key=lambda x: x[1])[:num] return top
def __init__(self, sname, host = "127.0.0.1", port = "8080", proto = 0, api_path = "", model = None, models = [], models_dir = "/opt/platform/models/", datafiles = [], datadir = "", output_dir = "/temp/predictions/", columns = [], target_cols = [], ignored_cols = [], offset=50, gpuid = 0, autoregressive = False, sam = False, batch_size = 50, iter_size = 1, iterations = 500000, base_lr=0.001, test_interval = 5000, anomaly_params = AnomalyParameters(), display_progress = True): self.sname = sname self.models = models self.models_dir = models_dir self.datafiles = datafiles self.datadir = datadir self.output_dir = output_dir self.columns = columns self.target_cols = target_cols self.ignored_cols = ignored_cols self.offset = offset self.batch_size = batch_size self.gpuid = gpuid self.autoregressive = autoregressive self.display_progress = display_progress self.solver_params = { "iter_size": iter_size, "iterations": iterations, "base_lr": base_lr, "test_interval": test_interval, "sam": sam } """ shift: How much the target is being shifted. As models predict at different horizons, shift enable to compare the same sections of the targets even if the target is shifted by a certain number of timesteps. """ self.shift = 0 # error based anomaly detection self.anomaly_params = anomaly_params self.anomaly_params.labels = self.target_cols # dict {dataset: target} self.targs = {} # dict { dataset: {model: preds / errors}} self.preds = {} # signed error self.errors = {} self.dd = DD(host, port, proto, api_path)
parser.add_argument("--confidences",type=str,help="whether to output the confidence map, e.g. best",default='') args = parser.parse_args(); host = 'localhost' port = 8080 sname = 'segserv' description = 'image segmentation' mllib = args.mllib if mllib == 'caffe': mltype = 'unsupervised' else: mltype = 'supervised' nclasses = args.nclasses width = args.width height = args.height dd = DD(host,port) dd.set_return_format(dd.RETURN_PYTHON) def random_color(): ''' generate rgb using a list comprehension ''' r, g, b = [random.randint(0,255) for i in range(3)] return [r, g, b] # creating ML service model_repo = args.model_dir if not model_repo: model_repo = os.getcwd() + '/model/' model = {'repository':model_repo} parameters_input = {'connector':'image','width':width,'height':height} parameters_mllib = {'nclasses':nclasses,'segmentation':True,'gpu':True,'gpuid':0} parameters_output = {}
parser.add_argument("--nclasses",help="number of classes",type=int,default=150) parser.add_argument("--width",help="image width",type=int,default=480) parser.add_argument("--height",help="image height",type=int,default=480) parser.add_argument("--model-dir",help="model directory") args = parser.parse_args(); host = 'localhost' port = 8080 sname = 'segserv' description = 'image segmentation' mllib = 'caffe' mltype = 'unsupervised' nclasses = args.nclasses width = args.width height = args.height dd = DD(host,port) dd.set_return_format(dd.RETURN_PYTHON) def random_color(): ''' generate rgb using a list comprehension ''' r, g, b = [random.randint(0,255) for i in range(3)] return [r, g, b] # creating ML service model_repo = args.model_dir if not model_repo: model_repo = os.getcwd() + '/model/' model = {'repository':model_repo} parameters_input = {'connector':'image','width':width,'height':height} parameters_mllib = {'nclasses':nclasses} parameters_output = {}
import sys import json from dd_client import DD with open('config.json', 'r') as f: config = json.load(f) service = sys.argv[1] MODEL_REPO = config['REPO'][service]['PATH'] nclasses = config['REPO'][service]['CLASS_COUNT'] height = width = config['REPO'][service]['IMAGE_SIZE'] # setting up DD client host = 'localhost' sname = config['REPO'][service]['NAME'] description = config['REPO'][service]['DESCRIPTION'] mllib = config['REPO'][service]['LIBRARY'] dd = DD(host) dd.set_return_format(dd.RETURN_PYTHON) # creating ML service model = {'repository': MODEL_REPO} parameters_input = {'connector': 'image', 'width': width, 'height': width} parameters_mllib = {'nclasses': nclasses} parameters_output = {} dd.put_service(sname, model, description, mllib, parameters_input, parameters_mllib, parameters_output)
'footwear': {'backend':'caffe','nclasses':51,'width':224,'height':224,'path':base_path_caffe}, 'sports': {'backend':'caffe','nclasses':143,'width':224,'height':224,'path':base_path_caffe}, 'furnitures': {'backend':'caffe','nclasses':179,'width':224,'height':224,'path':base_path_caffe}, } parser = argparse.ArgumentParser() parser.add_argument('--host',help='AMI public IP') parser.add_argument('--model-name',help='model name, e.g. googlenet, resnet_50, age_model, gender, clothing, see https://deepdetect.com/products/ami/ for full list',default='googlenet') parser.add_argument('--info',help='simple info call to remote DeepDetect server',action='store_true') parser.add_argument('--create-service',help='whether to create service',action='store_true') parser.add_argument('--delete',help='wether to delete service',action='store_true') parser.add_argument('--img-url',help='URL of image to classify') args = parser.parse_args() host = args.host dd = DD(host,8080) dd.set_return_format(dd.RETURN_PYTHON) if not args.model_name in models_config: print('Unknown model=',args.model_name) sys.exit() model_config = models_config[args.model_name] # info call if args.info: info = dd.info() print(info) sys.exit() if args.delete:
parser.add_argument('--port',help='server port',type=int,default=8080) parser.add_argument('--sname',help='service name') parser.add_argument('--img-width',help='image width',type=int,default=224) parser.add_argument('--img-height',help='image height',type=int,default=224) parser.add_argument('--gpu',help='whether to bench GPU',action='store_true') parser.add_argument('--cpu',help='whether to bench CPU',action='store_true') parser.add_argument('--remote-bench-data-dir',help='when bench data directory, when available remotely on the server') parser.add_argument('--max-batch-size',help='max batch size to be tested',type=int,default=256) parser.add_argument('--list-bench-files',help='file holding the list of bench files',default='list_bench_files.txt') parser.add_argument('--npasses',help='number of passes for every batch size',type=int,default=5) parser.add_argument('--detection',help='whether benching a detection model',action='store_true') args = parser.parse_args() host = args.host port = args.port dd = DD(host,port) dd.set_return_format(dd.RETURN_PYTHON) list_bench_files = [] with open(args.list_bench_files) as f: for l in f: list_bench_files.append(args.remote_bench_data_dir + '/' + l.rstrip()) init_batch_size = 1 batch_sizes = [] l = init_batch_size while l <= args.max_batch_size: batch_sizes.append(l) if l < 32: l = l * 2 else: l += 16
class ModelTrainer: """ Prediction Model trainer class binary char-based model training class """ def __init__(self,structure,logger,config): """ Instanciate a model trainer :param dic structure: Model Trainer specific settings eg: {"model-repo":"../models/mymodel","training-repo":"../training/mytraining","sname":"MyTrainer","test_split":0.01,"base-lr":0.01,"clevel":False,"sequence":140,"iterations":50000,"test_interval":1000,"stepsize":15000,"destroy":True,"resume":False,"finetune":False,"weights":"","nclasses":2,"documents":True,"batch-size":128,"test-batch-size":16,"gpuid":0,"mllib":"xgboost","lregression":False} *model-repo* location of the model *training-repo* location of the training files *sname* service name *test_plit* training split between 0 and < 1,type=float,default=0.01 *base_lr* initial learning rate,default=0.01,type=float *clevel* character-level convolutional net,type=boolean *sequence* sequence length for character level models,default=140,type=int *iterations* number of iterations,default=50000,type=int *test_interval* test interval',default=1000,type=int *stepsize* lr policy stepsize',default=15000,type=int *destroy* whether to destroy model',type=boolean *resume* whether to resume training,type=boolean *finetune* whether to finetune,type=boolean *weights* pre-trained weight file, when finetuning *nclasses* number of classes,type=int,default=2 *documents* whether to train from text documents (as opposed to sentences in one doc),type=boolean *batch_size* batch size,type=int,default=128 *test_batch_size* test batch size,type=int,default=16 *gpu* enable gpu usage is True, default=False *gpuid* specify gpu id,type=int,default=0 *mllib* caffe or xgboost,default='caffe' *lregression* whether to use logistic regression,type=boolean :param obj logger: DFM logger object :param obj storage: DFM storage object :param obj config: DFM global config object :returns: ModelTrainer object (instance of a modeltrainer class) """ self.config=config self.structure=structure self.logger=logger self.nclasses = self.structure['nclasses'] self.description = 'classifier' self.sname=self.structure['sname'] self.mllib = self.structure['mllib'] self.dd = DD(config['DEEP_DETECT_URI'],config['DEEP_DETECT_PORT']) self.dd.set_return_format(self.dd.RETURN_PYTHON) def createMLTrainerService(self): """ Create ML Trainer service in DeepDetect """ if self.structure['lregression']: self.template = 'lregression' else: self.template = 'mlp' layers = [800,500,200] if self.structure['clevel']: self.template = 'convnet' self.layers = ['1CR256','1CR256','4CR256','1024','1024'] self.model = {'templates':'../templates/caffe/','repository':self.structure['model-repo']} self.parameters_input = {'connector':'txt','sentences':False,'characters':self.structure['clevel'],'read_forward':True} if self.structure['documents']: self.parameters_input['sentences'] = False if self.structure['clevel']: self.parameters_input['sequence'] = self.sequence #parameters_input['alphabet'] = 'abcdef0123456789' # hex # parameters_input['alphabet'] = '_-,:?/.(){}*%0123456789abcdefghijklmnopqrstuvwxyz' # opcode #parameters_input['alphabet'] = "abcdefghijklmnopqrstuvwxyz0123456789,;.!?'"#\"/\\|_@#$%^&*~`+-=<>" self.parameters_mllib = {'template':self.template,'nclasses':self.nclasses,'db':True,'dropout':0.5} if self.mllib == 'xgboost': self.parameters_mllib['db'] = False if not self.template == 'lregression': self.parameters_mllib['layers'] = layers #parameters_mllib = {'nclasses':nclasses,'db':True} if self.structure['finetune']: 
self.parameters_mllib['finetuning'] = True if not self.structure['weights']: logger.error('Finetuning requires weights file') # server will fail on service creation anyways else: self.parameters_mllib['weights'] = self.structure['weights'] self.parameters_output = {} self.logger.debug("dd.put_service("+str(self.structure['sname'])+","+str(self.model)+","+str(self.description)+","+str(self.mllib)+","+str(self.parameters_input)+","+str(self.parameters_mllib)+","+str(self.parameters_output)+")") return self.dd.put_service(self.structure['sname'],self.model,self.description,self.mllib,self.parameters_input,self.parameters_mllib,self.parameters_output) def trainModel(self): """ Train the model. """ self.train_data = [self.structure['training-repo']] self.parameters_input = {'test_split':self.structure['test_split'],'shuffle':True,'db':True} if not self.structure['clevel']: self.parameters_input['min_word_length'] = 5 self.parameters_input['min_count'] = 10 self.parameters_input['count'] = False if self.mllib == 'xgboost': self.parameters_input['tfidf'] = True self.parameters_input['db'] = False else: self.parameters_input['sentences'] = True self.parameters_input['characters'] = True self.parameters_input['sequence'] = self.sequence if self.structure['documents']: self.parameters_input['sentences'] = False if self.mllib == 'caffe': self.parameters_input['db']=True self.parameters_mllib = { 'gpu':self.structure['gpu'], 'gpuid':self.structure['gpuid'], 'resume':self.structure['resume'], 'net':{ 'batch_size':self.structure['batch_size'] }, 'solver':{ 'test_interval':self.structure['test_interval'], 'test_initialization':False, 'base_lr':self.structure['base_lr'], 'solver_type':'ADAM', 'iterations':self.structure['iterations'] } }#,'lr_policy':'step','stepsize':self.structure['stepsize'],'gamma':0.5,'weight_decay':0.0001}} elif self.mllib == 'xgboost': self.parameters_mllib = { 'iterations':self.structure['iterations'], 'objective':'multi:softprob', 'booster_params':{'max_depth':50} } self.parameters_output = {'measure':['mcll','f1','cmdiag','cmfull']} if self.nclasses == 2: self.parameters_output['measure'].append('auc') self.logger.debug("dd.post_train("+self.structure['sname']+","+str(self.train_data)+","+str(self.parameters_input)+","+str(self.parameters_mllib)+","+str(self.parameters_output)+",async="+str(True)+")") self.dd.post_train(self.structure['sname'],self.train_data,self.parameters_input,self.parameters_mllib,self.parameters_output,async=True) time.sleep(1) train_status = '' while True: train_status = self.dd.get_train(self.sname,job=1,timeout=10) if train_status['head']['status'] == 'running': self.logger.debug(train_status['body']['measure']) else: self.logger.debug(train_status) break return train_status def clearMLTrainerService(self,clear=''): """ delete the service, keeping the model :param str clear: use clear='lib' to clear the model as well, default empty. :returns: DeepDetect delete result """ return self.dd.delete_service(self.sname,clear=clear)
import cv2 from dd_client import DD import numpy as np import argparse host = 'localhost' port = 8080 dd = DD(host, port) dd.set_return_format(dd.RETURN_PYTHON) parser = argparse.ArgumentParser() parser.add_argument( '--model-in-path', help='directory path that contains model to export (i.e. the .pt file)', required=True) parser.add_argument('--img-size', default=256, type=int, help='square image size') parser.add_argument('--img-in', help='image to transform', required=True) parser.add_argument('--img-out', help='transformed image', required=True) parser.add_argument('--gpu', help='whether to run on GPU', action='store_true') args = parser.parse_args() # service creation call model = {'repository': args.model_in_path} parameters_input = { 'connector': 'image', 'width': args.img_size, 'height': args.img_size }
class DNNFeatureExtractor(FeatureGenerator): def __init__(self, dnnmodel, image_files, index_repo, batch_size=32, dd_host='localhost', dd_port=8080, dd_description='image classification', meta_in='', meta_out='', captions_in='', captions_out='', mapi_in='', mapi_out=''): self.dd_host = dd_host self.dd_port = dd_port self.dd_description = dd_description self.dd_mllib = 'caffe' self.meta_in = meta_in self.meta_out = meta_out self.captions_in = captions_in self.captions_out = captions_out self.mapi_in = mapi_in self.mapi_out = mapi_out self.gpuid = 0 self.dnnmodel = dnnmodel if self.dnnmodel.extract_layer: self.dd_mltype = 'unsupervised' else: self.dd_mltype = 'supervised' self.image_files = image_files self.batch_size = batch_size self.binarized = False self.dd = DD(self.dd_host, self.dd_port) self.dd.set_return_format(self.dd.RETURN_PYTHON) self.index_repo = index_repo + '/' + self.dnnmodel.name try: os.mkdir(self.index_repo) except: #logger.warning('directory ' + self.index_repo + ' may alreay exist') pass self.st = {} # shelve used for full tags storage self.stm = {} # in memory tmp storage if self.dd_mltype == 'supervised': self.st = shelve.open(self.index_repo + '/tags.bin') self.delete_dd_service() def __del__(self): if self.dd_mltype == 'supervised': for i, t in self.stm.iteritems(): self.st[i] = t self.st.close() def create_dd_service(self): model = {'repository': self.dnnmodel.model_repo} parameters_input = { 'connector': 'image', 'width': self.dnnmodel.img_width, 'height': self.dnnmodel.img_height } parameters_mllib = { 'nclasses': self.dnnmodel.nclasses, 'gpu': True, 'gpuid': self.gpuid } parameters_output = {} screate = self.dd.put_service(self.dnnmodel.name, model, self.dd_description, self.dd_mllib, parameters_input, parameters_mllib, parameters_output, self.dd_mltype) outcode = screate['status']['code'] if outcode != 201 and outcode != 403: logger.error('failed creation of DNN service ' + self.dnnmodel.name) #return raise Exception('failed creating DNN service ' + self.dnnmodel.name) return def delete_dd_service(self): self.dd.delete_service(self.dnnmodel.name, clear='') def preproc(self): # none needed with dd at the moment return def index(self): ## feature generation, to be indexed or searched for self.create_dd_service() feature_vectors = [] uris = [] parameters_input = {} parameters_mllib = { 'gpu': True, 'gpuid': self.gpuid, 'extract_layer': self.dnnmodel.extract_layer } if self.dd_mltype == 'unsupervised': parameters_output = {'binarized': self.binarized} # pass one image to get the size of the output layer classif = self.dd.post_predict(self.dnnmodel.name, [self.image_files[0]], parameters_input, parameters_mllib, parameters_output) response_code = classif['status']['code'] if response_code != 200: print 'response=', classif logger.error( 'failed (index) initial prediction call to model ' + self.dnnmodel.name + ' via dd') self.delete_dd_service() return dim = len(classif['body']['predictions']['vals']) else: parameters_output = {'best': self.dnnmodel.best} dim = self.dnnmodel.nclasses c = 0 logger.info('dnn feature prediction and indexing for service ' + self.dnnmodel.name + ' with layer of size ' + str(dim)) with Indexer(dim, self.index_repo) as indexer: for x in batch(self.image_files, self.batch_size): classif = self.dd.post_predict(self.dnnmodel.name, x, parameters_input, parameters_mllib, parameters_output) #print classif response_code = classif['status']['code'] if response_code != 200: print 'response=', classif logger.error( 'failed (index) batch prediction 
call to model ' + self.dnnmodel.name + ' via dd') continue predictions = classif['body']['predictions'] if self.batch_size == 1 or len(self.image_files) == 1: predictions = [predictions] for p in predictions: if self.dd_mltype == 'unsupervised': indexer.index_single(c, p['vals'], p['uri']) if c > 0 and c % self.batch_size == 0: logger.info('indexed ' + str(c) + ' images') else: puri = str(p['uri']) indexer.index_tags_single(p['classes'], p['uri']) self.stm[puri] = [] for pc in p['classes']: self.stm[puri].append(pc['cat']) c = c + 1 indexer.build_index() indexer.save_index() logger.info('indexed a total of ' + str(c) + ' images') self.delete_dd_service() def search(self, jdataout={}): self.create_dd_service() parameters_input = {} parameters_mllib = { 'gpu': True, 'gpuid': self.gpuid, 'extract_layer': self.dnnmodel.extract_layer } if self.dd_mltype == 'unsupervised': parameters_output = {'binarized': self.binarized} else: parameters_output = {'best': self.dnnmodel.best} logger.info('dnn feature prediction and searching for service ' + self.dnnmodel.name) results = {} with Searcher(self.index_repo, search_size=500) as searcher: searcher.load_index() for x in batch(self.image_files, self.batch_size): classif = self.dd.post_predict(self.dnnmodel.name, x, parameters_input, parameters_mllib, parameters_output) response_code = classif['status']['code'] if response_code != 200: print 'response=', classif logger.error( 'failed batch (search) prediction call to model ' + self.dnnmodel.name + ' via dd') self.delete_dd_service() print classif raise Exception( 'failed batch (search) prediction call to model ' + self.dnnmodel.name) predictions = classif['body']['predictions'] if self.batch_size == 1 or len(self.image_files) == 1: predictions = [predictions] #print 'predictions=',predictions for p in predictions: if self.dd_mltype == 'unsupervised': nns = searcher.search_single(p['vals'], p['uri']) else: puri = str(p['uri']) nns = searcher.search_tags_single(p['classes'], puri) nns['tags_out_all'] = [] for nn in nns['nns_uris']: nns['tags_out_all'].append(self.st[str(nn)]) results[p['uri']] = nns self.delete_dd_service() return self.to_json(results, '/img/reuters/', '/img/tate/', self.dnnmodel.name, self.dnnmodel.description, jdataout, self.meta_in, self.meta_out, self.captions_in, self.captions_out, self.mapi_in, self.mapi_out)
parser.add_argument("--port", help="DeepDetect port", type=int, default=8080) parser.add_argument("--confidence-threshold", help="keep detections with confidence above threshold", type=float, default=0.1) parser.add_argument("--save-path", help="Where to save resulting image") args = parser.parse_args() host = 'localhost' sname = 'imgserv' description = 'image classification' mllib = 'caffe' mltype = 'supervised' nclasses = 21 width = height = 300 dd = DD(host, port=args.port) dd.set_return_format(dd.RETURN_PYTHON) # creating ML service model_repo = os.getcwd() + '/model' model = {'repository': model_repo} parameters_input = {'connector': 'image', 'width': width, 'height': height} parameters_mllib = {'nclasses': nclasses} parameters_output = {} dd.put_service(sname, model, description, mllib, parameters_input, parameters_mllib, parameters_output, mltype) # chain call calls = [] parameters_input = {"keep_orig": True}
small = cv2.resize(imgquery, dim) return small host = 'localhost' sname = 'imgserv' description = 'image classification' mllib = 'caffe' mltype = 'unsupervised' extract_layer = 'loss3/classifier' #extract_layer = 'pool5/7x7_s1' nclasses = 1000 layer_size = 1000 # default output code size width = height = 224 binarized = False dd = DD(host) dd.set_return_format(dd.RETURN_PYTHON) ntrees = 100 metric = 'angular' # or 'euclidean' # creating ML service model_repo = os.getcwd() + '/model' model = {'repository': model_repo, 'templates': '../templates/caffe/'} parameters_input = {'connector': 'image', 'width': width, 'height': height} # Only indexing needs the template. if args.index: parameters_mllib = {'nclasses': nclasses, 'template': 'googlenet'} else: parameters_mllib = {'nclasses': nclasses}
shutil.copy2('includes/dede_deploy.prototxt', 'dedemodel/deploy.prototxt') if not os.path.exists('dedemodel/corresp.txt'): shutil.copy2('includes/corresp.txt', 'dedemodel/corresp.txt') # remove old models for root, dirs, files in os.walk('dedemodel'): for name in files: if name.lower().endswith('.caffemodel'): os.remove(os.path.join(root, name)) # copy new model recentmodel = most_recent_iteration(args.builddir) print('Using model ' + recentmodel) shutil.copy2(os.path.join('builds', args.builddir, 'snapshots', recentmodel), 'dedemodel/model.caffemodel') # setup DeepDetect service if necessary dd = DD('localhost') dd.set_return_format(dd.RETURN_PYTHON) model = {'repository': '/dockershare/ssd/dedemodel'} parameters_input = {'connector': 'image', 'width': 512, 'height': 512} parameters_mllib = {'nclasses': 7} parameters_output = {} detect = dd.delete_service('ssd') detect = dd.put_service('ssd', model, 'single-shot detector', 'caffe', parameters_input, parameters_mllib, parameters_output, 'supervised') # recursively process input directory for root, dirs, files in os.walk(folder_input): for name in sorted(files): name, ext = os.path.splitext(name) if (ext.lower().endswith(('.mp4', '.avi', '.mov'))
from dd_client import DD import matplotlib import numpy as np import time import matplotlib.pyplot as plt import pylab model_repo = "/tmp" host = 'localhost' port = 8080 sname = 'test' description = 'clustering' mllib = 'tsne' dd = DD(host) dd.set_return_format(dd.RETURN_PYTHON) training_repo = 'http://deepdetect.com/dd/datasets/mnist_csv/mnist_test.csv' # service creation model = {'repository':model_repo} parameters_input = {'connector':'csv'} parameters_mllib = {} parameters_output = {} dd.put_service(sname,model,description,mllib, parameters_input,parameters_mllib,parameters_output,'unsupervised') # training train_data = [training_repo] parameters_input = {'id':'','separator':',','label':'label'} parameters_mllib = {'iterations':500}
import sys
import json

from dd_client import DD

with open('config.json', 'r') as f:
    config = json.load(f)

service = sys.argv[1]

# setting up DD client
host = 'localhost'
sname = config['REPO'][service]['NAME']
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)

dd.delete_service(sname, 'full')
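# --- Hedged usage sketch (not part of the original script) ---
# The script above reads the service name for a given key in config.json and
# removes that service together with its data ('full'). A typical invocation
# (script name and service key are illustrative assumptions):
#
#   python delete_service.py objects
#
# A quick check that the service is gone, using the info() call shown later in
# this document:
remaining = [s['name'] for s in dd.info()['head']['services']]
print(sname, 'still registered' if sname in remaining else 'removed', remaining)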
help= "How many top predictions should be considered to chose the next token.") parser.add_argument( "--temperature", type=float, default=1, help="Temperature of the predictions. The higher, the 'randomer'.") args = parser.parse_args() # dd global variables sname = 'gpt-2' description = 'Inference with GPT-2' mllib = 'torch' dd = DD(args.host, args.port) dd.set_return_format(dd.RETURN_PYTHON) # setting up the ML service model = {'repository': args.repository} parameters_input = { 'connector': 'txt', 'ordered_words': True, 'wordpiece_tokens': True, 'punctuation_tokens': True, 'lower_case': False, 'width': args.input_size } parameters_mllib = {'template': 'gpt2', 'gpu': True} parameters_output = {} dd.put_service(sname, model, description, mllib, parameters_input,
def main():
    parser = argparse.ArgumentParser(description="DeepDetect benchmark tool")
    parser.add_argument("--host", help="server host", default="localhost")
    parser.add_argument("--port", help="server port", type=int, default=8080)
    parser.add_argument("--sname", help="service name")
    parser.add_argument("--img-width", help="image width", type=int, default=224)
    parser.add_argument("--img-height", help="image height", type=int, default=224)
    parser.add_argument("--bw", help="whether images are bw", action="store_true")
    parser.add_argument(
        "--histogram-equalization",
        "--eqhist",
        help="whether to apply a histogram equalization to images",
        action="store_true",
    )
    parser.add_argument("--gpu", help="whether to bench GPU", action="store_true")
    parser.add_argument("--gpuid", help="gpu id to use", type=int, default=0)
    parser.add_argument("--cpu", help="whether to bench CPU", action="store_true")
    parser.add_argument(
        "--remote-bench-data-dir",
        help="bench data directory, when available remotely on the server",
    )
    parser.add_argument("--max-batch-size",
                        help="max batch size to be tested",
                        type=int,
                        default=256)
    parser.add_argument(
        "--max-workspace-size",
        help="max workspace size for tensorrt bench",
        type=int,
        default=1024,
    )
    parser.add_argument(
        "--list-bench-files",
        help="file holding the list of bench files",
        default="list_bench_files.txt",
    )
    parser.add_argument("--npasses",
                        help="number of passes for every batch size",
                        type=int,
                        default=5)
    parser.add_argument("--detection",
                        help="whether benching a detection model",
                        action="store_true")
    parser.add_argument(
        "--segmentation",
        help="whether benching a segmentation model",
        action="store_true",
    )
    parser.add_argument(
        "--regression",
        help="whether benching a regression model",
        action="store_true",
    )
    parser.add_argument(
        "--search",
        help="whether benching a similarity search service",
        action="store_true",
    )
    parser.add_argument(
        "--search-multibox",
        help="whether benching a multibox similarity search service",
        action="store_true",
    )
    parser.add_argument("--create", help="model's folder name to create a service")
    parser.add_argument(
        "--nclasses",
        help="number of classes for service creation",
        type=int,
        default=1000,
    )
    parser.add_argument(
        "--auto-kill",
        help="auto kill the service after benchmarking",
        action="store_true",
    )
    parser.add_argument("--csv-output", help="CSV file output")
    parser.add_argument("--json-output", help="JSON file output")
    parser.add_argument("--mllib",
                        help="mllib to bench, i.e. [tensorrt|ncnn|caffe]",
                        default="caffe")
    parser.add_argument("--datatype",
                        help="datatype for tensorrt [fp16|fp32]",
                        default="fp32")
    parser.add_argument(
        "--recreate",
        help="recreate the service for every batch size, useful for batch-size"
        " dependent precompiling backends (i.e. TensorRT)",
        action="store_true",
        default=False,
    )
    parser.add_argument("--dla", help="use dla", action="store_true", default=False)
    parser.add_argument("--gpu-resize",
                        help="image resizing on gpu",
                        action="store_true",
                        default=False)
    parser.add_argument(
        "--image-interp",
        help="image interpolation method (nearest, linear, cubic, ...)",
        default="",  # default added so the emptiness check below behaves as intended
    )
    args = parser.parse_args()

    host = args.host
    port = args.port
    dd = DD(host, port)
    dd.set_return_format(dd.RETURN_PYTHON)
    autokill = args.auto_kill

    def service_create(bs):
        # Create a service
        if args.create:
            description = "image classification service"
            mllib = args.mllib
            model = {"repository": args.create}
            parameters_input = {
                "connector": "image",
                "width": args.img_width,
                "height": args.img_height,
                "bw": args.bw,
                "histogram_equalization": args.histogram_equalization,
            }
            if args.segmentation:
                parameters_input["segmentation"] = True
            if args.regression:
                parameters_input["regression"] = True
            if args.dla:
                parameters_mllib = {
                    "nclasses": args.nclasses,
                    "datatype": args.datatype,
                    "readEngine": True,
                    "writeEngine": True,
                    "maxBatchSize": bs,
                    "dla": 0,
                    "maxWorkspaceSize": args.max_workspace_size,
                }
            else:
                parameters_mllib = {
                    "nclasses": args.nclasses,
                    "datatype": args.datatype,
                    "readEngine": True,
                    "writeEngine": True,
                    "maxBatchSize": bs,
                    "maxWorkspaceSize": args.max_workspace_size,
                }
            parameters_output = {}
            dd.put_service(
                args.sname,
                model,
                description,
                mllib,
                parameters_input,
                parameters_mllib,
                parameters_output,
            )
        else:
            pass

    out_json = []
    out_csv = None
    csv_writer = None
    if args.csv_output:
        out_csv = open(args.csv_output, "w+")
        csv_writer = csv.writer(out_csv)
        csv_writer.writerow(
            ["batch_size", "mean processing time", "mean time per img"])

    list_bench_files = []
    with open(args.list_bench_files) as f:
        for line in f:
            list_bench_files.append(args.remote_bench_data_dir + "/" +
                                    line.rstrip())
    batch_sizes = []
    batch_size = 1
    while batch_size <= args.max_batch_size:
        batch_sizes.append(batch_size)
        if batch_size < 32:
            batch_size = batch_size * 2
        else:
            batch_size += 16

    parameters_input = {}
    if not args.image_interp == "":
        parameters_input["interp"] = args.image_interp
    if args.gpu_resize:
        parameters_input["cuda"] = args.gpu_resize
    parameters_mllib = {"gpu": args.gpu, "gpuid": args.gpuid}
    parameters_output = {}
    if args.detection:
        parameters_output["confidence_threshold"] = 0.1
        if args.search or args.search_multibox:
            parameters_output["search"] = True
            parameters_output["rois"] = "rois"
            parameters_output["bbox"] = False
        else:
            parameters_output["bbox"] = True
        if args.search_multibox:
            parameters_output["multibox_rois"] = True
    elif args.segmentation:
        parameters_input["segmentation"] = True
    elif args.regression:
        parameters_output["regression"] = True
    elif args.search:
        parameters_output["search"] = True

    # First call to load model
    data = list_bench_files[:1]
    if not args.recreate:
        if not args.mllib == "tensorrt" or args.recreate:
            service_create(1)
        else:
            service_create(args.max_batch_size)
        classif = dd.post_predict(args.sname, data, parameters_input,
                                  parameters_mllib, parameters_output)

    for b in batch_sizes:
        data = list_bench_files[:b]
        fail = False
        if args.recreate:
            service_create(b)
            for i in range(5):
                classif = dd.post_predict(
                    args.sname,
                    data,
                    parameters_input,
                    parameters_mllib,
                    parameters_output,
                )
        mean_ptime = 0
        mean_ptime_per_img = 0
        for i in range(0, args.npasses + 1):
            print("testing batch size = %s" % len(data))
            classif = dd.post_predict(args.sname, data, parameters_input,
                                      parameters_mllib, parameters_output)
            if classif["status"]["code"] == 200:
                if i == 0:
                    continue  # skipping first pass so that the batch resize does not affect timing
                ptime = classif["head"]["time"]
                ptime_per_img = ptime / b
                mean_ptime += ptime
                mean_ptime_per_img += ptime_per_img
                print(
                    "pass %s batch size = %s / processing time = %s / time per image = %s"
                    % (i, b, ptime, ptime_per_img))
            else:
                print(classif["status"])
                # reload model
                data = list_bench_files[:1]
                classif = dd.post_predict(
                    args.sname,
                    data,
                    parameters_input,
                    parameters_mllib,
                    parameters_output,
                )
                fail = True
                break
        mean_processing_time = mean_ptime / args.npasses
        mean_time_per_img = mean_ptime_per_img / args.npasses
        print(
            ">>> batch size = %s / mean processing time = %s / mean time per image = %s / fps = %s / fail = %s"
            % (
                b,
                mean_ptime / args.npasses,
                mean_ptime_per_img / args.npasses,
                1000 / (mean_ptime_per_img / args.npasses),
                fail,
            ),
        )
        out_json.append({
            "batch_size": b,
            "mean_processing_time": mean_processing_time,
            "mean_time_per_img": mean_time_per_img,
        })
        if args.csv_output:
            csv_writer.writerow([b, mean_processing_time, mean_time_per_img])
        # break
        if args.recreate:
            dd.delete_service(args.sname)

    if args.json_output:
        with open(args.json_output, "w") as outfile:
            json.dump(out_json, outfile)
    if autokill:
        dd.delete_service(args.sname)
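# --- Hedged usage note (not part of the original script) ---
# The original file presumably ends with the usual entry point. The command
# line below is illustrative only (script name, model folder and file list are
# assumptions):
#
#   python dd_bench.py --host localhost --port 8080 --sname bench \
#       --create /opt/models/resnet50 --mllib tensorrt --datatype fp16 \
#       --max-batch-size 64 --npasses 5 --auto-kill --csv-output bench.csv
if __name__ == "__main__":
    main()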
                    help='whether benching a detection model',
                    action='store_true')
parser.add_argument('--create',
                    help='model\'s folder name to create a service')
parser.add_argument('--nclasses',
                    help='number of classes for service creation',
                    type=int, default=1000)
parser.add_argument('--auto-kill',
                    help='auto kill the service after benchmarking',
                    action='store_true')
args = parser.parse_args()

host = args.host
port = args.port
dd = DD(host, port)
dd.set_return_format(dd.RETURN_PYTHON)
autokill = args.auto_kill

# Create a service
if args.create:
    description = 'image classification service'
    mllib = 'caffe'
    model = {'repository': args.create}
    parameters_input = {
        'connector': 'image',
        'width': args.img_width,
        'height': args.img_height
    }
    parameters_mllib = {'nclasses': args.nclasses}
    parameters_output = {}
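    # --- Hedged sketch (not part of the original script) ---
    # The snippet is cut off right after assembling the service parameters. The
    # call that presumably follows mirrors the other put_service calls in this
    # document; whether the original passes an explicit mltype here is unknown.
    dd.put_service(args.sname, model, description, mllib,
                   parameters_input, parameters_mllib, parameters_output)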
        http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from dd_client import DD

HOST = 'localhost'
PORT = 8080

dd = DD(HOST, PORT)
dd.set_return_format(dd.RETURN_PYTHON)


def delete_dd_service(sname):
    dd.delete_service(sname, clear='')


# main
info = dd.info()
# in case there are remaining services, remove them
for s in info['head']['services']:
    sname = s['name']
    delete_dd_service(sname)
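# --- Hedged variant (not part of the original script) ---
# The loop above removes every service registered on the server. If only some
# services should be cleaned up, a name-prefix filter is one option; the
# 'bench_' prefix below is an illustrative assumption. Only the info() and
# delete_service() calls already used above are relied on.
PREFIX = 'bench_'
for s in dd.info()['head']['services']:
    if s['name'].startswith(PREFIX):
        delete_dd_service(s['name'])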