def give_cluster(url):
    """Predict the k-means cluster for the page at ``url``.

    The page text is obtained via ``get_text_content``, passed through
    ``preprocess``, turned into a vector by ``sent_vectorizer`` (with the
    module-level ``modelg``), and handed to ``kmeans.predict``.

    Returns:
        The first element of the ``kmeans.predict`` result. If any step
        raises, the caught ``Exception`` instance is *returned* (not
        raised), so callers need to inspect the type of the result.
    """
    try:
        cleaned_text = preprocess(get_text_content(url))
        url_vector = sent_vectorizer(cleaned_text, modelg)
        # predict() is given a one-item batch; only its single result is used.
        predictions = kmeans.predict([url_vector])
        return predictions[0]
    except Exception as err:
        return err
def giveUrlInfo(url):
    """Return the cluster number and vector for ``url``.

    ``checkUrlInDb(url)`` is consulted first; its result is indexed with the
    keys ``'new_url_vector'`` and ``'cluster_no'``. When the stored vector is
    non-empty those stored values are used. Otherwise the page is fetched,
    preprocessed, vectorized with ``sent_vectorizer`` and classified with
    ``kmeans.predict``.

    Returns:
        A dict with keys ``"cluster_no"`` and ``"urlvector"``. If any step
        raises, the error is printed and the caught ``Exception`` instance
        is *returned* (not raised).
    """
    try:
        record = checkUrlInDb(url)
        if len(record['new_url_vector']) == 0:
            # Empty stored vector: compute vector and cluster from the page.
            print("url not in database")
            page_text = preprocess(get_text_content(url))
            vector = sent_vectorizer(page_text, modelg)
            cluster = kmeans.predict([vector])[0]
        else:
            print("url in databse")
            cluster = record['cluster_no']
            vector = record['new_url_vector']
        print("cluster", cluster)
        return {"cluster_no": cluster, "urlvector": vector}
    except Exception as err:
        print('giveUrlInfo error', err)
        return err
def getVectorOfUrl(url):
    """Build the vector for the page at ``url``.

    The text from ``get_text_content(url)`` is run through ``preprocess`` and
    then through ``sent_vectorizer`` together with the module-level ``modelg``.

    Returns:
        Whatever ``sent_vectorizer`` returns. If any step raises, the caught
        ``Exception`` instance is *returned* (not raised).
    """
    try:
        cleaned_text = preprocess(get_text_content(url))
        url_vector = sent_vectorizer(cleaned_text, modelg)
    except Exception as err:
        return err
    else:
        return url_vector