def querydomain(keywords):
    """Classify a keyword set into a domain and return the matching URLs.

    The keywords are appended as one tab-separated row to
    ``keysimulated.tab``; the table is then reloaded, a naive Bayes
    learner is run over it, and the resulting classifier is applied to the
    last row (the one just appended). The predicted value is passed to
    ``appendtoqueryfile`` and recorded in ``logs.txt`` through
    ``enterdomainfound``. Finally the Mongo ``collection`` is queried for
    documents whose ``domain`` equals the prediction and whose ``keywords``
    match at least one of the supplied keywords.

    Args:
        keywords: list of keyword strings; joined with tabs to form the
            new table row and used as the ``$in`` filter of the query.

    Returns:
        A ``(val, res)`` tuple: ``val`` is the predicted class value and
        ``res`` is the list of matching URLs, each converted with ``str``.
    """
    # Context manager guarantees the table file is flushed and closed even
    # if the write fails, before Orange re-reads it below.
    with open('keysimulated.tab', 'a') as table_file:
        # NOTE(review): the trailing tab presumably leaves the class column
        # empty for the new row -- confirm against the .tab header.
        table_file.write('\t'.join(keywords) + '\t\n\n')

    doc = mylog.datalog('logs.txt')
    try:
        data = Orange.data.Table('keysimulated.tab')
        classifier = Orange.classification.bayes.NaiveLearner(data)
        # data[-1] is the row appended above.
        val = classifier(data[-1]).value
        appendtoqueryfile(keywords, val)
        doc.enterdomainfound(keywords, val)
    finally:
        # Release the log handle even when classification or logging fails.
        doc.close()

    # Projection keeps only the "url" field of each matching document.
    matches = collection.find(
        {"keywords": {"$in": keywords}, "domain": val},
        {"url": 1, "_id": 0},
    )
    res = [str(each["url"]) for each in matches]
    return val, res
def querydomain(keywords):
    """Predict the domain for ``keywords`` and look up the URLs filed under it.

    Steps, in order:

    1. Append ``keywords`` as a tab-separated row to ``keysimulated.tab``.
    2. Reload that table, run ``NaiveLearner`` over it and classify the
       last row (the row written in step 1).
    3. Hand the predicted value to ``appendtoqueryfile`` and record it in
       ``logs.txt`` via ``enterdomainfound``.
    4. Query ``collection`` for documents whose ``domain`` equals the
       prediction and whose ``keywords`` match any supplied keyword.

    Args:
        keywords: list of keyword strings.

    Returns:
        ``(val, res)`` where ``val`` is the predicted class value and
        ``res`` is a list of the matching documents' URLs as ``str``.
    """
    # "with" closes the table file even if the write raises, so Orange
    # never re-reads a half-open handle's unflushed data.
    with open("keysimulated.tab", "a") as table_file:
        # NOTE(review): trailing tab presumably leaves the class column
        # blank for this row -- verify against the table header.
        table_file.write("\t".join(keywords) + "\t\n\n")

    doc = mylog.datalog("logs.txt")
    try:
        data = Orange.data.Table("keysimulated.tab")
        classifier = Orange.classification.bayes.NaiveLearner(data)
        prediction = classifier(data[-1])  # last row == row appended above
        val = prediction.value
        appendtoqueryfile(keywords, val)
        doc.enterdomainfound(keywords, val)
    finally:
        # Always release the log, including on classification errors.
        doc.close()

    query = {"keywords": {"$in": keywords}, "domain": val}
    projection = {"url": 1, "_id": 0}  # return only the "url" field
    res = [str(each["url"]) for each in collection.find(query, projection)]
    return val, res
"keywords": [ "artificial intelligence", "data mining", "data collectiont", "internet shopping", "electronic commerce", ], "url": "http://url14.com", }, { "domain": "social computing", "keywords": ["pattern classification", "data mining", "sentiment analysis"], "url": "http://url15.com", }, ] if __name__ == "__main__": # db.drop_collection("url_collection") # collection.insert(insert_in_database) doc = mylog.datalog("logs.txt") queryfile() queryingdomain() """x = raw_input('enter the 8 keywords:') #min 4 for proper classification words = x.split(',') #list a,b = querydomain(words) print a print b""" doc.close()
# Documents for the URL collection: each entry carries a "domain" label,
# the "keywords" associated with it, and the "url" it points to.
# Brackets already continue the expression across lines, so no backslash
# continuations are needed.
insert_in_database = [
    {"domain": "cloud computing",
     "keywords": ["cloud computing", "cloud services", "data centers", "open systems"],
     "url": "http://url1.com"},
    {"domain": "cloud computing",
     "keywords": ["virtualisation", "cloud services", "parallel computing", "peer-to-peer computing"],
     "url": "http://url2.com"},
    {"domain": "cloud computing",
     "keywords": ["data privacy", "distributed computing"],
     "url": "http://url3.com"},
    {"domain": "audio,speech and language processing",
     "keywords": ["speech", "microphone", "audio coding", "distortion", "speech enhancement"],
     "url": "http://url4.com"},
    {"domain": "audio,speech and language processing",
     "keywords": ["noise reduction", "dereverberation", "videoconferencing"],
     "url": "http://url5.com"},
    {"domain": "audio,speech and language processing",
     "keywords": ["audio coding", "mixed signals", "speech", "acoustic correlation"],
     "url": "http://url6.com"},
    {"domain": "security and privacy",
     "keywords": ["cyberattack", "software vulnerabilities", "identity management", "cryptography"],
     "url": "http://url7.com"},
    {"domain": "security and privacy",
     "keywords": ["data privacy", "antivirus", "cryptography"],
     "url": "http://url8.com"},
    {"domain": "security and privacy",
     "keywords": ["security", "virus", "computer crime", "cryptography", "intrusion tolerance"],
     "url": "http://url9.com"},
    {"domain": "mobile computing",
     "keywords": ["mobile computing", "wireless communication", "long term evolution"],
     "url": "http://url10.com"},
    {"domain": "mobile computing",
     "keywords": ["interference", "long term evolution", "smart phones", "global positioning system", "energy consumption"],
     "url": "http://url11.com"},
    {"domain": "mobile computing",
     "keywords": ["Scattering", "signal to noise ratio", "ad hoc networks", "sporadic connectivity"],
     "url": "http://url12.com"},
    {"domain": "social computing",
     "keywords": ["customer loyalty", "social behavior", "intelligent agents", "social informatics", "informatics", "psychology"],
     "url": "http://url13.com"},
    # NOTE(review): "data collectiont" looks like a typo for
    # "data collection"; left unchanged because stored data may already
    # use this exact spelling -- confirm before correcting.
    {"domain": "social computing",
     "keywords": ["artificial intelligence", "data mining", "data collectiont", "internet shopping", "electronic commerce"],
     "url": "http://url14.com"},
    {"domain": "social computing",
     "keywords": ["pattern classification", "data mining", "sentiment analysis"],
     "url": "http://url15.com"},
]

if __name__ == "__main__":
    # Disabled: drop the collection and re-insert the documents above.
    # db.drop_collection("url_collection")
    # collection.insert(insert_in_database)
    doc = mylog.datalog('logs.txt')
    try:
        queryfile()
        queryingdomain()
        # Disabled interactive mode (Python 2 syntax), kept for reference:
        # x = raw_input('enter the 8 keywords:')  # min 4 for proper classification
        # words = x.split(',')  # list
        # a, b = querydomain(words)
        # print a
        # print b
    finally:
        # Close the log even if one of the query steps raises.
        doc.close()