示例#1
0
def querydomain(keywords):
    """Classify a keyword list into a domain and look up matching URLs.

    The keywords are appended as one tab-separated row to
    ``keysimulated.tab``.  That table is then loaded, a classifier is built
    from it with Orange's ``NaiveLearner``, and the classifier is applied to
    the table's last row (presumably the row just appended -- confirm
    against how Orange parses the trailing blank line).  The predicted
    value is passed to ``appendtoqueryfile`` and to the ``logs.txt`` data
    log, and ``collection`` is queried for documents whose ``domain`` equals
    the prediction and whose ``keywords`` match the ``$in`` filter.

    Args:
        keywords: list of keyword strings (they are joined with tabs and
            used as the ``$in`` operand of the query).

    Returns:
        A ``(val, res)`` tuple where ``val`` is the predicted value and
        ``res`` is the list of ``url`` fields of the matching documents,
        each converted with ``str``.
    """
    # The context manager closes the file even if joining or writing fails.
    with open('keysimulated.tab', 'a') as f:
        f.write('\t'.join(keywords) + '\t\n\n')

    doc = mylog.datalog('logs.txt')
    try:
        data = Orange.data.Table('keysimulated.tab')
        classifier = Orange.classification.bayes.NaiveLearner(data)
        val = classifier(data[-1]).value
        appendtoqueryfile(keywords, val)
        doc.enterdomainfound(keywords, val)
    finally:
        # Release the log even when classification or logging raises.
        doc.close()

    cursor = collection.find(
        {"keywords": {"$in": keywords}, "domain": val},
        {"url": 1, "_id": 0},
    )
    return val, [str(each["url"]) for each in cursor]
示例#2
0
def querydomain(keywords):
    """Predict the domain for ``keywords`` and return the URLs found for it.

    Steps, in order:

    1. Append the keywords as a tab-separated row to ``keysimulated.tab``.
    2. Load that table, build a classifier from it with Orange's
       ``NaiveLearner`` and apply it to the table's last row (presumably
       the row appended in step 1 -- TODO confirm with Orange's tab parser).
    3. Hand the prediction to ``appendtoqueryfile`` and record it in the
       ``logs.txt`` data log.
    4. Query ``collection`` for documents whose ``domain`` equals the
       prediction and whose ``keywords`` satisfy the ``$in`` filter.

    Args:
        keywords: list of keyword strings.

    Returns:
        ``(val, res)``: the predicted value and a list with the ``url`` of
        every matching document, converted with ``str``.
    """
    # ``with`` guarantees the handle is closed if the join/write raises.
    with open("keysimulated.tab", "a") as f:
        f.write("\t".join(keywords) + "\t\n\n")

    doc = mylog.datalog("logs.txt")
    try:
        data = Orange.data.Table("keysimulated.tab")
        classifier = Orange.classification.bayes.NaiveLearner(data)
        val = classifier(data[-1]).value
        appendtoqueryfile(keywords, val)
        doc.enterdomainfound(keywords, val)
    finally:
        # Always close the log, including on failure of the steps above.
        doc.close()

    query = {"keywords": {"$in": keywords}, "domain": val}
    projection = {"url": 1, "_id": 0}
    return val, [str(each["url"]) for each in collection.find(query, projection)]
示例#3
0
        "keywords": [
            "artificial intelligence",
            "data mining",
            "data collectiont",
            "internet shopping",
            "electronic commerce",
        ],
        "url": "http://url14.com",
    },
    {
        "domain": "social computing",
        "keywords": ["pattern classification", "data mining", "sentiment analysis"],
        "url": "http://url15.com",
    },
]


if __name__ == "__main__":
    # Disabled: drop the collection and re-insert the sample documents.
    # db.drop_collection("url_collection")
    # collection.insert(insert_in_database)

    doc = mylog.datalog("logs.txt")
    try:
        queryfile()
        queryingdomain()
        # Disabled interactive mode (Python 2 syntax), kept for reference:
        # x = raw_input('enter the 8 keywords:')  # min 4 for proper classification
        # words = x.split(',')  # list
        # a, b = querydomain(words)
        # print a
        # print b
    finally:
        # Close the log even if one of the steps above raises.
        doc.close()
示例#4
0
# Sample documents for the URL collection: each record carries a domain
# label, the keywords associated with it, and the URL they belong to.
insert_in_database = [
    {
        "domain": "cloud computing",
        "keywords": [
            "cloud computing",
            "cloud services",
            "data centers",
            "open systems",
        ],
        "url": "http://url1.com",
    },
    {
        "domain": "cloud computing",
        "keywords": [
            "virtualisation",
            "cloud services",
            "parallel computing",
            "peer-to-peer computing",
        ],
        "url": "http://url2.com",
    },
    {
        "domain": "cloud computing",
        "keywords": ["data privacy", "distributed computing"],
        "url": "http://url3.com",
    },
    {
        "domain": "audio,speech and language processing",
        "keywords": [
            "speech",
            "microphone",
            "audio coding",
            "distortion",
            "speech enhancement",
        ],
        "url": "http://url4.com",
    },
    {
        "domain": "audio,speech and language processing",
        "keywords": ["noise reduction", "dereverberation", "videoconferencing"],
        "url": "http://url5.com",
    },
    {
        "domain": "audio,speech and language processing",
        "keywords": [
            "audio coding",
            "mixed signals",
            "speech",
            "acoustic correlation",
        ],
        "url": "http://url6.com",
    },
    {
        "domain": "security and privacy",
        "keywords": [
            "cyberattack",
            "software vulnerabilities",
            "identity management",
            "cryptography",
        ],
        "url": "http://url7.com",
    },
    {
        "domain": "security and privacy",
        "keywords": ["data privacy", "antivirus", "cryptography"],
        "url": "http://url8.com",
    },
    {
        "domain": "security and privacy",
        "keywords": [
            "security",
            "virus",
            "computer crime",
            "cryptography",
            "intrusion tolerance",
        ],
        "url": "http://url9.com",
    },
    {
        "domain": "mobile computing",
        "keywords": [
            "mobile computing",
            "wireless communication",
            "long term evolution",
        ],
        "url": "http://url10.com",
    },
    {
        "domain": "mobile computing",
        "keywords": [
            "interference",
            "long term evolution",
            "smart phones",
            "global positioning system",
            "energy consumption",
        ],
        "url": "http://url11.com",
    },
    {
        "domain": "mobile computing",
        "keywords": [
            "Scattering",
            "signal to noise ratio",
            "ad hoc networks",
            "sporadic connectivity",
        ],
        "url": "http://url12.com",
    },
    {
        "domain": "social computing",
        "keywords": [
            "customer loyalty",
            "social behavior",
            "intelligent agents",
            "social informatics",
            "informatics",
            "psychology",
        ],
        "url": "http://url13.com",
    },
    {
        "domain": "social computing",
        "keywords": [
            "artificial intelligence",
            "data mining",
            "data collectiont",
            "internet shopping",
            "electronic commerce",
        ],
        "url": "http://url14.com",
    },
    {
        "domain": "social computing",
        "keywords": ["pattern classification", "data mining", "sentiment analysis"],
        "url": "http://url15.com",
    },
]

if __name__ == "__main__":
    # Disabled: drop the collection and re-insert the sample documents.
    #db.drop_collection("url_collection")
    #collection.insert(insert_in_database)

    doc = mylog.datalog('logs.txt')
    try:
        queryfile()
        queryingdomain()
        # Disabled interactive mode (Python 2 syntax), kept for reference:
        # x = raw_input('enter the 8 keywords:')  # min 4 for proper classification
        # words = x.split(',')  # list
        # a, b = querydomain(words)
        # print a
        # print b
    finally:
        # Close the log even if one of the steps above raises.
        doc.close()