示例#1
0
文件: crons.py 项目: karo1205/AIC
    def do(self):
        """Poll started tasks for answers, then convert answered tasks to keywords.

        Phase 1: every task with status 'S' is fetched from
        ``task_uri + '?format=json'``. When the payload's ``answer`` is not the
        string 'NULL', the answer is stored on the task, the task is set to
        'D' and linked to the worker named in the payload (a Worker row is
        created when none with that ``worker_uri`` exists).

        Phase 2: for every task with status 'D', each key of
        ``answer['keywords']`` is attached to the task as a Keyword (created
        on demand, using the mapped value as its category) and the task is
        set to 'P'.
        """
        # Local import: the file's import block is not visible from here.
        from contextlib import closing

        logger.info('Getting open tasks from DB')
        started_tasks = Task.objects.filter(status='S')

        for t in started_tasks:
            # urllib2 responses are not context managers; closing() releases
            # the connection even when JSON decoding fails.
            with closing(urllib2.urlopen(t.task_uri + '?format=json')) as response:
                payload = json.load(response)
            logger.info("Checking: %s", payload['resource_uri'])
            if payload['answer'] == 'NULL':
                continue  # no answer yet, check again on the next run

            t.answer = payload['answer']
            t.status = 'D'  # done: ready for processing
            try:
                worker = Worker.objects.get(worker_uri=payload['worker'])
            except Worker.DoesNotExist:
                # Save before linking: the new worker has no id until saved.
                worker = Worker(worker_uri=payload['worker'])
                worker.save()
                logger.info("new worker created: id = %s", worker.id)
            else:
                logger.info("worker is already known: id = %s", worker.id)
            t.worker_id = worker.id  # set the Task -> Worker relation
            logger.info("worker %s did task %s", worker.id, t.id)
            t.save()

        # Process Task Type 1
        done_tasks = Task.objects.filter(status='D')  # all answered tasks
        logger.info('Getting done tasks from DB. %s elements found', done_tasks.count())
        for t in done_tasks:
            logger.info("Processing Task %s", t.id)
            try:
                # NOTE(review): a plain attribute read is not expected to raise
                # ValueError; this handler looks like a leftover from an
                # earlier json.loads(t.answer) call and is kept unchanged.
                answer = t.answer
            except ValueError:
                logger.error("the answer field of task %s does not contain a valid JSON Format. Skipping.", t.id)
                # Without valid JSON there is no point in considering the answer.
                # TODO: implement quality control and post the task again.
                continue

            for kw, category in answer['keywords'].items():
                try:
                    keyword = Keyword.objects.get(text=kw)
                except Keyword.DoesNotExist:
                    # Pass kw through unchanged: str(kw) breaks on non-ASCII
                    # unicode keywords under Python 2.
                    keyword = Keyword(text=kw, category=category)
                    keyword.save()
                    t.keywords.add(keyword)
                    logger.info('new keyword "%s" created and assigned to Task%s', kw, t.id)
                else:
                    logger.info('Keyword "%s" already in DB', kw)
                    t.keywords.add(keyword)
                    logger.info('Keyword "%s" assigned to Task%s', kw, t.id)

            t.status = 'P'  # processed
            t.save()
示例#2
0
def words_split():
    """Submit a word-splitting job for every job id that is new in HBase.

    Job ids already present in MySQL (``Job.jobId``) are subtracted from the
    ids returned by ``hbase_tool.getalljobid()``. For each remaining id the
    job's keyword and info text are read from HBase, the keyword is created
    in the database if the module-level ``keywords`` cache does not know it,
    and ``thread_deal(info, keyword)`` is submitted to ``executor``.

    Returns early (after printing the error text) when reading the HBase ids
    raises ``BrokenPipeError``.
    """
    global keywords

    # Job ids already stored in MySQL.
    known_ids = set(Job.objects.values_list('jobId', flat=True))
    # Job ids stored in HBase.
    try:
        hbase_ids = hbase_tool.getalljobid()
    except BrokenPipeError as e:
        print(e.strerror)
        return

    # TODO: revise -- an alternative was to process every HBase id without
    # subtracting the known ones.
    pending_ids = hbase_ids - known_ids

    print("start split words")
    # Cache the existing Keyword objects by their keyword text.
    for kw in Keyword.objects.all():
        keywords[kw.keyword] = kw

    for job_id in pending_ids:
        keyword = hbase_tool.getkeyword_byjobid(job_id)
        job_info = str(hbase_tool.getjobinfo_byjobid(job_id)).strip()

        # Create the keyword when the cache does not hold it yet.
        if keywords.get(keyword) is None:
            print("new keyword : ", keyword)
            new_keyword = Keyword()
            new_keyword.keyword = keyword
            new_keyword.save()
            # Cache the saved instance directly. Re-fetching it with a
            # `keyword__contains` lookup raises MultipleObjectsReturned as
            # soon as another keyword contains this text as a substring.
            keywords[keyword] = new_keyword
        executor.submit(thread_deal, job_info, keyword)
示例#3
0
文件: utils.py 项目: karo1205/AIC
def process_task_answers():
    """Process every task with status 'D' and mark it as processed ('P').

    The task's ``answer`` field is decoded as JSON; tasks whose answer is not
    valid JSON are logged and skipped (their status is left untouched).
    Depending on ``t.question`` the answer is handled as keywords
    ("Question1") or as sentiments ("Question2"); any other question is
    logged as an error. In all three cases the task is then set to 'P'.
    """
    logger.info("start processing... ")

    done_tasks = Task.objects.filter(status='D')  # all answered tasks
    logger.info('Getting done tasks from DB. %s elements found', done_tasks.count())
    for t in done_tasks:
        logger.info("Processing Task %s", t.id)
        try:
            answer = json.loads(t.answer)
        except ValueError:
            logger.error("the answer field of task %s does not contain a valid JSON Format. Skipping.", t.id)
            # Without valid JSON there is no point in considering the answer.
            # TODO: implement quality control and post the task again.
            continue

        if t.question == "Question1":
            logger.info("processing question1... ")
            _process_keyword_answer(t, answer)
            # Disabled manual hook for posting the follow-up task:
            # post_task2_to_crowd(t.feed)
        elif t.question == "Question2":
            logger.info("processing question2... ")
            _process_sentiment_answer(t, answer)
        else:
            logger.error("Keywords/Sentiments could not be processed.Something is wrong with task")

        t.status = 'P'  # processed
        t.save()


def _process_keyword_answer(t, answer):
    """Link the keywords of a "Question1" answer to task *t* and score its worker."""
    for kw, category in answer['keywords'].items():
        try:
            keyword = Keyword.objects.get(text=kw, category=category)
        except Keyword.DoesNotExist:
            _register_new_keyword(t, kw, category)
            continue

        logger.info('Keyword "%s" already in DB', kw)
        t.keywords.add(keyword)   # Task --> Keyword relationship
        keyword.feed.add(t.feed)  # Keyword --> Feed relationship
        keyword.save()
        logger.info('Keyword "%s" assigned to Task%s', kw, t.id)

        # Same text filed under a different category by other answers.
        opposing = Keyword.objects.filter(text=kw).exclude(category=category)
        if len(opposing) == 0:
            logger.info("no inverse keyword found. skipping")
            continue

        own_votes = keyword.task_set.count()
        opposing_votes = opposing[0].task_set.count()
        # float(): under Python 2 int / int truncates, which made this share 0
        # whenever the opposing keyword had any task at all. The denominator
        # is presumably >= 1 because t was linked to the keyword just above
        # (assumes task_set is the reverse side of Task.keywords).
        share = float(own_votes) / (own_votes + opposing_votes)
        if share <= 0.34:
            t.worker.score -= 1
            t.worker.save()
            logger.info('Keyword "%s" has wrong catgory set.%s was degraded', kw, t.worker.id)
        else:
            logger.info("no penalty with%s", share)


def _register_new_keyword(t, kw, category):
    """Create keyword *kw* for task *t*, or penalise the worker if the feed lacks it."""
    if kw not in t.feed.content:
        t.worker.score -= 1
        t.worker.save()
        logger.info('Keyword "%s" was not found in feed. worker %s was degraded', kw, t.worker.id)
        return

    # Pass kw through unchanged: str(kw) breaks on non-ASCII unicode keywords
    # under Python 2.
    new_keyword = Keyword(text=kw, category=category)
    new_keyword.save()  # needs an id before relations can be added
    t.keywords.add(new_keyword)
    new_keyword.feed.add(t.feed)  # Keyword --> Feed relationship
    new_keyword.save()
    logger.info('new keyword "%s" created and assigned to Task%s', kw, t.id)


def _process_sentiment_answer(t, answer):
    """Store the sentiments of a "Question2" answer for task *t* and score its worker."""
    for sen, new_score in answer['keywords'].items():
        logger.info("processing Sentiment %s(%s)", sen, new_score)
        matches = Keyword.objects.filter(text=sen)
        if len(matches) == 0:
            logger.error("Received sentiment for non existing Keyword")
            continue

        # Quality control: compare the new score with the known ones.
        scores = []
        for k in matches:
            scores.extend(k.get_sentiment_scores())
        logger.debug("existing sentiment scores for %s: %s", sen, scores)

        if len(scores) <= 3:
            logger.info("to few sentiments: Keyword %shas only %s sentiments", sen, len(scores))
        elif abs(median(scores) - int(new_score)) >= 3:
            logger.info("bad sentiment")
            t.worker.score -= 1
            t.worker.save()  # persist the penalty, as the keyword branch does
            logger.info('Worker %s was degraded', t.worker.id)

        # Save the sentiment; worker, feed and keyword are its foreign keys.
        new_sentiment = Sentiment(score=new_score)
        new_sentiment.worker = t.worker
        new_sentiment.feed = t.feed
        # TODO: choose a better keyword instead of always the first match.
        new_sentiment.keyword = matches[0]
        new_sentiment.save()
        logger.info('new sentiment "%s" created with score "%s" and relationships set: '
                    'worker=%s feed=%s keyword=%s',
                    sen, new_sentiment.score, t.worker.worker_uri,
                    t.feed.id, matches[0].text)