Exemplo n.º 1
0
class CollaborationFeature(object):
    """Statistics relating conference attendance to co-authorship.

    All data is read through ``self.mRedis`` (a ``RedisHelper``). The three
    ``get*`` methods group a per-author value by the author's number of
    conferences and write one "<conference count> TAB <average>" line per
    group to the corresponding module-level ``OUTPUT_COLLAB_*`` path.
    """

    def __init__(self):
        self.mRedis = RedisHelper()

    def isConfLeadCollab(self, author, conf):
        """Tell whether ``conf`` preceded a collaboration of ``author``.

        Returns True when some co-author of ``author`` who is also an author
        of ``conf`` has the same times at ``conf`` as ``author`` and their
        earliest co-authoring time is strictly later than ``author``'s
        earliest time at ``conf``. Returns False otherwise, including when
        the relevant time lists are empty.
        """
        authorConfTime = self.mRedis.getAuConfTimes(author, conf)
        confAuthors = self.mRedis.getConfAuthors(conf)
        coAuthors = self.mRedis.getAuCoauthors(author)
        for coau in set(confAuthors) & set(coAuthors):
            coauConfTime = self.mRedis.getAuConfTimes(coau, conf)
            if authorConfTime != coauConfTime:
                continue
            coauTime = self.mRedis.getAuCoauTimes(author, coau)
            # min() raises ValueError on an empty sequence; a pair without
            # recorded times cannot demonstrate a "conference first" order.
            if not coauTime or not authorConfTime:
                continue
            if min(coauTime) > min(authorConfTime):
                return True
        return False

    def isCoauLeadByConf(self, author, coau):
        """Tell whether shared conferences preceded co-authoring with ``coau``.

        Returns True when ``author`` and ``coau`` share more than one
        conference and their earliest co-authoring time is strictly later
        than ``author``'s earliest time at any shared conference. Returns
        False otherwise, including when a time list is empty.
        """
        auCoauTimes = self.mRedis.getAuCoauTimes(author, coau)
        authorConfs = self.mRedis.getAuConfs(author)
        coauConfs = self.mRedis.getAuConfs(coau)
        sameConfs = set(authorConfs) & set(coauConfs)
        # NOTE(review): a single shared conference is not enough here
        # (strictly more than one is required) - confirm this is intended.
        if len(sameConfs) <= 1:
            return False
        confTimes = []
        for conf in sameConfs:
            confTimes.extend(self.mRedis.getAuConfTimes(author, conf))
        # Guard min() against empty sequences (would raise ValueError).
        if not auCoauTimes or not confTimes:
            return False
        return min(auCoauTimes) > min(confTimes)

    def _sampleEligibleAuthors(self, sampleSize):
        """Yield ``(author, authorConfs)`` for random authors with >= 2 confs.

        Distinct authors are visited in random order, which matches drawing
        with ``random.choice`` and skipping repeats, but stops once every
        author has been seen. At most ``sampleSize`` pairs are yielded.
        Progress is logged every 1000 yielded authors.
        """
        candidates = list(set(self.mRedis.getAllAuthors()))
        random.shuffle(candidates)
        picked = 0
        for author in candidates:
            if picked >= sampleSize:
                break
            authorConfs = self.mRedis.getAuConfs(author)
            if len(authorConfs) < 2:
                continue
            picked += 1
            if picked % 1000 == 0:
                logging.info(picked)
            yield author, authorConfs

    @staticmethod
    def _writeAverages(path, valuesByConfCount):
        """Write "<key> TAB <mean of values>" lines for each dict entry."""
        with open(path, 'w') as fileWriter:
            for confCount, values in valuesByConfCount.items():
                avg = float(sum(values)) / len(values) if values else 0
                fileWriter.write(str(confCount) + '\t' + str(avg) + '\n')

    def getConfLeadCollabProb(self, sampleSize=200000):
        """Write, per conference count, the mean share of "leading" confs.

        For up to ``sampleSize`` random authors with at least two
        conferences, computes the fraction of the author's conferences for
        which ``isConfLeadCollab`` is True, then averages those fractions
        per conference count into OUTPUT_COLLAB_CONF_LEAD_COLLAB_PROB.
        """
        confCountCLCPDictList = dict()
        for author, authorConfs in self._sampleEligibleAuthors(sampleSize):
            confCnt = len(authorConfs)
            CLCPNum = 0
            for conf in authorConfs:
                if self.isConfLeadCollab(author, conf):
                    CLCPNum += 1
            confCountCLCPDictList.setdefault(confCnt, []).append(
                CLCPNum * 1.0 / confCnt)
        self._writeAverages(OUTPUT_COLLAB_CONF_LEAD_COLLAB_PROB,
                            confCountCLCPDictList)

    def getCoauLeadByConf(self, sampleSize=200000):
        """Write, per conference count, the mean number of conf-led co-authors.

        For up to ``sampleSize`` random authors with at least two
        conferences, counts the co-authors for which ``isCoauLeadByConf`` is
        True, then averages those counts per conference count into
        OUTPUT_COLLAB_COAU_NUM_LEAD_BY_CONF.
        """
        confCountCoauDictList = dict()
        for author, authorConfs in self._sampleEligibleAuthors(sampleSize):
            # Co-authors are fetched only for authors that passed the filter.
            auCoauthors = self.mRedis.getAuCoauthors(author)
            CLCsNum = 0
            for coau in auCoauthors:
                if self.isCoauLeadByConf(author, coau):
                    CLCsNum += 1
            confCountCoauDictList.setdefault(len(authorConfs), []).append(
                CLCsNum)
        self._writeAverages(OUTPUT_COLLAB_COAU_NUM_LEAD_BY_CONF,
                            confCountCoauDictList)

    def getConfLeadPotentialCoaus(self):
        """Write, per conference count, the mean number of potential co-authors.

        For every author, counts the distinct authors of the author's
        conferences that are not already co-authors, then averages those
        counts per conference count into
        OUTPUT_COLLAB_CONF_LEAD_POTENRIAL_COAU.
        """
        confAuthorDict = dict()
        for conf in self.mRedis.getAllConfs():
            confAuthorDict[conf] = self.mRedis.getConfAuthors(conf)

        confCountPotentialCoausDict = dict()
        for index, author in enumerate(self.mRedis.getAllAuthors(), 1):
            if index % 1000 == 0:
                logging.info(index)
            confs = self.mRedis.getAuConfs(author)
            potentialCoaus = set()
            for conf in confs:
                potentialCoaus.update(confAuthorDict[conf])
            # NOTE(review): the author is not removed from their own
            # candidate set unless getAuCoauthors returns them - confirm.
            potenCoauNum = len(
                potentialCoaus - set(self.mRedis.getAuCoauthors(author)))
            confCountPotentialCoausDict.setdefault(len(confs), []).append(
                potenCoauNum)
        self._writeAverages(OUTPUT_COLLAB_CONF_LEAD_POTENRIAL_COAU,
                            confCountPotentialCoausDict)