Example #1
def GettraintestdataV2(employee):
    user = employee
    singleFile = "./data/allusers/SingleSequence/" + user + ".csv"
    list = CSVFile.loadCSVfile1(singleFile)
    maxtime = list[-1][0]
    mintime = list[0][0]
    maxtime = datetime.datetime.strptime(maxtime, '%Y %j %H:%M:%S').date()
    mintime = datetime.datetime.strptime(mintime, '%Y %j %H:%M:%S').date()
    middletime = maxtime - relativedelta(days=60)

    if mintime > (middletime - relativedelta(days=90)):
        return 0

    TrainUserSequence = []
    TestUserSequence = []
    for i in (range(len(list))):
        time = datetime.datetime.strptime(list[i][0], '%Y %j %H:%M:%S').date()
        if time < middletime:
            TrainUserSequence.append([list[i][1]])
            #print "time<middletime", time < middletime, "TrainUserSequence"

    currenttime = middletime
    while (currenttime < maxtime):
        finishtime = currenttime + relativedelta(days=1)
        daysequenc = GetperiodSequence(user, currenttime, finishtime)
        if len(daysequenc) > 1:
            TestUserSequence.append(daysequenc)
        currenttime = finishtime

    trainFile = "./data/allusers/Train/" + user + ".csv"
    CSVFile.Writecsvtofile(trainFile, TrainUserSequence)
    print TestUserSequence
    print "TestUserSequence", len(TestUserSequence)
    return TestUserSequence
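The CSVFile.loadCSVfile1 and CSVFile.Writecsvtofile helpers used throughout these examples are not shown here. A minimal sketch of what such helpers could look like (written for Python 3, and purely an assumption about the real module, which may handle encodings, delimiters, and errors differently):

import csv

def loadCSVfile1(path):
    # Hypothetical stand-in: read every row of the CSV as a list of strings.
    with open(path, 'r', newline='') as f:
        return [row for row in csv.reader(f)]

def Writecsvtofile(path, rows):
    # Hypothetical stand-in: write a list of rows (each row an iterable of values).
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(rows)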
Example #2
def GetUserpro():
    employees = CSVFile.loadCSVfile1("./data/allusers/validusers.csv")
    userpro=[]
    for item in employees:
        proresult =[]
        user = item[0]
        state = item[1]
        testsequence = UserSequences.GettraintestdataV2(user)
        if testsequence==0:
            continue
        proresult.append(user)
        proresult.append(state)
        preProcess.GetTransiMatrixV2(user)
        H = HMM(user)
        for sequence in testsequence:
            result = H.hmmV2(sequence)
            proresult.append(result)

        userpro.append(proresult)

    # Per-day probabilities over the user's 60 days with the improved MM method: multiply them
    #filename= "./data/allusers/userpro.csv"
    # Per-day probabilities over the user's 60 days with the plain MM method: sum them
    filename = "./data/allusers/MM_userpro.csv"
    CSVFile.Writecsvtofile(filename,userpro)
    print userpro
Example #3
    def hmm(self, user):
        """
        :param usersequences: 用户的行为序列
        :return: 对应用户行为序列的转移概率序列
        """

        state_seq = []

        #usersequences = usersequences
        usertestfile = "./data/allusers/Test/" + user + ".csv"
        usersequences = CSVFile.loadCSVfile1(usertestfile)
        state = ''
        pre_state = usersequences[0][1] + '\n'
        prob = 0
        for i in range(len(usersequences)):
            state = usersequences[i][1]
            state = state + '\n'

            try:
                if state not in self.state_map:
                    state = 'other\n'
                prob = self.trans_pro_matrix[self.state_map[state]][
                    self.state_map[pre_state]]

            except KeyError:
                pass
            state_seq.append(prob)
            pre_state = state

        result_state = state_seq
        return result_state
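hmm() above is a first-order Markov lookup rather than full HMM decoding: each observed state is scored by the transition probability from the previous state. A toy illustration of that lookup with a hand-built state_map and trans_pro_matrix (the state names and numbers below are invented; only the indexing convention matrix[current][previous] mirrors the code above):

# Invented toy data; rows index the current state, columns the previous state.
state_map = {'logon\n': 0, 'file\n': 1, 'other\n': 2}
trans_pro_matrix = [
    [0.1, 0.3, 0.2],
    [0.6, 0.5, 0.3],
    [0.3, 0.2, 0.5],
]

observed = ['logon\n', 'file\n', 'unknown\n', 'logon\n']
pre_state = observed[0]
probs = []
for state in observed:
    if state not in state_map:
        state = 'other\n'   # unseen actions fall back to 'other', as in hmm()
    probs.append(trans_pro_matrix[state_map[state]][state_map[pre_state]])
    pre_state = state
print(probs)   # [0.1, 0.6, 0.2, 0.2]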
Example #4
def GetiForest_MM_scores():
    iforestdata = CSVFile.loadCSVfile1(
        './data/allusers/iForestdata/outallfeatures.csv')
    mmdata = CSVFile.loadCSVfile1('./data/allusers/weekdayavg.csv')
    userinfo = []
    users = []
    states = []
    iscores = []
    mscores = []

    for fitem in iforestdata:
        fuser = fitem[0]
        for mitem in mmdata:
            muser = mitem[0]
            if fuser != muser:
                continue
            tem = float(fitem[3]) + 0.3
            users.append(muser)
            states.append(mitem[1])
            mscores.append(mitem[2])
            iscores.append(tem)
            #score = -math.log(tem)
            #print score
            userinfo.append([muser, mitem[1], mitem[2], tem])
    #CSVFile.Writecsvtofile('./data/allusers/iForestdata/iforest_mmweekdayavg_data.csv',userinfo)
    print userinfo
    mscores = Normalization(mscores)
    iscores = Normalization(iscores)
    data = {}
    print users
    data = pd.DataFrame()
    data['users'] = users
    data['states'] = states
    data['mscores'] = mscores
    data['iscores'] = iscores
    print data

    print data.values
    data.to_csv(
        './data/allusers/iForestdata/iforest_mmweekdayavg_normalizedata.csv',
        index=False)
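Normalization() is applied to both score lists but is not defined in these snippets. A minimal min-max sketch under that assumption (the real helper may normalize differently, e.g. with z-scores):

def Normalization(values):
    # Hypothetical min-max scaling of a list of numbers into [0, 1].
    values = [float(v) for v in values]
    lo, hi = min(values), max(values)
    if hi == lo:
        return [0.0 for _ in values]   # constant input: avoid division by zero
    return [(v - lo) / (hi - lo) for v in values]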
Example #5
def GetperiodSequence(employee, starttime, finishtime):
    user = employee
    singleFile = "./data/allusers/SingleSequence/" + user + ".csv"
    sequences = []
    list = CSVFile.loadCSVfile1(singleFile)
    for i in range(len(list)):
        currenttime = datetime.datetime.strptime(list[i][0],
                                                 '%Y %j %H:%M:%S').date()
        #print currenttime
        if starttime <= currenttime and currenttime < finishtime:
            #print "currenttime:",currenttime
            sequences.append(list[i][1])
    return sequences
Example #6
def GetResult():
    k=4
    UserSequences.Gettraintestdata(k)
    #employees = Employees.queryEmployees()
    employees = CSVFile.loadCSVfile1("./data/allusers/validusers.csv")
    #employees = CSVFile.loadCSVfile1("./data/allusers/Allusers_state.csv")
    resultlist = []
    avgresult = []

    for item in employees:
        user = item[0]
        state = item[1]
        preProcess.GetTransiMatrix(user)

        H = HMM(user)
        result = H.hmm(user)
        print user, result

        #resultlist.append([user,result])
        avgresultpro = average(result)
        avgresult.append([user,avgresultpro,state])

        print "average:",average(result), state
        result.insert(0, user)
        resultlist.append(result)
    resultfile = './data/allusers/' + str(k) + 'ResultPro974.csv'
    avgresultfile = './data/allusers/' + str(k) + 'AvgResultPro974.csv'
    CSVFile.Writecsvtofile(resultfile,resultlist)
    CSVFile.Writecsvtofile(avgresultfile,avgresult)
Example #7
def GetMM_weekpro():
    userpro = CSVFile.loadCSVfile1("./data/allusers/MM_userpro.csv")
    userinfo = []
    for item in userpro:
        user = item[0]
        state = item[1]
        # print state
        sumpro = []
        for pro in item[2:9]:
            if len(pro) > 0 and float(pro) > 0:
                # print pro
                #pro = -math.log(float(pro))

                sumpro.append(float(pro))
        #avgpro = sum(sumpro) / len(sumpro)
        sumpro = sum(sumpro)
        # avgpro = GetVar(sumpro)
        #avgpro = GetCov(sumpro)
        userinfo.append([user,state,sumpro])
    CSVFile.Writecsvtofile("./data/allusers/MM_week_result.csv",userinfo)


    print userinfo
Example #8
    def parse_file(self):

        if self.pathfile and os.path.isfile(self.pathfile):

            # read the CSV file containing higher taxonomy
            f = CSVFile.CSVFile(self.pathfile)
            lines = f.get_table()

            # store the already retrieved subfamilies and tribes (these are
            # lists of AssociatedTaxa)
            subfamilies = []
            tribes = []

            def add_associate(in_higher_tax, rank_name, ranks, genus):

                # check if the higher taxonomy (subfamily or tribe) already
                # exists and if so add the corresponding genus
                # if it doesn't exist add a new higher taxonomy record and
                # add the genus to it
                for rank in ranks:
                    if rank.main_taxa == in_higher_tax:
                        rank.add_associate(genus)
                        break
                else:
                    new_rank = TaxaList.AssociatedTaxa(in_higher_tax,
                                                       rank_name)
                    new_rank.add_associate(genus)
                    ranks.append(new_rank)

            # skip the file header
            for subfamily, tribe, genus in lines[1:]:

                add_associate(subfamily, "subfamily", subfamilies, genus)
                add_associate(tribe, "tribe", tribes, genus)

            logger.log_short_report(
                "--- Additional taxonomical information ---")
            logger.log_short_report("Retrived subfamilies: " +
                                    str(len(subfamilies)))
            logger.log_short_report("Retrived tribes: " + str(len(tribes)))
            logger.log_short_report("From file:" + self.pathfile)

            return subfamilies, tribes
        else:
            return None
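add_associate relies on Python's for/else: the else branch runs only when the loop finishes without break, i.e. when no existing rank matched, so a new AssociatedTaxa is created exactly once. A stripped-down illustration of the same pattern with plain strings:

ranks = ['Nepticulinae']
name = 'Pectinivalvinae'
for rank in ranks:
    if rank == name:
        break              # found an existing entry, nothing to add
else:
    ranks.append(name)     # loop ended without break, so add the new entry
print(ranks)               # ['Nepticulinae', 'Pectinivalvinae']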
Example #9
def generate_authority_file(taxa_list, fileinfo):
    ''' Generates the CSV for the authority file. '''

    csv_filename = fileinfo.csv_filename("authority_file")

    # Create a csv file
    f = CSVFile.CSVFile(csv_filename)

    # add the header
    header = [
        "", "Family", "Subfamily", "Tribe", "Genus", "SpecificEpithet",
        "SubspecificEpithet", "InfraspecificRank", "InfraspecificEpithet",
        "Authorship"
    ]

    f.add_line(header)

    # creates the records for each taxon in the list
    for i, taxa in enumerate(taxa_list):
        if i == 0:
            line = ["1"]
        else:
            line = [f"=A{i + 1} + 1"]

        line += [taxa.family]
        line += [prep_field(taxa.subfamily)]
        line += [prep_field(taxa.tribe)]
        line += [prep_field(taxa.genus)]

        # if it is a genus, the species is marked as "sp."
        if taxa.rank == Taxa.Taxa.rank_genus:
            line += ["sp."]
        else:
            line += [prep_field(taxa.specie)]

        line += [prep_field(taxa.subspecie)]
        line += [""]
        line += [""]
        line += [prep_field(taxa.author)]
        line += ["".join(f'{link}, ' for link in taxa.links)[:-2]]

        f.add_line(line)

    f.write()
    logger.log_short_report("Authority file saved file in:" + csv_filename)
Example #10
def GetallfeaturesUsers():
    data_disconnect = pd.read_csv('data/dataforperiod/deviceDisconnect.csv')
    data_connect = pd.read_csv('data/dataforperiod/deviceConnect.csv')
    data_file = pd.read_csv('data/dataforperiod/filecount.csv')
    data_logon = pd.read_csv('data/dataforperiod/logon.csv')
    data_logoff = pd.read_csv('data/dataforperiod/logoff.csv')
    #print (type(data))
    allinfo = []
    for item_connect in data_connect.values:
        user_connect = item_connect[0]

        for item_disconnect in data_disconnect.values:
            user_disconnect = item_disconnect[0]
            if user_connect != user_disconnect:
                continue

            for item_file in data_file.values:
                user_file = item_file[0]
                if user_file != user_disconnect:
                    continue

                for item_logon in data_logon.values:
                    user_logon = item_logon[0]
                    if user_logon != user_file:
                        continue

                    for item_logoff in data_logoff.values:
                        user_logoff = item_logoff[0]
                        if user_logoff != user_logon:
                            continue
                        tem = []
                        tem.extend(item_connect[0:9])
                        tem.extend(item_disconnect[1:9])
                        tem.extend(item_file[1:9])
                        tem.extend(item_logon[1:9])
                        tem.extend(item_logoff[1:10])
                        allinfo.append(tem)
    print(len(allinfo))
    print(allinfo)
    CSVFile.Writecsvtofile('data/dataforperiod/Allfeatures.csv', allinfo)
    for item in allinfo:
        print(item)
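The five nested loops above perform an inner join of the per-user CSVs on the user column. Since the files are already loaded with pandas, the same join can be expressed with pandas.merge; this is a sketch under the assumption that the first column of every file is the shared user identifier, not a drop-in replacement for the exact column slicing above:

import pandas as pd

paths = ['data/dataforperiod/deviceConnect.csv',
         'data/dataforperiod/deviceDisconnect.csv',
         'data/dataforperiod/filecount.csv',
         'data/dataforperiod/logon.csv',
         'data/dataforperiod/logoff.csv']

frames = []
for p in paths:
    df = pd.read_csv(p)
    df = df.rename(columns={df.columns[0]: 'user'})   # unify the join key
    frames.append(df)

merged = frames[0]
for i, df in enumerate(frames[1:], start=1):
    # inner join on the user column; suffix duplicate feature names per file
    merged = pd.merge(merged, df, on='user', suffixes=('', '_%d' % i))
print(len(merged))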
Example #11
import MXD
import CSVFile
import Layer
import Logger

data_source_map_list = CSVFile.csv_to_list()
map_document = MXD.Document()

sde_layers = map_document.retrieve_sde_layers()
repath_candidates = Layer.repath_candidates_list_from_map_layers(sde_layers)

for candidate in repath_candidates:

    matches = [data_source_map for data_source_map in data_source_map_list
               if Layer.is_candidate_data_source_map(data_source_map, candidate)]

    if len(matches) == 1:

        match = matches[0]
        is_repath_success = candidate.repath(match)

        if is_repath_success:
            Logger.log_message("Resolved: " + matches[0][2] + "." + matches[0][3] + " to " +
                               matches[0][1] + " using " + matches[0][0])
        else:

            Logger.log_warning(matches[0][2] + "." + matches[0][3] + " could not be mapped to " +
                               matches[0][1] + " using " + matches[0][0])

    else:
        # no unique data-source match; the original snippet is truncated here
        pass
Example #12
import sys
import CSVFile

file = CSVFile.CSVFile(sys.argv[1])

file.load()

data = file.getColumn(int(sys.argv[2]), float)

print('Maximum: ' + str(max(data)))
print('Minimum: ' + str(min(data)))
Example #13
                elif i == 1:
                    sql = devicesql
                elif i == 2:
                    sql = filesql
                elif i == 3:
                    sql = httpsql
                else:
                    sql = emailsql
                #print sql
                maxtimeresult = db.querytl(sql)
                #print maxtimeresult[0][0]
                if maxtimeresult[0][0] is None:
                    pass
                else:
                    maxtime.append(maxtimeresult[0][0])
                print maxtime
            userinfo.append(user)
            userinfo.append(max(maxtime))
            userinfo.append(state)
            spamwriter.writerow(userinfo)
        #employees.append(userinfo)
        #print employees

    #return employees


if __name__ == '__main__':
    employees = queryEmployees()
    filename = "./data/allusers/Allusers.csv"
    CSVFile.Writecsvtofile(filename, employees)
Example #14
    def getAnalysisResult(self, mainClassPath):

        print("getAnalysisResult(" + mainClassPath + ")")
        pearsonResultPath = os.path.join(mainClassPath, "PearsonResult.csv").replace("\\","/")
        listdir = os.listdir(mainClassPath)

        mainClass = mainClassPath.split("/")[-1]
        #mainClassNum = mainClass[0]
        print("mainClass = " + mainClass)
        #print("mainClassNum = " + mainClassNum)
        mainClassDictValue = self.mainClassDict.get(mainClass, None)
        # if mainClassDictValue is None:
        #     mainClassDictValue = self.mainClassDict.get(mainClass, None)

        if mainClassDictValue is None:
            mainClassDictValue = self.defaultSelectProperty

        print("mainClassDictValue = ", mainClassDictValue)
        if mainClassDictValue is None:
            raise Exception("getSlnFiles(" + mainClass + "): 未正確設定欲做為皮爾森分析的屬性,請正確設定 self.mainClassDict 或 self.defaultSelectProperty !!")

        subClassFolderSet = set()
        com_phenoSet = set()
        for lists in listdir:
            if self.re_SubClass.match(lists):
                subClassFolderSet.add(lists)

            if self.re_Com_Pheno.match(lists):
                com_phenoSet.add(lists)

        subClassCount = len(subClassFolderSet)
        print("subClassCount = " + str(subClassCount))
        if len(com_phenoSet) != 1:
            raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\.[(xlsx)(xls)(csv)])!")

        print("Get Excel File: " + os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\","/"))
        excel = Excel(os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\","/"), self.blnShowExcel, self.blnForceRestartExcel)

        #excel.get_sheet(1)
        self.getPhoneSheet(excel)

        dataRecoderCount = excel.sheetRowCounts - 1
        print("dataRecoderCount = " + str(dataRecoderCount))

        #randomPickLists = randomPickNGroup(dataRecoderCount, subClassCount)

        randomPickDict = {}

        case = 0
        for subClassFolder in subClassFolderSet:
            randomPickList = []
            case+=1
            phone_v_Path = os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\","/")
            print("Read phone_v_File(" + phone_v_Path + ")")
            csv = CSVFile(phone_v_Path, decoding="utf-8")
            #print(csv.readToString())
            list2D = csv.readTo2DList(",")
            #print list2D
            #print "-------------------------------------"

            mappingColIdx = None
            if is_integer(self.pheno_MappingColId):
                mappingColIdx = self.pheno_MappingColId - 1
            else:
                m = -1
                for colName in list2D[0]:
                    m+=1
                    if colName.strip().lower() == self.pheno_MappingColId.strip().lower():
                        mappingColIdx = m

            #print("mappingColIdx = " + str(mappingColIdx))

            list2D.pop(0)
            #print list2D[0]
            for line in list2D:
                #print(line)
                #print(line[0])
                if line[mappingColIdx] != "":
                    data = line[mappingColIdx].lower()
                    randomPickList.append(data)

            randomPickDict[subClassFolder] = randomPickList
            pass

        print ("randomPickDict = ", randomPickDict)
        for (k, randomList) in randomPickDict.items():
            print (len(randomList))

        colNamesString = list2str(excel.get_rowData(1))
        #range1 = excel.get_range(1, 1, 1 , excel.sheetColCounts)
        print("list2str=", colNamesString)

        #case = 0

        #subClassesResult = [] #[v1, v2, v3, ...]
        subClassesResult = {} #[v1, v2, v3, ...]
        for subClass in subClassFolderSet:
            result = self.getSlnFiles(excel, mainClassDictValue, mainClassPath, subClass, randomPickDict[subClass])
            #subClassesResult.append(result)
            subClassesResult[subClass] = result
            #case+=1

        print(subClassesResult)
        excel.close()
        print("++++++++++++++++++++++++++++++++++++++++++++++++")

        csv = CSVFile(pearsonResultPath, "utf-8")
        classCaseNum = 0
        #for slnFilesResult  in subClassesResult:
        for (subClassName, slnFilesResult)  in subClassesResult.items():
            #classRecoderSize = randomPickLists[classCaseNum]
            classRecoderSize = len(randomPickDict[subClassName])


            for lineNum in range(1, classRecoderSize+2):
                strLine = ""
                for sectionNum in range(1, len(mainClassDictValue)+1):
                    #print(slnFilesResult[sectionNum-1][lineNum-1])
                    strLine = strLine + list2str(slnFilesResult[sectionNum-1][lineNum-1]) + ",,"

                csv.writeLine(strLine)

            csv.writeLine("")
            classCaseNum+=1

        #subClassNum=0
        subClassResultDict = OrderedDict()
        for (subClassName, slnFilesResult)  in subClassesResult.items():
            #subClassNum+=1

            slnClassNum=0
            slnResultDict = OrderedDict()
            #subClass_v1
            v_type = slnFilesResult[0][0][2][9:]
            for slnFile in slnFilesResult:
                slnClassNum+=1
                pearsonr_list1 = []
                pearsonr_list2 = []
                #type_9
                select_Type = slnFile[0][5][5:]

                slnFile.pop(0)
                for lineData in slnFile:
                    print("lineData = ", lineData)
                    pearsonr_list1.append(lineData[4])
                    pearsonr_list2.append(lineData[5])

                print("pearsonr_list1(v_type=" + str(v_type) +  ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " , pearsonr_list1)
                print("pearsonr_list2(v_type=" + str(v_type) +  ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " , pearsonr_list2)

                pearson_result = pearsonr(pearsonr_list1, pearsonr_list2)
                pearsonr_list1[:] = []
                pearsonr_list2[:] = []
                print("pearsonr_result(v_type=" + str(v_type) +  ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " + str(pearson_result))
                if str(select_Type) in slnResultDict:
                    raise Exception("select_Type 重複,請檢查 self.mainClassDict 或 self.defaultSelectProperty 是否有重複設定!!")
                slnResultDict[str(select_Type)] = pearson_result

            subClassResultDict[str(v_type)] = slnResultDict

        return subClassResultDict
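For reference, scipy.stats.pearsonr (used above to compute pearson_result) returns a pair (correlation coefficient, two-sided p-value); a minimal usage example with made-up numbers:

from scipy.stats import pearsonr

xs = [1.0, 2.0, 3.0, 4.0, 5.0]
ys = [1.1, 1.9, 3.2, 3.8, 5.1]
r, p_value = pearsonr(xs, ys)
print(r, p_value)   # r is close to 1 for this nearly linear data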
Example #15
    def getPhenoResult(self, mainClassPath):
        print("getPhenoResult(" + mainClassPath + ")")
        listdir = os.listdir(mainClassPath)

        mainClass = mainClassPath.split("/")[-1]
        print("mainClass = " + mainClass)

        #subClassFolderSet = set()
        com_phenoSet = set()
        for lists in listdir:
            # if self.re_SubClass.match(lists):
            #     subClassFolderSet.add(lists)

            if self.re_Com_Pheno.match(lists):
                com_phenoSet.add(lists)

        subClassFolderList = self.mainClass_subClassMap.get(mainClass, None)

        if subClassFolderList is None:
            subClassFolderList = self.defaultSubClassList

        subClassCount = len(subClassFolderList)
        print("subClassCount = " + str(subClassCount))
        if len(com_phenoSet) != 1:
            raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\.[(xlsx)(xls)(csv)])!")

        print("Get Excel File: " + os.path.join(mainClassPath, list(com_phenoSet)[0]))
        excel = Excel(os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\","/"), self.blnShowExcel, self.blnForceRestartExcel)

        #excel.get_sheet(1)
        self.getPhoneSheet(excel)

        dataRecoderCount = excel.sheetRowCounts - 1
        print("dataRecoderCount = " + str(dataRecoderCount))

        randomPickLists = randomPickNGroup(dataRecoderCount, subClassCount)
        print ("randomPickList = ", randomPickLists)
        for randomList in randomPickLists:
            print (len(randomList))

        colNamesList = excel.get_rowData(1)
        colNamesString = list2str(colNamesList)
        #range1 = excel.get_range(1, 1, 1 , excel.sheetColCounts)
        print("list2str=", colNamesString)

        case = 0
        #for randomList in randomPickLists:
        for subClassFolder in subClassFolderList:

            phenoPath =  os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\","/")
            phenoExceptPath = os.path.join(mainClassPath, "pheno_except_" + str(subClassFolder) + ".csv").replace("\\","/")

            csv = CSVFile(phenoPath, "utf-8")
            csv.writeLine(colNamesString)

            print("len(randomPickLists[case] = ", len(randomPickLists[case]))
            for num in randomPickLists[case]:

                rowData = excel.get_rowData(num+1)
                rowData = list(rowData)

                #if not self.isExcelFirstRowFloat:
                for colNum in self.com_pheno_NotFloatCols:
                    mappingColIdx = None
                    if is_integer(colNum):
                        #print("is_integer")
                        mappingColIdx = colNum - 1
                    else:
                        #print("is not integer")
                        m = -1
                        for colName in colNamesList:
                            #print(colName, colNum)
                            m+=1
                            if str(colNum).strip().lower() == colName.strip().lower():
                                mappingColIdx = m

                    #print("mappingColIdx = " + str(mappingColIdx))

                    try:
                        rowData[mappingColIdx] = int(rowData[mappingColIdx])
                    except Exception:
                        print("Change Excel Value Warning(int(rowData[" + str(mappingColIdx) + "])): " + str(rowData[mappingColIdx]), "You can check self.com_pheno_NotFloatCols, ignore this warning  if expected !!")
                        rowData[mappingColIdx] = str(rowData[mappingColIdx])


                csv.writeLine(list2str(rowData))

            csv = CSVFile(phenoExceptPath, "utf-8")
            csv.writeLine(colNamesString)

            differenceList = []
            i = 0
            for it in randomPickLists:
                if i != case:
                    differenceList.extend(it)
                i+=1

            print("differenceList", differenceList)
            print("differenceList len = " + str(len(differenceList)))


            for num in differenceList:
                rowData = excel.get_rowData(num+1)
                rowData = list(rowData)

                #if not self.isExcelFirstRowFloat:
                for colNum in self.com_pheno_NotFloatCols:
                    mappingColIdx = None
                    if is_integer(colNum):
                        #print("is_integer")
                        mappingColIdx = colNum - 1
                    else:
                        #print("is not integer")
                        m = -1
                        for colName in colNamesList:
                            #print(colName, colNum)
                            m+=1
                            if str(colNum).strip().lower() == colName.strip().lower():
                                mappingColIdx = m

                    #print("mappingColIdx = " + str(mappingColIdx))

                    try:
                        rowData[mappingColIdx] = int(rowData[mappingColIdx])
                    except Exception:
                        print("Change Excel Value Warning(int(rowData[" + str(mappingColIdx) + "])): " + str(rowData[mappingColIdx]), "You can check self.com_pheno_NotFloatCols, ignore this warning  if expected !!")
                        rowData[mappingColIdx] = str(rowData[mappingColIdx])


                csv.writeLine(list2str(rowData))

            case+=1

        excel.close()
Example #16
def GetweekconnectUser():
    connectuser = CSVFile.loadCSVfile1('./data/deviceConnect.csv')
    #weekuser = CSVFile.loadCSVfile1('./data/allusers/weekdayvar.csv')
    #weekuser = CSVFile.loadCSVfile1('./data/allusers/weekpro.csv')
    weekuser = CSVFile.loadCSVfile1('./data/allusers/MM_week_result.csv')

    userinfo = []
    for citem in connectuser:
        cuser = citem[0]
        for witem in weekuser:
            wuser = witem[0]
            if cuser != wuser:
                continue
            userinfo.append(witem)
    print len(userinfo)
    print userinfo
    #CSVFile.Writecsvtofile('./data/allusers/MM_connect_week.csv',userinfo)

    FTPR = []
    TPR = []
    FPR = []
    thr = 0
    a = 0
    b = 1
    userinfo = weekuser
    for i in range(6000):
        #for i in range(15000):
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        for item in userinfo:
            state = int(item[1])
            # print state
            prestate = float(item[2])
            # print prestate

            if state == 0:
                # normalavg.append(prestate)
                if prestate > thr:
                    tn = tn + 1
                    #fp = fp + 1

                else:
                    fp = fp + 1
                    #tn = tn + 1
            if state == 1:
                # abnormalavg.append(prestate)
                if prestate > thr:
                    fn = fn + 1
                    #tp = tp + 1
                else:
                    tp = tp + 1
                    #fn = fn + 1
        if (tp + fn) == 0 or (fp + tn) == 0:
            thr = thr + 0.1
            continue
        tpr = float(tp) / (float(tp) + float(fn))
        fpr = float(fp) / (float(fp) + float(tn))
        print fpr, tpr

        TPR.append(tpr)
        FPR.append(fpr)
        thr = thr + 0.1
        '''
        if i<10:
            i = 0.1*i
        if i < 100 and i>10:
            thr = i
        if i >= 100 and i <= 10000:
            thr = i * 100
        if i > 10000  and i<40000:
            thr = i * 1000
            # thr=i*100
        if i > 40000:
            thr = i * 100000
        '''
    '''
    p = float(tp)/(float(tp)+float(fp))
    frp = float(fp)/(float(fp)+float(tn))
    r = float(tp)/(float(tp)+float(fn))
    print"tp:",tp,"fp:",fp,"tn:",tn,"fn:",fn
    print "p:",p,"frp:",frp,"r:",r
    '''
    # list.sort(normalavg)
    # list.sort(abnormalavg)
    #CSVFile.Writecsvtofile('./data/allusers/weekconnetproFT.CSV',)
    data = pd.DataFrame({'FPR': FPR, 'TPR': TPR})
    data.to_csv('./data/allusers/result/ROC_data/MM_weekconnectpro.csv')

    plt.figure()
    plt.plot(FPR, TPR, 'r')
    x = [0, 1]
    y = x
    plt.plot(x, y, 'b')
    plt.show()
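The sweep above builds the ROC curve by hand, treating a lower score as more anomalous (state 1 counts as a true positive when prestate <= thr). If scikit-learn is available, the curve can be cross-checked in a few lines; the sketch below reloads the same file and negates the score so that larger values mean "more anomalous", as roc_curve expects:

from sklearn.metrics import roc_curve, auc
import CSVFile

rows = CSVFile.loadCSVfile1('./data/allusers/MM_week_result.csv')
labels = [int(item[1]) for item in rows]        # 1 = anomalous user
scores = [-float(item[2]) for item in rows]     # negate: lower probability = more anomalous
fpr, tpr, thresholds = roc_curve(labels, scores)
print(auc(fpr, tpr))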
Example #17
'''
points = [
     [1, 2],
     [2, 1],
     [3, 1],
     [5, 4],
     [5, 5],
     [6, 5],
     [10, 8],
     [7, 9],
     [11, 5],
     [14, 9],
     [14, 14],
     ]
'''
datalist = CSVFile.loadCSVfile1('./data/allusers/userpro1.csv')
state = []
points = []
for item in datalist:
    tem = []
    #print item[1]
    state.append(item[1])
    for i in item[2:42]:
        if len(i) > 0:
            tem.append(float(i))
        else:
            tem.append(0)

    points.append(tem)
    print len(tem)
    print tem
Example #18
        lists['PERCENTPESQS'].append(float(b / t))
        lists['PERCENTRANK'].append(float(r) / maximo)

    return lists


def getNiveis(listaBolsistas):
    niveis = {'SR': [], '1A': [], '1B': [], '1C': [], '1D': [], '2': []}

    for bolsista, nivel in listaBolsistas:
        niveis[nivel].append(bolsista)

    return niveis


todos = CSVFile.CSVFile(sys.argv[1])
bolsistas = CSVFile.CSVFile(sys.argv[2])

todos.load()
bolsistas.load()

listaBolsistas = bolsistas.getColumns([(2, int), (4, str)])

niveis = getNiveis(listaBolsistas)

n1 = niveis['1A'] + niveis['1B'] + niveis['1C'] + niveis['1D']
codes = n1 + niveis['SR'] + niveis['2']

codes = set(codes)

PR = contaRank(todos, codes, 2)
Example #19
def GetuserproROC():
    '''
    userpro = CSVFile.loadCSVfile1("./data/allusers/userpro.csv")
    weekpro = []
    zeronumber = 0
    for item in userpro:
        user = item[0]
        state = item[1]
        pro =0.0
        productpro = 1.0
        userinfo = []

        for i in range(8):
            if i ==1 or i==0 or len(item[i])<1:
                continue
            pro = float(item[i])
            if (pro<=0):

                zeronumber  = zeronumber+1
                print "*******************************************************************************Error!!"
                continue
            print "pro:",pro
            productpro = (-math.log(pro))*productpro
            print "productpro:",productpro

        userinfo .append(user)
        userinfo.append(state)
        userinfo.append(productpro)
        weekpro.append(userinfo)
    print "zeronumber:",zeronumber
    print weekpro
    CSVFile.Writecsvtofile("./data/allusers/weekpro.csv",weekpro)
    normalavg = []
    abnormalavg = []
    '''
    weekpro = CSVFile.loadCSVfile1("./data/allusers/weekpro.csv")
    TPR = []
    FPR = []
    thr=0
    a =0
    b =1
    for i in range(11000):
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        for item in weekpro:
            state = int(item[1])
            # print state
            prestate = float(item[2])
            # print prestate

            if state == 0:
                #normalavg.append(prestate)
                if prestate>thr:
                    fp = fp +1
                else:
                    tn = tn+1
            if state == 1:
                #abnormalavg.append(prestate)
                if prestate>thr:
                    tp = tp+1
                else:
                    fn = fn+1

        if (tp+fn)==0 or (fp+tn)==0:
            continue
        tpr = float(tp)/(float(tp)+float(fn))
        fpr = float(fp)/(float(fp)+float(tn))
        print tpr,fpr
        TPR.append(tpr)
        FPR.append(fpr)
        if i<100:
            thr = i
        if i>=100 and i<=10000:
            thr = i*100
        if i>10000:
            thr = i*1000
        #thr=i*100



    '''
    p = float(tp)/(float(tp)+float(fp))
    frp = float(fp)/(float(fp)+float(tn))
    r = float(tp)/(float(tp)+float(fn))
    print"tp:",tp,"fp:",fp,"tn:",tn,"fn:",fn
    print "p:",p,"frp:",frp,"r:",r
    '''
    #list.sort(normalavg)
    #list.sort(abnormalavg)

    plt.figure()
    plt.plot(FPR, TPR, 'r')
    x=[0,1]
    y=x
    plt.plot(x,y,'r')
    plt.show()
Example #20
def Gettraintestdata(sequenlen):
    k = sequenlen
    #k = 4
    employees = Employees.queryEmployees()
    count = 0
    result = []
    count = 0
    validusers = []
    invalidusers = []
    for item in employees:
        user = item[0]
        singleFile = "./data/allusers/SingleSequence/" + user + ".csv"
        list = CSVFile.loadCSVfile1(singleFile)
        maxtime = list[-1][0]
        mintime = list[0][0]
        maxtime = datetime.datetime.strptime(maxtime, '%Y %j %H:%M:%S')
        mintime = datetime.datetime.strptime(mintime, '%Y %j %H:%M:%S')
        middletime = maxtime - relativedelta(months=2)

        if mintime > (middletime - relativedelta(months=3)):
            invalidusers.append(user)
            continue
            #if mintime > (maxtime - relativedelta(months = 1)):
            #   middletime  = mintime+relativedelta(days =20)
            #else:
            #   middletime = mintime + relativedelta(months=1)

        TrainUserSequence = []
        TestUserSequence = []
        for i in (range(len(list) - k)):
            sequ = ''
            state = ''
            time = datetime.datetime.strptime(list[i][0], '%Y %j %H:%M:%S')

            if time < middletime:
                for j in range(k):
                    sequ = sequ + list[i + j][1] + '-'
                    state = state + str(list[i + j][1])[0:2]
                tem = [sequ, state]
                TrainUserSequence.append(tem)
                print "time<middletime", time < middletime, "TrainUserSequence"
            if time >= middletime:
                for j in range(k):
                    sequ = sequ + list[i + j][1] + '-'
                    state = state + str(list[i + j][1])[0:2]
                tem = [sequ, state]
                TestUserSequence.append(tem)
                print "time>middletime", time >= middletime, "TestUserSequence"
        #validusers.append(user)
        print count, user
        count = count + 1
        trainFile = "./data/allusers/Train/" + user + ".csv"
        testFile = "./data/allusers/Test/" + user + ".csv"
        CSVFile.Writecsvtofile(trainFile, TrainUserSequence)
        CSVFile.Writecsvtofile(testFile, TestUserSequence)

    allusers = CSVFile.loadCSVfile1("./data/allusers/Allusers_state.csv")
    resultlist = []
    for invalid in invalidusers:
        for i in range(len(allusers)):
            if invalid == allusers[i][0]:
                del (allusers[i])
                break

    CSVFile.Writecsvtofile("./data/allusers/validusers.csv", allusers)
Example #21
    family_name = "Nepticulidae"

    ass = Associations()

    subfamilies, tribes = ass.find_associations(family_name)

    print("--- subfamilies ---")
    for sub in subfamilies:
        print(sub)

    print("--- tribes ---")
    for tribe in tribes:
        print(tribe)

    csv = CSVFile.CSVFile("./funet/" + family_name + "_subfamiles_tribes.csv")

    csv.add_line(["Subfamily", "Tribe", "Genus"])

    tot_genus = []

    for sub in subfamilies:
        for genus in sub.associates:
            tot_genus.append(genus)

    for tribe in tribes:
        for genus in tribe.associates:
            if genus not in tot_genus:
                tot_genus.append(genus)

    for genus in tot_genus:
Example #22
def GetMMweekdayacv_PR_XY():
    weekpro = CSVFile.loadCSVfile1('./data/allusers/connect_weekdayvar.csv')
    TPR = []
    FPR = []
    TRP = []
    RECALL = []
    thr = 0
    trp = 0
    recall = 0
    r = 0.001
    for i in range(9000):
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        for item in weekpro:
            state = int(item[1])
            # print state
            prestate = float(item[2])
            # print prestate

            if state == 0:
                # normalavg.append(prestate)
                if prestate < thr:
                    fp = fp + 1
                else:
                    tn = tn + 1
            if state == 1:
                # abnormalavg.append(prestate)
                if prestate < thr:
                    tp = tp + 1
                else:
                    fn = fn + 1

        #if (tp + fn) == 0 or (fp + tn) == 0:
        if (tp + fp) == 0 or (fp + tn) == 0:
            if i < 4000:
                thr = thr + r
            if i >= 4000:
                thr = thr + 0.01
            print "*******************"
            continue
        #tpr = float(tp) / (float(tp) + float(fn))
        #fpr = float(fp) / (float(fp) + float(tn))
        trp = float(tp) / (float(tp) + float(fp))
        recall = float(tp) / (float(tp) + float(fn))
        #print "trp:",trp,"recall:",recall
        #print tpr, fpr
        #TPR.append(tpr)
        #FPR.append(fpr)
        TRP.append(trp)
        RECALL.append(recall)

        if i < 4000:
            thr = thr + r
        if i >= 4000:
            thr = thr + 0.01

    data = pd.DataFrame()
    data['RECALL'] = RECALL
    data['TRP'] = TRP
    data.to_csv('./data/allusers/result/PR_data/connectweekday_avg_pr.csv')
    plt.figure()
    #plt.plot(FPR, TPR, 'r')
    plt.plot(RECALL, TRP, 'r')
    x = [0, 1]
    y = [1, 0]
    plt.plot(x, y, 'b')
    plt.show()
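GetMMweekdayacv_PR_XY sweeps a threshold and stores precision in TRP (despite the name, it is tp / (tp + fp)) and recall in RECALL, again with lower scores treated as anomalous. A scikit-learn cross-check under the same convention, reloading the same file:

from sklearn.metrics import precision_recall_curve
import CSVFile

rows = CSVFile.loadCSVfile1('./data/allusers/connect_weekdayvar.csv')
labels = [int(item[1]) for item in rows]
scores = [-float(item[2]) for item in rows]   # negate so larger = more anomalous
precision, recall, thresholds = precision_recall_curve(labels, scores)
print(list(zip(recall, precision))[:5])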
Example #23
import sys
import CSVFile

bolsistas = CSVFile.CSVFile(sys.argv[1])

bolsistas.load()

ranks = bolsistas.getColumn(3, int)

top100 = 0
top1k = 0
top10k = 0

for rank in ranks:
    if rank <= 10000:
        top10k = top10k + 1

        if rank <= 1000:
            top1k = top1k + 1

            if rank <= 100:
                top100 = top100 + 1

print("Top 100: " + str(top100))
print("Top 1k: " + str(top1k))
print("Top 10k: " + str(top10k))
Example #24
import sys
import CSVFile
from operator import itemgetter as getter

def calculateRank(todos, codes):
	for pesq in todos:
		pass  # the loop body is truncated in the original snippet

todos = CSVFile.CSVFile(sys.argv[1])
bolsistas = CSVFile.CSVFile(sys.argv[2])

todos.load()
bolsistas.load()

codes = bolsistas.getColumn(1, int)

todosRankeados = CSVFile.CSVFile()
Example #25
def plotresult():
    avgresult = CSVFile.loadCSVfile1('./data/allusers/AvgResultPro974.csv')
    resultpro = CSVFile.loadCSVfile1('./data/allusers/ResultPro974.csv')
    normalavg =[]
    abnormalavg = []
    normalcov =[]
    abnormalcov = []

    for item in avgresult:
        state = int(item[2])
        #print state
        prestate = float(item[1])
        #print prestate
        if state == 0:
            normalavg.append(prestate)
        if state == 1:
            abnormalavg.append(prestate)

    list.sort(normalavg)
    list.sort(abnormalavg)

    for item in resultpro:

        state = int(item[1])
        prolist = []
        for number in item[2:]:
            if len(number)>0:
                prolist.append(float(number))
                #print float(number)
        prestate = float(cov(prolist))
        print prestate
        if state == 0:
            normalcov.append(prestate)
        if state == 1:
            abnormalcov.append(prestate)


    print normalcov
    print abnormalcov

    list.sort(normalcov)
    list.sort(abnormalcov)


    plt.figure()
    x1 = range(len(normalcov))
    x2 = range(len(abnormalcov))
    #y = result[:1000]
    #x = [0,1]
    plt.ylim(0, 0.3)
    # print x
    # y=[0.1,0.2,0.3,0.4,0.5]
    plt.plot(x2,abnormalcov,'r')
    plt.plot(x1, normalcov, 'b')
    # plt.plot(x, result)
    #plt.legend()
    plt.show()

    '''