Пример #1
0
    def convSim(self, ifile, ofile, map1, logDir):
        """Rewrite a numbered transaction file using ``self.ef1`` names.

        ``ifile`` is read as a headerless file whose first column holds one
        blank-separated list of numbers per line.  Every number is increased
        by one, looked up in ``map1`` (joined on ``num1``) to obtain the
        ``self.ef1`` value, and the values are gathered back per input line
        and written to ``<logDir>/<ofile>``.

        :param ifile: headerless numbered transaction file
        :param ofile: name of the output file created inside ``logDir``
        :param map1: mapping data providing ``num1`` and ``self.ef1``
        :param logDir: directory receiving ``ofile``
        """
        f = None
        f <<= nm.mcut(nfni=True, f="0:tra", i=ifile)
        # strip one trailing blank from each transaction line
        f <<= nm.msed(f="tra", c=' $', v="")
        # Per-line id.  It must be called "num0": the final mtra groups on
        # k="num0", and "num1" is created further down by mcal.  Naming it
        # "num1" here left "num0" undefined and duplicated "num1".
        f <<= nm.mnumber(q=True, S=1, a="num0")
        # one row per element of the transaction
        f <<= nm.mtra(r=True, f="tra:num11")
        # remember the element order so it can be restored when regrouping
        f <<= nm.mnumber(q=True, S=1, a="order")
        # NOTE(review): the +1 presumably converts 0-based numbers to the
        # 1-based numbering of map1 -- confirm against the producer of ifile.
        f <<= nm.mcal(c='${num11}+1', a="num1")
        f <<= nm.mjoin(k="num1", m=map1, f=self.ef1)
        f <<= nm.mtra(k="num0", s="order%n,num1%n", f=self.ef1)
        f <<= nm.mcut(f=self.ef1, o="{}/{}".format(logDir, ofile))
        f.run()
Пример #2
0
    def pair2tra(self, ei, ef1, ef2, traFile, mapFile1, mapFile2):
        """Convert an edge (pair) file into a numbered transaction file.

        The distinct non-null values of ``ef1`` and ``ef2`` are numbered and
        saved to ``mapFile1`` (``node1``/``num1``) and ``mapFile2``
        (``node2``/``num2``).  The edges are then rewritten with those
        numbers, the ``num2`` values are gathered per ``num1``, and the
        result is written without a header line to ``traFile``.

        The ``ei``/``ef1``/``ef2`` arguments are honoured; previously they
        were ignored in favour of the identically named instance attributes.

        :param ei: edge file
        :param ef1: name of the first node field in ``ei``
        :param ef2: name of the second node field in ``ei``
        :param traFile: output transaction file (no header)
        :param mapFile1: output mapping file for ``ef1`` values
        :param mapFile2: output mapping file for ``ef2`` values
        """
        # node1 -> num1 mapping (also saved to mapFile1)
        f1 = None
        f1 <<= nm.mcut(f="{}:node1".format(ef1), i=ei)
        f1 <<= nm.mdelnull(f="node1")
        f1 <<= nm.muniq(k="node1")
        f1 <<= nm.mnumber(s="node1", a="num1", o=mapFile1)

        # node2 -> num2 mapping (also saved to mapFile2)
        f2 = None
        f2 <<= nm.mcut(f="{}:node2".format(ef2), i=ei)
        f2 <<= nm.mdelnull(f="node2")
        f2 <<= nm.muniq(k="node2")
        f2 <<= nm.mnumber(s="node2", a="num2", o=mapFile2)

        # edges expressed with the numbers, one transaction per num1
        f3 = None
        f3 <<= nm.mcut(f="{}:node1,{}:node2".format(ef1, ef2), i=ei)
        f3 <<= nm.mjoin(k="node1", m=f1, f="num1")
        f3 <<= nm.mjoin(k="node2", m=f2, f="num2")
        f3 <<= nm.mcut(f="num1,num2")
        f3 <<= nm.msortf(f="num1,num2%n")
        f3 <<= nm.mtra(k="num1", s="num2%n", f="num2")
        f3 <<= nm.msortf(f="num1%n")
        f3 <<= nm.mcut(f="num2", nfno=True, o=traFile)
        f3.run()
        # NOTE(review): dumps the generated file to stdout; looks like a
        # debugging aid -- kept because callers may rely on the output.
        os.system("cat " + traFile)
Пример #3
0
    def edge2mtx(self, ei, itra, map1, map2):
        """Turn the edge file ``ei`` into a numbered transaction file.

        The distinct non-null values of ``self.ef1`` and ``self.ef2`` are
        numbered from 1 and saved to ``map1`` (column ``num1``) and ``map2``
        (column ``num2``).  Every edge is rewritten with those numbers, the
        ``num2`` values are gathered per ``num1`` and the headerless result,
        with commas turned into blanks, is written to ``itra``.

        :param ei: edge file
        :param itra: output transaction file
        :param map1: output mapping file for ``self.ef1`` values
        :param map2: output mapping file for ``self.ef2`` values
        """

        def number_values(field, num_field, map_file):
            # distinct non-null values of `field`, numbered from 1
            flow = nm.mcut(f=field, i=ei)
            flow <<= nm.muniq(k=field)
            flow <<= nm.mdelnull(f=field)
            flow <<= nm.mnumber(q=True, a=num_field, S=1, o=map_file)
            return flow

        first_map = number_values(self.ef1, "num1", map1)
        second_map = number_values(self.ef2, "num2", map2)

        stages = [
            nm.mcut(f=[self.ef1, self.ef2], i=ei),
            nm.mjoin(k=self.ef1, m=first_map, f="num1"),
            nm.mjoin(k=self.ef2, m=second_map, f="num2"),
            nm.mcut(f="num1,num2"),
            nm.mtra(k="num1", f="num2"),
            nm.msortf(f="num1%n"),
            nm.mcut(f="num2", nfno=True),
            # external `tr` replaces every comma with a blank
            nm.cmd("tr ',' ' '"),
            nm.mwrite(o=itra),
        ]
        edge_flow = None
        for stage in stages:
            edge_flow <<= stage
        edge_flow.run()
Пример #4
0
    def __init__(self, db, outtf=True):
        """Prepare the numbered transaction file and per-class weight files.

        :param db: input database; this constructor reads its ``items``,
            ``clsNameRecSize``, ``clsFN``, ``cFile``, ``itemFN``, ``idFN``
            and ``file`` attributes
        :param outtf: stored unchanged in ``self.outtf``
        """
        self.size = None
        self.pFile = None
        self.tFile = None
        self.temp = mtemp.Mtemp()
        self.db = db  # input database
        self.file = self.temp.file()
        self.outtf = outtf
        self.weightFile = {}
        self.posWeight = {}
        self.sigma = {}
        self.msgoff = True

        item_tbl = self.db.items

        # One weight file per class: the class column of db.cFile is written
        # without a header after mchgstr has rewritten its values
        # ("<class>:<positive weight>", O=-1).
        for cls_name, pos_size in db.clsNameRecSize.items():
            weight_path = self.temp.file()
            self.weightFile[cls_name] = weight_path
            omega = self.calOmega(pos_size)
            self.posWeight[cls_name] = omega

            weight_flow = nm.mcut(nfno=True, f=self.db.clsFN, i=self.db.cFile)
            weight_flow <<= nm.mchgstr(
                nfn=True,
                f=0,
                O=-1,
                o=weight_path,
                c="%s:%s" % (cls_name, omega),
            )
            weight_flow.run()

        # Convert items from symbols to numbers and write one headerless
        # transaction per db.idFN value to self.file.
        numbering = nm.mjoin(
            k=self.db.itemFN,
            K=item_tbl.itemFN,
            i=self.db.file,
            m=item_tbl.file,
            f=item_tbl.idFN,
        )
        remaining = (
            nm.mcut(f=self.db.idFN + "," + item_tbl.idFN),
            nm.mtra(k=self.db.idFN, f=item_tbl.idFN),
            nm.mcut(f=item_tbl.idFN, nfno=True, o=self.file),
        )
        for stage in remaining:
            numbering <<= stage
        numbering.run()
Пример #5
0
def toNum():
    """Derive the three analysis input files from ``online_all.csv``.

    All paths live under the module-level ``datPath``:

    * ``onlineM_all.csv``  - distinct InvoiceNo/StockCode pairs (Take)
    * ``onlineT_all.csv``  - one headerless, blank-separated line of
      numbered stock codes per invoice (Take.core)
    * ``onlineO_all.basket`` - the same lines with commas instead of
      blanks (Orange)
    """
    for size in ["all"]:
        src_csv = "%s/online_all.csv" % datPath
        take_core_file = "%s/onlineT_all.csv" % datPath  # data for Take.core
        orange_file = "%s/onlineO_all.basket" % datPath  # data for Orange
        take_file = "%s/onlineM_all.csv" % datPath  # data for Take

        # distinct (invoice, stock code) pairs
        pair_flow = None
        for stage in (
                nm.mcut(f="InvoiceNo,StockCode", i=src_csv),
                nm.muniq(k="InvoiceNo,StockCode"),
                nm.mfldname(q=True, o=take_file),
        ):
            pair_flow <<= stage
        pair_flow.run(msg=debug)

        # stock code -> sequence number table
        code_numbers = None
        for stage in (
                nm.mcut(f="StockCode", i=src_csv),
                nm.muniq(k="StockCode"),
                nm.mnumber(s="StockCode", a="num"),
        ):
            code_numbers <<= stage

        # replace each stock code by its number, one line per invoice
        basket_flow = None
        for stage in (
                nm.mjoin(k="StockCode", m=code_numbers, f="num", i=src_csv),
                nm.mcut(f="InvoiceNo,num:StockCode"),
                nm.mtra(k="InvoiceNo", f="StockCode"),
                nm.mcut(f="StockCode", nfno=True, o=take_core_file),
        ):
            basket_flow <<= stage
        basket_flow.run(msg=debug)

        # Orange expects comma-separated baskets
        os.system("tr ' ' ',' <%s >%s" % (take_core_file, orange_file))
Пример #6
0
	def __init__(self,db):
		"""Store ``db`` and write its transactions as numbered items.

		:param db: input database; this constructor reads its ``items``,
			``itemFN``, ``idFN`` and ``file`` attributes
		"""
		self.db = db  # input database

		# enumeration settings, unset until configured elsewhere
		self.eArgs = self.type = None
		self.minCnt = self.minSup = None
		self.maxCnt = self.maxSup = None
		self.minLen = self.maxLen = None
		self.top = None
		self.skipTP = False

		# result file names
		self.pFile = self.tFile = None
		self.msgoff = True

		self.temp = mtemp.Mtemp()
		self.file = self.temp.file()
		item_tbl = self.db.items

		# Convert items from symbols to numbers; self.file receives one
		# headerless transaction per db.idFN value.
		numbering = nm.mjoin(
			k=self.db.itemFN,
			K=item_tbl.itemFN,
			m=item_tbl.file,
			f=item_tbl.idFN,
			i=self.db.file,
		)
		for stage in (
			nm.mcut(f=self.db.idFN+","+item_tbl.idFN),
			nm.mtra(k=self.db.idFN,f=item_tbl.idFN),
			nm.mcut(f=item_tbl.idFN,nfno=True,o=self.file),
		):
			numbering <<= stage
		numbering.run()
Пример #7
0
    def calGsize(self, file):
        """Return ``(nodesize, edgesize)`` for a headerless transaction file.

        ``edgesize`` is the record count reported by ``nu.mrecount``;
        ``nodesize`` is the number of distinct elements found after the
        first column is expanded with ``mtra``.

        :param file: path of the headerless file to measure
        :return: tuple ``(nodesize, edgesize)``
        """
        edge_total = nu.mrecount(i=file, nfni=True)

        stages = (
            nm.mcut(i=file, f="0:tra", nfni=True),
            nm.mtra(f="tra"),
            nm.muniq(k="tra"),
            nm.mcount(a="cnt"),
            nm.mcut(f="cnt"),
            nm.writelist(dtype="cnt:int"),
        )
        counter = None
        for stage in stages:
            counter <<= stage

        # the flow yields a single row holding the single "cnt" value
        rows = counter.run()
        node_total = rows[0][0]

        return node_total, edge_total
Пример #8
0
 def __init__(self, db, outtf=True):
     """Store ``db`` and write its time-ordered, numbered transactions.

     :param db: input database; this constructor reads its ``items``,
         ``itemFN``, ``idFN``, ``timeFN`` and ``file`` attributes
     :param outtf: stored unchanged in ``self.outtf``
     """
     self.size = None
     self.temp = mtemp.Mtemp()
     self.db = db  # input database
     self.file = self.temp.file()
     item_tbl = self.db.items
     self.outtf = outtf
     self.top = None
     self.msgoff = True

     # Convert items from symbols to numbers.  Items of each db.idFN value
     # are lined up by db.timeFN (numeric) and written without a header.
     kept_fields = ",".join([self.db.idFN, self.db.timeFN, item_tbl.idFN])
     numbering = nm.mjoin(
         k=self.db.itemFN,
         K=item_tbl.itemFN,
         m=item_tbl.file,
         f=item_tbl.idFN,
         i=self.db.file,
     )
     for stage in (
             nm.mcut(f=kept_fields),
             nm.mtra(k=self.db.idFN, s=self.db.timeFN + "%n", f=item_tbl.idFN),
             nm.mcut(f=item_tbl.idFN, nfno=True, o=self.file),
     ):
         numbering <<= stage
     numbering.run()
Пример #9
0
	def __init__(self,iFile,itemFN,taxoFN):
		"""Load a taxonomy file and count its items and categories.

		:param iFile: taxonomy input file
		:param itemFN: name of the item field in ``iFile``
		:param taxoFN: name of the category (taxonomy) field in ``iFile``
		"""
		# item field name / category field name (strings)
		self.itemFN = self.taxoFN = None
		# number of distinct items / distinct categories (integers)
		self.itemSize = self.taxoSize = None
		# name of the normalised taxonomy data file (string)
		self.file = None

		self.temp = mtemp.Mtemp()

		self.iFile = iFile
		self.iPath = os.path.abspath(self.iFile)
		self.itemFN = itemFN
		self.taxoFN = taxoFN

		# write the distinct (item, category) pairs to a temporary file
		self.file = self.temp.file()
		pair_fields = self.itemFN + "," + self.taxoFN
		pair_flow = nm.mcut(f=pair_fields,i=self.iFile)
		pair_flow = pair_flow.muniq(k=pair_fields,o=self.file)
		pair_flow.run(msg="on")

		# number of distinct values found in the item field(s)
		item_counter = nm.mcut(f=self.itemFN,i=self.iFile)
		for stage in (
			nm.mtrafld(f=self.itemFN,a="__fld",valOnly=True),
			nm.mtra(f="__fld",r=True),
			nm.muniq(k="__fld"),
			nm.mcount(a="size"),
			nm.mcut(f="size"),
		):
			item_counter <<= stage
		item_rows = item_counter.run()
		self.itemSize = int(item_rows[0][0])

		# number of distinct categories in the normalised file
		taxo_counter = nm.mcut(f=self.taxoFN+":item",i=self.file)
		taxo_counter = taxo_counter.muniq(k="item")
		taxo_counter = taxo_counter.mcount(a="size")
		taxo_counter = taxo_counter.mcut(f="size")
		taxo_rows = taxo_counter.run()
		self.taxoSize = int(taxo_rows[0][0])
Пример #10
0
    def convRsl(self, ifile, ofile, map1, map2, logDir=None):
        """Write a numbered transaction file as named ``ef1``/``ef2`` pairs.

        ``ifile`` is read as a headerless file with one blank-separated
        list of numbers per line.  The line number (from 1) is looked up in
        ``map1`` as ``num1`` and each listed number in ``map2`` as ``num2``;
        the resulting ``self.ef1``/``self.ef2`` pairs are written to
        ``ofile``, placed inside ``logDir`` when that is given.

        :param ifile: headerless numbered transaction file
        :param ofile: output file name (or path when ``logDir`` is falsy)
        :param map1: mapping data providing ``num1`` and ``self.ef1``
        :param map2: mapping data providing ``num2`` and ``self.ef2``
        :param logDir: optional directory that receives ``ofile``
        """
        # Convert the converged micro-cluster graph back to the original
        # node strings and output it.
        f = None
        f <<= nm.mcut(nfni=True, f="0:tra", i=ifile)
        # strip one trailing blank from each transaction line
        f <<= nm.msed(f="tra", c=' $', v="")
        f <<= nm.mnumber(q=True, S=1, a="num1")
        f <<= nm.mtra(r=True, f="tra:num2")
        f <<= nm.mjoin(k="num2", m=map2, f=self.ef2)
        f <<= nm.mjoin(k="num1", m=map1, f=self.ef1)
        f <<= nm.msortf(f="num1%n,num2%n")
        f <<= nm.mcut(f=[self.ef1, self.ef2])

        if logDir:
            # The former "{}/#{ofile}" template was a Ruby-interpolation
            # leftover: "{ofile}" is a named field, so .format() raised
            # KeyError whenever logDir was given.
            f <<= nm.mfldname(q=True, o="{}/{}".format(logDir, ofile))
        else:
            f <<= nm.mfldname(q=True, o=ofile)

        f.run()
Пример #11
0
	def __init__(self,db,outtf=True):
		"""Prepare the numbered transaction file and per-class weight files.

		:param db: input database; this constructor reads its ``items``,
			``clsNameRecSize``, ``clsFN``, ``cFile``, ``itemFN``, ``idFN``,
			``timeFN`` and ``file`` attributes
		:param outtf: stored unchanged in ``self.outtf``
		"""
		self.size = None
		self.msgoff = True

		self.temp = nu.Mtemp()
		self.db = db  # input database
		self.file = self.temp.file()
		self.outtf = outtf
		item_tbl = self.db.items

		# Weight files.
		# An integer-item transaction file for LCM is generated for the
		# positive and negative transactions, merged into one file.
		# The weight file of each class is produced as follows:
		# 1. the target class is positive, every other class negative;
		# 2. negative weight is -1, positive weight comes from calOmega;
		# 3. the weight matching the class of each transaction line is
		#    written (a single field per line).
		self.weightFile = {}
		self.posWeight = {}
		self.sigma = {}

		for cls_name,pos_size in db.clsNameRecSize.items():
			weight_path = self.temp.file()
			self.weightFile[cls_name] = weight_path
			omega = self.calOmega(pos_size)
			self.posWeight[cls_name] = omega
			replacement = "%s:%s"%(cls_name,omega)
			class_col = nm.mcut(nfno=True,f=self.db.clsFN,i=self.db.cFile)
			weight_flow = class_col.mchgstr(nfn=True,f=0,O=-1,o=weight_path,c=replacement)
			weight_flow.run()

		# Convert items from symbols to numbers.
		# NOTE(review): rows are sorted by id and time before mtra(k=id);
		# confirm that mtra keeps that order inside each id.
		kept_fields = self.db.idFN+","+self.db.timeFN+","+item_tbl.idFN
		numbering = None
		for stage in (
			nm.mjoin(k=self.db.itemFN,K=item_tbl.itemFN,m=item_tbl.file,f=item_tbl.idFN,i=self.db.file),
			nm.mcut(f=kept_fields),
			nm.msortf(f=self.db.idFN+","+self.db.timeFN+"%n"),
			nm.mtra(k=self.db.idFN,f=item_tbl.idFN),
			nm.mcut(f=item_tbl.idFN,nfno=True,o=self.file),
		):
			numbering <<= stage
		numbering.run()
Пример #12
0
    def run(self):
        """Enumerate patterns with LCM and write them to ``self.oFile``.

        The edge file is converted to numbered transactions with
        ``pair2tra``, ``extTake.lcm`` is run on them (type ``CIf``, support
        1, optional length bounds ``self.minSize2``/``self.maxSize2``), and
        the numbered result is mapped back to node names.  With
        ``self.byedge`` set the output has one row per pattern id / node1 /
        pattern element; otherwise one row per pattern.  Rows are kept only
        when ``size1`` lies in ``[self.minSize1, self.maxSize1]``.
        """
        work = mtemp.Mtemp()

        tra_file = work.file()
        map1_file = work.file()
        map2_file = work.file()
        lcm_out = work.file()

        pattern_tbl = work.file()
        work.file()  # reserved temp name, not used below
        tid_tbl_edge = work.file()
        tid_tbl_node = work.file()

        self.pair2tra(self.ei, self.ef1, self.ef2, tra_file, map1_file,
                      map2_file)

        lcm_args = {"type": "CIf", "sup": 1, "o": lcm_out, "i": tra_file}
        if self.minSize2:
            lcm_args["l"] = self.minSize2
        if self.maxSize2:
            lcm_args["u"] = self.maxSize2

        extTake.lcm(lcm_args)
        extTake.lcmtrans(lcm_out, "p", pattern_tbl)

        # patterns with their elements translated back to node2 names
        patterns = None
        for stage in (
                nm.mdelnull(f="pattern", i=pattern_tbl),
                nm.mvreplace(vf="pattern", m=map2_file, K="num2", f="node2"),
                nm.mcut(f="pid,pattern,size:size2"),
                nm.mvsort(vf="pattern"),
                nm.msortf(f="pid"),
        ):
            patterns <<= stage

        size1_range = "[{},{}]".format(self.minSize1, self.maxSize1)

        if not self.byedge:
            # one output row per pattern: node1 vector, pattern vector
            extTake.lcmtrans(lcm_out, "t", tid_tbl_node)
            per_pattern = None
            for stage in (
                    nm.mjoin(k="__tid", m=map1_file, i=tid_tbl_node,
                             f="node1", K="num1"),
                    nm.mtra(k="pid", f="node1"),
                    nm.mvcount(vf="node1:size1"),
                    nm.mjoin(k="pid", m=patterns, f="pattern,size2"),
                    nm.mselnum(f="size1", c=size1_range),
                    nm.mvsort(vf="node1,pattern"),
                    nm.msortf(f="node1,pattern"),
                    nm.mcut(f="node1:{},pattern:{},size1,size2".format(
                        self.ef1, self.ef2),
                            o=self.oFile),
            ):
                per_pattern <<= stage
            per_pattern.run()
            return

        # by-edge output: one row per (pattern id, node1, pattern element)
        pattern_items = nm.mtra(f="pattern", i=patterns, r=True)
        extTake.lcmtrans(lcm_out, "t", tid_tbl_edge)

        tid_nodes = None
        tid_nodes <<= nm.mjoin(k="__tid", m=map1_file, f="node1", K="num1",
                               i=tid_tbl_edge)
        tid_nodes <<= nm.msortf(f="pid")

        # number of node1 values per pattern, limited to the allowed range
        size1_tbl = None
        size1_tbl <<= nm.mcount(k="pid", a="size1", i=tid_nodes)
        size1_tbl <<= nm.mselnum(f="size1", c=size1_range)

        per_edge = None
        for stage in (
                nm.mjoin(k="pid", m=size1_tbl, f="size1", i=tid_nodes),
                nm.mnjoin(k="pid", m=pattern_items, f="pattern,size2"),
                nm.mcut(f="pid:id,node1:{},pattern:{},size1,size2".format(
                    self.ef1, self.ef2),
                        o=self.oFile),
        ):
            per_edge <<= stage
        per_edge.run()
Пример #13
0
    def run(self, **kw_args):
        """Build a METIS graph file from the edge/node data and run gpmetis.

        Steps, in order: normalise the edge file into an undirected,
        de-duplicated edge list; build the node list (from ``self.nFile``
        or from the edges); number the nodes from 1; write the METIS graph
        body and header; optionally save the number/name map
        (``self.mFile``) and the graph file (``self.dFile``); unless
        ``self.noexe`` is set, run ``gpmetis`` and write node/cluster pairs
        to ``self.oFile``.

        :param kw_args: only ``msg`` is inspected; ``msg="on"`` sets the
            environment variable ``KG_ScpVerboseLevel`` to ``"3"``
        :raises Exception: when an edge refers to a node missing from the
            node file, or when ``gpmetis`` produces no ``.part.*`` output
        """

        # NOTE: both settings modify the process environment and are not
        # restored afterwards.
        os.environ["KG_VerboseLevel"] = "2"
        if "msg" in kw_args:
            if kw_args["msg"] == "on":
                os.environ['KG_ScpVerboseLevel'] = "3"

        temp = Mtemp()
        xxedge = temp.file()
        xxnode = temp.file()
        xxnam2num = temp.file()
        xxnum2nam = temp.file()
        xxebase = temp.file()
        xxbody = temp.file()

        # select and rename the edge fields (plus the weight when given)
        e1 = None
        if self.ew:
            e1 <<= nm.mcut(f="%s:__node1,%s:__node2,%s:__weight" %
                           (self.ef1, self.ef2, self.ew),
                           i=self.eFile)
        else:
            e1 <<= nm.mcut(f="%s:__node1,%s:__node2" % (self.ef1, self.ef2),
                           i=self.eFile)

        e1 <<= nm.muniq(k="__node1,__node2")

        # the same edges with both end points swapped
        e2 = nm.mfldname(i=e1, f="__node2:__node1,__node1:__node2")

        # both directions merged and de-duplicated: every undirected edge is
        # stored twice in xxedge (hence eSize is halved further down)
        fe = None
        fe <<= nm.muniq(k="__node1,__node2", i=[e1, e2], o=xxedge)
        fe.run()

        # cleaning the node data (remove duplicate nodes)
        fn = None
        if self.nFile:
            if self.nw:
                fn <<= nm.mcut(f="%s:__node,%s" % (self.nf, self.nw),
                               i=self.nFile)
            else:
                fn <<= nm.mcut(f="%s:__node" % (self.nf), i=self.nFile)

            fn <<= nm.muniq(k="__node", o=xxnode)

        else:
            # no node file: derive the node list from both edge end points
            xxen1 = nm.mcut(f="__node1:__node", i=xxedge)
            xxen2 = nm.mcut(f="__node2:__node", i=xxedge)
            fn <<= nm.muniq(k="__node", o=xxnode, i=[xxen1, xxen2])

        fn.run()

        # build the node-name <=> node-number conversion tables
        fmap = None
        fmap <<= nm.mcut(f="__node", i=xxnode)
        fmap <<= nm.mnumber(a="__num", S=1, q=True, o=xxnam2num)
        fmap <<= nm.msortf(f="__num", o=xxnum2nam)
        fmap.run()

        # when a node file is given, check it for consistency with the
        # edge file
        if self.nFile:
            ncheck = nm.mcut(f="__node1:__node", i=xxedge)
            # r=True: keep the edge nodes that are NOT found in the node table
            ncheck <<= nm.mcommon(k="__node", m=xxnam2num, r=True)
            nmatch = ncheck.run()
            if len(nmatch) > 0:
                raise Exception(
                    "#ERROR# the node named '%s' in the edge file doesn't exist in the node file."
                    % (nmatch[0][0]))

        # METIS graph file format
        # first line: n m [fmt] [ncon]
        # n: number of nodes, m: number of edges, ncon: number of node weights
        # 1xx: node sizes present (not used, meaning always "0")
        # x1x: node weights present
        # xx1: edge weights present
        # s w_1 w_2 ... w_ncon v_1 e_1 v_2 e_2 ... v_k e_k
        # s: node size  (node sizes cannot be used)
        # w_x: node weight
        # v_x: number of an adjacent node (line number)
        # e_x: edge weight

        # --------------------
        # generate edge data using the integer numbered nodes
        #fnnum = None
        fnnum = nm.mcut(f="__num:__node_n1", i=xxnam2num)  # {xxnnum}

        fenum = None
        fenum <<= nm.mjoin(k="__node1",
                           K="__node",
                           f="__num:__node_n1",
                           m=xxnam2num,
                           i=xxedge)
        fenum <<= nm.mjoin(k="__node2",
                           K="__node",
                           f="__num:__node_n2",
                           m=xxnam2num)
        fenum <<= nm.msortf(f="__node_n1")  #{xxenum}

        # NOTE(review): n=True presumably keeps node numbers without any
        # edge so that they still get a line in the graph body -- confirm
        febase = None
        febase <<= nm.mnjoin(k="__node_n1", m=fenum, i=fnnum, n=True)
        febase <<= nm.msortf(f="__node_n1%n,__node_n2%n",
                             o=xxebase)  #{xxebase}"
        febase.run()

        fbody = None
        if not self.ew:
            # unweighted: one line per node listing its neighbours
            fbody <<= nm.mcut(f="__node_n1,__node_n2", i=xxebase)
            fbody <<= nm.mtra(k="__node_n1", f="__node_n2", q=True)
            fbody <<= nm.mcut(f="__node_n2", nfno=True, o=xxbody)

        # if ew= is specified, merge the weight data into the edge data.
        else:
            # neighbours get even sequence numbers (0, 2, 4, ...)
            febody = None
            febody <<= nm.mcut(f="__node_n1,__node_n2:__v", i=xxebase)
            febody <<= nm.mnumber(S=0, I=2, a="__seq", q=True)

            # weights get odd sequence numbers (1, 3, 5, ...), so sorting on
            # __seq interleaves "neighbour weight" pairs
            fwbody = None
            fwbody <<= nm.mcut(f="__node_n1,__weight:__v", i=xxebase)
            fwbody <<= nm.mnumber(S=1, I=2, a="__seq", q=True)

            fbody <<= nm.msortf(f="__seq%n", i=[febody, fwbody])
            fbody <<= nm.mtra(k="__node_n1", f="__v", q=True)
            fbody <<= nm.mcut(f="__v", nfno=True, o=xxbody)

        fbody.run()
        # xxbody
        # 2 7 3 8 5 9
        # 1 7 3 10 5 11 7 12
        # 1 8 2 10 4 13 7 14

        # --------------------
        # generate node data using integer number
        if self.nFile and self.nw:
            # xxnode
            # __node,v1,v2
            # a,1,1
            # b,1,1
            # c,1,1
            xxnbody = temp.file()
            xxnbody1 = temp.file()
            fnbody = None
            fnbody <<= nm.mjoin(k="__node", f="__num", i=xxnode, m=xxnam2num)
            fnbody <<= nm.msortf(f="__num%n")
            fnbody <<= nm.mcut(f=self.nw, nfno=True)
            fnbody <<= nm.cmd("tr ',' ' ' ")  # tricky!!
            fnbody <<= nm.mwrite(o=xxnbody)
            fnbody.run()
            # xxnbody
            # 1 1
            # 1 1
            # 1 1
            # paste the node weight with edge body
            fnbody1 = None
            fnbody1 <<= nm.mpaste(nfn=True, m=xxbody, i=xxnbody)
            fnbody1 <<= nm.cmd("tr ',' ' ' ")
            fnbody1 <<= nm.mwrite(o=xxnbody1)
            fnbody1.run()
            # the weighted body replaces the plain edge body
            os.system("mv %s %s" % (xxnbody1, xxbody))

        # xxbody
        # 1 1 2 7 3 8 5 9
        # 1 1 1 7 3 10 5 11 7 12
        # 1 1 1 8 2 10 4 13 7 14

        eSize = mrecount(i=xxedge)
        # xxedge holds each undirected edge in both directions
        eSize /= 2
        nSize = mrecount(i=xxnode)
        nwFlag = 1 if self.nw else 0
        ewFlag = 1 if self.ew else 0

        # three-digit fmt field of the header; the first digit is always 0
        fmt = "0%d%d" % (nwFlag, ewFlag)

        xxhead = temp.file()
        xxgraph = temp.file()

        # header line followed by the body gives the complete graph file
        os.system("echo '%d %d %s %d' > %s" %
                  (nSize, eSize, fmt, self.ncon, xxhead))
        os.system("cat  %s %s > %s" % (xxhead, xxbody, xxgraph))

        # optional copy of the number -> name map
        if self.mFile:
            nm.mfldname(f="__num:num,__node:node", i=xxnum2nam,
                        o=self.mFile).run()

        # optional copy of the generated graph file
        if self.dFile:
            os.system("cp %s %s" % (xxgraph, self.dFile))

        if not self.noexe:
            # the two calls differ only in discarding gpmetis' stdout
            if self.verbose:
                os.system(
                    "gpmetis -seed=%d -ptype=%s -ncuts=%d -ufactor=%d %s %d" %
                    (self.seed, self.ptype, self.ncuts, self.ufactor, xxgraph,
                     self.kway))
            else:
                os.system(
                    "gpmetis -seed=%d -ptype=%s -ncuts=%d -ufactor=%d %s %d  > /dev/null"
                    % (self.seed, self.ptype, self.ncuts, self.ufactor,
                       xxgraph, self.kway))
            import glob
            # the exit status is not checked; success is detected by the
            # presence of an output file next to the graph file
            if len(glob.glob(xxgraph + ".part.*")) == 0:
                raise Exception(
                    "#ERROR# command `gpmetis' didn't output any results")

            # map the node numbers back to the original node names
            # #{xxgraph}.part.#{kway}
            # 1
            # 0
            # 1
            fo = None
            fo <<= nm.mcut(f="0:cluster",
                           nfni=True,
                           i=xxgraph + ".part." + str(self.kway))
            # line i of the partition file belongs to node number i
            fo <<= nm.mnumber(S=1, a="__num", q=True)
            fo <<= nm.mjoin(k="__num", f="__node", m=xxnum2nam)
            fo <<= nm.msortf(f="__node,cluster")
            if self.nf:
                fo <<= nm.mcut(f="__node:%s,cluster" % (self.nf), o=self.oFile)
            else:
                fo <<= nm.mcut(f="__node:node,cluster", o=self.oFile)
            fo.run()

        nu.mmsg.endLog(self.__cmdline())
Пример #14
0
    def run(self):
        """Enumerate cliques with MACE and write them to ``self.oFile``.

        The graph is converted to numbered pairs by ``self.g2pair``,
        ``extTake.mace`` enumerates cliques (type ``C`` when ``self.all``
        is set, ``M`` otherwise), and the numbered result is written as
        ``id,node,size`` rows.  When ``self.ni`` is set, nodes that the
        selected cliques do not cover are appended as cliques of size 1.
        With ``self.logFile`` set, the arguments and the elapsed time are
        written there as key/value CSV.
        """
        from datetime import datetime
        started = datetime.now()

        work = nu.Mtemp()
        pair_file = work.file()
        map_file = work.file()

        self.g2pair(self.ni, self.nf, self.ei, self.ef1, self.ef2, pair_file,
                    map_file)

        mace_out = work.file()  # mace output (transaction format)

        # a trailing "_" in the type is used when messages are switched off
        if self.all:
            mace_type = "Ce_" if self.msgoff else "Ce"
        else:
            mace_type = "Me_" if self.msgoff else "Me"
        mace_args = {"type": mace_type, "i": pair_file, "o": mace_out}
        if self.minSize:
            mace_args["l"] = self.minSize
        if self.maxSize:
            mace_args["u"] = self.maxSize
        extTake.mace(mace_args)

        # number of cliques found; isolated nodes are numbered from here on
        clique_total = nu.mrecount(i=mace_out, nfni=True)

        # one row per (clique id, member number) together with the size
        cliques = None
        for stage in (
                nm.mcut(i=mace_out, nfni=True, f="0:num"),
                nm.mnumber(q=True, a="id"),
                nm.mvcount(vf="num:size"),
                nm.mtra(r=True, f="num"),
        ):
            cliques <<= stage

        if not self.ni:
            result = cliques
        else:
            # when ni= is specified, add the isolated single cliques
            covered = nm.mread(i=cliques)
            if self.all:
                covered <<= nm.mselstr(f="size", v=1)
            covered <<= nm.mcut(f="num")
            covered <<= nm.muniq(k="num")

            # select all nodes which are not included in any cliques
            isolated = None
            for stage in (
                    nm.mcut(f="num", i=map_file),
                    nm.mcommon(k="num", m=covered, r=True),
                    nm.mnumber(S=clique_total, a="id", q=True),
                    nm.msetstr(v=1, a="size"),
                    nm.mcut(f="id,num,size"),
            ):
                isolated <<= stage
            # NOTE: without this mcut the merged data comes out wrong;
            # to be fixed later
            result = nm.mcut(i=[cliques, isolated], f="id,num,size")

        # translate the numbers back to node names and write the result
        for stage in (
                nm.mjoin(m=map_file, k="num", f="node"),
                nm.mcut(f="id,node,size"),
                nm.msortf(f="id,node", o=self.oFile),
        ):
            result <<= stage
        result.run()

        elapsed = datetime.now() - started

        # log file output
        if self.logFile:
            rows = [["key", "value"]]
            rows.extend([key, str(val)] for key, val in self.args.items())
            rows.append(["time", str(elapsed)])
            nm.writecsv(i=rows, o=self.logFile).run()
Пример #15
0
	def enumerate(self,eArgs):
		"""
		Enumerate frequent itemsets under the conditions given in eArgs.

		The pattern table is written to a temporary file stored in
		``self.pFile``; unless ``skipTP`` is set, the transaction/pattern
		table is written to ``self.tFile``.

		:type eArgs: dict
		:type eArgs['type']: str
		:type eArgs['minCnt']: int
		:type eArgs['minSup']: float
		:type eArgs['maxCnt']: int
		:type eArgs['maxSup']: float
		:type eArgs['minLen']: int
		:type eArgs['maxLen']: int
		:type eArgs['top']: int
		:type eArgs['skipTP']: bool [default: False]
		:param eArgs: enumeration parameters
		:param eArgs['type']: kind of itemset to extract ['F': frequent, 'C': closed, 'M': maximal]
		:param eArgs['minCnt']: minimum support (count)
		:param eArgs['minSup']: minimum support (probability)
		:param eArgs['maxCnt']: maximum support (count)
		:param eArgs['maxSup']: maximum support (probability)
		:param eArgs['minLen']: minimum number of items in an itemset
		:param eArgs['maxLen']: maximum number of items in an itemset
		:param eArgs['top']: number of top-support patterns to enumerate
		:param eArgs['skipTP']: do not output the patterns (itemsets) matching each transaction
		"""

		tf=mtemp.Mtemp()
		self.eArgs=eArgs
		self.type = eArgs["type"]

		# minimum support: a count takes precedence over a probability;
		# the probability is converted to a count (the +0.99 rounds up)
		if "minCnt" in eArgs and eArgs["minCnt"] != None:
			self.minCnt = int(eArgs["minCnt"])
			self.minSup = float(self.minCnt) / float(self.db.traSize)
		else:
			self.minSup = float(eArgs["minSup"])
			self.minCnt = int(self.minSup * float(self.db.traSize) + 0.99)

		# maximum support and maximum support count
		self.maxCnt=None
		if ("maxCnt" in eArgs and  eArgs["maxCnt"]!= None) or ( "maxSup" in eArgs and eArgs["maxSup"]!= None):
			if "maxCnt" in eArgs and eArgs["maxCnt"]!= None:
				self.maxCnt = int(eArgs["maxCnt"])
				self.maxSup = float(self.maxCnt) / float(self.db.traSize)
			else:
				self.maxSup    = float(eArgs["maxSup"])
				self.maxCnt = int(self.maxSup * float(self.db.traSize) + 0.99)


		# lcm parameters; a trailing "_" in the type is used when messages
		# are switched off
		params = {}
		if self.msgoff:
			params["type"] ="%sIf_"%(self.type)
		else:
			params["type"] ="%sIf"%(self.type)


		if self.maxCnt :
			params["U"] = str(self.maxCnt)

		if "minLen" in eArgs and eArgs["minLen"] != None :
			params["l"] = str(eArgs['minLen'])
		
		if "maxLen" in eArgs and eArgs["maxLen"] != None :
			params["u"] = str(eArgs['maxLen'])

		# if an upper limit on the number of patterns is given, run lcm
		# once to obtain the minimum support
		# NOTE(review): self.top is only overwritten when eArgs carries
		# "top", so a value from an earlier call stays in effect
		if "top" in eArgs and eArgs["top"] != None :
			self.top = eArgs["top"]

		if self.top and self.top>0 :

			xxtop = tf.file()
			import copy
			top_params = copy.deepcopy(params)
			top_params["i"] = self.file
			top_params["sup"] = "1"
			top_params["K"] = str(self.top)
			top_params["so"] = xxtop
			import re
			# this run always uses the type without the trailing "_"
			top_params["type"] = re.sub('_$', '', top_params["type"] )

			extTake.lcm(top_params)

			# the file named by "so" holds the support count to use
			with open(xxtop, "r") as rfile:
				self.minCnt = int(rfile.read().strip())

			if self.minCnt<0 :
				self.minCnt=1 


		self.skipTP=False
		if "skipTP" in eArgs:
			self.skipTP=eArgs["skipTP"]

		# lcm_seq output file
		lcmout = tf.file()

		# When there is no frequent pattern, lcm does not create its output
		# file, so create an empty file beforehand for that case.
		with open(lcmout, "w") as efile:
			pass

		# run lcm
		params["i"] = self.file
		params["sup"] = str(self.minCnt)
		params["o"] = lcmout
		extTake.lcm(params)

		# calculate the single-item sets needed for the lift value
		xxone= tf.file()
		tpstr = "FIf_" if self.msgoff else "FIf"

		extTake.lcm(type=tpstr,i=self.file,sup=1,o=xxone,l=1,u=1)


		# compute the support of each pattern and write it as CSV
		#MCMD::msgLog("output patterns to CSV file ...")

		xxp0 = tf.file()
		self.pFile = self.temp.file()
		items=self.db.items
		trans0 = self.temp.file()

		extTake.lcmtrans(lcmout,"p",trans0)

		# item numbers back to item names, plus total and support columns
		f =   nm.mdelnull(i=trans0,f="pattern")
		f <<= nm.mvreplace(vf="pattern",m=items.file,K=items.idFN,f=items.itemFN)
		f <<= nm.msetstr(v=self.db.traSize,a="total")
		f <<= nm.mcal(c='${count}/${total}',a="support")
		f <<= nm.mcut(f="pid,pattern,size,count,total,support")
		f <<= nm.mvsort(vf="pattern")
		f <<= nm.msortf(f="pid",o=xxp0)
		f.run()


		# xxp0
		# pid,count,total,support,pattern
		# 0,13,13,1,A
		# 4,6,13,0.4615384615,A B
		xxp1=tf.file()

		# when no taxonomy is specified (added 2010/11/20)
		if items.taxonomy==None:
			shutil.move(xxp0,xxp1)
		# when a taxonomy is specified
		else:
			#MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")

			# collect every pattern into one ZDD, reduce it with
			# reduceTaxo, and keep only the patterns that remain
			zdd=VSOP.constant(0)
			fobj = nm.mcut(i=xxp0,f='pattern')
			for fldVal in fobj:
				zdd=zdd+VSOP.itemset(fldVal[0])

			
			zdd=self.reduceTaxo(zdd,self.db.items)
			xxz1=tf.file()
			xxz2=tf.file()
			zdd.csvout(xxz1)

			f0=None
			f0 <<= nm.mcut(nfni=True,f="1:pattern",i=xxz1)
			f0 <<= nm.mvsort(vf="pattern")
			f0 <<= nm.msortf(f="pattern")

			f=None
			f <<= nm.msortf(f="pattern",i=xxp0)
			f <<= nm.mcommon(k="pattern",m=f0)
			f <<= nm.msortf(f="pid",o=xxp1)
			f.run()


		# compute the lift values
		transl = tf.file()
		extTake.lcmtrans(xxone,"p",transl)

		# single items with their counts
		xxp2 =   nm.mdelnull(i=transl,f="pattern")
		xxp2 <<= nm.mvreplace(vf="pattern",m=items.file,K=items.idFN,f=items.itemFN)
		xxp2 <<= nm.msortf(f="pattern")

		# per pattern: sum of ln(count) over its items
		xxp3 =   nm.mcut(f="pid,pattern",i=xxp1)
		xxp3 <<= nm.mtra(f="pattern",r=True)
		xxp3 <<= nm.mjoin(k="pattern",m=xxp2,f="count:c1")
		xxp3 <<= nm.mcal(c='ln(${c1})',a="c1ln")
		xxp3 <<= nm.msum(k="pid",f="c1ln")

		# p3
		# pid,pattern,c1,c1ln
		# 0,A,13,2.564949357
		# 1,E,7,1.945910149
		
		# goes wrong? => OK
		# lift = count * total^(size-1) / product of the item counts,
		# evaluated in log space
		f3 =   nm.mjoin(k="pid",f="c1ln",i=xxp1,m=xxp3)
		f3 <<= nm.mcal(c='round(exp(ln(${count})-${c1ln}+(${size}-1)*ln(${total})),0.0001)',a="lift")
		f3 <<= nm.mcut(f="pid,size,count,total,support,lift,pattern")
		f3 <<= nm.msortf(f="support%nr",o=self.pFile)
		f3.run()

		#self.size = mrecount.mrecount(i=self.file)

		#MCMD::msgLog("the number of patterns enumerated is #{@size}")

		if not self.skipTP:
			# write out the patterns that occur in each transaction
			#MCMD::msgLog("output tid-patterns ...")

			self.tFile = self.temp.file()
			xxw3i = tf.file()
			extTake.lcmtrans(lcmout,"t",xxw3i)

			# xxw1: transaction ids numbered from 0 as __tid
			# xxw2: ids of the patterns that were kept in self.pFile
			# xxw3: (transaction id, pid) pairs limited to those patterns
			xxw1 = nm.mcut(f=self.db.idFN,i=self.db.file).muniq(k=self.db.idFN).mnumber(S=0,a="__tid",q=True).msortf(f="__tid")
			xxw2 = nm.mcut(f="pid",i=self.pFile)
			xxw3 = nm.mcommon(k="pid",i=xxw3i,m=xxw2).mjoin(k="__tid",m=xxw1,f=self.db.idFN).mcut(f=self.db.idFN+",pid",o=self.tFile)
			xxw3.run()