#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

__author__="*****@*****.**"

import sys
from tinasoft import PytextminerApi

def usage():
    """Print the command-line synopsis of this script to standard output.

    The entry point of this script reads exactly two positional arguments:
    ``sys.argv[1]`` (the configuration file handed to ``PytextminerApi``)
    and ``sys.argv[2]`` (the database name handed to ``set_storage``).
    The message lists those two arguments and takes the program name from
    ``sys.argv[0]`` rather than hard-coding another script's name.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("USAGE : python %s configuration_file_path database_name" % sys.argv[0])

if __name__ == '__main__':
    # Script entry point: walk every stored "Document" and blank out its
    # 'content' field, writing the modified document back to storage.
    # Echo the raw command line.
    print sys.argv
    try:
        # Two positional arguments are required; a missing one raises
        # IndexError, which the handler below turns into the usage message.
        confFile = sys.argv[1]
        databaseName = sys.argv[2]
        tinasoft = PytextminerApi(confFile)
    except:
        # NOTE(review): this bare except also catches any error raised by the
        # PytextminerApi constructor and reports it as a usage problem.
        usage()
        exit()
    tinasoft.set_storage( databaseName )
    documents = tinasoft.storage.loadMany( "Document" )
    try:
        # Pull (id, doc) pairs by hand until documents.next() raises
        # StopIteration, which ends the loop.
        while 1:
            # NOTE(review): `id` shadows the builtin and is not used below;
            # the messages read doc['id'] instead.
            id, doc = documents.next()
            if doc['content'] != "":
                doc['content'] = ""
                print "emptying document %s contents"%doc['id']
                # Write the emptied document back over the stored one.
                tinasoft.storage.insertDocument(doc, overwrite=True)
            else:
                print "no content in document %s"%doc['id']
    # NOTE(review): the body of this handler is not visible in this excerpt.
    except StopIteration, si:
示例#2
0
class tinaWorker:
	"""Run a project through the extract / index / graph steps of PytextminerApi.

	Expected call order: setTinaDir(), setProjectName() and
	setProjectConfig() first, then processTinaSteps() with step 1, 2 or 3.
	setProjectName() reads the directory stored by setTinaDir(), and
	processTinaSteps() reads the attributes stored by all three setters;
	calling them out of order raises AttributeError.
	"""

	def __init__(self):
		# Period label passed as the second argument of generate_graph().
		self.period = "PERIOD"

	##################################
	def _inTinaDir(self, relPath):
		"""Return relPath located under self.tinadir.

		A "/" is inserted when self.tinadir is non-empty and does not
		already end with a path separator, so "/data/tina" and "/data/tina/"
		produce the same result. An empty directory yields relPath unchanged.
		"""
		base = self.tinadir
		if base and not base.endswith(("/", "\\")):
			base += "/"
		return base + relPath

	def setTinaDir(self,inDir):
		"""Store the base directory (as given) and derive the source-files directory."""
		self.tinadir = inDir
		self.tinasourcefiles = self._inTinaDir("tina_sourcefiles/")

	def setProjectName(self,inStr):
		"""Store the project name and derive the source CSV and n-gram CSV names.

		Requires setTinaDir() to have been called: the n-gram CSV is located
		in the "tina_whitelists/" sub-directory of the base directory.
		"""
		self.projectName = inStr
		self.sourcecsv = self.projectName+"_source.csv"
		self.ngramcsv = self._inTinaDir("tina_whitelists/"+self.projectName+"_ngrams.csv")

	def setProjectConfig(self,inStr):
		"""Create the PytextminerApi instance from inStr and keep it on self.tinasoft."""
		self.tinasoft = PytextminerApi(inStr)

	##################################
	def processTinaSteps(self,step):
		"""Run one processing step.

		step == 1: extract_file() on the source CSV, writing to self.ngramcsv.
		step == 2: index_file() on the source CSV, using self.ngramcsv as whitelist.
		step == 3: generate_graph() for the project and period.

		Returns the value of generate_graph() for step 3 and None for any
		other step value (the results of steps 1 and 2 are not returned).
		"""
		if step==1:
			self.tinasoft.extract_file(
					self.sourcecsv,
					self.projectName,
					outpath=self.ngramcsv,
					format="tinacsv",
					minoccs=1,
			)
		elif step==2:
			self.tinasoft.index_file(
					self.sourcecsv,
					self.projectName,
					whitelistpath=self.ngramcsv,
					format="tinacsv",
			)
		elif step==3:
			# Exporting to current.
			# Options left disabled by the original author:
			#   n-gram graph: 'edgethreshold': [1.0,'inf'], 'nodethreshold': [1,'inf'],
			#     'alpha': 0.1, 'proximity': "EquivalenceIndeX" / "PseudoInclusion"
			#   document graph: 'edgethreshold': [1.0,'inf'], 'nodethreshold': [1,'inf'],
			#     'proximity': "sharedNGrams"
			#   call level: whitelistpath = self.ngramcsv
			return self.tinasoft.generate_graph(
					self.projectName,
					self.period,
					outpath = 'test_graph',
					ngramgraphconfig={
						'proximity': "Cooccurrences"
					},
					documentgraphconfig={
						'proximity': "logJaccard"
					},
					exportedges=True
			)
#########################################################
示例#3
0
class tinaWorker:
    """Thin driver around PytextminerApi for one project.

    Usage: call setTinaDir(), then setProjectName(), then
    setProjectConfig(), and finally processTinaSteps() with 1 (extract),
    2 (index) or 3 (generate graph). The setters only store attributes;
    processTinaSteps() reads them, so skipping a setter raises
    AttributeError.
    """

    def __init__(self):
        # Passed unchanged as the period argument of generate_graph().
        self.period = "PERIOD"

    ##################################
    def _underTinaDir(self, tail):
        """Prefix `tail` with self.tinadir, supplying a missing separator.

        When self.tinadir is empty, or already ends in "/" or "\\", the
        result is plain concatenation. Otherwise a "/" is inserted so a
        directory given without a trailing slash still yields a valid path.
        """
        root = self.tinadir
        needs_separator = bool(root) and not root.endswith(("/", "\\"))
        if needs_separator:
            return root + "/" + tail
        return root + tail

    def setTinaDir(self, inDir):
        """Remember the base directory as given and derive tinasourcefiles."""
        self.tinadir = inDir
        self.tinasourcefiles = self._underTinaDir("tina_sourcefiles/")

    def setProjectName(self, inStr):
        """Remember the project name and derive the two CSV file names.

        sourcecsv is "<project>_source.csv" (no directory prefix); ngramcsv
        is "<project>_ngrams.csv" inside the "tina_whitelists/" folder of
        the base directory, so setTinaDir() must have been called first.
        """
        self.projectName = inStr
        self.sourcecsv = self.projectName + "_source.csv"
        self.ngramcsv = self._underTinaDir(
            "tina_whitelists/" + self.projectName + "_ngrams.csv")

    def setProjectConfig(self, inStr):
        """Instantiate PytextminerApi with inStr and store it as self.tinasoft."""
        self.tinasoft = PytextminerApi(inStr)

    ##################################
    def processTinaSteps(self, step):
        """Execute the processing step selected by `step`.

        1 -> extract_file(): source CSV to self.ngramcsv (format "tinacsv",
             minoccs=1).
        2 -> index_file(): source CSV with self.ngramcsv as whitelist.
        3 -> generate_graph(): returns whatever generate_graph() returns.

        Steps 1 and 2, and any other value of `step`, return None.
        """
        if step == 1:
            self.tinasoft.extract_file(
                self.sourcecsv,
                self.projectName,
                outpath=self.ngramcsv,
                format="tinacsv",
                minoccs=1,
            )
        elif step == 2:
            self.tinasoft.index_file(
                self.sourcecsv,
                self.projectName,
                whitelistpath=self.ngramcsv,
                format="tinacsv",
            )
        elif step == 3:
            # Exporting to current.
            # Settings the original author left switched off:
            #   whitelistpath = self.ngramcsv
            #   'edgethreshold': [1.0,'inf'] and 'nodethreshold': [1,'inf'] (both graphs)
            #   'alpha': 0.1 (n-gram graph)
            #   n-gram 'proximity': "EquivalenceIndeX" or "PseudoInclusion"
            #   document 'proximity': "sharedNGrams"
            return self.tinasoft.generate_graph(
                self.projectName,
                self.period,
                outpath='test_graph',
                ngramgraphconfig={
                    'proximity': "Cooccurrences"
                },
                documentgraphconfig={
                    'proximity': "logJaccard"
                },
                exportedges=True)


#########################################################
示例#4
0
	def setProjectConfig(self,inStr):
		"""Build a PytextminerApi from inStr and keep it on self.tinasoft.

		inStr is forwarded unchanged as the only constructor argument.
		"""
		api = PytextminerApi(inStr)
		self.tinasoft = api
示例#5
0
 def setProjectConfig(self, inStr):
     # Construct the API object first, then publish it on the instance
     # under the attribute name `tinasoft`.
     configuredApi = PytextminerApi(inStr)
     self.tinasoft = configuredApi