# along with this program. If not, see <http://www.gnu.org/licenses/>. __author__="*****@*****.**" import sys from tinasoft import PytextminerApi def usage(): print "USAGE : python apitests.py TestClass configuration_file_path source_filename file_format" if __name__ == '__main__': print sys.argv try: confFile = sys.argv[1] databaseName = sys.argv[2] tinasoft = PytextminerApi(confFile) except: usage() exit() tinasoft.set_storage( databaseName ) documents = tinasoft.storage.loadMany( "Document" ) try: while 1: id, doc = documents.next() if doc['content'] != "": doc['content'] = "" print "emptying document %s contents"%doc['id'] tinasoft.storage.insertDocument(doc, overwrite=True) else: print "no content in document %s"%doc['id'] except StopIteration, si:
class tinaWorker:
    """Drive the Pytextminer processing steps for one project.

    The attributes depend on each other, so the setters are expected to be
    called in this order before ``processTinaSteps``:
    ``setTinaDir`` -> ``setProjectName`` -> ``setProjectConfig``.
    """

    def __init__(self):
        # Period label handed unchanged to ``generate_graph`` in step 3.
        self.period = "PERIOD"

    def setTinaDir(self, inDir):
        """Store the working directory and derive the source-files folder.

        Paths are built by plain string concatenation, so ``inDir`` must
        already end with a path separator.
        """
        self.tinadir = inDir
        self.tinasourcefiles = self.tinadir + "tina_sourcefiles/"

    def setProjectName(self, inStr):
        """Store the project name and derive the two CSV paths from it.

        Reads ``self.tinadir``; calling this before ``setTinaDir`` raises
        ``AttributeError``.
        """
        self.projectName = inStr
        self.sourcecsv = self.projectName + "_source.csv"
        self.ngramcsv = (
            self.tinadir + "tina_whitelists/" + self.projectName + "_ngrams.csv"
        )

    def setProjectConfig(self, inStr):
        """Build the ``PytextminerApi`` handle from ``inStr`` and keep it on
        ``self.tinasoft``."""
        self.tinasoft = PytextminerApi(inStr)

    def processTinaSteps(self, step):
        """Run a single processing step and return the API call's result.

        step == 1: ``extract_file`` on ``self.sourcecsv`` with
                   ``outpath=self.ngramcsv``.
        step == 2: ``index_file`` on ``self.sourcecsv`` with
                   ``whitelistpath=self.ngramcsv``.
        step == 3: ``generate_graph`` for ``self.projectName`` and
                   ``self.period``, with ``outpath='test_graph'``.

        Every step returns whatever its API call returned, so the results
        of steps 1 and 2 are no longer thrown away. Any other ``step``
        value performs no call and returns ``None``.
        """
        if step == 1:
            return self.tinasoft.extract_file(
                self.sourcecsv,
                self.projectName,
                outpath=self.ngramcsv,
                format="tinacsv",
                minoccs=1,
            )

        if step == 2:
            return self.tinasoft.index_file(
                self.sourcecsv,
                self.projectName,
                whitelistpath=self.ngramcsv,
                format="tinacsv",
            )

        if step == 3:
            # Only the proximity measure is configured for each graph; the
            # thresholds (and alpha) are left to the API.
            # Proximity names the author left disabled: "EquivalenceIndeX",
            # "PseudoInclusion" (ngram graph) and "sharedNGrams" (document
            # graph).
            return self.tinasoft.generate_graph(
                self.projectName,
                self.period,
                outpath='test_graph',
                ngramgraphconfig={'proximity': "Cooccurrences"},
                documentgraphconfig={'proximity': "logJaccard"},
                exportedges=True,
            )

        return None
class tinaWorker:
    """Drive the Pytextminer processing steps for one project.

    The attributes depend on each other, so the setters are expected to be
    called in this order before ``processTinaSteps``:
    ``setTinaDir`` -> ``setProjectName`` -> ``setProjectConfig``.
    """

    def __init__(self):
        # Period label handed unchanged to ``generate_graph`` in step 3.
        self.period = "PERIOD"

    def setTinaDir(self, inDir):
        """Store the working directory and derive the source-files folder.

        Paths are built by plain string concatenation, so ``inDir`` must
        already end with a path separator.
        """
        self.tinadir = inDir
        self.tinasourcefiles = self.tinadir + "tina_sourcefiles/"

    def setProjectName(self, inStr):
        """Store the project name and derive the two CSV paths from it.

        Reads ``self.tinadir``; calling this before ``setTinaDir`` raises
        ``AttributeError``.
        """
        self.projectName = inStr
        self.sourcecsv = self.projectName + "_source.csv"
        self.ngramcsv = (
            self.tinadir + "tina_whitelists/" + self.projectName + "_ngrams.csv"
        )

    def setProjectConfig(self, inStr):
        """Build the ``PytextminerApi`` handle from ``inStr`` and keep it on
        ``self.tinasoft``."""
        self.tinasoft = PytextminerApi(inStr)

    def processTinaSteps(self, step):
        """Run a single processing step and return the API call's result.

        step == 1: ``extract_file`` on ``self.sourcecsv`` with
                   ``outpath=self.ngramcsv``.
        step == 2: ``index_file`` on ``self.sourcecsv`` with
                   ``whitelistpath=self.ngramcsv``.
        step == 3: ``generate_graph`` for ``self.projectName`` and
                   ``self.period``, with ``outpath='test_graph'``.

        Every step returns whatever its API call returned, so the results
        of steps 1 and 2 are no longer thrown away. Any other ``step``
        value performs no call and returns ``None``.
        """
        if step == 1:
            return self.tinasoft.extract_file(
                self.sourcecsv,
                self.projectName,
                outpath=self.ngramcsv,
                format="tinacsv",
                minoccs=1,
            )

        if step == 2:
            return self.tinasoft.index_file(
                self.sourcecsv,
                self.projectName,
                whitelistpath=self.ngramcsv,
                format="tinacsv",
            )

        if step == 3:
            # Only the proximity measure is configured for each graph; the
            # thresholds (and alpha) are left to the API.
            # Proximity names the author left disabled: "EquivalenceIndeX",
            # "PseudoInclusion" (ngram graph) and "sharedNGrams" (document
            # graph).
            return self.tinasoft.generate_graph(
                self.projectName,
                self.period,
                outpath='test_graph',
                ngramgraphconfig={'proximity': "Cooccurrences"},
                documentgraphconfig={'proximity': "logJaccard"},
                exportedges=True,
            )

        return None
def setProjectConfig(self, inStr):
    """Create the Pytextminer API handle for this worker.

    ``inStr`` is passed straight to ``PytextminerApi`` (presumably the
    configuration file path -- confirm against the caller) and the resulting
    object is kept on ``self.tinasoft``.
    """
    api_handle = PytextminerApi(inStr)
    self.tinasoft = api_handle
def setProjectConfig(self, inStr):
    """Instantiate ``PytextminerApi`` and attach it to the worker.

    The argument is forwarded unchanged to the ``PytextminerApi``
    constructor (it looks like a configuration file path -- verify with the
    caller); the new instance is stored as ``self.tinasoft``.
    """
    pytextminer = PytextminerApi(inStr)
    self.tinasoft = pytextminer