def test_add_parameter():
    """Check that ``add_parameter`` writes a value that ``load_config`` reads back.

    This is a generator test: each ``yield`` produces an assertion callable
    followed by the arguments it should be called with. It relies on the
    module-level ``config``, ``jsonFile`` and ``nTopics`` globals.
    """
    param_name = 'nTopics'

    # Precondition, evaluated before the parameter is added.
    # NOTE(review): hasattr() tests for an *attribute*, whereas the check
    # below reads the value by key -- confirm this is the intended
    # precondition.
    yield assert_false, hasattr(config, param_name)

    # Store the parameter in the JSON file, then reload the file from disk.
    add_parameter(param_name, nTopics, jsonFile)
    reloaded = load_config(jsonFile)

    yield assert_equal, reloaded[param_name], nTopics
def setup():
    """Write an empty JSON config file and initialise the shared test globals.

    Sets three module-level globals:

    * ``jsonFile`` -- ``'config.json'`` (a path relative to the working
      directory)
    * ``config`` -- the result of ``load_config(jsonFile)``
    * ``nTopics`` -- ``100``
    """
    global jsonFile
    global config
    global nTopics

    jsonFile = 'config.json'

    # create config.json
    params = {}
    # Open in text mode: ``dump`` is called with json.dump's keyword
    # arguments, and json.dump writes ``str``; a file opened with 'wb'
    # rejects that on Python 3, while 'w' works on Python 2 and 3 alike.
    with open(jsonFile, 'w') as f:
        dump(params, f, sort_keys=True, indent=4)

    config = load_config(jsonFile)

    nTopics = 100
# Command line: the experiment's JSON configuration file, plus two optional
# arguments (perspectives, output file).
parser = argparse.ArgumentParser()
parser.add_argument('json', help='json file containing experiment '
                    'configuration.')
parser.add_argument('--perspectives', '-p', help='list of perspectives to '
                    'calculate contrastive opinions for')
parser.add_argument('--output', '-o', help='file to save output to')
args = parser.parse_args()

# Echo the optional arguments. The parenthesised single-argument form prints
# the same text under Python 2 and is also valid Python 3.
print(args.perspectives)
print(args.output)

config = load_config(args.json)

# An explicit --output wins; otherwise the file name is built by filling the
# configured 'outDir' template with 'co_words_<nTopics>.csv'.
if args.output:
    fName = args.output
else:
    fName = config.get('outDir').format('co_words_{}.csv'.format(
        config.get('nTopics')))
logger.info('writing output to {}'.format(fName))

# Load the corpus and the estimated model data named by the configuration.
corpus = get_corpus(config)
words = corpus.topic_words()
topics = load_topics(config)
opinions = load_opinions(config)
nks = load_nks(config)
from cptm.utils.experiment import load_config, get_corpus, get_sampler

logger = logging.getLogger(__name__)
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
# Only let ERROR and above through from these named loggers.
logging.getLogger('gensim').setLevel(logging.ERROR)
logging.getLogger('CPTCorpus').setLevel(logging.ERROR)
logging.getLogger('CPT_Gibbs').setLevel(logging.ERROR)

parser = argparse.ArgumentParser()
parser.add_argument('json', help='json file containing experiment '
                    'configuration.')
args = parser.parse_args()

config = load_config(args.json)

corpus = get_corpus(config)
nTopics = config.get('expNumTopics')
nIter = config.get('nIter')
outDir = config.get('outDir')
sampleInterval = 10

# For every configured number of topics, delete the per-iteration theta
# files, keeping only iteration 0 and the iterations whose 1-based index is a
# multiple of sampleInterval.
for nt in nTopics:
    sampler = get_sampler(config, corpus, nTopics=nt, initialize=False)
    # Use the module logger (it propagates to the root handler configured
    # above, so the emitted line is unchanged).
    logger.info('removing parameter sample files for nTopics = {}'.format(nt))
    for t in range(sampler.nIter):
        if t != 0 and (t+1) % sampleInterval != 0:
            theta_file = sampler.get_theta_file_name(t)
            try:
                remove(theta_file)
            except OSError as err:
                # Best effort: a file that is missing or cannot be deleted is
                # skipped. Catching OSError only (instead of a bare except)
                # lets KeyboardInterrupt and programming errors surface.
                # NOTE(review): assumes ``remove`` is os.remove -- confirm
                # against the file's imports.
                logger.debug('could not remove %s: %s', theta_file, err)
# Script-wide logging at DEBUG level, with per-logger overrides below.
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)

for _logger_name, _logger_level in (('gensim', logging.ERROR),
                                    ('CPTCorpus', logging.DEBUG),
                                    ('CPT_Gibbs', logging.DEBUG)):
    logging.getLogger(_logger_name).setLevel(_logger_level)

# Command line: experiment configuration, input data dir, results dir.
parser = argparse.ArgumentParser()
parser.add_argument(
    'json',
    help='json file containing experiment configuration.')
parser.add_argument(
    'data_dir',
    help='dir containing the input data.')
parser.add_argument(
    'out_dir',
    help='dir to write results to.')
args = parser.parse_args()

params = load_config(args.json)

# Both dictionary paths are made by filling the configured 'outDir' template.
_out_template = params.get('outDir')
topicDict = _out_template.format('topicDict.dict')
opinionDict = _out_template.format('opinionDict.dict')

# Collect the perspective names from the corpus described by the config.
c_perspectives = get_corpus(params)
perspectives = [p.name for p in c_perspectives.perspectives]
logger.info('Perspectives found: {}'.format('; '.join(perspectives)))

# One input entry per perspective, each pointing at the same data directory.
input_dirs = [args.data_dir] * len(perspectives)

corpus = CPTCorpus(
    input=input_dirs,
    topicDict=topicDict,
    opinionDict=opinionDict,
    testSplit=100,
    file_dict=None,
    topicLines=params.get('topicLines'),
    opinionLines=params.get('opinionLines'),
)
from CPTCorpus import CPTCorpus from cptm.utils.experiment import get_sampler, thetaFileName, load_config, \ topicFileName logger = logging.getLogger(__name__) logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.DEBUG) parser = argparse.ArgumentParser() parser.add_argument('json', help='json file containing experiment ' 'configuration.') parser.add_argument('data_dir', help='dir containing the input data.') parser.add_argument('out_dir', help='dir to write results to.') args = parser.parse_args() params = load_config(args.json) input_dir = [args.data_dir] topicDict = params.get('outDir').format('topicDict.dict') opinionDict = params.get('outDir').format('opinionDict.dict') phi_topic_file = topicFileName(params) phi_topic = pd.read_csv(phi_topic_file, index_col=0, encoding='utf-8').values.T #print phi_topic.shape #print phi_topic corpus = CPTCorpus(input=input_dir, topicDict=topicDict, opinionDict=opinionDict, testSplit=100, file_dict=None,