示例#1
0
def test_add_parameter():
    """Check that a value passed to add_parameter can be read back.

    Generator-style test: each ``yield`` hands an assertion function and
    its arguments back to the test runner.
    """
    key = 'nTopics'

    # NOTE(review): hasattr() inspects attributes, while the final check
    # below reads the reloaded config by key -- confirm this precondition
    # tests what was intended.
    yield assert_false, hasattr(config, key)

    add_parameter(key, nTopics, jsonFile)
    reloaded = load_config(jsonFile)

    yield assert_equal, reloaded[key], nTopics
示例#2
0
def setup():
    """Write an empty json config file and initialise the shared globals.

    Sets the module-level names ``jsonFile``, ``config`` and ``nTopics``.
    """
    global jsonFile
    global config
    global nTopics

    jsonFile = 'config.json'
    # create config.json holding an empty parameter set
    params = {}
    # Text mode on purpose: dump() is presumably json.dump, which writes
    # str, so a binary-mode handle raises TypeError on Python 3.
    with open(jsonFile, 'w') as f:
        dump(params, f, sort_keys=True, indent=4)
    config = load_config(jsonFile)

    nTopics = 100
# Command line interface: a json experiment configuration plus an optional
# list of perspectives and an optional output file.
parser = argparse.ArgumentParser()
parser.add_argument('json',
                    help='json file containing experiment '
                    'configuration.')
parser.add_argument('--perspectives',
                    '-p',
                    help='list of perspectives to '
                    'calculate contrastive opinions for')
parser.add_argument('--output', '-o', help='file to save output to')
args = parser.parse_args()

# Single-argument print() calls produce the same output under Python 2 and
# also parse under Python 3, unlike the print statement.
print(args.perspectives)
print(args.output)

config = load_config(args.json)

# An explicit --output takes precedence; otherwise the file name is built
# from the configured 'outDir' template and the number of topics.
if args.output:
    fName = args.output
else:
    fName = config.get('outDir').format('co_words_{}.csv'.format(
        config.get('nTopics')))
logger.info('writing output to {}'.format(fName))

corpus = get_corpus(config)

# Inputs loaded for the configured experiment.
words = corpus.topic_words()
topics = load_topics(config)
opinions = load_opinions(config)
nks = load_nks(config)
示例#4
0
from cptm.utils.experiment import load_config, get_corpus, get_sampler


logger = logging.getLogger(__name__)
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)

# Only let errors through from these loggers.
logging.getLogger('gensim').setLevel(logging.ERROR)
logging.getLogger('CPTCorpus').setLevel(logging.ERROR)
logging.getLogger('CPT_Gibbs').setLevel(logging.ERROR)

parser = argparse.ArgumentParser()
parser.add_argument('json', help='json file containing experiment '
                    'configuration.')
args = parser.parse_args()

config = load_config(args.json)
corpus = get_corpus(config)
nTopics = config.get('expNumTopics')
nIter = config.get('nIter')
outDir = config.get('outDir')
# Theta files are kept for iteration 0 and for every iteration t where
# (t + 1) is a multiple of this interval; all others are removed.
sampleInterval = 10

for nt in nTopics:
    sampler = get_sampler(config, corpus, nTopics=nt, initialize=False)
    logger.info('removing parameter sample files for nTopics = {}'.format(nt))
    for t in range(sampler.nIter):
        if t != 0 and (t+1) % sampleInterval != 0:
            theta_file = sampler.get_theta_file_name(t)
            try:
                remove(theta_file)
            except OSError:
                # Best effort: the file may be missing or already removed.
                # Assumes remove is os.remove -- confirm against the
                # imports. Other errors (and Ctrl-C) now propagate instead
                # of being silently swallowed.
                pass
示例#5
0
# Root logging at DEBUG; gensim is limited to errors while the two
# project loggers stay at DEBUG.
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)

logging.getLogger('gensim').setLevel(logging.ERROR)
logging.getLogger('CPTCorpus').setLevel(logging.DEBUG)
logging.getLogger('CPT_Gibbs').setLevel(logging.DEBUG)

# Positional arguments: experiment configuration, input dir, output dir.
parser = argparse.ArgumentParser()
parser.add_argument(
    'json',
    help='json file containing experiment configuration.')
parser.add_argument(
    'data_dir',
    help='dir containing the input data.')
parser.add_argument(
    'out_dir',
    help='dir to write results to.')
args = parser.parse_args()

params = load_config(args.json)

# The 'outDir' setting is used as a format template for file names.
out_template = params.get('outDir')
topicDict = out_template.format('topicDict.dict')
opinionDict = out_template.format('opinionDict.dict')

# Names of the perspectives in the corpus built from the configuration.
c_perspectives = get_corpus(params)
perspectives = [p.name for p in c_perspectives.perspectives]
found = '; '.join(perspectives)
logger.info('Perspectives found: {}'.format(found))

# The same data dir is passed once for every perspective.
input_dirs = [args.data_dir for p in perspectives]

corpus = CPTCorpus(
    input=input_dirs,
    topicDict=topicDict,
    opinionDict=opinionDict,
    testSplit=100,
    file_dict=None,
    topicLines=params.get('topicLines'),
    opinionLines=params.get('opinionLines'))
from CPTCorpus import CPTCorpus
from cptm.utils.experiment import get_sampler, thetaFileName, load_config, \
    topicFileName

logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Positional arguments: experiment configuration, input dir, output dir.
parser = argparse.ArgumentParser()
parser.add_argument(
    'json',
    help='json file containing experiment configuration.')
parser.add_argument(
    'data_dir',
    help='dir containing the input data.')
parser.add_argument(
    'out_dir',
    help='dir to write results to.')
args = parser.parse_args()

params = load_config(args.json)

input_dir = [args.data_dir]
# The 'outDir' setting is used as a format template for file names.
out_template = params.get('outDir')
topicDict = out_template.format('topicDict.dict')
opinionDict = out_template.format('opinionDict.dict')
phi_topic_file = topicFileName(params)

# Read the topic file (first column as index) and keep the transposed values.
topic_frame = pd.read_csv(phi_topic_file, index_col=0, encoding='utf-8')
phi_topic = topic_frame.values.T
#print phi_topic.shape
#print phi_topic

corpus = CPTCorpus(input=input_dir,
                   topicDict=topicDict,
                   opinionDict=opinionDict,
                   testSplit=100,
                   file_dict=None,