import json

from libshorttext.classifier import TextModel
from libshorttext.classifier import predict_single_text

mod = TextModel('models/expends2012.description.sorted-labeled.csv.svm.model')
data = json.load(open('/home/blannon/og_data/expenditures/expends12-unlabeled-csv.json'))

out = open('results/expends2012.unlabeled.full.results.csv','w')

for d in data['rows']:
    if d['descrip']:
        descrip = str(d['descrip'])
    else:
        descrip = ''
    r = predict_single_text(descrip.encode('ascii','ignore'),mod)
    print d['ID'],descrip,r.predicted_y
    out.write('\t'.join([str(a) for a in [d['ID'],r.predicted_y]]))
    out.write('\n')

out.close()
import csv
import unicodedata

from libshorttext.classifier import TextModel
from libshorttext.classifier import predict_single_text

unlabeled_data_filename = 'data/unlabeled/descrip.unlabeled.csv'
model_filename = 'models/descrip.labeled.csv.svm.model'

mod = TextModel(model_filename)

# Sniff the CSV dialect from the complete first line. The original used
# readline(100), which truncates long header lines and can make the
# sniffer misdetect the dialect.
with open(unlabeled_data_filename) as f:
    _dialect = csv.Sniffer().sniff(f.readline())
_dialect.escapechar = '\\'

# Context managers ensure both handles are closed; without them the
# DictWriter's buffered rows could be lost if the script dies mid-run.
with open(unlabeled_data_filename) as fin, \
        open('results/descrip.unlabeled.results.csv', 'w') as fout:
    data = csv.DictReader(fin, dialect=_dialect)
    out = csv.DictWriter(fout, fieldnames=data.fieldnames + ['guess'],
                         dialect=_dialect)
    out.writeheader()

    for d in data:
        descrip = d['descrip'] if d['descrip'] else ''
        # Python 2: bytes -> unicode -> ASCII bytes, dropping non-ASCII chars
        # before handing the text to the classifier.
        descrip = descrip.decode('utf-8').encode('ascii', 'ignore')
        r = predict_single_text(descrip, mod)
        d['descrip'] = descrip
        d['guess'] = r.predicted_y
        out.writerow(d)

from libshorttext.classifier import TextModel
from libshorttext.classifier import predict_single_text


# Classify each unlabeled 2012 expenditure description and write
# tab-separated (predicted label, description) lines.
# `with` closes the input file (the original open() handle was leaked).
with open('data/unlabeled/expends2012.description.unlabeled.csv') as f:
    data = [line.strip().split('\t') for line in f]

mod = TextModel('models/expends2012.description.sorted-labeled.csv.svm.model')

with open('results/expends2012.description.unlabeled.results.csv', 'w') as out:
    for d in data:
        # d[1]: second tab-separated field — presumably the description
        # text; TODO confirm against the data file layout.
        r = predict_single_text(d[1], mod)
        out.write('\t'.join([r.predicted_y, d[1]]) + '\n')
# Load the trained SVM expert-classifier model.
logger.info('initiating textmodel')
svm_model = classifier.TextModel('../svm_experts/models/fcc-experts.model')

# The document index lists one source-file location per line.
logger.info('listing documents')
# Context manager closes the index file (the original open() leaked it).
with open(os.path.join(settings.PERSIST_DIR, 'document_index'), 'r') as f:
    flocs = [line.strip() for line in f]
logger.info('... found {} documents'.format(len(flocs)))


def get_json(filename):
    """Parse and return the JSON document *filename* from settings.RAW_DIR."""
    # Use a context manager so the file handle is closed deterministically
    # (the original left it to the garbage collector).
    with open(os.path.join(settings.RAW_DIR, filename)) as f:
        return json.load(f)


def get_text(jd):
    """Return the ASCII-sanitised 'text' field of *jd*, or '' when absent/empty."""
    txt = jd.get('text', "")
    # Empty/missing text short-circuits; otherwise sanitise via asciiDammit.
    return asciiDammit(txt) if txt else ""


# Classify every indexed document, recording its predicted label and the
# two decision values in expert_predictions.csv.
with open(os.path.join(settings.PERSIST_DIR, 'expert_predictions.csv'), 'w') as fout:
    writer = csv.writer(fout)
    for i, floc in enumerate(flocs):
        # Progress heartbeat every 1000 documents (fires at i == 0 too).
        if i % 1000 == 0:
            logger.info('predicted {} documents'.format(i))
        fname = os.path.basename(floc)
        text = get_text(get_json(fname))
        result = classifier.predict_single_text(text, svm_model)
        writer.writerow((fname, result.predicted_y,
                         result.decvals[0], result.decvals[1]))
    line.strip() for line in open(
        os.path.join(settings.PERSIST_DIR, 'document_index_part_two'), 'r')
]
logger.info('... found {} documents'.format(len(doc_ids)))


def get_json(filename):
    """Parse and return the JSON document *filename* from settings.PROC_DIR."""
    # Close the handle promptly instead of relying on garbage collection.
    with open(os.path.join(settings.PROC_DIR, filename)) as fp:
        return json.load(fp)


def get_text(jd):
    """Return asciiDammit-sanitised 'text' from *jd*; '' when missing or empty."""
    txt = jd.get('text', "")
    # Guard clause: nothing to sanitise for a falsy value.
    if not txt:
        return ""
    return asciiDammit(txt)


# Classify every document id from part two, recording the predicted label
# and both decision values in expert_predictions_part_two.csv.
part_two_path = os.path.join(settings.PERSIST_DIR,
                             'expert_predictions_part_two.csv')
with open(part_two_path, 'w') as fout:
    writer = csv.writer(fout)
    for i, doc_id in enumerate(doc_ids):
        # Progress heartbeat every 1000 documents (fires at i == 0 too).
        if i % 1000 == 0:
            logger.info('predicted {} documents'.format(i))
        fname = '{}.json'.format(doc_id)
        result = classifier.predict_single_text(
            get_text(get_json(fname)), svm_model)
        writer.writerow((fname, result.predicted_y,
                         result.decvals[0], result.decvals[1]))