def build_all_the_files(num_training, num_test, n):
    """Regenerate the JSON data files and the occurrence/co-occurrence dicts.

    The steps run in a fixed order, which is preserved exactly because later
    steps may read files written by earlier ones (assumption -- confirm
    against ``build_occurence_dict`` / ``build_cooccurrence_dict``):

    1. Read the corpus with ``ref.read_all_the_files(n)`` and dump the
       Chinese sentences to ``chinese_sents.json``.
    2. Call ``build_occurence_dict(0, num_training)``.
    3. Call ``ref.build_chinese_word_sent_dict`` with the bounds
       ``num_training`` and ``num_test + num_training`` and dump the result
       to ``test_ref_dict.json``.
    4. Call ``ref.build_chinese_word_sent_dict`` with the bounds ``0`` and
       ``num_training`` and dump the result to ``train_data.json``.
    5. Call ``build_cooccurrence_dict()``.

    Args:
        num_training: Upper bound of the training range and lower bound of
            the test range (presumably a sentence count -- verify in ``ref``).
        num_test: Size added to ``num_training`` to form the upper bound of
            the test range.
        n: Forwarded unchanged to ``ref.read_all_the_files`` and
            ``ref.build_chinese_word_sent_dict``; its meaning is defined there.

    Returns:
        None. All results are written to files in the current directory.
    """
    # Only the first element of the returned pair is persisted here.
    chinese_sents, _ = ref.read_all_the_files(n)

    # Text mode ('w', not 'wb'): json.dump emits str, which a binary-mode
    # file object rejects with TypeError on Python 3.
    with open('chinese_sents.json', 'w') as fp:
        json.dump(chinese_sents, fp)

    build_occurence_dict(0, num_training)

    # A fresh {} is passed on each call so the two results never share state.
    test_ref = ref.build_chinese_word_sent_dict(
        num_training, num_test + num_training, {}, n)
    with open('test_ref_dict.json', 'w') as fp:
        json.dump(test_ref, fp)

    train_data = ref.build_chinese_word_sent_dict(0, num_training, {}, n)
    with open('train_data.json', 'w') as fp:
        json.dump(train_data, fp)

    build_cooccurrence_dict()
def build_all_the_files(num_training, num_test, n):
    """Rebuild every derived data file for a given train/test split.

    Writes ``chinese_sents.json``, ``test_ref_dict.json`` and
    ``train_data.json`` in the current directory, and invokes
    ``build_occurence_dict`` and ``build_cooccurrence_dict`` along the way.
    The original call order is kept as-is, since the helpers defined
    elsewhere may depend on files produced by earlier steps (assumption --
    confirm against those helpers).

    Args:
        num_training: Passed as the upper bound for the training data and as
            the lower bound for the test reference data.
        num_test: Added to ``num_training`` to obtain the upper bound for the
            test reference data.
        n: Opaque argument forwarded to ``ref.read_all_the_files`` and
            ``ref.build_chinese_word_sent_dict``.

    Returns:
        None.
    """

    def _write_json(path, payload):
        # Open in text mode: json.dump produces str, so a 'wb' handle raises
        # TypeError on Python 3. 'w' behaves the same on Python 2.
        with open(path, 'w') as fp:
            json.dump(payload, fp)

    # The second element of the returned pair is not needed in this function.
    chinese_sents, _ = ref.read_all_the_files(n)
    _write_json('chinese_sents.json', chinese_sents)

    build_occurence_dict(0, num_training)

    # Each call receives its own empty dict as the third argument.
    test_ref = ref.build_chinese_word_sent_dict(
        num_training, num_test + num_training, {}, n)
    _write_json('test_ref_dict.json', test_ref)

    train_data = ref.build_chinese_word_sent_dict(0, num_training, {}, n)
    _write_json('train_data.json', train_data)

    build_cooccurrence_dict()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Python 2 script: passes every Chinese sentence returned by
# read_all_the_files() through the Microsoft Translator client and records
# the results in bing_translations.txt.  Each sentence produces three output
# lines: the sentence with spaces removed, the translation returned by the
# service (UTF-8 encoded), and a '==========' separator.

from microsofttranslator import Translator
# if this fails, do:
# sudo easy_install microsofttranslator
from ref_definitions2 import read_all_the_files

# NOTE(review): the client secret is committed in source.  Consider loading
# it from configuration/environment and rotating the key.
translator = Translator('finalproj', 'LZTVKNyxQEjmIUbMWp1HhkN4x9XkIbnT6fHhaJfLFmo=')
#print translator.translate('你好'.decode('utf8'), 'en', 'zh-CHT')

(chinese_sentences, english_sentences) = read_all_the_files()

# The with-block guarantees the output file is flushed and closed even if a
# translation request raises part-way through the corpus.
with open('bing_translations.txt', 'w') as of:
    for chinese_sent in chinese_sentences:
        # Drop every element equal to a single space before translating.
        chinese_sent = ''.join(x for x in chinese_sent if x != ' ')
        of.write(chinese_sent + '\n')
        # Decode the byte string for the API call, then re-encode the result
        # as UTF-8 before writing it out.
        translated = translator.translate(
            chinese_sent.decode('utf8'), 'en', 'zh-CHT').encode('utf-8')
        of.write(translated + '\n')
        of.write('==========\n')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Python 2 script.  Reads the corpus via read_all_the_files(), sends each
# Chinese sentence (with spaces stripped out) to the Microsoft Translator
# client using the language arguments 'en' and 'zh-CHT', and writes the
# sentence, the UTF-8 encoded result and a '==========' separator line to
# bing_translations.txt.

from microsofttranslator import Translator
# if this fails, do:
# sudo easy_install microsofttranslator
from ref_definitions2 import read_all_the_files

# NOTE(review): hard-coded API credential -- should live outside the
# repository (config or environment) and be rotated.
translator = Translator('finalproj', 'LZTVKNyxQEjmIUbMWp1HhkN4x9XkIbnT6fHhaJfLFmo=')
#print translator.translate('你好'.decode('utf8'), 'en', 'zh-CHT')

(chinese_sentences, english_sentences) = read_all_the_files()

# Use a context manager so the handle is always closed; previously the file
# was opened and never closed, leaving buffered output at risk on failure.
with open('bing_translations.txt', 'w') as of:
    for chinese_sent in chinese_sentences:
        # Rebuild the sentence without its single-space elements.
        chinese_sent = ''.join(x for x in chinese_sent if x != ' ')
        of.write(chinese_sent + '\n')
        # The client is given unicode text; its answer is encoded back to
        # UTF-8 bytes for the output file.
        translated = translator.translate(
            chinese_sent.decode('utf8'), 'en', 'zh-CHT').encode('utf-8')
        of.write(translated + '\n')
        of.write('==========\n')