Exemplo n.º 1
0
 def test_addtranslation(self):
     """Check that a pair stored with addtranslation() survives a round trip.

     The store is serialised with ``str()``, re-parsed through
     ``self.tmxparse`` and the source is then looked up in the new store.
     """
     source_text = "A string of characters"
     target_text = "'n String karakters"
     store = tmx.tmxfile()
     store.addtranslation(source_text, "en", target_text, "af")
     serialized = str(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     assert reparsed.translate(source_text) == target_text
Exemplo n.º 2
0
 def test_translate(self):
     """translate() gives None for unknown text and the target once stored."""
     source_text = "A string of characters"
     target_text = "'n String karakters"
     store = tmx.tmxfile()
     # Nothing has been added yet, so any lookup must miss.
     assert store.translate("Anything") is None
     store.addtranslation(source_text, "en", target_text, "af")
     assert store.translate(source_text) == target_text
Exemplo n.º 3
0
    def export(self, rotate=False):
        """Write every translated unit of the context to a zipped TMX file.

        The export directory is created when missing. All units in state
        ``TRANSLATED`` from the context's live stores are added to a TMX
        store, which is serialised into a zip archive at
        ``self.abs_filepath``.

        :param rotate: when true, delete files in ``self.directory`` that
            pass ``self.check_tp`` and are neither the file just written nor
            the previously exported file.
        :return: tuple ``(self.abs_filepath, removed)`` where ``removed`` is
            the list of deleted file paths (empty unless ``rotate`` is set).
        """
        source_language = self.context.project.source_language.code
        target_language = self.context.language.code

        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

        tmxfile = tmx.tmxfile()
        for store in self.context.stores.live().iterator():
            for unit in store.units.filter(state=TRANSLATED):
                tmxfile.addtranslation(unit.source, source_language,
                                       unit.target, target_language,
                                       unit.developer_comment)

        bs = BytesIO()
        tmxfile.serialize(bs)

        # str.rstrip('.zip') removes any trailing '.', 'z', 'i' or 'p'
        # characters rather than the suffix, so names such as "app.zip" were
        # truncated too far. Drop exactly one '.zip' suffix instead.
        member_name = self.filename
        if member_name.endswith('.zip'):
            member_name = member_name[:-len('.zip')]

        with open(self.abs_filepath, "wb") as f:
            with ZipFile(f, "w") as zf:
                zf.writestr(member_name, bs.getvalue())

        # Remember the previous export before the revision is bumped so that
        # rotation can keep it.
        last_exported_filepath = self.last_exported_file_path
        self.update_exported_revision()

        removed = []
        if rotate:
            for fn in os.listdir(self.directory):
                # Skip files from other projects.
                if not self.check_tp(fn):
                    continue
                filepath = os.path.join(self.directory, fn)
                if filepath not in [self.abs_filepath, last_exported_filepath]:
                    removed.append(filepath)
                    os.remove(filepath)

        return self.abs_filepath, removed
Exemplo n.º 4
0
    def export(self, rotate=False):
        """Write every translated unit of the context to a zipped TMX file.

        The export directory is created when missing. All units in state
        ``TRANSLATED`` from the context's live stores are added to a TMX
        store, which is serialised into a zip archive at
        ``self.abs_filepath``.

        :param rotate: when true, delete files in ``self.directory`` that
            pass ``self.check_tp`` and are neither the file just written nor
            the previously exported file.
        :return: tuple ``(self.abs_filepath, removed)`` where ``removed`` is
            the list of deleted file paths (empty unless ``rotate`` is set).
        """
        source_language = self.context.project.source_language.code
        target_language = self.context.language.code

        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

        tmxfile = tmx.tmxfile()
        for store in self.context.stores.live().iterator():
            for unit in store.units.filter(state=TRANSLATED):
                tmxfile.addtranslation(unit.source, source_language,
                                       unit.target, target_language,
                                       unit.developer_comment)

        bs = BytesIO()
        tmxfile.serialize(bs)

        # str.rstrip('.zip') removes any trailing '.', 'z', 'i' or 'p'
        # characters rather than the suffix, so names such as "app.zip" were
        # truncated too far. Drop exactly one '.zip' suffix instead.
        member_name = self.filename
        if member_name.endswith('.zip'):
            member_name = member_name[:-len('.zip')]

        with open(self.abs_filepath, "wb") as f:
            with ZipFile(f, "w") as zf:
                zf.writestr(member_name, bs.getvalue())

        # Remember the previous export before the revision is bumped so that
        # rotation can keep it.
        last_exported_filepath = self.last_exported_file_path
        self.update_exported_revision()

        removed = []
        if rotate:
            for fn in os.listdir(self.directory):
                # Skip files from other projects.
                if not self.check_tp(fn):
                    continue
                filepath = os.path.join(self.directory, fn)
                if filepath not in [self.abs_filepath, last_exported_filepath]:
                    removed.append(filepath)
                    os.remove(filepath)

        return self.abs_filepath, removed
Exemplo n.º 5
0
def export_BilingualCorpus2File(file_url, sentences, file_type, s_lang,
                                t_lang):
    """Write bilingual sentence pairs to ``file_url`` in the given format.

    :param file_url: path of the file to write.
    :param sentences: iterable of objects exposing ``source`` and ``target``.
    :param file_type: one of ``'txt'`` (``source|target`` lines), ``'csv'``,
        ``'tmx'`` or ``'xlsx'``; any other value writes nothing.
    :param s_lang: source language code, used only for TMX output.
    :param t_lang: target language code, used only for TMX output.
    :return: the base name of ``file_url``.
    """
    if file_type == 'txt':
        # The context manager closes the file; no explicit close() is needed.
        with open(file_url, 'w', encoding='utf-8') as fout:
            fout.writelines(sentence.source + '|' + sentence.target + '\n'
                            for sentence in sentences)
    elif file_type == 'csv':
        # newline='' lets the csv module control line endings, as its
        # documentation requires; otherwise Windows emits blank rows.
        # Append mode is kept from the original implementation.
        with open(file_url, 'a', encoding='utf-8', newline='') as csvfile:
            writer = csv.writer(csvfile, dialect='excel')
            for sentence in sentences:
                writer.writerow([sentence.source, sentence.target])
    elif file_type == 'tmx':
        tmx_file = tmxfile()
        for sentence in sentences:
            tmx_file.addtranslation(sentence.source, s_lang, sentence.target,
                                    t_lang)
        tmx_file.savefile(file_url)
    elif file_type == 'xlsx':
        # Fill the renamed default sheet directly instead of saving an empty
        # workbook and loading it back first.
        dst_wb = openpyxl.Workbook()
        dst_ws = dst_wb['Sheet']
        dst_ws.title = 'transmem'
        for row, sentence in enumerate(sentences, start=1):
            dst_ws.cell(row, 1).value = sentence.source
            dst_ws.cell(row, 2).value = sentence.target
        dst_wb.save(file_url)
    return os.path.basename(file_url)
Exemplo n.º 6
0
 def test_withnewlines(self):
     """Round-trip a pair whose source and target both contain a newline."""
     source_text = "First line\nSecond line"
     target_text = "Eerste lyn\nTweede lyn"
     store = tmx.tmxfile()
     store.addtranslation(source_text, "en", target_text, "af")
     serialized = bytes(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     assert reparsed.translate(source_text) == target_text
Exemplo n.º 7
0
 def test_withnewlines(self):
     """Round-trip a pair whose source and target both contain a newline."""
     source_text = "First line\nSecond line"
     target_text = "Eerste lyn\nTweede lyn"
     store = tmx.tmxfile()
     store.addtranslation(source_text, "en", target_text, "af")
     serialized = str(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     assert reparsed.translate(source_text) == target_text
Exemplo n.º 8
0
 def po2tmx(self, posource, sourcelanguage='en', targetlanguage='af'):
     """Convert PO source text to a TMX store using in-memory streams only.

     The output stream carries a pre-built ``tmxfile`` as an attribute;
     that same object is returned after ``po2tmx.convertpo`` has run.
     """
     po_stream = wStringIO.StringIO(posource)
     tmx_stream = wStringIO.StringIO()
     tmx_stream.tmxfile = tmx.tmxfile(inputfile=None, sourcelanguage=sourcelanguage)
     po2tmx.convertpo(
         po_stream,
         tmx_stream,
         templatefile=None,
         sourcelanguage=sourcelanguage,
         targetlanguage=targetlanguage,
     )
     return tmx_stream.tmxfile
Exemplo n.º 9
0
 def test_addtranslation(self):
     """Check that a pair stored with addtranslation() survives a round trip.

     The store is serialised with ``bytes()``, re-parsed through
     ``self.tmxparse`` and the source is then looked up in the new store.
     """
     source_text = "A string of characters"
     target_text = "'n String karakters"
     store = tmx.tmxfile()
     store.addtranslation(source_text, "en", target_text, "af")
     serialized = bytes(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     assert reparsed.translate(source_text) == target_text
Exemplo n.º 10
0
def split(f_input, orig_lang, dest_lang, f_output, num_entries):
    """Split the units of a TMX file into PO files of ``num_entries`` units.

    Output files are named ``f_output + <suffix> + ".po"``. When the TMX
    holds fewer than ``num_entries`` units the first file has no numeric
    suffix; otherwise numbering starts at 1.
    """
    with open(f_input, 'rb') as tmx_stream:
        memory = tmxfile(tmx_stream, orig_lang, dest_lang)
    units = memory.units
    # A counter of 0 stands for "no numeric suffix".
    counter = 1 if len(units) >= num_entries else 0
    for batch in chunks(units, num_entries):
        print(len(batch))
        suffix = str(counter) if counter else ""
        generate_po_from_tmx(f_output + suffix + ".po", batch)
        counter += 1
Exemplo n.º 11
0
 def convert_Text2Tmx(self, src, dst):
     """Convert a delimited text file of en/th pairs into a TMX file.

     Each line of ``src`` is split on the ``delimiter`` name taken from the
     enclosing scope; the first field is stored as the "en" source and the
     second as the "th" target. The result is saved to ``dst`` and a
     confirmation dialog is shown.
     """
     tmx_file = tmxfile()
     # Use a context manager so the input handle is closed deterministically
     # (the original left it open).
     with open(src, encoding='utf-8') as fin:
         lines = fin.read().strip().split('\n')
     for line in lines:
         s = line.split(delimiter)
         tmx_file.addtranslation(s[0], "en", s[1], "th")
     tmx_file.savefile(dst)
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 12
0
 def convert_Tmx2Text(self, src, dst):
     """Convert a TMX file into delimited text, one unit per line.

     Every unit of ``src`` is written to ``dst`` as
     ``source + delimiter + target``, where ``delimiter`` comes from the
     enclosing scope. A confirmation dialog is shown afterwards.
     """
     # Both handles are managed by the with-statement so the output file is
     # closed even when parsing or writing fails. The destination is still
     # opened first, as before.
     with open(dst, 'w', encoding='utf-8') as dst_file, open(src, 'rb') as fin:
         tmx_file = tmxfile(fin, 'en', 'th')
         for node in tmx_file.unit_iter():
             dst_file.write(node.getsource() + delimiter + node.gettarget() + '\n')
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 13
0
 def test_withcomment(self):
     """Check that a comment passed to addtranslation() is kept as the note."""
     source_text = "A string of chars"
     store = tmx.tmxfile()
     store.addtranslation(source_text, "en", "'n String karakters", "af",
                          "comment")
     serialized = str(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     unit = reparsed.findunit(source_text)
     assert unit.getnotes() == "comment"
Exemplo n.º 14
0
 def test_withcomment(self):
     """tests that addtranslation() stores string's comments correctly"""
     tmxfile = tmx.tmxfile()
     tmxfile.addtranslation("A string of chars", "en",
                            "'n String karakters", "af", "comment")
     newfile = self.tmxparse(str(tmxfile))
     # Call form of print: behaves the same on Python 2 for a single
     # argument and is valid syntax on Python 3.
     print(str(tmxfile))
     assert newfile.findunit("A string of chars").getnotes() == "comment"
Exemplo n.º 15
0
 def convert_Xliff2Tmx(self, src, dst):
     """Convert an XLIFF file into a TMX file of en/th pairs.

     ``src`` is read as UTF-8 text and parsed with ``xlifffile.parsestring``;
     each unit's source/target is added as an "en"/"th" translation. The
     result is saved to ``dst`` and a confirmation dialog is shown.
     """
     # Use a context manager so the input handle is closed (the original
     # never closed it).
     with open(src, 'r', encoding="utf-8") as fin:
         data = fin.read()
     xliff_file = xlifffile.parsestring(data)
     tmx_file = tmxfile()
     for node in xliff_file.unit_iter():
         tmx_file.addtranslation(node.source, "en", node.target, "th")
     tmx_file.savefile(dst)
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 16
0
 def po2tmx(self, posource, sourcelanguage='en', targetlanguage='af',
            comment=None):
     """Convert PO source text to a TMX store using in-memory streams only.

     ``posource`` is encoded as UTF-8 for the input stream. The output
     stream carries a pre-built ``tmxfile`` as an attribute; that same
     object is returned after ``po2tmx.convertpo`` has run.
     """
     po_stream = BytesIO(posource.encode('utf-8'))
     tmx_stream = BytesIO()
     tmx_stream.tmxfile = tmx.tmxfile(inputfile=None, sourcelanguage=sourcelanguage)
     options = dict(
         templatefile=None,
         sourcelanguage=sourcelanguage,
         targetlanguage=targetlanguage,
         comment=comment,
     )
     po2tmx.convertpo(po_stream, tmx_stream, **options)
     return tmx_stream.tmxfile
Exemplo n.º 17
0
 def test_controls_cleaning(self):
     """Check how addtranslation() treats control characters in the source.

     The assertions expect the ``\\x03`` character to be gone after a round
     trip while the newline is kept.
     """
     store = tmx.tmxfile()
     store.addtranslation("Client Version:\x0314 %s", "en", "test one", "ar")
     store.addtranslation("Client Version:\n%s", "en", "test two", "ar")
     serialized = bytes(store)
     reparsed = self.tmxparse(serialized)
     # Printed so the generated document is visible in the test output.
     print(serialized)
     assert reparsed.translate("Client Version:14 %s") == "test one"
     assert reparsed.translate("Client Version:\n%s") == "test two"
Exemplo n.º 18
0
 def po2tmx(self, posource, sourcelanguage='en', targetlanguage='af',
            comment=None):
     """Run the PO-to-TMX converter on a string, bypassing the file system.

     Returns the ``tmxfile`` object that was attached to the output stream
     before ``po2tmx.convertpo`` was called.
     """
     encoded_po = posource.encode('utf-8')
     source_stream = BytesIO(encoded_po)
     result_stream = BytesIO()
     result_stream.tmxfile = tmx.tmxfile(inputfile=None, sourcelanguage=sourcelanguage)
     po2tmx.convertpo(
         source_stream,
         result_stream,
         templatefile=None,
         sourcelanguage=sourcelanguage,
         targetlanguage=targetlanguage,
         comment=comment,
     )
     return result_stream.tmxfile
Exemplo n.º 19
0
def get_parallel_corpus():
    """Load the TMX corpus named by ``WIKIPEDIA_FILE`` next to this module.

    Returns a list of ``MultiLingualSentence`` objects, one per TMX unit.
    Note that each is built with the unit's target first and its source
    second.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    corpus_path = os.path.join(module_dir, WIKIPEDIA_FILE)
    with open(corpus_path, 'rb') as tmx_stream:
        memory = tmxfile(tmx_stream, 'en', 'he')
        return [
            MultiLingualSentence(unit.gettarget(), unit.getsource())
            for unit in memory.unit_iter()
        ]
Exemplo n.º 20
0
    def __init__(self, filename, mode=None):
        """initialises tmxmultifile from a seekable inputfile or writable outputfile"""
        self.filename = filename
        if mode is None:
            if os.path.exists(filename):
                mode = 'r'
            else:
                mode = 'w'
        self.mode = mode
#        self.multifilestyle = multifilestyle
        self.multifilename = os.path.splitext(filename)[0]
#        self.multifile = open(filename, mode)
        self.tmxfile = tmx.tmxfile()
Exemplo n.º 21
0
 def test_xmlentities(self):
     """Check that '&' and '<' are escaped in the XML but not on lookup."""
     store = tmx.tmxfile()
     store.addtranslation("Mail & News", "en", "Nuus & pos", "af")
     store.addtranslation("Five < ten", "en", "Vyf < tien", "af")
     xmltext = bytes(store).decode("utf-8")
     print("The generated xml:")
     print(xmltext)
     # (raw source, expected target, escaped form expected in the XML)
     cases = (
         ("Mail & News", "Nuus & pos", "Mail &amp; News"),
         ("Five < ten", "Vyf < tien", "Five &lt; ten"),
     )
     for raw_source, expected_target, escaped_source in cases:
         assert store.translate(raw_source) == expected_target
         # index() raises ValueError when the escaped text is missing.
         assert xmltext.index(escaped_source)
         assert xmltext.find(raw_source) == -1
Exemplo n.º 22
0
 def test_xmlentities(self):
     """Check that '&' and '<' are escaped in the XML but not on lookup."""
     store = tmx.tmxfile()
     store.addtranslation("Mail & News", "en", "Nuus & pos", "af")
     store.addtranslation("Five < ten", "en", "Vyf < tien", "af")
     xmltext = str(store)
     print("The generated xml:")
     print(xmltext)
     # (raw source, expected target, escaped form expected in the XML)
     cases = (
         ('Mail & News', 'Nuus & pos', 'Mail &amp; News'),
         ('Five < ten', 'Vyf < tien', 'Five &lt; ten'),
     )
     for raw_source, expected_target, escaped_source in cases:
         assert store.translate(raw_source) == expected_target
         # index() raises ValueError when the escaped text is missing.
         assert xmltext.index(escaped_source)
         assert xmltext.find(raw_source) == -1
def load_tmx_file(file, source_language=None, target_language=None):
    """
    Loads the tmx file
    :param file: The tmx memory file to open
    :param source_language: The source language we are translating;
        defaults to 'en-GB' when not given
    :param target_language: The target language we are translating to;
        defaults to 'fr-FR' when not given
    :return: The tmx file XML file as a translation.storage.tmx object
    """
    # The language arguments used to be ignored in favour of hard-coded
    # values; they are now forwarded, with the old values as defaults.
    source_language = source_language or 'en-GB'
    target_language = target_language or 'fr-FR'

    with open(file, 'rb') as tmx_stream:
        # TODO This does not affect what is loaded
        tmx_file = tmxfile(tmx_stream, source_language, target_language)

    return tmx_file
Exemplo n.º 24
0
 def test_xmlentities(self):
     """Test that the xml entities '&' and '<'  are escaped correctly"""
     tmxfile = tmx.tmxfile()
     tmxfile.addtranslation("Mail & News", "en", "Nuus & pos", "af")
     tmxfile.addtranslation("Five < ten", "en", "Vyf < tien", "af")
     xmltext = str(tmxfile)
     # Call form of print: behaves the same on Python 2 for a single
     # argument and is valid syntax on Python 3.
     print("The generated xml:")
     print(xmltext)
     assert tmxfile.translate('Mail & News') == 'Nuus & pos'
     # index() raises ValueError when the escaped text is missing.
     assert xmltext.index('Mail &amp; News')
     assert xmltext.find('Mail & News') == -1
     assert tmxfile.translate('Five < ten') == 'Vyf < tien'
     assert xmltext.index('Five &lt; ten')
     assert xmltext.find('Five < ten') == -1
    def test_load_tmx_file(self, source_language=None, target_language=None):
        """
        Parses the en/es TMX fixture and checks the id of its first unit.

        NOTE(review): ``source_language`` and ``target_language`` are
        accepted but never used in the body.
        """
        fixture_path = 'Tests/Data/en_es.tmx'
        with open(fixture_path, 'rb') as tmx_stream:
            # TODO This does not affect what is loaded
            memory = tmxfile(tmx_stream, 'en-GB', 'es-ES')

        first_unit = memory.getunits()[0]
        expected_id = "CONVENTION ON A COMMON TRANSIT PROCEDURE"
        assert str(first_unit.getid()) == expected_id
Exemplo n.º 26
0
 def convert_Excel2Tmx(self, src, dst):
     """Convert the first sheet of an Excel workbook into a TMX file.

     Row 1 is scanned for header cells equal to 'en' and 'th' (compared
     case-insensitively) to locate the source and target columns; columns 1
     and 2 are used when no such header is found. Rows 2 onwards are added
     as en/th pairs, the TMX is saved to ``dst`` and a confirmation dialog
     is shown.
     """
     src_wb = openpyxl.load_workbook(src)
     src_ws = src_wb.worksheets[0]

     en_col = 1
     th_col = 2
     # Cell columns are 1-based and max_column is inclusive, so the range
     # must extend to max_column + 1; the original skipped the last column.
     for col in range(1, src_ws.max_column + 1):
         header = src_ws.cell(1, col).value
         # Empty or non-text header cells cannot name a language column.
         if not isinstance(header, str):
             continue
         cell_value = header.lower()
         if 'en' == cell_value:
             en_col = col
         elif 'th' == cell_value:
             th_col = col
     tmx_file = tmxfile()
     for row in range(2, src_ws.max_row + 1):
         tmx_file.addtranslation(src_ws.cell(row, en_col).value, "en", src_ws.cell(row, th_col).value, "th")
     tmx_file.savefile(dst)
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 27
0
def _token_match_rate(q_tokens, s_tokens):
    """Return the integer percentage of query tokens matched in s_tokens."""
    longest = max(len(s_tokens), len(q_tokens))
    if not longest:
        # Both token lists are empty; avoid dividing by zero.
        return 0
    score = 0
    for q_token in q_tokens:
        if q_token in s_tokens:
            score += 1
            continue
        # No exact hit: fall back to the single closest token, if any.
        matched = difflib.get_close_matches(q_token, s_tokens, n=1,
                                            cutoff=0.85)
        if matched:
            score += float(
                textdistance.ratcliff_obershelp(q_token, matched[0]))
    return int(score / longest * 100)


def concordance_search(tm_objects, searchCon, matchRate, search_lang):
    """Search translation memories for units whose source resembles a query.

    :param tm_objects: iterable of objects exposing ``file_url`` (with a
        ``name``), ``s_lang``, ``t_lang`` and ``name``.
    :param searchCon: the query text; stop words are removed before matching.
    :param matchRate: minimum match percentage a unit needs to be returned.
    :param search_lang: not used by this function.
    :return: list of dicts with keys ``source``, ``target``, ``tm_name`` and
        ``match_rate``, sorted in reverse order of ``compare_matchrate``.
    """
    out_sequences = []
    q_tokens = removeStopwords(searchCon).split()
    for tm_object in tm_objects:
        tm_url = os.path.join(settings.MEDIA_ROOT, tm_object.file_url.name)
        tm_s_lang = tm_object.s_lang
        tm_t_lang = tm_object.t_lang
        tm_name = tm_object.name
        if not os.path.isfile(tm_url):
            continue
        # The context manager closes the TMX handle; the original leaked one
        # open file per translation memory.
        with open(tm_url, 'rb') as fin:
            tmx_file = tmxfile(fin, tm_s_lang, tm_t_lang)
            for node in tmx_file.unit_iter():
                sequence = node.getsource()
                s_tokens = removeStopwords(sequence).split()
                average_rate = _token_match_rate(q_tokens, s_tokens)
                if average_rate >= matchRate:
                    out_sequences.append({
                        'source': sequence,
                        'target': node.gettarget(),
                        'tm_name': tm_name,
                        'match_rate': average_rate
                    })
    out_sequences.sort(key=compare_matchrate, reverse=True)
    return out_sequences
Exemplo n.º 28
0
 def po2tmx(posource,
            sourcelanguage="en",
            targetlanguage="af",
            comment=None):
     """Convert PO source text to a TMX store using in-memory streams only.

     ``posource`` is encoded as UTF-8 for the input stream. The output
     stream carries a pre-built ``tmxfile`` as an attribute; that same
     object is returned after ``po2tmx.convertpo`` has run.
     """
     po_stream = BytesIO(posource.encode("utf-8"))
     tmx_stream = BytesIO()
     store = tmx.tmxfile(inputfile=None, sourcelanguage=sourcelanguage)
     tmx_stream.tmxfile = store
     conversion_options = {
         "templatefile": None,
         "sourcelanguage": sourcelanguage,
         "targetlanguage": targetlanguage,
         "comment": comment,
     }
     po2tmx.convertpo(po_stream, tmx_stream, **conversion_options)
     return tmx_stream.tmxfile
Exemplo n.º 29
0
 def convert_Tmx2Xliff(self, src, dst):
     """Convert a TMX file of en/th pairs into an XLIFF file.

     Every TMX unit becomes an XLIFF unit with the same source and target.
     After saving, the XLIFF 1.1 root element written to ``dst`` is
     rewritten in place to declare version 1.2, and a confirmation dialog
     is shown.
     """
     xliff_file = xlifffile()
     xliff_file.setsourcelanguage('en')
     xliff_file.settargetlanguage('th')
     with open(src, 'rb') as fin:
         tmx_file = tmxfile(fin, 'en', 'th')
         for node in tmx_file.unit_iter():
             new_node = xliffunit(node.getsource())
             new_node.settarget(node.gettarget())
             xliff_file.addunit(new_node)
     xliff_file.savefile(dst)
     # Context managers close the handles even if reading or writing fails.
     with open(dst, "r", encoding='utf-8') as fin:
         data = fin.read()
     data = data.replace('<xliff xmlns="urn:oasis:names:tc:xliff:document:1.1" version="1.1">', '<xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">')
     with open(dst, 'w', encoding='utf-8') as fout:
         fout.write(data)
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 30
0
 def convert_Tmx2Excel(self, src, dst):
     """Convert a TMX file of en/th pairs into an Excel workbook.

     When ``dst`` does not exist, a workbook with a single sheet named
     'transmem' is created first. Row 1 of that sheet receives the headers
     'en' and 'th'; each TMX unit is written from row 2 onwards. The
     workbook is saved and a confirmation dialog is shown.
     """
     if not os.path.exists(dst):
         dst_wb = openpyxl.Workbook()
         ss_sheet = dst_wb['Sheet']
         ss_sheet.title = 'transmem'
         dst_wb.save(dst)
     dst_wb = openpyxl.load_workbook(dst)
     dst_ws = dst_wb['transmem']
     # The original also read ``src`` as UTF-8 text into an unused variable,
     # which leaked a file handle and could fail on TMX files in another
     # encoding; that read has been removed.
     dst_ws.cell(1, 1).value = 'en'
     dst_ws.cell(1, 2).value = 'th'
     with open(src, 'rb') as fin:
         tmx_file = tmxfile(fin, 'en', 'th')
         for row, node in enumerate(tmx_file.unit_iter(), start=2):
             dst_ws.cell(row, 1).value = node.getsource()
             dst_ws.cell(row, 2).value = node.gettarget()
     dst_wb.save(dst)
     QMessageBox.information(self, "Information", "Converting was done successfully")
Exemplo n.º 31
0
def get_translation(translation_file):
    '''
    Read a TMX file and return its units as a list of Segment objects.

    Each Segment is built from the unit's source and target plus two empty
    dicts. If the file does not exist, the error is printed and the
    process exits via sys.exit().
    '''
    try:
        with open(translation_file, 'rb') as tmx_stream:
            memory = tmxfile(tmx_stream)
    except FileNotFoundError as missing:
        print(missing)
        sys.exit()

    # Reached only when the file was opened and parsed successfully.
    return [
        Segment(unit.source, unit.target, {}, {})
        for unit in memory.unit_iter()
    ]
Exemplo n.º 32
0
def load_sentence(*typ):
    """Return chained unit iterators for the requested sentence categories.

    With no arguments every known category is loaded; otherwise only the
    categories named in ``typ``. The result maps each category name to one
    iterator chaining the units of all its TMX files.
    """
    files = {
        'talks': [
            '日常口语_20190906111009_1.tmx', '日常口语_20190906111009_2.tmx',
            '日常口语_20190906111009_3.tmx'
        ],
        'dictexams': [
            '词典例句汇集1.tmx', '词典例句汇集3.tmx', '词典例句汇集5.tmx', '词典例句汇集7.tmx',
            '词典例句汇集2.tmx', '词典例句汇集4.tmx', '词典例句汇集6.tmx', '词典例句汇集8.tmx'
        ]
    }

    def _unit_iters(names):
        # One unit iterator per TMX file, in the listed order.
        collected = []
        for fname in names:
            with open(data_file(fname), 'rb') as fin:
                collected.append(tmxfile(fin, 'en', 'cn').unit_iter())
        return collected

    return {
        category: chain(*_unit_iters(names))
        for category, names in files.items()
        if not typ or category in typ
    }
Exemplo n.º 33
0
    def export(self):
        """Write every translated unit of the context to a zipped TMX file.

        The export directory is created when missing. Units in state
        ``TRANSLATED`` from the context's live stores are collected into a
        TMX store, which is stored in the zip at ``self.abs_filepath`` under
        the member name ``self.filename``. The exported revision is then
        updated and the archive path returned.
        """
        src_code = self.context.project.source_language.code
        tgt_code = self.context.language.code

        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

        memory = tmx.tmxfile()
        for store in self.context.stores.live().iterator():
            translated_units = store.units.filter(state=TRANSLATED)
            for unit in translated_units:
                memory.addtranslation(
                    unit.source, src_code,
                    unit.target, tgt_code,
                    unit.developer_comment,
                )

        # Serialise in memory first, then store the bytes as one zip member.
        payload = BytesIO()
        memory.serialize(payload)
        with open(self.abs_filepath, "wb") as raw, ZipFile(raw, "w") as archive:
            archive.writestr(self.filename, payload.getvalue())

        self.update_exported_revision()

        return self.abs_filepath
Exemplo n.º 34
0
 def tmxparse(self, tmxsource):
     """helper that parses tmx source without requiring files"""
     dummyfile = wStringIO.StringIO(tmxsource)
     # Call form of print: behaves the same on Python 2 for a single
     # argument and is valid syntax on Python 3.
     print(tmxsource)
     tmxfile = tmx.tmxfile(dummyfile)
     return tmxfile
Exemplo n.º 35
0
 def get_storage(self):
     """Return a new ``tmxfile`` constructed with no arguments."""
     storage = tmxfile()
     return storage
Exemplo n.º 36
0
 def tmxparse(self, tmxsource):
     """Parse TMX source held in memory and return the resulting store.

     The source is echoed to stdout before parsing.
     """
     source_stream = wStringIO.StringIO(tmxsource)
     print(tmxsource)
     return tmx.tmxfile(source_stream)
Exemplo n.º 37
0
 def get_storage(self):
     """Return a new ``tmxfile`` constructed with no arguments."""
     storage = tmxfile()
     return storage
print(test.head())

test_source_sentences = test["source"].astype(str).tolist()
test_target_sentences = test["target"].astype(str).tolist()

source_sentences = list()
target_sentences = list()
retrived_target_sentences = list()
bleu_scores = list()
meteor_scores = list()

tmx_file_path = "result/ES-ES/unapproved.tmx"

with open(tmx_file_path, 'rb') as fin:
    tmx_file = tmxfile(fin, source, target)
    i = 0
    for node in tmx_file.unit_iter():
        i = i + 1
        source_sentence = node.getsource().strip()
        retrieved_target_sentence = node.gettarget().strip()

        index = test_source_sentences.index(
            source_sentence
        ) if source_sentence in test_source_sentences else -1

        if index > -1:
            target_sentence = test_target_sentences[index]
            bleu_score = calculate_bleu_score(target_sentence,
                                              retrieved_target_sentence)
            meteor_score = calculate_meteor_score(target_sentence,
Exemplo n.º 39
0
 def test_translate(self):
     """translate() gives None for unknown text and the target once stored."""
     source_text = "A string of characters"
     target_text = "'n String karakters"
     store = tmx.tmxfile()
     # Nothing has been added yet, so any lookup must miss.
     assert store.translate("Anything") is None
     store.addtranslation(source_text, "en", target_text, "af")
     assert store.translate(source_text) == target_text
Exemplo n.º 40
0
def inittmx(inputfile, columnorder=None):
    """Build a TMX store from ``inputfile``.

    ``columnorder`` is accepted but not used by this function.
    """
    store = tmx.tmxfile(inputfile)
    return store
            file_as_dict[key] = value

    return file_as_dict

if __name__ == '__main__':


    ####################################################################################################################
    # USE THIS BLOCK TO PREPROCESS THE RAW tmx-files AND CREATE THIS FILE: ID \t italianSent \t germanSent

    opensub_it_de = dict()
    ID = 9000000  # the starting ID - this was chosen as all sentence IDs from tatoeba do NOT exceed 9,000,000
                  # this way opensubtitle sentences are identifiable by an ID >= 9,000,000
    in_path = "/home/pia/cluwll/de-it.tmx"
    with open(in_path, 'rb') as fin:
        tmx_file = tmxfile(fin, 'de', 'it')
    for node in tmx_file.unit_iter():
        # Stored as [target, source] so the output columns are it, de.
        opensub_it_de[ID] = [node.gettarget(), node.getsource()]
        ID += 1
    print(len(opensub_it_de))
    print("done reading tmx - dict was generated")


    out_path = "/home/pia/cluwll/opensubt_id_it_de.txt"
    # newline='' lets the csv module control line endings, and an explicit
    # UTF-8 encoding keeps non-ASCII sentence text independent of the locale.
    with open(out_path, 'wt', encoding='utf-8', newline='') as fo:
        csv_writer = csv.writer(fo, delimiter='\t')
        # ``sent_id`` rather than ``id`` so the builtin is not shadowed.
        for sent_id, sent in opensub_it_de.items():
            csv_writer.writerow([sent_id, sent[0], sent[1]])
    print("finished writing to file: ", out_path)
    ####################################################################################################################