def upload_file(self, pid=None, filepath=None, yes=False, output_filename=None):
    """Upload file or directory to deposit by given pid."""
    bucket_id = self._get_bucket_id(pid)

    # Check if filepath is file or DIR
    if os.path.isdir(filepath):
        # If it's a DIR alert that it is going to be tarballed
        # and uploaded
        if yes or \
                click.confirm('You are trying to upload a directory.\n'
                              'Should we upload '
                              'a tarball of the directory?'):
            if output_filename is None:
                output_filename = "{pid}_{bucket_id}_{time}.tar.gz".format(
                    pid=pid,
                    bucket_id=bucket_id,
                    time=datetime.datetime.now().strftime('%b-%d-%I%M%p-%G'))
            make_tarfile(output_filename, filepath)
            filepath = output_filename
    else:
        if output_filename is None:
            output_filename = os.path.basename(filepath)

    # data = {'filename': output_filename}
    return self._make_request(
        url="files/{bucket_id}/{filename}".format(
            bucket_id=bucket_id,
            filename=output_filename),
        data=open(filepath, 'rb'),
        method='put',
    )
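The snippets in this section all call a make_tarfile helper, but none of them define it. As a point of reference, here is a hedged sketch of what such a helper commonly looks like, based only on how it is called in these snippets (make_tarfile(output_filename, source)); the gzip mode selection and the arcname choice are assumptions for illustration, not taken from any of the original sources.

import os
import tarfile


def make_tarfile(output_filename, source_dir):
    """Pack source_dir (a file or directory) into output_filename."""
    # Assumption: use gzip compression when the target name ends in .gz
    # (e.g. the .tar.gz archives above); otherwise write a plain .tar.
    mode = "w:gz" if output_filename.endswith(".gz") else "w"
    with tarfile.open(output_filename, mode) as tar:
        # Store the source under its basename so the archive does not
        # embed the full path it was created from.
        tar.add(source_dir, arcname=os.path.basename(source_dir))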
def tar_patches_tool(self):
    log.info('Starting to tar patches_tool')
    patches_tool_path = utils.get_patches_tool_path()
    full_patch_of_patches_tool = os.path.join(patches_tool_path,
                                              SysPath.PATCHES_TOOL_TAR_GZ)
    if os.path.isfile(full_patch_of_patches_tool):
        self.remove_tar_patches_tool(full_patch_of_patches_tool)

    new_tar_of_patches_tool = os.path.join(patches_tool_path,
                                           SysPath.PATCHES_TOOL_TAR_GZ)
    utils.make_tarfile(new_tar_of_patches_tool, patches_tool_path)
    log.info('Successfully tarred patches_tool: <%s>' % full_patch_of_patches_tool)

    return full_patch_of_patches_tool
def pickle2plaintext(testing=False, option="cleanest"): """ Converted ODIN IGTs from the .pk file into tab-delimited plaintexts.""" # Makes a temp output directory for the individual files. TEMPODIN_DIR = "./tmpodin/" # for saving the temp udhr files. if not os.path.exists(TEMPODIN_DIR): os.makedirs(TEMPODIN_DIR) for language, documents in sorted(load_odin_pickle()): tab_igts = [] for d in documents: if d[0].strip() == "": continue src = remove_tags(d[0]) # Removes heading bullets, e.g. (1)... | 1) | ( 12 ) | i. ... | A2. ... src = re.sub(r"^\(?\s?\w{1,5}\s*[):.]\s*", "", src) src = re.sub(r"^\(?\w{1,5}\s*[):.]\s*", "", src) src = re.sub(r"^\(?\w{1,5}\s*[):.]\s*", "", src) morphemes = src # Joins the morphemes up into words. words = re.sub(" *- *", "", src) if option == "cleanest": # Accepts only IGTs without punctuation. if src == "" or any(i for i in string.punctuation if i in src): continue elif option == "cleaner": # Removes the example number at the end. patterns = [r"\(.{1,}\)", r"[\(\)]"] for pat in patterns: src = re.sub(pat, "", src) else: # Accepts IGTs as they are. if src == "": continue # src, eng, gloss, cite = d[0], d[1], d[2], d[3] tab_igts.append([words, morphemes, remove_tags(d[1]), remove_tags(d[2]), d[3]]) if len(tab_igts) > 0: with codecs.open(TEMPODIN_DIR + "odin-" + language + ".txt", "w", "utf8") as fout: for igt in tab_igts: print >> fout, "\t".join(igt) if testing: break if testing: # Compress the utf8 UDHR files into a single tarfile in the test dir. try: make_tarfile("test/odin-" + option + ".tar", TEMPODIN_DIR) except IOError: # if function is called within the sugarlike/src/universalcorpus dir # To move up directory to access sugarlike/data/ and sugarlike/test/. make_tarfile("../test/odin-" + option + ".tar", TEMPODIN_DIR) else: # Compresses the utf8 UDHR files into a single tarfile. try: make_tarfile("../data/odin/odin-" + option + ".tar", TEMPODIN_DIR) except IOError: # if function is called within the sugarlike/src/universalcorpus dir # To move up directory to access sugarlike/data/ and sugarlike/test/. make_tarfile("../data/odin/odin-" + option + ".tar", TEMPODIN_DIR) # Remove the udhr-utf8 directory. shutil.rmtree(TEMPODIN_DIR)
def clean_wikipedia(wiki_raw_dir):
    '''Clean all files in wiki_raw_dir and write clean files into
    ../data/wikipedia/clean/'''
    if not os.path.exists('../data/wikipedia/'):
        os.makedirs('../data/wikipedia/')
    WIKIPEDIA_CLEAN_DIR = '../data/wikipedia/clean/'
    TEMP_WIKIPEDIA_CLEAN_DIR = tempfile.mkdtemp()
    for root, dirnames, filenames in os.walk(wiki_raw_dir):
        for filename in filenames:
            filepath = os.path.join(root, filename)
            # get number for language file
            count = re.search('wiki_([\d]+).bz2', filepath).group(1)
            # get language code from filepath
            language = re.search('\/([\w]+)wiki-', filepath).group(1)
            if not os.path.exists('../data/wikipedia/clean/' + language):
                os.makedirs('../data/wikipedia/clean/' + language)
            print('cleaning ' + filepath)
            with bz2.BZ2File(filepath, 'r') as openZip:
                f = openZip.read()
            # closing ref tags without a corresponding opening tag are a
            # problem for BeautifulSoup3
            #uni_f = re.sub('</[^d]+.*?>', '', f)
            #uni_f = re.sub('</br', '', uni_f)
            uni_f = re.sub('<!\[', '', f)
            soup = BeautifulSoup('<docs>' + uni_f + '</docs>')
            doclist = soup.findAll('doc')
            with codecs.open(TEMP_WIKIPEDIA_CLEAN_DIR + '/' + language + '_' +
                             str(count), 'a', 'utf-8') as out:
                for doc in doclist:
                    content = doc.getText()
                    cleancontent = clean(content.strip())
                    out.write(cleancontent.strip() + '\n')
            make_tarfile(WIKIPEDIA_CLEAN_DIR + language + '/' + language +
                         '_' + str(count) + '.tar',
                         TEMP_WIKIPEDIA_CLEAN_DIR + '/' + language + '_' +
                         str(count))
def get_from_unicodedotorg(testing=False):
    """ Crawl and clean UDHR files from www.unicode.org . """
    TEMP_RAW_DIR = tempfile.mkdtemp()
    UDHR_DOWNLOAD = 'http://www.unicode.org/udhr/d/'
    AHREF_REGEX = '<a href="?\'?([^"\'>]*)'
    # Makes a temp output directory for the files that can be converted into utf8.
    UDHR_UTF8_DIR = './udhr-utf8/'  # for saving the temp udhr files.
    if not os.path.exists(UDHR_UTF8_DIR):
        os.makedirs(UDHR_UTF8_DIR)

    # Get the directory page from the www.unicode.org UDHR page.
    unicode_page = urllib.urlopen(UDHR_DOWNLOAD).read()
    # Crawls the www.unicode.org page for all udhr txt files.
    for i in re.findall(AHREF_REGEX, unicode_page):
        if i.endswith('.txt'):
            print UDHR_DOWNLOAD + i
            urllib.urlretrieve(UDHR_DOWNLOAD + i, filename=TEMP_RAW_DIR + i)
            with io.open(TEMP_RAW_DIR + i, 'r', encoding='utf8') as udhrfile:
                # Gets the language from the end of the first line of the file.
                lang = udhrfile.readline().partition('-')[2].strip()
                # Gets the language code from the filename.
                langcode = i.partition('.')[0].partition('_')[2]
                # Skip the header lines.
                for _ in range(5):
                    udhrfile.readline()
                # Reads the rest of the lines and that's the udhr data.
                the_rest = udhrfile.readlines()
                data = "\n".join(
                    [i.strip() for i in the_rest if i.strip() != ''])
                ##print langcode, data.split('\n')[0]
                with codecs.open(UDHR_UTF8_DIR + 'udhr-' + langcode + '.txt',
                                 'w', 'utf8') as outfile:
                    print >> outfile, data
            if testing:
                break

    if testing:
        # Compresses the utf8 UDHR files into a single tarfile in the test dir.
        try:
            make_tarfile('../test/udhr-unicode.tar', UDHR_UTF8_DIR)
        except IOError:
            # If the function is called within the sugarlike/src/universalcorpus dir,
            # move up a directory to access sugarlike/data/ and sugarlike/test/.
            make_tarfile('../../test/udhr-unicode.tar', UDHR_UTF8_DIR)
    else:
        # Compresses the utf8 UDHR files into a single tarfile.
        try:
            make_tarfile('../data/udhr/udhr-unicode.tar', UDHR_UTF8_DIR)
        except IOError:
            # If the function is called within the sugarlike/src/universalcorpus dir,
            # move up a directory to access sugarlike/data/ and sugarlike/test/.
            make_tarfile('../../data/udhr/udhr-unicode.tar', UDHR_UTF8_DIR)

    # Removes the udhr-utf8 directory.
    shutil.rmtree(UDHR_UTF8_DIR)
def main(args):
    if not args.skip_extractor:
        extractor = Extractor(video_folder=args.video_folder,
                              pretrained_vibe=args.pretrained_vibe,
                              pretrained_spin=args.pretrained_spin,
                              output_folder=args.extractor_results_folder,
                              render=args.render_extractor_results,
                              tracking_method=args.tracking_method,
                              staf_dir=args.staf_dir,
                              run_smplify=args.run_smplify)
        extractor.run()

    synthesiser = Synthesiser(blender=args.blender,
                              debug_blender=args.debug_blender,
                              motion_path=args.extractor_results_folder,
                              target_size=args.target_size,
                              num_frames=args.num_frames)
    synthesiser.run()

    make_tarfile(args.output, 'output')
    logging.info('Done.')
def rename_omniglotphrase_tarfile(intarfile):
    """ Rename the files and use ISO codes instead of full language names. """
    TEMP_DIR = tempfile.mkdtemp()
    with tarfile.open(intarfile) as tf:
        for member in tf.getmembers():
            tf.extract(member, TEMP_DIR)

    TEMP_OUT_DIR = tempfile.mkdtemp()
    for infile in os.listdir(TEMP_DIR):
        _, lang = infile.split(".")
        lang = lang.split("_")[0]
        isocode = langiso(lang)
        if len(isocode) > 0:
            with codecs.open(TEMP_DIR + "/" + infile, "r", "utf8") as fin:
                fout = codecs.open(TEMP_OUT_DIR + "/omniglotphrase-" +
                                   isocode[0] + ".txt", "w", "utf8")
                for line in fin:
                    try:
                        eng, src = line.strip().split("\t")
                        print >> fout, src + "\t" + eng
                    except ValueError:
                        print lang, line
                        pass

    make_tarfile("../../data/omniglot/omniglotphrases.tar", TEMP_OUT_DIR + "/")
def clean_wikipedia(wiki_raw_dir, option = "firstfile"): ''' Clean all files in wiki_raw_dir and write clean files into data/wikipedia/clean/ . Options: - firstfile: cleans and stores only one folder (AA) per language. For "normal" WikiExtractor setting, this corresponds to 100 files with 5000K each. Currently this means, that for the 20 most frequent languages (see http://meta.wikimedia.org/wiki/List_of_Wikipedias), part of the data is ignored. - all: cleans and stores all folders ''' c = 1 skippedcount = 1 if not os.path.exists(wiki_raw_dir): print('no such path:' + wiki_raw_dir) if not os.path.exists('data/wikipedia/'): os.makedirs('data/wikipedia/') WIKIPEDIA_CLEAN_DIR = 'data/wikipedia/clean/' TEMP_WIKIPEDIA_CLEAN_DIR = tempfile.mkdtemp() for root, dirnames, filenames in os.walk(wiki_raw_dir): for filename in filenames: filepath = os.path.join(root, filename) # get number for language file and in case of option=firstfile # skip all files that are not in a AA folder count = re.search('wiki_([\d]+).bz2', filepath).group(1) if option == "firstfile" and not 'AA/wiki' in filepath: if count == '00' and 'AB/wiki' in filepath: print('[option=firstfile] More files available ' + str(skippedcount) + ': ' + filepath) skippedcount += 1 continue language = get_iso(filepath) if language == None: continue if not os.path.exists('data/wikipedia/clean/' + language): os.makedirs('data/wikipedia/clean/' + language) print('cleaning file ' + str(c) + ': ' + filepath) c += 1 with bz2.BZ2File(filepath, 'r') as openZip: f = openZip.read() # closing ref tags without a corresponding opening tag are a # problem for BeautifulSoup3 #uni_f = re.sub('</[^d]+.*?>', '', f) #uni_f = re.sub('</br', '', uni_f) uni_f = re.sub('<!\[', '', f) soup = BeautifulSoup('<docs>' + uni_f + '</docs>') doclist = soup.findAll('doc') with codecs.open(TEMP_WIKIPEDIA_CLEAN_DIR + '/' + language + '_' + str(count), 'a', 'utf-8') as out: for doc in doclist: content = doc.getText() cleancontent = clean(content.strip()) out.write(cleancontent.strip() + '\n') make_tarfile(WIKIPEDIA_CLEAN_DIR + language + '/' + language + '_' + str(count) + '.tar', TEMP_WIKIPEDIA_CLEAN_DIR + '/' + language + '_' + str(count))
def get_phrases(with_mp3=False, testing=False):
    """ Gets the phrases list from Omniglot. """
    # Downloads and opens the phrases index.htm on Omniglot.
    phrase_lang = urllib2.urlopen(MULTILING_URLS['phrase_lang']).read()

    # Makes a temp output directory for the phrases files.
    outputdir = DATADIR + 'omniglot-temp/'
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    for link in re.findall(AHREF_REGEX, phrase_lang):
        # Finds all links to the phrases page for each language.
        if '/language/phrases/' in link and not link.endswith('index.htm'):
            # Gets the name of the language in English.
            langname = link.rpartition('/')[2].strip().rpartition('.')[0]
            # Creates a textfile for the output.
            outfile = codecs.open(outputdir + 'omnilgotphrases-' + langname +
                                  '.txt', 'w', 'utf8')
            # Finds the section that starts with <div id="unicode">.
            soup = bs(urllib2.urlopen(OMNIGLOT + link).read()).findAll(
                id='unicode')[0]
            # Gets the name of the language in the language itself.
            langname2 = bs(str(soup.findAll('th')[1])).text

            all_phrases = defaultdict(list)
            # Each <tr>...</tr> is a phrase in the table.
            phrasetable = soup.findAll('tr')
            for phrases in phrasetable:
                try:
                    # Each <td>...</td> is a column in the <tr/>.
                    eng, phrase = bs(unicode(phrases)).findAll('td')
                    eng = str(eng.text)
                    if with_mp3:
                        # Maps the phrase to the corresponding mp3.
                        phrase_mp3 = zip([i.strip() for i in
                                          unicode(phrase.text).split('\n')
                                          if i != ''],
                                         re.findall(AHREF_REGEX, str(phrase)))
                        all_phrases[eng] += phrase_mp3
                    else:
                        all_phrases[eng] += [i.strip() for i in
                                             unicode(phrase.text).split('\n')
                                             if i.strip() != '']
                except ValueError:
                    pass

            # Outputs to file.
            for gloss in all_phrases:
                eng = gloss.replace('\n ', ' ').strip()
                repls = {'todance': 'to dance',
                         'Christmasand': 'Christmas and',
                         'ladywill': 'lady will',
                         'hovercraftis': 'hovercraft is',
                         'languageis': 'language is'}
                eng = reduce(lambda a, kv: a.replace(*kv), repls.iteritems(), eng)
                for trg in all_phrases[gloss]:
                    if type(trg) is tuple:
                        trg = "\t".join(trg)
                    print >> outfile, eng + "\t" + trg + "\t" + OMNIGLOT + link
                    print eng + "\t" + trg + "\t" + OMNIGLOT + link

            if testing:
                # Only process one page if testing.
                break
            time.sleep(random.randrange(5, 10))

    if testing:
        # Compresses the omniglot phrases files into the tarfile in the test dir.
        try:
            make_tarfile(TESTDIR + 'omniglot-phrases.tar', outputdir)
        except IOError:
            make_tarfile("../" + TESTDIR + 'omniglot-phrases.tar', outputdir)
    else:
        # Compresses the omniglot phrases files into a single tarfile.
        try:
            make_tarfile(DATADIR + 'omniglot/omniglot-phrases.tar', outputdir)
        except IOError:
            make_tarfile("../" + DATADIR + 'omniglot/omniglot-phrases.tar',
                         outputdir)

    # Removes the temp phrases directory.
    try:
        shutil.rmtree(outputdir)
    except WindowsError:
        # If Windows complains, glob through and remove each file individually.
        import glob
        for f in glob.glob(outputdir):
            os.remove(f)
from utils import make_tarfile, upload_file, file_exists_in_bucket, purge_backups
import os
import sys
import datetime as dt

log = open("backup.log", "a")
sys.stdout = log

DIRECTORY = os.environ['BACKUP_DIR']
output_path = make_tarfile(DIRECTORY)

print('Starting job @ {}'.format(dt.datetime.now()))
print('Successfully compressed {} into {}'.format(DIRECTORY, output_path))
print('Beginning upload...')

upload_file(output_path)

if not file_exists_in_bucket(output_path):
    print('\nVerification of file upload to s3 failed...exiting')
    exit()

print('\nVerified file upload to s3...Cleaning up')
print('Removing backup archive {}'.format(output_path))
os.remove(output_path)
purge_backups()
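Unlike the two-argument helper sketched earlier, the utils.make_tarfile imported by this backup script takes only the source directory and returns the path of the archive it created. A minimal sketch under that assumption follows; the timestamped .tar.gz naming scheme is a hypothetical choice, not taken from the original utils module.

import os
import tarfile
import datetime as dt


def make_tarfile(source_dir):
    """Compress source_dir into a timestamped .tar.gz and return its path."""
    base = os.path.basename(os.path.normpath(source_dir))
    # Hypothetical naming scheme: <dirname>-<timestamp>.tar.gz in the cwd.
    output_path = '{}-{}.tar.gz'.format(
        base, dt.datetime.now().strftime('%Y%m%d-%H%M%S'))
    with tarfile.open(output_path, 'w:gz') as tar:
        tar.add(source_dir, arcname=base)
    return output_path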