def train_model_pvdm(directory, language):
    """Build a Doc2Vec model from the documents found under ``directory``.

    Args:
        directory: Directory passed to ``findFiles`` to locate input files.
        language: Either the list ``['tags']`` or a language identifier.
            For ``['tags']`` the files are looked up with the filter
            ``['tag']`` and loaded as ``"en-text"``; otherwise ``language``
            is used both as the (single) file filter and as the language
            handed to ``load_documents``.

    Returns:
        The model returned by ``gs.models.doc2vec.Doc2Vec``, or ``0`` when
        ``load_documents`` yields nothing (a falsy result).
    """
    # Decide which files to look for and how to load them.
    if language == ['tags']:
        file_filter, load_language = ['tag'], "en-text"
    else:
        file_filter, load_language = [language], language

    documents = load_documents(findFiles(directory, file_filter),
                               load_language)
    if not documents:
        return 0

    # Hyper-parameters come from module-level pvdm_* settings.
    # NOTE(review): ``gs`` is presumably gensim -- confirm against the imports.
    settings = dict(size=pvdm_size,
                    min_count=pvdm_min_count,
                    window=pvdm_window,
                    negative=pvdm_negative,
                    workers=pvdm_workers,
                    sample=pvdm_sample)
    return gs.models.doc2vec.Doc2Vec(documents, **settings)
def copy_main_folders(downloadPath, identifier, downloadedFile):
    """Flatten a downloaded folder into ``<downloadPath>/<identifier>``.

    Files found by ``findFiles`` under ``<downloadPath>/<downloadedFile>``
    (filters ``'asset'``, ``'data'``, ``'item'``, ``'ecml'``) are copied into
    the ``assets``, ``data`` or ``items`` sub-directory of the target, or
    into the target directory itself when none of those markers match.

    Args:
        downloadPath: Base directory; must be exactly ``str`` or ``unicode``.
        identifier: Name of the target directory created under
            ``downloadPath``; must be exactly ``str`` or ``unicode``.
        downloadedFile: Name of the source folder under ``downloadPath``.

    Returns:
        None. Nothing is done when ``identifier`` or ``downloadPath`` is not
        a string.
    """
    # Silently do nothing unless both values are plain strings.
    if not (type(identifier) == unicode or type(identifier) == str):
        return
    if not (type(downloadPath) == unicode or type(downloadPath) == str):
        return

    # Collect the files to copy before any directory is created.
    source_root = os.path.join(downloadPath, downloadedFile)
    sources = findFiles(source_root, ['asset', 'data', 'item', 'ecml'])

    # Create the target directory first, then its three sub-directories.
    target_root = os.path.join(downloadPath, identifier)
    needed_dirs = [target_root]
    for sub_name in ['assets', 'data', 'items']:
        needed_dirs.append(os.path.join(target_root, sub_name))
    for folder in needed_dirs:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # (marker, sub-directory) pairs, checked in order; the first hit wins.
    # NOTE(review): the marker is searched in the whole string returned by
    # findFiles, so if that string includes directory names they can
    # influence the routing -- confirm this is intended.
    routing = [('asset', 'assets'), ('data', 'data'), ('item', 'items')]
    for src in sources:
        destination = target_root
        for marker, sub_name in routing:
            if marker in src:
                destination = os.path.join(target_root, sub_name)
                break
        shutil.copy(src, destination)
def imageNames(directory):
    """Return the distinct, normalised names of image files in ``directory``.

    Image files are those ``findFiles`` returns for the filters ``'png'``,
    ``'gif'`` and ``'jpg'``. Each path is reduced to lower-case words taken
    from its file name.

    Args:
        directory: Directory passed to ``findFiles``.

    Returns:
        A list of unique normalised names; its order is whatever iterating
        the intermediate ``set`` yields.
    """
    normalised = []
    for image_path in findFiles(directory, ['png', 'gif', 'jpg']):
        # File name without its directory and without its extension.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        # Underscores act as word separators.
        words = ' '.join(stem.split('_'))
        # Keep only runs of ASCII letters (drops digits and punctuation).
        words = ' '.join(re.findall('[a-zA-Z]+', words))
        # Break camelCase words apart, then lower-case everything.
        words = ' '.join(camel_case_split(words))
        normalised.append(words.lower())
    # A set removes duplicate names.
    return list(set(normalised))
# Example #4
# 0
def count_file_type_directory(directory, typ):
    """Count the files under ``directory`` for each file type in ``typ``.

    Args:
        directory: Directory passed to ``findFiles``.
        typ: Iterable of file-type strings. It is used both as the filter
            for ``findFiles`` and as the set of keys of the result.

    Returns:
        A dict mapping every entry of ``typ`` to the number of returned
        files whose text after the last ``'.'`` equals that entry. Entries
        with no matching file map to ``0``.
    """
    counts = dict.fromkeys(typ, 0)
    for file_path in findFiles(directory, typ):
        extension = file_path.split('.')[-1]
        # findFiles can return names with an unexpected ending such as
        # "._oldpng" (seen in org.ekstep.englishsecondlanguage and
        # org.ekstep.esl1); those are skipped rather than counted.
        if extension in counts:
            counts[extension] += 1
    return counts
def unzip_files(directory, file_type=['.zip']):
    """Extract every archive found under ``directory`` and delete it.

    Each file returned by ``findFiles(directory, file_type)`` is opened as a
    zip archive, extracted into ``directory`` and then removed. An archive
    that cannot be extracted or removed is skipped and reported instead of
    aborting the whole run.

    Args:
        directory: Directory to search and to extract into; must be a
            ``str`` or ``unicode`` instance.
        file_type: File-type filter passed to ``findFiles``.

    Returns:
        A list of the archive paths that failed, or ``None`` when
        ``directory`` is not a string (nothing is done in that case).
    """
    if not isinstance(directory, (unicode, str)):
        return None

    failed = []
    for zip_file in findFiles(directory, file_type):
        # A bad archive must not stop the remaining ones from being handled.
        try:
            # NOTE(review): extractall() on an archive from an untrusted
            # source can write outside ``directory`` via crafted member
            # names -- confirm the archives are trusted.
            with zipfile.ZipFile(zip_file, 'r') as archive:
                archive.extractall(directory)
            # Only reached when extraction succeeded.
            os.remove(zip_file)
        except Exception:
            failed.append(zip_file)
    return failed