示例#1
0
def copy_main_folders(root, identifier):
    """Flatten the ``temp<identifier>`` download tree into ``<root>/<identifier>``.

    Every file reported by ``findFiles`` for the download directory is copied
    into the ``assets``, ``data`` or ``items`` sub-directory of the
    destination, or into the destination itself when none of the markers
    match. The download directory is deleted afterwards.

    Args:
        root: Directory that holds the ``temp<identifier>`` download directory
            and in which the ``<identifier>`` directory is created.
        identifier: Content name used to build both directory names.
    """
    assert isinstance(identifier, (str, unicode))
    assert isinstance(root, (str, unicode))
    download_dir = os.path.join(root, 'temp' + identifier)
    # List of files to be copied (to flatten the directory structure)
    file_list = findFiles(download_dir, ['asset', 'data', 'item', 'ecml'])
    path = os.path.join(root, identifier)
    # Marker -> destination sub-directory, in the order the markers are tested
    destinations = [
        ('asset', os.path.join(path, 'assets')),
        ('data', os.path.join(path, 'data')),
        ('item', os.path.join(path, 'items')),
    ]
    # Create the destination directory and its sub-directories
    for target in [path] + [dest for _, dest in destinations]:
        if not os.path.exists(target):
            os.makedirs(target)
    # Copying files
    for f in file_list:
        # Classify on the path relative to the download directory so that a
        # marker occurring in ``root`` or ``identifier`` cannot misroute files.
        relative = os.path.relpath(f, download_dir)
        for marker, dest in destinations:
            if marker in relative:
                shutil.copy(f, dest)
                break
        else:
            # No marker matched: the file goes to the top of the destination
            shutil.copy(f, path)
    # Delete the messy download directory
    shutil.rmtree(download_dir)
示例#2
0
def imageNames(directory):
    """Return the distinct, normalised names of the image files in ``directory``.

    Each png/gif/jpg path returned by ``findFiles`` is reduced to lower-case
    words: directory and extension are dropped, underscores become spaces,
    only runs of ASCII letters are kept, and camel-case words are split.
    """
    unique_names = set()
    for image_path in findFiles(directory, ['png', 'gif', 'jpg']):
        # File name without its directory and without its file type
        stem = os.path.splitext(os.path.basename(image_path))[0]
        # Underscores act as word separators
        spaced = stem.replace('_', ' ')
        # Keep only alphabetic runs, which filters out numbers
        letters_only = ' '.join(re.findall('[a-zA-Z]+', spaced))
        # Split camel case, then turn all text to lower case
        unique_names.add(' '.join(camel_case_split(letters_only)).lower())
    # The set has already removed identical values, if any
    return list(unique_names)
示例#3
0
def count_file_type_directory(directory, typ):
    """Count the files found for ``directory`` per requested file type.

    Args:
        directory: Directory handed to ``findFiles``.
        typ: File-type strings (e.g. ``'png'``); each becomes a key of the
            result.

    Returns:
        dict mapping every entry of ``typ`` to the number of paths returned by
        ``findFiles`` whose text after the last ``'.'`` equals that entry.
    """
    counts = dict.fromkeys(typ, 0)
    for file_path in findFiles(directory, typ):
        extension = file_path.split('.')[-1]
        # Skip files whose ending is not one of the requested types, such as
        # ._oldpng (seen in org.ekstep.englishsecondlanguage and
        # org.ekstep.esl1), rather than hiding every error behind a bare
        # except.
        if extension in counts:
            counts[extension] += 1
    return counts
示例#4
0
def unzip_files(directory):
    """Extract every .zip archive found by ``findFiles`` into ``directory``.

    Each archive is deleted once it has been extracted. An archive that
    cannot be extracted or removed is recorded in ``bugs`` and processing
    continues with the next one.
    """
    assert isinstance(directory, (str, unicode))
    #Finds all files in a directory that are of type .zip
    zip_list = findFiles(directory, ['.zip'])
    #Paths of the archives that could not be processed (a list, so that
    #append works)
    bugs = []
    for zip_file in zip_list:
        #In case zipfile is bad
        try:
            #Extract zip file
            with zipfile.ZipFile(zip_file, 'r') as z:
                z.extractall(directory)
            #Delete zip file after extraction
            os.remove(zip_file)
        except Exception:
            #Can return bugs if you want list of buggy zip files
            bugs.append(zip_file)
        if (item_number != ''):
            data = (int(item_number), '\n'.join(json_dictionary[key]))
            if (filename in processed):
                processed[filename].append(data)
            else:
                processed[filename] = [data]
    for k in processed.keys():
        processed[k] = sorted(processed[k], key=itemgetter(0))
        processed[k] = '\n'.join([unicode(item[1]) for item in processed[k]])
    return (processed)


# Make sure the output corpus directory exists.
if not os.path.isdir(corpus_dir):
    os.makedirs(corpus_dir)

# Files matched by findFiles for '.json' under json_dir
# (presumably a list of paths -- TODO confirm against findFiles).
jsonFiles = findFiles.findFiles(json_dir, ['.json'])
for identifier_path in jsonFiles:
    max_tag_length = 5
    # One output directory per input file, named after the file with its last
    # five characters (presumably the ".json" suffix) removed.
    path = os.path.join(corpus_dir, identifier_path.split('/')[-1][:-5])
    if not os.path.isdir(path):
        os.makedirs(path)
    with codecs.open(identifier_path, 'r', encoding='utf-8') as f:
        data = json.load(f, encoding='utf-8')
    # NOTE(review): the with-block above has already closed f, so this call
    # is redundant.
    f.close()
    tags = [concept for concept in data['concepts']]
    #Data
    x = set()
    # data['data'] is joined into one string and parsed as JSON a second time.
    data_list = json.loads(''.join(data['data']), encoding='utf-8')
    for key in data_list.keys():
        x.add(''.join(
            process_data(getLowestKeyValue.flattenDict(
示例#6
0
def train_model_pvdbow(directory):
    """Train a Doc2Vec model (PV-DBOW) on the 'tag' files found for ``directory``.

    The files returned by ``findFiles(directory, ['tag'])`` are loaded with
    ``load_documents`` using the language code "en".
    """
    tag_files = findFiles(directory, ['tag'])
    documents = load_documents(tag_files, "en")
    # dm=0 applies PV-DBOW
    doc2vec_options = dict(
        size=50, min_count=3, window=8, negative=10, workers=4, sample=1e-5,
        dm=0)
    return gs.models.doc2vec.Doc2Vec(documents, **doc2vec_options)
示例#7
0
def train_model_pvdm(directory, language):
    """Train a Doc2Vec model on the '<language>-text' files found for ``directory``.

    ``language`` is used both to select the files and as the language code
    passed to ``load_documents`` (the original note lists "en" for English and
    "id" for Hindi). ``dm`` is left at the Doc2Vec default.
    """
    text_files = findFiles(directory, ['%s-text' % (language)])
    documents = load_documents(text_files, language)
    doc2vec_options = dict(
        size=50, min_count=3, window=8, negative=10, workers=4, sample=1e-5)
    return gs.models.doc2vec.Doc2Vec(documents, **doc2vec_options)
示例#8
0
# Task names (presumably the calibration steps run later -- TODO confirm).
tasklist = ["BuildEopEta", "ComputeIC"]

# Extra command-line flags; --EE is added when the EE option is set.
additional_options = ""
if options.EE:
    additional_options += " --EE "
else:
    print(
        "setting up barrel calibration, if you want endcap calibration add the option --EE"
    )

#create outdir
# os.makedirs replaces the former shell "mkdir -p" call, so a path containing
# spaces or shell metacharacters is handled correctly and a failure raises
# instead of being ignored. "~" and "$VAR" are still expanded, as the shell
# used to do.
if not os.path.isdir(os.path.expandvars(os.path.expanduser(str(options.outdir)))):
    os.makedirs(os.path.expandvars(os.path.expanduser(str(options.outdir))))

#get ntuples for the calibration
selected_filelist, extracalibtree_filelist = findFiles.findFiles(
    ntuple_dir, "unmerged", tag_list, ignored_ntuples_label_list)

# Report how many input files were selected; with verbosity >= 1 also list
# every selected file and every extraCalibTree file.
if (len(selected_filelist) > 0):
    # Bare Python 2 print statement: emits an empty line.
    print
    print("Run calibration on " + str(len(selected_filelist)) + " files:")
    if (options.verbosity >= 1):
        print("-----------------------")
        for filename in selected_filelist:
            print filename
        print("-----------------------")
        print("auto-generated extraCalibTree filelist")
        for filename in extracalibtree_filelist:
            print filename
        print("-----------------------")

else:
import findFiles

# Directory containing this script. It is the base of the default --ld value
# below, so it is assigned before the parser is built.
root = os.path.dirname(os.path.abspath(__file__))

parser = argparse.ArgumentParser()
parser.add_argument('--ld',
                    help='This is the operating directory',
                    default=os.path.join(root, 'Data'))
args = parser.parse_args()
op_dir = args.ld
if not os.path.exists(op_dir):
    os.makedirs(op_dir)

# Fetch the content list from the taxonomy-service endpoint.
r = requests.get(
    'http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/content/list'
).json()
total_identifiers = [obj['identifier'] for obj in r['result']['contents']]
# Identifiers already present in op_dir: the matched .json file names without
# their directory and without their last five characters (the ".json" suffix).
file_list = findFiles.findFiles(op_dir, ['.json'])
present_identifiers = [
    identifier[:-5].split('/')[-1] for identifier in file_list
]
# Set copy so the membership test below is O(1) per identifier instead of a
# scan over the whole list.
present_lookup = set(present_identifiers)
absent_identifiers = [
    identifier for identifier in total_identifiers
    if identifier not in present_lookup
]
for response in r['result']['contents']:
    try:
        if (response['identifier'] not in absent_identifiers
                or response['identifier'] == 'test.org.ekstep.beta-mp3'):
            continue
        subprocess.call([
            'python content2EnrichedJson.py \'http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/content\' \'%s\''