def get_geo_topic_from_single_json(self, directory):
     """Queue the JSON content of each file, keyed by the file's short name.

     For every path returned by ``get_all_files_in_directory(directory)`` the
     file is parsed as JSON and a single-entry dict ``{short_name: content}``
     is appended to ``self.geo_topic_list``. ``short_name`` is the base name
     of the file cut at its first dot.
     """
     for path in get_all_files_in_directory(directory):
         with open(path) as handle:
             # Keep only the part of the base name that precedes the first '.'.
             short_name = os.path.basename(handle.name).partition('.')[0]
             self.geo_topic_list.append({short_name: json.load(handle)})
 def get_geo_topic_from_single_json(self, directory):
     """Queue the JSON content of each file, keyed by the file's short name.

     For every path returned by ``get_all_files_in_directory(directory)`` the
     file is parsed as JSON and a single-entry dict ``{short_name: content}``
     is appended to ``self.geo_topic_list``. ``short_name`` is the base name
     of the file cut at its first dot.
     """
     for path in get_all_files_in_directory(directory):
         with open(path) as handle:
             # Keep only the part of the base name that precedes the first '.'.
             short_name = os.path.basename(handle.name).partition('.')[0]
             self.geo_topic_list.append({short_name: json.load(handle)})
 def get_geo_topic_objects_from(self, directory):
     """Extend ``self.geo_topic_list`` with the parsed JSON of each file.

     Files are the paths returned by ``get_all_files_in_directory(directory)``.
     The parsed value is handed to ``list.extend``, so each file presumably
     holds a JSON array -- TODO confirm against the data files.
     """
     for path in get_all_files_in_directory(directory):
         with open(path) as handle:
             loaded = json.load(handle)
         self.geo_topic_list.extend(loaded)
 def get_geo_topic_objects_from(self, directory):
     """Extend ``self.geo_topic_list`` with the parsed JSON of each file.

     Files are the paths returned by ``get_all_files_in_directory(directory)``.
     The parsed value is handed to ``list.extend``, so each file presumably
     holds a JSON array -- TODO confirm against the data files.
     """
     for path in get_all_files_in_directory(directory):
         with open(path) as handle:
             loaded = json.load(handle)
         self.geo_topic_list.extend(loaded)
Пример #5
0
def move_files(source_folder, destination_folder):
    """Move every file listed for ``source_folder`` into ``destination_folder``.

    The destination directory is created when it does not exist. Each file is
    moved to ``destination_folder`` under its base name only, so two source
    files sharing a base name would target the same destination path.

    Args:
        source_folder: directory passed to ``utility.get_all_files_in_directory``.
        destination_folder: directory that receives the files.
    """
    files = utility.get_all_files_in_directory(source_folder)
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
    for filepath in files:
        # os.path uses the platform's separator rules instead of assuming '/'.
        destination = os.path.join(destination_folder,
                                   os.path.basename(filepath))
        shutil.move(filepath, destination)
Пример #6
0
def parse_owl_directory(path):
    """Merge the concepts parsed from each ``.owl`` file and dump the results.

    Every path returned by ``get_all_files_in_directory(path, suffix='.owl')``
    is printed and parsed with ``parse_owl_file``; the resulting dictionaries
    are merged with ``dict.update`` (later files win on duplicate keys). The
    merged dictionary is passed to ``dump`` with ``'sweet_concepts.json'`` and
    its ``transform_to_categories`` result with
    ``'sweet_concept_categories.json'``.
    """
    merged_concepts = {}
    for owl_file in get_all_files_in_directory(path, suffix='.owl'):
        print(owl_file)
        merged_concepts.update(parse_owl_file(owl_file))
    dump(merged_concepts, 'sweet_concepts.json')

    dump(transform_to_categories(merged_concepts),
         'sweet_concept_categories.json')
Пример #7
0
def parse_owl_directory(path):
    """Merge the concepts parsed from each ``.owl`` file and dump the results.

    Every path returned by ``get_all_files_in_directory(path, suffix='.owl')``
    is printed and parsed with ``parse_owl_file``; the resulting dictionaries
    are merged with ``dict.update`` (later files win on duplicate keys). The
    merged dictionary is passed to ``dump`` with ``'sweet_concepts.json'`` and
    its ``transform_to_categories`` result with
    ``'sweet_concept_categories.json'``.
    """
    merged_concepts = {}
    for owl_file in get_all_files_in_directory(path, suffix='.owl'):
        print(owl_file)
        merged_concepts.update(parse_owl_file(owl_file))
    dump(merged_concepts, 'sweet_concepts.json')

    dump(transform_to_categories(merged_concepts),
         'sweet_concept_categories.json')
Пример #8
0
def get_magic_bytes(folder):
    """Print a JSON object mapping file names to their magic bytes.

    Each path returned by ``utility.get_all_files_in_directory(folder)`` is
    opened and handed to ``get_magic_bytes_from_file``. The result is stored
    under the part of the file's name after its last ``'/'``. Files that fail
    are reported on stderr and skipped, so one bad file does not abort the run.

    Args:
        folder: directory passed to ``utility.get_all_files_in_directory``.
    """
    files = utility.get_all_files_in_directory(folder)
    data = {}
    for json_file in files:
        try:
            # NOTE(review): the file is opened in text mode; confirm that
            # get_magic_bytes_from_file does not need a binary handle.
            with open(json_file) as file1:
                magic_bytes = get_magic_bytes_from_file(file1)
                pure_name = file1.name.split('/')[-1]
                data[pure_name] = magic_bytes
        except Exception as e:
            # Deliberate best-effort handling. str(e) replaces e.message,
            # which does not exist on Python 3 exceptions and is empty for
            # IOError on Python 2, so the handler used to fail or say nothing.
            sys.stderr.write(str(e))
            sys.stderr.write('\n')
    json_data = json.dumps(data, indent=4)
    print(json_data)
    return
Пример #9
0
def generate_files_from_directory(path_to_dir):
    """Write the file listing of ``path_to_dir`` to a text file, one per line.

    The listing comes from ``utility.get_all_files_in_directory(path_to_dir)``
    and is written to
    ``geo-topic-parser-folder/geo-topic-<dir name>-files.txt`` relative to the
    current working directory; the folder is created when missing.
    """
    listing = utility.get_all_files_in_directory(path_to_dir)

    out_dir = 'geo-topic-parser-folder'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # normpath removes a trailing separator so basename yields the last
    # path component rather than an empty string.
    leaf = os.path.basename(os.path.normpath(path_to_dir))
    listing_path = ''.join((out_dir, '/geo-topic-', leaf, '-files.txt'))

    with open(listing_path, 'w') as listing_file:
        listing_file.writelines(item + '\n' for item in listing)
Пример #10
0
def generate_files_from_directory(path_to_dir):
    """Write the file listing of ``path_to_dir`` to a text file, one per line.

    The listing comes from ``utility.get_all_files_in_directory(path_to_dir)``
    and is written to
    ``geo-topic-parser-folder/geo-topic-<dir name>-files.txt`` relative to the
    current working directory; the folder is created when missing.
    """
    listing = utility.get_all_files_in_directory(path_to_dir)

    out_dir = 'geo-topic-parser-folder'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # normpath removes a trailing separator so basename yields the last
    # path component rather than an empty string.
    leaf = os.path.basename(os.path.normpath(path_to_dir))
    listing_path = ''.join((out_dir, '/geo-topic-', leaf, '-files.txt'))

    with open(listing_path, 'w') as listing_file:
        listing_file.writelines(item + '\n' for item in listing)
Пример #11
0
def reverse(dir_list, output_name):
    """Invert a ``key -> {filename: ...}`` mapping and print both sizes.

    The JSON objects of every file found for the directories in ``dir_list``
    are merged into one dictionary, which is then inverted into
    ``filename -> {key: 1}``. The number of entries of the merged and of the
    inverted dictionary are printed.

    NOTE: ``output_name`` is opened for writing (created or truncated), but
    nothing is written to it because the dump of the inverted mapping is
    disabled.
    """
    with open(output_name, 'w'):
        json_paths = []
        for directory in dir_list:
            json_paths += get_all_files_in_directory(directory)

        merged = {}
        for json_path in json_paths:
            with open(json_path) as source:
                merged.update(json.load(source))
        print(len(merged))

        inverted = {}
        for key, per_file in merged.items():
            for filename in per_file.keys():
                inverted.setdefault(filename, {})[key] = 1
        print(len(inverted))
Пример #12
0
def reverse(dir_list, output_name):
    """Invert a ``key -> {filename: ...}`` mapping and print both sizes.

    The JSON objects of every file found for the directories in ``dir_list``
    are merged into one dictionary, which is then inverted into
    ``filename -> {key: 1}``. The number of entries of the merged and of the
    inverted dictionary are printed.

    NOTE: ``output_name`` is opened for writing (created or truncated), but
    nothing is written to it because the dump of the inverted mapping is
    disabled.
    """
    with open(output_name, 'w'):
        json_paths = []
        for directory in dir_list:
            json_paths += get_all_files_in_directory(directory)

        merged = {}
        for json_path in json_paths:
            with open(json_path) as source:
                merged.update(json.load(source))
        print(len(merged))

        inverted = {}
        for key, per_file in merged.items():
            for filename in per_file.keys():
                inverted.setdefault(filename, {})[key] = 1
        print(len(inverted))
Пример #13
0
def get_file_list(dir_list):
    """Return one flat list with the files found for each entry of ``dir_list``.

    Files are collected with ``utility.get_all_files_in_directory`` and kept
    in the order of ``dir_list``; an empty ``dir_list`` yields an empty list.
    """
    return [
        path
        for directory in dir_list
        for path in utility.get_all_files_in_directory(directory)
    ]
    def get_measurement_objects_from(self, directory):
        """Extend ``self.measurement_list`` with the parsed JSON of each file.

        Files are the paths returned by
        ``get_all_files_in_directory(directory)``. The parsed value is handed
        to ``list.extend``, so each file presumably holds a JSON array --
        TODO confirm against the data files.
        """
        for path in get_all_files_in_directory(directory):
            with open(path) as handle:
                loaded = json.load(handle)
            self.measurement_list.extend(loaded)
def test():
    """Merge per-file metadata from several sources into one JSON file each.

    Command-line arguments:
        sys.argv[1]: directory whose JSON files are merged in last; each file
            contributes to the entry named after the file without its
            ``.json`` suffix.
        sys.argv[2]: text file with one reference per line. The text after the
            last ``'/'`` of a line is the lookup key, the whole line (without
            its line ending) is the ``"doi"`` value.

    Metadata is pulled, in this order, from the measurement, sweet and
    geo-topic storages (hard-coded paths) and from the ``sys.argv[1]``
    directory; later sources overwrite earlier ones on duplicate fields. Each
    merged entry is written to
    ``/media/ravirajukrishna/My Passport/result_json/<key>.json`` and the
    elapsed time is printed at the end.
    """
    doi = sys.argv[2]
    doiDict = {}
    with open(doi, "r") as f:
        for line in f:
            line = line.rstrip('\r\n')
            doiDict[line.split('/')[-1]] = line

    time1 = datetime.datetime.now()

    measurement_storage = MeasurementStorage(
        '/media/ravirajukrishna/My Passport/yao_files/measurement/')
    sweet_storage = SweetStorage(
        '/media/ravirajukrishna/My Passport/yao_files/sweet/filename-sweet.json'
    )

    geo_topic_storage = GeoTopicStorage(
        '/media/ravirajukrishna/My Passport/yao_geoData_Large/')
    geo_topic_storage.get_geo_topic_from_single_json(
        '/media/ravirajukrishna/My Passport/yao_geoData/')

    total_dictionary = dict()

    def merge_metadata(filename, metadata):
        # A new entry starts with a "doi" field: the known reference when the
        # file name is listed in doiDict, otherwise the file name itself.
        entry = total_dictionary.setdefault(
            filename, {"doi": doiDict.get(filename, filename)})
        entry.update(metadata)

    while measurement_storage.has_next_measurement_object():
        filename, measurement_metadata = (
            measurement_storage.get_next_measurement_object())
        merge_metadata(filename, measurement_metadata)
    print("parsed measurement_storage")

    while sweet_storage.has_next_sweet_object():
        filename, sweet_metadata = sweet_storage.get_next_sweet_object()
        merge_metadata(filename, sweet_metadata)
    print("parsed sweet_storage")

    while geo_topic_storage.has_next_geo_topic_object():
        filename, geo_topic_metadata = (
            geo_topic_storage.get_next_geo_topic_object())
        merge_metadata(filename, geo_topic_metadata)
    print("parsed geo_topic_storage")

    input_dir = sys.argv[1]
    suffix = ".json"
    for ip_file in set(get_all_files_in_directory(input_dir)):
        key = ip_file.split('/')[-1]
        # Remove only a real ".json" suffix. str.rstrip(".json") strips any
        # trailing '.', 'j', 's', 'o' or 'n' characters and would turn
        # "jason.json" into "ja".
        filename_part = key[:-len(suffix)] if key.endswith(suffix) else key
        with open(ip_file) as json_file:
            jsondict = json.load(json_file)
        # Entries created here carry no "doi" field, as before.
        total_dictionary.setdefault(filename_part, {}).update(jsondict)
    print("parsed grobid_scholar")

    # Final write: one JSON file per merged entry, numbered from 1.
    for i, key in enumerate(total_dictionary, 1):
        filename = "/media/ravirajukrishna/My Passport/result_json/" + key + ".json"
        print("%d : %s" % (i, filename))
        with open(filename, 'w') as json_file:
            json.dump(total_dictionary[key], json_file, indent=4)

    time2 = datetime.datetime.now()
    print(time2 - time1)

    return
def function():
    """Print the base name of each file listed for the spectrum directory.

    The directory is hard-coded; the listing comes from
    ``utility.get_all_files_in_directory``.
    """
    spectrum_dir = '/Users/Frank/PycharmProjects/599assignment1/hw3/visualization/spectrum-grouped-by-domain/'
    for path in utility.get_all_files_in_directory(spectrum_dir):
        print(os.path.basename(path))
    def get_measurement_objects_from(self, directory):
        """Extend ``self.measurement_list`` with the parsed JSON of each file.

        Files are the paths returned by
        ``get_all_files_in_directory(directory)``. The parsed value is handed
        to ``list.extend``, so each file presumably holds a JSON array --
        TODO confirm against the data files.
        """
        for path in get_all_files_in_directory(directory):
            with open(path) as handle:
                loaded = json.load(handle)
            self.measurement_list.extend(loaded)
def test():
    """Merge per-file metadata from several sources into one JSON file each.

    Command-line arguments:
        sys.argv[1]: directory whose JSON files are merged in last; each file
            contributes to the entry named after the file without its
            ``.json`` suffix.
        sys.argv[2]: text file with one reference per line. The text after the
            last ``'/'`` of a line is the lookup key, the whole line (without
            its line ending) is the ``"doi"`` value.

    Metadata is pulled, in this order, from the measurement, sweet and
    geo-topic storages (hard-coded paths) and from the ``sys.argv[1]``
    directory; later sources overwrite earlier ones on duplicate fields. Each
    merged entry is written to
    ``/media/ravirajukrishna/My Passport/result_json/<key>.json`` and the
    elapsed time is printed at the end.
    """
    doi = sys.argv[2]
    doiDict = {}
    with open(doi, "r") as f:
        for line in f:
            line = line.rstrip('\r\n')
            doiDict[line.split('/')[-1]] = line

    time1 = datetime.datetime.now()

    measurement_storage = MeasurementStorage('/media/ravirajukrishna/My Passport/yao_files/measurement/')
    sweet_storage = SweetStorage('/media/ravirajukrishna/My Passport/yao_files/sweet/filename-sweet.json')

    geo_topic_storage = GeoTopicStorage('/media/ravirajukrishna/My Passport/yao_geoData_Large/')
    geo_topic_storage.get_geo_topic_from_single_json('/media/ravirajukrishna/My Passport/yao_geoData/')

    total_dictionary = dict()

    def merge_metadata(filename, metadata):
        # A new entry starts with a "doi" field: the known reference when the
        # file name is listed in doiDict, otherwise the file name itself.
        entry = total_dictionary.setdefault(
            filename, {"doi": doiDict.get(filename, filename)})
        entry.update(metadata)

    while measurement_storage.has_next_measurement_object():
        filename, measurement_metadata = measurement_storage.get_next_measurement_object()
        merge_metadata(filename, measurement_metadata)
    print("parsed measurement_storage")

    while sweet_storage.has_next_sweet_object():
        filename, sweet_metadata = sweet_storage.get_next_sweet_object()
        merge_metadata(filename, sweet_metadata)
    print("parsed sweet_storage")

    while geo_topic_storage.has_next_geo_topic_object():
        filename, geo_topic_metadata = geo_topic_storage.get_next_geo_topic_object()
        merge_metadata(filename, geo_topic_metadata)
    print("parsed geo_topic_storage")

    input_dir = sys.argv[1]
    suffix = ".json"
    for ip_file in set(get_all_files_in_directory(input_dir)):
        key = ip_file.split('/')[-1]
        # Remove only a real ".json" suffix. str.rstrip(".json") strips any
        # trailing '.', 'j', 's', 'o' or 'n' characters and would turn
        # "jason.json" into "ja".
        filename_part = key[:-len(suffix)] if key.endswith(suffix) else key
        with open(ip_file) as json_file:
            jsondict = json.load(json_file)
        # Entries created here carry no "doi" field, as before.
        total_dictionary.setdefault(filename_part, {}).update(jsondict)
    print("parsed grobid_scholar")

    # Final write: one JSON file per merged entry, numbered from 1.
    for i, key in enumerate(total_dictionary, 1):
        filename = "/media/ravirajukrishna/My Passport/result_json/" + key + ".json"
        print("%d : %s" % (i, filename))
        with open(filename, 'w') as json_file:
            json.dump(total_dictionary[key], json_file, indent=4)

    time2 = datetime.datetime.now()
    print(time2 - time1)

    return