    def find_similar_locations_for_given_model(self, location_id, k, model):
        """ Find the k locations most similar to the given location under the given model """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[location_id]
        primary_df = self.create_concatenated_and_normalised_data_frame_for_model(model, True)
        most_similar_locations = []
        primary_loc_data_frame = primary_df[primary_df["location"] == location_id]

        for loc_id, arg_loc_key in loc_id_key_map.items():
            if loc_id == location_id:
                continue
            arg_data_frame = primary_df[primary_df["location"] == loc_id]

            loc_similarity = self.get_distance_measure_and_similarity_for_data_frames(
                arg_loc_key, primary_loc_data_frame, arg_data_frame)

            # Keep only the k closest locations seen so far, ordered by distance.
            if len(most_similar_locations) < k:
                most_similar_locations.append(loc_similarity)
                most_similar_locations.sort(key=lambda location: location.weighted_distance)
            elif most_similar_locations[k - 1].weighted_distance > loc_similarity.weighted_distance:
                most_similar_locations = most_similar_locations[:k - 1]
                most_similar_locations.append(loc_similarity)
                most_similar_locations.sort(key=lambda location: location.weighted_distance)

        print("Input location is {0}".format(location_key))
        self.display_result_for_model(most_similar_locations)
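The loop above keeps the running top-k result list by re-sorting after every insertion. A minimal sketch of the same selection done with heapq.nsmallest, assuming the similarity objects expose the weighted_distance attribute used above:

import heapq

def top_k_similar(similarities, k):
    """Return the k entries with the smallest weighted_distance.

    similarities is any iterable of objects carrying a weighted_distance
    attribute, like the loc_similarity values computed in the loop above.
    """
    return heapq.nsmallest(k, similarities, key=lambda s: s.weighted_distance)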
Example #2
    def create_concatenated_and_normalised_data_frame_for_model(self, model, input_option):
        """ Concatenate the data frames of all locations for a given model
            Parameters
            ----------
            model : model given by the user
            input_option : int
                           Type of reduction algorithm: 1. PCA 2. SVD 3. LDA

            Returns
            -------
            primary_df : For PCA and LDA, the normalised data frame;
                         for SVD, the raw data frame with all locations for the given model
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        primary_df = None
        for loc_id, loc_key in loc_id_key_map.items():
            file_name = self.get_file_name_from_input(loc_key, model)
            if primary_df is None:
                primary_df = self.get_data_frame(file_name)
                primary_df.insert(1, "locationId", value=loc_id)
            else:
                data_frame_to_add = self.get_data_frame(file_name)
                data_frame_to_add.insert(1, "locationId", value=loc_id)
                primary_df = pd.concat([primary_df, data_frame_to_add], axis=0, sort=False)
        return primary_df if input_option == 2 else self.normalise_methodformodel[input_option](primary_df)  # SVD (2) keeps the raw frame; otherwise normalise
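Calling pd.concat inside the loop copies the accumulated frame on every iteration, which is quadratic in the total number of rows. A sketch of the usual alternative, collecting the per-location frames in a list and concatenating once; load_frame stands in for the project's actual loader and is an assumption here:

import pandas as pd

def concat_location_frames(loc_id_key_map, load_frame):
    """Build one frame per location, then concatenate in a single pass."""
    frames = []
    for loc_id, loc_key in loc_id_key_map.items():
        frame = load_frame(loc_key)            # hypothetical per-location loader
        frame.insert(1, "locationId", loc_id)  # tag rows with their location
        frames.append(frame)
    # A single concat at the end copies each row once instead of repeatedly.
    return pd.concat(frames, axis=0, sort=False)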
Example #3
    def reduce_dimensions_given_Location_Model(self, input_param, model, entity_id, k):
        """ Gives the related locations for a given model and location id after performing dimensionality reduction to k latent semantics
            Parameters
            ----------
            input_param : int
                          Reduction algorithm given by the user: 1. PCA 2. SVD 3. LDA
            model : model given by the user
            entity_id : int
                        Location id given by the user
            k : int
                Number of latent semantics to which the matrix is reduced (given by the user)

            Returns
            -------
            reduced_dimensions, post_projection_vectors, loc_id_key_map
            The related locations for the given model and location id
        """

        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        vector_space = self.create_concatenated_and_normalized_data_frame_for_a_location_model(entity_id,
                                                                                               input_param, model)
        # Reduce the object-feature matrix to k latent semantics, then project
        # the other locations' data into the same latent space.
        (reduced_dimensions, VT) = self.reduction_method[input_param](vector_space, k)
        post_projection_vectors = self.project_data_onto_new_dimensions(entity_id, len(loc_id_key_map), VT, 4, model,
                                                                        input_param)
        return reduced_dimensions, post_projection_vectors, loc_id_key_map
Example #4

    def visualize_with_ids(self, image_id_loc):
        loc_id_key_map = DBUtils.create_location_id_key_map(self.database_ops)
        image_list = []
        # Resolve each (location id, image id) pair to its image file on disk.
        for i in image_id_loc:
            location_key = loc_id_key_map[i['loc']]
            image_list.append(self.img_path + location_key + "/" +
                              str(i['imageId']) + "." + self.format)
        image_viewer = ImageViewerMain()
        image_viewer.start_image_viewer(image_list)
Example #5

    def prepare_file_list(self, image_indexes, obj_index):
        loc_id_key_map = DBUtils.create_location_id_key_map(self.database_ops)
        file_list = []
        for image_index in image_indexes:
            image_tuple = obj_index.iloc[image_index]
            location_id = image_tuple["location"]
            location_key = loc_id_key_map[location_id]
            image_id = image_tuple[0]  # the first column holds the image id
            file_list.append(self.img_path + location_key + "/" +
                             str(image_id) + "." + self.format)
        return file_list
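Both helpers assemble image paths by string concatenation. A minimal sketch of the same construction with pathlib, assuming img_path and the image format keep the meanings they have in the snippets above:

from pathlib import Path

def image_file_path(img_path, location_key, image_id, image_format):
    """Join path components portably instead of concatenating strings."""
    return Path(img_path) / location_key / "{0}.{1}".format(image_id, image_format)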
Example #6
    def create_concatenated_and_normalized_data_frame_for_a_location(self, location_id, input_option, model=None):
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[int(location_id)]
        primary_data_frames_by_model = pd.DataFrame()
        # Concatenate the frames of every visual model side by side (column-wise).
        for visual_model in self.get_visual_model_types():
            file_name = self.get_file_name_from_input(location_key, visual_model.name)
            data_frame_to_add = self.get_data_frame(file_name)
            data_frame_to_add.drop(data_frame_to_add.columns[0], axis=1, inplace=True)  # drop the image-id column
            primary_data_frames_by_model = pd.concat([primary_data_frames_by_model, data_frame_to_add],
                                                     ignore_index=True, axis=1, sort=False)
        return primary_data_frames_by_model if input_option == 2 else self.normalise_method[input_option](
            primary_data_frames_by_model)  # SVD (2) keeps the raw frame; otherwise normalise
Example #7

    def create_concatenated_and_normalised_data_frame_for_model(self, model, normalise=False):
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        primary_df = None
        for loc_id, loc_key in loc_id_key_map.items():
            file_name = self.get_file_name_from_input(loc_key, model)
            if primary_df is None:
                primary_df = self.get_data_frame(file_name)
                primary_df.insert(1, "location", value=loc_id)
            else:
                data_frame_to_add = self.get_data_frame(file_name)
                data_frame_to_add.insert(1, "location", value=loc_id)
                primary_df = pd.concat([primary_df, data_frame_to_add], axis=0, ignore_index=True, sort=False)
        return primary_df if not normalise else self.normalise_data_frame(primary_df)
Example #8

    def find_similar_locations_for_all_models(self, location_id, k):
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[location_id]
        primary_data_frames_by_model = {}

        most_similar_locations = []
        models = self.get_visual_model_types()

        # Load the input location's normalised frame once per visual model.
        for model in models:
            file_name = self.get_file_name_from_input(location_key, model.name)
            primary_data_frames_by_model[model.name] = self.get_normalised_data_frame(file_name)

        for loc_id, arg_loc_key in loc_id_key_map.items():
            loc_similarity_for_models = []
            total_distance_for_all_models = 0
            if loc_id != location_id:
                for model in models:
                    primary_df = primary_data_frames_by_model[model.name]
                    arg_data_frame = self.get_normalised_data_frame(self.get_file_name_from_input(arg_loc_key, model.name))
                    loc_similarity_for_model = self.get_distance_measure_for_data_frames(arg_loc_key, primary_df, arg_data_frame)
                    loc_similarity_for_model.model = model.name
                    loc_similarity_for_models.append(loc_similarity_for_model)
                    # Normalise each model's contribution by its dimensionality so
                    # high-dimensional models do not dominate the combined distance.
                    similarity_contribution = loc_similarity_for_model.weighted_distance / model.dimensions
                    total_distance_for_all_models += similarity_contribution

                # Keep only the k closest locations seen so far, ordered by distance.
                if len(most_similar_locations) < k:
                    most_similar_locations.append(TotalLocationSimilarity(arg_loc_key, total_distance_for_all_models,
                                                                          loc_similarity_for_models))
                    most_similar_locations.sort(key=lambda location: location.distance)
                elif most_similar_locations[k - 1].distance > total_distance_for_all_models:
                    most_similar_locations = most_similar_locations[:k - 1]
                    most_similar_locations.append(TotalLocationSimilarity(arg_loc_key, total_distance_for_all_models,
                                                                          loc_similarity_for_models))
                    most_similar_locations.sort(key=lambda location: location.distance)

        print("Input location is {0}".format(location_key))
        self.display_result_for_all_models(most_similar_locations)
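The combined score above is the sum, over all visual models, of each model's weighted distance divided by its dimensionality. A small worked illustration with made-up model names and numbers:

# Hypothetical (weighted_distance, dimensions) per visual model.
model_distances = {"CM": (0.42, 9), "CN": (1.30, 104), "HOG": (0.88, 81)}
total_distance = sum(dist / dims for dist, dims in model_distances.values())
# 0.42/9 + 1.30/104 + 0.88/81 ≈ 0.070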
Example #9
    def create_concatenated_and_normalized_data_frame_for_a_location_model(self, location_id, input_option, model):
        """ Get the data frame of a given location for a given model
            Parameters
            ----------
            model : model given by the user
            location_id : int
                          Location id given by the user
            input_option : int
                           Type of reduction algorithm: 1. PCA 2. SVD 3. LDA

            Returns
            -------
            data_frame_to_add : Data frame of the given location for the given model
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[int(location_id)]
        file_name = self.get_file_name_from_input(location_key, model)
        data_frame_to_add = self.get_data_frame(file_name)
        data_frame_to_add.drop(data_frame_to_add.columns[0], axis=1, inplace=True)  # drop the image-id column
        return data_frame_to_add if input_option == 2 else self.normalise_method[input_option](
            data_frame_to_add)  # SVD (2) keeps the raw frame; otherwise normalise
Example #10
    def find_similar_locations(self, location_id, model, k):
        # Fetch the input location's term vector for the chosen model.
        get_terms_query = "select term, {1} from termsPerLocation where locationId = \"{0}\"".format(
            location_id, model)
        source_word_dict = {}

        get_terms_query_result = self._database_operations.executeSelectQuery(
            get_terms_query)
        conversion_func = self.get_conversion_func(model)
        for item in get_terms_query_result:
            source_word_dict[item[0]] = conversion_func(item[1])

        # Join every other location's terms against the input location's terms.
        join_query_result = "select te.locationId,te.term,te.{0}, te1.locationId, te1.term, te1.{0} from (select te2.locationId, te2.term, te2.{0} from termsPerLocation te2 where locationId <> {1})" \
                            " te LEFT JOIN (select locationId, term, {0} from termsPerLocation where locationId = {1}) te1 on te1.term=te.term;".format(
            model, location_id)
        result = self._database_operations.executeSelectQuery(
            join_query_result)
        result = self.process_text_result_sets(result, k, source_word_dict,
                                               self.get_conversion_func(model))
        location_map = DBUtils.create_location_id_key_map(
            self._database_operations)
        self.display_location_result(result, location_map)
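Interpolating location_id straight into the SQL string invites injection if the id ever comes from user input. A minimal sketch of the first query with a bound parameter, written against the standard sqlite3 DB-API; whether the project's executeSelectQuery wrapper accepts parameters is an assumption:

import sqlite3

def fetch_terms(connection, location_id, model_column):
    """Fetch (term, score) rows for one location with a bound parameter.

    model_column must come from a trusted whitelist, since column names
    cannot be bound as parameters; the column names below are hypothetical.
    """
    assert model_column in {"TF", "DF", "TF_IDF"}
    query = "select term, {0} from termsPerLocation where locationId = ?".format(model_column)
    return connection.execute(query, (location_id,)).fetchall()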
Example #11
    def reduce_dimensions_givenmodel(self, input_option, model, k, count):
        """ Gives 5 related images and locations for a given model and image id after performing dimensionality reduction
            Parameters
            ----------
            input_option : int
                           Reduction algorithm given by the user: 1. PCA 2. SVD 3. LDA
            model : model given by the user
            k : int
                Number of latent semantics to which the matrix is reduced (given by the user)
            count : int
                    Number of results to return (5 in task 3)

            Returns
            -------
            Prints the 5 related images and 5 related locations for the given model and image id
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        input_method = int(input_option)
        count = int(count)
        vector_space = self.create_concatenated_and_normalised_data_frame_for_model(model, input_method)
        vector_space = vector_space.rename(columns={vector_space.columns[0]: "image"})
        vector_space = vector_space.sort_values(['locationId', 'image'], ascending=[True, True])  # sort by location, then image
        vector_space.reset_index(drop=True, inplace=True)
        # Reduce the feature columns (everything after image and locationId) to k latent semantics.
        (latent_semantics_matrix, VT) = self.reduction_method[input_method](vector_space.iloc[:, 2:], k)
        latent_semantics = pd.DataFrame(latent_semantics_matrix)
        print("Latent semantics are")
        print(pd.DataFrame(VT))
        latent_semantics.reset_index(drop=True, inplace=True)
        reduced_space = pd.concat([vector_space.iloc[:, :2], latent_semantics], axis=1)
        print("Enter Image ID to search")
        image_id = int(input())
        (image_matrix, location_matrix) = DistanceUtils.find_similar_images_locations_for_given_model(
            image_id, k, model, reduced_space, count, loc_id_key_map)
        # Attach the human-readable location key to each result row.
        df_loc_id_key_map = pd.DataFrame(list(loc_id_key_map.items()), columns=['locationId', 'locationKey'])
        image_matrix = pd.merge(image_matrix, df_loc_id_key_map, on='locationId', how='left')
        location_matrix = pd.merge(location_matrix, df_loc_id_key_map, on='locationId', how='left')
        print("5 related Images are")
        print(image_matrix.loc[:, ['image', 'locationKey', 'dist']])
        print("5 related locations are")
        print(location_matrix.loc[:, ['locationKey', 'dist', 'locationId']])
Example #12

    def get_location_location_similarity_matrix_and_reduce(self, k):
        """ Creates a location-location similarity matrix based on cosine similarity and reduces it
            Parameters
            ----------
            k : int
                Number of dimensions to reduce to
            Returns
            -------
            void
        """
        (vector_space, object_index_dict,
         term_index_dict) = self.get_vector_space(3, "TF_IDF", True)
        np_vector_space = np.array(vector_space)
        # Pairwise cosine distances between all locations.
        distance_matrix = distance.cdist(np_vector_space,
                                         np_vector_space,
                                         metric='cosine')
        # Convert cosine distance to cosine similarity: sim = 1 - dist.
        subtract_func = lambda t: 1 - t
        vfunc = np.vectorize(subtract_func)
        distance_matrix = vfunc(distance_matrix)
        (reduced_dimensions,
         projection_direction) = self.reduction_method[2](distance_matrix, k)
        location_index = DBUtils.create_location_id_key_map(
            self._database_operations)
        self.display_topics(projection_direction, location_index)
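np.vectorize runs the lambda once per element in Python; NumPy broadcasting performs the same distance-to-similarity conversion in a single vectorised expression. A minimal sketch using the same scipy cdist call as above:

from scipy.spatial import distance

def cosine_similarity_matrix(vectors):
    """Pairwise cosine similarity via broadcasting instead of np.vectorize."""
    dist = distance.cdist(vectors, vectors, metric='cosine')
    return 1.0 - dist  # elementwise subtraction happens in C, not in Python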
Example #13
    def reduce_dimensions(self, input_param, data_option, entity_id, k):
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        vector_space = self.data_load_option[data_option](entity_id, input_param)
        (reduced_dimensions, VT) = self.reduction_method[input_param](vector_space, k)
        post_projection_vectors = self.project_data_onto_new_dimensions(entity_id, len(loc_id_key_map), VT, 5, None,
                                                                        input_param)  # model=None
        return reduced_dimensions, post_projection_vectors, loc_id_key_map