    def find_similar_locations_for_given_model(self, location_id, k, model):
        """Print the k locations most similar to location_id under the given model."""
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[location_id]
        primary_df = self.create_concatenated_and_normalised_data_frame_for_model(model, True)
        most_similar_locations = []
        primary_loc_data_frame = primary_df[primary_df["location"] == location_id]

        for id in loc_id_key_map:
            if id != location_id:
                arg_loc_key = loc_id_key_map[id]
                arg_data_frame = primary_df[primary_df["location"] == id]

                loc_similarity = self.get_distance_measure_and_similarity_for_data_frames(arg_loc_key, primary_loc_data_frame, arg_data_frame)

                most_similar_locations_len = len(most_similar_locations)

                if most_similar_locations_len < k:
                    most_similar_locations.append(loc_similarity)
                    most_similar_locations = sorted(most_similar_locations,
                                                    key=lambda location: location.weighted_distance)
                elif most_similar_locations[k-1].weighted_distance > loc_similarity.weighted_distance:
                    most_similar_locations = most_similar_locations[:k-1]
                    most_similar_locations.append(loc_similarity)
                    most_similar_locations = sorted(most_similar_locations,
                                                    key=lambda location: location.weighted_distance)

        print("Input location is {0}".format(location_key))
        self.display_result_for_model(most_similar_locations)
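The loop above keeps the running top-k list by appending each candidate and re-sorting. A minimal sketch of the same selection using the standard library's heapq.nsmallest (a hedged alternative, assuming the similarity objects expose weighted_distance as above):

import heapq

def top_k_by_weighted_distance(similarities, k):
    # Equivalent to the append-and-resort loop above, but O(n log k)
    # instead of re-sorting the candidate list on every insertion.
    return heapq.nsmallest(k, similarities, key=lambda s: s.weighted_distance)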
Example #2
    def reduce_dimensions_given_Location_Model(self, input_param, model, entity_id, k):
        """ Gives 5 related locations for a given model and location id after performing dimensionality reduction to k latent semantics
            Parameters
            ----------
            input_param : int
                          Reduction algorithm given by the user 1.PCA 2.SVD 3.LDA
            model : model given by user
            k : int
                Number of latent semantics to which the matrix has to be reduced (given by user)
            entity_id : int
                        Location id given by the user

            Returns
            -------
            reduced_dimensions, post_projection_vectors, loc_id_key_map
            Gives 5 related locations for a given model and location id
        """

        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        vector_space = self.create_concatenated_and_normalized_data_frame_for_a_location_model(entity_id,
                                                                                               input_param, model
                                                                                               )
        (reduced_dimensions, VT) = self.reduction_method[input_param](vector_space, k)
        post_projection_vectors = self.project_data_onto_new_dimensions(entity_id, len(loc_id_key_map), VT, 4, model,
                                                                        input_param)
        return reduced_dimensions, post_projection_vectors, loc_id_key_map
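The reduction_method table used above is not shown in the listing. A minimal sketch of what such a 1.PCA 2.SVD 3.LDA dispatch could look like, assuming scikit-learn implementations (the project may implement these reductions differently):

from sklearn.decomposition import PCA, TruncatedSVD, LatentDirichletAllocation

def _pca(vector_space, k):
    reducer = PCA(n_components=k)
    return reducer.fit_transform(vector_space), reducer.components_  # (reduced data, VT)

def _svd(vector_space, k):
    reducer = TruncatedSVD(n_components=k)
    return reducer.fit_transform(vector_space), reducer.components_

def _lda(vector_space, k):
    reducer = LatentDirichletAllocation(n_components=k)
    return reducer.fit_transform(vector_space), reducer.components_

# Keys match the user menu: 1.PCA 2.SVD 3.LDA
reduction_method = {1: _pca, 2: _svd, 3: _lda}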
Example #3
    def create_concatenated_and_normalised_data_frame_for_model(self, model, input_option):
        """ Concatenate data frames for all locations for a given model
            Parameters
            ----------
            model : model given by user
            input_option : int
                           Type of reduction algorithm 1.PCA 2.SVD 3.LDA

            Returns
            -------
            primary_df : For PCA and LDA, returns the normalised data frame;
                         for SVD, the raw data frame with all locations for the given model
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        primary_df = None
        for id in loc_id_key_map:
            loc_key = loc_id_key_map[id]
            file_name = self.get_file_name_from_input(loc_key, model)
            if primary_df is None:
                primary_df = self.get_data_frame(file_name)
                primary_df.insert(1, "locationId", value=id)
            else:
                data_frame_to_add = self.get_data_frame(file_name)
                data_frame_to_add.insert(1, "locationId", value=id)
                primary_df = pd.concat([primary_df, data_frame_to_add], axis=0, sort=False)
        return primary_df if input_option == 2 else self.normalise_methodformodel[input_option](primary_df)  # if not SVD, then normalise
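The normalise_methodformodel table is also not shown in the listing. A plausible column-wise min-max normaliser for the feature columns, assuming the first two columns hold the image id and locationId (a sketch, not the project's actual implementation):

def min_max_normalise(df):
    # Scale every feature column into [0, 1]; leave the two id columns untouched.
    result = df.copy()
    features = result.columns[2:]
    col_min = result[features].min()
    col_range = (result[features].max() - col_min).replace(0, 1)  # guard against /0
    result[features] = (result[features] - col_min) / col_range
    return result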
Example #4
 def visualize_with_ids(self, image_id_loc):
     loc_id_key_map = DBUtils.create_location_id_key_map(self.database_ops)
     image_list = []
     for i in image_id_loc:
         location_key = loc_id_key_map[i['loc']]
         image_list.append(self.img_path + location_key + "/" +
                           str(i['imageId']) + "." + self.format)
     image_viewer = ImageViewerMain()
     image_viewer.start_image_viewer(image_list)
Example #5
 def prepare_file_list(self, image_indexes, obj_index):
     loc_id_key_map = DBUtils.create_location_id_key_map(self.database_ops)
     file_list = []
     for image_index in image_indexes:
         image_tuple = obj_index.iloc[image_index]
         location_id = image_tuple["location"]
         location_key = loc_id_key_map[location_id]
         image_id = image_tuple[0]
         file_list.append(self.img_path + location_key + "/" +
                          str(image_id) + "." + self.format)
     return file_list
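A short usage sketch tying the two helpers together; the index values, viewer instance, and obj_index frame here are illustrative only:

nearest_indexes = [12, 87, 341]  # hypothetical output of a nearest-neighbour search
files = viewer.prepare_file_list(nearest_indexes, obj_index)
ImageViewerMain().start_image_viewer(files)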
Example #6
 def create_concatenated_and_normalized_data_frame_for_a_location(self, location_id, input_option, model=None):
     loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
     location_key = loc_id_key_map[int(location_id)]
     primary_data_frames_by_model = pd.DataFrame()
     for model in self.get_visual_model_types():  # note: the loop variable shadows the unused `model` parameter
         file_name = self.get_file_name_from_input(location_key, model.name)
         data_frame_to_add = self.get_data_frame(file_name)
         data_frame_to_add.drop(data_frame_to_add.columns[0], axis=1, inplace=True)
         primary_data_frames_by_model = pd.concat([primary_data_frames_by_model, data_frame_to_add],
                                                  ignore_index=True, axis=1, sort=False)
     return primary_data_frames_by_model if input_option == 2 else self.normalise_method[input_option](
         primary_data_frames_by_model)  # if not SVD, then normalise
Example #7
 def create_concatenated_and_normalised_data_frame_for_model(self, model, normalise=False):
     loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
     primary_df = None
     for id in loc_id_key_map:
         loc_key = loc_id_key_map[id]
         file_name = self.get_file_name_from_input(loc_key, model)
         if primary_df is None:
             primary_df = self.get_data_frame(file_name)
             primary_df.insert(1, "location", value=id)
         else:
             data_frame_to_add = self.get_data_frame(file_name)
             data_frame_to_add.insert(1, "location", value=id)
             primary_df = pd.concat([primary_df, data_frame_to_add], axis=0, ignore_index=True, sort=False)
     return primary_df if not normalise else self.normalise_data_frame(primary_df)
Example #8
    def process_desctxt_files(self):

        text_processor = TextFileProcessor(self._base_path)
        xml_processor = XmlFileProcessor(self._base_path)

        xml_processor.parse_xml(self._devset_topics,
                                self.process_devset_topics_xml)
        queries = text_processor.process_text_file(
            self._textTermsPerUserFile, self.process_text_terms_per_user)
        queries += text_processor.process_text_file(
            self._textTermsPerImageFile, self.process_text_terms_per_image)
        queries += text_processor.process_text_file(
            self._textTermsPerPOIwFolderNamesFile,
            self.process_text_terms_per_POI,
            DBUtils.create_location_key_id_map(self._database_operations))
        self._database_operations.executeWriteQueries(queries)
Example #9
    def find_similar_locations_for_all_models(self, location_id, k):
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[location_id]
        primary_data_frames_by_model = {}

        most_similar_locations = []
        models = self.get_visual_model_types()

        for model in models:
            file_name = self.get_file_name_from_input(location_key, model.name)
            primary_data_frames_by_model[model.name] = self.get_normalised_data_frame(file_name)

        for id in loc_id_key_map:
            arg_loc_key = loc_id_key_map[id]
            loc_similarity_for_models = []
            total_distance_for_all_models = 0
            if id != location_id:
                for model in models:
                    primary_df = primary_data_frames_by_model[model.name]
                    arg_data_frame = self.get_normalised_data_frame(self.get_file_name_from_input(arg_loc_key, model.name))
                    loc_similarity_for_model = self.get_distance_measure_for_data_frames(arg_loc_key, primary_df, arg_data_frame)
                    loc_similarity_for_model.model = model.name
                    loc_similarity_for_models.append(loc_similarity_for_model)
                    similarity_contribution = loc_similarity_for_model.weighted_distance / model.dimensions
                    total_distance_for_all_models += similarity_contribution

                most_similar_locations_count = len(most_similar_locations)

                if most_similar_locations_count < k:
                    most_similar_locations.append(TotalLocationSimilarity(arg_loc_key, total_distance_for_all_models,
                                                                          loc_similarity_for_models))
                    most_similar_locations = sorted(most_similar_locations, key=lambda location: location.distance)
                elif most_similar_locations[k - 1].distance > total_distance_for_all_models:
                    most_similar_locations = most_similar_locations[:k - 1]
                    most_similar_locations.append(TotalLocationSimilarity(arg_loc_key, total_distance_for_all_models,
                                                                          loc_similarity_for_models))
                    most_similar_locations = sorted(most_similar_locations, key=lambda location: location.distance)


        print("Input location is {0}".format(location_key))
        self.display_result_for_all_models(most_similar_locations)
        return None
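The per-model contribution above divides each weighted distance by the model's feature dimensionality, so high-dimensional models do not dominate the total. The same aggregation as a standalone sketch (names and values are illustrative only):

def total_distance_across_models(distance_by_model, dimensions_by_model):
    # Scale each model's distance by its dimensionality, then sum.
    return sum(d / dimensions_by_model[m] for m, d in distance_by_model.items())

# e.g. total_distance_across_models({'modelA': 4.2, 'modelB': 80.5},
#                                   {'modelA': 9, 'modelB': 104})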
Example #10
    def create_concatenated_and_normalized_data_frame_for_a_location_model(self, location_id, input_option, model):
        """ Get data frame for a given location for a given model
            Parameters
            ----------
            model : model given by user
            location_id : int
                          Location id given by user
            input_option : int
                           Type of reduction algorithm 1.PCA 2.SVD 3.LDA

            Returns
            -------
            data_frame_to_add : Data frame for the given location and model
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        location_key = loc_id_key_map[int(location_id)]
        file_name = self.get_file_name_from_input(location_key, model)
        data_frame_to_add = self.get_data_frame(file_name)
        data_frame_to_add.drop(data_frame_to_add.columns[0], axis=1, inplace=True)
        return data_frame_to_add if input_option == 2 else self.normalise_method[input_option](
            data_frame_to_add)  # if not SVD, then normalise
Example #11
    def find_similar_locations(self, location_id, model, k):
        get_terms_query = "select term, {1} from termsPerLocation where locationId = \"{0}\"".format(
            location_id, model)
        source_word_dict = {}

        get_terms_query_result = self._database_operations.executeSelectQuery(
            get_terms_query)
        conversion_func = self.get_conversion_func(model)
        for item in get_terms_query_result:
            source_word_dict[item[0]] = conversion_func(item[1])

        join_query_result = "select te.locationId,te.term,te.{0}, te1.locationId, te1.term, te1.{0} from (select te2.locationId, te2.term, te2.{0} from termsPerLocation te2 where locationId <> {1})" \
                            " te LEFT JOIN (select locationId, term, {0} from termsPerLocation where locationId = {1}) te1 on te1.term=te.term;".format(
            model, location_id)
        result = self._database_operations.executeSelectQuery(
            join_query_result)
        result = self.process_text_result_sets(result, k, source_word_dict,
                                               self.get_conversion_func(model))
        location_map = DBUtils.create_location_id_key_map(
            self._database_operations)
        self.display_location_result(result, location_map)
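The queries above format location_id and the model column straight into the SQL string. A hedged sketch of the first query with a whitelisted column name and a bound parameter, assuming the underlying driver accepts ?-style placeholders (the actual DBUtils interface may differ):

ALLOWED_MODELS = {'TF', 'DF', 'TF_IDF'}  # assumed menu of term-weight columns

def build_terms_query(location_id, model):
    # Column names cannot be bound as parameters, so whitelist them instead.
    if model not in ALLOWED_MODELS:
        raise ValueError('unknown model: {0}'.format(model))
    query = 'select term, {0} from termsPerLocation where locationId = ?'.format(model)
    return query, (location_id,)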
Example #12
    def reduce_dimensions_givenmodel(self, input_option, model, k, count):
        """ Gives 5 related images and locations for a given model and image id after performing dimensionality reduction
            Parameters
            ----------
            input_option : int
                           Reduction algorithm given by the user 1.PCA 2.SVD 3.LDA
            model : model given by user
            k : int
                Number of latent semantics to which the matrix has to be reduced (given by user)
            count : int
                    Number of related results to return (5 in task 3)

            Returns
            -------
            Gives 5 related images and 5 related locations for a given model and image id
        """
        loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
        input_method = int(input_option)
        count = int(count)
        vector_space = self.create_concatenated_and_normalised_data_frame_for_model(model, input_method)
        vector_space = vector_space.rename(columns={vector_space.columns[0]: "image"})
        vector_space = vector_space.sort_values(['locationId', 'image'], ascending=[True, True])  # sort by location and image
        vector_space.reset_index(drop=True, inplace=True)
        (latent_semantics_matrix, VT) = self.reduction_method[input_method](vector_space.iloc[:, 2:], k)
        latent_semantics = pd.DataFrame(latent_semantics_matrix)
        print("Latent semantics are")
        print(pd.DataFrame(VT))
        latent_semantics.reset_index(drop=True, inplace=True)
        reduced_space = pd.concat([vector_space.iloc[:, :2], latent_semantics], axis=1)
        print("Enter Image ID to search")
        image_id = int(input())
        (image_matrix, location_matrix) = DistanceUtils.find_similar_images_locations_for_given_model(
            image_id, k, model, reduced_space, count, loc_id_key_map)
        df_loc_id_key_map = pd.DataFrame(list(loc_id_key_map.items()), columns=['locationId', 'locationKey'])
        image_matrix = pd.merge(image_matrix, df_loc_id_key_map, on='locationId', how='left')
        location_matrix = pd.merge(location_matrix, df_loc_id_key_map, on='locationId', how='left')
        print("5 related Images are")
        print(image_matrix.loc[:, ['image', 'locationKey', 'dist']])
        print("5 related locations are")
        print(location_matrix.loc[:, ['locationKey', 'dist', 'locationId']])
Example #13
 def get_location_location_similarity_matrix_and_reduce(self, k):
     """ Creates a location location similarity matrix based on cosine and reduces it
         Parameters
         ----------
         k : int
         number of dimensions to be reduced to
         Returns
         -------
         void
     """
     (vector_space, object_index_dict,
      term_index_dict) = self.get_vector_space(3, "TF_IDF", True)
     np_vector_space = np.array(vector_space)
     distance_matrix = distance.cdist(np_vector_space,
                                      np_vector_space,
                                      metric='cosine')
     # cosine similarity = 1 - cosine distance
     distance_matrix = 1.0 - distance_matrix
     (reduced_dimensions,
      projection_direction) = self.reduction_method[2](distance_matrix, k)
     location_index = DBUtils.create_location_id_key_map(
         self._database_operations)
     self.display_topics(projection_direction, location_index)
Example #14
import requests
from lxml import etree
import time
from dbUtils import DBUtils
from multiprocessing import Pool

# Request headers
headers = {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
    'Connection': 'close'
}

# Create the database utility object
db = DBUtils()

# List holding each page's data
# list_page_info = []

# Flag: whether this is the first crawl
flag_first_access = True
# Holds the basic attribute names
list_basic_attribute_name = []


def get_sub_links(url):
    try:
        res = requests.get(url, headers=headers, timeout=2)
    except (
            requests.Timeout,
            requests.ConnectTimeout,
            requests.ConnectionError):
        # (the listing is truncated here; presumably the original logs the
        # error and skips or retries the URL)
        return None
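The listing cuts off inside the except clause above. A minimal retry wrapper in the same spirit, reusing the shared headers (a sketch under assumed semantics, not the original code):

def get_with_retry(url, retries=3, delay=1):
    # Try the request a few times, backing off briefly between timeouts.
    for _ in range(retries):
        try:
            return requests.get(url, headers=headers, timeout=2)
        except (requests.Timeout, requests.ConnectionError):
            time.sleep(delay)
    return None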
Example #15
from gsheets import GSheets
from configparser import ConfigParser
from datetime import datetime, timedelta
# The imports below do not appear in the original listing and are assumed:
from pymongo import MongoClient
from telethon import TelegramClient
from quart import Quart
from dbUtils import DBUtils  # assumed project-local module

config = ConfigParser()
config.read('conf.ini')

API_ID = config['CONF']['API_ID']
API_HASH = config['CONF']['API_HASH']
PHONE_NUMBER = config['CONF']['PHONE_NUMBER_IN_INTERNATIONAL_FORMAT']
BOT_TOKEN = config['CONF']['BOT_TOKEN']
DB_URL = config['CONF']['DB_URL']

mongoClient = MongoClient(DB_URL)
db = mongoClient.telegramDB
dbUtils = DBUtils(db)
sheets = GSheets(db)

# Telethon client
client = TelegramClient(f'quart_{PHONE_NUMBER}', API_ID, API_HASH)
bot = TelegramClient('Bot', API_ID, API_HASH)
client.parse_mode = 'html'  # <- Render things nicely
bot.parse_mode = 'html'
botObject = None
clientObject = None
phone = None

# Quart app
app = Quart(__name__)
app.secret_key = 'CHANGE THIS TO SOMETHING SECRET'
logged_in = True
Example #16
# `dumps` is presumably bson.json_util.dumps; the route decorator and the
# DBUtils import are not shown in the original listing.
def get_shapes():
    return dumps(list(DBUtils().get_collection_obj("LOCAL", "shapes", "shape").find({})))
Example #17
 def reduce_dimensions(self, input_param, data_option, entity_id, k):
     loc_id_key_map = DBUtils.create_location_id_key_map(self._db_operations)
     vector_space = self.data_load_option[data_option](entity_id, input_param)
     (reduced_dimensions, VT) = self.reduction_method[input_param](vector_space, k)
     post_projection_vectors = self.project_data_onto_new_dimensions(entity_id, len(loc_id_key_map), VT, 5, None, input_param)  # model=None
     return reduced_dimensions, post_projection_vectors, loc_id_key_map
Example #18
# `request` is presumably the Flask/Quart request object and `dumps` is
# bson.json_util.dumps; the imports and route decorator are not shown.
def get_shape_defination():
    shape = request.args.get("shape")
    if shape is not None:
        return dumps(DBUtils().get_collection_obj("LOCAL", "shapes", "shape").find_one({"id": shape}))
    else:
        return "shape is a compulsory param", 404
Example #19
def analyse_data():
    # (pandas as pd, numpy as np, matplotlib.pyplot as plt and DBUtils are
    # assumed to be imported at module level in the original file)
    db = DBUtils()
    # Connect to the database
    db.db_connect()

    # Load the specified fields from the database (keys are the original
    # Chinese field names: floor area, total price, decoration status, district)
    db_query_data = db.db_get_info({
        '_id': 0,
        '建筑面积': 1,
        '房屋总价': 1,
        '装修情况': 1,
        '行政区域': 1
    })

    db.db_close()
    # Use pandas to turn the result into a DataFrame
    dataSet = pd.DataFrame(list(db_query_data))
    # print(dataSet)

    # Strip the trailing unit character from the floor-area field
    dataSet['建筑面积'] = dataSet['建筑面积'].str[:-1]

    # Convert the data to a numpy array
    dataArr = np.array(dataSet)
    # District column
    x_district = dataArr[:, 2]

    # Split the dataset by district
    data_address1 = dataArr[x_district == '鼓楼', :]  # Gulou district
    # Decoration status column
    x1_decoration = data_address1[:, 3]

    # Split the dataset by decoration status
    dataset_address1_decoration = data_address1[x1_decoration == '毛坯', :]  # unrenovated
    dataset_address2_decoration = data_address1[x1_decoration == '简装', :]  # basic finish
    dataset_address3_decoration = data_address1[x1_decoration == '精装', :]  # full finish
    dataset_address4_decoration = data_address1[x1_decoration == '其他', :]  # other
    # print(data_address1)

    # Area and price data
    x1_area_d = dataset_address1_decoration[:, 0].astype('float')
    y1_price_d = dataset_address1_decoration[:, 1].astype('float')

    x2_area_d = dataset_address2_decoration[:, 0].astype('float')
    y2_price_d = dataset_address2_decoration[:, 1].astype('float')

    x3_area_d = dataset_address3_decoration[:, 0].astype('float')
    y3_price_d = dataset_address3_decoration[:, 1].astype('float')

    x4_area_d = dataset_address4_decoration[:, 0].astype('float')
    y4_price_d = dataset_address4_decoration[:, 1].astype('float')

    # Sort each (area, price) series by ascending area
    x1_sort = np.sort(x1_area_d)
    index = np.argsort(x1_area_d)
    y1_sort = [y1_price_d[i] for i in index]

    x2_sort = np.sort(x2_area_d)
    index = np.argsort(x2_area_d)
    y2_sort = [y2_price_d[i] for i in index]

    x3_sort = np.sort(x3_area_d)
    index = np.argsort(x3_area_d)
    y3_sort = [y3_price_d[i] for i in index]

    x4_sort = np.sort(x4_area_d)
    index = np.argsort(x4_area_d)
    y4_sort = [y4_price_d[i] for i in index]

    # print(x1_sort)

    # Plot: Gulou-district second-hand homes, floor area vs. total price,
    # one panel per decoration status (titles and labels stay in Chinese)
    ax1 = plt.subplot2grid(shape=(2, 2), loc=(0, 0))
    ax1.scatter(x1_sort, y1_sort, s=10)
    ax1.set_ylabel('总价(万元)')
    ax1.set_xlabel('面积(平方米)')
    ax1.set_title('鼓楼区二手房-毛坯-建筑面积与房屋总价格关系图')

    ax2 = plt.subplot2grid(shape=(2, 2), loc=(0, 1))
    ax2.scatter(x2_sort, y2_sort, s=10)
    ax2.set_ylabel('总价(万元)')
    ax2.set_xlabel('面积(平方米)')
    ax2.set_title('鼓楼区二手房-简装-建筑面积与房屋总价格关系图')

    ax3 = plt.subplot2grid(shape=(2, 2), loc=(1, 0))
    ax3.scatter(x3_sort, y3_sort, s=10)
    ax3.set_ylabel('总价(万元)')
    ax3.set_xlabel('面积(平方米)')
    ax3.set_title('鼓楼区二手房-精装-建筑面积与房屋总价格关系图')

    ax4 = plt.subplot2grid(shape=(2, 2), loc=(1, 1))
    ax4.scatter(x4_sort, y4_sort, s=10)
    ax4.set_ylabel('总价(万元)')
    ax4.set_xlabel('面积(平方米)')
    ax4.set_title('鼓楼区二手房-其他-建筑面积与房屋总价格关系图')

    plt.rcParams['font.sans-serif'] = ['SimHei']  # so Chinese labels render correctly
    plt.rcParams['axes.unicode_minus'] = False  # so minus signs render correctly
    plt.subplots_adjust(hspace=0.6, wspace=0.5)
    plt.show()
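The four sort blocks above repeat the same argsort pattern. A small helper that could replace them, with behaviour matching the blocks (a sketch):

def sort_by_area(area, price):
    # Sort both arrays by ascending area, keeping (area, price) pairs aligned.
    order = np.argsort(area)
    return area[order], price[order]

# e.g. x1_sort, y1_sort = sort_by_area(x1_area_d, y1_price_d)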