Example #1
 def _create_table_if_not_exist(self, table_name: str) -> None:
     """
     Checks if the a given table exists,
     Creates fire_history table if not exist
     If it exists, the function does nothing
     :param table_name: name of the table to look for or create.
             Only 3 possible values: "fire_history", "fire", "fire_merged"
     """
     # if the pipeline is run for the first time, the table will not exist yet
     is_table_exist = table_name in self.existing_tables
     if is_table_exist:
         logger.info(f"Found the {table_name} table, continue ...")
     else:
         # if table does not exist, create the table
         logger.info(f"No {table_name} exists. Creating a new one.")
         # choose the corresponding query to execute
         table_name_to_create_query = {
             "fire_history": FireDumper.SQL_CREATE_HISTORY_TABLE,
             "fire": FireDumper.SQL_CREATE_FIRE_TABLE,
             "fire_merged": FireDumper.SQL_CREATE_FIRE_MERGED_TABLE,
         }[table_name]
         # execute the query to create the table in database
         Connection.sql_execute_commit(table_name_to_create_query)
         # add the table name into the global variable to keep consistency with the database
         self.existing_tables.add(table_name)
         logger.info(f"Table {table_name} created.")
Example #2
    def insert(self, model_type: str, image_url: str, data: Tuple[float, float]) -> None:
        """
        Inserts image prediction result into images table.
        :param model_type: vgg or resnet for image classification model.
        :param image_url: the image's url, which identifies each distinct image.
        :param data: image prediction result -- probability of being wildfire and not wildfire.
        """
        # extract data to be dumped
        prob_not_wildfire, prob_wildfire = data

        # for vgg model
        if model_type == ImageClassifier.VGG_MODEL:
            try:
                Connection().sql_execute_commit(
                    f"UPDATE images SET not_wildfire_prob = {prob_not_wildfire}, wildfire_prob = {prob_wildfire} "
                    f"WHERE image_url = {repr(image_url)}")

            except Exception:
                logger.error("error: " + traceback.format_exc())

        # for resnet model
        elif model_type == ImageClassifier.RESNET_MODEL:
            try:
                Connection().sql_execute_commit(
                    f"UPDATE images SET resnet_not_wildfire = {prob_not_wildfire}, resnet_wildfire = {prob_wildfire} "
                    f"WHERE image_url = {repr(image_url)}")

            except Exception:
                logger.error("error: " + traceback.format_exc())
        else:
            logger.error("Insertion fail. Please specify the model type to be vgg or resnet.")
Example #3
def send_fire_tweet_data2():
    """
        This func gives all historical tweets objects with id

        :returns: a list of tweet objects, each with time, lat, long, id
    """

    request_json = flask_request.get_json(force=True)
    old_north = request_json['oldBound']['_northEast']['lat']
    old_east = request_json['oldBound']['_northEast']['lng']
    old_south = request_json['oldBound']['_southWest']['lat']
    old_west = request_json['oldBound']['_southWest']['lng']
    north = request_json['newBound']['_northEast']['lat']
    east = request_json['newBound']['_northEast']['lng']
    south = request_json['newBound']['_southWest']['lat']
    west = request_json['newBound']['_southWest']['lng']
    start_date_float = request_json['startDate']
    end_date_float = request_json['endDate']

    old_poly = 'polygon(({0} {1}, {0} {2}, {3} {2}, {3} {1}, {0} {1}))'.format(
        old_east, old_south, old_north, old_west)
    poly = 'polygon(({0} {1}, {0} {2}, {3} {2}, {3} {1}, {0} {1}))'.format(
        east, south, north, west)

    if old_poly == poly:
        return make_response(
            jsonify([{
                "createAt": t.timestamp(),
                "lng": long,
                "lat": lat,
                "id": str(id)
            } for t, id, long, lat in Connection.sql_execute(f"""
                    SELECT r.create_at, r.id , st_x(location), st_y(location)
                    FROM records r 
                    where r.create_at BETWEEN to_timestamp({start_date_float / 1000}) 
                                        AND to_timestamp({end_date_float / 1000})
                    and ST_CONTAINS(st_geomfromtext({repr(poly)}) , location)"""
                                                             )]))
    else:
        return make_response(
            jsonify([{
                "createAt": t.timestamp(),
                "lng": long,
                "lat": lat,
                "id": str(id)
            } for t, id, long, lat in Connection.sql_execute(f"""
                SELECT r.create_at, r.id , st_x(location), st_y(location)
                FROM records r 
                where r.create_at BETWEEN to_timestamp({start_date_float / 1000}) 
                                    AND to_timestamp({end_date_float / 1000})
                and ST_CONTAINS(st_difference(st_geomfromtext({repr(poly)}), st_geomfromtext({repr(old_poly)})) , location)"""
                                                             )]))
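Both branches above build the same WKT ring from a bounding box; a standalone sketch of that construction (bbox_to_wkt is a hypothetical helper, not part of the codebase) makes explicit that the ring closes by repeating the first vertex:

def bbox_to_wkt(east: float, south: float, north: float, west: float) -> str:
    # same positional arguments as the .format() calls above
    return ('polygon(({0} {1}, {0} {2}, {3} {2}, {3} {1}, {0} {1}))'
            .format(east, south, north, west))

assert bbox_to_wkt(-117.0, 33.0, 34.0, -118.0) == \
    'polygon((-117.0 33.0, -117.0 34.0, -118.0 34.0, -118.0 33.0, -117.0 33.0))'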
Example #4
def search_administrative_boundaries():
    """
    search administrative boundaries
    support region_id as integer, full-name as string and abbreviated state name as 2-letter string
    :return:
    """
    keyword = flask_request.args.get('keyword')

    # if the keyword is numeric, treat it as a region_id and fetch geometry directly
    if keyword.isdigit():
        region_id = int(keyword)
        query = f'''
        SELECT st_asgeojson(t.geom) as geojson from us_states t where state_id = {region_id}
        union
        SELECT st_asgeojson(t.geom) as geojson from us_counties t where county_id = {region_id}
        union
        SELECT st_asgeojson(t.geom) as geojson from us_cities t where city_id = {region_id}
        '''

        with Connection() as conn:
            cur = conn.cursor()
            cur.execute(query)
            resp = make_response(
                jsonify([json.loads(geom) for geom, in cur.fetchall()]))
            cur.close()

    else:
        # load abbreviation
        # default value is keyword itself if not found in dict
        keyword = us_states_abbr.get(keyword, keyword)

        search_state = "SELECT st_asgeojson(t.geom) from us_states t where lower(state_name)=lower(%s)"
        search_county = "SELECT st_asgeojson(t.geom) from us_counties t where lower(county_name)=lower(%s) limit 1"
        search_city = "SELECT st_asgeojson(t.geom) from us_cities t where lower(city_name)=lower(%s) limit 1"

        with Connection() as conn:
            cur = conn.cursor()
            results = None
            if not results:
                cur.execute(search_state, (keyword, ))
                results = [json.loads(geom) for geom, in cur.fetchall()]
            if not results:
                cur.execute(search_county, (keyword, ))
                results = [json.loads(geom) for geom, in cur.fetchall()]
            if not results:
                cur.execute(search_city, (keyword, ))
                results = [json.loads(geom) for geom, in cur.fetchall()]
            resp = make_response(jsonify(results))
            cur.close()
    return resp
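The state -> county -> city lookup above is a first-non-empty cascade; a generic sketch of the pattern (first_non_empty is a hypothetical helper):

def first_non_empty(cur, queries, param):
    # run the queries in priority order, return the first non-empty result set
    for query in queries:
        cur.execute(query, (param,))
        rows = cur.fetchall()
        if rows:
            return rows
    return []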
Example #5
def send_temperature_data():
    """
        This func gives the second lastest data for temperature within ractangle around US,
        since the most lastest data is always updating (not completed)

        :returns: a list of temp objects, with lat, long, and temp value
    """
    temperature_fetch = Connection.sql_execute("select t.lat, t.long, t.temperature from recent_temperature t "
                                                 "where t.endtime = (select max(t.endtime) from recent_temperature t"
                                                 " where t.endtime <(select max(t.endtime) from recent_temperature t))")

    temperature_data_celsius = []  # format temp data into a dictionary structure

    for row in temperature_fetch:
        temperature_object = {
            "lat": row[0],
            "long": row[1] % (-360),  # convert longtitude range
            "temp": row[2] - 273.15,  # change temp into celsius
        }

        temperature_data_celsius.append(temperature_object)

    temperature_data_us = points_in_us(temperature_data_celsius)  # restrict data within US boundary.

    resp = make_response(jsonify(temperature_data_us))
    resp.headers['Access-Control-Allow-Origin'] = '*'
    # sends temperature data with coordinate within us boundary
    return resp
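How the two conversions above behave in plain Python (a % b takes the sign of b, so positive eastern longitudes map into a negative western-style range):

assert 242.5 % -360 == -117.5                 # 242.5°E -> -117.5°
assert abs((300.15 - 273.15) - 27.0) < 1e-9   # Kelvin -> Celsius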
Example #6
    def run(self, model_type: str = ImageClassifier.RESNET_MODEL):
        """
        Gets image_id and image_url from database and dump prediction results into database.
        :param model_type: different model will be used according to model type.
        :return: None
        """
        # set up image classifier
        image_classifier = ImageClassifier(model_type)

        image_classifier.set_model()

        # set up image classification dumper
        img_classification_dumper = ImgClassificationDumper()

        # loop every image in database
        try:
            for id, image_url in Connection().sql_execute(
                    "select id, image_url from images"):
                # get prediction result of image
                prediction_tuple = image_classifier.predict(image_url)
                # dump prediction result into database
                img_classification_dumper.insert(model_type, image_url,
                                                 prediction_tuple)
                logger.info("id " + str(id) + " is done!")
        except Exception:
            logger.error('error: ' + traceback.format_exc())
Example #7
    def run(self,
            keywords: list = None,
            batch_num: int = 100,
            using_filter_api: bool = False,
            fetch_from_db: bool = False,
            time_interval: int = 2):

        if keywords is None:
            keywords = ['wildfire']

        logger.info('Crawler Starting')
        if fetch_from_db:
            return self._run_fetch_from_db_mode(time_interval)

        # crawling ids:
        if using_filter_api and not isinstance(self.crawler,
                                               TweetFilterAPICrawler):
            self.crawler = TweetFilterAPICrawler()

        while True:
            # add fires that happened in the past month and put these fires into the keywords for search
            fire_names = {
                fire_name.lower()
                for fire_name, in Connection.sql_execute(
                    f"select name from fire_merged where start_time > now() - interval '1 month'"
                )
            }
            keywords_with_fire_names = fire_names | (set(keywords))
            logger.info(f'Full keyword set: {keywords_with_fire_names}')
            logger.info('Running ID mode')
            ids = self.crawler.crawl(list(keywords_with_fire_names), batch_num)
            self.dumper.insert(ids, id_mode=True)

            # prevent API from being banned
            time.sleep(time_interval)
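The keyword merge above is a plain set union, so duplicates between the configured keywords and recent fire names collapse automatically; the values here are hypothetical:

fire_names = {"kincade", "saddle ridge"}
keywords = ["wildfire", "kincade"]
assert fire_names | set(keywords) == {"kincade", "saddle ridge", "wildfire"}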
Example #8
def fire():
    """Return the name, time, and geometry of every fire inside the bounding box as JSON."""
    request_json = flask_request.get_json(force=True)
    north = request_json['northEast']['lat']
    east = request_json['northEast']['lon']
    south = request_json['southWest']['lat']
    west = request_json['southWest']['lon']
    size = request_json['size']
    start_date = request_json['startDate'][:10]
    end_date = request_json['endDate'][:10]
    size_getters = {0: "geom_full", 1: "geom_1e4", 2: "geom_1e3", 3: "geom_1e2",
                    4: "geom_center"}
    poly = 'polygon(({0} {1}, {0} {2}, {3} {2}, {3} {1}, {0} {1}))'.format(east, south, north, west)
    query = f"SELECT id, name, agency,start_time, end_time, st_asgeojson({size_getters[size]}) as geom, max_area FROM " \
            f"fire_merged f WHERE ((('{start_date}'::date <= f.end_time::date) AND " \
            f"('{start_date}'::date >= f.start_time::date)) OR (('{end_date}'::date >= f.start_time::date) " \
            f"AND ('{end_date}'::date <= f.end_time::date)) OR (('{start_date}'::date <= f.start_time::date) " \
            f"AND ('{end_date}'::date >= f.end_time::date) )) " \
            f"AND (st_contains(ST_GeomFromText('{poly}'),f.{size_getters[size]}) " \
            f"OR st_overlaps(ST_GeomFromText('{poly}'),f.{size_getters[size]}))"
    return make_response(jsonify([{"type": "Feature",
                                   "id": fid,
                                   "properties": {"name": name, "agency": agency, "starttime": start_time,
                                                  "endtime": end_time, "density": 520, "area": max_area},
                                   "geometry": json.loads(geom)}
                                  for fid, name, agency, start_time, end_time, geom, max_area
                                  in Connection.sql_execute(query)]))
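The three OR-ed date conditions in the WHERE clause above implement interval overlap: either endpoint of the query range falls inside the fire's interval, or the query range contains it entirely. For closed intervals this is equivalent to the compact predicate start1 <= end2 AND end1 >= start2, sketched here:

from datetime import date

def overlaps(start, end, f_start, f_end):
    # True when [start, end] and [f_start, f_end] share at least one day
    return start <= f_end and end >= f_start

assert overlaps(date(2019, 8, 1), date(2019, 8, 31), date(2019, 7, 20), date(2019, 8, 5))
assert not overlaps(date(2019, 8, 1), date(2019, 8, 31), date(2019, 9, 1), date(2019, 9, 9))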
Example #9
def send_recent_tweet_data():
    """
        This func gives recent tweets objects which must has a image
        here the interval is 10 month

        :returns: a list of tweet objects, each with time, lat, long, text, id
    """
    with Connection() as conn:
        cur = conn.cursor()
        livetweet_query = "select it.create_at, it.top_left_long, it.top_left_lat, it.bottom_right_long, it.bottom_right_lat, it.id, it.text, i.image_url, it.profile_pic, it.user_name " \
                          "from (select r.create_at, l.top_left_long, l.top_left_lat, l.bottom_right_long, l.bottom_right_lat, l.id, r.text, r.profile_pic, r.user_name " \
                          "from records r, locations l where r.id=l.id and r.profile_pic is not null and r.create_at between (SELECT current_timestamp - interval '10 month') and current_timestamp) AS it LEFT JOIN images i on i.id = it.id where i.image_url is not null "
        cur.execute(livetweet_query)
        resp = make_response(
            jsonify([{
                "create_at": time.isoformat(),
                "long": long,
                "lat": lat,
                "id": id,
                "text": text,
                "image": image,
                "profilePic": profilePic,
                "user": user
            } for time, long, lat, _, _, id, text, image, profilePic, user in
                     cur.fetchall()]))
        cur.close()
    return resp
Example #10
def send_boundaries_data():
    """
    get administrative boundaries within specific bounding box
    will DB stored procedure (boundaries.sql)
    :return:
    """
    request_json = flask_request.get_json(force=True)
    states = request_json['states']
    cities = request_json['cities']
    counties = request_json['counties']
    north = request_json['northEast']['lat']
    east = request_json['northEast']['lon']
    south = request_json['southWest']['lat']
    west = request_json['southWest']['lon']

    select_states = "SELECT * from boundaries_states(%s)"
    select_counties = "SELECT * from boundaries_counties(%s)"
    select_cities = "SELECT * from boundaries_cities(%s)"
    poly = 'polygon(({1} {0}, {2} {0}, {2} {3}, {1} {3}, {1} {0}))'.format(
        north, west, east, south)  # lon lat +-180

    with Connection() as conn:
        cur = conn.cursor()
        result_list = list()

        if states:
            result_list.extend(_get_geometry(cur, select_states, poly))
        if counties:
            result_list.extend(_get_geometry(cur, select_counties, poly))
        if cities:
            result_list.extend(_get_geometry(cur, select_cities, poly))
        cur.close()
    return make_response(jsonify(result_list))
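_get_geometry is referenced above but not shown; a minimal sketch of what it plausibly does, assuming the boundaries_* stored procedures take a polygon and return GeoJSON rows (this signature is an assumption, not the actual helper):

import json

def _get_geometry(cur, query, poly):
    # execute one boundaries_* query with the polygon as its parameter
    cur.execute(query, (poly,))
    return [json.loads(geom) for geom, in cur.fetchall()]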
Example #11
def send_wildfire():
    """
    provide predicted wildfires
    :return:
    """
    # TODO: update the where clause
    request_json = flask_request.get_json(force=True)
    north = request_json['northEast']['lat']
    east = request_json['northEast']['lon']
    south = request_json['southWest']['lat']
    west = request_json['southWest']['lon']
    start = request_json['startDate']
    end = request_json['endDate']
    start = time.mktime(time.strptime(start, "%Y-%m-%dT%H:%M:%S.%fZ"))
    end = time.mktime(time.strptime(end, "%Y-%m-%dT%H:%M:%S.%fZ"))

    return make_response(
        jsonify([{
            "long": lon,
            "lat": lat,
            "nlp": True,
            "text": text
        } for lon, lat, nlp_text, text in Connection.sql_execute(
            f"select l.top_left_long, l.top_left_lat, r.text, r.text from locations l, images i, records r "
            f"where l.id = i.id and r.id = l.id and i.wildfire_prob>0.9 and "
            f"l.top_left_long>{west} and l.top_left_lat<{north} "
            f"and l.bottom_right_long<{east} and l.bottom_right_lat>{south} "
            f"and extract(epoch from r.create_at) >{start} and extract(epoch from r.create_at) <{end}"
        )]))
Example #12
    def run(self):
        """Get tweets from the database and dump prediction results into the database."""
        try:
            # set up text classifier
            text_classifier = TextClassifier()

            # use the pre-trained model
            text_classifier.set_model(TEXT_CNN_MODEL_PATH)

            # set up text dumper
            text_dumper = TextClassificationDumper()

            # loop required text in database
            for tweet_id, text in Connection().sql_execute(
                    "select id, text from records where text_cnn_wildfire_prob is null"
            ):
                # preprocess the text
                processed_text = text_classifier.preprocess(text)
                # get prediction result of text, tuple example: tensor([[0.8321, 0.1679]])
                prediction_tuple = text_classifier.predict(processed_text)
                # dump prediction result into database
                text_dumper.insert(tweet_id, prediction_tuple[0][0].item(),
                                   prediction_tuple[0][1].item())
                logger.info("id " + str(tweet_id) + " is done!")
                logger.info("Total affected records: " +
                            str(text_dumper.inserted_count))
        except Exception:
            logger.error('error: ' + traceback.format_exc())
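The indexing prediction_tuple[0][0].item() above assumes the shape shown in the inline comment, a 1x2 tensor of probabilities; a standalone check (requires torch, and the column order is an assumption):

import torch

prediction = torch.tensor([[0.8321, 0.1679]])  # e.g. [not_wildfire, wildfire]; order assumed
assert abs(prediction[0][0].item() - 0.8321) < 1e-6
assert abs(prediction[0][1].item() - 0.1679) < 1e-6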
Example #13
    def insert(self, date_str: str, weekly_soil_mois: np.ndarray) -> None:
        """
        :param date_str: the current data's date, formatted as %Y%m%d
        :param weekly_soil_mois: the weekly soil-moisture grid
        :return: None
        """
        flattened_data = weekly_soil_mois.flatten()

        with Connection() as conn:
            cur = conn.cursor()
            for gid, val in enumerate(flattened_data.tolist()):
                val = float('NaN') if val in [-999, -9999] else val
                try:
                    cur.execute(
                        self.INSERT_SOIL_MOISTURE,
                        (gid, datetime.datetime.strptime(date_str,
                                                         "%Y%m%d"), val))
                    self.inserted_count += cur.rowcount
                    conn.commit()
                except Exception:
                    logger.error("error: " + traceback.format_exc())

            logger.info(
                f'{date_str} finished, total inserted {self.inserted_count}')
            cur.close()
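The sentinel handling above maps the grid's no-data markers -999/-9999 to NaN before insertion; as a standalone rule:

import math

def clean(val):
    # -999 / -9999 are no-data sentinels in the source grid
    return float('NaN') if val in [-999, -9999] else val

assert math.isnan(clean(-9999))
assert clean(0.42) == 0.42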
Example #14
def tweet_by_date():
    """
    tweet count within specific date range

    @:param start-date: ISO string
    @:param end-date: ISO string
    :return: [ {create_at, id, lat, lon}, ... ]
    """
    start_date_str = flask_request.args.get('start-date').split('.')[0][:-3]
    end_date_str = flask_request.args.get('end-date').split('.')[0][:-3]

    query = f'''
    select r.create_at, r.id, top_left_long, top_left_lat, bottom_right_long, bottom_right_lat 
    from records r, locations l where r.id = l.id and r.create_at <  to_timestamp({end_date_str}) and r.create_at >  to_timestamp({start_date_str})
    '''

    resp = make_response(
        jsonify([{
            'create_at': create_at,
            'id': id,
            'lat': (top_left_lat + bottom_right_lat) / 2,
            'long': (top_left_long + bottom_right_long) / 2
        } for create_at, id, top_left_long, top_left_lat, bottom_right_long,
                 bottom_right_lat in Connection.sql_execute(query)]))

    return resp
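The string surgery on the query parameters above converts epoch milliseconds (optionally with a fractional part) into epoch seconds for to_timestamp:

assert '1565000000000'.split('.')[0][:-3] == '1565000000'
assert '1565000000123.456'.split('.')[0][:-3] == '1565000000'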
Example #15
 def batch_traverse_tokens(self, token_type: str, probability_type: str,
                           table_name: str, data_list: list,
                           record_id_list: list, page_size: int):
     """
     Traverses each token in the data dictionary for the whole batch,
     and calls the respective inserting functions to dump them into the database.
     :param token_type: one of X's reaction, Y's reaction, and X's intent.
     :param probability_type: type of the respective probability.
     :param table_name: the target table to dump data into.
     :param data_list: list of the prediction dictionaries.
     :param record_id_list: list of tweet record ids corresponding to the prediction results.
     :param page_size: an integer for batch insertion into the database.
     """
     # list of all the record ids
     rids = []
     # list of all the probabilities in data_list
     all_probabilities = []
     # list of all the tokens in data_list
     all_tokens = []
     for data, record_id in zip(data_list, record_id_list):
         tokens = data[token_type]
         probabilities = data[probability_type]
         for token, probability in zip(tokens, probabilities):
             all_tokens.append(' '.join(token))
             all_probabilities.append(probability)
             rids.append(record_id)
     with Connection() as conn:
         # eids is a list containing ids of all the tokens
         eids = self.batch_insert_into_tokens(all_tokens, token_type,
                                              page_size, conn)
         self.batch_insert_into_pairs(rids, eids, all_probabilities,
                                      table_name, page_size, conn)
Example #16
 def get_aggregated_fire_with_id(self, year: int, name: str, state: str,
                                 id: int) -> List[Tuple]:
     """
     Merges fire events with this id from fire table into several big fire events.
     Some pages might have fires from multiple fire events, so the return value is a list of tuples.
     If there are more than one fire events, then there will be multiple tuples in returned list.
     :param year: int
             e.g. 1999
     :param name: str
             e.g. "FireA"
     :param state: str
             e.g. "California"
     :param id: int
             e.g. 9999
     :return: list of tuples representing fire events
             e.g. [(1999, "FireA",...), (1999, "FireB",....)]
     """
     aggregated_fire_records_with_id = list(
         Connection.sql_execute(self.SQL_GET_LATEST_AGGREGATION.format(id)))
     if not aggregated_fire_records_with_id:
         # if this id is an empty record, then there is no aggregated fire records
         # in this situation, we only mark the url as crawled, by inserting it into fire_history
         self.insert_history(FireEvent(year, state, name, id))
         raise InvalidFireRecordException(
             f"Record {id} is an empty record. Skipping...")
     logger.info(
         f"Successfully fetch Record #{id}, there are {len(aggregated_fire_records_with_id)} "
         f"aggregated records in this id")
     return aggregated_fire_records_with_id
Example #17
 def _insert_ids(ids: List[Tuple[int]]):
     """insert given id list into the database"""
     logger.info("Inserting ids")
     with Connection() as connection:
         cur = connection.cursor()
         extras.execute_values(cur, TweetDumper.INSERT_LOCATION_QUERY, ids)
         connection.commit()
         cur.close()
Example #18
 def _get_length_of_select_query_result(query: str) -> int:
     """
     Takes a SELECT query and returns the number of rows the query returns.
     :param query: str.
             e.g. 'SELECT year,state,name FROM fire_history'
     :return: int
     """
     return sum(1 for _ in Connection.sql_execute(query))
Example #19
 def get_exists(self):
     """get how far we went last time"""
     with Connection() as conn:
         cur = conn.cursor()
         cur.execute(self.select_exists)
         exists_list = cur.fetchall()
         cur.close()
     return exists_list
Example #20
    def f1_score(self):
        # ground-truth positive ids: both annotators said true, or the judge did
        positive_ids = {
            id
            for id, in Connection().sql_execute(
                "select id from records where (label1 = label2 and label1 = true) or judge = true"
            )
        }
        self.train()
        # ids the trained classifier predicts as positive
        predicted_ids = [
            id for id, text in Connection().sql_execute(
                "select id, text from records where label1 is not null")
            if self.predict(text)
        ]
        true_positives = sum(1 for id in predicted_ids if id in positive_ids)
        precision = true_positives / len(predicted_ids) if predicted_ids else 0.0
        recall = true_positives / len(positive_ids) if positive_ids else 0.0
        if precision + recall == 0:
            return 0.0
        return 2 * precision * recall / (precision + recall)
Example #21
 def run(self, batch_num: int = 100):
     try:
         self.dumper.insert({
             id: self.extractor.extract(text)
             for id, text in Connection().sql_execute(
                 f"select id, text from records r WHERE NOT EXISTS (select distinct id from images i where i.id = r.id) limit {batch_num}"
             )
         })
     except Exception:
         logger.error('error: ' + traceback.format_exc())
Example #22
def send_tweet_count_data():
    """
        This func gives all historical tweets objects with id

        :returns: a list of tweet objects, each with time, lat, long, id
    """
    return make_response(
        jsonify({
            date.isoformat(): count
            for date, count in Connection().sql_execute("""
    select r.create_at::timestamp::date, count(*) from records r where r.location is not null
    and r.create_at is not null group by r.create_at::timestamp::date""")
        }))
Example #23
 def _generate_sql_statement_and_execute(sql_statement: str, data) -> None:
     """
     Generates a SQL statement with the data given as a dictionary and execute, commit the changes.
     :param sql_statement: string
             e.g. "SELECT * FROM fire_history WHERE %{id}=199"
     :param data: data as a dict.
             e.g. {'year': 2019, 'firename': 'TRESTLE', 'agency': 'USFS', 'datetime': .....}
     """
     with Connection() as conn:
         cur = conn.cursor()
         cur.execute(sql_statement, data)
         conn.commit()
         cur.close()
Example #24
 def _fetch_id_from_db(self):
     """a generator which yields lists of up to 100 ids at a time"""
     result = list()
     for id, in Connection.sql_execute(
             f"SELECT id FROM records WHERE user_id IS NULL or text is null ORDER BY create_at DESC"
     ):
         if id not in self.cache:
             self.cache.add(id)
             result.append(id)
         if len(result) == 100:
             yield result
             result = []  # rebind rather than clear(), so the list already yielded is untouched
     pickle.dump(self.cache, open(TWITTER_TEXT_CACHE, 'wb+'))
     yield result
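Why the rebind matters: calling result.clear() after a yield would empty the very list object the consumer just received. A standalone sketch of the batching pattern (batches is a hypothetical name):

def batches(items, size=3):
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) == size:
            yield batch
            batch = []  # batch.clear() here would also empty the yielded list
    if batch:
        yield batch

assert list(batches(range(7))) == [[0, 1, 2], [3, 4, 5], [6]]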
Example #25
    def run(self, end_clause: int = 210):
        """
        crawling routine
        :param end_clause: number of days back we want to crawl, default=210
        :return: None
        """

        current_date = datetime.now(timezone.utc).date()
        end_date = current_date - timedelta(days=end_clause)

        # TODO: stop and continue
        with Connection() as conn:
            cur = conn.cursor()
            cur.execute('select date from usgs_info')
            exist_list = cur.fetchall()
            cur.close()

        date = current_date - timedelta(days=7)  # website update weekly
        diff_date = date - date_parser.parse('20190730').date()
        date = date - timedelta(days=diff_date.days % 7)

        while date >= end_date:
            logger.info(f'[fetch]{date}')
            # skip this date if it has already been crawled
            if (date,) in exist_list:
                logger.info(f'skip: {date}')
                date = date - timedelta(days=7)
                continue

            saved_zip_path = self.crawler.crawl(date)
            if saved_zip_path is None:
                logger.info(f'{date} not found, skipped')

            else:
                tif_file_name = None
                zf = zipfile.ZipFile(saved_zip_path)
                for file in zf.namelist():
                    if file.split('.')[-4] == 'VI_NDVI' and file.split('.')[-1] == 'tif':
                        zf.extract(file, os.path.split(saved_zip_path)[0])
                        tif_file_name = file
                zf.close()
                if tif_file_name is not None:
                    tif_path = os.path.join(os.path.split(saved_zip_path)[0], tif_file_name)
                    unflattened = self.extractor.extract(tif_path)
                    if unflattened is not None:
                        self.dumper.insert(date, unflattened, 'usgs')

                    # clean up
                    os.remove(saved_zip_path)
                    os.remove(tif_path)

            # finish crawling a day
            date = date - timedelta(days=7)
Example #26
 def get_latest_fire_id() -> int:
     """
     Gets the latest fire id to pass to the counter in data_from_fire
     :return: int
             e.g. 299
     """
     # execute select statement
     # the latest fire id is the first and only entry of the result
     with Connection() as conn:
         cur = conn.cursor()
         cur.execute(FireDumper.SQL_GET_LATEST_ID)
         result = cur.fetchone()[0]
         cur.close()
     return result
Example #27
def drop_box():
    """
    Auto-completion relies on this API.
    The frontend sends what the user typed through userInput, and this API
    performs a DB query through a stored procedure (autocomplete.sql).

    :return: a list/array: [ (city, county, state, id), ... ]
    """
    user_input = flask_request.args.get('userInput')
    # request_json = flask_request.get_json(force=True)
    # user_input = request_json['userInput']
    name_list_query = f"select * from fuzzy_search('{user_input + '%'}')"

    return make_response(jsonify(list(Connection.sql_execute(name_list_query))))
Example #28
 def fetch_status_id_from_db():
     """a generator which yields lists of up to 100 ids at a time"""
     count = 0
     result = list()
     for id, in Connection.sql_execute(
             "SELECT id FROM records WHERE user_id IS NULL order by create_at desc"
     ):
         count += 1
         result.append(id)
         if count >= 100:
             yield result
             time.sleep(20)
             # sleep to prevent the twitter api from being banned
             result = []  # rebind rather than clear(), so the yielded list is untouched
             count = 0
     if result:
         yield result
Example #29
class Dumper:
    RECORD = 0
    LOCATION = 1

    def __init__(self, json_filename):
        with open(json_filename, 'rb') as file:
            self.tweets = json.load(file)
        self.target_fields = ["id", "bounding_box"]
        self.conn = Connection()()

    def rows(self, target):
        # __iter__ cannot take an extra argument, so expose a generator instead
        if target == Dumper.LOCATION:
            for tweet in self.tweets:
                (a, b), (c, d) = tweet['place']["bounding_box"]
                yield tuple([tweet['id'], a, b, c, d])

        elif target == Dumper.RECORD:
            for tweet in self.tweets:
                yield tuple(
                    [tweet[field] for field in ["id", "create_at", "text"]])

    def dump_all(self, table, value_count, target):
        try:
            # join placeholders with commas: "%s, %s, %s"
            sql = f'INSERT INTO {table} VALUES({", ".join(["%s"] * value_count)});'
            for record in self.rows(target):
                print(record)
                cur = self.conn.cursor()
                cur.execute(sql, record)
                cur.close()
                self.conn.commit()

        except psycopg2.DatabaseError as error:
            print(error)

    def get_location(self):
        pass
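The placeholder list in dump_all must be comma-separated; the original expression "%s " * value_count produced "%s %s %s " with no commas, which is invalid SQL:

value_count = 3
assert ", ".join(["%s"] * value_count) == "%s, %s, %s"
assert "%s " * value_count == "%s %s %s "  # the buggy original form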
Example #30
    def get_labeled_data(self):
        self.labeled_data = list()
        for id, text, label1, label2, judge in Connection().sql_execute(
                "select id, text, label1, label2, judge from records where label1 is not null and label2 is not null"
        ):
            self.labeled_data.append((self.pre_process(text),
                                      label1 if label1 == label2 else judge))

        for text_dict, label in self.labeled_data:
            for word in text_dict.copy():
                if self.low_fre_words[word] < 35 and word not in [
                        "THISISALINK", "THISISAUSER"
                ]:
                    del text_dict[word]

        random.shuffle(self.labeled_data)
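The label reconciliation above takes the annotators' shared label when they agree and defers to the judge otherwise; as a standalone rule:

def final_label(label1, label2, judge):
    # two annotators agree -> take their label; otherwise the judge decides
    return label1 if label1 == label2 else judge

assert final_label(True, True, False) is True
assert final_label(True, False, True) is True
assert final_label(False, False, True) is False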