Пример #1
0
def main():
    """Fetch the current set of traffic-camera images, download them all,
    and write per-image metadata to a CSV file.

    Retries the API call (up to 4 attempts, 5 s apart) until 87 images are
    reported. On a JSON decode failure the exception is logged and the
    download step is skipped entirely (the ``else`` branch does not run).
    """
    try:
        retry_count = 0
        image_count = 0
        # Poll until the API reports the full set of 87 images or retries
        # are exhausted.
        while retry_count < 4 and image_count < 87:
            r = requests.get(url, headers=headers).json()['value']
            image_count = len(r)
            logging.info('%s items found.' % image_count)
            if image_count < 87:
                logging.warning('Image count < 87')
                retry_count += 1
                sleep(5)

    except JSONDecodeError as e:
        logging.exception("JSONDecodeError")
    else:
        # Build a timestamp string from the first image's URL path.
        # Assumes path segments 3 and 4 carry date and time components —
        # TODO confirm against the live API response format.
        path_split = r[0]['ImageLink'].split('?')[0].split('/')
        request_datetime = ('%s%s' % (path_split[3], path_split[4])).replace(
            '-', '')
        dest_dir = os.path.join(images_dir, request_datetime)

        # Start from a clean destination directory for this timestamp.
        if os.path.isdir(dest_dir):
            logging.info('Deleting %s' % dest_dir)
            shutil.rmtree(dest_dir)

        os.makedirs(dest_dir)

        meta_df = []  # accumulated metadata rows, one per image

        logging.info('Downloading images to %s' % dest_dir)
        for item in r:
            # Destination filename is the last URL path segment.
            dest_path = os.path.join(
                dest_dir, item['ImageLink'].split('?')[0].split('/')[-1])

            download_image(item['ImageLink'], dest_path)

            # Metadata: camera id, position, date/time parsed from the URL,
            # filename stem, and the downloaded image's (width, height).
            path_split = item['ImageLink'].split('?')[0].split('/')
            meta_data = [
                item['CameraID'], item['Latitude'], item['Longitude'],
                path_split[3], path_split[-1].split('_')[1],
                path_split[-1].split('.')[0],
                Image.open(dest_path).size
            ]

            meta_df.append(meta_data)

        metadata_dir = os.path.join(data_dir, 'traffic-images-metadata')

        if not os.path.isdir(metadata_dir):
            logging.info('Creating %s' % metadata_dir)
            os.makedirs(metadata_dir)
        metadata_path = os.path.join(metadata_dir,
                                     '%s_images.csv' % request_datetime)

        # Written without a header row — presumably consumed downstream by
        # a reader that supplies its own column names; verify before changing.
        pd.DataFrame(meta_df,
                     columns=[
                         'CameraID', 'Latitude', 'Longitude', 'Date', 'Time',
                         'Filename', 'Dimensions'
                     ]).to_csv(metadata_path, index=False, header=False)
        logging.info('Saved metadata to %s' % metadata_path)
Пример #2
0
def run():
    """Download football-data CSVs for all English divisions across all
    configured year tags and insert divisions, seasons and matches into
    the database.
    """
    conn = open_connection()

    # Register the English divisions and fetch their football-data tags.
    div_ids = dt.insert_divisions_by_tags(dt.ENGLAND, conn=conn)
    data_tags = dt.get_tags_for_ids(div_ids, conn=conn)

    for year_tag in years_tags:
        logging.info(f"Reading football data for year tag {year_tag}")

        for division_id, division_tag in zip(div_ids, data_tags):
            csv_url = f"{url}{year_tag}/{division_tag}.csv"
            try:
                df = read_csv(csv_url)
            except UnicodeDecodeError:
                # Some remote CSVs fail direct decoding; re-download the
                # page, round-trip the text through UTF-8, and parse the
                # local temp copy instead.
                logging.warning(f"Failed to read csv for year tag {year_tag}")
                page = requests.get(csv_url)
                csv_file = page.text.encode().decode("utf-8")
                with open("tmp.csv", "w") as f:
                    f.write(csv_file)
                df = read_csv("tmp.csv")

            # Keep only rows with at least 10 non-NA values.
            df = df.dropna(thresh=10)
            # Season boundaries: earliest and latest match dates present.
            start_date = df["Date"].replace(["NaN", 'NaT'],
                                            np.nan).dropna().min()
            end_date = df["Date"].replace(["NaN", 'NaT'],
                                          np.nan).dropna().max()
            st_id = st.insert(division_id,
                              year_tag,
                              start_date,
                              end_date,
                              conn=conn)
            match.insert_matches(df, st_id, conn=conn)
            conn.commit()
    close_connection(conn)
Пример #3
0
def createIndex(esServer="localhost:9200",
                indexName="test",
                indexType="doctype",
                mapping=None,
                setting=None,
                clear=False):
    """
    Create an Elasticsearch index, optionally wiping an existing one first,
    and apply a type mapping if one is given.

    :param esServer: host:port of the Elasticsearch server
    :param indexName: name of the index to create
    :param indexType: document type the mapping applies to
    :param mapping: optional mapping definition to put on the index
    :param setting: optional index settings passed to create_index
    :param clear: when True, delete any existing index of the same name first
    :return: True
    """
    conn = ES(esServer)

    # Drop any pre-existing index when a clean slate is requested.
    if clear:
        deleteIndex(esServer=esServer, indexName=indexName)

    logging.info('Create index %s ...', indexName)
    try:
        # Falsy settings are normalized to None, matching the two-branch
        # call the original made.
        conn.indices.create_index(indexName, setting if setting else None)
    except exceptions.IndexAlreadyExistsException:
        logging.warning('Index is already created: %s ...', indexName)

    # Mapping is applied even when the index already existed.
    if mapping is not None:
        logging.info('Put mapping...')
        conn.indices.put_mapping(indexType, mapping, [indexName])

    return True
Пример #4
0
def has_goalcom_url(fifa_id, conn):
    """Return True when the player identified by fifa_id has a goal.com
    name stored, False when it is empty or the player is unknown.

    :param fifa_id: FIFA player identifier to look up
    :param conn: database connection forwarded to the lookup
    :return: bool
    """
    pit_data = pit.get_by_fifa_id(fifa_id, conn=conn)
    # Guard clause: unknown player — warn and report no URL.
    if not pit_data.shape[0]:
        logging.warning(f"No player for id {fifa_id}")
        return False
    return bool(pit_data["goalcom_name"].item())
def get_feature_vector_for_match(match):
    """Build the feature vector for a single match.

    :param match: dict carrying at least an "id" key
    :return: dict merging the raw match row with the derived feature
        vector; empty dict when no match row exists
    """
    match_df = mt.get_match(match["id"], conn=conn)
    # Guard clause: nothing in the DB for this id.
    if not match_df.shape[0]:
        logging.warning(f"No match data for match id {match['id']}")
        return {}
    match_data = match_df.iloc[0, :].to_dict()
    # Raw columns first, derived features layered on top.
    return {
        **match_data,
        **data_provider.create_match_feature_vector(match_data, conn=conn)
    }
Пример #6
0
def is_node_a_state(node: dict) -> bool:
    """
    Detects whether a node is a state (via its @configuration key).
    :param node: dict with node data
    :return: True if the node is a state, otherwise False
    """
    try:
        config = node['y:GenericNode']['@configuration']
    except KeyError:
        # Node is missing the generic-node payload or its configuration.
        logging.warning("%s node is incorrect" % node['id'])
        return False
    return config == "com.yworks.entityRelationship.big_entity"
Пример #7
0
def attach_ids_to_players(players):
    """Attach FIFA ids to scraped players via their goal.com URL ids.

    :param players: list of player dicts, each with a "url_id" key
    :return: (valid_players, invalid_players) — players that resolved to a
        FIFA id (mutated with a "fifa_id" key) and those that did not
    """
    valid_players, invalid_players = [], []
    for player in players:
        lookup = pit.get_id_by_goalcom_url(player["url_id"], conn=conn)
        if not lookup:
            # No DB entry for this goal.com URL — keep for reporting.
            logging.warning(
                f"No player for goalcom player url {player['url_id']}")
            invalid_players.append(player)
        else:
            player["fifa_id"] = lookup[0]
            valid_players.append(player)
    return valid_players, invalid_players
Пример #8
0
def main(conn):
    """Insert FIFA player names, read from fifa_ids.csv, into the database.

    :param conn: open database connection; committed after each insert
    """
    fifa_ids = pd.read_csv("fifa_ids.csv")["fifa_id"].values
    logging.info(f"Inserting {len(fifa_ids)} players")
    for fifa_id in fifa_ids:
        player_obj = {}
        player_df = get_latest_by_fifa_id(fifa_id, conn=conn)
        if player_df.shape[0] == 0:
            logging.warning(f"No player found for fifa id {fifa_id}")
            # BUGFIX: skip missing players. Previously execution fell
            # through and player_df["name"].item() raised on the empty
            # DataFrame, aborting the whole run.
            continue

        player_obj["fifa_name"] = player_df["name"].item()
        player_obj["fifa_id"] = fifa_id
        insert(conn, **player_obj)
        conn.commit()
Пример #9
0
def is_node_a_choice(node: dict) -> bool:
    """
    Detects whether a node is a choice pseudo-state (via its
    @configuration key).
    :param node: dict with node data
    :return: True if the node is a choice, otherwise False
    """
    try:
        config = node['y:GenericNode']['@configuration']
    except KeyError:
        # Node is missing the generic-node payload or its configuration.
        logging.warning("%s node is incorrect" % node['id'])
        return False
    return config == "com.yworks.bpmn.Gateway.withShadow"
def check_if_running(dir_name):
    """Decide whether a detection run should start for ``dir_name`` based
    on a JSON log of currently-running detections.

    A run starts (returns True) when no run is logged for ``dir_name`` or
    when the logged run began more than 60 minutes ago (assumed dead).
    When a run starts, the log is rewritten with this run's start time.

    :param dir_name: name of the directory/run to check
    :return: True when this process should start detecting, else False
    """
    to_process = False
    currently_detecting = None

    # Load the current-runs log if it exists and parses; a corrupt log is
    # treated the same as a missing one.
    if os.path.isfile(log_path):
        try:
            with open(log_path, 'r') as f:
                currently_detecting = json.load(f)
        except json.decoder.JSONDecodeError:
            logging.warning('Error loading currently detecting json')
            pass
        else:
            logging.info('Loaded current runs')

    if currently_detecting is not None:
        if dir_name in currently_detecting:
            # Timestamps are stored tz-aware ('%z'); compare in Singapore time.
            start = datetime.strptime(currently_detecting[dir_name],
                                      '%Y-%m-%d %H:%M:%S %z')
            # Stale entry (>60 min): assume the previous run died and start
            # a parallel run, refreshing the logged start time.
            if (datetime.now(tz=timezone('Singapore')) -
                    start).total_seconds() / 60 > 60:
                logging.info(
                    'Last run for %s started over 60 minutes ago, starting parallel run'
                    % dir_name)
                to_process = True
                currently_detecting[dir_name] = datetime.now(
                    tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')
            else:
                logging.info(
                    'Last run for %s within 60 mins still running, skipping for now'
                    % dir_name)
        else:
            # Log exists but has no entry for this run — start one.
            logging.info('No current runs for %s detected' % dir_name)
            to_process = True
            currently_detecting[dir_name] = datetime.now(
                tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')

    else:
        # No readable log at all: start fresh with only this run recorded.
        logging.info('No current run logs found, generating from scratch')
        to_process = True
        currently_detecting = {
            dir_name:
            datetime.now(
                tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')
        }

    # Persist the updated log only when this process will actually run.
    if to_process:
        with open(log_path, 'w') as f:
            json.dump(currently_detecting, f)

    return to_process
Пример #11
0
def get_state_actions(data: dict) -> str:
    """
    Get the label containing actions from node data.
    :param data: node with data
    :return: str with actions ("" when no actions label is present)
    """
    try:
        data = data['y:GenericNode']
    except KeyError:
        # NOTE(review): processing deliberately continues with the original
        # node dict here, mirroring the original fallback behaviour.
        logging.warning("Cannot retrieve state actions %s" % data['id'])
    attributes_config = 'com.yworks.entityRelationship.label.attributes'
    for label in flatten([data], 'y:NodeLabel'):
        if "#text" in label.keys() and '@configuration' in label.keys():
            if label['@configuration'] == attributes_config:
                return label['#text']
    return ""
Пример #12
0
def get_state_label(data: dict) -> str:
    """
    Gets the state label from node data.
    :param data: dict with node data
    :return: string with the label, or "" when none is found
    """
    node_id = data['id']
    try:
        data = data['y:GenericNode']['y:NodeLabel']
    except KeyError:
        # Fall through with the original dict; the loop below will simply
        # find no matching label and return "".
        logging.warning("Cannot retrieve state name %s" % data['id'])
    labels = data if isinstance(data, list) else [data]
    name_config = 'com.yworks.entityRelationship.label.name'
    for label in labels:
        if "#text" in label.keys() and '@configuration' in label.keys():
            if label['@configuration'] == name_config:
                return label['#text']
    logging.warning("Cannot retrieve state name %s" % node_id)
    return ""
Пример #13
0
def get_group_label(data: dict) -> str:
    """
    Gets the group node label from node data.
    :param data: dict with node data
    :return: string with the label, or "" when none is found
    """
    node_id = data['id']
    try:
        data = data['y:ProxyAutoBoundsNode']['y:Realizers']['y:GroupNode']
    except KeyError:
        logging.warning("Cannot retrieve group name %s" % data['id'])
        return ""
    # Group labels with internal placement carry the group's name.
    for label in flatten([data], 'y:NodeLabel'):
        if "#text" in label.keys() and '@modelName' in label.keys():
            if label['@modelName'] == 'internal':
                return label['#text']
    logging.warning("Cannot retrieve group name %s" % node_id)
    return ""
Пример #14
0
def get_group_actions(data: dict) -> str:
    """
    Get the label containing actions from group node data.
    :param data: dict with group node data
    :return: str with actions, or "" when none is found
    """
    try:
        data = data['y:ProxyAutoBoundsNode']['y:Realizers']['y:GroupNode']
    except KeyError:
        logging.warning("Cannot retrieve group actions %s" % data['id'])
        return ""
    data = flatten([data], 'y:NodeLabel')
    for label in data:
        if "#text" in label.keys() and '@modelName' in label.keys():
            if label['@modelName'] == 'custom':
                return label['#text']
            # BUGFIX: use .get() — labels without an '@alignment' key used
            # to raise KeyError here (sibling keys are membership-checked).
            if label.get('@alignment') == 'left':
                return label['#text']
    return ""
def clear_working_files(dir_name):
    """Delete the working artifacts of a finished run: its image directory,
    its metadata CSV, and its entry in the currently-detecting JSON log.

    :param dir_name: run directory name (also keys the log entry)
    """
    images_path = os.path.join(images_dir, dir_name)
    shutil.rmtree(images_path)
    logging.info('Deleted %s' % images_path)

    metadata_path = os.path.join(metadata_dir, '%s_images.csv' % dir_name)
    os.remove(metadata_path)
    logging.info('Deleted %s' % metadata_path)

    # Best effort: drop this run from the currently-detecting log; if the
    # log is unreadable, leave it untouched.
    try:
        with open(log_path, 'r') as f:
            currently_detecting = json.load(f)
    except json.decoder.JSONDecodeError:
        logging.warning('Error loading currently detecting json')
        pass
    else:
        if dir_name in currently_detecting:
            currently_detecting.pop(dir_name)
            with open(log_path, 'w') as f:
                json.dump(currently_detecting, f)
Пример #16
0
def get_team_features_for_matches(match_id, date, **kwargs):
    """Compute per-team player feature dicts for one match.

    :param match_id: id of the match whose lineup is looked up
    :param date: match date, forwarded to the player-feature calculation
    :param kwargs: forwarded to the data-access helpers (e.g. conn)
    :return: (home_features, away_features) dicts with 'home_'/'away_'
        prefixed keys; both empty when no lineup row exists
    """
    lineup = lt.get_by_match_id(match_id, **kwargs)
    if not lineup.shape[0]:
        return {}, {}

    # Column prefixes: hp/hs = home players/substitutes, ap/as = away.
    home_features = _features_for_side(lineup, 'hp', 'hs', 'home', match_id,
                                       date, **kwargs)
    away_features = _features_for_side(lineup, 'ap', 'as', 'away', match_id,
                                       date, **kwargs)
    return home_features, away_features


def _features_for_side(lineup, players_prefix, subs_prefix, side, match_id,
                       date, **kwargs):
    """Build the prefixed feature dict for one side ('home' or 'away')."""
    players = lineup.loc[:, lineup.columns.str.startswith(players_prefix)].values[0]
    subst = lineup.loc[:, lineup.columns.str.startswith(subs_prefix)].values[0]
    # Drop empty lineup slots (falsy ids) before the feature calculation.
    fifa_ids = [player for player in np.concatenate((players, subst)) if player]
    features = calculate_player_features_for_team(fifa_ids, date, **kwargs)
    # gk_handling below 60 is treated as "no real goalkeeper present".
    if features["gk_handling"] < 60:
        logging.warning(f"No goalkeeper in {side} team's lineup!! Match id: {match_id}")
    return {f'{side}_{k}': v for k, v in features.items()}
Пример #17
0
def map_lineup_with_player_data(lineup):
    """Resolve a scraped lineup's players to FIFA ids and store the lineup
    against the matching DB game; invalid players and unmatched games are
    recorded for later inspection.

    :param lineup: dict with team names, date, player/substitute lists and
        (optionally) "match_link" / "match_id" keys
    """
    home_team = lineup["home_team"]
    away_team = lineup["away_team"]
    date = lineup["date"]

    home_team_players, home_invalid_players = attach_ids_to_players(
        lineup["home_team_players"] + lineup["home_team_substitutes"])
    for home_invalid_player in home_invalid_players:
        add_invalid_players_to_missing(home_invalid_player, date, home_team)
    away_team_players, away_invalid_players = attach_ids_to_players(
        lineup["away_team_players"] + lineup["away_team_substitutes"])
    for away_invalid_player in away_invalid_players:
        add_invalid_players_to_missing(away_invalid_player, date, away_team)

    match_id_tuple = mt.get_id_for_game(home_team, away_team, date, conn=conn)

    if not match_id_tuple:
        logging.warning(
            f'No match for {lineup["home_team"]} vs. {lineup["away_team"]} {date}'
        )
        # BUGFIX: default to "" instead of None — concatenating with a None
        # default raised TypeError whenever either key was absent.
        missing_match_urls.append(
            lineup.get("match_link", "") + lineup.get("match_id", ""))
    else:
        store_lineups(match_id_tuple[0], home_team_players, away_team_players)
Пример #18
0
def replace_image(job_id,
                  file_name,
                  html_string,
                  bucket_name,
                  bucket_folder='content/'):
    """Rehost hackpad-attachment images referenced in an HTML string to our
    own S3 bucket and rewrite their src URLs in the returned HTML.

    :param job_id: identifier used only for log correlation
    :param file_name: source file name, used only for log correlation
    :param html_string: HTML whose src='...' URLs are scanned
    :param bucket_name: destination S3 bucket name
    :param bucket_folder: key prefix inside the bucket (default 'content/')
    :return: the HTML string with image URLs rewritten to the S3 locations
    """
    # parse html and put it in a variable
    images = set(re.findall("src='([^']+)'", html_string))

    logging.info("[IMG] Start analyzing html for job %s in file %s", job_id,
                 file_name)

    # run loop for all images in the html
    # Upload images in our bucket and replace image src
    for image in images:
        image_src = image.strip()

        # if image was not uploaded to hackapad s3 ignore
        if not image_src.startswith(
                'https://hackpad-attachments.s3.amazonaws.com/'):
            continue

        logging.info("[IMG] Processing image %s" % image_src)

        # get image mime_type; fall back to JPEG when it cannot be guessed
        mime_type_info = mimetypes.guess_type(image_src)
        mime_type = mime_type_info[0] if mime_type_info[0] else 'image/jpeg'

        # construct expire and cache_control headers (100-day lifetime)
        days = 100
        cache_control = 'max-age= %d' % (60 * 60 * 24 * days)
        expires = datetime.utcnow() + timedelta(days=days)
        expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")

        try:
            logging.info("[IMG] First try for image %s", image_src)
            # get image name
            # NOTE(review): this binds the whole path-segment list, not just
            # the final name; only image_name[-1] is used below.
            image_url_parts = image_src.split('/')
            image_name = image_url_parts

            # read image url with the path percent-encoded (handles names
            # with characters urlopen would otherwise choke on)
            image_src_parsed = urllib.parse.urlparse(image_src)
            image_name_encoded = urllib.parse.quote(image_src_parsed.path)
            file = io.BytesIO(
                urllib.request.urlopen(
                    urllib.parse.urljoin(image_src,
                                         image_name_encoded)).read())
            img = Image.open(file, mode='r')
        except urllib.error.HTTPError as error:
            logging.warning(
                "[IMG] First try block resulted in urllib.error.HTTPError: %s"
                % error)
            # Retry with the raw (unencoded) URL before giving up.
            try:
                logging.info("[IMG] retry for image %s", image_src)
                file = io.BytesIO(urllib.request.urlopen(image_src).read())
                img = Image.open(file, mode='r')
            except urllib.error.HTTPError as error:
                logging.error("[IMG] %s", error.read())
                continue
            except UnicodeEncodeError:
                logging.error("[IMG] UnicodeEncodeError for image %s",
                              image_src)
                continue

        # get the image extension (PIL spells JPEG, not JPG)
        image_parts = image_src_parsed.path.split('.')
        image_extension = 'JPEG' if image_parts[-1].upper(
        ) == 'JPG' else image_parts[-1]
        # hack for weird image URLs
        if len(image_extension) > 4:
            image_extension = 'png'

        # stream file in binary mode
        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format=image_extension.upper())
        imgByteArr = imgByteArr.getvalue()

        # upload image to our bucket
        # First check if it already exists (load() raises ClientError 404
        # when the key is absent)
        exists = False
        try:
            s3.Object(bucket_name, bucket_folder + image_name[-1]).load()
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                exists = False
        else:
            exists = True
        if exists:
            logging.info("[IMG] Skipping upload: %s already exists" %
                         image_src)
        else:
            logging.info("[IMG] Uploading %s" % image_src)
            s3.Bucket(bucket_name).put_object(Key=bucket_folder +
                                              image_name[-1],
                                              Body=imgByteArr,
                                              ACL='public-read',
                                              ContentType=mime_type,
                                              CacheControl=cache_control,
                                              Expires=expires)

        logging.info("[IMG] Replace %s with %s" %
                     (image_src, 'https://s3-eu-west-1.amazonaws.com/' +
                      bucket_name + '/' + bucket_folder + image_name[-1]))
        # replace the src of the image with the new uploaded location
        html_string = html_string.replace(
            image_src, 'https://s3-eu-west-1.amazonaws.com/' + bucket_name +
            '/' + bucket_folder + image_name[-1])

        logging.info("[IMG] Replaced with %s", image_src)

    logging.info("[IMG] Finished analyzing html for job %s in file %s", job_id,
                 file_name)

    return html_string
Пример #19
0
def create_actions(raw_triggers: str, source: str,
                   player_signal: [str]) -> [Trigger]:
    """
    Parses raw label text with events and their actions into a list of
    internal Triggers ("entry" and "exit" events are not registered as
    signals).
    We use a regexp to split the raw data string: some non-space symbols,
    then some space symbols, then a "/" symbol at the end of a line.
    Example:
        >>>create_actions("entry/
                           BUTTON2_PRESSED/
                           flash(get_color(rgb_table));
                           play_sound(get_random_sound(BLASTER));
                           BUTTON2_PRESSED_FOR_THREE_SECOND/
                           play_sound(get_random_sound(FORCE);
                           BOTH_BUTTONS_PRESSED/
                           change_color(get_color(rgb_table));
                           play_sound(get_sound(BOOT), 5);")

        [Trigger(name="BUTTON2_PRESSED", action="flash(get_color(rgb_table));
                                                play_sound(get_random_sound(BLASTER));", source=5)
         Trigger(name="BUTTON2_PRESSED_FOR_THREE_SECOND"), action="play_sound(get_random_sound(FORCE);", source=5),
         Trigger(name="BOTH_BUTTONS_PRESSED"), action="change_color(get_color(rgb_table));
                                                     play_sound(get_sound(BOOT));", source=5)]
    :param raw_triggers: string with events and reactions
    :param source: id of source node
    :param player_signal: list of all signals
    :return: tuple (list of Triggers, updated list of signals)
    """
    # Event headers look like "NAME/" at the end of a line.
    trigger_regexp = r"\S+\s*/" + '\n'
    trigger_list = re.findall(trigger_regexp, raw_triggers)
    trigger_data = re.split(trigger_regexp, raw_triggers)
    # Pair each event header with the action text that follows it.
    triggers = dict(list(zip(trigger_list, trigger_data[1:])))
    actions = []
    for (trigger_id, (trigger, action)) in enumerate(triggers.items(),
                                                     start=1):
        guard = ""
        # Strip the trailing "/\n" from the matched header.
        trigger_name = trigger[:-2].strip()
        if '[' in trigger_name:
            # Extract a bracketed guard, e.g. "NAME[else]".
            guard_regexp = r"\[.*\]"
            res = re.search(guard_regexp, trigger_name)
            guard = res.group(0)[1:-1]
            trigger_name = re.split(guard_regexp, trigger_name)[0].strip()
            # Only an [else] guard is accepted on internal triggers.
            if guard != 'else':
                logging.warning("Internal trigger %s[%s] can't contain guard" %
                                (trigger_name, guard))

        # Register new signals; entry/exit are lifecycle pseudo-events.
        if trigger_name not in player_signal and trigger_name and trigger_name != "entry" and trigger_name != 'exit':
            player_signal.append(trigger_name)
        actions.append(
            Trigger(name=trigger_name,
                    action=action.strip(),
                    source=source,
                    type="internal",
                    guard=guard,
                    target="",
                    id=trigger_id,
                    x=0,
                    # Module-level layout constants stack internal triggers
                    # vertically and size their labels.
                    y=internal_trigger_height * trigger_id,
                    dx=len(trigger_name) + internal_trigger_delta,
                    dy=0,
                    points=[],
                    action_x=0,
                    action_y=5 * trigger_id - 2,
                    action_width=len(trigger_name) + action_delta))
    return actions, player_signal
Пример #20
0
        # NOTE(review): fragment of a larger function — `credentials`,
        # `proj_dir`, `terminate` and the get/start/stop_instance helpers
        # come from the enclosing scope not visible here.
        # cache_discovery=False presumably silences discovery-cache
        # warnings — see linked issue:
        # https://github.com/jeremyephron/simple-gmail/issues/6
        compute = googleapiclient.discovery.build('compute',
                                                  'v1',
                                                  credentials=credentials,
                                                  cache_discovery=False)

        # Instance inventory keyed by instance name; each entry carries
        # its GCP project and zone.
        with open(os.path.join(proj_dir, 'config/instances.json'), 'r') as f:
            config = json.load(f)

        for instance in config:
            project = config[instance]['project']
            zone = config[instance]['zone']

            # Warn for anything not RUNNING; debug-log healthy instances.
            status = get_instance(compute, project, zone, instance)['status']
            if status != 'RUNNING':
                logging.warning('%s / %s / %s STATUS: %s' %
                                (project, instance, zone, status))
            else:
                logging.debug('%s / %s / %s STATUS: %s' %
                              (project, instance, zone, status))
            # https://cloud.google.com/compute/docs/instances/instance-life-cycle
            # https://cloud.google.com/compute/docs/instances/preemptible
            # Terminated (e.g. preempted) instances are restarted.
            if status == 'TERMINATED':
                logging.warning('Restarting instance')
                response = start_instance(compute, project, zone, instance)
                logging.warning(response)

            # The enclosing scope's `terminate` flag forces a shutdown of
            # running instances.
            if status == 'RUNNING' and terminate:
                logging.warning('Terminating instance')
                response = stop_instance(compute, project, zone, instance)
                logging.warning(response)
Пример #21
0
def update_states_with_edges(states: [State], flat_edges: [dict],
                             start_state: State, player_signal: [str],
                             min_x: int, min_y: int):
    """
    Parses events on edges and adds them as external triggers to the
    corresponding source state (excluding the start edge); also recognizes
    edges entering/leaving choice nodes and types them specially.
    :param states: list of states
    :param flat_edges: list with edges
    :param start_state: id of the start state, used to exclude the start edge
    :param player_signal: list of already created signals
    :param min_x: minimal x coordinate used to normalize edge points
    :param min_y: minimal y coordinate used to normalize edge points
    :return: updated list of signals
    """
    for edge in flat_edges:
        old_source = edge['source']
        if old_source != start_state:
            old_target = edge['target']
            source_state = get_state_by_id(states, old_source, "old")
            target_state = get_state_by_id(states, old_target, "old")
            # BUGFIX: reset guard for every edge. It was previously only
            # assigned inside the labelled-edge branch, so an unlabelled
            # edge reused the previous edge's guard (or raised NameError
            # on the first iteration).
            guard = ""
            if is_edge_correct(
                    edge, "y:GenericEdge"
            ) and "#text" in edge['y:GenericEdge']['y:EdgeLabel'].keys():
                # Label format: "trigger[guard]/action".
                action = edge['y:GenericEdge']['y:EdgeLabel']["#text"].split(
                    '/')
                trigger_name = action[0].strip()
                if '[' in trigger_name and ']' in trigger_name:
                    guard_regexp = r"\[.*\]"
                    res = re.search(guard_regexp, trigger_name)
                    guard = res.group(0)[1:-1]
                    trigger_name = re.split(guard_regexp,
                                            trigger_name)[0].strip()
                    # 'else' guards are only valid on internal triggers.
                    if guard == 'else':
                        logging.warning(
                            "External trigger %s[%s] can't contain 'else'" %
                            (trigger_name, guard))
                trigger_action = action[1].strip() if len(action) > 1 else ""
            else:
                trigger_name = ""
                trigger_action = ""
            # Normalize edge geometry to the diagram origin and scale.
            x, y, dx, dy, points = get_edge_coordinates(edge)
            new_points = [((point[0] - min_x) // divider,
                           (point[1] - min_y) // divider) for point in points]
            action_x, action_y, action_width = get_edge_label_coordinates(edge)
            # Edges touching a choice pseudo-state get special trigger types.
            trig_type = "external"
            if source_state.type == "choice":
                trig_type = "choice_result"
            if target_state.type == "choice":
                trig_type = "choice_start"
            trigger = Trigger(name=trigger_name,
                              type=trig_type,
                              guard=guard,
                              source=old_source,
                              target=old_target,
                              action=trigger_action,
                              id=0,
                              x=(x) // divider,
                              y=(y) // divider,
                              dx=dx // divider,
                              dy=dy // divider,
                              points=new_points,
                              action_x=action_x // divider,
                              action_y=action_y // divider,
                              action_width=action_width // divider + 2)
            source_state.trigs.append(trigger)
            if trigger_name and trigger_name not in player_signal:
                player_signal.append(trigger_name)
    update_state_ids(states)
    return player_signal
Пример #22
0
def warning(session, e):
    """Log a database connectivity problem and roll the session back.

    :param session: DB session/transaction object exposing ``rollback()``
    :param e: exception (or message) describing the connection problem
    """
    # Lazy %-style args let the logging framework defer formatting.
    logging.warning("Database connection has problem. Retrying...: %s", e)
    session.rollback()