Example #1
    def categorize_json(self, folder_name):
        log.info("Dividing data between raw_fse and raw_dd.")
        # fse_data*.json files go to raw_fse (folder_name is expected to end with a path separator).
        json_fse = [
            folder_name + json for json in os.listdir(folder_name)
            if json.endswith('.json') and json.startswith('fse_data')
        ]

        # domain_data*.json files go to raw_dd.
        json_dd = [
            folder_name + json for json in os.listdir(folder_name)
            if json.endswith('.json') and json.startswith('domain_data')
        ]
        return json_fse, json_dd
Example #2
def cleanup_json(json: str) -> str:
    """Remove codeblocks, if present."""
    if json.startswith("```") and json.endswith("```"):
        # remove ```json and ``` from start and end
        json = json.strip("```json")
        json = json.strip("```py")  # not documented but want to accept it as well
        return json.strip("```")

    elif json.startswith("`") and json.endswith("`"):
        # inline codeblocks
        return json.strip("`")

    return json
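For context, a minimal usage sketch of cleanup_json on made-up inputs (the sample strings are illustrative, not from the original project):

sample_fenced = "```json\n{\"ok\": true}\n```"  # hypothetical model output wrapped in a fence
sample_inline = "`{\"ok\": true}`"

print(cleanup_json(sample_fenced))  # '\n{"ok": true}\n'  (fence stripped, surrounding newlines remain)
print(cleanup_json(sample_inline))  # '{"ok": true}'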
Example #3
def convert_annotation_offline(result_dir):
    """

    :param result_dir: 标注结果保存路径
    :return: 老相机结果, 新相机结果, 图片被标注次数
    """
    total_ann = defaultdict(list)
    old_cam_ann, new_cam_ann = defaultdict(list), defaultdict(list)
    num = 0
    for id in os.listdir(result_dir):
        for camera in ['Old_Camera', 'New_Camera']:
            camera_path = os.path.join(result_dir, id, camera)
            for video in os.listdir(camera_path):
                if os.path.isdir(os.path.join(camera_path, video)):
                    for json in os.listdir(os.path.join(camera_path, video)):
                        if json.endswith('.json'):
                            ann = read_json(
                                os.path.join(camera_path, video, json))
                            # Route each annotation to the matching camera dict, keyed by file name.
                            if 'old' in json:
                                old_cam_ann[json].append(ann)
                            else:
                                new_cam_ann[json].append(ann)
        num += 1
    return old_cam_ann, new_cam_ann, num
Example #4
def strip_json(json):
    """Trim anything before the first '{' and after the last '}'."""
    json = json.strip()

    if not json.startswith("{"):
        json = json[json.index("{"):]

    if not json.endswith("}"):
        json = json[:json.rindex("}") + 1]

    return json
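A short usage sketch for strip_json with a made-up reply string; note that str.index raises ValueError when the text contains no '{' at all, so callers presumably only pass text that holds a JSON object:

raw = 'Sure, here is the data: {"a": 1} hope that helps!'  # hypothetical input
print(strip_json(raw))  # '{"a": 1}'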
Example #5
def loadFromFile():
    print("Loading tweets from file...")
    tweets = []
    try:
        jsonFiles = [
            json for json in os.listdir("data/" + collectionName)
            if json.endswith('.json')
        ]
    except FileNotFoundError:
        print(
            "Cannot load tweets from specified path. No directory called: "
            + collectionName)
        return
    for file in jsonFiles:
        with open("data/" + collectionName + "/" + file) as tweet:
            tweets.append(json.load(tweet))
    print("Done. Tweets correctly loaded")
    return tweets
Example #6
def main():
    # 1. TAKING CARE OF STOPWORDS AND NUMBER OF ARGUMENTS
    if len(terms_normal) > 5:
        print("Hey! You have used more than 5 terms, which is not allowed.")
        sys.exit()
    else:
        counter = 0
        stop_words = list(stopwords.words('english'))
        for term in terms_normal:
            if term in stop_words:
                print(
                    "Error: You have used a stop-word in the terms, which is not allowed. Try another word."
                )
                sys.exit()

    # 2.1 HANDLING THE SPEECHES
    path = args.path
    try:
        speeches_jsons = [
            json for json in os.listdir(path) if json.endswith('.json')
        ]
    except FileNotFoundError:
        print("Path not found.")
        sys.exit()
    speeches_df = pd.DataFrame(columns=['Date', 'Score', 'Term'])
    corpus, combined_jsons = [], []
    for text in speeches_jsons:
        with open(os.path.join(path, text)) as element:
            json_text = json.load(element)
            combined_jsons.append(json_text)
            corpus.append(json_text["Speech"].lower())

    # 2.2 CREATING VECTORS
    vectorizer = TfidfVectorizer(ngram_range=(1, 3), stop_words="english")
    vectors = vectorizer.fit_transform(corpus)
    vocab = vectorizer.vocabulary_

    # 2.3 CREATING DATAFRAMES
    for term in terms_normal:
        df_term = pd.DataFrame(columns=['Date', 'Score', 'Term'])
        for index, speech_item in enumerate(combined_jsons):
            date = pd.to_datetime(speech_item['Date'], yearfirst=True)
            speech = speech_item['Speech'].lower()
            # A term can appear as a substring without being a vocabulary key,
            # so look it up defensively to avoid a KeyError.
            column = vocab.get(term)
            if term in speech and column is not None:
                score = vectors[index, column]
            else:
                score = 0.0
            df_term.loc[index] = [date, score, term]
        # DataFrame.append was removed in pandas 2.0; concatenate instead.
        speeches_df = pd.concat([speeches_df, df_term])

    # 3. MAKING CHARTS
    sns.set_theme()
    plot = sns.relplot(data=speeches_df,
                       x="Date",
                       y="Score",
                       kind="line",
                       hue="Term",
                       legend="auto")
    plt.title(args.title)
    plot.savefig(args.output)
    plt.show()
Example #7
def import_jsons(json_folder_path):
    """Return the names (not full paths) of all .json files in json_folder_path."""
    json_files = [
        json for json in os.listdir(json_folder_path) if json.endswith('.json')
    ]
    return json_files
Example #8
def read_jsons(folder):
    """Return the names of all .json files directly inside folder."""
    jsons = [json for json in os.listdir(folder) if json.endswith(".json")]

    return jsons
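A minimal usage sketch for read_jsons; the folder name is a placeholder, and os.path.join is needed because os.listdir returns bare file names:

files = read_jsons("data/annotations")  # hypothetical folder
full_paths = [os.path.join("data/annotations", name) for name in files]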