Example #1
def main():
    lang = "deu"
    docs_path = "data/nel_" + lang + ".json"
    events = open_json(docs_path).keys()
    # save_sim_entity_overlap(articles, lang)

    # ************************* save similarities

    # whole_featurs = np.load("features/" + lang + "_features.npy", allow_pickle=True)  # last saved file
    # save_sims_articles(whole_featurs, lang, is_bert=True, is_entity=False)
    # total_sims_file_name = compute_sims_combinations(whole_featurs, lang, name_postfix='')

    # *************** compute average prec of retrievals            e.g. 'Barack'{ '2323223', 'similarity': {'Barack','8734645', 0.8 } }
    total_sims_file_name = lang + "_total_similarity_file.json"
    dic_sims_articles = open_json("similarity/" + total_sims_file_name)
    keys = dic_sims_articles.keys()
    avg_event = {}
    ls_features = [
        'sim_bert', 'sim_entity', 'sim_obj', 'sim_loc', 'sim_scene',
        'sim_avg_text', 'sim_avg_visual', 'sim_avg_total'
        # 'sim_max_text', 'sim_max_visual', 'sim_max_total'
    ]
    for selected_feature in ls_features:
        print("*********   " + selected_feature)
        for event in events:
            print(event)
            queries_by_event = []
            for k0 in keys:
                k = k0.split("/")[0]
                if event == k:
                    queries_by_event.append(
                        dic_sims_articles[k0]
                    )  # collects all queries of this event
            avg = []
            for i, query in enumerate(queries_by_event):
                y_true0, scores0 = convert_to_class(event, query,
                                                    selected_feature)
                y_true = np.zeros([len(y_true0) - 1])
                y_true[:i] = y_true0[:i]
                y_true[i:] = y_true0[i + 1:]
                scores = np.zeros([len(y_true0) - 1])
                scores[:i] = scores0[:i]
                scores[i:] = scores0[i + 1:]
                avg.append(average_precision_score(y_true, scores))
            avg_event[event] = np.mean(avg)
        save_file("similarity/avgs/" + lang + "/" + selected_feature,
                  avg_event)

    print('')
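The leave-one-out slicing above (building y_true and scores without entry i) is the core of the evaluation; a minimal standalone sketch, with made-up labels and scores standing in for the output of convert_to_class:

import numpy as np
from sklearn.metrics import average_precision_score

# toy relevance labels and similarity scores for one query with 5 candidate articles
y_true0 = np.array([1, 1, 0, 0, 1])            # 1 = candidate belongs to the same event
scores0 = np.array([0.9, 0.8, 0.3, 0.2, 0.7])
i = 1                                          # position of the query itself

# drop the query's own entry before scoring, as the loop above does with slicing
y_true = np.concatenate([y_true0[:i], y_true0[i + 1:]])
scores = np.concatenate([scores0[:i], scores0[i + 1:]])
print(average_precision_score(y_true, scores))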
Example #2
def create_tagging_schema(directory_source,source,attributes_chosed,raf=False,raf_attribute="",name=""):
    files = os.listdir(directory_source)
    source_sentences = []
    source_example_counts = dict()
    print("Tagging al the sentence of source: "+source+" ...")
    for filename in [file for file in files if file.endswith(".json")]:
        js = utils.open_json(filename,source)
        for attribute in js:
            if raf:
                if isinstance(js[attribute],str) and attribute!="<page title>" and attribute==raf_attribute:
                    sentence = []
                    for token in utils.tokenizer(attribute):
                        sentence.append((token,"O"))
                    sentence.append(("ENDNAME","O"))
                    sentence = sentence + tag_sentence(attribute,js[attribute],attributes_chosed,source,raf,raf_attribute,name)
                    sentence.append(("ENDVALUE","O"))
                    if useful(sentence)>0:
                        source_sentences.append((raf_attribute,sentence))
                        source_example_counts.setdefault(raf_attribute,0)
                        source_example_counts[raf_attribute]=source_example_counts[raf_attribute]+1
            else:
                if isinstance(js[attribute],str) and attribute!="<page title>" and [t for t in d.get_predicate_name(attribute,source,True) if t in attributes_chosed]:
                    sentence = []
                    for token in utils.tokenizer(attribute):
                        sentence.append((token,"O"))
                    sentence.append(("ENDNAME","O"))
                    sentence = sentence + tag_sentence(attribute,js[attribute],attributes_chosed,source)
                    sentence.append(("ENDVALUE","O"))
                    if useful(sentence)>0:
                        p_name = d.get_predicate_name(attribute,source,True)[0]
                        source_sentences.append((p_name,sentence))
                        source_example_counts.setdefault(p_name,0)
                        source_example_counts[p_name]=source_example_counts[p_name]+1
    return (source_example_counts,source_sentences)
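The sentences collected here are lists of (token, tag) pairs with ENDNAME and ENDVALUE separators between the attribute name and its value. A hypothetical example of one (predicate, sentence) pair, assuming tag_sentence emits BIO-style tags for the value tokens:

# assumed shape of one collected example for attribute "brand" with value "Nikon"
example = ("brand", [
    ("brand", "O"),        # tokens of the attribute name, untagged
    ("ENDNAME", "O"),      # separator closing the attribute name
    ("Nikon", "B-brand"),  # value token as tagged by tag_sentence (BIO tags are an assumption)
    ("ENDVALUE", "O"),     # separator closing the value
])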
Example #3
    def difference_in_shares():
        """Finds the difference in shares.
           Creates a dict to be used by Output"""

        company_dict = utils.open_json(MONITOR)

        share_difference_dict = {}

        for company in company_dict:
            share_change = 1.0 - (
                company_dict[company]["Initial-share-price"] /
                company_dict[company]["Current-share-price"])

            maximum = 1 - (company_dict[company]["Initial-share-price"] /
                           max(company_dict[company]["Share-price-list"]))

            share_difference_dict[company] = {}
            share_difference_dict[company]["Change"] = share_change
            share_difference_dict[company]["Max"] = max(
                company_dict[company]["Share-price-list"])
            share_difference_dict[company]["Max-change"] = maximum
            share_difference_dict[company]["Initial"] = company_dict[company][
                "Initial-share-price"]
            share_difference_dict[company]["Current"] = company_dict[company][
                "Current-share-price"]

        return share_difference_dict
Example #4
    def get_initial_company_info():
        """Gets the initial information for each company"""

        company_dict = utils.open_json(MONITOR)

        for company in company_dict:
            # Gets symbol for company
            if company_dict[company]["Symbol"] == "unknown":
                try:
                    with urllib.request.urlopen(
                            f'https://finance.yahoo.com/_finance_doubledown/'
                            f'api/resource/searchassist;searchTerm={company}'
                    ) as response:

                        html = response.read().decode()
                        d = json.loads(html)

                        company_dict[company]["Symbol"] = d['items'][0][
                            'symbol']

                except urllib.error.HTTPError as error:
                    utils.write_to_log(f'Error opening URL: {error}')

            # Gets initial share price
            if company_dict[company]["Initial-share-price"] == 1:
                yahoo = Share(company_dict[company]["Symbol"])
                share = yahoo.get_price()
                company_dict[company]["Initial-share-price"] = float(share)
                company_dict[company]["Current-share-price"] = float(share)

        utils.write_to_json(MONITOR, company_dict)
Example #5
def add_question_ids(infile, subject_metadata):
    question_data = open_json(infile)
    max_q = 0
    for q_id in question_data:
        subjects = question_data[q_id]['subjects']
        new_subject_map = [subject_metadata[d]['new_id'] for d in subjects]
        child_subjects = []
        for d1 in subjects:
            is_ok = True
            for d2 in subjects:
                if d1 == d2:
                    continue
                if d1 in subject_metadata[d2]['parents']:
                    is_ok = False
                    break
            if is_ok:
                child_subjects.append(d1)
        question_data[q_id]['new_sub_map'] = new_subject_map
        child_subject_map = [
            subject_metadata[d]['new_id'] for d in child_subjects
        ]
        question_data[q_id]['child_map'] = child_subject_map
        question_data[q_id]['childs'] = child_subjects
        child_whole_map = []
        for child in child_subjects:
            parent = subject_metadata[child]['parents']
            parent = [d for d in parent if d]
            parent = [subject_metadata[d]['new_id'] for d in parent]
            child_whole_map.append(parent)
        question_data[q_id]['child_whole_map'] = child_whole_map
        max_q = max(len(child_whole_map), max_q)

    print(max_q)
    dump_json(infile, question_data)
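The inner loop keeps only the most specific subjects: a subject is dropped when it appears among the parents of another subject attached to the same question. A toy illustration with hypothetical IDs and metadata:

# hypothetical metadata: 33 and 71 are children of the broader subject 3
subject_metadata = {
    3:  {'new_id': 0, 'parents': []},
    33: {'new_id': 1, 'parents': [3]},
    71: {'new_id': 2, 'parents': [3]},
}
subjects = [3, 33, 71]

child_subjects = [
    d1 for d1 in subjects
    if not any(d1 in subject_metadata[d2]['parents'] for d2 in subjects if d2 != d1)
]
print(child_subjects)  # [33, 71] -- the generic parent subject 3 is filtered out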
Example #6
def add_false_examples(set,target):
    sentences = []
    for s in next(os.walk(config["DIRECTORY_DATASET"]))[1]:  # one subdirectory per source site
        if s!="www.ebay.com" and s!="www.alibaba.com":
            directory_source = config["DIRECTORY_DATASET"] + s
            files = os.listdir(directory_source)
            print(directory_source+"...")
            for filename in [file for file in files if file.endswith(".json")]:
                js = utils.open_json(filename,s)
                sentence = []
                for attribute in js:
                    if d.get_predicate_name(attribute,s,False)[0]==target and isinstance(js[attribute],str) and js[attribute]!="Black":
                        for token in utils.tokenizer(attribute):
                            sentence.append((token,"O"))
                        sentence.append(("ENDNAME","O"))
                        for token in utils.tokenizer(js[attribute]):
                            sentence.append((token,"O"))
                        sentence.append(("ENDVALUE","O"))
                sentences.append(sentence)
    with open("dataset/"+set+"_set.txt","a") as f:
        for sentence in sentences:
            for (token,tag) in sentence:
                f.write(token+"\t"+tag+"\n")
            f.write("\n")
Example #7
def main():
    # Get the contents of the config JSON
    jcontents = utils.open_json(JSONPATH)
    auth = (USERNAME, PASSWORD)

    # Make an empty array to store the client objects
    client_objects = []

    # Iterate through the JSON, and make an object for each client
    for json_config in jcontents["technician"]["clients"]:
        new_client = Client(json_config)
        client_objects.append(new_client)

    # Connect to the Office 365 inbox, get a list of emails
    inbox = get_inbox()
    emails_list = inbox.from_folder('Monthly Reports').fetch_next(50)  # magic

    # Download all the attachments locally
    # This needs a serious rework, it is very unoptimized!
    for client_obj in client_objects:
        for email in emails_list:
            for report_email in client_obj.client_emails:
                if email.getSubject() == report_email["subject"]:
                    email.fetchAttachments()
                    if email.attachments[0].save("./pdfs/"):
                        client_obj.downloaded_report_count += 1
                    else:
                        raise ValueError("File save error!")

        if client_obj.report_count != client_obj.downloaded_report_count:
            raise ValueError("Missing File: {}".format(client_obj.client_name))

    # Parse each document and make a dict of scores
    for client in client_objects:
        sd, ns = soupy.get_score_dict("./pdfs/" +
                                      client.client_emails[0]["filename"])
        client.set_scores_dict(sd)
        client.set_network_health_score(ns)

    # For each client, generate a report based on the scores.
    for count, client in enumerate(client_objects):
        #if count == 1: # Comment in and set break point for testing.
        #break
        if client.network_score == 0:
            print("NO MANAGED SERVICES FOR {}!".format(client.client_name))
        else:
            e_email = client.build_email()
            print("Sending email for: {}... Status: ".format(
                client.client_name),
                  end="")
            print(client.send_email(e_email, auth))
            sleep(10)

    return 0
Example #8
def clean():
    args = get_args_clean()
    import os
    full_path = os.path.abspath(args["json_file"])
    folder = os.path.dirname(full_path)
    all_imgs = utils.open_json(args["json_file"])
    filtered = utils.filter_by_type(os.listdir(folder), "_c.jpg")

    clean_json = []
    for img in all_imgs:
        for one in filtered:
            if utils.equal(img["file_name"], one):
                clean_json.append(img)
                break

    utils.save_json(clean_json, args["outfile"])
Example #9
def source_dictionary(source):
    d = dict()
    print("Creating values dictionary for : " + source)
    for filename in os.listdir(config["DIRECTORY_DATASET"] + source):
        js = utils.open_json(filename, source)
        if js != "Not json":
            for attribute in js:
                predicate_name = get_predicate_name(attribute, source,
                                                    False)[0]
                if isinstance(js[attribute],
                              str) and predicate_name != "Not found":
                    for value in re.split(r'[\(\);,]', js[attribute]):
                        if value.lstrip() and len(value.lstrip()) > 1:
                            d.setdefault(predicate_name,
                                         set()).add(value.lstrip())
    return d
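The re.split pattern above breaks a raw attribute value on parentheses, semicolons and commas before short fragments are filtered out; a quick illustration on a made-up value string:

import re

value = "4 GB RAM; 2.5 GHz (Turbo), black"
parts = re.split(r'[\(\);,]', value)
# ['4 GB RAM', ' 2.5 GHz ', 'Turbo', '', ' black']
kept = [p.lstrip() for p in parts if p.lstrip() and len(p.lstrip()) > 1]
print(kept)  # ['4 GB RAM', '2.5 GHz ', 'Turbo', 'black']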
Example #10
    def minus_days():
        """Takes away a day from the "Days-Left",
           removes from monitor.json if == 0"""

        company_dict = utils.open_json(MONITOR)
        remove = []

        for company in company_dict:
            if company_dict[company]["Days-left"] > 0:
                company_dict[company]["Days-left"] -= 1

            elif company_dict[company]["Days-left"] == 0:
                remove.append(company)

        for company in remove:
            # Do I want to keep a record of all the companies that have been mentioned and their prices???
            # Goes here
            del company_dict[company]

        utils.write_to_json(MONITOR, company_dict)
Example #11
    def get_current_shares():
        """Gets current shares, compares it to initial, finds difference.
           Returns for output to handle"""

        company_dict = utils.open_json(MONITOR)

        for company in company_dict:
            try:
                yahoo = Share(company_dict[company]["Symbol"])
                yahoo.refresh()
                share = yahoo.get_price()

                company_dict[company]["Current-share-price"] = float(share)
                company_dict[company]["Share-price-list"].append(float(share))

            except ValueError:
                # yahoo.get_price() will return None if an error occurs
                print("Could not add to the Current share/Share price list")

        utils.write_to_json(MONITOR, company_dict)
Example #12
def raf_dict(ta, atomic):
    values_dict = dict()
    for s in atomic:
        source, raf_attribute = s.split("__")[0], (
            s.split("__")[1]).split("/")[1]
        print("Creating values dictionary for {0} and attribute {1}".format(
            source, raf_attribute))
        if source in values_dict.keys():
            source_dic = values_dict[source]
        else:
            values_dict.setdefault(source, dict())
            source_dic = dict()
        for filename in os.listdir(config["DIRECTORY_DATASET"] + source):
            js = utils.open_json(filename, source)
            if js != "Not json":
                for attribute in js:
                    if attribute == raf_attribute:
                        if isinstance(js[attribute], list):
                            for val in js[attribute]:
                                for value in re.split(
                                        r'(?<!\d)[.](?!\d)|[\(\)\/;,]', val):
                                    if value.lstrip() and len(value.lstrip(
                                    )) > 1 and (value.lower().find(
                                            "nikon", 0, len(value)) == -1):
                                        source_dic.setdefault(ta, set()).add(
                                            value.lstrip())
                        if isinstance(js[attribute], str):
                            for value in re.split(
                                    r'(?<!\d)[.](?!\d)|[\(\)\/;,]',
                                    js[attribute]):
                                if value.lstrip() and len(
                                        value.lstrip()) > 1 and (
                                            value.lower().find(
                                                "nikon", 0, len(value)) == -1):
                                    source_dic.setdefault(ta, set()).add(
                                        value.lstrip())
        values_dict[source] = source_dic
    f = open("persistent_files/dizionario.pkl", "wb")
    pickle.dump(values_dict, f)
    f.close()
Example #13
    def check_for_companies(self):
        """Checks list of companies with Trump's tweet
           seeing if any companies are listed in his tweet.
           Inputs matches into monitor.json"""

        matches = []
        punc = ("!", ",", ".", ":", ";", "@", "?", "(", ")")

        self.tweet = ''.join(
            [letter for letter in self.tweet if letter not in punc]).lower()

        with open(COMPANIES) as f:
            companies = [line.strip() for line in f]

        for word in self.tweet.split():
            # Binary search for word
            if utils.find(companies, word):
                matches.append(word)

        company_dict = utils.open_json(MONITOR)
        comp_d = {}

        # Information that is needed by get_initial/current
        for company in matches:
            comp_d[company] = {}
            comp_d[company]["Date-mentioned"] = "{:%d-%m-%Y %H:%M:%S}".format(
                datetime.datetime.now())
            comp_d[company]["Mentioned by"] = self.handle
            comp_d[company]["Tweet"] = self.original_tweet
            comp_d[company]["Days-left"] = 7
            comp_d[company]["Symbol"] = "unknown"
            comp_d[company]["Initial-share-price"] = 1
            comp_d[company]["Current-share-price"] = 1
            comp_d[company]["Share-price-list"] = []

        company_dict.update(comp_d)
        utils.write_to_json(MONITOR, company_dict)

        return matches
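utils.find is not shown in these snippets; the comment above suggests a binary search over the alphabetically sorted company list. A plausible sketch of such a helper using bisect (name and behaviour are assumptions):

import bisect

def find(sorted_items, word):
    # binary search: True if word occurs in the sorted list
    i = bisect.bisect_left(sorted_items, word)
    return i < len(sorted_items) and sorted_items[i] == word

print(find(["apple", "boeing", "ford"], "ford"))   # True
print(find(["apple", "boeing", "ford"], "tesla"))  # False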
Example #14
def convert_zero_similarities_to_random_retrieval(lang):
    ls_features = ['sim_bert', 'sim_entity', 'sim_obj', 'sim_loc', 'sim_scene', 'sim_avg_text', 'sim_avg_visual',
                   'sim_avg_total']

    similarity_measures = open_json('similarity/' + lang + '.json')

    for (key, sims) in similarity_measures.items():

        for feat in ls_features:

            min_val_sim = 1

            for s in sims['similarity']:
                if s[feat] < min_val_sim:
                    if s[feat] != 0:
                        min_val_sim = s[feat]
            if min_val_sim == 1:  # no non-zero similarity below 1 was found; fall back to 0.01
                min_val_sim = 0.01

            for s in sims['similarity']:
                sfeat = s[feat]
                if sfeat == 0.0:
                    s[feat] = np.random.random_sample() * min_val_sim
    save_file('similarity/' + lang + '_random_retrieval_replaced_by_zero.json', similarity_measures)
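The effect of the replacement above, per feature: every similarity of exactly 0.0 is redrawn uniformly below the smallest non-zero value, so candidates tied at zero are ranked randomly instead of arbitrarily. A toy run for one feature, mirroring the min/fallback logic:

import numpy as np

sims = [{'sim_bert': 0.0}, {'sim_bert': 0.4}, {'sim_bert': 0.0}, {'sim_bert': 0.1}]

nonzero = [s['sim_bert'] for s in sims if s['sim_bert'] != 0]
min_val_sim = min(nonzero) if nonzero else 0.01  # fallback when everything is zero

for s in sims:
    if s['sim_bert'] == 0.0:
        s['sim_bert'] = np.random.random_sample() * min_val_sim  # here: random value in [0, 0.1)

print(sims)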
Example #15
def featurize():
    global student_metadata, df, question_metadata
    TRAIN_DATA = 'public_data/train_data/train_task_1_2.csv'


    TEST_DATA = 'starter_kit/submission_templates/submission_task_1_2.csv'
    ANSWER_DATA = 'public_data/metadata/answer_metadata_task_1_2.csv'
    QUESTION_SUBJECTS = 'public_data/personal_data/question_metadata_task_1_2.json'
    STUDENT_FEATURES = 'public_data/personal_data/student_metadata_task_1_2.json'

    question_metadata = open_json(QUESTION_SUBJECTS)  # child map
    student_metadata = open_json(STUDENT_FEATURES)

    #AnswerId,DateAnswered,Confidence,GroupId,QuizId,SchemeOfWorkId
    answer_df = pd.read_csv(ANSWER_DATA)[
        ['AnswerId', 'DateAnswered', 'Confidence', 'GroupId', 'QuizId']]
    answer_df['Confidence'].fillna((answer_df['Confidence'].mean()), inplace=True)
    answer_df['DateAnswered'] = pd.to_datetime(
        answer_df['DateAnswered'], errors='coerce')
    print(answer_df.shape)

    #QuestionId,UserId,AnswerId,IsCorrect,CorrectAnswer,AnswerValue
    train_df = pd.read_csv(TRAIN_DATA)
    train_df['TestMask'] = 1
    print('train_df shape: ', train_df.shape)
    #print(train_df.isnull().values.any())
    correct_df = train_df[['QuestionId', 'CorrectAnswer']
                        ].drop_duplicates('QuestionId')
    print('correct qs shape: ', correct_df.shape)

    #,QuestionId,UserId,AnswerId
    test_df = pd.read_csv(TEST_DATA)[['QuestionId', 'UserId', 'AnswerId']]
    test_df = pd.merge(test_df, correct_df, on='QuestionId')
    test_df['IsCorrect'] = 0
    test_df['TestMask'] = 0
    test_df['AnswerValue'] = 1
    print(test_df.shape)
    #print(test_df.isnull().values.any())
    #


    #get answer id info for train
    train_merged_df = pd.merge(train_df, answer_df, on='AnswerId')
    print(train_merged_df.shape)
    print(train_merged_df.isnull().values.any())

    #get answer id info for test
    test_merged_df = pd.merge(test_df, answer_df, on='AnswerId')
    print(test_merged_df.shape)
    print(test_merged_df.isnull().values.any())


    df = pd.concat([train_merged_df, test_merged_df],
                ignore_index=True, sort=False)
    print(df.shape)

    user_ids = df['UserId'].unique()
    user_data = []
    start_time = time.time()
    with Pool(30) as p:
        user_data = p.map(f, user_ids)
    end_time = time.time()
    print(end_time-start_time)

    print('no of user: ', len(user_data))  # partially masked in the source; the count argument is an assumption
    dump_json('public_data/converted_datasets/test_1_2.json', user_data)  # dump_json assumed, as in the other task examples
Example #16
#!/usr/bin/env python3

import json
import tweepy
import datetime
import smtplib
import time
import utils
import schedule
import urllib.request
import urllib.error

from yahoo_finance import Share

config = utils.open_json("./Files/config.json")

# File names
LOG = config["Files"]["Log"]
EMAILS = config["Files"]["Emails"]
TWITTER_NAMES = config["Files"]["Twitter"]
COMPANIES = config["Files"]["Companies"]
GENERIC = config["Files"]["Generic"]
MONITOR = config["Files"]["CompaniesToMonitor"]

# Boolean value
INITIAL_START = config["Files"]["InitialStart"]

# Email/Password info
EMAIL = config["Email-Info"]["Email"]
PASSWORD = config["Email-Info"]["Password"]
Example #17
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seedNum)
    np.random.seed(seedNum)
    file_name = [
        params.task, params.model, params.hidden_dim, params.question_dim,
        params.lr, params.dropout, params.default_dim, params.valid_prob
    ]
    if params.dash:
        file_name.append(params.dash)
        file_name.append(params.bidirectional)
    if params.model == 'attn':
        file_name.append(params.head)
    if params.dash:
        answer_filename = 'data_task_1_2/answer_dash_metadata_task_1_2_extra.json'
        answer_meta = open_json(answer_filename)
    else:
        answer_meta = None

    train_data = open_json('data_task_1_2/data_1_2.json')
    for d in train_data:
        d['valid_mask'] = [
            0 if np.random.rand() < params.valid_prob and ds else 1
            for ds in d['test_mask']
        ]

    train_dataset = LSTMDataset(train_data, answer_meta=answer_meta)
    collate_fn = lstm_collate(is_dash=params.dash == 1)
    num_workers = 2
    bs = params.batch_size
    train_loader = torch.utils.data.DataLoader(train_dataset,
Example #18
                    type=str,
                    default=random_flower,
                    help='Path to image')
parser.add_argument('--checkpoint',
                    type=str,
                    default='checkpoint.pth',
                    help='Path to checkpoint')
parser.add_argument('--topk',
                    type=int,
                    default=5,
                    help='Top N Classes and Probabilities')
parser.add_argument('--json',
                    type=str,
                    default='cat_to_name.json',
                    help='class_to_name json file')
parser.add_argument('--gpu', type=str, default='cuda', help='GPU or CPU')
arg, unknown = parser.parse_known_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class_name = open_json(arg.json)

model = load_checkpoint(arg.checkpoint)

checkpoint = torch.load(arg.checkpoint)

image = process_image(arg.image_dir)

probs, classes = predict(random_flower, model)

prediction_test(class_name, classes, probs, random_folder)
Example #19
import discord
import random

from utils import get_value_from_json, open_json

# Read secrets
client_secret = get_value_from_json('ClientSecret', './secrets.json')
client = discord.Client()

# Read image dictionary
image_dictionary = open_json('./image_dictionary.json')


# Events
@client.event
async def on_ready():
    print('Bot successfully logged in as: {0.user}'.format(client))


@client.event
async def on_message(message):

    # Checks if user has typed one of the keywords in the image dictionary
    for key in image_dictionary:
        if (message.content.upper() == key):
            with open(image_dictionary[key], 'rb') as image:
                await message.channel.send(file=discord.File(image))
                break

    # Coinflip simulation
    if (message.content.upper() == '!COINFLIP'):
Example #20
    file_name = [
        params.model, params.hidden_dim, params.question_dim, params.lr,
        params.dropout, params.mix_active, params.concat_hidden_dim,
        params.concat_dim
    ]
    file_name = [str(d) for d in file_name]
    params.file_name = '_'.join(file_name)
    seedNum = 221
    np.random.seed(seedNum)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seedNum)
    np.random.seed(seedNum)
    random.seed(seedNum)

    question_meta = open_json('data_task_4/question_metadata_task_3_4.json')
    train_data_path = os.path.normpath('data_task_4/train_task_4.csv')
    valid_data_path = os.path.normpath('data_task_4/valid_task_4.csv')
    valid_df = pd.read_csv(valid_data_path)
    valid_data = pivot_df(valid_df,
                          'AnswerValue')  #n_student, 948:    1 to 4 and -1
    valid_binary_data = pivot_df(valid_df,
                                 'IsCorrect')  # n_student, 948: 1 to 0 and -1
    train_df = pd.read_csv(train_data_path)
    # n_student, 948:    1 to 4 and -1
    train_data = pivot_df(train_df, 'AnswerValue')
    # n_student, 948: 1 to 0 and -1
    train_binary_data = pivot_df(train_df, 'IsCorrect')
    train_dataset = FFDataset(train_data, train_binary_data, question_meta)
    valid_dataset = FFDataset(valid_data, valid_binary_data, question_meta)
    num_workers = 3
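pivot_df is not shown in this snippet; judging from the comments it turns the long-format answer CSV into an n_student × 948 matrix with -1 for unanswered questions. A minimal sketch of such a helper (only the name and call signature come from the code above; the implementation is an assumption):

import pandas as pd

def pivot_df(df, value_col):
    # one row per student, one column per question; unanswered entries become -1
    mat = df.pivot_table(index='UserId', columns='QuestionId',
                         values=value_col, aggfunc='first')
    return mat.fillna(-1).astype(int)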
Example #21
import os
import utils_arcgis_gender as utils  # alias assumed so the utils.* calls below resolve
from bs4 import BeautifulSoup
import json
import copy

from arcgis.gis import GIS

# ====================================
# Upload to ArcGIS into staging folder
# ====================================

release = '2021.01'

# Layer info template
layer_info = utils.open_json('utilities/layerinfo.json')
# print(layer_info)

layer_info_properties = list(layer_info.keys())

# minset_catalog
minset_catalog = utils.open_json('master_data/minset_catalog.json')

main_fields = utils.select_dict(
    utils.tsv2dictlist('master_data/ts_catalog_edited.csv'),
    {'main_statistics_field': '1'})
# print(main_fields[0])

column_aliases = utils.open_json('master_data/column_aliases_edited.json')

# print(column_aliases[0])
Example #22
def merge():
    args = get_args_merge()
    jsons = []
    for j in args["json_file"]:
        jsons.extend(utils.open_json(j))
    utils.save_json(jsons, args["outfile"])