Example #1
0
def train_and_evaluate_RNN(batch_size, lstm_size):
    with open('./datasets/w2v/vocab-raw.txt') as f:
        vocab_size = len(f.read().splitlines())
    tf.reset_default_graph()
    tf.set_random_seed(2021)
    rnn = RNN(vocab_size=vocab_size,
              embedding_size=300,
              lstm_size=lstm_size,
              batch_size=batch_size)
    predicted_labels, loss = rnn.build_graph()
    train_op = rnn.trainer(loss=loss, learning_rate=0.01)
    with tf.Session() as sess:
        train_data_reader = DataReader(
            data_path='./datasets/w2v/20news-trainencoded.txt',
            batch_size=batch_size,
            vocab_size=vocab_size)
        test_data_reader = DataReader(
            data_path='./datasets/w2v/20news-testencoded.txt',
            batch_size=batch_size,
            vocab_size=vocab_size)
        step = 0
        MAX_STEP = 5000
        sess.run(tf.global_variables_initializer())
        while step < MAX_STEP:
            next_train_batch = train_data_reader.next_batch()
            train_data, train_labels, train_sentence_lengths = next_train_batch
            plabels_eval, loss_eval, _ = sess.run(
                [predicted_labels, loss, train_op],
                feed_dict={
                    rnn._data: train_data,
                    rnn._labels: train_labels,
                    rnn._sentence_lengths: train_sentence_lengths,
                },
            )
            step += 1
            if step % 50 == 0:
                print("Step: {}, Loss: {}".format(str(step), str(loss_eval)))

            if train_data_reader._batch_id == 0:
                num_true_preds = 0
                while True:
                    next_test_batch = test_data_reader.next_batch()
                    test_data, test_labels, test_sentence_lengths = next_test_batch
                    test_plabels_eval = sess.run(
                        predicted_labels,
                        feed_dict={
                            rnn._data: test_data,
                            rnn._labels: test_labels,
                            rnn._sentence_lengths: test_sentence_lengths,
                        },
                    )
                    matches = np.equal(test_plabels_eval, test_labels)
                    num_true_preds += np.sum(matches.astype(float))

                    if test_data_reader._batch_id == 0:
                        break
                print('Epoch:', train_data_reader._num_epoch)
                print('Accuracy on test data:',
                      num_true_preds * 100 / len(test_data_reader._data))
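A hypothetical invocation sketch; the batch and LSTM sizes below are illustrative assumptions, not values taken from the snippet:

# Placeholder hyper-parameters for illustration only.
train_and_evaluate_RNN(batch_size=50, lstm_size=50)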
Example #2
0
def trainModel(experiment, testRun, setTarg):
    print("Training model ...")

    datasetTrain = DataReader(experiment.data["path"])
    datasetTrain.setDatasetClassic("train", experiment.data["feature"],
                                   experiment.data["annotation"])
    if setTarg == "MeanStd": datasetTrain.setTargetMeanStd()
    if testRun: datasetTrain = keepOne(datasetTrain)
    datasetDev = DataReader(experiment.data["path"])
    datasetDev.setDatasetClassic("dev", experiment.data["feature"],
                                 experiment.data["annotation"])
    if setTarg == "MeanStd": datasetDev.setTargetMeanStd()
    if testRun: datasetDev = keepOne(datasetDev)
    if testRun: experiment.maxEpoch = 1
    inp, tar = datasetDev[0]
    experiment.inputDim = inp.shape[1]
    experiment.outputDim = tar.shape[1]
    # print("experiment.outputDim", tar.shape)
    wrapper = getWrapper(experiment)
    wrapper.trainModel(datasetTrain,
                       datasetDev,
                       batchSize=experiment.batchSize,
                       maxEpoch=experiment.maxEpoch,
                       loadBefore=True,
                       tolerance=experiment.tolerance,
                       minForTolerance=experiment.minForTolerance)
    wrapper.saveLogToCSV()
Example #3
0
def generate_templates(inputfilepath, series, filename_noise_psds, vec_r_lim,
                       mat_theta_lim, filename_templates):
    print('generate_templates')

    E_min = 0.
    E_max = 1E12

    V = get_noise_psds(filename_noise_psds)

    gen = TemplateGeneratorNxM(V, calc_r, calc_theta, E_min, E_max, vec_r_lim,
                               mat_theta_lim)

    dr = DataReader()
    dr.OpenFile(inputfilepath, series, 0)

    event_count = 0

    while dr.LoadEvent(trigger='Trigger'):
        gen.IncludeEvent(dr.GetTraces())
        event_count += 1

        if event_count % STEP_MONITOR == 1:
            print('Event', event_count)

    dr.CloseFile()

    templates = gen.GetTemplates()

    if isinstance(templates, list):
        map_bins_part = gen.GetMapBinsPart()
        save_templates_nxm(templates, E_min, E_max, map_bins_part,
                           filename_templates)

    gen.Draw(PATH + '/png')
Example #4
0
def main(argv):
  try:
    rdr = DataReader(argv[1])
    level = int(argv[2])
  except (IndexError, ValueError):
    print "usage: python Extractor.py <bson_file> <level>"
    return
  print level
  with open('catid_to_levelid.json', 'r') as ldict_file:
    cat_dict = json.load(ldict_file)
  with open('l%d_dict.json' % level, 'r') as ldict_file:
    l_dict = json.load(ldict_file)
  category_cnts = [0 for i in xrange(len(l_dict))]
  category_img_cnts = [0 for i in xrange(len(l_dict))]
  cnt = 1
  while 1:
    try:
      tup = rdr.getOne()
      if cnt % 10000 == 0:
        print cnt
      cnt += 1
      catid = tup[0]
      cat_id = cat_dict[str(catid)][level-1]
      category_img_cnts[cat_id] += len(tup[1])
    except StopIteration:
      print "none"
      break
  with open("l%d_histo.json" % level, 'r') as histo_file:
    json.dump(histo_file, category_img_cnts)
Example #5
0
    def _readData(self, dataPath, dataType):
        def findURL(dataPath, file):
            from bs4 import BeautifulSoup
            with open(dataPath + file, 'r', encoding='utf-8') as f:
                content = f.read()
            soup = BeautifulSoup(content, 'html.parser')
            ref = soup.find('link').get('href')
            return ref.split('/')[-1].lower().replace('.html', '')

        dataReader = DataReader(dataPath, dataType, self.project)
        numOfFiles = dataReader.getNumberOfFiles()
        for i in range(numOfFiles):
            _file, context = dataReader.readData(i)
            if dataType == 'UserManual':
                manual = Manual()
                manual.id = i
                manual.name = _file.split('.')[0].lower()
                if self.project == 'komodo':
                    manual.url = findURL(dataPath, _file)
                manual.sentences = context
                self.manuals.append(manual)
            elif dataType == 'IssueReport':
                issue = Issue()
                issue.number = _file
                issue.html = context
                self.issues.append(issue)
Example #6
0
def main():
    # Delete portfolio.db if it exists
    if os.path.exists("portfolio.db"):
        os.remove("portfolio.db")
        print("portfolio.db removed successfully")

    # Get data paths
    stock_filename = "data_stocks.csv"
    bond_filename = "data_bonds.csv"

    # Initialize dataReader
    dataReader = DataReader(stock_filename, bond_filename)

    # Get stock and bond data
    stockData = dataReader.getStockData()
    bondData = dataReader.getBondData()

    # Initialize an investor
    investor = Investor("Bob", "Smith", "123 Fake St, Denver, CO 80221",
                        "303.777.1234")

    # Initialize a portfolio
    portfolio = Portfolio(investor)

    # Add the stocks and bonds to the portfolio
    portfolio.addStocks(stockData)
    portfolio.addBonds(bondData)

    # Initialize a report
    report = Report(portfolio)

    # Print the report
    report.print()
Example #7
0
    def identifier(self):
        # for each data folder
        for i in range(0, len(self.data_list)):
            data_folder = self.data_dir + self.data_list[i] + '/estimated/txt/'
            # read data from the txt files
            dff = DataReader(data_folder)
            data, time = dff.reader()
            # compute the number of frames that span a stance phase
            frameTime = float(time[1]) - float(time[0])
            framesRange = round(0.15 / frameTime)
            # extract the ankle coordinates
            lankle = []
            for k in range(0, len(data[14])):
                lankle.append([data[14][k][0], data[14][k][1]])
            # remove deburring artifacts
            lankle = self.deburringsElimination(lankle, framesRange)
            # find the points at which a stance phase begins
            stanceId = StancesIdentifier(lankle, framesRange)
            print("\n\n\t" + self.data_list[i])
            stanceBeginnings = stanceId.identifier()
            # identify the beginning and the end of each stride
            strides = self.stridesIdentification(stanceBeginnings)
            # store the information about each stride
            ss = StridesStorage(self.data_list[i], data, time, strides)
            ss.storage()
Example #8
0
    def __init__(self,
                 seg_type='jieba',
                 data=DEV_DATA,
                 fill='NULL',
                 needfill=True):
        self.base_fname = data.split('.')[0]
        self.word_file = self.base_fname + '.word'
        self.bin_file = self.base_fname + '.bin'
        self.seg_file = self.base_fname + '.seg'
        self.vec_file = self.base_fname + '.vec'

        self.seg = Segmentation(seg_type)
        self.dr = DataReader(data)
        self.dr.filt()

        self.qu = []
        self.wr_ans = []
        self.cor_ans = []

        self.qu_vec = []
        self.wr_ans_vec = []
        self.cor_ans_vec = []

        self.max_len = 0
        self.fill_len = 0
        self.fill = fill
        # self.needfill = needfill

        self.freq = [0] * 1050

        self.stop_word = self._load_stop_word()
Example #9
0
    def __init__(self,
                 ex_type,
                 coin,
                 min_tick_size,
                 start_time,
                 end_time,
                 delay=timedelta(milliseconds=100),
                 path='./'):
        ''' Initialize an Exchange of the given ex_type
        Inputs:
            path, request, ex_type: used for reading data, see DataReader
            delay: delay applied to trading operations, defaults to 100 ms
            start_time, end_time: datetime objects specifying the start and end of the backtest
        '''

        start_date = start_time.date()
        end_date = end_time.date()

        # read data for the specific coin type: first try to read a pickle;
        # if the pickle does not exist, try to request the data from the web
        try:
            print('Reading data from {}'.format('{}data/{}.pkl'.format(
                path, coin)))
            self.data = pd.read_pickle('{}data/{}.pkl'.format(
                path, coin))[start_time:end_time]
        except:
            print(
                'Pickle file does not exist, trying to request data using DataReader:'
            )
            dr = DataReader(ex_type)
            dr.read_data(start_date, end_date, path=path, request=True)
            try:
                print('Request completed, now reading data from {}'.format(
                    '{}data/{}.pkl'.format(path, coin)))
                self.data = pd.read_pickle('{}data/{}.pkl'.format(
                    path, coin))[start_time:end_time]
            except:
                raise Exception('Data for {} not available from {}'.format(
                    coin, ex_type))

        # orders: strategy-submitted orders that are not yet executed; currently a 1-d list for a single strategy
        self.orders = {'Buy': [], 'Sell': []}
        self.order_prices = []

        # subscriber: for convenience only one strategy is kept for now; this may later become a list of strategies
        self.subscriber = None

        # current_time: current time point of the exchange
        self.current_time = start_time

        self.DELAY = delay
        self.end_time = end_time
        self.last_sell_price = np.inf
        self.last_buy_price = 0

        self.coin = coin
        self.timestamps = self.data.index.to_list()
        # heap of (time, order, coin)
        self.queue = []
        self.min_tick_size = min_tick_size
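A minimal usage sketch for this constructor, assuming the enclosing class is named Exchange and that a pickle such as ./data/BTC.pkl exists; the exchange type, coin, and tick size below are illustrative placeholders, not values from the source:

from datetime import datetime, timedelta

# Hypothetical invocation; every argument value here is a placeholder.
exchange = Exchange(ex_type='some_exchange',
                    coin='BTC',
                    min_tick_size=0.01,
                    start_time=datetime(2021, 1, 1),
                    end_time=datetime(2021, 1, 31),
                    delay=timedelta(milliseconds=100),
                    path='./')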
Example #10
0
    def getSimilarityScores(self, testingIcon, numOfNeighbours, min_libs, testingIcons):
        numOfNeighbours = 50
        score_dict = {}
        a = self.datas[testingIcon]
        sha256_a = a[5]
        dataReader = DataReader(Database_name)
        filtering_res = dataReader.query_detail(sha256_a, a[3], a[10], a[6], testingIcons, self.if_apk)
        for item in filtering_res:
            b = self.datas[item]
            sha256_b = b[5]
            libs = b[-2].split(";")
            if len(libs) < min_libs:
                continue
            # calculate the simScore of testingIcon
            simCounter = SimCounter()
            score = simCounter.image_similarity_score(a[8], sha256_a, b[8], sha256_b)
            score_dict[item] = score
        score_lst = dict2sortedlist(score_dict)[:numOfNeighbours]
        topn = {}
        flag = 0
        if len(score_lst) >= numOfNeighbours:
            for item in score_lst:
                if item[1]: flag = 1
                topn[item[0]] = item[1]
            if flag:
                return topn
            else:
                return None
        else:
            return None
Example #11
0
def make_readable(locale_path, recipes):
    end_pattern = r'=(?P<new_name>.*)'
    d = DataReader(locale_path)
    remove_recipes = []
    for entry in recipes:
        pattern = entry.name + end_pattern
        match = re.search(pattern, d.content)
        if match:
            entry.name = match.group("new_name")
        else:
            remove_recipes.append(entry)
            continue
        remove_resources = []
        for pair in entry.get_resources().items():
            pattern = pair[0].name + end_pattern
            match = re.search(pattern, d.content)
            if match:
                pair[0].name = match.group("new_name")
            else:
                remove_resources.append(pair[0])
                continue
        for resource in remove_resources:
            entry.remove_resource(resource)
        if entry.number_of_resources() == 0:
            remove_recipes.append(entry)
    for entry in remove_recipes:
        recipes.remove(entry)
    return recipes
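For intuition, the pattern built above matches a key=value locale line; a small self-contained sketch (the locale line shown is a hypothetical example, since the real format of the file behind locale_path is not part of the snippet):

import re

end_pattern = r'=(?P<new_name>.*)'
# Hypothetical locale line for a recipe named "iron-plate".
content = "iron-plate=Iron plate"
match = re.search("iron-plate" + end_pattern, content)
print(match.group("new_name"))  # -> "Iron plate"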
Example #12
0
def get_tweets_labels(tweet_file, labels_file, tests_file):
    # Simply read in the data
    data_reader = DataReader(tweet_file, labels_file, tests_file)
    tweets = data_reader.read_tweets()
    labels = data_reader.read_labels()
    tests = data_reader.read_tests()
    return tweets, labels, tests
Example #13
0
    def getSimilarityScores(self, testingIcon, numOfNeighbours, min_libs, testingIcons):

        numOfNeighbours = 50

        score_dict = {}
        a = self.datas[testingIcon]
        sha256 = a[5]
        dataReader = DataReader(Database_name)
        filtering_res = dataReader.query_detail(sha256, a[3], a[10], a[6], testingIcons, self.if_apk)
        for item in filtering_res:
            b = self.datas[item]
            libs = b[-2].split(";")
            if len(libs) < min_libs:
                continue
            # calculate the simScore of testingIcon
            simCounter = SimCounter()
            score1 = simCounter.edit_distance(a[1], b[1])
            score2 = simCounter.edit_distance(a[2], b[2])
            score3 = simCounter.edit_distance(a[4], b[4])
            score_dict[item] = self.alpha * score1 + self.beta * score2 + self.gamma * score3
        score_lst = dict2sortedlist(score_dict)[:numOfNeighbours]
        print(score_lst)
        topn = {}
        flag = 0
        if len(score_lst) >= numOfNeighbours:
            for item in score_lst:
                if item[1]: flag = 1
                topn[item[0]] = item[1]
            if flag:
                return topn
            else:
                return None
        else:
            return None
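Both getSimilarityScores variants rely on a dict2sortedlist helper that is not shown; a plausible sketch, assuming it returns (key, score) pairs ordered by descending score so that slicing [:n] keeps the top n:

def dict2sortedlist(score_dict):
    # Assumed behavior: sort (key, score) pairs by score, highest first.
    return sorted(score_dict.items(), key=lambda kv: kv[1], reverse=True)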
Example #14
0
def run(args):
    # Load data
    data = DataReader(args.input)

    # Determine data for validation
    data.loadFeatures = False
    isVal = np.array([x['Validation'] for x in data])
    isTrn = np.logical_not(isVal)
    hasEmo = np.array([not np.any(np.isnan(x['Emotion'])) for x in data])
    isTrnEmo = np.logical_and(isTrn, hasEmo)

    # Create samplers
    bsVal = BatchSampler(SubsetSampler(np.where(isVal)[0]), args.batchSize,
                         False)
    dlVal = DataLoader(data, collate_fn=data.collate, batch_sampler=bsVal)
    bsTrn = BatchReplace(np.where(isTrn)[0], args.batchSize)
    dlTrn = DataLoader(data, collate_fn=data.collate, batch_sampler=bsTrn)
    itTrn = iter(dlTrn)
    bsTrnEmo = BatchReplace(np.where(isTrnEmo)[0], args.batchSize)
    dlTrnEmo = DataLoader(data,
                          collate_fn=data.collate,
                          batch_sampler=bsTrnEmo)
    itTrnEmo = iter(dlTrnEmo)

    # Get output weights using train and val data
    allEmo = np.stack([x['Emotion'] for x in data])
    allDs = np.stack([x['Dataset'] for x in data])
    wEmo = np.nansum(allEmo, axis=0)
    wDs = np.nansum(allDs, axis=0)
    wEmo = np.sum(wEmo) / (wEmo * len(wEmo))
    wDs = (np.sum(wDs) - wDs) / wDs
    data.loadFeatures = True

    # Setup model
    model = Maddog(data.featLen, wDs, wEmo, args)

    # Get validation ground truth
    valActual = np.concatenate([x['Emotion'] for x in dlVal])
    valActual = np.argmax(valActual, axis=-1)

    # Loop through all epochs
    bestUar = None
    for ep in range(args.maxEpochs):
        # Train for one epoch
        model.Fit(itTrn, itTrnEmo)

        # Predict VAL
        valPred = np.argmax(model.Predict(dlVal), axis=-1)
        valUar = calcUar(valActual, valPred)

        # Print UAR
        if args.verbose:
            print('Epoch', ep, '  UAR:', valUar)

        # Check for best val UAR
        if bestUar is None or valUar > bestUar:
            bestUar = valUar
            model.Save(args.model)
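calcUar is used above but not defined in the snippet; a minimal sketch of an unweighted average recall (UAR) computation, assuming both arguments are 1-d arrays of class indices:

import numpy as np

def calcUar(actual, predicted):
    # Assumed metric: mean of per-class recall over the classes present in `actual`.
    classes = np.unique(actual)
    recalls = [np.mean(predicted[actual == c] == c) for c in classes]
    return float(np.mean(recalls))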
Example #15
0
    def __init__(self):
        self.dr = DataReader()

        self.score = self.dr.get_score_data()
        self.cb_matrix = None
        self.interacted_cb_matrix = None
        self.cl_matrix = None
        self.ensemble_matrix = None
        self.tech_keyword_matrix = self.get_tech_keyword_matrix()
Example #16
0
def test_data_reader_assigned_duration():
    """Tests the DataReader overrides the default value if a duration is
     assigned during construction

    Returns
    -------
    None
    """
    dr = DataReader("test_file.csv", (0, 2))
    assert dr.duration == (0, 2)
Example #17
0
def test_read_csv_voltage():
    """Tests the read_csv function of the data reader for reading in the
    voltage numpy array from the csv file.

    Returns
    -------
    None
    """
    dr = DataReader("test_file.csv")
    expected_voltage = np.array([10, 15, 20])
    assert np.array_equal(dr.output_dict["voltage"], expected_voltage)
Example #18
0
    def __init__(self, graph_file_path):
        self.graph_file_path = graph_file_path

        dr = DataReader(graph_file_path)
        gr = dr.readGraphFile()
        self.graph = gr
        self.num_nodes = len(gr)
        self.degree = [len(l) for l in gr]
        self.community_record = None
        self.std_Q = None
        self.P_Mat = None
Example #19
0
def test_validate_csv_file_bad_file_extension():
    """Tests the validate_csv_file function for raising a ValueError when
    there is a bad file extension (i.e. one that is not .csv)

    Returns
    -------
    None
    """
    with pytest.raises(ValueError):
        dr = DataReader("BadExtensionTest.txt")
        dr.validate_csv_file("BadExtensionTest.txt")
Example #20
0
def test_read_csv_time():
    """Tests the read_csv function of the data reader for reading in the
    time numpy array from the csv file.

    Returns
    -------
    None
    """
    dr = DataReader("test_file.csv")
    expected_time = np.array([0, 1, 2])
    assert np.array_equal(dr.output_dict["time"], expected_time)
Example #21
0
def test_data_reader_default_duration():
    """Tests that data reader assigns a default value to the duration parameter
     if one is not assigned during construction. Default value should be min
     max values from the time array.

    Returns
    -------
    None
    """
    dr = DataReader("test_file.csv")
    assert dr.duration == (0, 2)
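Taken together, the DataReader tests above pin down a three-row fixture; a sketch of how such a test_file.csv could be generated (the column names and order are assumptions inferred from the output_dict keys; only the time and voltage values are asserted by the tests):

# Hypothetical fixture matching the values asserted in the tests above.
with open("test_file.csv", "w") as f:
    f.write("time,voltage\n")
    f.write("0,10\n1,15\n2,20\n")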
Example #22
0
def main():
    print("Running on BIO-NLP data\n\n")

    home_dir = "../dl4nlp"

    # The hyper-parameters of the word embedding trained model
    window_size = 5
    embed_vector_size = 50
    min_count = 1000

    # Define the data files
    data_folder = os.path.join("..\\", "sample_data", "drugs_and_diseases")
    train_file_path = os.path.join(data_folder, "Drug_and_Disease_train.txt")
    test_file_path = os.path.join(data_folder, "Drug_and_Disease_test.txt")
    data_file_path = os.path.join(data_folder, "unlabeled_test_sample.txt")
    resources_pickle_file = os.path.join(home_dir, "models", "resources.pkl")
    embedding_pickle_file = os.path.join(home_dir, "models", "w2vmodel_pubmed_vs_{}_ws_{}_mc_{}.pkl" \
            .format(embed_vector_size, window_size, min_count))
    print("embedding_pickle_file= {}".format(embedding_pickle_file))

    # The hyperparameters of the LSTM trained model
    #network_type= 'unidirectional'
    network_type = 'bidirectional'
    num_layers = 2
    num_hidden_units = 150
    num_epochs = 10
    batch_size = 50
    dropout = 0.2
    reg_alpha = 0.0

    model_file_path = os.path.join(home_dir,'models','lstm_{}_model_units_{}_lyrs_{}_epchs_{}_vs_{}_ws_{}_mc_{}.h5'.\
                  format(network_type, num_hidden_units, num_layers,  num_epochs, embed_vector_size, window_size, min_count))

    print("Training the model... num_epochs = {}, num_layers = {}, num_hidden_units = {}".\
            format(num_epochs, num_layers,num_hidden_units))

    reader = DataReader()

    entityExtractor = EntityExtractor(reader, embedding_pickle_file)

    entityExtractor.train(train_file_path,
                          output_resources_pickle_file=resources_pickle_file,
                          network_type=network_type,
                          num_epochs=num_epochs,
                          batch_size=batch_size,
                          dropout=dropout,
                          reg_alpha=reg_alpha,
                          num_hidden_units=num_hidden_units,
                          num_layers=num_layers)

    #Save the model
    entityExtractor.save(model_file_path)

    print("Done.")
Example #23
0
def main():
    # Get data path
    filename = "allStocks.json"

    # Initialize dataReader and get data
    dataReader = DataReader(filename)
    data = dataReader.getData()

    # Initialize a report and call print
    report = Report(data)
    report.print()
Example #24
0
def test_validate_csv_file_bad_file_name():
    """Tests the validate_csv_file for raising an exception when a
    file that
    does not exist is inputted for the csv_file_name argument in DataReader.

    Returns
    -------
    None
    """
    with pytest.raises(FileNotFoundError):
        dr = DataReader("random_file_name.csv")
        dr.validate_csv_file("random_file_name.csv")
Example #25
0
    def __init__(self):
        self.dr = DataReader()

        self.score = self.dr.get_score_data()
        self.tech_keyword_matrix = self.get_tech_keyword_matrix()

        self.tech_keyword_sim_matrix = None
        self.item_based_sim_matrix = None
        self.ensemble_sim_matrix = None

        # Auxiliary variable
        self.tech_id_set_for_item_based = None
Example #26
0
    def cluster_on_graph(self, rwfile):
        if not os.path.exists(rwfile):
            print("no random walk file provided, exit...")
            exit(1)

        dreader = DataReader(rwfile)
        rwList = dreader.readRWFile()

        Clusters = [[x] for x in range(self.graph_file.num_nodes)]

        self.community_cluster = Clusters
        return Clusters
Example #27
0
def main():
    logging.basicConfig(filename="HRM_logs.txt",
                        format='%(asctime)s %(levelname)s:%(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')

    file_name = get_file_name()

    wants_duration = get_wants_duration()
    try:
        if wants_duration:
            duration = get_duration()
            dr = DataReader(file_name, duration)
        else:
            dr = DataReader(file_name)

        hrm = HRM_Processor(dr)
        dw = DataWriter(hrm)
    except (FileNotFoundError, ValueError, TypeError):
        logging.info("Driver script terminated unsuccessfully.")
    else:
        logging.info("Successful termination of HRM_Driver")
Example #28
0
def testModel(experiment, testRun, setTarg):
    # print("Testing model ...")
    dataset = DataReader(experiment.data["path"])
    dataset.setDatasetClassic("test", experiment.data["feature"],
                              experiment.data["annotation"])
    if setTarg == "MeanStd": dataset.setTargetMeanStd()
    inp, tar = dataset[0]
    experiment.inputDim = inp.shape[1]
    experiment.outputDim = tar.shape[1]

    firstID1 = list(dataset.dataPart.keys())[0]
    firstID2 = list(dataset.dataPart[firstID1]["annotations"])[0]
    headers = dataset.dataPart[firstID1]["annotations"][firstID2]["headers"]
    if setTarg == "MeanStd": headers = ["mean", "std"]
    # print(headers)
    wrapper = getWrapper(experiment, getBest=True)
    modelOutPath = os.path.join(wrapper.savePath, "ouputs")
    if testRun: dataset = keepOne(dataset)
    IDs = dataset.dataPart.keys()
    for key in experiment.evaluation.keys():
        metrics = {}
        for idx, ID in enumerate(IDs):
            savePath = os.path.join(modelOutPath, ID + ".csv")
            outputs = pd.read_csv(savePath).to_numpy()
            targets = dataset.targetReader(ID)
            # print(targets.shape, outputs.shape)
            if idx == 0:
                results = [[] for _ in range(targets.shape[1])]
                # bestresult = 0; bestID = "0"
                for dim in range(targets.shape[1]):
                    metrics[headers[dim]] = {}
            for dim in range(targets.shape[1]):
                output = outputs[:, dim]
                target = targets[:, dim]
                while target.shape[0] > output.shape[0]:
                    output = np.append(output, output[-1])
                while target.shape[0] < output.shape[0]:
                    output = outputs[:target.shape[0]].reshape(target.shape[0])
                result = getMetric(target, output, metric=key)
                # if result > bestresult: bestresult=result; bestID = ID
                # print(ID, result, len(output))
                results[dim].append(result)
            printProgressBar(idx + 1,
                             len(IDs),
                             prefix='Testing model with ' + key + ':',
                             suffix='',
                             length="fit")
        for dim in range(targets.shape[1]):
            metrics[headers[dim]]['mean'] = np.mean(np.array(results[dim]))
            metrics[headers[dim]]['std'] = np.std(np.array(results[dim]))
        experiment.evaluation[key] = metrics
    return experiment
Example #29
0
def eval_one_data(random_seed):
    data_reader = DataReader()
    data = DataObject(data_reader, 1, random_seed=random_seed)
    result = []
    recommender = ItemKNNCFRecommender(data.urm_train)
    recommender.fit(topK=args["topK"], shrink=args["shrink"], similarity=args["similarity"], feature_weighting=args["feature_weighting"])
    for n, users, description in data.urm_train_users_by_type:
        eval, map = MyEvaluator.evaluate_algorithm(data.urm_test, users, recommender, at=10, remove_top=0)
        result.append(map)
    users = data.ids_target_users
    eval, map = MyEvaluator.evaluate_algorithm(data.urm_test, users, recommender, at=10, remove_top=0)
    result.append(map)
    return result
Example #30
0
    def __init__(self, oscars):
        self.Data = DataReader("tmdb-movies.csv")
        self.Data.formatData()
        self.OscarFile = pd.read_csv(oscars)
        self.ActorsDictionary = {}
        self.MovieDF = self.Data.getMovieDF()
        self.Categories = [
            "ACTOR", "ACTRESS", "ACTOR IN A SUPPORTING ROLE",
            "ACTRESS IN A SUPPORTING ROLE", "ACTOR IN A LEADING ROLE",
            "ACTRESS IN A LEADING ROLE"
        ]
        self.OutputData = self.Data.getOutput()
        self.cleanOscarData()