Exemplo n.º 1
0
def p_comparison(p):
    """comparison : cterm EQ cterm
                    | cterm NOTEQ cterm
                    | cterm LT cterm
                    | cterm GT cterm
                    | cterm LTEQ cterm
                    | cterm GTEQ cterm
                    | cterm LIKE STRING_LITERAL
                    | cterm CLIKE STRING_LITERAL
                    | cterm IS NULL
                    | cterm IS NOT NULL"""
    # NOTE: the docstring above is the grammar for this rule (PLY-style yacc
    # function), not ordinary documentation, so it must not be reworded.
    #
    # Stores a Comparison node in p[0]:
    #   * binary operators compare p[1] with p[3];
    #   * LIKE / CLIKE wrap the string literal p[3] in a CompareTerm;
    #   * IS NULL / IS NOT NULL have no right-hand operand (None).
    if len(p) != 4:
        # "cterm IS NOT NULL" is the only production with four symbols.
        p[0] = Comparison(Comparison.IS_NOT_NULL, p[1], None)
        return

    operator = p.slice[2].type
    if operator in ('EQ', 'NOTEQ', 'LT', 'GT', 'LTEQ', 'GTEQ'):
        # The Comparison constant carries the same name as the token type.
        p[0] = Comparison(getattr(Comparison, operator), p[1], p[3])
    elif operator in ('LIKE', 'CLIKE'):
        p[0] = Comparison(getattr(Comparison, operator), p[1],
                          CompareTerm(p[3]))
    elif operator == 'IS':
        # Previously this branch tested 'CLIKE' a second time, so it was
        # unreachable and "cterm IS NULL" never produced a node.
        p[0] = Comparison(Comparison.IS_NULL, p[1], None)
Exemplo n.º 2
0
 def __init__(self, sigma, wm_c, max_ar, sem_dim):
     """Run ``Comparison.__init__`` with ``sigma``, store the remaining
     arguments on private attributes and build the graph.

     ``wm_c``, ``max_ar`` and ``sem_dim`` are stored unchanged; their
     meaning is defined by the rest of the class (not visible here).
     """
     Comparison.__init__(self, sigma)
     self.__wm_c, self.__max_ar, self.__sem_dim = wm_c, max_ar, sem_dim
     # Search count is wm_c! converted to a plain int.
     search_count = spm.factorial(wm_c)
     self.__n_search = int(search_count)
     # Must run last: it is called only after all attributes above are set.
     self.__create_graph()
Exemplo n.º 3
0
 def test_match(self):
     """Тест1"""
     data = [
         (u'все включено',
          u'скачать все включено сейчас',
          1.0),
         (u'все включено',
          u'All inclusive, или Всё включено',
          1.0),
         (u'все включено',
          u'All inclusive, или Всё включено (2011) » Новое Кино\
           смотреть онлайн скачать бесплатно', 1.0),
         (u'все включено', u'мотреть фильм Все включено онлайн бесплатно\
          без регистрации', 1.0),
         (u'все включено', u'All inclusive или Все включено (2011). Фильм.', 1.0),
         (u'все включено', u'Все включено! (2011) - All inclusive или Все включено! - информация о фильме -  российские фильмы и сериалы - Кино-Театр.РУ', 1.0),
         (u'все включено', u'ВСЕ ВКЛЮЧЕНО 2011 » СМОТРЕТЬ ОНЛАЙН БЕСПЛАТНО', 1.0),
         (u'все включено', u'Фильм All inclusive, или Всё включено смотреть онлайн бесплатно в хорошем качестве', 1.0),
         (u'все включено', u'Все включено Онлайн фильм, Смотреть фильмы онлайн бесплатно, Кино онлайн', 1.0),
         (u'все включено', u'Смотреть онлайн All inclusive, или Всё включено | Комедии | Фильм All inclusive, или Всё включено смотреть бесплатно на Films-Online-net.ru', 1.0),
         (u'все включено', u'Фильм «Все включено»,  Эдуард Радзюкевич,  Михаил Беспало', 1.0),
         (u'все включено', u'Фильм All inclusive, или Всё включено отзывы и рецензия', 1.0),
         (u'все включено', u'ильм All inclusive, или Всё включено смотреть онлайн бесплатно в хорошем hd качестве', 1.0)
     ]
     comparison = Comparison()
     for entry in data:
         val = comparison.match(entry[0], entry[1])
         print entry[0], entry[1], val
         self.assertTrue(val >= entry[2])
Exemplo n.º 4
0
    def compare_all_masters(self):
        """Compare this host with every entry in ``self.masters``.

        For each master a ``Comparison(master, self)`` is built and asked to
        compare the first database of the master with the first database of
        this host; the database names are printed along the way.

        Returns:
            The mapping from ``self.host_info()`` extended with two keys:
            ``'token'`` (``self.token``) and ``'masters'`` (the list of
            per-master comparison results, in ``self.masters`` order).
        """
        results = []
        for master in self.masters:
            comparer = Comparison(master, self)
            master_db = master.databases[0]['name']
            own_db = self.databases[0]['name']
            print("DB1: {}\tDB2: {}".format(master_db, own_db))
            results.append(comparer.get_comparison(master_db, own_db))

        report = self.host_info()
        report['token'] = self.token
        report['masters'] = results
        return report
Exemplo n.º 5
0
 def compare_books(self, user_id):
     """Compare books between the authenticated user and another user.

     Requests ``'user/compare/' + user_id`` through ``self.session`` with
     ``{'format': 'xml'}`` and wraps the ``'compare'`` entry of the
     response in a ``Comparison``.

     Raises:
         GoodreadsSessionError: if ``self.session`` is falsy.
     """
     if not self.session:
         raise GoodreadsSessionError("No authenticated session.")
     endpoint = 'user/compare/' + user_id
     response = self.session.get(endpoint, {'format': 'xml'})
     return Comparison(response['compare'])
 def from_dict(cls, kv):
     """Build an instance of ``cls`` from the mapping ``kv``.

     Reads the optional ``'comment'`` and ``'date'`` entries; a present
     date is parsed with ``datetime.strptime`` using the module's
     ``date_format``, a missing one stays ``None``. The comparison part is
     built by ``Comparison.from_dict(kv)`` from the same mapping.
     """
     comment = kv.get('comment')
     raw_date = kv.get('date')
     date = None if raw_date is None else datetime.strptime(raw_date,
                                                            date_format)
     # Imported at call time rather than at module level
     # (presumably to avoid a circular import -- confirm).
     from comparison import Comparison
     comparison_part = Comparison.from_dict(kv)
     return cls(comparison_part, date, comment)
    def __init__(self, articles):
        """Find pairs of articles that share enough keywords.

        Every ordered pair of articles with different titles is examined;
        a pair sharing at least five keywords is recorded as a
        ``Comparison(article, other, shared_keywords)`` unless ``article``
        already appears (by title) in a recorded comparison.
        """
        # All the articles handed over by the web scraper.
        self.articles = articles
        # Pairs of articles found to be similar.
        self.comparisons = []
        # Keywords that appeared as matches (not filled in by this method).
        self.keywords = []

        for article in self.articles:
            for candidate in self.articles:
                # An article compared with itself (same title) would be a
                # perfect match, so those pairs are ignored.
                if article.title != candidate.title:
                    shared = set(article.keywords) & set(candidate.keywords)

                    # NOTE(review): the previous comment said the threshold
                    # was 4, but the code requires at least 5 -- confirm.
                    if len(shared) < 5:
                        continue

                    # Skip the pair when this article is already part of a
                    # recorded comparison, to avoid duplicates.
                    seen = any(
                        pair.getFirstArticle().title == article.title
                        or pair.getSecondArticle().title == article.title
                        for pair in self.comparisons)
                    if not seen:
                        self.comparisons.append(
                            Comparison(article, candidate, shared))
Exemplo n.º 8
0
def NAND(*args):
    """Forward ``args`` to the ``NAND`` method of a new ``Comparison``."""
    gate = Comparison()
    return gate.NAND(*args)
Exemplo n.º 9
0
def OR(*args):
    """Forward ``args`` to the ``OR`` method of a new ``Comparison``."""
    gate = Comparison()
    return gate.OR(*args)
Exemplo n.º 10
0
def AND(*args):
    """Forward ``args`` to the ``AND`` method of a new ``Comparison``."""
    gate = Comparison()
    return gate.AND(*args)
Exemplo n.º 11
0
def NOR(*args):
    """Forward ``args`` to the ``NOR`` method of a new ``Comparison``."""
    gate = Comparison()
    return gate.NOR(*args)
Exemplo n.º 12
0
border=

#train.pyで学習したモデルのダウンロード
#from train import train
#model=Train.train()
#保存したいとき
#model.save("name.gz")

#フォルダからpoincareモデル(epoch=100)のダウンロード
from gensim.models.poincare import PoincareModel,PoincareRelations
model=PoincareModel.load("../data/poincare_1.gz")


#比較する文章のデータベースの作成
from database import Database
database,id2doc=Database.database()

#対象の文章の読み込み
import glob
path=glob.glob("../data/target_data/*")
path=path[0]
f = open(path)
text=f.read()
f.close()

#入力(対象の文章、判別モデル、文章のデータベース、しきい値)から出力(類似度がしきい値以下の似てる文章を昇順で)を出す
from comparison import Comparison
ind=Comparison.comparison(text,database,model,border)
for i in ind:
	print(id2doc[i])
Exemplo n.º 13
0
    def score(self,
              spaceagg,
              timeagg,
              store_minimum=False,
              pp_model=None,
              quantile=''):
        """
        Score the aligned forecasts for the ``(spaceagg, timeagg)`` row of ``self.log``.

        Steps, in order:
        1. Recollect the forecast-to-observation alignment named in the log.
        2. Load the observed climatology and, when the log holds a model
           climatology name for this quantile, the model climatology too
           (preferred for the raw Brier scoring inside ``Comparison``,
           according to the original author's note).
        3. Build a ``Comparison`` and, if ``pp_model`` is given, either fit
           the post-processing models and store the exported fit name for
           all quantiles, or load the fits already named in the log. The
           fit does not depend on the quantile, so it is done only once.
        4. Score: Brier score for a float ``quantile`` or a custom binary
           predictand, CRPS when ``self.newvar`` is ``None`` or ``'anom'``.
           For the non-quantile case the post-processed forecast is drawn
           with ``random=False`` (equidistant sampling); the original
           author notes that random sampling overestimated the CRPS.

        Returns the result of the final ``comp.export(fits=False,
        frame=True, ...)`` call (presumably the name of the exported score
        file -- confirm against ``Comparison.export``).
        """
        logrow = (spaceagg, timeagg)
        use_pp = pp_model is not None

        alignment = ForecastToObsAlignment(season=self.season,
                                           cycle=self.cycle)
        alignment.recollect(booksname=self.log.loc[logrow, ('booksname', '')])

        # Only loading here; the climatology was created in the makeclim method.
        climatology = Climatology(self.basevar,
                                  name=self.log.loc[logrow,
                                                    ('climname', quantile)])
        climatology.localclim()

        # A model quantile climatology is supplied only if one was computed earlier.
        modelclim_missing = self.log.loc[logrow, (
            'modelclimname', [quantile])].isna().any()
        if modelclim_missing:
            modelclimatology = None
        else:
            modelclimatology = ModelClimatology(
                cycle=self.cycle,
                variable=self.basevar,
                name=self.log.loc[logrow, ('modelclimname', quantile)])
            modelclimatology.local_clim()
            assert self.newvar == 'anom', 'This modelclimatology has likely no adapted units, only when anomalies the quantiles in Kelvin will be compatible with the aligned forecast anomalies in Celsius.'

        comp = Comparison(alignment=alignment,
                          climatology=climatology,
                          modelclimatology=modelclimatology)

        # Fit, or accept an external fit (the log column is already filled with a name).
        if use_pp:
            stored_fit = self.log.loc[logrow, ('externalfits', quantile)]
            if isinstance(stored_fit, str):
                print('loading fit from:', stored_fit)
                # Load the fits exported for the first quantile.
                comp.fits = dd.read_hdf(comp.basedir + stored_fit + '.h5',
                                        key='fits')
                comp.fitgroupers = ['leadtime', 'clustid']
            else:
                comp.fit_pp_models(pp_model=pp_model,
                                   groupers=['leadtime', 'clustid'])
                firstfitname = comp.export(fits=True, frame=False)
                # Written for every quantile column so later loops over quantiles reuse it.
                self.log.loc[logrow,
                             ('externalfits', slice(None))] = firstfitname

        # Scoring.
        if isinstance(quantile, float):
            if use_pp:
                comp.make_pp_forecast(pp_model=pp_model)
            comp.brierscore()
        else:
            if use_pp:
                n_members = self.ndraws if isinstance(pp_model, NGR) else None
                comp.make_pp_forecast(pp_model=pp_model,
                                      random=False,
                                      n_members=n_members)
                comp.export(fits=False, frame=False, preds=True)
            if (self.newvar is None) or (self.newvar == 'anom'):
                comp.crpsscore()
            else:
                # A custom binary predictand.
                comp.brierscore()

        scorefile = comp.export(fits=False,
                                frame=True,
                                store_minimum=store_minimum)
        return scorefile