Exemplo n.º 1
0
    def filterByOp(self,clone):
        """Compare the operation strings of the two fragments of a clone pair.

        ``clone[1]`` and ``clone[2]`` are each unpacked as an
        ``(index, start, end)`` triple.  For every position in the inclusive
        range ``start..end`` the entry of ``self.op1_hash`` (first fragment) or
        ``self.op2_hash`` (second fragment) is looked up, with ``-1`` standing
        in for missing positions, and the stringified entries are concatenated.

        Returns ``None`` unless ``self.hasChanged`` is truthy for both
        operation strings; otherwise returns whatever ``self.compareList``
        yields for the two lists of n-grams (n taken from ``config.NGRAM``).
        """
        # The leading index of each triple is not needed here.
        _idx1, lo1, hi1 = clone[1]
        _idx2, lo2, hi2 = clone[2]

        first_ops = "".join(
            str(self.op1_hash.get(pos, -1)) for pos in range(lo1, hi1 + 1))
        second_ops = "".join(
            str(self.op2_hash.get(pos, -1)) for pos in range(lo2, hi2 + 1))

        if config.DEBUG is True:
            print("start1 = %d, end1 = %d, ops = %s" % (lo1, hi1, first_ops))
            print("start2 = %d, end2 = %d, ops = %s" % (lo2, hi2, second_ops))

        # Both fragments must report a change; the second check is skipped
        # when the first one already fails.
        if not self.hasChanged(first_ops) or not self.hasChanged(second_ops):
            return None

        tokenizer = NGram(N=config.NGRAM)
        first_grams = list(tokenizer.ngrams(first_ops))
        second_grams = list(tokenizer.ngrams(second_ops))
        return self.compareList(first_grams, second_grams)
Exemplo n.º 2
0
def test():
    """Print the ``compareList`` result for two short sample operation strings."""
    samples = ("nnn+", "nn+")

    op_filter = opFilter()
    tokenizer = NGram(N=config.NGRAM)

    # Tokenize each sample into its list of n-grams, first sample first.
    grams_a, grams_b = [list(tokenizer.ngrams(sample)) for sample in samples]

    print(op_filter.compareList(grams_a, grams_b))
def ngram_compare(a, b, N):
    """Return the Jaccard similarity of the N-gram sets of ``a`` and ``b``.

    Both strings are split into n-grams of size ``N`` with ``NGram.ngrams``
    and the result is ``|A & B| / |A | B|`` as a float between 0.0 and 1.0.

    Returns 0.0 when neither string yields any n-gram, instead of raising
    ``ZeroDivisionError`` on the empty union.
    """
    ngram = NGram(N=N)
    grams_a = set(ngram.ngrams(a))
    grams_b = set(ngram.ngrams(b))

    union = grams_a | grams_b
    if not union:
        return 0.0

    # Force float division: under Python 2 ``int / int`` truncates, which
    # would collapse every partial overlap to 0.
    return float(len(grams_a & grams_b)) / len(union)
Exemplo n.º 4
0
    def main():
        """Suggest up to three job postings matching a question read from stdin.

        The question is padded and split into bigrams.  For each bigram the
        ``projects`` table is searched for rows whose ``looking_for`` column
        contains it, and every returned project id gets one hit.  The
        ``[hit count, id]`` pairs are printed in descending order, followed by
        the title and keywords of the (at most) three best-ranked projects.

        All SQL values are passed as bound parameters, hits are counted for
        any project id (not only ids 0-9), and the cursor and connection are
        closed even when a query fails.
        """
        # Function-scope imports keep this block independent of the file header.
        from collections import Counter
        from contextlib import closing

        select_columns = (
            "select id,company_id,title,description,location,keywords"
            " from projects ")

        # Unit used for the n-gram analysis (bigrams).
        index = NGram(N=2)

        # DB
        with closing(psycopg2.connect(host="localhost",
                                      database="wantedly")) as connector:
            with closing(connector.cursor()) as cursor:
                # Read the question.
                print("質問:")
                question = raw_input().decode("utf-8")

                # hit_counts[project id] = number of bigrams that matched it.
                hit_counts = Counter()
                for gram in index.ngrams(index.pad(question)):
                    # Wrap the bigram in wildcards so that every row containing
                    # it matches.  NOTE(review): '%' or '_' typed by the user
                    # still act as LIKE wildcards - confirm that is acceptable.
                    cursor.execute(
                        select_columns + "where looking_for like %s",
                        ("%" + gram + "%",))
                    for row in cursor.fetchall():
                        hit_counts[row[0]] += 1

                # projects_hit_count = [[count_hits, id]], highest count first.
                projects_hit_count = sorted(
                    ([hits, project_id]
                     for project_id, hits in hit_counts.items()),
                    reverse=True)
                print(projects_hit_count)

                if not projects_hit_count:
                    # No posting matched the question.
                    print("やりたい仕事がありますか?それか興味ある分野とは?")
                else:
                    print("\nこんにちは、今のような募集がありますが、いかがですか?")

                # Show at most the three best-ranked postings.
                for rank, (_, project_id) in enumerate(projects_hit_count[:3], 1):
                    cursor.execute(select_columns + "where id = %s",
                                   (project_id,))
                    for row in cursor.fetchall():
                        print(str(rank) + ".")
                        print(str(row[2]))
                        print(str(row[5]))

                connector.commit()
  def main():
      """Suggest up to three job postings matching a question read from stdin.

      The question is padded and split into bigrams.  For each bigram the
      ``projects`` table is searched for rows whose ``looking_for`` column
      contains it, and every returned project id gets one hit.  The
      ``[hit count, id]`` pairs are printed in descending order, followed by
      the title and keywords of the (at most) three best-ranked projects.

      All SQL values are passed as bound parameters, hits are counted for
      any project id (not only ids 0-9), and the cursor and connection are
      closed even when a query fails.
      """
      # Function-scope imports keep this block independent of the file header.
      from collections import Counter
      from contextlib import closing

      select_columns = (
          "select id,company_id,title,description,location,keywords"
          " from projects ")

      # Unit used for the n-gram analysis (bigrams).
      index = NGram(N=2)

      # DB
      with closing(psycopg2.connect(host="localhost",
                                    database="wantedly")) as connector:
          with closing(connector.cursor()) as cursor:
              # Read the question.
              print("質問:")
              question = raw_input().decode("utf-8")

              # hit_counts[project id] = number of bigrams that matched it.
              hit_counts = Counter()
              for gram in index.ngrams(index.pad(question)):
                  # Wrap the bigram in wildcards so that every row containing
                  # it matches.  NOTE(review): '%' or '_' typed by the user
                  # still act as LIKE wildcards - confirm that is acceptable.
                  cursor.execute(
                      select_columns + "where looking_for like %s",
                      ("%" + gram + "%",))
                  for row in cursor.fetchall():
                      hit_counts[row[0]] += 1

              # projects_hit_count = [[count_hits, id]], highest count first.
              projects_hit_count = sorted(
                  ([hits, project_id]
                   for project_id, hits in hit_counts.items()),
                  reverse=True)
              print(projects_hit_count)

              if not projects_hit_count:
                  # No posting matched the question.
                  print("やりたい仕事がありますか?それか興味ある分野とは?")
              else:
                  print("\nこんにちは、今のような募集がありますが、いかがですか?")

              # Show at most the three best-ranked postings.
              for rank, (_, project_id) in enumerate(projects_hit_count[:3], 1):
                  cursor.execute(select_columns + "where id = %s",
                                 (project_id,))
                  for row in cursor.fetchall():
                      print(str(rank) + ".")
                      print(str(row[2]))
                      print(str(row[5]))

              connector.commit()