Exemplo n.º 1
0
def crTrieIndex():
    """Build the trie indexes for a dictionary and flag them as created.

    Reads ``name`` from the submitted form, calls ``createPrefixTree`` with
    the inverted-index directory and that name, sets ``trieIndex=1`` on the
    matching ``dictionary`` row and saves both returned tries under the
    trie-index directory (the second one with an ``r_`` prefix).

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` is the
        time spent inside ``createPrefixTree``.

    Any failure after the form has been read ends the request with
    ``abort(500)``.
    """
    name = request.form['name']

    TRIEINDEX = '../../Model/trieIndex/'
    INVERTINDEXPATH = '../../Model/invertIndex/'

    try:
        startTime = time()
        trieIndex, reverseTrieIndex = createPrefixTree(INVERTINDEXPATH, name)
        resTime = time() - startTime

        # ``name`` comes straight from the request, so it is bound as a query
        # parameter instead of being concatenated into the SQL text.
        # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
        query = "UPDATE `dictionary` SET `trieIndex`=1 WHERE `name`=%s"
        con = connection.getConnection()
        with con:
            cur = con.cursor()
            cur.execute(query, (name,))
            con.commit()

        trieIndex.save(TRIEINDEX + name)
        reverseTrieIndex.save(TRIEINDEX + "r_" + name)
        return {'success': True, 'time': resTime}
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer converted into an HTTP 500.
        abort(500)
Exemplo n.º 2
0
def crInvertIndex():
    """Build the inverted index and the matrix for a dictionary and store them.

    Reads ``name`` and the ``files[]`` list from the submitted form (empty
    entries are dropped), builds the inverted index with
    ``createInvertIndex`` and the matrix with ``createMatrixFromIndex``, sets
    ``invertIndex=1`` on the matching ``dictionary`` row and dumps both
    structures as JSON files named after the dictionary.

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` covers
        building the index and the matrix.
    """
    name = request.form['name']
    files = [f for f in request.form.getlist('files[]') if f != ""]

    FILEPATH = '../../Model/books/'
    INVERTINDEXPATH = '../../Model/invertIndex/'
    MATRIXPATH = '../../Model/matrix/'

    startTime = time()
    invertIndex = createInvertIndex(FILEPATH, files, 0)
    confMatrix = createMatrixFromIndex(invertIndex, len(files))
    resTime = time() - startTime

    # ``name`` comes straight from the request, so it is bound as a query
    # parameter instead of being concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    query = "UPDATE `dictionary` SET `invertIndex`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()

    with codecs.open(INVERTINDEXPATH + name, 'w', 'utf-8') as f:
        json.dump(invertIndex, f, ensure_ascii=False)
    with codecs.open(MATRIXPATH + name, 'w', 'utf-8') as f:
        json.dump(confMatrix, f, ensure_ascii=False)
    return {'success': True, 'time': resTime}
Exemplo n.º 3
0
def crCoordIndex():
    """Build the coordinate index for a dictionary and store it as JSON.

    Reads ``name`` and the ``files[]`` list from the submitted form (empty
    entries are dropped), builds the index with ``getCoordIndex``, sets
    ``coordIndex=1`` on the matching ``dictionary`` row and dumps the index
    to a JSON file named after the dictionary.

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` is the
        time spent inside ``getCoordIndex``.

    Any failure after the form has been read ends the request with
    ``abort(500)``.
    """
    name = request.form['name']
    files = [f for f in request.form.getlist('files[]') if f != ""]

    FILEPATH = '../../Model/books/'
    COORDINDEX = '../../Model/coordIndex/'
    DICTIONARYPATH = '../../Model/dictionaries/'
    try:
        startTime = time()
        coordIndex = getCoordIndex(FILEPATH, DICTIONARYPATH, name, files)
        resTime = time() - startTime

        # ``name`` comes straight from the request, so it is bound as a query
        # parameter instead of being concatenated into the SQL text.
        # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
        query = "UPDATE `dictionary` SET `coordIndex`=1 WHERE `name`=%s"
        con = connection.getConnection()
        with con:
            cur = con.cursor()
            cur.execute(query, (name,))
            con.commit()

        with codecs.open(COORDINDEX + name, 'w', 'utf-8') as f:
            json.dump(coordIndex, f, ensure_ascii=False)
        return {'success': True, 'time': resTime}
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer converted into an HTTP 500.
        abort(500)
Exemplo n.º 4
0
def writeToDBAndToFile(name, allWords, uniqueWords, collectionSize,
                       timeToCreate, booksNum, ids):
    """Insert one row describing a dictionary into the ``dictionary`` table.

    Args:
        name: value for the ``name`` column.
        allWords: value for the ``allWords`` column.
        uniqueWords: value for the ``uniqueWords`` column.
        collectionSize: value for the ``collectionSize`` column.
        timeToCreate: value for the ``timeToCreate`` column.
        booksNum: value for the ``booksNum`` column.
        ids: value for the ``ids`` column (inserted as a string).
    """
    # Every value is bound as a parameter so quoting and escaping are left to
    # the driver rather than to string concatenation.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    query = ("INSERT INTO `dictionary`(`name`, `allWords`, `uniqueWords`, "
             "`collectionSize`, `timeToCreate`, `booksNum`, `ids`) "
             "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    params = (name, allWords, uniqueWords, collectionSize, timeToCreate,
              booksNum, ids)

    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, params)
        con.commit()
Exemplo n.º 5
0
def read_sql(ImageID, VarName):
    """Call ``sp_join_skyghi`` for an image and return one field of its first row.

    Args:
        ImageID: the single argument passed to the stored procedure.
        VarName: key used to index the first fetched row.

    Returns:
        The requested field, or the string ``'NaN'`` if anything fails
        (connection, procedure call, empty result, missing key, close).
    """
    try:
        conn = getConnection()
        try:
            cur = conn.cursor()
            cur.callproc("sp_join_skyghi", [ImageID])
            return cur.fetchall()[0][VarName]
        finally:
            # Close the connection on the failure paths too; the original
            # only closed it when every step had succeeded.
            conn.close()
    except Exception:
        return 'NaN'
Exemplo n.º 6
0
def getCreatedDicts():
    """Return the index-flag columns of the ``dictionary`` rows with a given name.

    Reads ``name`` from the request's query arguments and selects the flag
    columns of every matching ``dictionary`` row.

    Returns:
        dict: fetched rows keyed by their 1-based position in the result.
    """
    name = request.args['name']
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        # ``name`` comes straight from the request, so it is bound as a query
        # parameter instead of being concatenated into the SQL text.
        # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
        cur.execute(
            "SELECT `dict`, `invertIndex`, `coordIndex`, `2WordsIndex`, `gramIndex`, `permutationIndex`, `trieIndex`, `invertIndexByParts` FROM `dictionary` WHERE `name`=%s",
            (name,))
        rows = cur.fetchall()
        return {position: row for position, row in enumerate(rows, 1)}
Exemplo n.º 7
0
def createDictionary():
    """Register a new dictionary built from the selected books.

    Reads ``name`` and the ``id[]`` list from the submitted form. If a
    ``dictionary`` row with that name already exists, returns the plain
    string ``"Dict is already exist"``. Otherwise looks up each book's file
    name, gathers word statistics with ``createWordsListAndInfo`` and records
    them through ``writeToDBAndToFile``.

    Returns:
        Either the "already exists" string, or a
        ``(json_body, 200, headers)`` tuple describing the new dictionary.
    """
    # get params
    name = request.form['name']
    ids = request.form.getlist('id[]')
    con = connection.getConnection()

    # Request values are bound as query parameters instead of being
    # concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.

    # check whether a dictionary with this name already exists
    cur = con.cursor()
    cur.execute("Select `name` from `dictionary` where `name`=%s", (name,))
    if cur.fetchone() is not None:
        return "Dict is already exist"

    # get books
    files = []
    # The connection context is entered once around the whole loop rather
    # than once per book, so later lookups never run on a connection whose
    # context has already been exited.
    with con:
        for bookId in ids:
            cur.execute("SELECT * FROM `books`  where id=%s", (bookId,))
            row = cur.fetchone()
            files.append(row['name'])

    FILEPATH = '../../Model/books/'

    # get all dicts and lists for record
    startTime = time()
    uniqueWords, sizeOfFiles, uniqueWordsNumber, allWordsNumber = createWordsListAndInfo(
        FILEPATH, files)
    resTime = time() - startTime

    # record all information
    writeToDBAndToFile(name, allWordsNumber, uniqueWordsNumber, sizeOfFiles,
                       resTime, len(files), ' '.join(ids))

    return json.dumps({
        'success': True,
        'name': name,
        'size': sizeOfFiles,
        'allWords': allWordsNumber,
        'uniqueWords': uniqueWordsNumber,
        'time': resTime,
        'booksNum': len(files)
    }), 200, {
        'ContentType': 'application/json'
    }
Exemplo n.º 8
0
def cr2WordIndex():
    """Build the two-word index for a dictionary and store it as JSON.

    Reads ``name`` and the ``files[]`` list from the submitted form (empty
    entries are dropped), builds the index with ``create2wordsIndex``, sets
    ``2WordsIndex=1`` on the matching ``dictionary`` row and dumps the index
    to a JSON file named after the dictionary.

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` is the
        time spent inside ``create2wordsIndex``.
    """
    name = request.form['name']
    files = [f for f in request.form.getlist('files[]') if f != ""]

    FILEPATH = '../../Model/books/'
    WORDS2INDPATH = '../../Model/words2Index/'

    startTime = time()
    words2Ind = create2wordsIndex(FILEPATH, files)
    resTime = time() - startTime

    # ``name`` comes straight from the request, so it is bound as a query
    # parameter instead of being concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    query = "UPDATE `dictionary` SET `2WordsIndex`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()

    with codecs.open(WORDS2INDPATH + name, 'w', 'utf-8') as f:
        json.dump(words2Ind, f, ensure_ascii=False)
    return {'success': True, 'time': resTime}
Exemplo n.º 9
0
def crDict():
    """Build the word list for a dictionary and store it as JSON.

    Reads ``name`` and the ``files[]`` list from the submitted form (empty
    entries are dropped), builds the list with ``createWordsList``, sets
    ``dict=1`` on the matching ``dictionary`` row, dumps the list to a JSON
    file named after the dictionary and prints ``memory_usage()``.

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` is the
        time spent inside ``createWordsList``.
    """
    name = request.form['name']
    files = [f for f in request.form.getlist('files[]') if f != ""]
    FILEPATH = '../../Model/books/'
    DICTIONARYPATH = '../../Model/dictionaries/'

    startTime = time()
    uniqueWords = createWordsList(FILEPATH, files)
    resTime = time() - startTime

    # ``name`` comes straight from the request, so it is bound as a query
    # parameter instead of being concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    query = "UPDATE `dictionary` SET `dict`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()

    with codecs.open(DICTIONARYPATH + name, 'w', 'utf-8') as f:
        json.dump(uniqueWords, f, ensure_ascii=False)

    print(memory_usage())
    return {'success': True, 'time': resTime}
Exemplo n.º 10
0
def getServerSocket():
    """Return the module-level cached connection, creating it when needed.

    The cached value is reused as long as it is truthy; otherwise a new one
    is obtained from ``connection.getConnection()`` and stored in
    ``_server_socket``.
    """
    global _server_socket
    _server_socket = _server_socket or connection.getConnection()
    return _server_socket
Exemplo n.º 11
0
import connection
import groups
import users
import logging

if __name__ == '__main__':

    logging.getLogger().setLevel(logging.INFO)

    import sys
    dni = sys.argv[1]
    office = sys.argv[2]

    logging.info('agregando {} a la oficina {}'.format(dni, office))

    con = connection.getConnection()
    try:
        selectedOffice = None
        offices = groups.OfficeDAO.findAll(con)
        for oid in offices:
            off = groups.OfficeDAO.findById(con, oid)
            if off.name == office:
                selectedOffice = off
                break

        if selectedOffice is None:
            logging.info('La oficina no existe')
            sys.exit(1)

        uid = users.UserDAO.findByDni(con, dni)[0]
        if uid not in selectedOffice.users:
Exemplo n.º 12
0
class fighterSpider(scrapy.Spider):
    """Scrape one event page and store the event and its fighters.

    ``parse`` extracts the event name, the event date and the fighter names
    from the response, then uses the class-level connection to insert any
    rows that are missing from ``events``, ``fighters`` and
    ``event_fighter_mapping``.

    Messages are printed with the single-argument ``print(...)`` form, which
    is valid on both Python 2 and Python 3.
    """

    # Opened once, when the class body is executed.
    conn = connection.getConnection()

    name = 'event'
    allowed_domains = ['www.bestfightodds.com']
    start_urls = [
        'https://www.bestfightodds.com/events/ufc-231-holloway-vs-ortega-1584'
    ]

    def parse(self, response):
        """Extract event and fighter data from ``response`` and persist it."""
        print('===============================================================')
        event = {}
        event['name'] = response.css(
            'div.table-outer-wrapper div.table-div div.table-header a::text'
        ).extract_first()
        event['date'] = response.css(
            'div.table-outer-wrapper div.table-div div.table-header span.table-header-date::text'
        ).extract_first()

        fighters = {}
        fighters['name'] = response.css(
            'div.table-outer-wrapper div.table-div div.table-inner-wrapper div.table-scroller table.odds-table tbody th span.tw::text'
        ).extract()

        if self.conn is not None:
            # save event info in database
            event_id = self.save_event(self.conn, event)
            # save fighters info in database
            fighters_ids = self.save_fighters(self.conn, fighters)
            # save event fighter mapping
            self.event_fighter_mapping(self.conn, event_id, fighters_ids)
            self.conn.close()

            print('Event and Fighters extracted and saved in database')
        else:
            print('Not able to connect to database')

        print('===============================================================')

    def save_event(self, conn, event):
        """Return the id of the event, inserting it when it is not stored yet.

        Args:
            conn: open database connection.
            event: dict with ``'name'`` and ``'date'`` entries.

        Returns:
            The ``event_id`` of the existing or newly inserted row, or
            ``None`` when the insert fails (the error is printed).
        """
        event_name = event.get('name')
        event_date = event.get('date')

        cursor = conn.cursor()
        try:
            query = "select event_id,event_name from events where event_name=%s"
            cursor.execute(query, (event_name, ))
            result = cursor.fetchone()

            # ``fetchone()`` yields None when no row matches. The original
            # ``result < 0`` only acted as a None check under Python 2
            # ordering rules and raises TypeError on Python 3.
            if result is not None:
                print("Event already exists in the database!!!")
                return result[0]

            query = "INSERT INTO events (event_name,event_date) VALUES (%s,%s)RETURNING event_id;"
            event_id = None
            try:
                cursor.execute(query, (event_name, event_date))
                event_id = cursor.fetchone()[0]
            except Exception:
                print('Error in executing query')
                traceback.print_exc(file=sys.stdout)

            # Commit before returning; the original returned from inside the
            # ``try`` and skipped its own close/commit on success.
            conn.commit()
            return event_id
        finally:
            cursor.close()

    def save_fighters(self, conn, fighters):
        """Return the ids of all fighters, inserting the ones not stored yet.

        Args:
            conn: open database connection.
            fighters: dict whose ``'name'`` entry is a list of fighter names.

        Returns:
            list: fighter ids in the order of the names; a fighter whose
            insert fails is skipped (the error is printed).
        """
        fighters_ids = []

        cursor = conn.cursor()
        try:
            for fighters_name in fighters.get('name') or []:
                query = "select fighter_id,fighter_name from fighters where fighter_name=%s"
                cursor.execute(query, (fighters_name, ))
                result = cursor.fetchone()

                if result is not None:
                    fighters_ids.append(result[0])
                    continue

                query = "INSERT INTO fighters (fighter_name) VALUES (%s)RETURNING fighter_id;"
                try:
                    cursor.execute(query, (fighters_name, ))
                    fighters_ids.append(cursor.fetchone()[0])
                except Exception:
                    print('Error in executing query')
                    traceback.print_exc(file=sys.stdout)

            conn.commit()
        finally:
            cursor.close()
        return fighters_ids

    def event_fighter_mapping(self, conn, event_id, fighters_ids):
        """Link every fighter id to the event unless the link already exists.

        Args:
            conn: open database connection.
            event_id: id of the event row.
            fighters_ids: iterable of fighter ids to link to the event.
        """
        cursor = conn.cursor()
        try:
            for fighter_id in fighters_ids:
                query = "select fighter_id,event_id from event_fighter_mapping where fighter_id = %s AND event_id = %s"
                cursor.execute(query, (str(fighter_id), str(event_id)))
                result = cursor.fetchone()

                if result is None:
                    query = "INSERT INTO event_fighter_mapping (fighter_id,event_id) VALUES (%s,%s)"
                    try:
                        cursor.execute(query, (fighter_id, event_id))
                    except Exception:
                        print('Error in executing query')
                        traceback.print_exc(file=sys.stdout)

            conn.commit()
        finally:
            cursor.close()
Exemplo n.º 13
0
# Import packages and libraries
import datetime
import glob
import re
import connection

# Search for files and paths for current date
root = '/mnt/nas/CMS_Data'
# Read the date once so year, month and day always describe the same day,
# even if the script happens to run across midnight.
today = datetime.date.today()
year = today.strftime("%Y")
month = today.strftime("%Y-%m")
day = today.strftime("%Y-%m-%d")
files = glob.glob("%s/StationsData_*/*/Converted to TXT/%s/%s/*%s.txt" %
                  (root, year, month, day))
load_sql = ("LOAD DATA LOCAL INFILE '%s' "
            "REPLACE INTO TABLE %s "
            "FIELDS TERMINATED BY '\t' "
            "LINES TERMINATED BY '\n' "
            "IGNORE 1 LINES; ")
# Digits between square brackets in the path; the first such group is used
# as the station number. Raw string, so ``\[`` is a regex escape and not an
# invalid string escape.
station_pattern = re.compile(r'\[([0-9]+)\]')

# Load text files to MySQL DB
connection = connection.getConnection()
try:
    cursor = connection.cursor()
    for file in files:
        print(file)
        station = station_pattern.findall(file)[0]
        table = 'SIN' + station
        print(table)
        cursor.execute(load_sql % (file, table))
        connection.commit()
finally:
    # Close the connection even when one of the loads fails.
    connection.close()
Exemplo n.º 14
0
 def insertDB(self):
     """Insert this object's url, name, price and email into ``items1``."""
     # The values are bound as query parameters instead of being formatted
     # into the SQL text, so quotes inside them cannot break the statement.
     # They are stringified first because the original f-string inserted
     # every value as a quoted string.
     # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
     sql = 'Insert INTO items1 (url, name,Price , email) VALUES (%s, %s, %s, %s)'
     params = (str(self.url), str(self.name), str(self.price),
               str(self.email))
     cr = connection.getCursor()
     cr.execute(sql, params)
     db = connection.getConnection()
     db.commit()
Exemplo n.º 15
0
def boolSearch():
    """Evaluate a boolean query against a dictionary's matrix.

    Reads ``name``, ``request`` and ``num`` from the submitted form. The
    query is lower-cased and split on ``&``, ``|`` and ``^``. Each operand
    becomes an integer bit mask:

    * an operand containing a space is resolved by ``fraseSearch``;
    * an operand starting with ``!`` uses the inverted matrix row;
    * an operand containing ``*`` is resolved by ``joker``, which also
      returns a second value that is appended to the result list;
    * any other operand uses its matrix row as is.

    The operators are applied left to right. Every ``1`` in the final mask,
    left-padded with zeros to ``num`` characters, selects the book id at the
    same position in the dictionary's ``ids`` column.

    Returns:
        tuple: ``(json_body, 200, headers)``. ``names`` in the body holds the
        selected book names followed by the value returned by ``joker`` for
        the last wildcard operand, or ``None`` when there was none.
    """
    # get request params
    timeStart = time()
    name = request.form['name']
    req = request.form['request']
    booksNum = int(request.form['num'])

    # get matrix (the context manager closes the file, which the original
    # left open)
    with codecs.open('../../Model/matrix/' + name, 'r', 'utf-8') as f:
        matrix = json.load(f)

    req = req.lower()
    # get operations and words
    words = re.findall(r"[^&|\^]+", req)
    words = list(map(replaceSpace, words))
    operations = re.findall(r"[&|\^]", req)

    resWords = None
    # make not operation and transfer matrix into 0b
    operands = []
    for word in words:
        word = replaceSpace(word)
        if word.count(" ") > 0:
            operands.append(fraseSearch(word, name, booksNum))
        elif word.startswith("!"):
            row = matrix[word.replace("!", "")]
            # Flip every bit, using "2" as a temporary placeholder.
            flipped = row.replace("1", "2").replace("0", "1").replace("2", "0")
            operands.append(int(flipped, 2))
        elif "*" in word:
            mask, resWords = joker(word, name, booksNum)
            operands.append(mask)
        else:
            operands.append(int(matrix[word], 2))

    res = operands[0]

    # make all other operations
    for i, operation in enumerate(operations):
        if operation == "&":
            res &= operands[i + 1]
        elif operation == "|":
            res |= operands[i + 1]
        elif operation == "^":
            res ^= operands[i + 1]

    # record final result, left-padded with zeros to one character per book
    res = bin(res).replace("0b", "").rjust(booksNum, "0")

    con = connection.getConnection()
    cur = con.cursor()
    # Request-derived values are bound as query parameters instead of being
    # concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    cur.execute("SELECT `ids` FROM `dictionary` where `name` = %s", (name,))
    bookIds = cur.fetchone()['ids'].split(" ")
    # change 0b result to names
    names = []
    for i, bit in enumerate(res):
        if bit == "1":
            cur.execute("SELECT `name` FROM `books` where `id` = %s",
                        (bookIds[i],))
            names.append(cur.fetchone()['name'])

    # The original wrapped this call in ``print``, which only ever printed
    # the ``None`` returned by ``append``; the append itself is kept.
    names.append(resWords)
    resTime = time() - timeStart
    return json.dumps({
        'success': True,
        'names': names,
        'time': resTime
    }), 200, {
        'ContentType': 'application/json'
    }
Exemplo n.º 16
0
def _writeIndexPart(path, index):
    """Write one partial index to ``path`` as ``word:postings`` lines sorted by word."""
    entries = sorted(index.items(), key=lambda item: str(item[0]))
    with codecs.open(path, 'w', 'utf-8') as out_file:
        out_file.write("".join(
            str(word) + ":" + json.dumps(postings) + "\n"
            for word, postings in entries))


def _readIndexEntry(reader):
    """Return the next ``(word, postings)`` pair from a part file, or None when it is exhausted."""
    line = reader.readline().replace("\n", "")
    if not line:
        return None
    # Split on the last colon so a word that itself contains ":" stays whole.
    word, _, postings = line.rpartition(":")
    return word, json.loads(postings)


def _mergeIndexParts(partPaths, outPath):
    """Merge word-sorted part files into a single JSON object written to ``outPath``."""
    readers = [codecs.open(path, 'r', 'utf-8') for path in partPaths]
    try:
        # heads[i] is always the current entry of readers[i]; an exhausted
        # reader keeps its slot (as None) so both lists stay aligned.
        heads = [_readIndexEntry(reader) for reader in readers]
        with codecs.open(outPath, 'w', 'utf-8') as output:
            output.write("{")
            isFirst = True
            while True:
                pending = [head[0] for head in heads if head is not None]
                if not pending:
                    break
                currWord = min(pending)
                arr = []
                for i, head in enumerate(heads):
                    if head is not None and head[0] == currWord:
                        arr += head[1]
                        heads[i] = _readIndexEntry(readers[i])
                arr.sort()
                if not isFirst:
                    output.write(", ")
                output.write(
                    json.dumps(currWord, ensure_ascii=False) + ": " +
                    json.dumps(arr))
                isFirst = False
            output.write("}")
    finally:
        for reader in readers:
            reader.close()


def crInvertIndexByParts():
    """Build the inverted index in size-limited parts and merge them into one file.

    Reads ``name`` and the ``files[]`` list from the submitted form (empty
    entries are dropped). Books are taken in order and grouped until a group
    reaches ``SIZE_LIMIT`` bytes; each group is indexed with
    ``createInvertIndex`` (which receives the position of the group's first
    book) and written to a temporary part file. The parts are then merged
    into a single JSON object stored under the dictionary's name, the part
    files are deleted and ``invertIndexByParts=1`` is set on the matching
    ``dictionary`` row.

    Returns:
        dict: ``{'success': True, 'time': seconds}`` where ``seconds`` covers
        building, merging and cleaning up the parts.
    """
    name = request.form['name']
    files = [f for f in request.form.getlist('files[]') if f != ""]

    SIZE_LIMIT = 10000000

    FILEPATH = '../../Model/books/'
    INVERTINDEXBYPARTSPATH = '../../Model/invertIndexByParts/'

    startTime = time()

    partPaths = []
    count = 0
    try:
        while count < len(files):
            sizeCur = 0
            filesCur = []
            startBook = count
            while sizeCur < SIZE_LIMIT and count < len(files):
                sizeCur += os.path.getsize(FILEPATH + files[count])
                filesCur.append(files[count])
                count += 1
            invertIndex = createInvertIndex(FILEPATH, filesCur, startBook)

            # Register the path before writing so a half-written part is
            # still cleaned up below.
            partPath = (INVERTINDEXBYPARTSPATH + name + "_" +
                        str(len(partPaths) + 1))
            partPaths.append(partPath)
            _writeIndexPart(partPath, invertIndex)

        _mergeIndexParts(partPaths, INVERTINDEXBYPARTSPATH + name)
    finally:
        for partPath in partPaths:
            if os.path.exists(partPath):
                os.remove(partPath)
    resTime = time() - startTime

    # ``name`` comes straight from the request, so it is bound as a query
    # parameter instead of being concatenated into the SQL text.
    # NOTE(review): assumes the driver uses the ``%s`` paramstyle - confirm.
    query = "UPDATE `dictionary` SET `invertIndexByParts`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()

    return {'success': True, 'time': resTime}