def store(num_imag):
    """Load image and label triples from the .ttl files into DynamoDB.

    Makes sure the ``images`` and ``labels`` tables exist, stores
    ``num_imag`` depiction triples read from ``images.ttl`` and then stores
    one row per stemmed word of every rdfs:label triple in
    ``labels_en.ttl``.

    Args:
        num_imag: Number of depiction triples to store; any value accepted
            by ``int()``.
    """
    # Loading images and labels from file
    images = ParseTripe.ParseTriples("images.ttl")
    labels = ParseTripe.ParseTriples("labels_en.ttl")

    # A missing table is reported through ResourceNotFoundException; create
    # it in that case.
    for table_name in ("images", "labels"):
        try:
            dynamodbClient.describe_table(TableName=table_name)
        except dynamodbClient.exceptions.ResourceNotFoundException:
            createImagesTable(table_name)

    # category -> number of images stored so far. The count is written as
    # the sort value, so it must come from this dict (not from the table
    # description) to increase 1, 2, 3... for a repeated category.
    tempImages = {}
    num = int(num_imag)
    while num > 0:
        line = images.getNext()
        if not line:
            # Nothing returned by the parser: treated as end of data, the
            # same convention the label loop below relies on.
            break
        if "depiction" in line[1]:
            tempImages[line[0]] = tempImages.get(line[0], 0) + 1
            put('images', line[0], str(tempImages[line[0]]), line[2])
            num -= 1

    # stemmed word -> last sort value used for it (first occurrence is 0)
    tag = {}

    label = labels.getNext()
    print("creating labels")
    while label:
        if label[1] == "http://www.w3.org/2000/01/rdf-schema#label":
            for entry in label[2].split(" "):
                # Save stem word
                entry = Stemmer.stem(entry)
                # A word seen before gets the next sort value.
                tag[entry] = tag[entry] + 1 if entry in tag else 0
                # save sort word: table, keyword, inx and value
                put('labels', entry, str(tag[entry]), label[0])
        label = labels.getNext()
Пример #2
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
import sys

# Open the SQLite-backed stores; only the labels store is created with a
# sort key.
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Triple readers for the two dataset files.
imageLine = ParseTriple.ParseTriples('./dataset/images.ttl')
labelLine = ParseTriple.ParseTriples('./dataset/labels_en.ttl')
# word -> sort value to use for its next occurrence
repeatedWords = {}

# One image triple and one label triple are read per iteration; the number
# of iterations comes from the first command-line argument.
for i in range(int(sys.argv[1])):
    imageTriple = imageLine.getNext()
    if imageTriple[1] == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(imageTriple[0], imageTriple[2])

    labelTriple = labelLine.getNext()
    if labelTriple[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue
    # The whole label is stemmed first and then split into words.
    for word in Stemmer.stem(labelTriple[2]).split():
        sortValue = repeatedWords.get(word, 0)
        kv_labels.putSort(word, str(sortValue), labelTriple[0])
        repeatedWords[word] = sortValue + 1
Пример #3
0
import keyvalue.dynamostorage as DynamoStorage
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
import sys

# Make connections to KeyValue
# One DynamoStorage wrapper per table name.
kv_labels = DynamoStorage.DynamoStorage("labels")
kv_images = DynamoStorage.DynamoStorage("images")

# Process Logic.
# Triple readers for the image and label dataset files.
image_line = ParseTriple.ParseTriples("./dataset/images.ttl")
label_line = ParseTriple.ParseTriples("./dataset/labels_en.ttl")

# Amount to load, taken from the first command-line argument.
quantity = int(sys.argv[1])

# The same set object is handed to both calls.
# NOTE(review): presumably put_many_images fills it and put_many_labels
# reads it to restrict labels to stored images - confirm in DynamoStorage.
value_set = set()
kv_images.put_many_images(image_line, quantity, value_set)
kv_labels.put_many_labels(label_line, quantity, value_set, Stemmer)
Пример #4
0
import keyvalue.parsetriples as ParseTriples
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as dynamo
from botocore.exceptions import ClientError

# Table wrappers: stemmed words go to 'terms', image URLs to 'images'.
kv_labels = dynamo.DynamodbKeyValue('terms')
kv_images = dynamo.DynamodbKeyValue('images')

# Triple readers for the two dataset files.
parse_images = ParseTriples.ParseTriples('./data/images.ttl')
parse_labels = ParseTriples.ParseTriples('./data/labels_en.ttl')

# Images: read 10000 triples and store the depiction ones, using the
# position of the triple in the read sequence as the second put argument.
for i in range(10000):
    line = parse_images.getNext()
    category, B, imageURL = line[0], line[1], line[2]
    if B != 'http://xmlns.com/foaf/0.1/depiction':
        continue
    kv_images.put(category, i, imageURL)

# Labels: read 5000 triples and, for each rdfs:label one, store every
# stemmed token of the label text against its category.
for i in range(5000):
    line = parse_labels.getNext()
    category, B, terms = line[0], line[1], line[2]
    if B != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue
    for token in terms.split(' '):
        kv_labels.put(Stemmer.stem(token), i, category)
Пример #5
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Process Algorithm.

# Read Images and Labels from the file
parseTriplesImages = ParseTripe.ParseTriples("images.ttl")
parseTriplesLabels = ParseTripe.ParseTriples("labels_en.ttl")

# Parse Images to the Image Collection
# Only the first 1000 depiction images are stored. countImages starts at 0
# and only advances when an image is stored, so the bound has to be `<`
# (a `<=` bound would store 1001).
countImages = 0
while countImages < 1000:
    triple = parseTriplesImages.getNext()
    if triple[1] == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(triple[0], triple[2])
        countImages += 1

# Parse Labels to the Label Collection
# Store sortKey per keys
terms = {}

# Get the first label
label = parseTriplesLabels.getNext()
import keyvalue.dynamostorage as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
from botocore.exceptions import ClientError

# Make connections to KeyValue
kv_labels = KeyValue.DynamoDB("labels")
kv_images = KeyValue.DynamoDB("images")

try:
    # Process Logic.
    print("-------------------------------------------")
    labelsDataset = ParseTriple.ParseTriples('./datasets/labels_en.ttl')

    for i in range(1, 1000):
        word = labelsDataset.getNext()
        category = word[0]
        label = word[2]
        steam = Stemmer.stem(label)
        for x in steam.split(' '):
            print("category: " + category + " ____________ labe:" + x)
            kv_labels.put(x, len(x), category)

    print("-------------------------------------------")

    print("-------------------------------------------")
    imagesDataset = ParseTriple.ParseTriples('./datasets/images.ttl')
    for i in range(0, 2000):
        img = imagesDataset.getNextImage()
        category = img[0]
        imgPath = img[2]
Пример #7
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as DynamoStorage

# Loader configuration.
# NOTE(review): BATCH_LIMIT is not used in the visible part of the script;
# presumably the number of items sent per batch write - confirm.
BATCH_LIMIT = 24
# Number of iterations of the image-processing loop.
IMAGE_LIMIT = 2000
# Dataset files, relative to the working directory.
IMAGES_FILE = "images.ttl"
LABELS_FILE = "labels_en.ttl"
# Region passed to both DynamoStorage connections.
AWS_REGION = "us-east-1"
# Predicate URIs of the triples of interest.
IMAGE_TYPE = "http://xmlns.com/foaf/0.1/depiction"
LABEL_TYPE = "http://www.w3.org/2000/01/rdf-schema#label"

# Init parsers
image_parser = ParseTripe.ParseTriples(IMAGES_FILE)
label_parser = ParseTripe.ParseTriples(LABELS_FILE)

# Make connections to Dynamodb
# NOTE(review): the second positional argument (True) for 'terms' looks
# like a sort-key flag - confirm against DynamoStorage's signature.
db_images = DynamoStorage.DynamoStorage('images', region=AWS_REGION)
db_labels = DynamoStorage.DynamoStorage('terms', True, AWS_REGION)

# Internal storage
# All four start empty; how they are filled is not visible in this part.
allImages = {}
allLabels = {}
images = {}
labels = {}

# Process images
i = 0
for x in range(IMAGE_LIMIT):
    image = image_parser.getNext()
Пример #8
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer


# Make connections to KeyValue
# Only the labels store is opened with sortKey=True.
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db","labels",sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db","images")

# Process Logic.

# Open the image triples file.
images = ParseTripe.ParseTriples("images.ttl")

# Store depiction images until the counter passes 400. The counter runs
# from 0 through 400 inclusive, so 401 images are stored.
# NOTE(review): an earlier comment here said the limit was 100 images -
# confirm which limit is intended.
i = 0
while (i <= 400):
    image = images.getNext()
    # Keep only triples whose predicate contains "depiction".
    if "depiction" in image[1]:
       kv_images.put(image[0], image[2])
       # The counter only advances when an image was stored.
       i += 1

labels = ParseTripe.ParseTriples("labels_en.ttl")
label = labels.getNext()

tag = {}
while(label):
    #Check if the image on the label is inside of the kv_images
    if kv_images.get(label[0]):
Пример #9
0
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
# NOTE(review): `KeyValue` is not among the imports visible for this
# script (only parsetriples and stemmer are imported) - confirm that the
# sqlitekeyvalue import exists, otherwise this raises NameError.
kv_labels = KeyValue.SqliteKeyValue("db/sqlite_labels.db",
                                    "labels",
                                    sortKey=True)
kv_images = KeyValue.SqliteKeyValue("db/sqlite_images.db", "images")

# Process Logic.
# In-memory copies of what is loaded: [subject, url] pairs, terms, and the
# distinct image subjects (categories).
images = []
terms = []
temp_cat = []

# `temp` counts triples read, not images stored.
temp = 0
images_ttl = ParseTripe.ParseTriples("resources/images.ttl")
labels_ttl = ParseTripe.ParseTriples("resources/labels_en.ttl")

# Read the first 100 triples of the image file and keep the depiction ones.
while temp < 100:
    temp_images = images_ttl.getNext()
    if (temp_images[1] == "http://xmlns.com/foaf/0.1/depiction"):
        temp_arr = [temp_images[0], temp_images[2]]
        images.append(temp_arr)
        kv_images.put(str(temp_images[0]), str(temp_images[2]))
        # Record each subject once.
        if not temp_images[0] in temp_cat:
            temp_cat.append(temp_images[0])
    # Advances for every triple read, whether or not it was stored.
    temp += 1
# Reset the counter for the label loop that follows.
temp = 0
while temp < 100:
    temp_labels = labels_ttl.getNext()
    if (temp_labels[1] == "http://www.w3.org/2000/01/rdf-schema#label"):
Пример #10
0
import keyvalue.parsetriples as ParseTriples
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as Dynamo
from botocore.exceptions import ClientError

kv_labels = Dynamo.DynamodbKeyValue('labels')
kv_images = Dynamo.DynamodbKeyValue('images')

# Process Logic.
parse_images = ParseTriples.ParseTriples('../Dataset/images.ttl')
parse_labels = ParseTriples.ParseTriples('../Dataset/labels_en.ttl')

# Insert Images
# Store 2000 depiction triples. The counter only advances when an image is
# stored; reassigning the variable of a `for ... in range()` loop does not
# repeat an iteration, so an explicit while loop is needed to skip
# non-depiction triples without using up the budget.
i = 0
while i < 2000:
    line = parse_images.getNext()
    if line is None:
        # The parser returned nothing (assumed to mean end of data); stop
        # rather than fail on line[0].
        break
    category = line[0]
    b = line[1]
    imageURL = line[2]
    if b == 'http://xmlns.com/foaf/0.1/depiction':
        # len(category) is the sort value; the label pass looks images up
        # with the same (category, len(category)) pair.
        kv_images.put(category, len(category), imageURL)
        i += 1

#Labels
for i in range(5000):
    line = parse_labels.getNext()
    category = line[0]
    b = line[1]
    terms = line[2]
    findImage = kv_images.get(category, len(category))
    if b == 'http://www.w3.org/2000/01/rdf-schema#label' and findImage is not None:
Пример #11
0
def dynamoStorage(num):
    """Fill the DynamoDB ``images`` and ``labels`` tables from the datasets.

    Ensures both tables exist, stores ``num`` depiction triples read from
    ``images.ttl`` and then, for every rdfs:label triple in
    ``labels_en.ttl`` whose subject was stored as an image, stores one row
    per stemmed word of the label.

    Args:
        num: Number of depiction triples to store; converted with ``int()``.
    """
    # Make sure the images table exists, creating it when it is missing.
    try:
        dynamodbClient.describe_table(TableName="images")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        print('Could not found `images` table')
        createImagesTable('images')
    print('Images table load')

    # Same check for the labels table.
    try:
        dynamodbClient.describe_table(TableName="labels")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        print('Could not found `labels` table')
        createImagesTable('labels')
    print('Labels table load')

    # Open both dataset files.
    image_triples = ParseTripe.ParseTriples("images.ttl")
    label_triples = ParseTripe.ParseTriples("labels_en.ttl")
    print('Images file load')
    print('Labels file load')

    # category -> number of images stored so far; the running count is
    # written as the sort value of each stored image.
    per_category = {}
    remaining = int(num)
    while remaining > 0:
        triple = image_triples.getNext()
        if triple[1] != 'http://xmlns.com/foaf/0.1/depiction':
            continue
        category = triple[0]
        per_category[category] = per_category.get(category, 0) + 1
        put('images', category, str(per_category[category]), triple[2])
        remaining -= 1
    print('Images stored in DynamoDB')

    # stemmed word -> number of rows stored for it so far
    word_counts = {}
    while True:
        label = label_triples.getNext()
        if not label:
            break
        # Only label triples whose subject has a stored image are used.
        if (label[0] not in per_category
                or label[1] != 'http://www.w3.org/2000/01/rdf-schema#label'):
            continue
        for word in label[2].split():
            stem = Stemmer.stem(word)
            word_counts[stem] = word_counts.get(stem, 0) + 1
            put('labels', stem, str(word_counts[stem]), label[0])
    print('Labels stored in DynamoDB')
Пример #12
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer

# Open the SQLite-backed stores; only the labels store uses a sort key.
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Triple readers for the two dataset files.
parserImages = ParseTriple.ParseTriples("images.ttl")
parserLabels = ParseTriple.ParseTriples("labels_en.ttl")
############### Populating sqlite_images.db ###############
images = []
contadorImagenes = 0
# Keep reading until 500 depiction triples have been stored; anything else
# the parser returns (including None) is skipped.
while contadorImagenes < 500:
    tupla = parserImages.getNext()
    if tupla is None or tupla[1] != "http://xmlns.com/foaf/0.1/depiction":
        continue
    # Drop <B> from <A><B><C>, leaving (subject, image URL).
    tupla = tupla[:1] + tupla[2:]
    images.append(tupla)
    kv_images.put(tupla[0], tupla[1])
    contadorImagenes += 1
############### Poblando sqlite_labels.db ###############
#print("NOW FROM LABELS ******************************************")
etiquetas = []
stemmedWords = []
stemmedWordsIndexes = []
contadorLabels = 0
while contadorLabels < 5000:
    tupla = parserLabels.getNext()
Пример #13
0
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer
from dynamoDB.dynamodb import Dynamodb

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")
# NOTE(review): the config path is an absolute path on one developer's
# machine; the script will not run elsewhere without changing it.
dynamoDB = Dynamodb('C:\\Users\\joaqu\\Desktop\\Cloud1\\dynamoDB\\config.json')
print("Se han creado las tablas")

# Process Logic.
images = []
parserImg = ParseTripe.ParseTriples("./DataSet/images.ttl")
# Number of triples read from the image file (only depictions are kept).
imagesToLoad = 100

for i in range(0, imagesToLoad):
    # Named `triple` rather than `tuple` so the builtin is not shadowed for
    # the rest of the module.
    triple = parserImg.getNext()
    if triple[1] == "http://xmlns.com/foaf/0.1/depiction":
        # Map the subject to the image link by dropping the predicate.
        triple = triple[:1] + triple[2:]
        images.append(triple)
        # Images are only collected in memory; the SQLite write is disabled:
        # kv_images.put(triple[0], triple[1])

print("images", images)
print("images len:", len(images))

#testing
#print(kv_images.get('http://wikidata.dbpedia.org/resource/Q18'))