Example #1
def __init__(self, string):
    '''
    Receives a query string such as "Telcel no da el servicio de 3g
    en la facultad de ingenieria", searches Google and Wikipedia for
    matching documents, and stores them in self.documents.
    '''
    self.documents = []
    # Collect Google results first, then Wikipedia results per word
    self.documents += GoogleSearch(string).documents
    for word in string.split():
        self.wikipediaSearch(word)
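
A minimal usage sketch, assuming the enclosing class is named DocumentSearcher (the snippet does not show its real name) and that GoogleSearch and the wikipediaSearch method are available:

# DocumentSearcher is a hypothetical name for the class defined above.
searcher = DocumentSearcher(
    "Telcel no da el servicio de 3g en la facultad de ingenieria")
print len(searcher.documents)  # documents gathered from Google and Wikipedia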
Example #2
import os
import time
from Queue import Queue  # Python 2 standard library

# Project-local helpers (UnicodeReader, UnicodeWriter, RandomReader, Downloader,
# FaceDetector, GoogleImage, GoogleSearch, Wikipedia, picUtils,
# isDefaultGravatarPic) and the _RANDOM/_FACE/... feature flags are assumed to
# be defined elsewhere in this module.

def main():

    data = "../resources/SOusers-Mar13.csv"  # File containing SO user dump
    results = "../resources/features3.csv"  # File where features will be stored
    picPath = "../resources/SOpictures/"  # Directory where pictures will be downloaded

    fr = open(data, 'rb')
    fw = open(results, 'ab')

    if _RANDOM:
        reader = RandomReader(fr)
    else:
        reader = UnicodeReader(fr)

    writer = UnicodeWriter(fw)

    queue = Queue()
    if _FACE:
        faceDetector = FaceDetector()

    threads = []
    SOhashes = {}  # Maps each Gravatar hash to its SO user id (deduplicates users)

    # Use multiple threads to download and get information
    for i in xrange(10):
        threads.append(Downloader(queue))
        threads[-1].start()

    idx = 0
    size = 4500  # Number of subjects

    for row in reader:
        if idx < size:
            so_uid = row[0]
            so_hash = row[2]
            if so_hash not in SOhashes:
                SOhashes[so_hash] = so_uid
                if not isDefaultGravatarPic(so_hash):
                    data = [so_uid]
                    if _VISUAL_FEATURES:

                        # Download picture
                        filepath = os.path.join(picPath,
                                                '%d.jpg' % int(so_uid))
                        if not os.path.isfile(filepath):
                            queue.put(
                                ('http://www.gravatar.com/avatar/%s' % so_hash,
                                 filepath))
                            time.sleep(2)

                        # Load picture
                        pic = picUtils.loadPicture(filepath)

                        if _FACE:
                            if faceDetector.isFrontFace(
                                    pic) or faceDetector.isProfileFace(pic):
                                data.append(str(True))
                            else:
                                data.append(str(False))

                        if _MOST_COMMON_COLORS:
                            _, f1, _, f2 = picUtils.mostCommonColor(pic)
                            data.append(str(f1 + f2))

                        if _NBCOLORS:
                            data.append(str(picUtils.getNbOfColors(pic)))

                        if _FARTHEST_NEIGHBOR:
                            F1 = picUtils.farthestNeighborMetric(pic, 10)
                            F2 = picUtils.farthestNeighborMetric(pic, 200)
                            data.append(str(F1))
                            data.append(str(F2))
                            if F1 != 0:
                                data.append(str(F2 / F1))
                            else:
                                data.append('?')

                        if _AVERAGE_SATURATION:
                            data.append(str(picUtils.avgSaturation(pic)))

                        if _THRESHOLD_BRIGHTNESS:
                            data.append(str(picUtils.threBrightness(pic, 0.2)))

                    if _GOOGLE:
                        gi = GoogleImage('http://www.gravatar.com/avatar/%s' %
                                         so_hash)
                        bestGuess = gi.getBestGuess()
                        if bestGuess:
                            bestGuess = bestGuess.encode('utf8')
                            data.append(bestGuess)
                            if _WIKIPEDIA:
                                gs = GoogleSearch("%s site:en.wikipedia.org" %
                                                  bestGuess)
                                wikiTitlePage = gs.getWikipediaTitlePage()
                                if wikiTitlePage:
                                    wiki = Wikipedia(wikiTitlePage)
                                    wiki.categoryGraph(4)
                                    nbCats = 10
                                    cats = wiki.sortGraphByDegree()
                                    # Keep the nbCats highest-degree categories
                                    for cat in cats[:nbCats]:
                                        data.append(str(cat))

                    # Write all information collected in the csv file
                    try:
                        print data
                        writer.writerow(data)
                        idx += 1
                    except Exception:
                        print "Error with data"
        else:
            break
    fr.close()
    fw.close()

    # Done queuing work; send a (None, None) sentinel to stop each worker thread
    for i in xrange(10):
        queue.put((None, None))
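
The (None, None) tuples act as per-thread shutdown sentinels. A minimal sketch of a worker compatible with this protocol, assuming Downloader subclasses threading.Thread (the project's real implementation is not shown here):

import threading
import urllib

class Downloader(threading.Thread):
    # Hypothetical sketch; the actual Downloader class is defined elsewhere.
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            url, filepath = self.queue.get()
            if url is None:  # (None, None) sentinel: no more work, exit
                break
            urllib.urlretrieve(url, filepath)  # fetch the Gravatar image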
Example #3
def test_google_search(self):
    goo = GoogleSearch()
    for item in goo.get("something"):
        print item