示例#1
0
def getProperty(prop_id):
    """Return the ``getJson()`` output for the property record ``prop_id``.

    The first record returned by ``PropertyDAO().getRecord(prop_id)`` is
    wrapped in a ``Property`` built from its first four columns.

    Args:
        prop_id: identifier passed through to ``PropertyDAO.getRecord``.

    Returns:
        The value of ``Property.getJson()`` on success, or the string
        ``'No record found!'`` if the lookup or conversion fails.
    """
    try:
        record = PropertyDAO().getRecord(prop_id)[0]
        prop = Property(record[0], record[1], record[2], record[3])
        return prop.getJson()
    except Exception:
        # Best-effort lookup: any ordinary failure (no rows, short row, DAO
        # error) maps to the fallback message. ``Exception`` rather than a
        # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return 'No record found!'
示例#2
0
    def populateTokens(self, property: Property, sentences):
        """Record on ``property`` every sentence that equals a known phrase.

        Each element of ``sentences`` is compared (exact match) against a
        fixed set of phrases; matches are passed to ``property.setToken``.

        Args:
            property: object exposing ``setToken``.
            sentences: iterable of candidate sentences, or ``None`` to skip.

        Returns:
            The same ``property`` object.
        """
        if sentences is None:
            return property

        known_phrases = (
            'professionally managed',
            'no pets',
            'parking stall',
            'available now',
            'building amenities',
            'near school',
            'brand new',
            'suite laundry',
        )
        for candidate in sentences:
            if candidate in known_phrases:
                property.setToken(candidate)
        return property
示例#3
0
def update_package_by_id(package_id):
    """Refresh a single package if it is due, then redirect to the index.

    The package with ``pid == package_id`` is selected only when its
    ``last_updated`` is unset or at least two hours old. When such a package
    exists, ``update_package`` is run to completion on a fresh event loop
    and a ``last_updated`` Property carrying the current UTC time is merged
    and committed. Failures are logged and do not prevent the redirect.

    Args:
        package_id: value compared against ``Package.pid``.

    Returns:
        A redirect response to the ``index`` endpoint.
    """
    stale_before = datetime.utcnow() - timedelta(hours=2)
    due_for_update = or_(Package.last_updated.is_(None),
                         Package.last_updated <= stale_before)
    needed_columns = load_only(Package.owner,
                               Package.repo,
                               Package.path,
                               Package.ptype,
                               Package.date)

    query = db_session.query(Package)
    query = query.filter(Package.pid == package_id, due_for_update)
    query = query.options(needed_columns)
    package = query.first()

    if not package:
        return redirect(url_for("index"))

    # A dedicated loop is created and installed as current so that
    # ensure_future() schedules the coroutine on it.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        update_task = asyncio.ensure_future(update_package(package))
        loop.run_until_complete(update_task)
        refreshed_at = Property("last_updated", date_val=datetime.utcnow())
        db_session.merge(refreshed_at)
        db_session.commit()
    except Exception as ex:
        LOGGER.error(ex)
        LOGGER.debug(traceback.format_exc())
    finally:
        loop.close()

    return redirect(url_for("index"))
示例#4
0
 def __getitem__(self, key):
     """Look up ``key`` in ``self.properties`` and wrap the result in a Property.

     A dotted key such as ``"a.b.c"`` is treated as a path: ``a`` is looked
     up in ``self.properties`` and each following segment is read from every
     truthy value of the previous level. A non-list value is wrapped in a
     list before the path is followed.

     Args:
         key: plain or dotted key; ``None`` yields an empty result.

     Returns:
         ``Property(key, values, error)`` where ``values`` is the list of
         resolved values (``None`` if nothing was found) and ``error`` is
         an ``Error`` of type ``MISSING_DYNAMIC_VALUE`` when a value lacked
         a path segment, otherwise ``None``.
     """
     retValue = None
     error = None
     if key is not None:
         dynamickey = key
         attributes = []
         if '.' in key:
             dynamickey, *attributes = key.split('.')
         if dynamickey in self.properties:
             values = self.properties[dynamickey]
             if not isinstance(values, list):
                 values = [values]
             for attribute in attributes:
                 newvalues = None
                 for value in values:
                     if value:
                         if attribute in value:
                             if newvalues is None:
                                 newvalues = []
                             newvalues.append(value.get(attribute))
                         else:
                             error = Error(
                                 type=Error.MISSING_DYNAMIC_VALUE,
                                 message="No key: '" + str(attribute) +
                                 "' in object: " + dynamickey +
                                 " for dynamic key: '" + str(key))
                             # Stop scanning this level at the first value
                             # that lacks the attribute.
                             break
                 values = newvalues
                 if values is None:
                     # Nothing resolved at this level, so there is nothing
                     # to descend into; iterating None for the next path
                     # segment would raise TypeError.
                     break
             retValue = values
     return Property(key, retValue, error)
示例#5
0
def add_mirror():
    """Register the URL given in the ``url`` query parameter as a mirror.

    Steps, in order:

    1. return the ``check_auth()`` result if it is truthy;
    2. reject a missing/empty ``url`` with a 400 response;
    3. issue a HEAD request to the URL and reject anything but status 200
       (or any request error) with a 400 response;
    4. report an already-registered URL with a 200 response;
    5. otherwise store the URL as ``MIRROR_<n>``, where ``n`` is one more
       than the highest existing mirror number (0 for the first mirror).

    Returns:
        A plain-text ``Response`` describing the outcome, or whatever
        ``check_auth()`` returned when it is truthy.
    """
    auth_check = check_auth()
    if auth_check:
        return auth_check

    url = request.args.get("url")
    if not url:
        return Response("url is empty, try '{}?url=http://url/'".format(
            request.url_root + request.path.lstrip("/")),
                        400,
                        mimetype="text/plain")

    try:
        # Bound the wait: without a timeout an unresponsive host would block
        # this request handler indefinitely. A timeout surfaces as an
        # exception and is reported through the handler below.
        resp = requests.head(url, timeout=10)
        if resp.status_code != 200:
            return Response("'{}' not available. Status code was {}\n".format(
                url, resp.status_code),
                            400,
                            mimetype="text/plain")
    except Exception as ex:
        return Response("Error occured while checking url: {}".format(ex),
                        400,
                        mimetype="text/plain")

    mirrors = db_session.query(Property) \
        .filter(Property.identifier.like("MIRROR_%")) \
        .options(load_only(Property.identifier, Property.text_val)) \
        .all()

    if any(mirror.text_val == url for mirror in mirrors):
        return Response("'{}' is already a mirror.\n".format(url),
                        200,
                        mimetype="text/plain")

    if mirrors:
        # Identifiers look like "MIRROR_<n>"; continue after the highest n.
        prefix_len = len("MIRROR_")
        new_mirror_nr = max(
            int(mirror.identifier[prefix_len:]) for mirror in mirrors) + 1
    else:
        new_mirror_nr = 0
    new_mirror = Property("MIRROR_" + str(new_mirror_nr), text_val=url)
    db_session.add(new_mirror)
    db_session.commit()

    return Response("'{}' added as mirror.\n".format(url),
                    200,
                    mimetype="text/plain")
示例#6
0
    def populateRoomSize(self, property: Property):
        """Fill rooms and size on ``property`` from its ``characteristics``.

        ``characteristics`` is split on ``'-'``. A part containing ``'br'``
        supplies the room count and a part containing ``'ft_sq'`` supplies
        the size; in both cases the marker is removed and the remainder is
        stripped and stored as a string. ``setUpdate`` is called after each
        value that is set.

        Args:
            property: object exposing ``characteristics``, ``setRooms``,
                ``setSize`` and ``setUpdate``.

        Returns:
            The same ``property`` object. It is returned untouched when
            ``characteristics`` is ``None`` or empty.
        """
        characteristics = property.characteristics
        if not characteristics:
            # Nothing to parse; calling .split on None would raise.
            return property

        for part in str(characteristics).split('-'):
            if 'br' in part:
                property.setRooms(part.replace('br', '').strip())
                property.setUpdate()
            elif 'ft_sq' in part:
                property.setSize(part.replace('ft_sq', '').strip())
                property.setUpdate()

        return property
示例#7
0
from db.postgresl import PropertyDAO
from model.Property import Property
from service.TextMiningService import TextMiningService

# Collaborators used by the steps below.
propertyDao = PropertyDAO()
textMiningService = TextMiningService()

# Fetch a batch of 500 records. A single record can be fetched instead with:
#   rows = propertyDao.getRecord(6829944535)
rows = propertyDao.getRecords(500)

print("Records: ", len(rows))

# One Property per row: first column, and the second column UTF-8 encoded.
records = [Property(row[0], row[1].encode("utf-8")) for row in rows]

print("\nShow me the first 5 records: ", "\n")
description_lines = []
for position, record in enumerate(records):
    description_lines.append(str(record.description) + "\n")
    if position < 5:
        print("Id: ", record.id, " Desc: ", record.description)

# All descriptions, one per line, as a single text.
text = "".join(description_lines)

# remove special characters
text = textMiningService.removeSpecialCharacters(text)

tokenized_text = textMiningService.getSentenceTokenize(text)
print(tokenized_text)
示例#8
0
def test():
    """Return ``getJson()`` for the hard-coded property record 6842453594.

    The first record returned by ``PropertyDAO().getRecord`` is wrapped in a
    ``Property`` built from its first four columns.
    """
    row = PropertyDAO().getRecord(6842453594)[0]
    sample = Property(row[0], row[1], row[2], row[3])
    return sample.getJson()
示例#9
0
    def tryGetBedroomFromDescription(self, property: Property):
        """Infer the number of bedrooms from the title and description.

        The lower-cased title and description are joined with a space and
        split on single spaces. Words are scanned in order and, per word,
        the first matching rule wins:

        1. a token that encodes the count directly (``2bd``, ``2-br``,
           ``2bed``; counts 1 to 5);
        2. a bedroom keyword (or any word containing ``bed``/``bd``) whose
           preceding word is a number or a spelled-out ``one``..``five``,
           accepted when the value is within ``[0, 7]``;
        3. a studio/bachelor mention anywhere in the text, stored as 1.

        If the scan finds nothing, ``<digits>bd`` anywhere in the text is
        tried as a last resort. On success ``setRooms`` and ``setUpdate``
        are called on ``property``.

        Args:
            property: object exposing ``title``, ``description``,
                ``setRooms`` and ``setUpdate``.

        Returns:
            The same ``property`` object, whether or not it was updated.
        """
        spelled_numbers = ['one', 'two', 'three', 'four', 'five']
        # Position in each list + 1 is the bedroom count the token stands for.
        exact_tokens = (
            ['1bd', '2bd', '3bd', '4bd', '5bd'],
            ['1-br', '2-br', '3-br', '4-br', '5-br'],
            ['1bed', '2bed', '3bed', '4bed', '5bed'],
        )
        bedroom_keywords = ('bedroom', 'bedrooms', 'bed', 'br', 'brm', 'bdrm',
                            'bdr')

        def parse_count(token):
            """Return token as an int (preferred) or float, else None."""
            if token in spelled_numbers:
                return spelled_numbers.index(token) + 1
            for convert in (int, float):
                try:
                    return convert(token)
                except ValueError:
                    continue
            return None

        def accept(rooms):
            """Store the room count, flag the update and return property."""
            property.setRooms(rooms)
            property.setUpdate()
            return property

        desc = str(property.title).lower() + ' ' + str(
            property.description).lower()
        words = desc.split(' ')
        # Loop-invariant, so computed once. It is still tested inside the
        # loop so that the per-word rules get to inspect the first word
        # before the studio fallback applies.
        mentions_studio = ('studio' in words or 'bachelor ' in desc
                           or 'bachlor ' in desc)

        for i, word in enumerate(words):
            for tokens in exact_tokens:
                if word in tokens:
                    return accept(tokens.index(word) + 1)

            # i > 0: the first word has no predecessor, and words[-1] would
            # silently wrap around to the last word of the text.
            if i > 0 and (word in bedroom_keywords or 'bed' in word
                          or 'bd' in word):
                rooms = parse_count(words[i - 1].strip())
                # Values above 7 are treated as implausible and ignored.
                if rooms is not None and 0 <= rooms <= 7:
                    return accept(rooms)

            if mentions_studio:
                return accept(1)

        match = re.search(r"\d{1,5}bd", desc)
        if match:
            try:
                rooms = float(match.group().replace('bd', '').strip())
            except ValueError:
                rooms = None
            if rooms is not None and 0 <= rooms <= 7:
                return accept(rooms)

        return property
示例#10
0
    def tryGetBathFromDescription(self, property: Property):
        """Infer the number of bathrooms from the property's text fields.

        The lower-cased characteristics, title and description are joined,
        cleaned of a few separators and filler words, and then checked in
        this order (first hit wins):

        1. the phrase ``one and half ba`` -> 1.5;
        2. ``<digits>ba`` or ``<digit>.<digits>ba`` anywhere in the text;
        3. per word: a label such as ``bathrooms:`` followed by a number,
           or a word containing ``bath``/``bths`` preceded by a number;
        4. any occurrence of ``bath`` -> 1.

        Numbers may be digits or spelled out (``one``..``five``) and are
        accepted only within ``(0, 7]``. On success ``setBath`` and
        ``setUpdate`` are called on ``property``.

        Args:
            property: object exposing ``characteristics``, ``title``,
                ``description``, ``setBath`` and ``setUpdate``.

        Returns:
            The same ``property`` object, whether or not it was updated.
        """
        spelled_numbers = ['one', 'two', 'three', 'four', 'five']
        label_tokens = ('bath:', 'bathroom:', 'bathrooms:', 'bathroom(s):')
        bath_fragments = ('bath', 'bths')

        def parse_count(token):
            """Return token as a float bath count in (0, 7], else None."""
            if token in spelled_numbers:
                token = spelled_numbers.index(token) + 1
            try:
                count = float(token)
            except ValueError:
                return None
            return count if 0 < count <= 7 else None

        def accept(count):
            """Store the bath count, flag the update and return property."""
            property.setBath(count)
            property.setUpdate()
            return property

        desc = str(property.characteristics).lower() + ' ' + str(
            property.title).lower() + ' ' + str(property.description).lower()
        # Strip escaped non-breaking-space residue and separators.
        desc = desc.replace('\\xc2', ' ').replace('\\xa0', ' ')
        desc = desc.replace('&nbsp;', ' ').replace('+', ' ').replace(
            '/', ' ').replace('-', ' ')
        # Drop qualifiers that can sit between the number and the bath word.
        desc = desc.replace('full', '').replace('private', '').replace(
            '  ', ' ')
        words = desc.split(' ')

        if 'one and half ba' in desc:
            return accept(1.5)

        match = re.search(r"\d{1,5}ba|\d[.]\d{1,5}ba", desc)
        if match:
            count = parse_count(match.group().replace('ba', '').strip())
            if count is not None:
                return accept(count)

        last_index = len(words) - 1
        for i, word in enumerate(words):
            # Label followed by the number. Skipped for the final word,
            # which has no successor to read.
            if word in label_tokens and i < last_index:
                token = words[i + 1].strip().replace('+', '').replace(',', '')
                count = parse_count(token)
                if count is not None:
                    return accept(count)

            # Bath word preceded by the number. Skipped for the first word:
            # words[-1] would wrap around to the end of the text.
            if i > 0 and any(fragment in word for fragment in bath_fragments):
                token = words[i - 1].strip().replace('+', '').replace(
                    ',', '.').replace('/', '')
                count = parse_count(token)
                if count is not None:
                    return accept(count)

        # A bath is mentioned but no usable number was found: assume one.
        if 'bath' in desc:
            return accept(1)

        return property
示例#11
0
    def tryGetSizeFromDescription(self, property: Property):
        """Infer the size (square feet) from the property's text fields.

        The lower-cased characteristics, title and description are joined
        and split on single spaces. Words are scanned in order:

        1. a square-foot unit word (``sqft``, ``sq.ft``, ...) takes its
           number from the preceding word;
        2. a label (``sq/ft:``, ``footage:``) takes its number from the
           following word.

        If the scan finds nothing, ``<digits><any char>sq`` and then
        ``<digits><any char>ft`` are searched in the whole text. Only
        values of at least 100 are accepted. On success ``setSize`` and
        ``setUpdate`` are called on ``property``.

        Args:
            property: object exposing ``characteristics``, ``title``,
                ``description``, ``setSize`` and ``setUpdate``.

        Returns:
            The same ``property`` object, whether or not it was updated.
        """
        unit_words = (
            'square', 'sqft', 'sq.', 'sqt', 'sqf', 'sqft)', 'sq.ft.',
            'sqft.', 'sq', 'sqft,', 'sf', 'sq.ft', 'sq.ft.,', 'sqft).',
            'sq/ft',
        )
        label_words = ('sq/ft:', 'footage:')

        def parse_size(token):
            """Return token as int (preferred) or float if >= 100, else None."""
            cleaned = token.strip().replace('+', '').replace(',', '')
            for convert in (int, float):
                try:
                    value = convert(cleaned)
                except ValueError:
                    continue
                return value if value >= 100 else None
            return None

        def accept(size):
            """Store the size, flag the update and return property."""
            property.setSize(size)
            property.setUpdate()
            return property

        desc = str(property.characteristics).lower() + ' ' + str(
            property.title).lower() + ' ' + str(property.description).lower()
        words = desc.split(' ')
        last_index = len(words) - 1

        for i, word in enumerate(words):
            # Unit preceded by the number. Skipped for the first word:
            # words[-1] would wrap around to the end of the text.
            if i > 0 and word in unit_words:
                size = parse_size(words[i - 1])
                if size is not None:
                    return accept(size)

            # Label followed by the number. Skipped for the final word,
            # which has no successor to read.
            if i < last_index and word in label_words:
                size = parse_size(words[i + 1])
                if size is not None:
                    return accept(size)

        match = re.search(r"\d{1,5}.sq", desc)
        if not match:
            match = re.search(r"\d{1,5}.ft", desc)
        if match:
            digits = match.group().replace('sq', '').replace('ft', '').strip()
            try:
                size = float(digits)
            except ValueError:
                # The wildcard character was not a space or digit.
                size = None
            if size is not None and size >= 100:
                return accept(size)

        return property
示例#12
0
def packages_json_generate():
    """Stream the repository's package list as chunks of one JSON document.

    The document has the form
    ``{"name": REPO_NAME, "packages": [...], "last_updated": "..."}`` and is
    yielded piece by piece:

    1. packages whose last update succeeded within the past 24 hours are
       emitted straight from the database;
    2. packages that were never updated, whose successful update is older
       than 24 hours, or whose failed update is older than 4 hours are
       refreshed via ``update_package`` and emitted as each task completes;
       failed refreshes are logged and skipped;
    3. if any refresh was scheduled, the ``last_updated`` Property is set to
       the current UTC time and committed; otherwise the stored value is
       read back.

    Yields:
        str: consecutive fragments of the JSON document.
    """
    yield '{{"name":"{}","packages":['.format(REPO_NAME)

    # True once any package has been written. Every later entry is prefixed
    # with a comma; tracking this explicitly keeps the array valid even when
    # the first refresh fails or returns nothing.
    emitted_any = False

    cached_packages = db_session.query(Package) \
        .filter(Package.last_updated.isnot(None),
                Package.last_update_successful,
                Package.last_updated >= datetime.utcnow() - timedelta(hours=24)) \
        .options(load_only(Package.owner,
                           Package.name,
                           Package.description,
                           Package.filename,
                           Package.date,
                           Package.version,
                           Package.download_url,
                           Package.homepage))
    for package in cached_packages:
        yield ("," if emitted_any else "") + json_dump_package(package)
        emitted_any = True

    update_packages = db_session.query(Package) \
        .filter(or_(Package.last_updated.is_(None),
                    and_(Package.last_update_successful,
                         Package.last_updated < datetime.utcnow() - timedelta(hours=24)),
                    and_(not_(Package.last_update_successful),
                         Package.last_updated < datetime.utcnow() - timedelta(hours=4)))) \
        .options(load_only(Package.owner,
                           Package.repo,
                           Package.path,
                           Package.ptype,
                           Package.date))
    # A dedicated loop is created and installed as current so that
    # ensure_future() schedules the refresh coroutines on it.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        update_tasks = [
            asyncio.ensure_future(update_package(stale_package))
            for stale_package in update_packages
        ]
        for update_task in asyncio.as_completed(update_tasks):
            try:
                updated_package = loop.run_until_complete(update_task)
            except Exception as ex:
                LOGGER.error(ex)
                LOGGER.debug(traceback.format_exc())
                continue
            if updated_package:
                yield (("," if emitted_any else "") +
                       json_dump_package(updated_package))
                emitted_any = True
    finally:
        # Also runs when the consumer stops iterating early.
        loop.close()

    if update_tasks:
        last_updated_prop = Property("last_updated",
                                     date_val=datetime.utcnow())
        last_updated_prop = db_session.merge(last_updated_prop)
        db_session.commit()
        last_updated = last_updated_prop.date_val
    else:
        last_updated = db_session.query(Property.date_val).filter(
            Property.identifier == "last_updated").scalar()

    yield '],"last_updated":"{}"}}'.format(
        last_updated.isoformat() if last_updated else "")
示例#13
0
from model.Property import Property
from service.TextMiningService import TextMiningService

# Collaborators used by the steps below.
propertyDao = PropertyDAO()
textMiningService = TextMiningService()

# Fetch a batch of 50 records. A single record can be fetched instead with:
#   rows = propertyDao.getRecord(6829944535)
rows = propertyDao.getRecords(50)

# Optional record count for debugging:
#   print("Records: ", len(rows))

# One Property per row: its '_id' and its UTF-8 encoded 'house_description'.
records = [
    Property(row['_id'], row['house_description'].encode("utf-8"))
    for row in rows
]

print("\nShow me the first 5 records: ", "\n")
description_lines = []
for position, record in enumerate(records):
    description_lines.append(str(record.description) + "\n")
    if position < 5:
        print("Id: ", record.id, " Desc: ", record.description)

# All descriptions, one per line, as a single text.
text = "".join(description_lines)

# remove special characters
text = textMiningService.removeSpecialCharacters(text)

tokenized_text = textMiningService.getSentenceTokenize(text)
print(tokenized_text)

tokenized_word = textMiningService.getWordTokenize(text)
print(tokenized_word)
示例#14
0
from db.postgresl import PropertyDAO
from model.Property import Property

# Collaborators used by the processing loop that follows.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

rows = propertyDao.getRecordsWithNoLocation()

print("Records: ", len(rows))

# One Property per row, built from its first five columns; the second
# column is UTF-8 encoded before being passed on.
records = [
    Property(row[0], row[1].encode("utf-8"), row[2], row[3], row[4])
    for row in rows
]

# Progress counters and the accumulator filled by the loop below.
size = len(rows)
count = 0
result = defaultdict(list)
for property in records:
    property = propertyService.populateRoomSize(property)
    sentences = propertyService.getSentences(str(property.description))
    property.setLocation(
        propertyService.getLocationFromSentences(sentences, property.link))
    if len(property.location) > 0:
        result[property.location].append(property.id)
    print(property.id, ' - location: ', property.location)
    #propertyDao.updateRecord(property)
    count += 1
    try: