# Module-level dependencies assumed here: csv, sys, ctypes, binascii, json,
# logging, and datetime (from datetime import datetime), plus the project's
# conn, Article, amcates, SKIP_PARENTS and PROP_FIELDS.
def get_articles(self, fn, media):
    """Read the articles CSV dump, resolve parent hashes over multiple
    passes, and yield one tuple per article ready for insertion."""
    csv.field_size_limit(sys.maxsize)

    def _int(x):
        return int(x) if x else None

    def hash2binary(hash):
        # PostgreSQL hex-format bytea literal (\x...); None falls through to None
        if hash:
            if not isinstance(hash, str):
                raise TypeError("Hash should be str, not {}".format(type(hash)))
            return "\\x" + hash

    r = csv.reader(open(fn))
    header = next(r)
    index = {col: i for (i, col) in enumerate(header)}
    AID = index['article_id']

    if self.maxid:
        logging.info("*** max(id) set by user: {self.maxid}".format(**locals()))
        max_id, self.n_rows = self.maxid, self.maxid
    else:
        logging.info("*** Scan input CSV to determine #rows and max(id)")
        max_id = 0
        for row in r:
            max_id = max(max_id, int(row[AID]))
            self.n_rows += 1
            if not self.n_rows % 10000000:
                logging.info(".. scanned {self.n_rows} rows".format(**locals()))
        self.maxid = max_id

    logging.info("{self.n_rows} rows, max ID {max_id}, allocating memory for hashes"
                 .format(**locals()))
    # One flat buffer with a fixed 28-byte (SHA-224) slot per article id;
    # an all-zero slot means that article's hash has not been computed yet.
    hashes = ctypes.create_string_buffer(max_id * 28)
    NULL_HASH = b'\x00' * 28

    # "PLENTY" means: too many orphans to buffer in memory, so re-read the file
    orphans = "PLENTY"
    passno = 1

    if self._continue:
        logging.info("Continuing from previous migration, getting state from DB")
        with conn().cursor('migration-continue') as c:
            c.itersize = 10000  # how many records to buffer on the client
            c.execute("SELECT article_id, hash FROM articles")
            i = 0
            while True:
                rows = c.fetchmany(10000)
                if not rows:
                    break
                i += len(rows)
                if not i % 1000000:
                    logging.info("Retrieved {i} rows...".format(**locals()))
                for (aid, hash) in rows:
                    offset = (aid - 1) * 28
                    hashes[offset:offset + 28] = hash
        self.n_rows -= i
        logging.info("Continuing migration, {i} articles retrieved, "
                     "up to {self.n_rows} to go".format(**locals()))

    while orphans:
        norphans = len(orphans) if isinstance(orphans, list) else orphans
        logging.info("*** Pass {passno}, #orphans {norphans}".format(**locals()))
        passno += 1
        if orphans == "PLENTY":
            r = csv.reader(open(fn))
            next(r)  # skip header
            todo = r
        else:
            todo = orphans
        orphans = []
        MAX_ORPHANS_BUFFER = 50000
        for i, row in enumerate(todo):
            if not i % 1000000:
                norphans = len(orphans) if isinstance(orphans, list) else orphans
                logging.info("Row {i}, #orphans: {norphans}".format(**locals()))
            aid = int(row[AID])
            offset = (aid - 1) * 28
            stored_hash = hashes[offset:offset + 28]
            if stored_hash != NULL_HASH:
                continue  # already processed in an earlier pass
            parent_id = _int(row[index['parent_article_id']])
            if (parent_id == aid) or (parent_id in SKIP_PARENTS):
                parent_id = None
            if parent_id:
                poffset = (parent_id - 1) * 28
                parent_hash = hashes[poffset:poffset + 28]
                if parent_hash == NULL_HASH:
                    # It's an orphan; we can't process it now, so either
                    # buffer it or leave it for a full re-iteration
                    if orphans != "PLENTY":  # try to buffer
                        if len(orphans) > MAX_ORPHANS_BUFFER:
                            orphans = "PLENTY"
                        else:
                            orphans.append(row)
                    continue
                parent_hash = binascii.hexlify(parent_hash).decode("ascii")
            else:
                parent_hash = None
            date = row[index['date']]
            date = date.split("+")[0]  # strip timezone offset
            date = datetime.strptime(date[:19], '%Y-%m-%d %H:%M:%S')
            a = Article(project_id=row[index['project_id']],
                        date=date,
                        title=row[index['headline']],
                        url=row[index['url']] or None,
                        text=row[index['text']],
                        parent_hash=parent_hash)
            a.properties = {v: row[index[v]] for v in PROP_FIELDS if row[index[v]]}
            a.properties['medium'] = media[int(row[index['medium_id']])]
            a.properties['uuid'] = str(a.properties['uuid'])
            props = json.dumps(a.properties)
            hash = amcates.get_article_dict(a)['hash']
            hashes[offset:offset + 28] = binascii.unhexlify(hash)
            yield (a.project_id, aid, a.date, a.title, a.url, a.text,
                   hash2binary(hash), hash2binary(a.parent_hash), props)
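
# --- Sketch: the fixed-width hash buffer used by get_articles above. ---
# A minimal, self-contained illustration (all names here are hypothetical):
# article N's 28-byte SHA-224 digest lives at offset (N - 1) * 28 in one flat
# ctypes buffer, and an all-zero slot reads back as "hash not computed yet".
import ctypes
import hashlib

DIGEST_SIZE = 28                      # SHA-224 digests are 28 bytes
DEMO_NULL_HASH = b'\x00' * DIGEST_SIZE

def demo_hash_buffer(max_id=10):
    buf = ctypes.create_string_buffer(max_id * DIGEST_SIZE)  # zero-initialized

    def put(aid, text):
        offset = (aid - 1) * DIGEST_SIZE
        buf[offset:offset + DIGEST_SIZE] = hashlib.sha224(text.encode()).digest()

    def get(aid):
        offset = (aid - 1) * DIGEST_SIZE
        digest = buf[offset:offset + DIGEST_SIZE]
        return None if digest == DEMO_NULL_HASH else digest

    put(3, "some article text")
    assert get(3) is not None         # slot 3 now holds a digest
    assert get(4) is None             # untouched slots read back as all-zero

demo_hash_buffer()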
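
# --- Sketch: multi-pass orphan resolution, the idea behind the while loop ---
# above. A hypothetical toy version: a row may reference a parent that only
# appears later in the file, so we keep re-scanning, processing only rows
# whose parent has already been handled, until no orphans remain.
def process_in_passes(rows):
    done = set()
    orphans = list(rows)
    while orphans:
        todo, orphans = orphans, []
        progress = False
        for (aid, parent_id) in todo:
            if parent_id is not None and parent_id not in done:
                orphans.append((aid, parent_id))   # parent not seen yet: retry
                continue
            done.add(aid)
            progress = True
        if not progress:                           # cycle or missing parent
            raise ValueError("unresolvable orphans: {}".format(orphans))
    return done

# Child (2) precedes its parent (1); pass 1 defers it, pass 2 resolves it.
assert process_in_passes([(2, 1), (1, None), (3, 2)]) == {1, 2, 3}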