Exemplo n.º 1
0
class pythonTests(unittest.TestCase):
    """Smoke test for creating a database through a pyArango connection."""

    def setUp(self):
        """Open a connection with credentials taken from the environment.

        Both ``ARANGODB_ROOT_USERNAME`` and ``ARANGODB_ROOT_PASSWORD``
        default to ``root`` when unset.
        """
        credentials = {
            'username': os.getenv('ARANGODB_ROOT_USERNAME', 'root'),
            'password': os.getenv('ARANGODB_ROOT_PASSWORD', 'root'),
        }
        self.conn = Connection(**credentials)

    def tearDown(self):
        """Do nothing; cleanup has not been implemented yet."""
        # TODO: disconnect session and delete db

    def test_create_database(self):
        """Create ``test_db`` and fetch it back through the connection."""
        db_name = "test_db"
        self.conn.createDatabase(name=db_name)
        self.db = self.conn[db_name]
Exemplo n.º 2
0
class pythonTests(unittest.TestCase):
    """Checks that a database can be created on the configured server."""

    def setUp(self):
        """Connect using the root credentials found in the environment.

        Each variable falls back to ``root`` when it is not set.
        """
        read_env = os.getenv
        self.conn = Connection(
            username=read_env('ARANGODB_ROOT_USERNAME', 'root'),
            password=read_env('ARANGODB_ROOT_PASSWORD', 'root'),
        )

    def tearDown(self):
        """Placeholder: nothing is cleaned up after a test."""
        # TODO: disconnect session and delete db
        return None

    def test_create_database(self):
        """Create the ``test_db`` database, then look it up by name."""
        target = "test_db"
        self.conn.createDatabase(name=target)
        self.db = self.conn[target]
Exemplo n.º 3
0
 def db(self):
     """Return the configured database, creating it when it is missing.

     Credentials and the database name are read from ``self.config``.
     """
     connection = Connection(
         username=self.config.user,
         password=self.config.password,
     )
     db_name = self.config.database
     if connection.hasDatabase(db_name):
         return connection[db_name]
     return connection.createDatabase(db_name)
Exemplo n.º 4
0
    def __init__(self):
        """Connect to the server described by ``DBConfig`` and bind ``self.database``.

        Creation of ``DBConfig.database`` is attempted first. When that
        raises ``CreationError`` (presumably because the database already
        exists) the database is looked up on the connection instead.
        """
        server_url = f"http://{DBConfig.host}:{DBConfig.port}"
        connection = Connection(
            arangoURL=server_url,
            username=DBConfig.User.name,
            password=DBConfig.User.password,
        )

        try:
            database = connection.createDatabase(name=DBConfig.database)
        except CreationError:
            database = connection[DBConfig.database]
        self.database = database
Exemplo n.º 5
0
def main():
    """Copy documents from an Elasticsearch index into an ArangoDB collection.

    The ArangoDB database is named after ``ES_INDEX`` and is created, together
    with ``ARANGO_COLLECTION``, when missing.  Documents whose Elasticsearch
    id is already stored in the collection (in the ``_file`` field) are
    excluded from the search, so the function can be re-run to pick up only
    new documents.

    Raises:
        AssertionError: if any of the required module-level settings is empty.
    """
    # NOTE: asserts are stripped under ``python -O``; they are kept so the
    # failure type seen by existing callers does not change.
    assert ES_INDEX
    assert ES_HOST
    assert ARANGO_URL
    assert ARANGO_ROOT_PASSWORD
    assert ARANGO_COLLECTION
    assert ARANGO_USERNAME

    # ES connection
    es = Elasticsearch([ES_HOST])

    # Arango connection
    conn = Connection(arangoURL=ARANGO_URL, username=ARANGO_USERNAME, password=ARANGO_ROOT_PASSWORD)
    if ES_INDEX not in conn.databases:
        conn.createDatabase(name=ES_INDEX)
    db = conn[ES_INDEX]
    if not db.hasCollection(ARANGO_COLLECTION):
        db.createCollection(name=ARANGO_COLLECTION)

    # Build queries.
    # The count targets the same collection the other queries use (it used to
    # be hard-coded to "Patents").  An empty collection falls back to 1000.
    existed_patents_total = db.AQLQuery(
        f"RETURN LENGTH({ARANGO_COLLECTION})"
    ).response['result'][0] or 1000
    # batchSize is set to the document count so that the first response
    # carries every stored id.
    existed_patents = db.AQLQuery(
        f"FOR doc IN {ARANGO_COLLECTION} RETURN doc._file",
        batchSize=existed_patents_total
    ).response['result']
    es_query_exclude_existed = {"query": {"bool": {"must_not": [{"ids": {"values": existed_patents}}]}}}
    aql_query_insert = f"INSERT @doc INTO {ARANGO_COLLECTION} LET newDoc = NEW RETURN newDoc"

    # Handle ES pagination
    patents = es.search(index=ES_INDEX, body=es_query_exclude_existed, scroll='1m', size=100)
    scroll_id = patents['_scroll_id']
    hits = patents['hits']['hits']
    while hits:

        # Add patents to Arango
        for hit in hits:
            # Remember the ES id so later runs can exclude this document.
            hit['_file'] = hit['_id']
            db.AQLQuery(aql_query_insert, bindVars={'doc': hit})
            logging.info("Added: %s", hit['_file'])

        # Scroll next batch
        patents = es.scroll(scroll_id=scroll_id, scroll='1m')
        # No trailing comma here: the id must stay a plain string, otherwise
        # the next scroll request is sent a 1-tuple.
        scroll_id = patents['_scroll_id']
        hits = patents['hits']['hits']
Exemplo n.º 6
0
def _get_db():
    """Open the database and collection, creating whichever is missing.

    The names come from the module-level ``DB`` and ``COLLECTION``.

    Returns:
        (db, collection)
    """
    connection = Connection(
        arangoURL='http://localhost:8529',
        username='******',
        password='******',
    )

    if connection.hasDatabase(DB):
        db = connection[DB]
    else:
        db = connection.createDatabase(DB)

    if db.hasCollection(COLLECTION):
        collection = db.collections[COLLECTION]
    else:
        collection = db.createCollection(name=COLLECTION)

    return db, collection
class ArangoDbPersister(Persister):
    """
    A basic ArangoDB persister.

    Keys and values are both dicts.  On write they are merged into a single
    document; on read the key dict is used as a query-by-example and the
    key fields and database-reserved fields are stripped from the result.

    >>> from py2store.persisters._arangodb_in_progress import ArangoDbPersister
    >>> s = ArangoDbPersister()
    >>> k = {'key': '777'} # Each collection will happily accept user-defined _key values.
    >>> v = {'val': 'bar'}
    >>> for _key in s:
    ...     del s[_key]
    ...
    >>> k in s
    False
    >>> len(s)
    0
    >>> s[k] = v
    >>> len(s)
    1
    >>> s[k]
    {'val': 'bar'}
    >>> s.get(k)
    {'val': 'bar'}
    >>> s.get({'not': 'a key'}, {'default': 'val'})  # testing s.get with default
    {'default': 'val'}
    >>> list(s.values())
    [{'val': 'bar'}]
    >>> k in s  # testing __contains__ again
    True
    >>> del s[k]
    >>> len(s)
    0
    >>> s = ArangoDbPersister(db_name='py2store', key_fields=('name',))
    >>> for _key in s:
    ...     del s[_key]
    ...
    >>> s[{'name': 'guido'}] = {'yob': 1956, 'proj': 'python', 'bdfl': False}
    >>> s[{'name': 'guido'}]
    {'yob': 1956, 'proj': 'python', 'bdfl': False}
    >>> s[{'name': 'vitalik'}] = {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    >>> s[{'name': 'vitalik'}]
    {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    >>> for key, val in s.items():
    ...     print(f"{key}: {val}")
    {'name': 'guido'}: {'yob': 1956, 'proj': 'python', 'bdfl': False}
    {'name': 'vitalik'}: {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    """

    # reserved by the database fields
    _reserved = {"_key", "_id", "_rev"}

    def __init__(
            self,
            user='******',
            password='******',
            url='http://127.0.0.1:8529',
            db_name='py2store',
            collection_name='test',
            key_fields=('key', )  # _id, _key and _rev are reserved by db
    ):
        """Connect to the server and make sure the database and collection exist.

        Args:
            user: User name passed to ``Connection``.
            password: Password passed to ``Connection``.
            url: Server URL passed to ``Connection`` as ``arangoURL``.
            db_name: Database to use; created when missing.
            collection_name: Collection to use; created when missing.
            key_fields: Names of the document fields that form the key.
                A single string is accepted and wrapped in a tuple.
        """
        self._connection = Connection(arangoURL=url,
                                      username=user,
                                      password=password)
        self._db_name = db_name
        self._collection_name = collection_name
        # if db not created
        if not self._connection.hasDatabase(self._db_name):
            self._connection.createDatabase(self._db_name)

        self._adb = self._connection[self._db_name]
        # if collection not created
        if not self._adb.hasCollection(self._collection_name):
            self._adb.createCollection(name=self._collection_name)

        # Looked up the same way whether or not it was just created.
        self._collection = self._adb[self._collection_name]

        if isinstance(key_fields, str):
            key_fields = (key_fields, )

        self._key_fields = key_fields

    def __fetchitem__(self, k):
        """Return the document matching the example dict ``k``, or ``None``."""
        f = self._collection.fetchFirstExample(k)
        if f is not None and len(f) == 1:
            return f[0]

        return None

    def __getitem__(self, k):
        """Return the stored value for ``k`` without key and reserved fields.

        Raises:
            KeyError: if no document matches ``k``.
        """
        f = self.__fetchitem__(k)
        if f is None:
            raise KeyError(f"No document found for query: {k}")

        d = f.getStore()
        # exclude reserved keys and corresponded values
        return {
            x: d[x]
            for x in d
            if x not in self._reserved and x not in self._key_fields
        }

    def __setitem__(self, k, v):
        """Store ``v`` under ``k``, replacing a document already stored for ``k``.

        Without removing the previous document every assignment to an
        existing key would add a duplicate: ``len`` would grow and reads
        could keep returning the old value.
        NOTE(review): delete-then-insert is not atomic.
        """
        # Same guard as __delitem__: an empty example presumably matches an
        # arbitrary document, so never delete based on it.
        if len(k) > 0:
            previous = self.__fetchitem__(k)
            if previous is not None:
                previous.delete()

        doc = self._collection.createDocument(dict(k, **v))
        doc.save()

    def __delitem__(self, k):
        """Delete the document matching ``k``.

        Raises:
            KeyError: if ``k`` is empty or no document matches it.
        """
        if len(k) > 0:
            f = self.__fetchitem__(k)
            if f is not None:
                return f.delete()

        raise KeyError(f"You can't removed that key: {k}")

    def __iter__(self):
        """Yield the key dict (key fields only) of every stored document."""
        docs = self._collection.fetchAll()

        # The keys are collected into a list before the first one is yielded,
        # so callers may delete items while iterating (the class doctest does).
        yield from [{x: d[x]
                     for x in d.getStore() if x in self._key_fields}
                    for d in docs]

    def __len__(self):
        """Return the number of documents in the collection."""
        return self._collection.count()
Exemplo n.º 8
0
class dbConnector:
    """Connects with ONLY ONE db and perform operation on it."""

    def __init__(self,
                 db_name,
                 auth=False,
                 password='',
                 user='******',
                 arangoURL='http://127.0.0.1:8529'):
        """Open the connection and bind ``self.db`` to ``db_name``.

        The password is only sent when ``auth`` is true.  The database is
        created when the server does not have it yet.
        """
        connection_kwargs = {'username': user, 'arangoURL': arangoURL}
        if auth:
            connection_kwargs['password'] = password
        self.conn = Connection(**connection_kwargs)

        if not self.conn.hasDatabase(db_name):
            # Create it.
            self.db = self.conn.createDatabase(name=db_name)
        else:
            # Connect to it.
            self.db = pyArango.database.Database(self.conn, db_name)

    def retrieve_collection(self, coll_name):  # doc, bd, coll?
        # Possibly unnecessary
        """Return the ``_store`` of every document in the collection as a list."""
        self.db.reload()
        collection = self.db.collections[coll_name]
        return [document._store for document in collection.fetchAll()]

    def save_document(self, doc, coll_name):  # doc, bd, coll?
        """
        Save ``doc`` as a new document of the collection ``coll_name``.

        ``doc`` is a python dict.  When the collection does not exist a
        message is printed and nothing is saved.
        """
        self.db.reload()
        if not self.db.hasCollection(coll_name):
            print('There is no collection with that name')
            return

        document = self.db.collections[coll_name].createDocument()
        document._store = doc
        document.save()

    def retrieve_document(self, coll_name, doc_name):  # doc, bd, coll?
        """Return the first document whose ``doc_name`` field is truthy.

        ``None`` is returned when no document qualifies.
        """
        self.db.reload()
        # FIXME: crude way of finding documents without a key
        # Maybe use AQL
        for candidate in self.retrieve_collection(coll_name):
            if candidate[doc_name]:
                return candidate
        return None

    def create_collection(self, coll_name):  # doc, bd, coll?
        """Create the collection unless it already exists.

        Nothing is returned; a message is printed when the collection is
        already present.
        """
        self.db.reload()
        if not self.db.hasCollection(coll_name):
            self.db.createCollection(name=coll_name)
        else:
            print('The database already has a collection with that name')
Exemplo n.º 9
0
from pyArango.connection import Connection

# Bootstrap script: make sure the "logistics" database and its two
# collections exist on the server.
conn = Connection(username="******", password="******")
db_name = "logistics"
if not conn.databases.get(db_name):
    db = conn.createDatabase(name="logistics")

db_conn = conn[db_name]

# provider -- presumably the planned document fields:
#   Name, Email, Phone Number, Language, Currency
#   (name, email, mobile, language, currency)
if not db_conn.hasCollection("provider"):
    db_conn.createCollection(name="provider")

# service_area -- presumably the planned document fields:
#   provider id, geojson polygons, polygon name, price
if not db_conn.hasCollection("service_area"):
    db_conn.createCollection(name="service_area")
Exemplo n.º 10
0
class ArangoDbPersister(Persister):
    """
    A basic ArangoDB persister.

    Keys are dicts whose values (taken in ``key_fields`` order) are joined
    into the document ``_key``; values are dicts stored as document fields.

    >>> from py2store.persisters.arangodb_w_pyarango import ArangoDbPersister
    >>> s = ArangoDbPersister()
    >>> k = {'key': '777'} # Each collection will happily accept user-defined _key values.
    >>> v = {'val': 'bar'}
    >>> for _key in s:
    ...     del s[_key]
    ...
    >>> k in s
    False
    >>> len(s)
    0
    >>> s[k] = v
    >>> len(s)
    1
    >>> s[k]
    {'val': 'bar'}
    >>> s.get(k)
    {'val': 'bar'}
    >>> s.get({'not': 'a key'}, {'default': 'val'})  # testing s.get with default
    {'default': 'val'}
    >>> list(s.values())
    [{'val': 'bar'}]
    >>> k in s  # testing __contains__ again
    True
    >>> del s[k]
    >>> len(s)
    0
    >>> s = ArangoDbPersister(db_name='py2store', key_fields=('name',))
    >>> for _key in s:
    ...     del s[_key]
    ...
    >>> s[{'name': 'guido'}] = {'yob': 1956, 'proj': 'python', 'bdfl': False}
    >>> s[{'name': 'guido'}]
    {'yob': 1956, 'proj': 'python', 'bdfl': False}
    >>> s[{'name': 'vitalik'}] = {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    >>> s[{'name': 'vitalik'}]
    {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    >>> for key, val in s.items():
    ...     print(f"{key}: {val}")
    {'name': 'guido'}: {'yob': 1956, 'proj': 'python', 'bdfl': False}
    {'name': 'vitalik'}: {'yob': 1994, 'proj': 'ethereum', 'bdfl': True}
    """

    # reserved by the database fields
    _reserved = {"_key", "_id", "_rev"}

    def __init__(
        self,
        user='******',
        password='******',
        url='http://127.0.0.1:8529',
        db_name='py2store',
        collection_name='test',
        key_fields=('key', ),  # _id, _key and _rev are reserved by db
        key_fields_separator='::',
    ):
        """Connect to the server and make sure the database and collection exist.

        Args:
            user: User name passed to ``Connection``.
            password: Password passed to ``Connection``.
            url: Server URL passed to ``Connection`` as ``arangoURL``.
            db_name: Database to use; created when missing.
            collection_name: Collection to use; created when missing.
            key_fields: Names of the key parts, in the order they are joined.
                A single string is accepted and wrapped in a tuple.
            key_fields_separator: String placed between key parts in ``_key``.
        """
        self._connection = Connection(
            arangoURL=url,
            username=user,
            password=password,
        )

        self._db_name = db_name
        self._collection_name = collection_name

        # If DB not created:
        if not self._connection.hasDatabase(self._db_name):
            self._connection.createDatabase(self._db_name)

        self._adb = self._connection[self._db_name]

        # If collection not created:
        if not self._adb.hasCollection(self._collection_name):
            self._adb.createCollection(name=self._collection_name)

        # Looked up the same way whether or not it was just created.
        self._collection = self._adb[self._collection_name]

        if isinstance(key_fields, str):
            key_fields = (key_fields, )

        self._key_fields = key_fields
        self._key_fields_separator = key_fields_separator

    def _make_key(self, keys_dict):
        """
        Convert a dict of keys into a real key-string by joining dict values in a predefined order.

        The values are joined as they are, so every key value must be a string.

        DB requirements for the key:
            The key must be a string value.
            Keys are case-sensitive.
            Numeric keys are not allowed.
            The key must be from 1 byte to 254 bytes long.
            It must consist of:
                - letters a-z (lower or upper case),
                - digits 0-9
                - any of the following characters: _ - : . @ ( ) + , = ; $ ! * ' %

            Any other characters cannot be used inside key values.
        """
        key_values = [keys_dict[key_label] for key_label in self._key_fields]
        key_str = self._key_fields_separator.join(key_values)
        return key_str

    def _split_key(self, joined_key_str):
        """
        Convert a key-string used by DB internally
        into a user-friendly dict of key labels and values.

        Parts are paired with ``key_fields`` positionally, so a key value
        that itself contains the separator will not round-trip.
        """
        key_values = joined_key_str.split(self._key_fields_separator)
        keys_dict = dict(zip(self._key_fields, key_values))
        return keys_dict

    def __fetchitem__(self, keys_dict):
        """Return the document stored under ``keys_dict``.

        Raises:
            KeyError: if the collection has no document with that key.
        """
        key = self._make_key(keys_dict)
        try:
            return self._collection[key]
        except DocumentNotFoundError as err:
            raise KeyError(f"No document found for query: {keys_dict}") from err

    def __getitem__(self, keys_dict):
        """Return the stored value for ``keys_dict`` without key and reserved fields."""
        item = self.__fetchitem__(keys_dict)
        doc = item.getStore()

        # todo (Mike): maybe move this cleanup to a base Arango Store?
        # exclude reserved keys and corresponded values
        data = {
            key: doc[key]
            for key in doc
            if key not in self._reserved and key not in self._key_fields
        }
        return data

    def __setitem__(self, keys_dict, values_dict):
        """Write ``values_dict`` into the document stored under ``keys_dict``.

        The document is created when missing.
        NOTE(review): on an existing document only the given fields are
        assigned, so fields from an earlier value that are absent from
        ``values_dict`` appear to be kept -- confirm this is intended.
        """
        try:
            doc = self.__fetchitem__(keys_dict)
        except KeyError:
            doc = self._collection.createDocument()
            doc._key = self._make_key(keys_dict)

        for k, v in values_dict.items():
            doc[k] = v

        doc.save()

    def __delitem__(self, keys_dict):
        """Delete the document stored under ``keys_dict`` (KeyError if missing)."""
        doc = self.__fetchitem__(keys_dict)
        doc.delete()

    def __iter__(self):
        """Yield the key dict of every stored document.

        The key parts live only in the document ``_key`` (``__setitem__``
        does not store them as fields), so they are decoded from there.
        """
        docs = self._collection.fetchAll()

        # Collect the keys before yielding so that callers may delete items
        # while iterating, as the class doctest does.
        keys = [self._split_key(doc._key) for doc in docs]
        yield from keys

    def __len__(self):
        """Return the number of documents in the collection."""
        return self._collection.count()
Exemplo n.º 11
0
# Scopes requested when building the service-account credentials below.
SCOPE = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive'
]

# Google Sheets access: credentials are loaded from credentials.json in
# DIR_PATH, and the spreadsheet is opened by the key found in CONFIG.
CREDENTIALS = ServiceAccountCredentials.from_json_keyfile_name(
    f'{DIR_PATH}/credentials.json', SCOPE)
CLIENT = gspread.authorize(CREDENTIALS)
SHEET = CLIENT.open_by_key(CONFIG['google_sheet']['key'])

# NOTE(review): presumably the first day for which reviews exist -- confirm.
REVIEW_FIRST_DAY = date(2018, 5, 3)

# Database access: connect with the credentials from CONFIG and create the
# 'utopian' database on first use.
CONN = Connection(username=CONFIG['db']['username'],
                  password=CONFIG['db']['password'])
if not CONN.hasDatabase('utopian'):
    CONN.createDatabase('utopian')

DB = CONN['utopian']

# Name of the collection bound to postCol further down.
POSTS_COLLECTION = 'posts'


def connect_collection(db, col_name):
    """Return the collection ``col_name`` of ``db``, creating it when missing."""
    already_present = db.hasCollection(name=col_name)
    if not already_present:
        db.createCollection(name=col_name)
    collection = db[col_name]
    return collection


# Handle to the posts collection; it is created at import time when missing.
postCol = connect_collection(DB, POSTS_COLLECTION)

Exemplo n.º 12
0
class ArangoDB:  # pylint: disable = R0902
    """Open the configured database and expose its collections as attributes."""

    def __init__(self) -> None:
        """Connect using ``config`` and fetch (or create) every collection."""
        self.conn = Connection(
            arangoURL=config.db_host,
            username=config.db_username,
            password=config.db_password,
        )
        self.db = self._get_db(config.db_name)

        self.groups: Chats = self._get_collection('Chats')

        # Autobahn blacklists, one collection per blacklist type.
        self.ab_bio_blacklist: AutobahnBioBlacklist = self._get_collection(
            'AutobahnBioBlacklist')
        self.ab_string_blacklist: AutobahnStringBlacklist = self._get_collection(
            'AutobahnStringBlacklist')
        self.ab_filename_blacklist: AutobahnFilenameBlacklist = self._get_collection(
            'AutobahnFilenameBlacklist')
        self.ab_channel_blacklist: AutobahnChannelBlacklist = self._get_collection(
            'AutobahnChannelBlacklist')
        self.ab_domain_blacklist: AutobahnDomainBlacklist = self._get_collection(
            'AutobahnDomainBlacklist')

        # Lookup from a hex type code to the matching blacklist collection.
        self.ab_collection_map = {
            '0x0': self.ab_bio_blacklist,
            '0x1': self.ab_string_blacklist,
            '0x2': self.ab_filename_blacklist,
            '0x3': self.ab_channel_blacklist,
            '0x4': self.ab_domain_blacklist,
        }

        self.banlist: BanList = self._get_collection('BanList')

    def query(self,
              query: str,
              batch_size: int = 100,
              raw_results: bool = False,
              bind_vars: Dict = None,
              options: Dict = None,
              count: bool = False,
              full_count: bool = False,
              json_encoder: bool = None,
              **kwargs: Any) -> AQLQuery:  # pylint: disable = R0913
        """Run an AQL query on the database, so callers can skip `db.db.AQLQuery`.

        The snake_case arguments are forwarded under pyArango's camelCase
        names; ``bind_vars`` and ``options`` become empty dicts when falsy.
        """
        if not bind_vars:
            bind_vars = {}
        if not options:
            options = {}
        return self.db.AQLQuery(
            query,
            rawResults=raw_results,
            batchSize=batch_size,
            bindVars=bind_vars,
            options=options,
            count=count,
            fullCount=full_count,
            json_encoder=json_encoder,
            **kwargs,
        )

    def _get_db(self, db: str) -> Database:
        """Return a database, creating it first when it does not exist.

        Args:
            db: The name of the Database

        Returns: The Database object

        """
        if not self.conn.hasDatabase(db):
            return self.conn.createDatabase(db)
        return self.conn[db]

    def _get_collection(self, collection: str) -> Collection:
        """Return a collection, creating it first when it does not exist.

        Args:
            collection: The name of the collection

        Returns: The Collection object

        """
        if not self.db.hasCollection(collection):
            return self.db.createCollection(collection)
        return self.db[collection]
Exemplo n.º 13
0
    def __init__(self, host=None, username=None, password=None, fresh=False):
        """Connect to ArangoDB, prepare the schema and start the bulk inserters.

        Args:
            host: Server URL passed to ``Connection`` as ``arangoURL``.
            username: User name passed to ``Connection``.
            password: Password passed to ``Connection``.
            fresh: When true, the known collections and edge collections are
                deleted before being created again.

        The process exits with status 1 when ``Connection`` raises
        ``ConnectionError``.
        """

        # database
        self.host = host
        self.username = username
        self.password = password
        self.databaseName = 'XRP_Ledger'
        self.collectionsList = ['accounts', 'transactions']
        self.collections = {}
        self.edgeCollectionsList = ['transactionOutput']
        self.edgeCollections = {}

        # processes
        # Half of the CPUs, but never fewer than one: with a single CPU the
        # plain division gave 0, i.e. no inserters at all and queues created
        # with maxsize=0.
        self.maxProcess = max(1, cpu_count() // 2)
        self.batchSize = 500
        self.maxQueueSize = self.batchSize * self.maxProcess

        # queue
        # NOTE(review): every Manager() call builds its own manager; a single
        # shared one may be enough -- confirm before changing.
        self.accountsQueue = Manager().Queue(maxsize=self.maxQueueSize)
        self.transactionsQueue = Manager().Queue(maxsize=self.maxQueueSize)
        self.transactionsOutputQueue = Manager().Queue(
            maxsize=self.maxQueueSize)

        # tracking
        self.lastStoredSeq = None

        # create connection
        try:
            conn = Connection(arangoURL=host,
                              username=username,
                              password=password)
        except ConnectionError:
            print("Unable to establish connection to the database")
            sys.exit(1)

        # setup database: try to create it, fall back to the existing one
        try:
            db = conn.createDatabase(name=self.databaseName)
        except CreationError:
            db = conn[self.databaseName]

        if fresh:
            for collection in self.collectionsList + self.edgeCollectionsList:
                if db.hasCollection(collection):
                    db.collections[collection].delete()
            # Refresh the database object after the deletions so the checks
            # below see the current state.
            db.reload()

        # setup collections
        for collection in self.collectionsList:
            if not db.hasCollection(collection):
                db.createCollection(name=collection, className='Collection')

        # setup edge collections
        for edge in self.edgeCollectionsList:
            if not db.hasCollection(edge):
                db.createCollection(name=edge, className='Edges')

        # set last processed ledger seq (highest LedgerIndex stored so far)
        aql = "FOR tx IN transactions SORT tx.LedgerIndex DESC LIMIT 1 RETURN tx.LedgerIndex"
        queryResult = db.AQLQuery(aql, rawResults=True)
        if len(queryResult) > 0:
            self.lastStoredSeq = queryResult[0]

        # run the bulk inserters: one per queue for each unit of maxProcess
        self.processes = []

        for _ in range(self.maxProcess):
            self.processes.append(
                BulkInsert(self.get_connection('accounts'), self.accountsQueue,
                           self.batchSize))
            self.processes.append(
                BulkInsert(self.get_connection('transactions'),
                           self.transactionsQueue, self.batchSize))
            self.processes.append(
                BulkInsert(self.get_connection('transactionOutput'),
                           self.transactionsOutputQueue, self.batchSize))

        for t in self.processes:
            t.start()