Source code for dataset.database

import logging
import threading
from urllib.parse import parse_qs, urlparse

from sqlalchemy import create_engine, inspect
from sqlalchemy.sql import text
from sqlalchemy.schema import MetaData
from sqlalchemy.util import safe_reraise
from sqlalchemy import event

from alembic.migration import MigrationContext
from alembic.operations import Operations

from dataset.table import Table
from dataset.util import ResultIter, row_type, safe_url, QUERY_STEP
from dataset.util import normalize_table_name
from dataset.types import Types

log = logging.getLogger(__name__)


[docs]class Database(object):
    """A database object represents a SQL database with multiple tables."""

    def __init__(
        self,
        url,
        schema=None,
        engine_kwargs=None,
        ensure_schema=True,
        row_type=row_type,
        sqlite_wal_mode=True,
        on_connect_statements=None,
    ):
        """Configure and connect to the database."""
        if engine_kwargs is None:
            engine_kwargs = {}

        parsed_url = urlparse(url)
        # if parsed_url.scheme.lower() in 'sqlite':
        #     # ref: https://github.com/pudo/dataset/issues/163
        #     if 'poolclass' not in engine_kwargs:
        #         engine_kwargs['poolclass'] = StaticPool

        self.lock = threading.RLock()
        self.local = threading.local()
        self.connections = {}

        if len(parsed_url.query):
            query = parse_qs(parsed_url.query)
            if schema is None:
                schema_qs = query.get("schema", query.get("searchpath", []))
                if len(schema_qs):
                    schema = schema_qs.pop()

        self.schema = schema
        self.engine = create_engine(url, **engine_kwargs)
        self.is_postgres = self.engine.dialect.name == "postgresql"
        self.is_sqlite = self.engine.dialect.name == "sqlite"
        if on_connect_statements is None:
            on_connect_statements = []

        def _run_on_connect(dbapi_con, con_record):
            # reference:
            # https://stackoverflow.com/questions/9671490/how-to-set-sqlite-pragma-statements-with-sqlalchemy
            # https://stackoverflow.com/a/7831210/1890086
            for statement in on_connect_statements:
                dbapi_con.execute(statement)

        if self.is_sqlite and parsed_url.path != "" and sqlite_wal_mode:
            # we only enable WAL mode for sqlite databases that are not in-memory
            on_connect_statements.append("PRAGMA journal_mode=WAL")

        if len(on_connect_statements):
            event.listen(self.engine, "connect", _run_on_connect)

        self.types = Types(is_postgres=self.is_postgres)
        self.url = url
        self.row_type = row_type
        self.ensure_schema = ensure_schema
        self._tables = {}

    @property
    def executable(self):
        """Connection against which statements will be executed."""
        with self.lock:
            tid = threading.get_ident()
            if tid not in self.connections:
                self.connections[tid] = self.engine.connect()
            return self.connections[tid]

    @property
    def op(self):
        """Get an alembic operations context."""
        ctx = MigrationContext.configure(self.executable)
        return Operations(ctx)

    @property
    def inspect(self):
        """Get a SQLAlchemy inspector."""
        return inspect(self.executable)

    def has_table(self, name):
        return self.inspect.has_table(name, schema=self.schema)

    @property
    def metadata(self):
        """Return a SQLAlchemy schema cache object."""
        return MetaData(schema=self.schema, bind=self.executable)

    @property
    def in_transaction(self):
        """Check if this database is in a transactional context."""
        if not hasattr(self.local, "tx"):
            return False
        return len(self.local.tx) > 0

    def _flush_tables(self):
        """Clear the table metadata after transaction rollbacks."""
        for table in self._tables.values():
            table._table = None

[docs]    def begin(self):
        """Enter a transaction explicitly.

        No data will be written until the transaction has been committed.
        """
        if not hasattr(self.local, "tx"):
            self.local.tx = []
        self.local.tx.append(self.executable.begin())

[docs]    def commit(self):
        """Commit the current transaction.

        Make all statements executed since the transaction was begun permanent.
        """
        if hasattr(self.local, "tx") and self.local.tx:
            tx = self.local.tx.pop()
            tx.commit()
            # Removed in 2020-12, I'm a bit worried this means that some DDL
            # operations in transactions won't cause metadata to refresh any
            # more:
            # self._flush_tables()

[docs]    def rollback(self):
        """Roll back the current transaction.

        Discard all statements executed since the transaction was begun.
        """
        if hasattr(self.local, "tx") and self.local.tx:
            tx = self.local.tx.pop()
            tx.rollback()
            self._flush_tables()

    def __enter__(self):
        """Start a transaction."""
        self.begin()
        return self

    def __exit__(self, error_type, error_value, traceback):
        """End a transaction by committing or rolling back."""
        if error_type is None:
            try:
                self.commit()
            except Exception:
                with safe_reraise():
                    self.rollback()
        else:
            self.rollback()

    def close(self):
        """Close database connections. Makes this object unusable."""
        with self.lock:
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()
        self.engine.dispose()
        self._tables = {}
        self.engine = None

    @property
    def tables(self):
        """Get a listing of all tables that exist in the database."""
        return self.inspect.get_table_names(schema=self.schema)

    @property
    def views(self):
        """Get a listing of all views that exist in the database."""
        return self.inspect.get_view_names(schema=self.schema)

    def __contains__(self, table_name):
        """Check if the given table name exists in the database."""
        try:
            table_name = normalize_table_name(table_name)
            if table_name in self.tables:
                return True
            if table_name in self.views:
                return True
            return False
        except ValueError:
            return False

[docs]    def create_table(
        self, table_name, primary_id=None, primary_type=None, primary_increment=None
    ):
        """Create a new table.

        Either loads a table or creates it if it doesn't exist yet. You can
        define the name and type of the primary key field, if a new table is to
        be created. The default is to create an auto-incrementing integer,
        ``id``. You can also set the primary key to be a string or big integer.
        The caller will be responsible for the uniqueness of ``primary_id`` if
        it is defined as a text type. You can disable auto-increment behaviour
        for numeric primary keys by setting `primary_increment` to `False`.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3',
                                     primary_id='city',
                                     primary_type=db.types.text)
            # custom length of String
            table4 = db.create_table('population4',
                                     primary_id='city',
                                     primary_type=db.types.string(25))
            # no primary key
            table5 = db.create_table('population5',
                                     primary_id=False)
        """
        assert not isinstance(
            primary_type, str
        ), "Text-based primary_type support is dropped, use db.types."
        table_name = normalize_table_name(table_name)
        with self.lock:
            if table_name not in self._tables:
                self._tables[table_name] = Table(
                    self,
                    table_name,
                    primary_id=primary_id,
                    primary_type=primary_type,
                    primary_increment=primary_increment,
                    auto_create=True,
                )
            return self._tables.get(table_name)

[docs]    def load_table(self, table_name):
        """Load a table.

        This will fail if the tables does not already exist in the database. If
        the table exists, its columns will be reflected and are available on
        the :py:class:`Table <dataset.Table>` object.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.load_table('population')
        """
        table_name = normalize_table_name(table_name)
        with self.lock:
            if table_name not in self._tables:
                self._tables[table_name] = Table(self, table_name)
            return self._tables.get(table_name)

[docs]    def get_table(
        self,
        table_name,
        primary_id=None,
        primary_type=None,
        primary_increment=None,
    ):
        """Load or create a table.

        This is now the same as ``create_table``.
        ::

            table = db.get_table('population')
            # you can also use the short-hand syntax:
            table = db['population']
        """
        if not self.ensure_schema:
            return self.load_table(table_name)
        return self.create_table(
            table_name, primary_id, primary_type, primary_increment
        )

    def __getitem__(self, table_name):
        """Get a given table."""
        return self.get_table(table_name)

    def _ipython_key_completions_(self):
        """Completion for table names with IPython."""
        return self.tables

[docs]    def query(self, query, *args, **kwargs):
        """Run a statement on the database directly.

        Allows for the execution of arbitrary read/write queries. A query can
        either be a plain text string, or a `SQLAlchemy expression
        <http://docs.sqlalchemy.org/en/latest/core/tutorial.html#selecting>`_.
        If a plain string is passed in, it will be converted to an expression
        automatically.

        Further positional and keyword arguments will be used for parameter
        binding. To include a positional argument in your query, use question
        marks in the query (i.e. ``SELECT * FROM tbl WHERE a = ?``). For
        keyword arguments, use a bind parameter (i.e. ``SELECT * FROM tbl
        WHERE a = :foo``).
        ::

            statement = 'SELECT user, COUNT(*) c FROM photos GROUP BY user'
            for row in db.query(statement):
                print(row['user'], row['c'])

        The returned iterator will yield each result sequentially.
        """
        if isinstance(query, str):
            query = text(query)
        _step = kwargs.pop("_step", QUERY_STEP)
        if _step is False or _step == 0:
            _step = None
        rp = self.executable.execute(query, *args, **kwargs)
        return ResultIter(rp, row_type=self.row_type, step=_step)

    def __repr__(self):
        """Text representation contains the URL."""
        return "<Database(%s)>" % safe_url(self.url)