How can I connect to SQL Server in Python using Polars or ConnectorX without receiving a runtime error?

y3bcpkx1  于 2023-06-04  发布在  SQL Server
关注(0)|答案(1)|浏览(151)

I am trying to connect to SQL Server using either polars or connectorx and I always get a
RuntimeError: Timed out in bb8

My code is along these lines

import pandas as pd
import polars as pl
import connectorx as cx

user='my_user'
password='my_password'
server='my_server'
database='my_database'

conn =f"mssql+pyodbc://{user}:{password}@{server}/{database}"

query = "SELECT * FROM [dbo].[my_table]"

When using trying with cx:

df = cx.read_sql(conn,query)

I get the error:

RuntimeError Traceback (most recent call last) Input In [4], in () ----> 1 df = cx.read_sql(conn,query)

File c:\Program Files\Python39\lib\site-packages\connectorx_init_.py:224, in read_sql(conn, query, return_type, protocol, partition_on, partition_range, partition_num, index_col) 221 except ModuleNotFoundError: 222 raise ValueError("You need to install pandas first") --> 224 result = _read_sql( 225 conn, 226 "pandas", 227 queries=queries, 228 protocol=protocol, 229 partition_query=partition_query, 230 ) 231 df = reconstruct_pandas(result) 233 if index_col is not None:

RuntimeError: Timed out in bb8

If instead I use

df = pl.read_sql(query,conn)

I get

RuntimeError                              Traceback (most recent call last)
Input In [5], in ()
----> 1 df = pl.read_sql(query,conn)

File c:\Program Files\Python39\lib\site-packages\polars\io\database.py:196, in read_sql(query, connection_uri, partition_on, partition_range, partition_num, protocol, engine)
    121 """
    122 Read a SQL query into a DataFrame.
    123 
   (...)
    188 
    189 """
    190 warnings.warn(
    191     "`read_sql` has been renamed; this"
    192     " redirect is temporary, please use `read_database` instead",
    193     category=DeprecationWarning,
    194     stacklevel=find_stacklevel(),
    195 )
--> 196 return read_database(
    197     query=query,
    198     connection_uri=connection_uri,
    199     partition_on=partition_on,
    200     partition_range=partition_range,
    201     partition_num=partition_num,
    202     protocol=protocol,
    203     engine=engine,
    204 )

File c:\Program Files\Python39\lib\site-packages\polars\io\database.py:95, in read_database(query, connection_uri, partition_on, partition_range, partition_num, protocol, engine)
     24 """
     25 Read a SQL query into a DataFrame.
     26 
   (...)
     92 
     93 """
     94 if engine == "connectorx":
---> 95     return _read_sql_connectorx(
     96         query,
     97         connection_uri,
     98         partition_on=partition_on,
     99         partition_range=partition_range,
    100         partition_num=partition_num,
    101         protocol=protocol,
    102     )
    103 elif engine == "adbc":
    104     if not isinstance(query, str):

File c:\Program Files\Python39\lib\site-packages\polars\io\database.py:222, in _read_sql_connectorx(query, connection_uri, partition_on, partition_range, partition_num, protocol)
    217 except ImportError:
    218     raise ImportError(
    219         "connectorx is not installed. Please run `pip install connectorx>=0.3.1`."
    220     ) from None
--> 222 tbl = cx.read_sql(
    223     conn=connection_uri,
    224     query=query,
    225     return_type="arrow2",
    226     partition_on=partition_on,
    227     partition_range=partition_range,
    228     partition_num=partition_num,
    229     protocol=protocol,
    230 )
    232 return from_arrow(tbl)

File c:\Program Files\Python39\lib\site-packages\connectorx\__init__.py:257, in read_sql(conn, query, return_type, protocol, partition_on, partition_range, partition_num, index_col)
    254 except ModuleNotFoundError:
    255     raise ValueError("You need to install pyarrow first")
--> 257 result = _read_sql(
    258     conn,
    259     "arrow" if return_type in {"arrow", "polars"} else "arrow2",
    260     queries=queries,
    261     protocol=protocol,
    262     partition_query=partition_query,
    263 )
    264 df = reconstruct_arrow(result)
    265 if return_type in {"polars", "polars2"}:

RuntimeError: Timed out in bb8

I'd like to get the dataframe directly into polars without having to use pl.from_pandas() method.

Any advice?

dy2hfwbg

dy2hfwbg1#

The thing is, unlike pandas/sqlalchemy, I need to give the connection string directly, not an engine;

import polars as pl

user='my_user'
password='my_password'
server='my_server'
database='my_database'

conn_str =f"mssql://{user}:{password}@{server}/{database}?encrypt=true"

query = "SELECT * FROM [dbo].[my_table]"
df = pl.read_database(query=sql_text, connection_uri=conn_str)

If I want to combine sqlalchemy to polars in order to bind variables to my query, then I also need the query to be a string. I added the workaround below (compiled_query), unless someone know a better way with connectorx

import polars as pl
import sqlalchemy
conn_str = f"mssql://{user}:{password}@{server}/{database}?encrypt=true"
sql_text = sqlalchemy.text(
""" 
SELECT CONVERT(DATE, Date) AS [Date]
, Exchange_Rate / 100 AS [Exchange_Rate] FROM [dbo].[my_table] WHERE Date >= :Date 
"""
).bindparams(sqlalchemy.bindparam("Date", value="2022-05-31"))
compiled_query = str(sql_text.compile(compile_kwargs={"literal_binds": True})) 
df = pl.read_database(compiled_query, conn_str)

相关问题