from clickhouse_driver import Client
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pprint
import pandas as pd
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import UInt16Field, StringField, Float32Field
from infi.clickhouse_orm.models import Model
from sqlalchemy import create_engine
import pandahouse as ph
scope = ["https://spreadsheets.google.com/feeds","https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json',scope)
client = gspread.authorize(creds)
sheet = client.open('Data base').worksheet("Sum Data")
# create table
data = sheet.get_all_values()
headers = data.pop(0)
pdf = pd.DataFrame(data, columns=headers)
print(pdf.head())
client = Client(database='default',
host='localhost',
user='default',
password='xxxx')
# define the ClickHouse table schema
class test(Model):
id = UInt16Field()
link = StringField()
status = StringField()
pomo_resource = StringField()
partner_name = StringField()
date_added = UInt16Field()
responsibleid = StringField()
country = StringField()
language = StringField()
integration = StringField()
price = Float32Field()
placement_type = StringField()
placement_link = StringField()
tracking_link = StringField()
price_in_usd = Float32Field()
placement_date = UInt16Field()
end_date = UInt16Field()
rfa = StringField()
rfp = StringField()
approval = StringField()
approval_comment = StringField()
payment_status = StringField()
payment_date = UInt16Field()
link = StringField()
connection = create_engine("clickhouse://default:xxxx@localhost/default")
# df processing blablabla...
# df processing blablabla...
ph.to_clickhouse(pdf, 'test', index=False, chunksize=100000, connection=connection)
我正试图通过电子表格中的api在clickhouse中输出Dataframe,但出现以下错误:
[5 rows x 26 columns]
Traceback (most recent call last):
File "spreadsheets.py", line 74, in <module>
ph.to_clickhouse(pdf, 'test', index=False, chunksize=100000, connection=connection)
File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/core.py", line 62, in to_clickhouse
query, df = insertion(df, table, index=index)
File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/core.py", line 23, in insertion
_, df = normalize(df, index=index)
File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/convert.py", line 45, in normalize
dtypes = valmap(PD2CH.get, OrderedDict(df.dtypes))
File "/home/ubuntu/.local/lib/python2.7/site-packages/toolz/dicttoolz.py", line 83, in valmap
rv.update(zip(iterkeys(d), map(func, itervalues(d))))
File "/home/ubuntu/.local/lib/python2.7/site-packages/pandas/core/generic.py", line 1816, in __hash__
' hashed'.format(self.__class__.__name__))
TypeError: 'Series' objects are mutable, thus they cannot be hashed
暂无答案!
目前还没有任何答案,快来回答吧!