pythonDataframe从api到clickhouse系列对象是可变的,因此它们不能被散列

wfveoks0  于 2021-06-15  发布在  ClickHouse
关注(0)|答案(0)|浏览(273)
from clickhouse_driver import Client
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pprint
import pandas as pd
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import UInt16Field, StringField, Float32Field
from infi.clickhouse_orm.models import Model
from sqlalchemy import create_engine
import pandahouse as ph

scope = ["https://spreadsheets.google.com/feeds","https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json',scope)
client = gspread.authorize(creds)

sheet = client.open('Data base').worksheet("Sum Data")

# create table

data = sheet.get_all_values()
headers = data.pop(0)

pdf = pd.DataFrame(data, columns=headers)

print(pdf.head())

client = Client(database='default',
                  host='localhost',
                  user='default',
                  password='xxxx')

# define the ClickHouse table schema

class test(Model):
    id = UInt16Field()
    link = StringField()
    status = StringField()
    pomo_resource = StringField()
    partner_name = StringField()
    date_added = UInt16Field()
    responsibleid = StringField()
    country = StringField()
    language = StringField()
    integration = StringField()
    price = Float32Field()
    placement_type = StringField()
    placement_link = StringField()
    tracking_link = StringField()
    price_in_usd = Float32Field()
    placement_date = UInt16Field()
    end_date = UInt16Field()
    rfa = StringField()
    rfp = StringField()
    approval = StringField()
    approval_comment = StringField()
    payment_status = StringField()
    payment_date = UInt16Field()
    link = StringField()

connection = create_engine("clickhouse://default:xxxx@localhost/default")

# df processing blablabla...

# df processing blablabla...

ph.to_clickhouse(pdf, 'test', index=False, chunksize=100000, connection=connection)

我正试图通过电子表格中的api在clickhouse中输出Dataframe,但出现以下错误:

[5 rows x 26 columns]
Traceback (most recent call last):
  File "spreadsheets.py", line 74, in <module>
    ph.to_clickhouse(pdf, 'test', index=False, chunksize=100000, connection=connection)
  File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/core.py", line 62, in to_clickhouse
    query, df = insertion(df, table, index=index)
  File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/core.py", line 23, in insertion
    _, df = normalize(df, index=index)
  File "/home/ubuntu/.local/lib/python2.7/site-packages/pandahouse/convert.py", line 45, in normalize
    dtypes = valmap(PD2CH.get, OrderedDict(df.dtypes))
  File "/home/ubuntu/.local/lib/python2.7/site-packages/toolz/dicttoolz.py", line 83, in valmap
    rv.update(zip(iterkeys(d), map(func, itervalues(d))))
  File "/home/ubuntu/.local/lib/python2.7/site-packages/pandas/core/generic.py", line 1816, in __hash__
    ' hashed'.format(self.__class__.__name__))
TypeError: 'Series' objects are mutable, thus they cannot be hashed

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题