I'm using Python to grab a generated report, clean the file up, format it, and then send it to the SQL Server for housing and to push to Tableau after.
The error is in a FirstName column where I have a float object.
'float' object has no attribute 'replace'
The problem line of code is:
df[['FirstName', 'Business_Title']] = df['FirstName'].astype(str).str.rsplit(' ', 1, expand=True)
This is the code I am using:
df[['FirstName', 'Business_Title']] = df['FirstName'].astype(str).str.rsplit(' ', 1, expand=True)
I have previously tried using:
df[['FirstName', 'Business_Title']] = df['FirstName'].str.rsplit(' ', 1, expand=True)
df[['FirstName', 'Business_Title']] = df['FirstName'].apply(lambda x: pd.Series(str(x).rsplit(' ', 1)) if pd.notna(x) and isinstance(x, str) else pd.Series([x, '']))
Thank you for any assistance.
This is the whole code I am working with (with PII covered)(The problem line is block quoted)
import pandas as pd
import pyodbc
def read_and_split_name(file_path, server_name, database_name):
try:
# Read the Excel file into a pandas DataFrame, skip the first 4 rows, and use the 5th row as the header
df = pd.read_excel(file_path, header=4)
# Split the "Name" column into "First Name" and "Last Name" based on the comma
df[['LastName', 'FirstName']] = df['Rendering'].astype(str).str.split(', ', 1, expand=True)
# Further split the "First Name" column by spaces and add a new column "Business_Title"
# Handle non-string values by converting them to strings
df['FirstName'] = df['FirstName'].astype(str)
> df[['FirstName', 'Business_Title']] = df['FirstName'].astype(str).str.rsplit(' ', 1, expand=True)
>
# Use transform('nunique') to get the count of unique values for each group
df['_Unique'] = df.groupby(['Loc Name', 'LastName', 'FirstName'])['Enc Dt'].transform('nunique')
# Use transform('count') to get the total count for each group
df['_ENC'] = df.groupby(['Loc Name', 'LastName'])['Enc Dt'].transform('count')
# Drop rows where last name is 'Nurse' or 'Enabling'
df = df[~df['LastName'].isin(['Nurse', 'Enabling'])]
# Replace spaces and dashes with underscores in the "Loc Name" column
df['Loc Name'] = df['Loc Name'].str.replace(' ', '_').str.replace('-', '_')
# Calculate the new column "ENC_Per_"
df['ENC_Per_'] = df['_ENC'] / df['_Unique']
# Replace non-finite values (NA or inf) with a suitable replacement (e.g., 0)
df['_Unique'] = df['_Unique'].fillna(0).astype(int)
df['_ENC'] = df['_ENC'].fillna(0).astype(int)
df['ENC_Per_'] = df['ENC_Per_'].fillna(0).astype(int)
# Extract the month part of the date in MMM format
df['MMM_Format'] = pd.to_datetime(df['Enc Dt'], errors='coerce', format='%m/%d/%Y').dt.strftime('%b')
# Convert the month abbreviations to uppercase
df['MMM_Format'] = df['MMM_Format'].str.upper()
# Update column headers based on the formatted month
df.rename(columns={
'_ENC': f"{df['MMM_Format'][0]}_ENC",
'ENC_Per_': f"ENC_Per_{df['MMM_Format'][0]}",
'_Unique': f"{df['MMM_Format'][0]}_Unique"
}, inplace=True)
# Export the DataFrame to a Microsoft SQL Server table using pyodbc
conn_str = f"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database_name};Trusted_Connection=yes;"
with pyodbc.connect(conn_str) as conn:
cursor = conn.cursor()
for index, row in df.iterrows():
table_name = row['Loc Name'].replace(' ', '_').replace('-', '_') # Use Loc Name as the table name
update_query = f"""
UPDATE {table_name}
SET FirstName = '{row['FirstName']}',
LastName = '{row['LastName']}',
Business_Title = '{row['Business_Title']}',
{row['MMM_Format']}_Unique = {row[f"{row['MMM_Format']}_Unique"]},
{row['MMM_Format']}_ENC = {row[f"{row['MMM_Format']}_ENC"]}
WHERE FirstName = '{row['FirstName']}' AND LastName = '{row['LastName']}'
"""
cursor.execute(update_query)
print("Update to SQL Server successful.")
except Exception as e:
print(f"An error occurred: {e}")
# Specify the path to your .xls Excel file
file_path = "FILEPATH"
# Specify SQL Server connection details
server_name = "SERVERNAME"
database_name = "DATABASENAME"
# Call the function to read, split names, replace spaces and dashes, calculate new column, convert to integers, update column headers, and update SQL Server
read_and_split_name(file_path, server_name, database_name)
1条答案
按热度按时间6jygbczu1#
Explicitly declaring dtypes on read allow using
.str.rsplit
without casting:Since your error is regarding
replace
however, I don't think these lines are your issue. There are two lines that reference replace without casting:You can include
'Loc Name':str
in the dtype dictionary to check if these are the issue.