我正在尝试运行以下代码:
import time
import pandas as pd
import numpy as np
CITY_DATA = {'chicago': 'chicago.csv',
'new york city': 'new_york_city.csv',
'washington': 'washington.csv'}
def get_filters():
"""
Asks user to specify a city, month, and day to analyze.
Returns:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
"""
print('Hello! Let\'s explore some US bikeshare data!')
# get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
while True:
city = input('Which city you would like to explore : "chicago" , "new york city" , or "washington" :' )
if city not in ('chicago', 'new york city', 'washington'):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for month (all, january, february, ... , june)
while True:
month = input('Enter "all" for all data or chose a month : "january" , "february" , "march", "april" , "may" or "june " :')
if month not in ("all", "january", "february", "march", "april", "may", "june"):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for day of week (all, monday, tuesday, ... sunday)
while True:
day = input('Enter "all" for all days or chose a day : "saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday": ')
if day not in ("all","saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday"):
print(" You entered wrong choice , please try again")
continue
else:
break
print('-'*60)
return city, month, day
def load_data(city, month, day):
"""
Loads data for the specified city and filters by month and day if applicable.
Args:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
Returns:
df - Pandas DataFrame containing city data filtered by month and day
"""
df = pd.read_csv(CITY_DATA[city])
# convert the Start Time column to datetime
df['Start Time'] = pd.to_datetime(df['Start Time'])
# extract month , day of week , and hour from Start Time to new columns
df['month'] = df['Start Time'].dt.month
df['day_of_week'] = df['Start Time'].dt.day_name
df['hour'] = df['Start Time'].dt.hour
# filter by month if applicable
if month != 'all':
# use the index of the month_list to get the corresponding int
months = ['january', 'february', 'march', 'april', 'may', 'june']
month = months.index(month) + 1
# filter by month to create the new dataframe
df = df[df['month'] == month]
# filter by day of week if applicable
if day != 'all':
# filter by day of week to create the new dataframe
df = df[df['day_of_week'] == day.title()]
return df
def time_stats(df):
"""Displays statistics on the most frequent times of travel."""
print('\nCalculating The Most Frequent Times of Travel...\n')
start_time = time.time()
# display the most common month
popular_month = df['month'].mode()[0]
print('\n The most popular month is : \n', popular_month)
# display the most common day of week
popular_day = df['day_of_week'].mode()[0]
print('\n The most popular day of the week is : \n', str(popular_day))
# display the most common start hour
popular_hour = df['hour'].mode()[0]
print('\n The most popular hour of the day is :\n ', popular_hour)
print("\nThis took %s seconds.\n" % (time.time() - start_time))
print('-'*60)
def station_stats(df):
"""Displays statistics on the most popular stations and trip."""
print('\nCalculating The Most Popular Stations and Trip...\n')
start_time = time.time()
# display most commonly used start station
start_station = df['Start Station'].value_counts().idxmax()
print('\n The most commonly used start station is : \n', start_station)
# display most commonly used end station
end_station = df['End Station'].value_counts().idxmax()
print('\nThe most commonly used end station is: \n', end_station)
# display most frequent combination of start station and end station trip
combination = df.groupby(['Start Station','End Station']).value_counts().idxmax()
print('\nThe most frequent combination of start station and end station are: \n', combination)
print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def trip_duration_stats(df):
"""Displays statistics on the total and average trip duration."""
start_time = time.time()
travel_time = sum(df['Trip Duration'])
print('Total travel time:', travel_time / 86400, " Days")
# display total travel time
total_time = sum(df['Trip Duration'])
print('\nThe total travel time is {} seconds: \n', total_time)
# display mean travel time
mean_time = df['Trip Duration'].mean()
print('\n The average travel time is \n', mean_time)
print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def user_stats(df):
"""Displays statistics on bikeshare users."""
print('\nCalculating User Stats...\n')
start_time = time.time()
# TO DO: Display counts of user types
user_types = df['User Type'].value_counts()
#print(user_types)
print('User Types:\n', user_types)
# TO DO: Display counts of gender
print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def main():
while True:
city, month, day = get_filters()
df = load_data(city, month, day)
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)
restart = input('\nWould you like to restart? Enter yes or no.\n')
if restart.lower() != 'yes':
break
if __name__ == "__main__":
main()
我收到以下错误,有人能帮助我解决这些错误吗?
> Traceback (most recent call last):
File "C:\Users\DELL\PycharmProjects\Professional\venv\Lib\site-packages\pandas\core\indexes\range.py", line 391, in get_loc
return self._range.index(new_key)
^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: 0 is not in range
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\DELL\PycharmProjects\Professional\Bikeshare.py", line 203, in <module>
main()
File "C:\Users\DELL\PycharmProjects\Professional\Bikeshare.py", line 192, in main
time_stats(df)
File "C:\Users\DELL\PycharmProjects\Professional\Bikeshare.py", line 100, in time_stats
popular_month = df['month'].mode()[0]
~~~~~~~~~~~~~~~~~~^^^
File "C:\Users\DELL\PycharmProjects\Professional\venv\Lib\site-packages\pandas\core\series.py", line 981, in __getitem__
Calculating The Most Frequent Times of Travel...
return self._get_value(key)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DELL\PycharmProjects\Professional\venv\Lib\site-packages\pandas\core\series.py", line 1089, in _get_value
loc = self.index.get_loc(label)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DELL\PycharmProjects\Professional\venv\Lib\site-packages\pandas\core\indexes\range.py", line 393, in get_loc
raise KeyError(key) from err
KeyError: 0
我希望过滤PandasDataFrame返回月,星期几,小时执行一些统计。
2条答案
按热度按时间watbbzwu1#
KeyError
意味着键是无效的,因为它不存在。在这种情况下,当试图获取第一个模式时获取KeyError
的一个原因是,当 Dataframe 中的列'month'
为空时,因此mode()返回一个空集合,所以当试图获取它的第一个元素时,您会得到KeyError: 0
。要避免这种情况,您可以替换:
其中:
因为如果'month'列上没有数据,就没有必要尝试获取它的模式。
有关处理异常的详细信息:www.example.comhttps://docs.python.org/3/tutorial/errors.html#handling-exceptions
au9on6nz2#
另外,当我尝试(不使用过滤器)在第二个和第三个输入中选择“all“时,我得到了以下结果:
最受欢迎的月份是:6
一周中最受欢迎的一天是:
〈绑定方法PandasDelegate._add_delegate_accessors.._create_delegator_method.. f,属于〈位于0x 0000022 B7 CD 5E 890的Pandas.核心.索引.访问器.日期时间属性对象〉〉
一天中最受欢迎的时段是:17
这需要0.0260775089263916秒。
正在计算最受欢迎的站点和行程...
最常用的起点桩号为:
斯特里特大道和格兰德大道
最常用的终端站是:
斯特里特大道和格兰德大道
起点桩号和终点桩号最常见的组合为:
("彼得森大街西2112号“、”彼得森大街西2112号“、1064651、时间戳(”2017 -06-02 07:59:13“)、”2017-06-02 08:25:42“、1589、”订阅者“、”女性“、1963.0、6、绑定方法PandasDelegate._add_delegate_accessors.._create_delegator_method..位于0x 000022 B7 CD 5E 890〉〉的〈Pandas.核心.索引.访问器.日期时间属性对象的f,7)
这需要2.1254045963287354秒。
总行程时间:3250.830868055555天
总行程时间为{}秒:280871787
平均行程时间为936.23929
这需要0.06502270698547363秒。
正在计算用户统计信息...
用户类型:用户238889客户61110家属1姓名:用户类型,数据类型:整数64
这需要0.022009611129760742秒。
是否要重新启动?请输入yes或no。