Python:通过文本文件创建字典

xriantvc  于 2023-04-04  发布在  Python
关注(0)|答案(6)|浏览(208)

在课堂实验中,我们要读取一个文本文件,文件中逐行记录了世界杯冠军的国家名、年份、队长和教练,我们要以国家名作为键来创建一个字典。
我本来想创建一个嵌套字典:以国家名为键,每个键对应的值又是一个字典,其中的键/值对分别记录夺冠次数、年份、队长和教练。

def main():
    """Read the champions file, build the per-country dictionary and print it.

    The input file is opened in a ``with`` block so that it is closed as soon
    as ``createDict`` has consumed it, even if parsing raises.
    """
    with open('world_cup_champions.txt', 'r') as inFile:
        champions = createDict(inFile)
    printChamps(champions)
    

def createDict(file):
    """Group World Cup results by winning country.

    Args:
        file: An iterable of text lines (for example an open file) of the
            form ``Year,Country,Coach,Captain``. Blank lines and a header
            line whose first field is ``Year`` are skipped.

    Returns:
        A dict keyed by country name. Each value is a dict with the keys
        ``'Wins'`` (int) and ``'Year(s)'``, ``'Captain'`` and ``'Coach'``
        (strings; multiple entries are joined with ``", "`` in input order).

    Raises:
        ValueError: If a non-blank line does not contain exactly four
            comma-separated fields.
    """
    champions = {}
    for number, raw in enumerate(file, start=1):
        text = raw.strip()
        if not text:
            continue
        fields = [field.strip() for field in text.split(",")]
        if len(fields) != 4:
            raise ValueError(
                f"line {number}: expected 4 comma-separated fields, got {len(fields)}"
            )
        # Column order in the input is Year, Country, Coach, Captain.
        year, country, coach, captain = fields
        if year == "Year":
            continue
        # Create the country's record on first sight, then accumulate into it
        # so earlier wins are never overwritten by later lines.
        entry = champions.setdefault(
            country, {'Wins': 0, 'Year(s)': [], 'Captain': [], 'Coach': []}
        )
        entry['Wins'] += 1
        entry['Year(s)'].append(year)
        entry['Captain'].append(captain)
        entry['Coach'].append(coach)

    # Flatten the collected lists to strings so each value can be used
    # directly with "%s"-style column formatting.
    for entry in champions.values():
        for key in ('Year(s)', 'Captain', 'Coach'):
            entry[key] = ", ".join(entry[key])
    return champions

def printChamps(unsorted_dict):
    """Print the champions table, one row per country in alphabetical order.

    Args:
        unsorted_dict: Mapping of country name to a dict providing the keys
            'Wins', 'Year(s)', 'Captain' and 'Coach'.
    """
    # The embedded "\n" plus print's own newline leaves a blank line between
    # the heading and the first row.
    heading = f"{'Country':10} {'Wins':5} {'Year':32} {'Captain':72} {'Coach':30}\n"
    print(heading)

    row_format = "%-10s | %2i | %-30s | %-70s | %-50s"
    for country, details in sorted(unsorted_dict.items()):
        cells = (
            country,
            details['Wins'],
            details['Year(s)'],
            details['Captain'],
            details['Coach'],
        )
        print(row_format % cells)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

这就是我现在所拥有的,但我在createDict中制作字典时感到困惑。下面是文本文件的外观:

Year,Country,Coach,Captain
1930,Uruguay,Alberto Suppici,Jose Nasazzi
1934,Italy,Vittorio Pozzo,Gianpiero Combi
1938,Italy,Vittorio Pozzo,Giuseppe Meazza
1950,Uruguay,Juan Lopez,Obdulio Varela
1954,Germany,Sepp Herberger,Fritz Walter
1958,Brazil,Vicente Feola,Hilderaldo Bellini
1962,Brazil,Aymore Moreira,Mauro Ramos
1966,England,Alf Ramsey,Bobby Moore
1970,Brazil,Mario Zagallo,Carlos Alberto
1974,Germany,Helmut Schon,Franz Beckenbauer
1978,Argentina,Cesar Luis Menotti,Daniel Passarella
1982,Italy,Enzo Bearzot,Dino Zoff
1986,Argentina,Carlos Bilardo,Diego Maradona
1990,Germany,Franz Beckenbauer,Lothar Matth�us
1994,Brazil,Carlos Alberto Parreira,Dunga
1998,France,Aime Jacquet,Didier Deschamps
2002,Brazil,Luiz Felipe Scolari,Cafu
2006,Italy,Marcello Lippi,Fabio Cannavaro
2010,Spain,Vicente del Bosque,Iker Casillas
2014,Germany,Joachim Low,Philipp Lahm

我想按如下格式打印:列出每个国家的名字、夺冠次数以及夺冠的年份。
以下是预期输出:

xkftehaa

xkftehaa1#

也许使用 pandas 库及其 groupby 方法会有帮助。

sulc1iza

sulc1iza2#

使用 csv.DictReader。只要指定正确的数据*分隔符*,你就会得到一个逐行产生字典的迭代器……如果第一行给出了列名,它们将作为键。

import csv
from collections import defaultdict
from io import StringIO

# In-memory CSV text with a header row; it is wrapped in StringIO below
# purely to mimic a file object.
data = """Year,Country,Coach,Captain
1930,Uruguay,Alberto Suppici,Jose Nasazzi
1934,Italy,Vittorio Pozzo,Gianpiero Combi
1938,Italy,Vittorio Pozzo,Giuseppe Meazza
1950,Uruguay,Juan Lopez,Obdulio Varela
1954,Germany,Sepp Herberger,Fritz Walter
1958,Brazil,Vicente Feola,Hilderaldo Bellini
1962,Brazil,Aymore Moreira,Mauro Ramos
1966,England,Alf Ramsey,Bobby Moore
1970,Brazil,Mario Zagallo,Carlos Alberto
1974,Germany,Helmut Schon,Franz Beckenbauer
1978,Argentina,Cesar Luis Menotti,Daniel Passarella
1982,Italy,Enzo Bearzot,Dino Zoff
1986,Argentina,Carlos Bilardo,Diego Maradona
1990,Germany,Franz Beckenbauer,Lothar Matth�us
1994,Brazil,Carlos Alberto Parreira,Dunga
1998,France,Aime Jacquet,Didier Deschamps
2002,Brazil,Luiz Felipe Scolari,Cafu
2006,Italy,Marcello Lippi,Fabio Cannavaro
2010,Spain,Vicente del Bosque,Iker Casillas
2014,Germany,Joachim Low,Philipp Lahm"""

f = StringIO(data)

# Main part: count the wins and collect the winning years for each country.
dd = defaultdict(dict)
r = csv.DictReader(f, delimiter=',')
for row in r:
    entry = dd[row['Country']]
    # A country seen for the first time has no 'Wins' key yet, so start at 0.
    entry['Wins'] = entry.get('Wins', 0) + 1
    entry.setdefault("Years", []).append(row['Year'])

# Re-key in alphabetical order of country name.
dd_ordered = dict(sorted(dd.items()))

# Check the output: one (country, details) pair per line.
print('\n'.join(str(pair) for pair in dd_ordered.items()))
n1bvdmb6

n1bvdmb63#

如果你不想自己格式化表格,最好使用许多可用的python库之一。另外,最好的方法可能是使用pandas库,它允许你轻松地对数据进行排序并按国家分组。
如果您不想使用任何库,下面的解决方案应该可以帮助您。它创建一个字典来保存国家/地区的详细信息,然后使用循环在最后格式化表格。您可能需要更新表格的格式。

from collections import defaultdict
def main():
    """Build the champions summary from an inline copy of the data and print it."""
    # Header row followed by one "Year,Country,Coach,Captain" record per line.
    records = [
        "Year,Country,Coach,Captain",
        "1930,Uruguay,Alberto Suppici,Jose Nasazzi",
        "1934,Italy,Vittorio Pozzo,Gianpiero Combi",
        "1938,Italy,Vittorio Pozzo,Giuseppe Meazza",
        "1950,Uruguay,Juan Lopez,Obdulio Varela",
        "1954,Germany,Sepp Herberger,Fritz Walter",
        "1958,Brazil,Vicente Feola,Hilderaldo Bellini",
        "1962,Brazil,Aymore Moreira,Mauro Ramos",
        "1966,England,Alf Ramsey,Bobby Moore",
        "1970,Brazil,Mario Zagallo,Carlos Alberto",
        "1974,Germany,Helmut Schon,Franz Beckenbauer",
        "1978,Argentina,Cesar Luis Menotti,Daniel Passarella",
        "1982,Italy,Enzo Bearzot,Dino Zoff",
        "1986,Argentina,Carlos Bilardo,Diego Maradona",
        "1990,Germany,Franz Beckenbauer,Lothar Matthus",
        "1994,Brazil,Carlos Alberto Parreira,Dunga",
        "1998,France,Aime Jacquet,Didier Deschamps",
        "2002,Brazil,Luiz Felipe Scolari,Cafu",
        "2006,Italy,Marcello Lippi,Fabio Cannavaro",
        "2010,Spain,Vicente del Bosque,Iker Casillas",
        "2014,Germany,Joachim Low,Philipp Lahm",
    ]
    printChamps(createDict(records))
    

def createDict(file):
    """Group World Cup results by winning country.

    Args:
        file: Iterable of ``Year,Country,Coach,Captain`` text lines. Blank
            lines and the header line (first field ``Year``) are skipped.

    Returns:
        A plain dict mapping each country to a dict with the keys ``'wins'``
        (int) and ``'years'``, ``'captains'`` and ``'coaches'`` (lists of
        strings in input order). Looking up a country that never won raises
        ``KeyError`` instead of silently yielding an empty string.

    Raises:
        ValueError: If a non-blank line does not split into exactly four
            comma-separated fields.
    """
    winners = {}
    for number, raw in enumerate(file, start=1):
        text = raw.strip()
        if not text:
            # A stray blank line (e.g. at the end of a file) must not abort
            # the parse and throw away everything read so far.
            continue
        fields = text.split(",")
        if len(fields) != 4:
            raise ValueError(
                f"line {number}: expected 4 comma-separated fields, got {len(fields)}"
            )
        year, country, coach, captain = fields
        if year == "Year":  # ignoring the header line
            continue
        record = winners.setdefault(
            country, {'wins': 0, 'years': [], 'captains': [], 'coaches': []}
        )
        record['wins'] += 1
        record['years'].append(year)
        record['captains'].append(captain)
        record['coaches'].append(coach)
    return winners

def printChamps(unsorted_dict):
    """Print the champions as a bordered table, countries with most wins first.

    Each column is at least as wide as in the fixed layout (10, 5, 5, 20, 20)
    and grows to fit its longest cell, so joined lists of years, captains and
    coaches no longer push the column borders out of line. A single rule
    closes the table.

    Args:
        unsorted_dict: Mapping of country name to a dict with the keys
            'wins' (int) and 'years', 'captains', 'coaches' (lists of str).
    """
    ranked = sorted(
        unsorted_dict.items(), key=lambda item: item[1]['wins'], reverse=True
    )
    rows = [
        (
            country,
            str(info['wins']),
            ", ".join(info['years']),
            ", ".join(info['captains']),
            ", ".join(info['coaches']),
        )
        for country, info in ranked
    ]
    headings = ('Country', 'Wins', 'Year', 'Captain', 'Coach')

    # Start from the minimum widths and widen every column to its longest cell.
    widths = [10, 5, 5, 20, 20]
    for cells in [headings, *rows]:
        widths = [max(width, len(cell)) for width, cell in zip(widths, cells)]

    def render(cells, wins_align):
        """Format one table line; the wins column takes the given alignment."""
        country, wins, years, captains, coaches = cells
        return (
            f"{country:<{widths[0]}} | {wins:{wins_align}{widths[1]}} | "
            f"{years:<{widths[2]}} | {captains:<{widths[3]}} | "
            f"{coaches:<{widths[4]}} |"
        )

    header = render(headings, '<')
    # The rule spans the full table width and ends in the same "|" border.
    rule = "-" * (len(header) - 1) + "|"

    print(rule)
    print(header)
    print(rule)
    for cells in rows:
        # Win counts are right-aligned, unlike the left-aligned heading.
        print(render(cells, '>'))
        print(rule)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

输出

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Country    | Wins  | Year                         | Captain                                                       | Coach                                                                                      |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Brazil     |     5 | 1958, 1962, 1970, 1994, 2002 | Hilderaldo Bellini, Mauro Ramos, Carlos Alberto, Dunga, Cafu  | Vicente Feola, Aymore Moreira, Mario Zagallo, Carlos Alberto Parreira, Luiz Felipe Scolari |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Italy      |     4 | 1934, 1938, 1982, 2006       | Gianpiero Combi, Giuseppe Meazza, Dino Zoff, Fabio Cannavaro  | Vittorio Pozzo, Vittorio Pozzo, Enzo Bearzot, Marcello Lippi                               |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Germany    |     4 | 1954, 1974, 1990, 2014       | Fritz Walter, Franz Beckenbauer, Lothar Matthus, Philipp Lahm | Sepp Herberger, Helmut Schon, Franz Beckenbauer, Joachim Low                               |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Uruguay    |     2 | 1930, 1950                   | Jose Nasazzi, Obdulio Varela                                  | Alberto Suppici, Juan Lopez                                                                |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Argentina  |     2 | 1978, 1986                   | Daniel Passarella, Diego Maradona                             | Cesar Luis Menotti, Carlos Bilardo                                                         |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
England    |     1 | 1966                         | Bobby Moore                                                   | Alf Ramsey                                                                                 |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
France     |     1 | 1998                         | Didier Deschamps                                              | Aime Jacquet                                                                               |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Spain      |     1 | 2010                         | Iker Casillas                                                 | Vicente del Bosque                                                                         |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
irtuqstp

irtuqstp4#

这个解决方案利用了pandas库的dataframe功能。

import pandas as pd

# Column headings.
print('Country   Wins Years')
# Create a pandas dataframe from the data, then group it by country.
data = pd.read_csv('world_cup_champions.txt')
bycountry = data.groupby('Country')
for country, df in bycountry:
    # Extract this country's years into a list of strings.
    years = []
    for _, row in df.iterrows():
        years.append(str(row['Year']))
    # Country left-aligned in 10 columns, then the win count and the years.
    wins = len(years)
    print(f'{country:<10} {wins}  ', ', '.join(years))

可以简化为:

import pandas

# Column headings.
print('Country   Wins Years')
# Read the file and iterate over one sub-frame per country in a single step.
for country, df in pandas.read_csv('world_cup_champions.txt').groupby('Country'):
    years = [str(row['Year']) for _, row in df.iterrows()]
    label = f'{country:<10} {len(years)}  '
    print(label, ', '.join(years))

输出:

Country   Wins Years
Argentina  2   1978, 1986
Brazil     5   1958, 1962, 1970, 1994, 2002
England    1   1966
France     1   1998
Germany    4   1954, 1974, 1990, 2014
Italy      4   1934, 1938, 1982, 2006
Spain      1   2010
Uruguay    2   1930, 1950
uqzxnwby

uqzxnwby5#

建议一:使用pandas

如果允许使用pandas,则可以

# import pandas as pd
def main():
    """Summarise the champions per country with pandas and print a table.

    Reads 'world_cup_champions.txt', aggregates the wins, years, captains and
    coaches of each country, builds the ``champions`` dict from that result
    and prints the Wins and Years columns as an 'rst'-format table, sorted by
    wins in descending order.
    """
    inp_filepath = 'world_cup_champions.txt'
    df = pd.read_csv(inp_filepath)  # read from file

    # One row per country: win count plus the collected years/captains/coaches.
    per_country = df.groupby('Country')
    champions_df = per_country.agg(
        Wins=('Year', len),
        Years=('Year', list),
        Captains=('Captain', list),
        Coaches=('Coach', list),
    )

    # Nested dict keyed by the aggregated frame's index.
    champions = champions_df.to_dict('index')

    def join_years(years):
        """Render a list of years as one comma-separated string."""
        return ', '.join(f'{i}' for i in years)

    summary = champions_df[['Wins']].assign(
        Years=champions_df['Years'].apply(join_years)
    )
    summary = summary.sort_values('Wins', ascending=False)
    print(summary.to_markdown(tablefmt='rst'))

[使用.sort_index()而不是 .sort_values('Wins', ascending=False) 按国家的字母顺序而不是大多数胜利排序行。]

champions看起来像

{
  'Argentina': {'Wins': 2, 'Years': [1978, 1986], 'Captains': ['Daniel Passarella', 'Diego Maradona'], 'Coaches': ['Cesar Luis Menotti', 'Carlos Bilardo']},
  'Brazil': {'Wins': 5, 'Years': [1958, 1962, 1970, 1994, 2002], 'Captains': ['Hilderaldo Bellini', 'Mauro Ramos', 'Carlos Alberto', 'Dunga', 'Cafu'], 'Coaches': ['Vicente Feola', 'Aymore Moreira', 'Mario Zagallo', 'Carlos Alberto Parreira', 'Luiz Felipe Scolari']},
  'England': {'Wins': 1, 'Years': [1966], 'Captains': ['Bobby Moore'], 'Coaches': ['Alf Ramsey']},
  'France': {'Wins': 1, 'Years': [1998], 'Captains': ['Didier Deschamps'], 'Coaches': ['Aime Jacquet']},
  'Germany': {'Wins': 4, 'Years': [1954, 1974, 1990, 2014], 'Captains': ['Fritz Walter', 'Franz Beckenbauer', 'Lothar Matth�us', 'Philipp Lahm'], 'Coaches': ['Sepp Herberger', 'Helmut Schon', 'Franz Beckenbauer', 'Joachim Low']},
  'Italy': {'Wins': 4, 'Years': [1934, 1938, 1982, 2006], 'Captains': ['Gianpiero Combi', 'Giuseppe Meazza', 'Dino Zoff', 'Fabio Cannavaro'], 'Coaches': ['Vittorio Pozzo', 'Vittorio Pozzo', 'Enzo Bearzot', 'Marcello Lippi']},
  'Spain': {'Wins': 1, 'Years': [2010], 'Captains': ['Iker Casillas'], 'Coaches': ['Vicente del Bosque']},
  'Uruguay': {'Wins': 2, 'Years': [1930, 1950], 'Captains': ['Jose Nasazzi', 'Obdulio Varela'], 'Coaches': ['Alberto Suppici', 'Juan Lopez']}
}

打印输出将是

=========  ======  ============================
Country      Wins  Years
=========  ======  ============================
Brazil          5  1958, 1962, 1970, 1994, 2002
Germany         4  1954, 1974, 1990, 2014
Italy           4  1934, 1938, 1982, 2006
Argentina       2  1978, 1986
Uruguay         2  1930, 1950
England         1  1966
France          1  1998
Spain           1  2010
=========  ======  ============================

建议二:无pandas

[此处原有的代码示例在页面转换过程中丢失,仅残留无法还原的占位文字。]
champions 看起来与 pandas 解决方案中的相同,但打印输出更接近您问题中描述的预期输出:

Country     Wins  Years                         
=======     ====  =====                         
Argentina   2     1978, 1986                    
Brazil      5     1958, 1962, 1970, 1994, 2002  
England     1     1966                          
France      1     1998                          
Germany     4     1954, 1974, 1990, 2014        
Italy       4     1934, 1938, 1982, 2006        
Spain       1     2010                          
Uruguay     2     1930, 1950

如果你

  • 将 printChamps 中的默认 lenRef 改为 {'Years':30,'Captains':70,'Coaches':50}(当前已被注释掉),
  • 或者在 main 中调用 printChamps 时传入这些参数,例如 printChamps(champions, Years=30,Captains=70,Coaches=50)

那么打印出来的结果就像

Country     Wins  Years                           Captains                                                                Coaches                                           
=======     ====  =====                           ========                                                                =======                                           
Argentina   2     1978, 1986                      Daniel Passarella, Diego Maradona                                       Cesar Luis Menotti, Carlos Bilardo                
Brazil      5     1958, 1962, 1970, 1994, 2002    Hilderaldo Bellini, Mauro Ramos, Carlos Alberto, Dunga, Cafu            Vicente Feola, Aymore Moreira, Mario Zagallo, Carlos Alberto Parreira, Luiz Felipe Scolari
England     1     1966                            Bobby Moore                                                             Alf Ramsey                                        
France      1     1998                            Didier Deschamps                                                        Aime Jacquet                                      
Germany     4     1954, 1974, 1990, 2014          Fritz Walter, Franz Beckenbauer, Lothar Matth�us, Philipp Lahm          Sepp Herberger, Helmut Schon, Franz Beckenbauer, Joachim Low
Italy       4     1934, 1938, 1982, 2006          Gianpiero Combi, Giuseppe Meazza, Dino Zoff, Fabio Cannavaro            Vittorio Pozzo, Vittorio Pozzo, Enzo Bearzot, Marcello Lippi
Spain       1     2010                            Iker Casillas                                                           Vicente del Bosque                                
Uruguay     2     1930, 1950                      Jose Nasazzi, Obdulio Varela                                            Alberto Suppici, Juan Lopez
uyhoqukh

uyhoqukh6#

这可能是一个可能的解决方案。

import csv
import io
from collections import defaultdict

# In-memory copy of the CSV data: a header row plus one line per tournament.
file = """Year,Country,Coach,Captain
1930,Uruguay,Alberto Suppici,Jose Nasazzi
1934,Italy,Vittorio Pozzo,Gianpiero Combi
1938,Italy,Vittorio Pozzo,Giuseppe Meazza
1950,Uruguay,Juan Lopez,Obdulio Varela
1954,Germany,Sepp Herberger,Fritz Walter
1958,Brazil,Vicente Feola,Hilderaldo Bellini
1962,Brazil,Aymore Moreira,Mauro Ramos
1966,England,Alf Ramsey,Bobby Moore
1970,Brazil,Mario Zagallo,Carlos Alberto
1974,Germany,Helmut Schon,Franz Beckenbauer
1978,Argentina,Cesar Luis Menotti,Daniel Passarella
1982,Italy,Enzo Bearzot,Dino Zoff
1986,Argentina,Carlos Bilardo,Diego Maradona
1990,Germany,Franz Beckenbauer,Lothar Matth�us
1994,Brazil,Carlos Alberto Parreira,Dunga
1998,France,Aime Jacquet,Didier Deschamps
2002,Brazil,Luiz Felipe Scolari,Cafu
2006,Italy,Marcello Lippi,Fabio Cannavaro
2010,Spain,Vicente del Bosque,Iker Casillas
2014,Germany,Joachim Low,Philipp Lahm"""

# Winning years per country, in input order.
d = defaultdict(list)

# To read the real file instead, use:
#     with open('world_cup_champions.txt', 'r') as f:
with io.StringIO(file) as f:
    reader = csv.DictReader(f)
    for row in reader:
        d[row['Country']].append(row['Year'])

# The first column is as wide as the longest country name, but never
# narrower than its own heading.
max_len = max([len('Country'), *map(len, d)])

header = ['Country', 'Wins', 'Year(s)']

# Country left-aligned in max_len columns, wins right-aligned in 4, then years.
fmt_str = f'{{0:{max_len}}}  {{1:>4}} {{2}}'

print(fmt_str.format(*header))

# One line per country, in alphabetical order.
for country in sorted(d):
    yrs = d[country]
    print(fmt_str.format(country, len(yrs), ','.join(yrs)))

输出:

Country    Wins Year(s)
Argentina     2 1978,1986
Brazil        5 1958,1962,1970,1994,2002
England       1 1966
France        1 1998
Germany       4 1954,1974,1990,2014
Italy         4 1934,1938,1982,2006
Spain         1 2010
Uruguay       2 1930,1950

相关问题