Python:如何处理插入PostgreSQL表时的间接引用?

6tr1vspr  于 2023-05-17  发布在  PostgreSQL
关注(0)|答案(1)|浏览(120)

我有两个.csv文件名为“Country.csv”,看起来像这样:

Country Code,Country Name,Country-ID
US,United States,0
DE,Germany,1
AU,Australia,2
CZ,Czechia,3
CA,Canada,4
AR,Argentina,5
BR,Brazil,6
PT,Portugal,7
GB,United Kingdom,8
IT,Italy,9
GG,Guernsey,10
RO,Romania,11

和“users.csv”,看起来像这样:

User-ID,Age,username,Country-ID
1,,madMeerkat6#yHazv,0
2,18.0,innocentUnicorn8#eCMNj,1
3,,jubilantStork8#YgoL-,0
4,17.0,hushedOatmeal4#y5QVW,0
5,,thrilledRhino7#3PYN3,0
6,61.0,insecureCaviar4#xosWW,0
7,,artisticGarlic3#Sla7S,2
8,,dearMandrill9#c1J0m,1
9,,cynicalDinosaur3#0wSxC,0
10,26.0,gloomyCake2#eRcdC,0
11,14.0,sincereCockatoo6#eDuI_,0

我必须使用(精确地)命令生成以下PostgreSQL表:

CREATE TABLE Country (
    ISO_3166 CHAR(2) PRIMARY KEY,
    CountryName VARCHAR(256),
    CID varchar(16)
);
CREATE TABLE Users (
    UID INT PRIMARY KEY,
    Username VARCHAR(256),
    DoB DATE,
    Age INT,
    ISO_3166 CHAR(2) REFERENCES Country (ISO_3166)
);

现在我想把csv文件中的值插入到表中。我的尝试是以下Python脚本:

import csv
import sys
import psycopg2
from psycopg2 import extras
import re
import ast
from datetime import date

def csv_to_dictionary(csv_name, delimiter):
    input_file = csv.DictReader(open(csv_name, 'r', encoding='utf-8'), delimiter=delimiter)
    return input_file

sql_con = psycopg2.connect(host='localhost', port='5432', database="XYZ", user='postgres', password='XYZ')
cursor = sql_con.cursor()

country_dictionary = csv_to_dictionary("country.csv", ',')
for row in country_dictionary:
    cursor.execute(""" INSERT INTO country (iso_3166, countryname, cid)  VALUES (%s, %s, %s) """, (row["Country Code"], row["Country Name"], row["Country-ID"]))

user_dictionary = csv_to_dictionary("user.csv", ',')
for row in user_dictionary:
    if row["Age"] == "" and row["Country-ID"] == "0":
        cursor.execute(""" INSERT INTO users (uid, username)  VALUES (%s, %s) """, (int(row["User-ID"]), row["username"]))
    elif row["Age"] != "" and row["Country-ID"] == "0":
        cursor.execute(""" INSERT INTO users (uid, username, age)  VALUES (%s, %s, %s) """, (int(row["User-ID"]), row["username"], int(float(row["Age"]))))
    elif row["Age"] == "" and row["Country-ID"] != "0":
        cursor.execute(""" INSERT INTO users (uid, username, iso_3166)  VALUES (%s, %s, %s) """, (int(row["User-ID"]), row["username"], row["Country-ID"]))
    else:
        cursor.execute(""" INSERT INTO users (uid, username, age, iso_3166)  VALUES (%s, %s, %s, %s) """, (int(row["User-ID"]), row["username"], int(float(row["Age"])), row["Country-ID"]))

sql_con.commit()

cursor.close()
sql_con.close()

从“country.csv”中插入数据工作正常,但是,这里的问题是,“Users”表中的“ISO_3166”引用了“Country”表中的“ISO_3166”,但是users.csv”只包含中的“Country-ID”(与“users.csv”中的“Country-ID”相同)。我知道“Country”和“Country-ID”(在“country.csv”中)之间存在1-1对应关系,但我不知道如何从相应的“Country_ID”中获取“Country”。
你能告诉我如何做到这一点吗?

0sgqnhkj

0sgqnhkj1#

如果你安装并使用pandas,你将使用类似于表的结构(称为“DataFrames”,在脚本中由“df”引用),它将为你完成大部分繁重的工作:

import pandas as pd
import psycopg2

def read_csv(file_name):
    df = pd.read_csv(file_name)
    return df

def create_country_id_to_iso_mapper(country_df):
    mapper = {}
    for index, row in country_df.iterrows():
        mapper[row['Country-ID']] = row['ISO_3166']
    return mapper

def insert_data_into_table(table_name, df):
    sql_con = psycopg2.connect(host='localhost', port='5432', database="XYZ", user='postgres', password='XYZ')
    cursor = sql_con.cursor()

    if table_name == "users":
        for index, row in df.iterrows():
            row_age = row["Age"]
            row_country_id = row["Country-ID"]
            row_iso_3166 = row["ISO_3166"]

            if pd.isna(row_age) and row_country_id == "0":
                query = f"INSERT INTO users (uid, username) VALUES ({row['User-ID']}, '{row['username']}')"
            elif not pd.isna(row_age) and row_country_id == "0":
                query = f"INSERT INTO users (uid, username, age) VALUES ({row['User-ID']}, '{row['username']}', {row_age})"
            elif pd.isna(row_age) and row_country_id != "0":
                query = f"INSERT INTO users (uid, username, iso_3166) VALUES ({row['User-ID']}, '{row['username']}', '{row_iso_3166}')"
            else:
                query = f"INSERT INTO users (uid, username, age, iso_3166) VALUES ({row['User-ID']}, '{row['username']}', {row_age}, '{row_iso_3166}')"

            cursor.execute(query)

    elif table_name == "country":
        for index, row in df.iterrows():
            query = f"INSERT INTO country (Country-ID, ISO_3166) VALUES ({row['Country-ID']}, '{row['ISO_3166']}')"
            cursor.execute(query)

    sql_con.commit()
    cursor.close()
    sql_con.close()

country_df = read_csv("country.csv")
users_df = read_csv("user.csv")

country_id_to_iso_mapper = create_country_id_to_iso_mapper(country_df)

# Update the 'Country-ID' column in the users DataFrame with the corresponding 'ISO_3166' values
users_df['ISO_3166'] = users_df['Country-ID'].map(country_id_to_iso_mapper)

# Insert data into the 'country' and 'users' tables
insert_data_into_table("country", country_df)
insert_data_into_table("users", users_df)

这将读取CSV文件,将国家IDMap到ISO代码,然后更新“用户”DataFrameMap。最后,它插入到国家表,并将现在更新的数据插入到“用户”表

相关问题