我试着从PowerQuery中的这段代码开始:
let
Origen = Folder.Files("C:\Users\mimorales\OneDrive - Gategroup\Desktop\Miguel\Analytics LATAM\Despachos\2023\03 Marzo 2023\GP4"),
#"Archivos ocultos filtrados1" = Table.SelectRows(Origen, each [Attributes]?[Hidden]? <> true),
#"Invocar función personalizada1" = Table.AddColumn(#"Archivos ocultos filtrados1", "Transformar archivo", each #"Transformar archivo"([Content])),
#"Columnas con nombre cambiado1" = Table.RenameColumns(#"Invocar función personalizada1", {"Name", "Source.Name"}),
#"Otras columnas quitadas1" = Table.SelectColumns(#"Columnas con nombre cambiado1", {"Source.Name", "Transformar archivo"}),
#"Columna de tabla expandida1" = Table.ExpandTableColumn(#"Otras columnas quitadas1", "Transformar archivo", Table.ColumnNames(#"Transformar archivo"(#"Archivo de ejemplo"))),
#"Tipo cambiado" = Table.TransformColumnTypes(#"Columna de tabla expandida1",{{"Source.Name", type text}, {"AIRLINE_CODE", type text}, {"FLIGHT_NO", Int64.Type}, {"AIRCRAFT_TYPE_CODE", Int64.Type}, {"DEP_PORT_CODE", type text}, {"ARR_PORT_CODE", type text}, {"DEP_DATE", type date}, {"ARR_DATE", type date}, {"STD", Int64.Type}, {"STA", Int64.Type}, {"BLOCK_TIME", Int64.Type}, {"LEG", Int64.Type}, {"PART_NO", type text}, {"PART_NAME", type text}, {"PART_DESC", type text}, {"PART_SECTOR_USAGE_CODE", type text}, {"PART_SECTOR_USAGE_NAME", type text}, {"PART_CATEGORY_CODE", type any}, {"PAX_CLASS", type text}, {"EXCHANGE_TYPE_CODE", type text}, {"PART_QTY", Int64.Type}, {"PART_WEIGHT", type text}, {"IS_DEADHEAD", Int64.Type}}),
#"Columnas quitadas" = Table.RemoveColumns(#"Tipo cambiado",{"Source.Name"}),
#"Personalizada agregada" = Table.AddColumn(#"Columnas quitadas", "DEP_PORT_CODE_FINAL", each if [DEP_PORT_CODE] = "AKL" then "AKL" else
if [DEP_PORT_CODE] = "BCN" then "BCN" else
if [DEP_PORT_CODE] = "BOG" then "BOG" else
if [DEP_PORT_CODE] = "BOS" then "BOS" else
if [DEP_PORT_CODE] = "CDG" then "CDG" else
if [DEP_PORT_CODE] = "CUN" then "CUN" else
if [DEP_PORT_CODE] = "FCO" then "FCO" else
if [DEP_PORT_CODE] = "FOR" then "GRU-BOND" else
if [DEP_PORT_CODE] = "FRA" then "FRA" else
if [DEP_PORT_CODE] = "GYE" then "GYE" else
if [DEP_PORT_CODE] = "JFK" then "JFK" else
if [DEP_PORT_CODE] = "LAX" then "LAX" else
if [DEP_PORT_CODE] = "LHR" then "LHR" else
if [DEP_PORT_CODE] = "LIS" then "LIS" else
if [DEP_PORT_CODE] = "MAD" then "MAD" else
if [DEP_PORT_CODE] = "MCO" then "MCO" else
if [DEP_PORT_CODE] = "MDE" then "MDE" else
if [DEP_PORT_CODE] = "MEX" then "MEX" else
if [DEP_PORT_CODE] = "MIA" then "MIA" else
if [DEP_PORT_CODE] = "MXP" then "MXP" else
if [DEP_PORT_CODE] = "SCL" then "SCL" else
if [DEP_PORT_CODE] = "UIO" then "UIO" else
if [DEP_PORT_CODE] = "GIG" and [FLIGHT_NO] >= 8000 then "GRU-BOND" else
if [DEP_PORT_CODE] = "GIG" and [FLIGHT_NO] < 8000 then "GIG" else
if [DEP_PORT_CODE] = "GRU" and [FLIGHT_NO] >= 8000 then "GRU-BOND" else
if [DEP_PORT_CODE] = "GRU" and [FLIGHT_NO] >=1000 and [FLIGHT_NO]<=1400 then "GRU-BOND" else
if [DEP_PORT_CODE] = "GRU" and [FLIGHT_NO] <1000 then "GRU" else
if [DEP_PORT_CODE] = "GRU" and [FLIGHT_NO] >=1400 and [FLIGHT_NO]<8000 then "GRU" else
if [DEP_PORT_CODE] = "LIM" and [FLIGHT_NO] >= 3000 then "LIM-ROW" else
if [DEP_PORT_CODE] = "LIM" and [FLIGHT_NO] >=2000 and [FLIGHT_NO]<3000 then "LIM-LP" else
if [DEP_PORT_CODE] = "LIM" and [FLIGHT_NO] <2000 then "LIM-ROW" else
if [DEP_PORT_CODE] = "PUJ" then "PUJ" else 0),
#"Filas filtradas1" = Table.SelectRows(#"Personalizada agregada", each ([DEP_PORT_CODE_FINAL] <> "PUJ")),
#"Filas filtradas" = Table.SelectRows(#"Filas filtradas1", each ([DEP_DATE] <> #date(2023, 4, 1) and [DEP_DATE] <> #date(2023, 4, 2) and [DEP_DATE] <> #date(2023, 4, 3) and [DEP_DATE] <> #date(2023, 4, 4)))
in
#"Filas filtradas"
在Python中进行类似的分析,通过使用以下代码:
import pandas as pd
import matplotlib as plt
import numpy as np
df1 = pd.read_csv('S1.csv')
df2 = pd.read_csv('S2.csv')
df3 = pd.read_csv('S3.csv')
df4 = pd.read_csv('S4.csv')
df5 = pd.read_csv('S5.csv')
df = pd.concat([df1, df2, df3, df4, df4, df5], keys=['df1', 'df2', 'df3', 'df4', 'df5'])
# I've created a dictionary of conditions and values for the new column
conditions = {'AKL': 'AKL',
'BCN': 'BCN',
'BOG': 'BOG',
'BOS': 'BOS',
'CDG': 'CDG',
'CUN': 'CUN',
'FCO': 'FCO',
'FOR': 'GRU-BOND',
'FRA': 'FRA',
'GYE': 'GYE',
'JFK': 'JFK',
'LAX': 'LAX',
'LHR': 'LHR',
'LIS': 'LIS',
'MAD': 'MAD',
'MCO': 'MCO',
'MDE': 'MDE',
'MEX': 'MEX',
'MIA': 'MIA',
'MXP': 'MXP',
'SCL': 'SCL',
'UIO': 'UIO',
'GRU-BOND': lambda row: 'GRU-BOND' if (row['DEP_PORT_CODE']=='GIG' and row['FLIGHT_NO']>=8000) or (row['DEP_PORT_CODE']=='GRU' and row['FLIGHT_NO']>=8000) or (row['DEP_PORT_CODE']=='GRU' and row['FLIGHT_NO']>=1000 and row['FLIGHT_NO']<=1400) else 0,
'GIG': lambda row: 'GRU-BOND' if row['FLIGHT_NO']>=8000 else 'GIG',
'GRU': lambda row: 'GRU-BOND' if row['FLIGHT_NO']>=8000 or (row['FLIGHT_NO']>=1400 and row['FLIGHT_NO']<8000) or (row['FLIGHT_NO']>=1000 and row['FLIGHT_NO']<=1400) else 'GRU',
'LIM-ROW': lambda row: 'LIM-ROW' if row['FLIGHT_NO']>=3000 or row['DEP_PORT_CODE']=='PUJ' else 0,
'LIM-LP': lambda row: 'LIM-LP' if row['FLIGHT_NO']>=2000 and row['FLIGHT_NO']<3000 else 0,
'PUJ': 'PUJ',
0: 0}
# then created the new column using the conditions and values dictionary
df['DEP_PORT_CODE_FINAL'] = df.apply(lambda row: conditions.get(row['DEP_PORT_CODE'], 0) if row['DEP_PORT_CODE'] in conditions else 0, axis=1)
# desired output
print(df.head(5))
但是,当我尝试获取一个包含所创建的新列的单个名称的列表时,它不会显示与PowerQuery相同的结果。
该列表的代码如下:unique_values = df['DEP_PORT_CODE_FINAL'].unique() for value in unique_values: print(str(value))
蚂蚁的答案是:AKL BOG CDG FCO FRA <function <lambda> at 0x0000016183BDEA20> <function <lambda> at 0x0000016183BDF240> GYE JFK LAX LHR 0 LIS MAD MCO MEX MIA SCL UIO BOS MXP BCN PUJ CUN GRU-BOND
我不明白为什么我得到了〈function at 0x 0000016183 BDEA 20〉〈function at 0x 0000016183 BDF 240〉和0,而它不应该在那里,我也错过了像GIG,GRU,LIM-LP和LIM-ROW这样的值
1条答案
按热度按时间lhcgjxsq1#
conditions
dict中的值是字符串字面量(例如'AKL'
)和lambda函数的组合。没有任何东西调用这些函数,因此该值以字符串的字符串表示形式给出。请注意,在lambda函数中,
if row['DEP_PORT_CODE'] in conditions else 0
部分是多余的,因为您已经将0作为默认参数提供给get
。你可以改变
到
你可以把所有的东西都转换成函数(
'AKL': lambda row: 'AKL'
),但是我希望创建一个单独的函数来进行转换,并简单地将它应用到框架中会更干净: