def find_pos(df):
    """Return the 1-based position of each ``WordInAddr`` inside ``Address``.

    Only the ``Address`` of the first row is read, so every row of ``df`` is
    expected to share the same address (e.g. one group of a ``groupby``).

    Repeated words are disambiguated by row order: for the i-th row
    (0-based) the first occurrence at token index >= i is preferred.  If no
    such occurrence exists, the first occurrence anywhere in the address is
    used instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns ``'Address'`` and ``'WordInAddr'``.

    Returns
    -------
    pandas.Series
        Integer positions indexed like ``df``.  ``0`` means the word does
        not occur in the address at all.
    """
    if len(df) == 0:
        # Nothing to look up; also avoids an IndexError on ``iloc[0]``.
        return pd.Series(np.zeros(0, dtype=np.intp), index=df.index)

    # Tokens of the (shared) address, whitespace separated.
    tokens = np.array(df['Address'].iloc[0].split(), dtype=object)
    if tokens.size == 0:
        # argmax over a zero-length axis would raise; no word can match.
        return pd.Series(np.zeros(len(df), dtype=np.intp), index=df.index)

    # Column vector of the words to locate; ``to_numpy`` (rather than
    # ``.values``) guarantees a plain ndarray that supports 2-D indexing.
    words = df['WordInAddr'].to_numpy(dtype=object)[:, None]

    # matches[i, j] is True when word i equals address token j.
    matches = tokens == words

    # Same as clearing the strict lower triangle: row i may only match
    # tokens at index >= i.
    row_idx = np.arange(len(df))[:, None]
    col_idx = np.arange(tokens.size)
    ordered = matches & (col_idx >= row_idx)

    # argmax returns 0 for an all-False row, so guard it with ``any`` to
    # tell "first token" apart from "no match".
    first_anywhere = np.where(matches.any(axis=1),
                              matches.argmax(axis=1) + 1, 0)
    positions = np.where(ordered.any(axis=1),
                         ordered.argmax(axis=1) + 1, first_anywhere)

    return pd.Series(positions, index=df.index)
# Run find_pos once per 'roll Num' group, then drop the outermost index level
# of the result before storing it in the 'Position' column.
df['Position'] = (
    df.groupby('roll Num')
    .apply(find_pos)
    .droplevel(0)
)
输出:
>>> df
roll Num Address WordInAddr Position
0 1 Block A Block 1
1 1 Block A A 2
2 2 South New Jersey Street Jersey South 1
3 2 South New Jersey Street Jersey Jersey 3
4 2 South New Jersey Street Jersey Street 4
5 2 South New Jersey Street Jersey Jersey 5
1条答案
按热度按时间2vuwiymt1#
你可以使用 NumPy 广播来计算一个布尔匹配矩阵:将 Address 拆分出的每个单词与 WordInAddr 中的每个单词逐一比较。然后把该矩阵的下三角部分重置为 False,并取每一行第一个 True 值的位置(索引,从 1 开始计数)。
输出: