bug描述 Describe the Bug
BUG:
PaddlePaddle在GPU上的矩阵乘法结果与PyTorch和NumPy的结果存在精度差异(float32下仅前2~3位有效数字一致)。
注意:PyTorch和Numpy的精度是对齐的,虽然这里没有比较。
版本
- PaddlePaddle 2.6
- PyTorch 1.7.1
- Numpy 1.24.3
- Python 3.9
精度
- float32
测试代码【calculate_alignment_num()代码附在最后】
if __name__ == '__main__':
    # Imported here at module scope of the script; tensor_to_numpy() looks up
    # `torch`, `paddle` and `np` as globals.
    import paddle
    import torch
    import numpy as np

    # Disable scientific notation and print enough decimals so that
    # significant digits can be compared by eye.
    torch.set_printoptions(precision=9, sci_mode=False)
    paddle.set_printoptions(precision=9, sci_mode=False)

    # A single 3x1 float32 column; every backend computes x @ x.T (3x3).
    data = [[0.1234567], [0.2345678], [0.3456789]]
    cuda_device = torch.device('cuda:0')

    numpy_x = np.array(data, dtype=np.float32)
    torch_gpu_x = torch.tensor(data, dtype=torch.float32, device=cuda_device)
    paddle_cpu_x = paddle.to_tensor(data, dtype='float32', place=paddle.CPUPlace())
    paddle_gpu_x = paddle.to_tensor(data, dtype='float32', place=paddle.CUDAPlace(0))

    numpy_res = np.matmul(numpy_x, numpy_x.T)
    torch_gpu_res = torch.mm(torch_gpu_x, torch_gpu_x.t())
    paddle_cpu_res = paddle.mm(paddle_cpu_x, paddle_cpu_x.t())
    # NOTE(review): if only ~3 significant digits agree on an Ampere-class GPU,
    # that is what TF32 tensor-core math would produce -- confirm by rerunning
    # with the environment variable NVIDIA_TF32_OVERRIDE=0.
    paddle_gpu_res = paddle.mm(paddle_gpu_x, paddle_gpu_x.t())

    print(f"Numpy:\n{numpy_res}")
    print(f"PyTorch(GPU):\n{torch_gpu_res}")
    print(f"PaddlePaddle(CPU):\n{paddle_cpu_res}")
    print(f"PaddlePaddle(GPU):\n{paddle_gpu_res}")

    print("The number of significant numbers aligned(MIN & AVG):")
    # (label, candidate, reference) triples, reported in this order.
    comparisons = (
        ("paddle_gpu_res | torch_gpu_res :", paddle_gpu_res, torch_gpu_res),
        ("paddle_gpu_res | numpy_res :", paddle_gpu_res, numpy_res),
        ("paddle_cpu_res | torch_gpu_res :", paddle_cpu_res, torch_gpu_res),
        ("paddle_cpu_res | numpy_res :", paddle_cpu_res, numpy_res),
    )
    for label, candidate, reference in comparisons:
        print(label)
        calculate_alignment_num(candidate, reference)
输出结果
W0808 16:32:40.628000 219632 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 8.6, Driver API Version: 11.2, Runtime API Version: 11.1
W0808 16:32:40.633858 219632 gpu_resources.cc:164] device: 0, cuDNN Version: 8.2.
Numpy:
[[0.01524156 0.02895897 0.04267638]
[0.02895897 0.05502206 0.08108514]
[0.04267638 0.08108514 0.1194939 ]]
PyTorch(GPU):
tensor([[0.015241557, 0.028958967, 0.042676378],
[0.028958967, 0.055022057, 0.081085138],
[0.042676378, 0.081085138, 0.119493902]], device='cuda:0')
PaddlePaddle(CPU):
Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0.015241557, 0.028958967, 0.042676378],
[0.028958967, 0.055022057, 0.081085138],
[0.042676378, 0.081085138, 0.119493902]])
PaddlePaddle(GPU):
Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
[[0.015245859, 0.028969392, 0.042685390],
[0.028969392, 0.055046141, 0.081108570],
[0.042685390, 0.081108570, 0.119510651]])
The number of significant numbers aligned(MIN & AVG):
paddle_gpu_res | torch_gpu_res :
min alignment: 2
avg alignment:2.888888888888889
paddle_gpu_res | numpy_res :
min alignment: 2
avg alignment:2.888888888888889
paddle_cpu_res | torch_gpu_res :
min alignment: 7
avg alignment:7.0
paddle_cpu_res | numpy_res :
min alignment: 7
avg alignment:7.0
Process finished with exit code 0
附:计算有效数字对齐位数的【calculate_alignment_num()】函数代码
def calculate_alignment_num(data1, data2):
    """Print and return the min/avg number of aligned significant digits.

    Both inputs are converted with ``tensor_to_numpy``, flattened, and compared
    element by element with ``count_the_number_of_alignments``.

    Args:
        data1: First tensor/array (any type ``tensor_to_numpy`` accepts).
        data2: Second tensor/array with the same number of elements.

    Returns:
        Tuple ``(min_alignment, avg_alignment)`` over all element pairs.

    Raises:
        ValueError: If the inputs hold different numbers of elements, or are
            empty (there is nothing to take a minimum or average of).
    """
    # Flatten so that any layout can be walked element-wise.
    flat_data1 = tensor_to_numpy(data1).flatten()
    flat_data2 = tensor_to_numpy(data2).flatten()

    # Without this check a shorter data2 raises IndexError mid-loop and a
    # longer data2 has its trailing elements silently ignored.
    if flat_data1.size != flat_data2.size:
        raise ValueError(
            f"Element count mismatch: {flat_data1.size} vs {flat_data2.size}"
        )
    if flat_data1.size == 0:
        raise ValueError("Cannot compute alignment for empty inputs")

    alignment_counts = [
        count_the_number_of_alignments(a, b)
        for a, b in zip(flat_data1, flat_data2)
    ]

    min_alignment = min(alignment_counts)
    avg_alignment = sum(alignment_counts) / len(alignment_counts)
    print(f"min alignment: {min_alignment}")
    print(f"avg alignment:{avg_alignment}")
    return min_alignment, avg_alignment
def tensor_to_numpy(tensor):
    """Return ``tensor`` as a NumPy array.

    Accepts a ``torch.Tensor``, a ``paddle.Tensor`` or an ``np.ndarray``
    (returned as-is). Relies on ``torch``, ``paddle`` and ``np`` being
    available as module-level names.

    Raises:
        TypeError: If ``tensor`` is none of the supported types.
    """
    if isinstance(tensor, torch.Tensor):
        # Drop autograd tracking and copy to host memory before converting.
        host_tensor = tensor.detach().cpu()
        return host_tensor.numpy()
    if isinstance(tensor, paddle.Tensor):
        host_tensor = tensor.cpu()
        return host_tensor.numpy()
    if isinstance(tensor, np.ndarray):
        return tensor
    raise TypeError("Unsupported data type")
_SIGNIFICANT_DIGITS = 7  # number of leading significant digits that are compared


def _scientific_parts(num):
    """Split finite ``abs(num)`` into (7 significant digits, decimal exponent)."""
    import math  # function-scope import: the script has no module-level import block

    if not math.isfinite(num):
        raise ValueError(f"Cannot extract significant digits from {num!r}")
    # Scientific notation has exactly one digit before the point; keeping 7
    # decimals gives 8 significant digits, so rounding happens at the 8th
    # digit. A carry can still propagate into the first 7 in rare cases
    # (e.g. 1.99999996 is rendered as 2.0000000).
    mantissa, _, exponent = "{:.7e}".format(abs(num)).partition("e")
    digits = mantissa.replace(".", "")[:_SIGNIFICANT_DIGITS]
    return digits, int(exponent)


def count_the_number_of_alignments(x, y):
    """Count the leading significant digits (at most 7) shared by ``x`` and ``y``.

    Digits can only be "aligned" when both numbers have the same sign and the
    same decimal exponent; otherwise values such as 0.1234567 and 1.234567
    would be reported as fully aligned although they differ by a factor of 10.

    Args:
        x: First real scalar (Python or NumPy float).
        y: Second real scalar.

    Returns:
        An int in ``[0, 7]``. If either value is NaN or infinite, the result
        is 7 when ``x == y`` (e.g. both ``+inf``) and 0 otherwise.
    """
    import math  # function-scope import: the script has no module-level import block

    if not (math.isfinite(x) and math.isfinite(y)):
        return _SIGNIFICANT_DIGITS if x == y else 0
    # `< 0` treats 0.0 and -0.0 as having the same sign.
    if (x < 0) != (y < 0):
        return 0

    x_digits, x_exponent = _scientific_parts(x)
    y_digits, y_exponent = _scientific_parts(y)
    if x_exponent != y_exponent:
        return 0

    aligned_count = 0
    for x_digit, y_digit in zip(x_digits, y_digits):
        if x_digit != y_digit:
            break
        aligned_count += 1
    return aligned_count


def extract_7_significant_digits(num):
    """Return the first 7 significant decimal digits of ``abs(num)`` as a string.

    The sign and the decimal exponent are discarded, e.g. ``-123.456`` gives
    ``"1234560"``.

    Raises:
        ValueError: If ``num`` is NaN or infinite (it has no decimal digits).
    """
    return _scientific_parts(num)[0]
其他补充信息 Additional Supplementary Information
No response
4条答案
按热度按时间esyap4oy1#
感谢反馈,我们将安排相关同学排查下
xoshrz7s2#
你好,我在最新的paddle上没有复现到这个问题,可以尝试更新下paddle版本
9njqaruj3#
> 你好,我在最新的paddle上没有复现到这个问题,可以尝试更新下paddle版本
感谢您的回复。
我的环境是:paddlepaddle-gpu 2.6.1.post112;
请问我需要更新到3.0-beta版本吗?
n8ghc7c14#
是的,可以尝试在3.0版本上测试下