不加内存优化时,前6个batch的loss值为:[2.3050585, 4.64196, 2.0815804, 2.0276387, 2.0014627, 1.6691642]。加上 fluid.memory_optimize(fluid.default_main_program()) 后,前6个batch的loss值为:[2.3050585, 2.3096275, 5.387212, 35.37569, 33.65272, 29.050358]
hs1rzwqc1#
使用Paddle release/0.15.0 和如下代码未复现问题,请 @kolinwei 给出最小复现代码。
from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import paddle import paddle.fluid as fluid def cnn_model(data, is_simple=False): conv_pool_1 = fluid.nets.simple_img_conv_pool( input=data, filter_size=5, num_filters=20, pool_size=2, pool_stride=2, act="relu") conv_pool_2 = fluid.nets.simple_img_conv_pool( input=conv_pool_1, filter_size=5, num_filters=50, pool_size=2, pool_stride=2, act="relu") SIZE = 10 input_shape = conv_pool_2.shape param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE] scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5 predict = fluid.layers.fc( input=data if is_simple else conv_pool_2, size=SIZE, act="softmax", param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.NormalInitializer( loc=0.0, scale=scale, seed=1))) return predict def run_benchmark(): # Input data fluid.default_startup_program().random_seed = 2 images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # Train program predict = cnn_model(images) cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(x=cost) # Optimization opt = fluid.optimizer.DecayedAdagradOptimizer( learning_rate=0.001, decay=0.95, epsilon=1.0e-6) opt.minimize(avg_cost) # fluid.memory_optimize(fluid.default_main_program()) # Initialize executor place = fluid.CUDAPlace(0) exe = fluid.Executor(place) # Parameter initialization exe.run(fluid.default_startup_program()) # Reader train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=8192) pe = fluid.ParallelExecutor(use_cuda=True, main_program=fluid.default_main_program()) for batch_id, data in enumerate(train_reader()): img_data = np.array( [x[0].reshape([1, 28, 28]) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([len(y_data), 1]) loss = pe.run( feed={"pixel": 
img_data, "label": y_data}, fetch_list=[avg_cost.name] ) # The accuracy is the accumulation of batches, but not the current batch. print(loss) if __name__ == '__main__': run_benchmark()
txu3uszq2#
我试了下,单机下复现不了,多机时会出现
2条答案
按热度按时间hs1rzwqc1#
使用Paddle release/0.15.0 和如下代码未复现问题,请 @kolinwei 给出最小复现代码。
txu3uszq2#
我试了下,单机下复现不了,多机时会出现