#!/bin/bash
# Start the background workloads. The three sleeps are placeholders: replace
# them with your own commands, each one sent to the background with '&'.
sleep 5 &
sleep 3 &
sleep 3 &
# Collect the PID that `jobs -p` reports for every background job into the
# array "pids", one element per job, in job order.
pids=()
while read -r job_pid; do
  [[ -n "${job_pid}" ]] && pids+=("${job_pid}")
done <<< "$(jobs -p)"
echo "pids = ${pids[*]}"
#######################################
# Report the exit status of a background job without blocking on it.
#
# The job is treated as finished once its /proc/<pid> directory is gone
# (Linux-style procfs is required); only then is `wait` called to fetch the
# status. While the directory still exists the function answers immediately
# with the sentinel 127.
#
# Arguments: $1 - PID of a background job started by this shell
# Outputs:   the job's exit code on stdout, or 127 while it is still alive
# NOTE: `wait` itself also yields 127 for a PID it does not know, so a job
#       that genuinely exits with 127 cannot be told apart from a live one.
#######################################
non_blocking_wait()
{
  # Keep the working variables local so a direct (non-subshell) call does
  # not clobber globals of the same name in the calling script.
  local pid=$1
  local code
  if [ ! -d "/proc/${pid}" ]; then
    wait "${pid}"
    code=$?
  else
    code=127
  fi
  echo "${code}"
}
# Poll the background jobs once per second. Exit 1 (after killing the
# remaining jobs) as soon as one of them reports a failing status, or exit 0
# once none of them is running any more.
# NOTE: non_blocking_wait reports 127 for a job that is still alive, so 127 is
# skipped below; a job that really exits with 127 is therefore not flagged.
n_total=${#pids[@]}
while true; do
  # Count the jobs bash still considers running. `jobs -r` filters on the job
  # state itself, so the count does not depend on the locale of the status
  # text or on the word "Running" appearing inside a command line.
  n_running=$(jobs -rp | wc -l)
  if [[ "${n_running}" -ne "${n_total}" ]]; then
    # At least one job has finished: check every job for an error status.
    for pid in "${pids[@]}"; do
      ret=$(non_blocking_wait "${pid}")
      echo "non_blocking_wait ${pid} ret = ${ret}"
      if [[ "${ret}" -ne 0 && "${ret}" -ne 127 ]]; then
        echo "Process ${pid} exited with error ${ret}"
        # Here we can take any desirable action, such as killing all
        # remaining children and exiting the program.
        # shellcheck disable=SC2046  # one PID per word is intended
        kill $(jobs -p) > /dev/null 2>&1
        exit 1
      fi
    done
  fi
  # Checked outside the branch above so that an empty job list also ends
  # the loop instead of polling forever.
  if [[ "${n_running}" -eq 0 ]]; then
    echo "All processes finished successfully"
    exit 0
  fi
  sleep 1
done
如果只是运行它,它将在所有进程结束时退出0:
$ ./script.sh
pids = 17913 17914 17915
non_blocking_wait 17913 ret = 127
non_blocking_wait 17914 ret = 0
non_blocking_wait 17915 ret = 0
non_blocking_wait 17913 ret = 127
non_blocking_wait 17914 ret = 0
non_blocking_wait 17915 ret = 0
non_blocking_wait 17913 ret = 0
All processes finished successfully
您可以从其中一个sleep命令中移除参数,使其失败,并看到程序立即返回:
$ ./script.sh
sleep: missing operand
Try 'sleep --help' for more information.
pids = 18005 18006 18007
non_blocking_wait 18005 ret = 127
non_blocking_wait 18006 ret = 1
Process 18006 exited with error 1
import functools
import subprocess
import time
def do_that(scripts, poll_interval=0.1):
    """Run every script in parallel and stop all of them when one fails.

    Each entry of ``scripts`` is started as ``./<entry>`` through the shell.
    The children are then polled until either all of them have exited with
    status 0, or one of them exits with a non-zero status.

    Args:
        scripts: iterable of script names, relative to the current directory.
        poll_interval: seconds to sleep between polling rounds, so the loop
            does not spin at full CPU while the scripts run.

    Returns:
        0 if every script exited with status 0, 1 as soon as one script
        exited with a non-zero status.
    """
    ps = [subprocess.Popen('./' + s, shell=True) for s in scripts]
    while True:
        done = True
        for p in ps:
            rc = p.poll()
            if rc is None:  # Script is still running
                done = False
            elif rc:
                # rc == 0 means the script finished successfully;
                # anything else is a failure.
                print('This script run failed:', p.args)
                # Only signal scripts that are actually still running;
                # already finished ones need no termination.
                stopped = []
                for other in ps:
                    if other is not p and other.poll() is None:
                        other.terminate()
                        print('Force terminate', other.args)
                        stopped.append(other)
                # Reap the terminated children so no zombies are left behind;
                # escalate to kill() if one does not exit after SIGTERM.
                for other in stopped:
                    try:
                        other.wait(timeout=5)
                    except subprocess.TimeoutExpired:
                        other.kill()
                        other.wait()
                return 1
        if done:
            print('All done.')
            return 0
        time.sleep(poll_interval)
def timeit(func):
    """Decorator that prints how long each call of ``func`` took.

    After the wrapped function returns, one line of the form
    ``<function name> cost: <seconds rounded to 0.1>`` is printed and the
    function's return value is passed through unchanged. Nothing is printed
    when the wrapped function raises.
    """
    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def runner(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration is not
        # affected by adjustments of the system clock.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(func.__name__, 'cost:', round(elapsed, 1))
        return res
    return runner
@timeit
def main():
    """Run the example scripts in parallel and return do_that's result.

    Returns:
        0 when all scripts succeeded, 1 when one of them failed.
    """
    scripts = ('script1.sh', 'script2.sh')
    return do_that(scripts)


if __name__ == '__main__':
    # Use the result as the process exit status so that a failed script
    # makes this program exit non-zero as well.
    raise SystemExit(main())
4条答案
按热度按时间lbsnaicq1#
TL;DR
实际答案
在并行运行作业时,我发现考虑GNU Parallel非常有用,因为它可以简化很多方面:
因此,我创建了4个虚拟作业
script1.sh
到script4.sh
,如下所示:script3.sh
除外,它在其他之前失败:因此,以下是并行运行4个作业的默认方式,每个作业的输出都被收集起来,并一个接一个地显示:
您可以首先看到
script3.sh
最先失败退出,其所有输出首先被收集并显示,接着是其他作业的分组输出。简单地说,输出按作业分组,并在每个作业完成时显示。现在让我们再做一次,但只按行缓冲输出,而不是等待作业完成并按作业收集输出:
我们可以清楚地看到,
script3.sh
在其他作业之前失败并退出,但其他作业仍然运行到完成。现在,我们希望GNU Parallel在任何一个作业失败时立即终止所有正在运行的作业:
您可以看到
script3.sh
已死亡,其他作业都没有完成,因为GNU Parallel已将它们杀死。您还可以获得失败退出状态:
它比我展示的 * 灵活得多 *。你可以将
now
更改为soon
,而不是杀死其他作业,它不会启动任何新的作业。你可以将fail=1
更改为success=50%
,这样当一半的作业成功退出时,它就会停止,等等。您还可以添加
--eta
或--bar
来生成进度报告,并在网络上分发作业等等。在CPU越来越“胖”(更多内核)而不是越来越“高”(更多GHz)的今天,它值得一读——有一份很好的PDF可供参考(here)。注意:默认情况下,GNU Parallel将保持与CPU内核数一样多的作业并行运行。因此,如果您的内核数少于4个,您可能应该在我的建议答案中加上
-j 4
,以告诉它即使只有1或2个内核也可以并行运行多达4个作业。h79rfbju2#
这里有一个脚本可以帮你完成这件事。我从here借用(并修改)了
non_blocking_wait
函数。如果只是运行它,它将在所有进程结束时退出0:
您可以从其中一个sleep命令中移除参数,使其失败,并看到程序立即返回:
ctehm74n3#
一种解决方案是使用子过程:
ou6hu8tu4#
wait -n
等待下一个程序退出并返回其退出状态。