#!/bin/bash
# Run Hive scripts in parallel groups: start a group in the background,
# wait for all of them, and abort if any script in the group failed.
# Requires: $some_value set in the environment (passed to hive as some_var).

# Without pipefail, `hive … | tee …` reports tee's exit status, so a failing
# hive run would never be counted. pipefail makes the pipeline fail if any
# stage fails.
set -o pipefail

LOG_DIR=/tmp/my_log_dir
mkdir -p "$LOG_DIR" || { echo "cannot create $LOG_DIR" >&2; exit 1; }

#######################################
# Run one HQL script, teeing its output to $LOG_DIR/<name>.log.
# Arguments: $1 - script base name (without .hql)
# Returns:   hive's exit status (thanks to pipefail)
#######################################
run_hive() {
  local script=$1
  hive -hiveconf "some_var"="$some_value" -f "${script}.hql" 2>&1 \
    | tee "$LOG_DIR/${script}.log"
}

# Set fail counter before parallel processes
FAIL=0
echo "Parallel loading 1, 2 and 3..."
run_hive myscript_1 &
run_hive myscript_2 &
run_hive myscript_3 &

# Wait for the three background jobs and count how many failed.
for job in $(jobs -p); do
  echo "$job"
  wait "$job" || FAIL=$((FAIL + 1))
done

# Exit if some process has failed
if [ "$FAIL" != "0" ]; then
  echo "Failed processes=($FAIL) Giving up..." >&2
  exit 1
fi

# Set fail counter before parallel processes
FAIL=0
echo "Continue with next parallel steps 4,5..."
run_hive myscript_4 &
# and so on
2条答案
按热度 | 按时间 —— 回答者：eblbsuwk（答案 1）
您可以使用任何工具进行工作流管理。最佳实践取决于用例和专业知识。
传统上在企业中,可以使用-control-m或cron调度器。
来自大数据生态系统:oozie还是azkaban
还有其他几种工具可用于工作流管理。
回答者：8hhllhi2（答案 2）
这也可以在 shell 脚本中轻松实现：您可以启动并行进程，等待它们全部结束，然后再启动其他进程。命令末尾的 & 符号指示 shell 在后台运行该进程。请参见此示例：
还有其他运行后台进程的方法:https://www.codeword.xyz/2015/09/02/three-ways-to-script-processes-in-parallel/