Bash Parallel Processing
In Bash it is relatively easy to break a task into subprocesses. What is less easy is to sanely coordinate output and track result codes, especially for verbose operations. Below are two examples demonstrating different approaches.
Parallel Deployment example with SSH
This example shows performing a deployment operation across a collection of servers. It coordinates output into /tmp before reporting on success or failure of the deployment.
# Fail a pipeline if any stage fails, not just the last one.
set -o pipefail
# Overall script status: flipped to 1 if any server's deployment fails.
EXIT_STATUS=0
# ANSI color codes for readable terminal output.
RED='\033[0;31m'
GRN='\033[0;32m'
YLW='\033[0;33m'
BLU='\033[0;34m'
MAG='\033[0;35m'
CYA='\033[0;36m'
LRED='\033[0;91m'
LGRN='\033[0;92m'
LYLW='\033[0;93m'
LBLU='\033[0;94m'
LMAG='\033[0;95m'
LCYA='\033[0;96m'
GRY='\033[0;90m'
NC='\033[0m' # reset
SERVERS=() # populate this with a server list, can be pulled from AWS etc
PIDS=()    # background-job PIDs, appended as deployments are launched
# DEPLOYMENT SCRIPT:
# The quoted 'EOF' delimiter keeps the body literal (no local expansion);
# it is expanded on the remote side when passed to ssh.
# NOTE: read -d '' returns non-zero at EOF; harmless since this outer
# script does not enable 'set -e'.
read -r -d '' DEPLOY <<'EOF'
# make sure we exit the process on first failure
set -e
# PUT operations for build or deploy here
EOF
#################################################################
## BUNCH OF STUFF TO DO A CONCURRENT DEPLOYMENT
# Run the $DEPLOY script on one host over SSH, capturing its output for
# later reporting.
# Arguments:
#   $1 - hostname to deploy to
#   $2 - numeric index; output is written to /tmp/output_<index>
# This function is intended to run in a background subprocess, so it
# terminates with 'exit': the remote command's status becomes the status
# that 'wait' reports for this job's PID.
deployToServer() {
  local host="$1"
  local index="$2"
  local output result
  # ssh to host and run deployment, merging stderr into the captured output
  output=$(ssh "USERHERE@$host" "$DEPLOY" 2>&1)
  result=$?
  # persist output so the parent can print it after all jobs finish
  # (printf instead of echo: safe even if output starts with '-n' / '-e')
  printf '%s\n' "$output" > "/tmp/output_${index}"
  exit "$result"
}
# ITERATE AND DEPLOY
# Fan out one background deployment per server, recording each PID so the
# results section below can wait on them in launch order (PIDS[i] pairs
# with SERVERS[i]).
echo -e "${LBLU}--------------------------------${NC}"
index=0
PIDS=()
for host in "${SERVERS[@]}"; do
  echo -e "${LBLU}BEGIN DEPLOY TO SERVER:${NC} $host $index"
  deployToServer "$host" "$index" &
  PIDS+=("$!")
  index=$((index + 1))
done
echo -e "${LBLU}--------------------------------${NC}"
echo ""
#################################################################
## BUNCH OF STUFF TO DEAL WITH DEPLOYMENT RESULTS
# Reap each background deployment in launch order and report per-host
# success/failure. 'wait <pid>' returns that job's exit status, which is
# the status deployToServer passed to 'exit'.
echo -e "${LBLU}--------------------------------${NC}"
index=0
for pid in "${PIDS[@]}"; do
  wait "$pid"
  exit_status=$?
  host="${SERVERS[$index]}"
  if [ "$exit_status" -ne 0 ]; then
    echo -e "${LRED}FAILURE${NC} $host"
    # remember the failure for the script's overall exit status
    EXIT_STATUS=1
  else
    echo -e "${LGRN}SUCCESS${NC} $host"
  fi
  index=$((index + 1))
done
echo -e "${LBLU}--------------------------------${NC}"
echo ""
# OUTPUT DETAILS
# Print each host's captured deployment output, then remove the temp file.
index=0
for host in "${SERVERS[@]}"; do
  outputfile="/tmp/output_${index}"
  echo -e "${GRY}--------------------------------${NC}"
  echo -e "${GRY}DETAILS${NC} $host:"
  cat "$outputfile"
  echo -e "${GRY}--------------------------------${NC}"
  echo ""
  index=$((index + 1))
  # -f: don't error if the deploy died before writing its output file
  rm -f -- "$outputfile"
done
#################################################################
# EXIT WITH OVERALL STATUS
# Default to 0 so the expansion is never empty even if no deploys ran.
exit "${EXIT_STATUS:-0}"
Simpler Parallels with Batching
This example is less noisy and does not concern itself with overall exit status or output capturing for local purposes. It also lets you batch your parallel jobs so you don't overwhelm your system. It shows updating a composer package across a bunch of different file paths.
librarypaths=() # paths to library checkouts to update; populate before running
# Update a composer package in one library checkout and, if composer.lock
# changed, commit and push it on the checkout's current branch.
# Arguments:
#   $1 - path to the library working copy
#   $2 - index (currently unused; kept for interface compatibility)
# Runs in a background subprocess, so it terminates with 'exit'; a non-zero
# exit is reported as FAILURE by the reaping loop below.
update_library() {
  local path="$1"
  local index="$2"
  local package="PACKAGE-HERE"
  # bail out of this job rather than running git/composer in the wrong dir
  pushd "$path" > /dev/null || exit 1
  echo "syncing package $path"
  local branch
  branch=$(git rev-parse --abbrev-ref HEAD) || exit 1
  # NOTE(review): composer errors are deliberately suppressed here; a failed
  # update simply results in "no changes" below.
  composer update "$package" > /dev/null 2>&1
  # dry-run add prints the file only if composer.lock actually changed
  local changed
  changed=$(git add --dry-run composer.lock)
  if [ -n "$changed" ]; then
    git add composer.lock
    git commit -m 'updated library'
    git push origin "$branch"
    echo "* completed $path"
  else
    echo "* skipped no changes $path"
  fi
  popd > /dev/null
  exit 0
}
# Process librarypaths in batches of $batchsize concurrent jobs so we don't
# overwhelm the local machine (or the remote git host).
batchsize=3
len="${#librarypaths[@]}"
for ((i = 0; i < len; i += batchsize)); do
  PIDS=()
  PIDPATHS=()
  # launch up to $batchsize background jobs for this batch
  for ((b = 0; b < batchsize; b++)); do
    pathindex=$((i + b))
    if ((pathindex < len)); then
      path="${librarypaths[$pathindex]}"
      echo "> PROCESS $path"
      update_library "$path" "$pathindex" &
      PIDS+=("$!")
      PIDPATHS+=("$path")
    fi
  done
  # barrier: reap every job in this batch and report its status
  index=0
  for pid in "${PIDS[@]}"; do
    wait "$pid"
    exit_status=$?
    path="${PIDPATHS[$index]}"
    if [ "$exit_status" -ne 0 ]; then
      echo "> FAILURE $path"
    else
      echo "> SUCCESS $path"
    fi
    index=$((index + 1))
  done
done