Skip to content

Commit efa51ca

Browse files
authored
Merge branch 'main' into fix-llvm
2 parents 4dd3091 + 6470e46 commit efa51ca

16 files changed

+186
-81
lines changed

EESSI-extend-easybuild.eb

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,24 @@ sysroot = os.getenv("EESSI_EPREFIX")
9696
-- Check if we have GPU capabilities and configure CUDA compute capabilities
9797
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
9898
if (eessi_accelerator_target ~= nil) then
99-
cuda_compute_capability = string.match(eessi_accelerator_target, "^accel/nvidia/cc([0-9][0-9])$")
99+
-- require at least two digits so that both a major and a minor version can be split off below
cuda_compute_capability = string.match(eessi_accelerator_target, "^accel/nvidia/cc([0-9][0-9]+)$")
100100
if (cuda_compute_capability ~= nil) then
101-
easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2)
101+
-- The last digit should be the minor version, insert a dot in the one-but-last position
102+
major_version = cuda_compute_capability:sub(1, #cuda_compute_capability - 1)
103+
minor_version = cuda_compute_capability:sub(#cuda_compute_capability)
104+
easybuild_cuda_compute_capabilities = string.format("%s.%s", major_version, minor_version)
102105
else
103106
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
104107
end
108+
109+
-- If architectures are 9.0, 10.0 or 12.0, enable architecture or family-specific optimizations
110+
if easybuild_cuda_compute_capabilities == '9.0' then
111+
easybuild_cuda_compute_capabilities = '9.0a'
112+
elseif easybuild_cuda_compute_capabilities == '10.0' then
113+
easybuild_cuda_compute_capabilities = '10.0f'
114+
elseif easybuild_cuda_compute_capabilities == '12.0' then
115+
easybuild_cuda_compute_capabilities = '12.0f'
116+
end
105117
end
106118
107119
-- Some environment variables affect behaviour, let's gather them once
@@ -228,6 +240,7 @@ if mode() == "unload" or mode() == "dependencyCk" or convertToCanonical(easybuil
228240
setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1")
229241
setenv ("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1")
230242
setenv ("EASYBUILD_LOCAL_VAR_NAMING_CHECK", "error")
243+
setenv ("EASYBUILD_PARALLEL_EXTENSIONS_INSTALL", "1")
231244
-- Set environment variables that are EESSI version specific
232245
if convertToCanonical(eessi_version) > convertToCanonical("2023.06") then
233246
setenv ("EASYBUILD_PREFER_PYTHON_SEARCH_PATH", "EBPYTHONPREFIXES")

EESSI-install-software.sh

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ else
149149

150150
# make sure the software and modules directories exist
151151
# (since it's expected by init/eessi_environment_variables when using archdetect and by the EESSI module)
152-
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/{modules,software}
152+
mkdir -p -v ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/{modules,software}
153153

154154
# If EESSI_ACCELERATOR_TARGET_OVERRIDE is defined, we are building for an accelerator target
155155
# In that case, make sure the modulepath for the accelerator subdir exists, otherwise the EESSI module will not
@@ -160,7 +160,7 @@ else
160160
# Note that ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/${EESSI_ACCELERATOR_TARGET_OVERRIDE}/modules/all
161161
# is only the correct path if EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE is not set
162162
if [ -z $EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE ]; then
163-
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/${EESSI_ACCELERATOR_TARGET_OVERRIDE}/modules/all
163+
mkdir -p -v ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/${EESSI_ACCELERATOR_TARGET_OVERRIDE}/modules/all
164164
else
165165
# At runtime, one might want to use a different CPU subdir for a given accelerator. E.g. one could use
166166
# a zen2 CPU subdir on a zen4 node if the required GPU software isn't available in the zen4 tree.
@@ -207,7 +207,7 @@ fi
207207

208208
# the install_scripts.sh script relies on knowing the location of the PR diff
209209
# assume there's only one diff file that corresponds to the PR patch file
210-
pr_diff=$(ls [0-9]*.diff | head -1)
210+
pr_diff=$(ls [0-9]*.diff | head -n 1)
211211
export PR_DIFF="$PWD/$pr_diff"
212212

213213
# Only run install_scripts.sh if not in dev.eessi.io for security
@@ -249,6 +249,8 @@ fi
249249

250250
# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory
251251
export PYTHONPYCACHEPREFIX=$TMPDIR/pycache
252+
# force Python's stdout and stderr streams to be unbuffered
253+
export PYTHONUNBUFFERED=1
252254

253255
# if we run the script for the first time, e.g., to start building for a new
254256
# stack, we need to ensure certain files are present in
@@ -368,7 +370,9 @@ else
368370
fi
369371

370372
# use PR patch file to determine in which easystack files stuff was added
371-
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing')
373+
# Note that we exclude the scripts/gpu_support/ dir, since those are not meant to be built in the
374+
# software-layer, but they are helper easystacks for installing e.g. CUDA in host_injections
375+
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing' | (grep -v "scripts/gpu_support/" || true))
372376
if [ -z "${changed_easystacks}" ]; then
373377
echo "No missing installations, party time!" # Ensure the bot reports success, as there was nothing to be built here
374378
else

bot/build.sh

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,22 @@ else
171171
fi
172172
echo "bot/build.sh: EESSI_ACCELERATOR_TARGET_OVERRIDE='${EESSI_ACCELERATOR_TARGET_OVERRIDE}'"
173173

174+
# Log the full lscpu and os-release info:
175+
lscpu > _bot_job${SLURM_JOB_ID}.lscpu
176+
cat /etc/os-release > _bot_job${SLURM_JOB_ID}.os
177+
178+
# Also: fetch CPU flags into an array, so that we can implement a hard check against a reference
179+
lscpu_flags_line=$(lscpu | grep "Flags:" || echo "")
180+
# strip leading "Flags:" and spaces, and put result in a bash array
181+
# match against the full "Flags:" line captured above; word-splitting of the capture is intentional
if [[ $lscpu_flags_line =~ Flags:\ (.*) ]]; then lscpu_flags=(${BASH_REMATCH[1]}); fi
182+
# for now, just print
183+
echo "bot/build.sh: CPU flags=${lscpu_flags[@]}"
184+
# TODO: an actual comparison with a reference bash array, e.g. through
185+
# diff_result=$(diff <(printf "%s\n" "${lscpu_flags[@]}" | sort) <(printf "%s\n" "${lscpu_flags_ref[@]}" | sort))
186+
# if [ ! -z "$diff_result" ]; then
187+
# echo "bot/build.sh: ERROR: difference between reported lscpu flags and reference for this ($EESSI_SOFTWARE_SUBDIR_OVERRIDE) CPU architecture. This could mean an incorrect build host was used to build for this target."
188+
# fi
189+
174190
# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux)
175191
EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
176192
export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux}
@@ -265,16 +281,24 @@ BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
265281
TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}")
266282

267283
timestamp=$(date +%s)
284+
# determine compression/extension for tarball, check in order of preference
285+
if [[ -x "$(command -v zstd)" ]]; then
286+
tarball_extension="tar.zst"
287+
elif [[ -x "$(command -v gzip)" ]]; then
288+
tarball_extension="tar.gz"
289+
else
290+
tarball_extension="tar"
291+
fi
268292
# to set EESSI_VERSION we need to source init/eessi_defaults now
269293
source $software_layer_dir/init/eessi_defaults
270294
# Note: if ${EESSI_DEV_PROJECT} is defined (building for dev.eessi.io), then we
271295
# append the project (subdirectory) name to the end tarball name. This is information
272296
# then used at the ingestion stage. If ${EESSI_DEV_PROJECT} is not defined, nothing is
273297
# appended
274298
if [[ -z ${EESSI_ACCELERATOR_TARGET_OVERRIDE} ]]; then
275-
export TGZ=$(printf "eessi-%s-software-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp})
299+
export TARBALL=$(printf "eessi-%s-software-%s-%s-%b%d.${tarball_extension}" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp})
276300
else
277-
export TGZ=$(printf "eessi-%s-software-%s-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp})
301+
export TARBALL=$(printf "eessi-%s-software-%s-%s-%s-%b%d.${tarball_extension}" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp})
278302
fi
279303

280304
# Export EESSI_DEV_PROJECT to use it (if needed) when making tarball
@@ -288,8 +312,8 @@ export EESSI_DEV_PROJECT=${EESSI_DEV_PROJECT}
288312
TMP_IN_CONTAINER=/tmp
289313
echo "Executing command to create tarball:"
290314
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${TARBALL_STEP_ARGS[@]}"
291-
echo " -- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} \"${EESSI_ACCELERATOR_TARGET_OVERRIDE}\" /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}"
315+
echo " -- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} \"${EESSI_ACCELERATOR_TARGET_OVERRIDE}\" /eessi_bot_job/${TARBALL} 2>&1 | tee -a ${tar_outerr}"
292316
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${TARBALL_STEP_ARGS[@]}" \
293-
-- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} "${EESSI_ACCELERATOR_TARGET_OVERRIDE}" /eessi_bot_job/${TGZ} 2>&1 | tee -a ${tar_outerr}
317+
-- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} "${EESSI_ACCELERATOR_TARGET_OVERRIDE}" /eessi_bot_job/${TARBALL} 2>&1 | tee -a ${tar_outerr}
294318

295319
exit 0

bot/check-build.sh

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
# - SUCCESS (all of)
1919
# - working directory contains slurm-JOBID.out file
20-
# - working directory contains eessi*tar.gz
20+
# - working directory contains eessi*tar*
2121
# - no message FATAL
2222
# - no message ERROR
2323
# - no message FAILED
@@ -165,19 +165,19 @@ if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
165165
fi
166166

167167
if [[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -eq 0 ]]; then
168-
TGZ=-1
168+
TARBALL_CREATED=-1
169169
TARBALL=
170170
if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then
171-
GP_tgz_created="\.tar\.gz created!"
172-
grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_tgz_created}" | sort -u)
171+
GP_tarball_created="\.tar.* created!"
172+
grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_tarball_created}" | sort -u)
173173
if [[ $? -eq 0 ]]; then
174-
TGZ=1
174+
TARBALL_CREATED=1
175175
TARBALL=$(echo ${grep_out} | sed -e 's@^.*/\(eessi[^/ ]*\) .*$@\1@')
176176
else
177-
TGZ=0
177+
TARBALL_CREATED=0
178178
fi
179179
# have to be careful to not add searched for pattern into slurm out file
180-
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_tgz_created}"'"
180+
[[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_tarball_created}"'"
181181
[[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}"
182182
fi
183183
fi
@@ -190,7 +190,7 @@ fi
190190
[[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)"
191191
[[ ${VERBOSE} -ne 0 ]] && echo " NO_MISSING.: $([[ $NO_MISSING -eq 1 ]] && echo 'yes' || echo 'no') (yes)"
192192
if [[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -eq 0 ]]; then
193-
[[ ${VERBOSE} -ne 0 ]] && echo " TGZ_CREATED: $([[ $TGZ -eq 1 ]] && echo 'yes' || echo 'no') (yes)"
193+
# report the TARBALL_CREATED flag (TARBALL holds the tarball file name, not a 0/1 status)
[[ ${VERBOSE} -ne 0 ]] && echo " TARBALL_CREATED: $([[ $TARBALL_CREATED -eq 1 ]] && echo 'yes' || echo 'no') (yes)"
194194
fi
195195

196196
# Here, we try to do some additional analysis on the output file
@@ -219,7 +219,7 @@ if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \
219219
[[ ${FAILED} -eq 0 ]] && \
220220
[[ ${MISSING} -eq 0 ]] && \
221221
[[ ${NO_MISSING} -eq 1 ]] && \
222-
[[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -ne 0 || ${TGZ} -eq 1 ]] && \
222+
[[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -ne 0 || ${TARBALL_CREATED} -eq 1 ]] && \
223223
[[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -ne 0 || -n ${TARBALL} ]]; then
224224
# SUCCESS
225225
status="SUCCESS"
@@ -429,9 +429,9 @@ failure_msg="no message matching <code>${GP_no_missing}</code>"
429429
comment_details_list=${comment_details_list}$(add_detail ${NO_MISSING} 1 "${success_msg}" "${failure_msg}")
430430

431431
if [[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -eq 0 ]]; then
432-
success_msg="found message matching <code>${GP_tgz_created}</code>"
433-
failure_msg="no message matching <code>${GP_tgz_created}</code>"
434-
comment_details_list=${comment_details_list}$(add_detail ${TGZ} 1 "${success_msg}" "${failure_msg}")
432+
success_msg="found message matching <code>${GP_tarball_created}</code>"
433+
failure_msg="no message matching <code>${GP_tarball_created}</code>"
434+
comment_details_list=${comment_details_list}$(add_detail ${TARBALL_CREATED} 1 "${success_msg}" "${failure_msg}")
435435
fi
436436

437437
# Now, do the actual replacement of __DETAILS_FMT__
@@ -478,7 +478,16 @@ if [[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -eq 0 ]]; then
478478
size="$(stat --dereference --printf=%s ${TARBALL})"
479479
size_mib=$((${size} >> 20))
480480
tmpfile=$(mktemp --tmpdir=. tarfiles.XXXX)
481-
tar tf ${TARBALL} > ${tmpfile}
481+
if [[ "${TARBALL}" == *.tar.zst ]]; then
482+
tar --use-compress-program=zstd -tf ${TARBALL} > ${tmpfile}
483+
elif [[ "${TARBALL}" == *.tar.gz ]]; then
484+
tar --use-compress-program=gzip -tf ${TARBALL} > ${tmpfile}
485+
elif [[ "${TARBALL}" == *.tar ]]; then
486+
tar -tf ${TARBALL} > ${tmpfile}
487+
else
488+
echo "ERROR: Unsupported tarball extension!" >&2
489+
exit 1
490+
fi
482491
entries=$(cat ${tmpfile} | wc -l)
483492
# determine prefix from job config: VERSION/software/OS_TYPE/CPU_FAMILY/ARCHITECTURE
484493
# e.g., 2023.06/software/linux/x86_64/intel/skylake_avx512
@@ -503,7 +512,7 @@ if [[ $USE_CHECK_BUILD_ARTEFACTS_SCRIPT -eq 0 ]]; then
503512
modules_entries=$(grep "${prefix}/modules" ${tmpfile})
504513
software_entries=$(grep "${prefix}/software" ${tmpfile})
505514
reprod_entries=$(grep "${prefix}/reprod" ${tmpfile})
506-
reprod_shortened=$(echo "${reprod_entries}" | sed -e "s@${prefix}/reprod/@@" | awk -F/ '{if (NR >= 4) {print $1 "/" $2 "/" $3}}' | sort -u)
515+
reprod_shortened=$(echo "${reprod_entries}" | sed -e "s@${prefix}/reprod/@@" | awk -F/ '{if (NF >= 4) {print $1 "/" $2 "/" $3}}' | sort -u)
507516
other_entries=$(cat ${tmpfile} | grep -v "${prefix}/modules" | grep -v "${prefix}/software" | grep -v "${prefix}/reprod")
508517
other_shortened=$(echo "${other_entries}" | sed -e "s@^.*${prefix}/@@" | sort -u)
509518
modules=$(echo "${modules_entries}" | grep "/all/.*/.*lua$" | sed -e 's@^.*/\([^/]*/[^/]*.lua\)$@\1@' | sort -u)

bot/test.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then
100100
fi
101101

102102
# try to determine tmp directory from build job
103-
RESUME_DIR=$(grep 'Using .* as tmp directory' slurm-${SLURM_JOBID}.out | head -1 | awk '{print $2}')
103+
RESUME_DIR=$(grep 'Using .* as tmp directory' slurm-${SLURM_JOBID}.out | head -n 1 | awk '{print $2}')
104104

105105
if [[ -z ${RESUME_DIR} ]]; then
106106
RESUME_TGZ=${PWD}/previous_tmp/build_step/$(ls previous_tmp/build_step)
@@ -189,6 +189,10 @@ EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
189189
export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux}
190190
echo "bot/test.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'"
191191

192+
# Get node_type from .architecture.node_type in ${JOB_CFG_FILE}
193+
export BOT_NODE_TYPE=$(cfg_get_value "architecture" "node_type")
194+
echo "bot/test.sh: BOT_NODE_TYPE='${BOT_NODE_TYPE}'"
195+
192196
# prepare arguments to eessi_container.sh common to build and tarball steps
193197
declare -a COMMON_ARGS=()
194198
COMMON_ARGS+=("--verbose")
@@ -237,6 +241,9 @@ fi
237241
if [[ ${SHARED_FS_PATH} ]]; then
238242
TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")
239243
fi
244+
if [[ ${BOT_NODE_TYPE} ]]; then
245+
TEST_SUITE_ARGS+=("--partition" "${BOT_NODE_TYPE}")
246+
fi
240247
# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
241248
# [[ ! -z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")
242249

create_tarball.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ eessi_tmpdir=$1
1212
eessi_version=$2
1313
cpu_arch_subdir=$3
1414
accel_subdir=$4
15-
target_tgz=$5
15+
target_tarball=$5
1616

1717
tmpdir=`mktemp -d`
1818
echo ">> tmpdir: $tmpdir"
@@ -114,10 +114,9 @@ fi
114114

115115
topdir=${cvmfs_repo}/versions/
116116

117-
echo ">> Creating tarball ${target_tgz} from ${topdir}..."
118-
tar cfvz ${target_tgz} -C ${topdir} --files-from=${files_list}
119-
120-
echo ${target_tgz} created!
117+
echo ">> Creating tarball ${target_tarball} from ${topdir}..."
118+
tar cavf ${target_tarball} -C ${topdir} --files-from=${files_list}
119+
echo ${target_tarball} created!
121120

122121
echo ">> Cleaning up tmpdir ${tmpdir}..."
123122
rm -r ${tmpdir}

0 commit comments

Comments
 (0)