Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions mlir/test/common_utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ def get_agents():
return agents


def get_default_agent():
    """Return the architecture name reported for HIP device 0.

    Device 0 is the device HIP uses by default. The name is taken from the
    ``gcnArchName`` field of the device properties and decoded as UTF-8.

    Returns:
        The decoded architecture string, or ``None`` when HIP reports no
        devices.
    """
    if hip_check(hip.hipGetDeviceCount()) <= 0:
        # No device to query.
        return None

    device_props = hip.hipDeviceProp_t()
    # hipGetDeviceProperties fills ``device_props`` in place for device 0.
    hip_check(hip.hipGetDeviceProperties(device_props, 0))
    return device_props.gcnArchName.decode('utf-8')


def is_xdlops_present() -> bool:
    """Check whether a GPU with xdlops support is present.

    A GPU counts as supporting xdlops when its agent name, as returned by
    ``get_agents()``, starts with ``"gfx9"``.

    Returns:
        ``True`` if at least one agent name starts with ``"gfx9"``,
        ``False`` otherwise (including when there are no agents).
    """
    # NOTE(review): the prefix test accepts every gfx9* name -- confirm that
    # this is the intended set of xdlops-capable architectures.
    for agent_name in get_agents():
        if agent_name.startswith("gfx9"):
            return True
    return False
23 changes: 22 additions & 1 deletion mlir/test/e2e/generateE2ETest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def hip_check(call_result):


def get_arch():
"""Returns all unique GPU architectures in the system."""
agents = set()
device_count = hip_check(hip.hipGetDeviceCount())
for device in range(device_count):
Expand All @@ -60,6 +61,16 @@ def get_arch():
return agents


def get_default_arch():
    """Return the architecture name of HIP device 0, the HIP default device.

    Returns:
        The UTF-8 decoded ``gcnArchName`` of device 0, or ``None`` when the
        HIP device count is not positive.
    """
    num_devices = hip_check(hip.hipGetDeviceCount())
    if not num_devices > 0:
        return None

    # Query only device 0; its properties are written into ``dev_props``.
    dev_props = hip.hipDeviceProp_t()
    hip_check(hip.hipGetDeviceProperties(dev_props, 0))
    arch_name = dev_props.gcnArchName.decode('utf-8')
    return arch_name


def generate_option_list(prefixes: dict, table: list, key1: str, key2: str):
options_list = []
for item in table[key1]:
Expand Down Expand Up @@ -134,7 +145,17 @@ def usage():
axis_prefixes[axis["name"]] = axis["prefix"]

arch_names = get_arch()
arch = ','.join(arch_names)
default_arch = get_default_arch()
# Use device 0's architecture (HIP default) for compilation
# This ensures compiled binaries run on the default GPU
if default_arch:
if len(arch_names) > 1:
print(f"Note: Multiple GPU architectures detected: {', '.join(sorted(arch_names))}. "
f"Using device 0 architecture '{default_arch}' for test generation. "
f"Use HIP_VISIBLE_DEVICES to select a different GPU.")
arch = default_arch
else:
arch = ""
combinations = generate_option_list(axis_prefixes, toml_dict, "axis", "values")

for suite in toml_dict["suite"]:
Expand Down
14 changes: 11 additions & 3 deletions mlir/test/e2e/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,21 @@
config.substitutions.append(('%shlibext', config.llvm_shlib_ext))
config.substitutions.append(("%mlir_src_root", config.mlir_src_root))
config.substitutions.append(('%random_data', config.random_data))
config.substitutions.append(('%constrained_float_range_random_data',
config.constrained_float_range_random_data))
config.substitutions.append(
('%constrained_float_range_random_data', config.constrained_float_range_random_data))
config.substitutions.append(('%rocmlir_gen_flags', config.rocmlir_gen_flags))
config.substitutions.append(('%arch', config.arch))
config.substitutions.append(('%pv', config.populate_validation))

llvm_config.with_system_environment(['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP'])
llvm_config.with_system_environment(
['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP', 'HIP_VISIBLE_DEVICES'])

# When multiple GPUs were detected, restrict HIP to device 0 so that the
# compiled binaries match the device they execute on. A HIP_VISIBLE_DEVICES
# value already set by the user is left untouched.
if (getattr(config, 'multi_gpu_detected', False)
        and 'HIP_VISIBLE_DEVICES' not in os.environ):
    config.environment['HIP_VISIBLE_DEVICES'] = '0'

##############
# FIXME: adding a path to the environment isn't appearing to work as
Expand Down
33 changes: 28 additions & 5 deletions mlir/test/e2e/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ config.rocmlir_common_python_tests_utils = "@ROCMLIR_COMMON_PYTHON_TESTS_UTILS@"

# Add common python test utils
sys.path.append(config.rocmlir_common_python_tests_utils)
from common import get_agents, get_arch_features
from common import get_agents, get_arch_features, get_default_agent

# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
Expand All @@ -42,19 +42,42 @@ except KeyError:

# If rocm_agent_enumerator shows no viable GPUs, skip tests that need one,
# because the default target will lead to compilation failures.
import os
config.no_AMD_GPU = False
config.arch = ""
config.features = None
config.arch_support_mfma = False
config.arch_support_wmma = False
config.arch_support_accel_fp8 = False
config.multi_gpu_detected = False
if config.rocm_path:
try:
# Check if user already set HIP_VISIBLE_DEVICES - respect their choice
user_hip_visible = os.environ.get('HIP_VISIBLE_DEVICES')

agents = get_agents()
config.arch = ','.join(agents)
for x in agents:
config.features, config.arch_support_mfma, config.arch_support_wmma, config.arch_support_accel_fp8 = get_arch_features(x)
config.substitutions.append(('%features', config.features))
default_agent = get_default_agent()
if default_agent:
if len(agents) > 1:
config.multi_gpu_detected = True
if user_hip_visible is not None:
# User specified which GPU to use - respect it
# HIP will see their chosen GPU as device 0
lit_config.note("Multiple GPU architectures detected: %s. "
"Using user-specified HIP_VISIBLE_DEVICES=%s. "
"Device 0 (after filtering) architecture: '%s'."
% (', '.join(sorted(agents)), user_hip_visible, default_agent))
else:
# No user preference - use device 0 and set HIP_VISIBLE_DEVICES
lit_config.note("Multiple GPU architectures detected: %s. "
"Using device 0 architecture '%s' for E2E tests. "
"HIP_VISIBLE_DEVICES will be set to '0' to ensure binary compatibility."
% (', '.join(sorted(agents)), default_agent))
config.arch = default_agent
# Get features for the device we'll actually use
config.features, config.arch_support_mfma, config.arch_support_wmma, config.arch_support_accel_fp8 = get_arch_features(default_agent)
if config.features:
config.substitutions.append(('%features', config.features))

# Check other features here
if not config.arch:
Expand Down
10 changes: 9 additions & 1 deletion mlir/test/fusion/e2e/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@
config.substitutions.append(('%arch', config.arch))
config.substitutions.append(('%pv', config.populate_validation))

llvm_config.with_system_environment(['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP'])
llvm_config.with_system_environment(
['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP', 'HIP_VISIBLE_DEVICES'])

# When multiple GPUs were detected, restrict HIP to device 0 so that the
# compiled binaries match the device they execute on. A HIP_VISIBLE_DEVICES
# value already set by the user is left untouched.
if (getattr(config, 'multi_gpu_detected', False)
        and 'HIP_VISIBLE_DEVICES' not in os.environ):
    config.environment['HIP_VISIBLE_DEVICES'] = '0'

##############
# FIXME: adding a path to the environment isn't appearing to work as
Expand Down
32 changes: 26 additions & 6 deletions mlir/test/fusion/e2e/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ config.rocmlir_common_python_tests_utils = "@ROCMLIR_COMMON_PYTHON_TESTS_UTILS@"

# Add common python test utils
sys.path.append(config.rocmlir_common_python_tests_utils)
from common import get_agents
from common import get_agents, get_default_agent

# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
Expand All @@ -49,20 +49,40 @@ except KeyError:

# If rocm_agent_enumerator shows no viable GPUs, skip tests that need one,
# because the default target will lead to compilation failures.
import os
config.no_AMD_GPU = False
config.arch = ""
config.arch_support_mfma = False
config.arch_support_wmma = False
config.multi_gpu_detected = False
if config.rocm_path:
try:
# Check if user already set HIP_VISIBLE_DEVICES - respect their choice
user_hip_visible = os.environ.get('HIP_VISIBLE_DEVICES')

agents = get_agents()
config.arch = ','.join(agents)
for x in agents:
if any([arch in x for arch in ["gfx908", "gfx90a", "gfx942", "gfx950"]]):
default_agent = get_default_agent()
if default_agent:
if len(agents) > 1:
config.multi_gpu_detected = True
if user_hip_visible is not None:
# User specified which GPU to use - respect it
lit_config.note("Multiple GPU architectures detected: %s. "
"Using user-specified HIP_VISIBLE_DEVICES=%s. "
"Device 0 (after filtering) architecture: '%s'."
% (', '.join(sorted(agents)), user_hip_visible, default_agent))
else:
# No user preference - use device 0 and set HIP_VISIBLE_DEVICES
lit_config.note("Multiple GPU architectures detected: %s. "
"Using device 0 architecture '%s' for E2E tests. "
"HIP_VISIBLE_DEVICES will be set to '0' to ensure binary compatibility."
% (', '.join(sorted(agents)), default_agent))
config.arch = default_agent
# Check features for the device we'll actually use
if any([arch in default_agent for arch in ["gfx908", "gfx90a", "gfx942", "gfx950"]]):
config.arch_support_mfma = True
elif "gfx11" in x or "gfx12" in x:
elif "gfx11" in default_agent or "gfx12" in default_agent:
config.arch_support_wmma = True
# Check other features here
if not config.arch:
config.no_AMD_GPU = True
except subprocess.CalledProcessError:
Expand Down
14 changes: 11 additions & 3 deletions mlir/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,21 @@
config.substitutions.append(('%shlibext', config.llvm_shlib_ext))
config.substitutions.append(("%mlir_src_root", config.mlir_src_root))
config.substitutions.append(('%random_data', config.random_data))
config.substitutions.append(('%constrained_float_range_random_data',
config.constrained_float_range_random_data))
config.substitutions.append(
('%constrained_float_range_random_data', config.constrained_float_range_random_data))
config.substitutions.append(('%rocmlir_gen_flags', config.rocmlir_gen_flags))
config.substitutions.append(('%arch', config.arch))
config.substitutions.append(('%pv', config.populate_validation))

llvm_config.with_system_environment(['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP'])
llvm_config.with_system_environment(
['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP', 'HIP_VISIBLE_DEVICES'])

# When multiple GPUs were detected, restrict HIP to device 0 so that the
# compiled binaries match the device they execute on. A HIP_VISIBLE_DEVICES
# value already set by the user is left untouched.
if (getattr(config, 'multi_gpu_detected', False)
        and 'HIP_VISIBLE_DEVICES' not in os.environ):
    config.environment['HIP_VISIBLE_DEVICES'] = '0'

##############
# FIXME: adding a path to the environment isn't appearing to work as
Expand Down
31 changes: 26 additions & 5 deletions mlir/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ config.rocmlir_common_python_tests_utils = "@ROCMLIR_COMMON_PYTHON_TESTS_UTILS@"

# Add common python test utils
sys.path.append(config.rocmlir_common_python_tests_utils)
from common import get_agents, get_arch_features
from common import get_agents, get_arch_features, get_default_agent

# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
Expand All @@ -73,19 +73,40 @@ except KeyError:

# If rocm_agent_enumerator shows no viable GPUs, skip tests that need one,
# because the default target will lead to compilation failures.
import os
config.no_AMD_GPU = False
config.arch = ""
config.arch_support_mfma = False
config.arch_support_wmma = False
config.arch_support_accel_fp8 = False
config.features = None
config.multi_gpu_detected = False
if config.rocm_path:
try:
# Check if user already set HIP_VISIBLE_DEVICES - respect their choice
user_hip_visible = os.environ.get('HIP_VISIBLE_DEVICES')

agents = get_agents()
config.arch = ','.join(agents)
for x in agents:
if not config.features:
config.features, config.arch_support_mfma, config.arch_support_wmma, config.arch_support_accel_fp8 = get_arch_features(x)
default_agent = get_default_agent()
if default_agent:
if len(agents) > 1:
config.multi_gpu_detected = True
if user_hip_visible is not None:
# User specified which GPU to use - respect it
lit_config.note("Multiple GPU architectures detected: %s. "
"Using user-specified HIP_VISIBLE_DEVICES=%s. "
"Device 0 (after filtering) architecture: '%s'."
% (', '.join(sorted(agents)), user_hip_visible, default_agent))
else:
# No user preference - use device 0 and set HIP_VISIBLE_DEVICES
lit_config.note("Multiple GPU architectures detected: %s. "
"Using device 0 architecture '%s' for E2E tests. "
"HIP_VISIBLE_DEVICES will be set to '0' to ensure binary compatibility."
% (', '.join(sorted(agents)), default_agent))
config.arch = default_agent
# Get features for the device we'll actually use
config.features, config.arch_support_mfma, config.arch_support_wmma, config.arch_support_accel_fp8 = get_arch_features(default_agent)
if config.features:
config.substitutions.append(('%features', config.features))
if not config.arch:
config.no_AMD_GPU = True
Expand Down