From 7d65caeeb4bfc4b98386dae7cc665ffdde97c64b Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Fri, 19 Dec 2025 18:18:31 +0100 Subject: [PATCH 1/4] A, perhaps hacky, hiding of the runtime in the GLOBAL_METHOD_TABLE overlay --- src/jlgen.jl | 6 ++---- src/runtime.jl | 25 ++++++++++++++++++++++++- src/utils.jl | 4 ++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/jlgen.jl b/src/jlgen.jl index 330cf7f8..738e3190 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -293,10 +293,6 @@ end end # !HAS_INTEGRATED_CACHE -## method overrides - -Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) - # Implements a priority lookup for method tables, where the first match in the stack get's returned. # An alternative to this would be to use a "Union" where we would query the parent method table and # do a most-specific match. @@ -314,6 +310,7 @@ CC.isoverlayed(::StackedMethodTable) = true # https://github.com/JuliaLang/julia/pull/51078 # same API as before but without returning isoverlayed flag function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1) + println("findall: sig: $(sig), mt: $(table)") result = CC._findall(sig, table.mt, table.world, limit) result === nothing && return nothing # to many matches nr = CC.length(result) @@ -335,6 +332,7 @@ CC.isoverlayed(::StackedMethodTable) = true end function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable) + println("findall: sig: $(sig), mt: $(table)") match, valid_worlds = CC._findsup(sig, table.mt, table.world) match !== nothing && return match, valid_worlds parent_match, parent_valid_worlds = CC.findsup(sig, table.parent) diff --git a/src/runtime.jl b/src/runtime.jl index 2b11d915..92349466 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -12,6 +12,26 @@ module Runtime using ..GPUCompiler using LLVM using LLVM.Interop +using ExprTools: splitdef, combinedef + + +macro device_function(ex) + ex = macroexpand(__module__, ex) + def = splitdef(ex) + + # generate a 
function that errors + def[:body] = quote + error("This function is not intended for use on the CPU") + end + + esc(quote + $(combinedef(def)) + + # NOTE: no use of `@consistent_overlay` here because the regular function errors + Base.Experimental.@overlay($(GPUCompiler).GLOBAL_METHOD_TABLE, $ex) + end) +end + ## representation of a runtime method instance @@ -71,6 +91,8 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n meth = RuntimeMethodInstance(def, return_type, types, name, llvm_return_type, llvm_types, llvm_name) + println("Compile called for def $(def)") + if haskey(methods, name) error("Runtime function $name has already been registered!") end @@ -81,8 +103,9 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # work around that by generating an llvmcall stub. can we do better by # using the new nonrecursive codegen to handle function lookup ourselves? if def isa Symbol + println("Symbol passed to compile: $(def)") args = [gensym() for typ in types] - @eval @inline $def($(args...)) = + @eval @device_function @inline $def($(args...)) = ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) end diff --git a/src/utils.jl b/src/utils.jl index 095f22dc..60ed9779 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -182,3 +182,7 @@ function kernels(mod::LLVM.Module) end return vals end + +## method overrides + +Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) From 743b80717677ab7785728b619c427215936428ca Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Mon, 22 Dec 2025 15:35:56 +0100 Subject: [PATCH 2/4] cleanup some debugging --- src/runtime.jl | 22 ---------------------- src/utils.jl | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/runtime.jl b/src/runtime.jl index 92349466..36bf8ef8 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -12,26 +12,6 @@ module Runtime using ..GPUCompiler using LLVM using LLVM.Interop -using 
ExprTools: splitdef, combinedef - - -macro device_function(ex) - ex = macroexpand(__module__, ex) - def = splitdef(ex) - - # generate a function that errors - def[:body] = quote - error("This function is not intended for use on the CPU") - end - - esc(quote - $(combinedef(def)) - - # NOTE: no use of `@consistent_overlay` here because the regular function errors - Base.Experimental.@overlay($(GPUCompiler).GLOBAL_METHOD_TABLE, $ex) - end) -end - ## representation of a runtime method instance @@ -91,7 +71,6 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n meth = RuntimeMethodInstance(def, return_type, types, name, llvm_return_type, llvm_types, llvm_name) - println("Compile called for def $(def)") if haskey(methods, name) error("Runtime function $name has already been registered!") @@ -103,7 +82,6 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # work around that by generating an llvmcall stub. can we do better by # using the new nonrecursive codegen to handle function lookup ourselves? 
if def isa Symbol - println("Symbol passed to compile: $(def)") args = [gensym() for typ in types] @eval @device_function @inline $def($(args...)) = ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) diff --git a/src/utils.jl b/src/utils.jl index 60ed9779..8242e5a2 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -186,3 +186,22 @@ end ## method overrides Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) +using ExprTools: splitdef, combinedef +macro device_function(ex) + ex = macroexpand(__module__, ex) + def = splitdef(ex) + + # generate a function that errors + def[:body] = quote + error("This function is not intended for use on the CPU") + end + + esc(quote + $(combinedef(def)) + + # NOTE: no use of `@consistent_overlay` here because the regular function errors + Base.Experimental.@overlay($(GPUCompiler).GLOBAL_METHOD_TABLE, $ex) + end) +end + + From 12251c1fae1dce418c12f62c024539f33a21546e Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Mon, 22 Dec 2025 16:48:42 +0100 Subject: [PATCH 3/4] fix issue caused by moving device_function --- src/runtime.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime.jl b/src/runtime.jl index 36bf8ef8..d2fdf012 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -83,7 +83,7 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # using the new nonrecursive codegen to handle function lookup ourselves? 
if def isa Symbol args = [gensym() for typ in types] - @eval @device_function @inline $def($(args...)) = + @eval GPUCompiler.@device_function @inline $def($(args...)) = ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) end From a3f974b3ecb8e5dd3ecf31cb5c9e52502c1067e6 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Tue, 23 Dec 2025 17:32:56 +0100 Subject: [PATCH 4/4] dummy CPU functions now seem to get us further but check_ir is kicking us out of some KernelAbstractions compilations in e.g. OpenCL.jl --- src/driver.jl | 1 - src/jlgen.jl | 6 +++--- src/rtlib.jl | 6 ++++++ src/runtime.jl | 6 ++++-- src/utils.jl | 11 ++++++++--- src/validation.jl | 3 +++ 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index 950ea272..07d880af 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -93,7 +93,6 @@ function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwarg ## LLVM IR ir, ir_meta = emit_llvm(job) - if output == :llvm if job.config.strip @tracepoint "strip debug info" strip_debuginfo!(ir) diff --git a/src/jlgen.jl b/src/jlgen.jl index 738e3190..67046656 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -310,7 +310,7 @@ CC.isoverlayed(::StackedMethodTable) = true # https://github.com/JuliaLang/julia/pull/51078 # same API as before but without returning isoverlayed flag function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1) - println("findall: sig: $(sig), mt: $(table)") + #println("findall: sig: $(sig), mt: $(table)") result = CC._findall(sig, table.mt, table.world, limit) result === nothing && return nothing # to many matches nr = CC.length(result) @@ -332,7 +332,7 @@ CC.isoverlayed(::StackedMethodTable) = true end function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable) - println("findall: sig: $(sig), mt: $(table)") + #println("findsup: sig: $(sig), mt: $(table)") match, valid_worlds = CC._findsup(sig, table.mt, table.world)
match !== nothing && return match, valid_worlds parent_match, parent_valid_worlds = CC.findsup(sig, table.parent) @@ -488,7 +488,7 @@ CC.lock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing CC.unlock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing function CC.add_remark!(interp::GPUInterpreter, sv::CC.InferenceState, msg) - @safe_debug "Inference remark during GPU compilation of $(sv.linfo): $msg" + #@safe_debug "Inference remark during GPU compilation of $(sv.linfo): $msg" end CC.may_optimize(interp::GPUInterpreter) = true diff --git a/src/rtlib.jl b/src/rtlib.jl index 91b4c71c..616041ed 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -77,6 +77,9 @@ function emit_function!(mod, config::CompilerConfig, f, method) new_mod, meta = compile_unhooked(:llvm, CompilerJob(source, config)) ft = function_type(meta.entry) expected_ft = convert(LLVM.FunctionType, method) + + println("emit_function!: source: $(source)") + #println(code_typed(CompilerJob(source, config))) if return_type(ft) != return_type(expected_ft) error("Invalid return type for runtime function '$(method.name)': expected $(return_type(expected_ft)), got $(return_type(ft))") end @@ -108,12 +111,15 @@ function build_runtime(@nospecialize(job::CompilerJob)) config = CompilerConfig(job.config; kernel=false, toplevel=false, only_entry=false, strip=false) for method in values(Runtime.methods) + #println("build_runtime: method.def: $(method.def)") + #println("build_runtime: method.name: $(method.name)") def = if isa(method.def, Symbol) isdefined(runtime_module(job), method.def) || continue getfield(runtime_module(job), method.def) else method.def end + println("build_runtime: def: $(def)") emit_function!(mod, config, typeof(def), method) end diff --git a/src/runtime.jl b/src/runtime.jl index d2fdf012..2f7312e1 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -83,8 +83,10 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # using the new 
nonrecursive codegen to handle function lookup ourselves? if def isa Symbol args = [gensym() for typ in types] - @eval GPUCompiler.@device_function @inline $def($(args...)) = - ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) + @eval GPUCompiler.@device_function($return_type, + @inline $def($(args...)) = + ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) + ) end return diff --git a/src/utils.jl b/src/utils.jl index 8242e5a2..79bb5d78 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -187,13 +187,18 @@ end Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) using ExprTools: splitdef, combinedef -macro device_function(ex) +macro device_function(rt, ex) ex = macroexpand(__module__, ex) def = splitdef(ex) - # generate a function that errors + # generate a function that warns and returns the expected type + # FIXME: The type may not have a default constructor, what do we do then? + # Currently we are using the constructor with an Int64(1) as an argument. + # NOTE: using Int64(1) is a bit odd. This is because Ptr(Int64(0)) == C_NULL, and julia code lowering + # seems to get rid of this automatically. def[:body] = quote - error("This function is not intended for use on the CPU") + @warn "This function is not intended for use on the CPU something may have gone wrong" + $rt(1) end esc(quote diff --git a/src/validation.jl b/src/validation.jl index 0190d1c9..7045194c 100644 --- a/src/validation.jl +++ b/src/validation.jl @@ -177,6 +177,9 @@ function check_ir!(job, errors::Vector{IRError}, mod::LLVM.Module) # custom validation append!(errors, validate_ir(job, mod)) + if !isempty(errors) + write("error_ir.ll", string(mod)) + end return errors end