diff --git a/src/driver.jl b/src/driver.jl index 950ea272..07d880af 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -93,7 +93,6 @@ function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwarg ## LLVM IR ir, ir_meta = emit_llvm(job) - if output == :llvm if job.config.strip @tracepoint "strip debug info" strip_debuginfo!(ir) diff --git a/src/jlgen.jl b/src/jlgen.jl index 330cf7f8..67046656 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -293,10 +293,6 @@ end end # !HAS_INTEGRATED_CACHE -## method overrides - -Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) - # Implements a priority lookup for method tables, where the first match in the stack get's returned. # An alternative to this would be to use a "Union" where we would query the parent method table and # do a most-specific match. @@ -314,6 +310,7 @@ CC.isoverlayed(::StackedMethodTable) = true # https://github.com/JuliaLang/julia/pull/51078 # same API as before but without returning isoverlayed flag function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1) + #println("findall: sig: $(sig), mt: $(table)") result = CC._findall(sig, table.mt, table.world, limit) result === nothing && return nothing # to many matches nr = CC.length(result) @@ -335,6 +332,7 @@ CC.isoverlayed(::StackedMethodTable) = true end function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable) + #println("findsup: sig: $(sig), mt: $(table)") match, valid_worlds = CC._findsup(sig, table.mt, table.world) match !== nothing && return match, valid_worlds parent_match, parent_valid_worlds = CC.findsup(sig, table.parent) @@ -490,7 +488,7 @@ CC.lock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing CC.unlock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing function CC.add_remark!(interp::GPUInterpreter, sv::CC.InferenceState, msg) - @safe_debug "Inference remark during GPU compilation of $(sv.linfo): $msg" + #@safe_debug "Inference remark during GPU compilation of $(sv.linfo): $msg" end CC.may_optimize(interp::GPUInterpreter) = true diff --git a/src/rtlib.jl b/src/rtlib.jl index 91b4c71c..616041ed 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -77,6 +77,9 @@ function emit_function!(mod, config::CompilerConfig, f, method) new_mod, meta = compile_unhooked(:llvm, CompilerJob(source, config)) ft = function_type(meta.entry) expected_ft = convert(LLVM.FunctionType, method) + + println("emit_function!: source: $(source)") + #println(code_typed(CompilerJob(source, config))) if return_type(ft) != return_type(expected_ft) error("Invalid return type for runtime function '$(method.name)': expected $(return_type(expected_ft)), got $(return_type(ft))") end @@ -108,12 +111,15 @@ function build_runtime(@nospecialize(job::CompilerJob)) config = CompilerConfig(job.config; kernel=false, toplevel=false, only_entry=false, strip=false) for method in values(Runtime.methods) + #println("build_runtime: method.def: $(method.def)") + #println("build_runtime: method.name: $(method.name)") def = if isa(method.def, Symbol) isdefined(runtime_module(job), method.def) || continue getfield(runtime_module(job), method.def) else method.def end + println("build_runtime: def: $(def)") emit_function!(mod, config, typeof(def), method) end diff --git a/src/runtime.jl b/src/runtime.jl index 2b11d915..2f7312e1 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -71,6 +71,7 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n meth = RuntimeMethodInstance(def, return_type, types, name, llvm_return_type, llvm_types, llvm_name) + if haskey(methods, name) error("Runtime function $name has already been registered!") end @@ -82,8 +83,10 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # using the new nonrecursive codegen to handle function lookup ourselves? if def isa Symbol args = [gensym() for typ in types] - @eval @inline $def($(args...)) = - ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) + @eval GPUCompiler.@device_function($return_type, + @inline $def($(args...)) = + ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) + ) end return diff --git a/src/utils.jl b/src/utils.jl index 095f22dc..79bb5d78 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -182,3 +182,31 @@ function kernels(mod::LLVM.Module) end return vals end + +## method overrides + +Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) +using ExprTools: splitdef, combinedef +macro device_function(rt, ex) + ex = macroexpand(__module__, ex) + def = splitdef(ex) + + # generate a function that warns and returns the expected type + # FIXME: The type may not have a default constructor, what do we do then? + # Currently we are using the constructor with an Int64(1) as an argument. + # NOTE: using Int64(1) is a bit odd. This is because Ptr(Int64(0)) == C_NULL, and julia code lowering + # seems to get rid of this automatically. + def[:body] = quote + @warn "This function is not intended for use on the CPU something may have gone wrong" + $rt(1) + end + + esc(quote + $(combinedef(def)) + + # NOTE: no use of `@consistent_overlay` here because the regular function errors + Base.Experimental.@overlay($(GPUCompiler).GLOBAL_METHOD_TABLE, $ex) + end) +end + + diff --git a/src/validation.jl b/src/validation.jl index 0190d1c9..7045194c 100644 --- a/src/validation.jl +++ b/src/validation.jl @@ -177,6 +177,9 @@ function check_ir!(job, errors::Vector{IRError}, mod::LLVM.Module) # custom validation append!(errors, validate_ir(job, mod)) + if !isempty(errors) + write("error_ir.ll", string(mod)) + end return errors end