Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion compiler/rustc_codegen_llvm/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for
codegen_llvm_mismatch_data_layout =
data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}`

codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=Enable
codegen_llvm_offload_bundleimages_failed = call to BundleImages failed, `host.out` was not created
codegen_llvm_offload_embed_failed = call to EmbedBufferInModule failed, `host.o` was not created
codegen_llvm_offload_no_abs_path = using the `-Z offload=Host=/absolute/path/to/host.out` flag requires an absolute path
codegen_llvm_offload_no_host_out = the `-Z offload=Host=/absolute/path/to/host.out` flag must point to a `host.out` file
codegen_llvm_offload_nonexisting = the given path/file to `host.out` does not exist. Did you forget to run the device compilation first?
codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=<Device or Host=/absolute/path/to/host.out>
codegen_llvm_offload_without_fat_lto = using the offload feature requires -C lto=fat

codegen_llvm_parse_bitcode = failed to parse bitcode for LTO module
Expand Down
92 changes: 87 additions & 5 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,11 +707,9 @@ pub(crate) unsafe fn llvm_optimize(
llvm::set_value_name(new_fn, &name);
}

if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
let cx =
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
// For now we only support up to 10 kernels named kernel_0 ... kernel_9, a follow-up PR is
// introducing a proper offload intrinsic to solve this limitation.
for func in cx.get_functions() {
let offload_kernel = "offload-kernel";
if attributes::has_string_attr(func, offload_kernel) {
Expand Down Expand Up @@ -773,12 +771,96 @@ pub(crate) unsafe fn llvm_optimize(
)
};

if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
let device_path = cgcx.output_filenames.path(OutputType::Object);
let device_dir = device_path.parent().unwrap();
let device_out = device_dir.join("host.out");
let device_out_c = path_to_c_string(device_out.as_path());
unsafe {
llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw());
// 1) Bundle device module into offload image host.out (device TM)
let ok = llvm::LLVMRustBundleImages(
module.module_llvm.llmod(),
module.module_llvm.tm.raw(),
device_out_c.as_ptr(),
);
if !ok || !device_out.exists() {
dcx.emit_err(crate::errors::OffloadBundleImagesFailed);
}
}
}

// This assumes that we previously compiled our kernels for a gpu target, which created a
// `host.out` artifact. The user is supposed to provide us with a path to this artifact, we
// don't need any other artifacts from the previous run. We will embed this artifact into our
// LLVM-IR host module, to create a `host.o` ObjectFile, which we will write to disk.
// The last, not yet automated step uses the `clang-linker-wrapper` to process `host.o`.
if !cgcx.target_is_like_gpu {
if let Some(device_path) = config
.offload
.iter()
.find_map(|o| if let config::Offload::Host(path) = o { Some(path) } else { None })
{
let device_pathbuf = PathBuf::from(device_path);
if device_pathbuf.is_relative() {
dcx.emit_err(crate::errors::OffloadWithoutAbsPath);
} else if device_pathbuf
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n != "host.out")
{
dcx.emit_err(crate::errors::OffloadWrongFileName);
} else if !device_pathbuf.exists() {
dcx.emit_err(crate::errors::OffloadNonexistingPath);
}
let host_path = cgcx.output_filenames.path(OutputType::Object);
let host_dir = host_path.parent().unwrap();
let out_obj = host_dir.join("host.o");
let host_out_c = path_to_c_string(device_pathbuf.as_path());

// 2) Finalize host: lib.bc + host.out -> host.o (host TM)
// We create a full clone of our LLVM host module, since we will embed the device IR
// into it, and this might break caching or incremental compilation otherwise.
let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod());
let ok =
unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) };
if !ok {
dcx.emit_err(crate::errors::OffloadEmbedFailed);
}
write_output_file(
dcx,
module.module_llvm.tm.raw(),
config.no_builtins,
llmod2,
&out_obj,
None,
llvm::FileType::ObjectFile,
&cgcx.prof,
true,
);
// We ignore cgcx.save_temps here and unconditionally always keep our `host.out` artifact.
// Otherwise, recompiling the host code would fail since we deleted that device artifact
// in the previous host compilation, which would be confusing at best.

// New, replace linker-wrapper:
// $ llvm-offload-binary host.o --image=file=dev.o,arch=gfx942
// $ clang --target=amdgcn-amd-amdhsa -mcpu=gfx942 dev.o -o image -l<libraries>
// $ llvm-offload-wrapper --triple=x86_64-unknown-linux -kind=hip image -o out.bc
// $ clang --target=x86_64-unknown-linux out.bc -o reg.o
// $ ld.lld host.o reg.o -o a.out
//let ok = unsafe {
// llvm::LLVMRustBundleImages(
// module.module_llvm.llmod(),
// module.module_llvm.tm.raw(),
// device_out_c.as_ptr(),
// )
//};
//if !ok || !device_out.exists() {
// dcx.emit_err(crate::errors::OffloadBundleImagesFailed);
//}
unsafe { llvm::LLVMRustWrapImages() };
// call c++
}
}
result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses))
}

Expand Down
20 changes: 20 additions & 0 deletions compiler/rustc_codegen_llvm/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ pub(crate) struct OffloadWithoutEnable;
#[diag(codegen_llvm_offload_without_fat_lto)]
pub(crate) struct OffloadWithoutFatLTO;

/// Emitted when the path passed via `-Z offload=Host=<path>` is relative;
/// the flag requires an absolute path to the `host.out` artifact.
#[derive(Diagnostic)]
#[diag(codegen_llvm_offload_no_abs_path)]
pub(crate) struct OffloadWithoutAbsPath;

/// Emitted when the path passed via `-Z offload=Host=<path>` names a file
/// other than `host.out`.
#[derive(Diagnostic)]
#[diag(codegen_llvm_offload_no_host_out)]
pub(crate) struct OffloadWrongFileName;

/// Emitted when the `host.out` path passed via `-Z offload=Host=<path>` does
/// not exist on disk, e.g. because the device compilation was not run first.
#[derive(Diagnostic)]
#[diag(codegen_llvm_offload_nonexisting)]
pub(crate) struct OffloadNonexistingPath;

/// Emitted during device compilation when `LLVMRustBundleImages` reports
/// failure or the expected `host.out` file is missing afterwards.
#[derive(Diagnostic)]
#[diag(codegen_llvm_offload_bundleimages_failed)]
pub(crate) struct OffloadBundleImagesFailed;

/// Emitted during host compilation when `LLVMRustOffloadEmbedBufferInModule`
/// reports that it could not embed `host.out` into the host module.
#[derive(Diagnostic)]
#[diag(codegen_llvm_offload_embed_failed)]
pub(crate) struct OffloadEmbedFailed;

#[derive(Diagnostic)]
#[diag(codegen_llvm_lto_bitcode_from_rlib)]
pub(crate) struct LtoBitcodeFromRlib {
Expand Down
8 changes: 1 addition & 7 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
return Ok(());
}
sym::offload => {
if !tcx
.sess
.opts
.unstable_opts
.offload
.contains(&rustc_session::config::Offload::Enable)
{
if tcx.sess.opts.unstable_opts.offload.is_empty() {
let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable);
}

Expand Down
24 changes: 22 additions & 2 deletions compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1722,7 +1722,16 @@ mod Offload {
use super::*;
unsafe extern "C" {
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool;
pub(crate) fn LLVMRustBundleImages<'a>(
M: &'a Module,
TM: &'a TargetMachine,
host_out: *const c_char,
) -> bool;
/// Reads the file at `_host_out` and embeds its contents into the module `_M`
/// (in the `.llvm.offloading` section on the C++ side).
/// Returns `false` if the file could not be read.
/// `_host_out` must point to a valid NUL-terminated path string.
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
_M: &'a Module,
_host_out: *const c_char,
) -> bool;
/// Wraps a device image into a bitcode module via LLVM's offload wrapper.
/// The C++ definition in `RustWrapper.cpp` is `extern "C" bool`, so the
/// declaration must return `bool` as well; `false` signals that reading the
/// image, wrapping it, or opening the output file failed.
pub(crate) unsafe fn LLVMRustWrapImages() -> bool;
pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value);
}
}
Expand All @@ -1736,7 +1745,18 @@ mod Offload_fallback {
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
#[allow(unused_unsafe)]
pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool {
pub(crate) unsafe fn LLVMRustBundleImages<'a>(
_M: &'a Module,
_TM: &'a TargetMachine,
_host_out: *const c_char,
) -> bool {
unimplemented!("This rustc version was not built with LLVM Offload support!");
}
/// Fallback for builds without LLVM Offload support; always panics.
/// Outside of an `extern` block a function needs a body, so this mirrors the
/// other fallback stubs. The `bool` return matches the C++ definition of
/// `LLVMRustWrapImages` in `RustWrapper.cpp`.
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
#[allow(unused_unsafe)]
pub(crate) unsafe fn LLVMRustWrapImages() -> bool {
    unimplemented!("This rustc version was not built with LLVM Offload support!");
}
/// Fallback for builds without LLVM Offload support; always panics via
/// `unimplemented!`. The signature mirrors the FFI declaration of the same
/// name so callers compile regardless of whether offload is available.
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
_M: &'a Module,
_host_out: *const c_char,
) -> bool {
unimplemented!("This rustc version was not built with LLVM Offload support!");
}
#[allow(unused_unsafe)]
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ fn test_unstable_options_tracking_hash() {
tracked!(no_profiler_runtime, true);
tracked!(no_trait_vptr, true);
tracked!(no_unique_section_names, true);
tracked!(offload, vec![Offload::Enable]);
tracked!(offload, vec![Offload::Device]);
tracked!(on_broken_pipe, OnBrokenPipe::Kill);
tracked!(oom, OomStrategy::Panic);
tracked!(osx_rpath_install_name, true);
Expand Down
87 changes: 82 additions & 5 deletions compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@
// available. As such, we only try to build it in the first place, if
// llvm.offload is enabled.
#ifdef OFFLOAD
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#endif

// for raw `write` in the bad-alloc handler
Expand Down Expand Up @@ -174,12 +176,13 @@ static Error writeFile(StringRef Filename, StringRef Data) {
// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp
// The input module is the rust code compiled for a gpu target like amdgpu.
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM,
const char *HostOutPath) {
std::string Storage;
llvm::raw_string_ostream OS1(Storage);
llvm::WriteBitcodeToFile(*unwrap(M), OS1);
OS1.flush();
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc");
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc");

SmallVector<char, 1024> BinaryData;
raw_svector_ostream OS2(BinaryData);
Expand All @@ -188,19 +191,38 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
ImageBinary.TheImageKind = object::IMG_Bitcode;
ImageBinary.Image = std::move(MB);
ImageBinary.TheOffloadKind = object::OFK_OpenMP;
ImageBinary.StringData["triple"] = TM.getTargetTriple().str();
ImageBinary.StringData["arch"] = TM.getTargetCPU();

std::string TripleStr = TM.getTargetTriple().str();
llvm::StringRef CPURef = TM.getTargetCPU();
ImageBinary.StringData["triple"] = TripleStr;
ImageBinary.StringData["arch"] = CPURef;
llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary);
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
// Offload binary has invalid size alignment
return false;
OS2 << Buffer;
if (Error E = writeFile("host.out",
if (Error E = writeFile(HostOutPath,
StringRef(BinaryData.begin(), BinaryData.size())))
return false;
return true;
}

// Reads the offload image stored at `HostOutPath` and embeds its bytes into
// the host module `HostM`, in the `.llvm.offloading` section with 8-byte
// alignment.
//
// Returns false if `HostOutPath` is null or the file cannot be read, and true
// once the buffer has been embedded.
extern "C" bool LLVMRustOffloadEmbedBufferInModule(LLVMModuleRef HostM,
                                                   const char *HostOutPath) {
  if (!HostOutPath)
    return false;

  // getFile reports failure through a std::error_code, which may simply be
  // dropped. It must not be converted into an llvm::Error here: a failure
  // Error that is never handled aborts the process on destruction when LLVM
  // is built with ABI-breaking checks enabled.
  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
      MemoryBuffer::getFile(HostOutPath);
  if (!MBOrErr)
    return false;

  MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef();
  Module *M = unwrap(HostM);
  StringRef SectionName = ".llvm.offloading";
  Align Alignment = Align(8);
  llvm::embedBufferInModule(*M, Buf, SectionName, Alignment);
  return true;
}

extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
llvm::Function *oldFn = llvm::unwrap<llvm::Function>(OldFn);
llvm::Function *newFn = llvm::unwrap<llvm::Function>(NewFn);
Expand All @@ -220,6 +242,61 @@ extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
llvm::CloneFunctionChangeType::LocalChangesOnly,
returns);
}

#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Frontend/Offloading/OffloadWrapper.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Host.h"
extern "C" bool LLVMRustWrapImages() {
LLVMContext Context;
Module M("offload.wrapper.module", Context);
M.setTargetTriple(llvm::Triple("x86_64-unknown-linux"));
//M.setTargetTriple(llvm::Triple("amdgcn-amd-amdhsa"));
SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
SmallVector<ArrayRef<char>> BuffersToWrap;
StringRef Input = "/p/lustre1/drehwald1/prog/offload/r/image";
//for (StringRef Input : InputFiles) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFileOrSTDIN(Input);
if (std::error_code EC = BufferOrErr.getError())
return false;
std::unique_ptr<MemoryBuffer> &Buffer =
Buffers.emplace_back(std::move(*BufferOrErr));
BuffersToWrap.emplace_back(
ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
//}
//static const char ImagePath[] = "/p/lustre1/drehwald1/prog/offload/r/image";

//ArrayRef<char> Buf(ImagePath, std::strlen(ImagePath));
//ArrayRef<ArrayRef<char>> BuffersToWrap(Buf);
llvm::errs() << "wraping\n";

if (Error Err = offloading::wrapOpenMPBinaries(
M, BuffersToWrap, offloading::getOffloadEntryArray(M),
/*Suffix=*/"", /*Relocatable=*/false))
return false;
llvm::errs() << "wraping\n";

int FD = -1;
std::string OutputFile = "/p/lustre1/drehwald1/prog/offload/r/out.bc";
if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
return false;
llvm::raw_fd_ostream OS(FD, true);
WriteBitcodeToFile(M, OS);
llvm::errs() << "wraping\n";

return true;
}
#endif

extern "C" LLVMValueRef LLVMRustGetNamedValue(LLVMModuleRef M, const char *Name,
Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,12 @@ pub enum CoverageLevel {
}

// The different settings that the `-Z offload` flag can have.
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
#[derive(Clone, PartialEq, Hash, Debug)]
pub enum Offload {
/// Enable the llvm offload pipeline
Enable,
/// Entry point for `std::offload`, enables kernel compilation for a gpu device
Device,
/// Second step in the offload pipeline, generates the host code to call kernels.
Host(String),
}

/// The different settings that the `-Z autodiff` flag can have.
Expand Down Expand Up @@ -2631,9 +2633,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
)
}

if !nightly_options::is_unstable_enabled(matches)
&& unstable_opts.offload.contains(&Offload::Enable)
{
if !nightly_options::is_unstable_enabled(matches) && !unstable_opts.offload.is_empty() {
early_dcx.early_fatal(
"`-Zoffload=Enable` also requires `-Zunstable-options` \
and a nightly compiler",
Expand Down
Loading
Loading