Pass the host environment to GPU programs started by the libc loader.

The loader's `main` now receives `envp` and hands it to `load`, which copies
every environment string into fine-grained memory the GPU agent can access and
passes the copied, null-terminated pointer array to the `_start` kernel. The
kernel forwards it to the device `main` as its third argument. The startup
integration test sets FRANCE=Paris and GERMANY=Berlin and checks that both are
visible on the device.

diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index cc30982e148f..9915dff94f6f 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -8,11 +8,12 @@
 
 #include "src/__support/RPC/rpc_client.h"
 
-extern "C" int main(int argc, char **argv);
+extern "C" int main(int argc, char **argv, char **envp);
 
 extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
-_start(int argc, char **argv, int *ret, void *in, void *out, void *buffer) {
+_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
+       void *buffer) {
   __llvm_libc::rpc::client.reset(in, out, buffer);
 
-  __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED);
+  __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
 }
diff --git a/libc/test/integration/startup/gpu/CMakeLists.txt b/libc/test/integration/startup/gpu/CMakeLists.txt
index 5451a27c2887..9bd7f675eeaa 100644
--- a/libc/test/integration/startup/gpu/CMakeLists.txt
+++ b/libc/test/integration/startup/gpu/CMakeLists.txt
@@ -8,4 +8,7 @@ add_integration_test(
     args_test.cpp
   ARGS
     1 2 3
+  ENV
+    FRANCE=Paris
+    GERMANY=Berlin
 )
diff --git a/libc/test/integration/startup/gpu/args_test.cpp b/libc/test/integration/startup/gpu/args_test.cpp
index f3a5410691c2..1cc5a0e76927 100644
--- a/libc/test/integration/startup/gpu/args_test.cpp
+++ b/libc/test/integration/startup/gpu/args_test.cpp
@@ -17,11 +17,22 @@ static bool my_streq(const char *lhs, const char *rhs) {
   return *l == '\0' && *r == '\0';
 }
 
-TEST_MAIN(int argc, char **argv) {
+TEST_MAIN(int argc, char **argv, char **envp) {
   ASSERT_TRUE(argc == 4);
   ASSERT_TRUE(my_streq(argv[1], "1"));
   ASSERT_TRUE(my_streq(argv[2], "2"));
   ASSERT_TRUE(my_streq(argv[3], "3"));
 
+  bool found_france = false;
+  bool found_germany = false;
+  for (; *envp != nullptr; ++envp) {
+    if (my_streq(*envp, "FRANCE=Paris"))
+      found_france = true;
+    if (my_streq(*envp, "GERMANY=Berlin"))
+      found_germany = true;
+  }
+
+  ASSERT_TRUE(found_france && found_germany);
+
   return 0;
 }
diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index a24b8b1e982e..aecd6db25b54 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -11,4 +11,4 @@
 /// Generic interface to load the \p image and launch execution of the _start
-/// kernel on the target device. Copies \p argc and \p argv to the device.
-/// Returns the final value of the `main` function on the device.
-int load(int argc, char **argv, void *image, size_t size);
+/// kernel on the target device. Copies \p argc, \p argv, and \p envp to the
+/// device. Returns the final value of the `main` function on the device.
+int load(int argc, char **argv, char **envp, void *image, size_t size);
diff --git a/libc/utils/gpu/loader/Main.cpp b/libc/utils/gpu/loader/Main.cpp
index 435bda6cc7e4..00354720dda9 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/libc/utils/gpu/loader/Main.cpp
@@ -16,7 +16,7 @@
 #include <cstdio>
 #include <cstdlib>
 
-int main(int argc, char **argv) {
+int main(int argc, char **argv, char **envp) {
   if (argc < 2) {
     printf("USAGE: ./loader <device_image> <args>, ...\n");
     return EXIT_SUCCESS;
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
   fclose(file);
 
   // Drop the loader from the program arguments.
-  int ret = load(argc - 1, &argv[1], image, size);
+  int ret = load(argc - 1, &argv[1], envp, image, size);
 
   free(image);
   return ret;
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 0d631e98aae4..fcb5119a0f87 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -32,6 +32,7 @@ constexpr const char *KERNEL_START = "_start.kd";
 struct kernel_args_t {
   int argc;
   void *argv;
+  void *envp;
   void *ret;
   void *inbox;
   void *outbox;
@@ -164,7 +165,7 @@ hsa_status_t get_agent_memory_pool(hsa_agent_t agent,
   return iterate_agent_memory_pools(agent, cb);
 }
 
-int load(int argc, char **argv, void *image, size_t size) {
+int load(int argc, char **argv, char **envp, void *image, size_t size) {
   // Initialize the HSA runtime used to communicate with the device.
   if (hsa_status_t err = hsa_init())
     handle_error(err);
@@ -299,6 +300,36 @@ int load(int argc, char **argv, void *image, size_t size) {
     static_cast<void **>(dev_argv)[i] = dev_str;
   }
 
+  // Allocate fine-grained memory on the host to hold the pointer array for the
+  // copied environment array and allow the GPU agent to access it. One extra
+  // slot is reserved for the null pointer that terminates the array, because
+  // device code walks envp until it reads a null entry.
+  int envc = 0;
+  for (char **env = envp; env && *env; ++env)
+    ++envc;
+  void *dev_envp;
+  if (hsa_status_t err = hsa_amd_memory_pool_allocate(
+          finegrained_pool, (envc + 1) * sizeof(char *), /*flags=*/0,
+          &dev_envp))
+    handle_error(err);
+  hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_envp);
+
+  // Copy each environment string to memory the device can read.
+  for (int i = 0; i < envc; ++i) {
+    size_t size = strlen(envp[i]) + 1;
+    void *dev_str;
+    if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size,
+                                                        /*flags=*/0, &dev_str))
+      handle_error(err);
+    hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_str);
+    // Load the host memory buffer with the pointer values of the newly
+    // allocated strings.
+    std::memcpy(dev_str, envp[i], size);
+    static_cast<void **>(dev_envp)[i] = dev_str;
+  }
+  // Terminate the pointer array so the device-side walk stops in bounds.
+  static_cast<void **>(dev_envp)[envc] = nullptr;
+
   // Allocate space for the return pointer and initialize it to zero.
   void *dev_ret;
   if (hsa_status_t err =
@@ -333,6 +364,7 @@ int load(int argc, char **argv, void *image, size_t size) {
   kernel_args_t *kernel_args = reinterpret_cast<kernel_args_t *>(args);
   kernel_args->argc = argc;
   kernel_args->argv = dev_argv;
+  kernel_args->envp = dev_envp;
   kernel_args->ret = dev_ret;
   kernel_args->inbox = server_outbox;
   kernel_args->outbox = server_inbox;