Update gitcommit to 2.7-rc3
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
This commit is contained in:
parent
bd11f4aa1a
commit
e80f34f74d
3 changed files with 254 additions and 3 deletions
120
0001-Add-cmake-varaible-USE_ROCM_CK.patch
Normal file
120
0001-Add-cmake-varaible-USE_ROCM_CK.patch
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
From 0f33e0a7bbd1522ee74f8fc1fbe3af7563318c79 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 28 Mar 2025 15:33:09 -0700
|
||||
Subject: [PATCH] Add cmake variable USE_ROCM_CK
|
||||
|
||||
To control the use of the ROCm Composable Kernel (CK).
|
||||
|
||||
CK is not compatible with all rocBLAS GPUs, so the user
|
||||
must explicitly choose to use CK.
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
CMakeLists.txt | 1 +
|
||||
aten/src/ATen/CMakeLists.txt | 8 ++++++--
|
||||
aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++-----
|
||||
cmake/Dependencies.cmake | 3 +++
|
||||
4 files changed, 15 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index f3fee2f7ffc2..73903acce452 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -249,6 +249,7 @@ cmake_dependent_option(
|
||||
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
|
||||
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
|
||||
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
|
||||
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON)
|
||||
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
|
||||
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
|
||||
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
|
||||
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
|
||||
index 085af373ec22..af268ab88572 100644
|
||||
--- a/aten/src/ATen/CMakeLists.txt
|
||||
+++ b/aten/src/ATen/CMakeLists.txt
|
||||
@@ -361,13 +361,17 @@ endif()
|
||||
${native_quantized_hip_hip}
|
||||
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
|
||||
)
|
||||
- if(WIN32) # Windows doesn't support Composable Kernels and Triton
|
||||
+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels and Triton
|
||||
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
|
||||
file(GLOB native_hip_ck "native/hip/ck*.hip")
|
||||
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
||||
- ${native_hip_bgemm} ${native_hip_ck}
|
||||
+ ${native_hip_bgemm} ${native_hip_ck})
|
||||
+ endif()
|
||||
+ if(WIN32) # Windows doesn't support Composable Kernels and Triton
|
||||
+ exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
||||
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
|
||||
endif()
|
||||
+
|
||||
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
|
||||
list(APPEND all_hip_cpp
|
||||
${native_nested_hip_cpp}
|
||||
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
index a62b028fd4ff..a3dbf76848ea 100644
|
||||
--- a/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
@@ -708,7 +708,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
bgemm_internal_cublaslt<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
@@ -1061,7 +1061,7 @@ void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
|
||||
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
|
||||
#endif
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
|
||||
}
|
||||
@@ -1077,7 +1077,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
}
|
||||
@@ -1125,7 +1125,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
}
|
||||
@@ -1141,7 +1141,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||
index 30917bdf39f5..2ca6091030f1 100644
|
||||
--- a/cmake/Dependencies.cmake
|
||||
+++ b/cmake/Dependencies.cmake
|
||||
@@ -1046,6 +1046,9 @@ if(USE_ROCM)
|
||||
if(HIPBLASLT_VEC_EXT)
|
||||
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
|
||||
endif()
|
||||
+ if(USE_ROCM_CK)
|
||||
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
|
||||
+ endif()
|
||||
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
|
||||
if(WIN32)
|
||||
add_definitions(-DROCM_ON_WINDOWS)
|
||||
--
|
||||
2.48.1
|
||||
|
||||
112
0001-python-torch-disable-ck.patch
Normal file
112
0001-python-torch-disable-ck.patch
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
From 027dad1eaed51c1172e2497da611e3267d42d2f0 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 28 Mar 2025 09:16:03 -0700
|
||||
Subject: [PATCH] python-torch: disable ck
|
||||
|
||||
---
|
||||
aten/src/ATen/CMakeLists.txt | 7 +++----
|
||||
aten/src/ATen/Context.cpp | 1 +
|
||||
aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++-----
|
||||
3 files changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
|
||||
index 085af373ec22..84808880e51c 100644
|
||||
--- a/aten/src/ATen/CMakeLists.txt
|
||||
+++ b/aten/src/ATen/CMakeLists.txt
|
||||
@@ -134,7 +134,7 @@ file(GLOB native_cuda_cu "native/cuda/*.cu")
|
||||
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
|
||||
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
|
||||
file(GLOB native_cuda_linalg_cpp "native/cuda/linalg/*.cpp")
|
||||
-file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh" "native/hip/bgemm_kernels/*.h")
|
||||
+file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh" )
|
||||
file(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
|
||||
file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
|
||||
file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
|
||||
@@ -145,7 +145,7 @@ file(GLOB native_nested_h "native/nested/*.h")
|
||||
file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu")
|
||||
file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp")
|
||||
|
||||
-file(GLOB native_hip_hip "native/hip/*.hip" "native/hip/bgemm_kernels/*.hip")
|
||||
+file(GLOB native_hip_hip "native/hip/*.hip" )
|
||||
file(GLOB native_hip_cpp "native/hip/*.cpp")
|
||||
file(GLOB native_hip_linalg_cpp "native/hip/linalg/*.cpp")
|
||||
file(GLOB native_miopen_cpp "native/miopen/*.cpp")
|
||||
@@ -361,13 +361,12 @@ endif()
|
||||
${native_quantized_hip_hip}
|
||||
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
|
||||
)
|
||||
- if(WIN32) # Windows doesn't support Composable Kernels and Triton
|
||||
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
|
||||
file(GLOB native_hip_ck "native/hip/ck*.hip")
|
||||
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
||||
${native_hip_bgemm} ${native_hip_ck}
|
||||
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
|
||||
- endif()
|
||||
+
|
||||
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
|
||||
list(APPEND all_hip_cpp
|
||||
${native_nested_hip_cpp}
|
||||
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
|
||||
index f598fc3a39d3..03dab6ff38fe 100644
|
||||
--- a/aten/src/ATen/Context.cpp
|
||||
+++ b/aten/src/ATen/Context.cpp
|
||||
@@ -355,6 +355,7 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
}
|
||||
|
||||
void Context::setBlasPreferredBackend(at::BlasBackend b) {
|
||||
+ return;
|
||||
#ifdef _MSC_VER
|
||||
TORCH_WARN_ONCE(
|
||||
"torch.backends.cuda.preferred_blas_library is an experimental feature. "
|
||||
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
index a62b028fd4ff..cba38426ea1f 100644
|
||||
--- a/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
@@ -708,7 +708,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
bgemm_internal_cublaslt<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_NO_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
@@ -1061,7 +1061,7 @@ void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
|
||||
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
|
||||
#endif
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_NO_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
|
||||
}
|
||||
@@ -1077,7 +1077,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_NO_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
}
|
||||
@@ -1125,7 +1125,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_NO_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
}
|
||||
@@ -1141,7 +1141,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
|
||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
||||
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
-#ifdef USE_ROCM
|
||||
+#ifdef USE_ROCM_NO_CK
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
--
|
||||
2.48.1
|
||||
|
||||
|
|
@ -6,10 +6,10 @@
|
|||
# So pre releases can be tried
|
||||
%bcond_with gitcommit
|
||||
%if %{with gitcommit}
|
||||
# v2.7.0-rc2
|
||||
%global commit0 b1940b5867e40e40ebdce4db76f76d3d0b71d3f4
|
||||
# v2.7.0-rc3
|
||||
%global commit0 b04d8358d959925bee0adfd67cc17987af9fbb9d
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250413
|
||||
%global date0 20250326
|
||||
%global pypi_version 2.7.0
|
||||
%global flatbuffers_version 23.3.3
|
||||
%global miniz_version 3.0.2
|
||||
|
|
@ -104,6 +104,13 @@ Patch11: 0001-Improve-finding-and-using-the-rocm_version.h.patch
|
|||
# Patches need to be refactored for ToT
|
||||
# These are ROCm packages
|
||||
Patch101: 0001-cuda-hip-signatures.patch
|
||||
%else
|
||||
# https://github.com/pytorch/pytorch/issues/150187
|
||||
# The hack job
|
||||
# Patch11: 0001-python-torch-disable-ck.patch
|
||||
# Cleaned up hack job
|
||||
Patch11: 0001-Add-cmake-varaible-USE_ROCM_CK.patch
|
||||
|
||||
%endif
|
||||
|
||||
ExclusiveArch: x86_64 aarch64
|
||||
|
|
@ -159,6 +166,9 @@ BuildRequires: python3dist(sympy)
|
|||
%endif
|
||||
|
||||
%if %{with rocm}
|
||||
%if %{with gitcommit}
|
||||
BuildRequires: composable_kernel-devel
|
||||
%endif
|
||||
BuildRequires: hipblas-devel
|
||||
BuildRequires: hipblaslt-devel
|
||||
BuildRequires: hipcub-devel
|
||||
|
|
@ -330,6 +340,8 @@ sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass
|
|||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake
|
||||
# Use parallel jobs
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake
|
||||
|
||||
# No third_party fmt, use system
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt
|
||||
|
|
@ -447,6 +459,9 @@ sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable
|
|||
%if %{with gitcommit}
|
||||
# https://github.com/pytorch/pytorch/issues/149805
|
||||
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake
|
||||
# Fedora installs to /usr/include, not /usr/include/rocm-core
|
||||
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp
|
||||
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp
|
||||
%endif
|
||||
# use any hip, correct CMAKE_MODULE_PATH
|
||||
sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake
|
||||
|
|
@ -574,6 +589,7 @@ export BUILD_TEST=ON
|
|||
%if %{with rocm}
|
||||
|
||||
export USE_ROCM=ON
|
||||
export USE_ROCM_CK=OFF
|
||||
export USE_MAGMA=ON
|
||||
export HIP_PATH=`hipconfig -p`
|
||||
export ROCM_PATH=`hipconfig -R`
|
||||
|
|
@ -595,6 +611,7 @@ export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
|
|||
|
||||
%if %{with rocm}
|
||||
export USE_ROCM=ON
|
||||
export USE_ROCM_CK=OFF
|
||||
export HIP_PATH=`hipconfig -p`
|
||||
export ROCM_PATH=`hipconfig -R`
|
||||
RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir`
|
||||
|
|
@ -620,8 +637,10 @@ export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
|
|||
%files -n python3-%{pypi_name}
|
||||
%license LICENSE
|
||||
%doc README.md
|
||||
%if %{without gitcommit}
|
||||
%{_bindir}/convert-caffe2-to-onnx
|
||||
%{_bindir}/convert-onnx-to-caffe2
|
||||
%endif
|
||||
%{_bindir}/torchrun
|
||||
%{_bindir}/torchfrtrace
|
||||
%{python3_sitearch}/%{pypi_name}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue