Update gitcommit to 2.7-rc3

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
This commit is contained in:
Tom Rix 2025-03-29 05:11:02 -07:00
commit e80f34f74d
3 changed files with 254 additions and 3 deletions

View file

@ -0,0 +1,120 @@
From 0f33e0a7bbd1522ee74f8fc1fbe3af7563318c79 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 28 Mar 2025 15:33:09 -0700
Subject: [PATCH] Add cmake varaible USE_ROCM_CK
To control the use of ROCm Composable Kernel usage.
CK is not compatible with all rocBLAS GPUs, so the user
must explicitly choose to use CK.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
CMakeLists.txt | 1 +
aten/src/ATen/CMakeLists.txt | 8 ++++++--
aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++-----
cmake/Dependencies.cmake | 3 +++
4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3fee2f7ffc2..73903acce452 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,6 +249,7 @@ cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" OFF)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 085af373ec22..af268ab88572 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -361,13 +361,17 @@ endif()
${native_quantized_hip_hip}
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
)
- if(WIN32) # Windows doesn't support Composable Kernels and Triton
+ if(NOT USE_ROCM_CK) # Composable Kernel disabled: exclude CK and bgemm kernel sources
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
file(GLOB native_hip_ck "native/hip/ck*.hip")
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
- ${native_hip_bgemm} ${native_hip_ck}
+ ${native_hip_bgemm} ${native_hip_ck})
+ endif()
+ if(WIN32) # Windows doesn't support Composable Kernels and Triton
+ exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
endif()
+
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
list(APPEND all_hip_cpp
${native_nested_hip_cpp}
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index a62b028fd4ff..a3dbf76848ea 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -708,7 +708,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
bgemm_internal_cublaslt<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
@@ -1061,7 +1061,7 @@ void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
#endif
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
}
@@ -1077,7 +1077,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
}
@@ -1125,7 +1125,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1141,7 +1141,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 30917bdf39f5..2ca6091030f1 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1046,6 +1046,9 @@ if(USE_ROCM)
if(HIPBLASLT_VEC_EXT)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
endif()
+ if(USE_ROCM_CK)
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
+ endif()
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
if(WIN32)
add_definitions(-DROCM_ON_WINDOWS)
--
2.48.1

View file

@ -0,0 +1,112 @@
From 027dad1eaed51c1172e2497da611e3267d42d2f0 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 28 Mar 2025 09:16:03 -0700
Subject: [PATCH] python-torch: disable ck
---
aten/src/ATen/CMakeLists.txt | 7 +++----
aten/src/ATen/Context.cpp | 1 +
aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++-----
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 085af373ec22..84808880e51c 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -134,7 +134,7 @@ file(GLOB native_cuda_cu "native/cuda/*.cu")
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
file(GLOB native_cuda_linalg_cpp "native/cuda/linalg/*.cpp")
-file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh" "native/hip/bgemm_kernels/*.h")
+file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh" )
file(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
@@ -145,7 +145,7 @@ file(GLOB native_nested_h "native/nested/*.h")
file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu")
file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp")
-file(GLOB native_hip_hip "native/hip/*.hip" "native/hip/bgemm_kernels/*.hip")
+file(GLOB native_hip_hip "native/hip/*.hip" )
file(GLOB native_hip_cpp "native/hip/*.cpp")
file(GLOB native_hip_linalg_cpp "native/hip/linalg/*.cpp")
file(GLOB native_miopen_cpp "native/miopen/*.cpp")
@@ -361,13 +361,12 @@ endif()
${native_quantized_hip_hip}
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
)
- if(WIN32) # Windows doesn't support Composable Kernels and Triton
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
file(GLOB native_hip_ck "native/hip/ck*.hip")
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
${native_hip_bgemm} ${native_hip_ck}
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
- endif()
+
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
list(APPEND all_hip_cpp
${native_nested_hip_cpp}
diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp
index f598fc3a39d3..03dab6ff38fe 100644
--- a/aten/src/ATen/Context.cpp
+++ b/aten/src/ATen/Context.cpp
@@ -355,6 +355,7 @@ at::BlasBackend Context::blasPreferredBackend() {
}
void Context::setBlasPreferredBackend(at::BlasBackend b) {
+ return;
#ifdef _MSC_VER
TORCH_WARN_ONCE(
"torch.backends.cuda.preferred_blas_library is an experimental feature. "
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index a62b028fd4ff..cba38426ea1f 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -708,7 +708,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
bgemm_internal_cublaslt<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_NO_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
@@ -1061,7 +1061,7 @@ void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
#endif
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_NO_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
}
@@ -1077,7 +1077,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_NO_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
}
@@ -1125,7 +1125,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_NO_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1141,7 +1141,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_NO_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
--
2.48.1

View file

@ -6,10 +6,10 @@
# So pre releases can be tried
%bcond_with gitcommit
%if %{with gitcommit}
# v2.7.0-rc2
%global commit0 b1940b5867e40e40ebdce4db76f76d3d0b71d3f4
# v2.7.0-rc3
%global commit0 b04d8358d959925bee0adfd67cc17987af9fbb9d
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
%global date0 20250413
%global date0 20250326
%global pypi_version 2.7.0
%global flatbuffers_version 23.3.3
%global miniz_version 3.0.2
@ -104,6 +104,13 @@ Patch11: 0001-Improve-finding-and-using-the-rocm_version.h.patch
# Patches need to be refactored for ToT
# These are ROCm packages
Patch101: 0001-cuda-hip-signatures.patch
%else
# https://github.com/pytorch/pytorch/issues/150187
# The hack job
# Patch11: 0001-python-torch-disable-ck.patch
# Cleaned up hack job
Patch11: 0001-Add-cmake-varaible-USE_ROCM_CK.patch
%endif
ExclusiveArch: x86_64 aarch64
@ -159,6 +166,9 @@ BuildRequires: python3dist(sympy)
%endif
%if %{with rocm}
%if %{with gitcommit}
BuildRequires: composable_kernel-devel
%endif
BuildRequires: hipblas-devel
BuildRequires: hipblaslt-devel
BuildRequires: hipcub-devel
@ -330,6 +340,8 @@ sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake
# Use parallel jobs
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake
# No third_party fmt, use system
sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt
@ -447,6 +459,9 @@ sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable
%if %{with gitcommit}
# https://github.com/pytorch/pytorch/issues/149805
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake
# Fedora installs to /usr/include, not /usr/include/rocm-core
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp
%endif
# use any hip, correct CMAKE_MODULE_PATH
sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake
@ -574,6 +589,7 @@ export BUILD_TEST=ON
%if %{with rocm}
export USE_ROCM=ON
export USE_ROCM_CK=OFF
export USE_MAGMA=ON
export HIP_PATH=`hipconfig -p`
export ROCM_PATH=`hipconfig -R`
@ -595,6 +611,7 @@ export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%if %{with rocm}
export USE_ROCM=ON
export USE_ROCM_CK=OFF
export HIP_PATH=`hipconfig -p`
export ROCM_PATH=`hipconfig -R`
RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir`
@ -620,8 +637,10 @@ export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%files -n python3-%{pypi_name}
%license LICENSE
%doc README.md
%if %{without gitcommit}
%{_bindir}/convert-caffe2-to-onnx
%{_bindir}/convert-onnx-to-caffe2
%endif
%{_bindir}/torchrun
%{_bindir}/torchfrtrace
%{python3_sitearch}/%{pypi_name}