Update to 2.8.0-rc8

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
This commit is contained in:
Tom Rix 2025-07-31 05:52:50 -07:00
commit cec8b79644
6 changed files with 17 additions and 168 deletions

3
.gitignore vendored
View file

@ -21,3 +21,6 @@
/pytorch-v2.5.1.tar.gz
/pytorch-v2.7.0.tar.gz
/v2.13.6.tar.gz
/pytorch-a1cb3cc.tar.gz
/v24.12.23.tar.gz
/kineto-5e75018.tar.gz

View file

@ -1,120 +0,0 @@
From 0f33e0a7bbd1522ee74f8fc1fbe3af7563318c79 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Fri, 28 Mar 2025 15:33:09 -0700
Subject: [PATCH] Add cmake variable USE_ROCM_CK
To control the use of ROCm Composable Kernel usage.
CK is not compatible with all rocBLAS GPUs, so the user
must explicitly choose to use CK.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
CMakeLists.txt | 1 +
aten/src/ATen/CMakeLists.txt | 8 ++++++--
aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++-----
cmake/Dependencies.cmake | 3 +++
4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3fee2f7ffc2..73903acce452 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,6 +249,7 @@ cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 085af373ec22..af268ab88572 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -361,13 +361,17 @@ endif()
${native_quantized_hip_hip}
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
)
- if(WIN32) # Windows doesn't support Composable Kernels and Triton
+ if(NOT USE_ROCM_CK) # Composable Kernel support disabled; exclude CK sources
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
file(GLOB native_hip_ck "native/hip/ck*.hip")
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
- ${native_hip_bgemm} ${native_hip_ck}
+ ${native_hip_bgemm} ${native_hip_ck})
+ endif()
+ if(WIN32) # Windows doesn't support Composable Kernels and Triton
+ exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
endif()
+
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
list(APPEND all_hip_cpp
${native_nested_hip_cpp}
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index a62b028fd4ff..a3dbf76848ea 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -708,7 +708,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
bgemm_internal_cublaslt<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
@@ -1061,7 +1061,7 @@ void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
#endif
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
}
@@ -1077,7 +1077,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
}
@@ -1125,7 +1125,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1141,7 +1141,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 30917bdf39f5..2ca6091030f1 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1046,6 +1046,9 @@ if(USE_ROCM)
if(HIPBLASLT_VEC_EXT)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
endif()
+ if(USE_ROCM_CK)
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
+ endif()
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
if(WIN32)
add_definitions(-DROCM_ON_WINDOWS)
--
2.48.1

View file

@ -15,8 +15,11 @@
%global miniz_version 3.0.2
%global pybind11_version 2.13.6
%else
%global pypi_version 2.7.0
%global flatbuffers_version 23.3.3
%global commit0 a1cb3cc05d46d198467bebbb6e8fba50a325d4e7
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
%global date0 20250723
%global pypi_version 2.8.0
%global flatbuffers_version 24.12.23
%global miniz_version 3.0.2
%global pybind11_version 2.13.6
%endif
@ -33,11 +36,7 @@
%endif
# For testing distributed+rccl etc.
%if %{with gitcommit}
%bcond_without rccl
%else
%bcond_with rccl
%endif
%bcond_with gloo
%bcond_without mpi
%bcond_without tensorpipe
@ -56,7 +55,7 @@ Name: python-%{pypi_name}
%if %{with gitcommit}
Version: %{pypi_version}^git%{date0}.%{shortcommit0}
%else
Version: %{pypi_version}
Version: %{pypi_version}.rc8
%endif
Release: %autorelease
Summary: PyTorch AI/ML framework
@ -68,7 +67,8 @@ URL: https://pytorch.org/
Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz
Source1000: pyproject.toml
%else
Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz
Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz
Source1000: pyproject.toml
%endif
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz
Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz
@ -96,25 +96,16 @@ Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-
%endif
%if %{without kineto}
%if %{with gitcommit}
%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658
%else
%global ki_commit be1317644c68b4bfc4646024a6b221066e430031
%endif
%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7})
Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz
%endif
%if %{without gitcommit}
# https://github.com/pytorch/pytorch/issues/150187
Patch11: 0001-Add-cmake-varaible-USE_ROCM_CK.patch
%else
# https://github.com/pytorch/pytorch/issues/150187
Patch11: 0001-Add-cmake-variable-USE_ROCM_CK.patch
# https://github.com/pytorch/pytorch/issues/156595
# Patch12: 0001-Use-horrible-dynamo-stub.patch
Patch12: 0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch
%endif
ExclusiveArch: x86_64 aarch64
%global toolchain gcc
@ -157,9 +148,7 @@ BuildRequires: python3dist(filelock)
BuildRequires: python3dist(jinja2)
BuildRequires: python3dist(networkx)
BuildRequires: python3dist(numpy)
%if %{with gitcommit}
BuildRequires: python3dist(pip)
%endif
BuildRequires: python3dist(pyyaml)
BuildRequires: python3dist(setuptools)
BuildRequires: python3dist(sphinx)
@ -178,9 +167,7 @@ BuildRequires: hipcub-devel
BuildRequires: hipfft-devel
BuildRequires: hiprand-devel
BuildRequires: hipsparse-devel
%if %{with gitcommit}
BuildRequires: hipsparselt-devel
%endif
BuildRequires: hipsolver-devel
BuildRequires: magma-devel
BuildRequires: miopen-devel
@ -198,10 +185,8 @@ BuildRequires: rocm-core-devel
BuildRequires: rocm-hip-devel
BuildRequires: rocm-runtime-devel
BuildRequires: rocm-rpm-macros
%if %{with gitcommit}
BuildRequires: rocsolver-devel
BuildRequires: rocm-smi-devel
%endif
BuildRequires: rocthrust-devel
BuildRequires: roctracer-devel
@ -275,7 +260,9 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release}
cp %{SOURCE1000} .
%else
%autosetup -p1 -n pytorch-v%{version}
%autosetup -p1 -n pytorch-%{commit0}
# Overwrite with a git checkout of the pyproject.toml
cp %{SOURCE1000} .
%endif
# Remove bundled egg-info
@ -349,10 +336,8 @@ sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unus
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake
# Use parallel jobs
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake
%if %{with gitcommit}
# Need to link with librocm_smi64
sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake
%endif
# No third_party fmt, use system
sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt
@ -449,9 +434,7 @@ mv googletest third_party
#
# Fake out pocketfft, and system header will be used
mkdir third_party/pocketfft
%if %{with gitcommit}
cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/
%endif
#
# Use the system valgrind headers
@ -608,19 +591,11 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
export HIP_CLANG_PATH=%{rocmllvm_bindir}
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%if %{with gitcommit}
%pyproject_wheel
%else
%py3_build
%endif
%else
%if %{with gitcommit}
%pyproject_wheel
%else
%py3_build
%endif
%endif
@ -637,33 +612,21 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
# pytorch uses clang, not hipcc
export HIP_CLANG_PATH=%{rocmllvm_bindir}
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%if %{with gitcommit}
%pyproject_install
%pyproject_save_files '*torch*'
%else
%py3_install
%endif
%else
%if %{with gitcommit}
%pyproject_install
%pyproject_save_files '*torch*'
%else
%py3_install
%endif
%endif
%check
%if %{with gitcommit}
# Not working yet
# pyproject_check_import torch
%else
%py3_check_import torch
%endif
# Do not remove the empty files

View file

@ -7,3 +7,6 @@ SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3
SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7
SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55
SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a
SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95
SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1
SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0