diff --git a/.gitignore b/.gitignore index 25abff5..a4ed35b 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ /pytorch-v2.5.1.tar.gz /pytorch-v2.7.0.tar.gz /v2.13.6.tar.gz +/pytorch-a1cb3cc.tar.gz +/v24.12.23.tar.gz +/kineto-5e75018.tar.gz diff --git a/0001-Add-cmake-varaible-USE_ROCM_CK.patch b/0001-Add-cmake-varaible-USE_ROCM_CK.patch deleted file mode 100644 index b34e07a..0000000 --- a/0001-Add-cmake-varaible-USE_ROCM_CK.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 0f33e0a7bbd1522ee74f8fc1fbe3af7563318c79 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 28 Mar 2025 15:33:09 -0700 -Subject: [PATCH] Add cmake varaible USE_ROCM_CK - -To control the use of ROCm Composable Kernel usage. - -CK is not compatible with all rocBLAS gpu's, so the user -must explicitly choose to use CK. - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 1 + - aten/src/ATen/CMakeLists.txt | 8 ++++++-- - aten/src/ATen/cuda/CUDABlas.cpp | 10 +++++----- - cmake/Dependencies.cmake | 3 +++ - 4 files changed, 15 insertions(+), 7 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index f3fee2f7ffc2..73903acce452 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -249,6 +249,7 @@ cmake_dependent_option( - BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON - "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) - cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) -+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) - option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) - cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) - cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF -diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 085af373ec22..af268ab88572 100644 ---- a/aten/src/ATen/CMakeLists.txt -+++ b/aten/src/ATen/CMakeLists.txt -@@ -361,13 +361,17 @@ endif() - ${native_quantized_hip_hip} - ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} - ) -- if(WIN32) # Windows doesn't support Composable Kernels and Triton -+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels and Triton - file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") - file(GLOB native_hip_ck "native/hip/ck*.hip") - exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" -- ${native_hip_bgemm} ${native_hip_ck} -+ ${native_hip_bgemm} ${native_hip_ck}) -+ endif() -+ if(WIN32) # Windows doesn't support Composable Kernels and Triton -+ exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" - ${native_transformers_hip_hip} ${native_transformers_hip_cpp}) - endif() -+ - # TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources) - list(APPEND all_hip_cpp - ${native_nested_hip_cpp} -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index a62b028fd4ff..a3dbf76848ea 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -708,7 +708,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } -@@ -1061,7 +1061,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); - #endif - } --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); - } -@@ -1077,7 +1077,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); - } --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(float)); - } -@@ -1125,7 +1125,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); - } --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); - } -@@ -1141,7 +1141,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 30917bdf39f5..2ca6091030f1 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1046,6 +1046,9 @@ if(USE_ROCM) - if(HIPBLASLT_VEC_EXT) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) - endif() -+ if(USE_ROCM_CK) -+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) -+ endif() - list(APPEND HIP_HIPCC_FLAGS --offload-compress) - if(WIN32) - add_definitions(-DROCM_ON_WINDOWS) --- -2.48.1 - diff --git a/next/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch similarity index 100% rename from next/0001-Add-cmake-variable-USE_ROCM_CK.patch rename to 0001-Add-cmake-variable-USE_ROCM_CK.patch diff --git a/next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch similarity index 100% rename from next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch rename to 0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch diff --git a/python-torch.spec b/python-torch.spec index 03fbf30..1fbad8e 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -15,8 +15,11 @@ %global miniz_version 3.0.2 %global pybind11_version 2.13.6 %else -%global pypi_version 2.7.0 -%global flatbuffers_version 23.3.3 +%global commit0 a1cb3cc05d46d198467bebbb6e8fba50a325d4e7 +%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) +%global date0 20250723 +%global pypi_version 2.8.0 +%global flatbuffers_version 24.12.23 %global miniz_version 3.0.2 %global pybind11_version 2.13.6 %endif @@ -33,11 +36,7 @@ %endif # For testing distributed+rccl etc. -%if %{with gitcommit} %bcond_without rccl -%else -%bcond_with rccl -%endif %bcond_with gloo %bcond_without mpi %bcond_without tensorpipe @@ -56,7 +55,7 @@ Name: python-%{pypi_name} %if %{with gitcommit} Version: %{pypi_version}^git%{date0}.%{shortcommit0} %else -Version: %{pypi_version} +Version: %{pypi_version}.rc8 %endif Release: %autorelease Summary: PyTorch AI/ML framework @@ -68,7 +67,8 @@ URL: https://pytorch.org/ Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz Source1000: pyproject.toml %else -Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz +Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz +Source1000: pyproject.toml %endif Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz @@ -96,25 +96,16 @@ Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp- %endif %if %{without kineto} -%if %{with gitcommit} %global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 -%else -%global ki_commit be1317644c68b4bfc4646024a6b221066e430031 -%endif %global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz %endif -%if %{without gitcommit} -# https://github.com/pytorch/pytorch/issues/150187 -Patch11: 0001-Add-cmake-varaible-USE_ROCM_CK.patch -%else # https://github.com/pytorch/pytorch/issues/150187 Patch11: 0001-Add-cmake-variable-USE_ROCM_CK.patch # https://github.com/pytorch/pytorch/issues/156595 # Patch12: 0001-Use-horrible-dynamo-stub.patch Patch12: 0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch -%endif ExclusiveArch: x86_64 aarch64 %global toolchain gcc @@ -157,9 +148,7 @@ BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) BuildRequires: python3dist(numpy) -%if %{with gitcommit} BuildRequires: python3dist(pip) -%endif BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) BuildRequires: python3dist(sphinx) @@ -178,9 +167,7 @@ BuildRequires: hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel -%if %{with gitcommit} BuildRequires: hipsparselt-devel -%endif BuildRequires: hipsolver-devel BuildRequires: magma-devel BuildRequires: miopen-devel @@ -198,10 +185,8 @@ BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -%if %{with gitcommit} BuildRequires: rocsolver-devel BuildRequires: rocm-smi-devel -%endif BuildRequires: rocthrust-devel BuildRequires: roctracer-devel @@ -275,7 +260,9 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} cp %{SOURCE1000} . %else -%autosetup -p1 -n pytorch-v%{version} +%autosetup -p1 -n pytorch-%{commit0} +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE1000} . %endif # Remove bundled egg-info @@ -349,10 +336,8 @@ sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unus sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake # Use parallel jobs sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake -%if %{with gitcommit} # Need to link with librocm_smi64 sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake -%endif # No third_party fmt, use system sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt @@ -449,9 +434,7 @@ mv googletest third_party # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft -%if %{with gitcommit} cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ -%endif # # Use the system valgrind headers @@ -608,19 +591,11 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode export HIP_CLANG_PATH=%{rocmllvm_bindir} export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with gitcommit} %pyproject_wheel -%else -%py3_build -%endif %else -%if %{with gitcommit} %pyproject_wheel -%else -%py3_build -%endif %endif @@ -637,33 +612,21 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode # pytorch uses clang, not hipcc export HIP_CLANG_PATH=%{rocmllvm_bindir} export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with gitcommit} %pyproject_install %pyproject_save_files '*torch*' -%else -%py3_install -%endif %else -%if %{with gitcommit} %pyproject_install %pyproject_save_files '*torch*' -%else -%py3_install -%endif %endif %check -%if %{with gitcommit} # Not working yet # pyproject_check_import torch -%else -%py3_check_import torch -%endif # Do not remote the empty files diff --git a/sources b/sources index 4021d40..c7eae22 100644 --- a/sources +++ b/sources @@ -7,3 +7,6 @@ SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3 SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a +SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 +SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 +SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0