Compare commits


98 commits

Author SHA1 Message Date
Tom Rix
294accd75d Fix hip
device lib path is no longer needed

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2026-01-12 16:38:15 -08:00
Alexander F. Lent
767d576d1d Continue to support aarch64 with myself maintaining
Signed-off-by: Alexander F. Lent <lx@xanderlent.com>
2025-12-21 13:33:32 +00:00
Alexander F. Lent
7ddebb112b Improve build times on non-x86 systems
Signed-off-by: Alexander F. Lent <lx@xanderlent.com>
2025-12-20 18:19:17 -05:00
Tom Rix
7908450a47 Update to 2.9.1
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-12-18 13:52:00 -08:00
Tom Rix
b3977567d2 Always include onnx src
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-11-24 07:03:31 -08:00
Tom Rix
e0030b3ec5 Rebuild for ROCm 7.1
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-11-17 14:11:32 -08:00
Tom Rix
741c412249 Remove pytorch-rpm-macros package.
This does not work when building on a general arch

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-10-17 10:05:50 -07:00
Tom Rix
ef01976cf4 Update to 2.9.0
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-10-15 16:27:01 -07:00
Tom Rix
b615a5f89b Update to 2.9.0-rc9
Introduce pytorch-rpm-macros package.
Add %pytorch_arches to the macros, set to aarch64 and x86_64

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-10-13 10:17:32 -07:00
Tom Rix
1509cbcd60 Update to 2.9.0-rc6
aarch64 is not building, so disable.

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-10-06 14:27:41 -07:00
Tom Rix
f29cac5d83 Update to 2.9.0-rc4
Work around ROCm 7 build issue in 2.8.0

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-09-30 07:35:43 -07:00
Tom Rix
89daf765fd Disable magma
Magma is broken on ROCm 7.

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-09-26 14:24:46 -07:00
Python Maint
95f1f6fe22 Rebuilt for Python 3.14.0rc3 bytecode
2025-09-19 14:37:44 +02:00
Python Maint
a6dcc4b8d8 Rebuilt for Python 3.14.0rc2 bytecode
2025-08-15 15:02:18 +02:00
Tom Rix
1b986b4993 Build on EPEL
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-08-14 09:10:18 -07:00
Tom Rix
d67e1e127a Update to 2.8.0
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-08-08 14:00:11 -07:00
Tom Rix
eaa9838b3c Change a couple cmake mins
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-08-04 16:23:41 -07:00
Tom Rix
cec8b79644 Update to 2.8.0-rc8
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-07-31 05:52:50 -07:00
Tom Rix
72ad1f0389 Fix some issues with switching to pyproject macros
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-07-26 17:16:51 -07:00
Fedora Release Engineering
6158e4810c Rebuilt for https://fedoraproject.org/wiki/Fedora_43_Mass_Rebuild
2025-07-25 10:49:07 +00:00
Tom Rix
61ccf033a8 Update gitcommit to 2.8.0-rc8
Patch problem with 3.14
Start converting over py3 macros
Handle new dependency on rocmsmi

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-07-24 06:07:03 -07:00
Tom Rix
42c33b8dcd Update the next gitcommit to v2.8.0-rc6
Remove old patches.

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-07-20 12:44:41 -07:00
Tom Rix
27593d78b3 update gitcommit to 2.8-rc3
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-06-27 14:44:29 -07:00
Python Maint
edfa2c25e3 Rebuilt for Python 3.14
2025-06-06 16:14:25 +02:00
Tom Rix
e6d73d7c49 Rebuild for magma
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-05-04 07:36:00 -07:00
Tom Rix
aeb5b118d5 Turn off kleidai
Breaks aarch64

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-05-01 08:40:21 -07:00
Tom Rix
2f3d92b7c5 Update to 2.7.0
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-04-25 12:39:04 -07:00
Tom Rix
fb64b28d49 Update gitcommit to 2.7.0-rc10
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-04-19 08:20:35 -07:00
Tom Rix
f0eda9ace1 Update gitcommit to 2.7.0-rc9
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-04-13 07:11:27 -07:00
Tom Rix
e3c2449e4f Update gitcommit to 2.7.0-rc8
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-04-10 05:27:21 -07:00
Tom Rix
96edd6c2ec Update gitcommit to v2.7.0-rc6
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-04-05 08:56:18 -07:00
Tom Rix
e80f34f74d Update gitcommit to 2.7-rc3
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-03-29 05:11:02 -07:00
Tom Rix
bd11f4aa1a Update gitcommit to v2.7.0-rc2
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-03-22 11:58:26 -07:00
Tom Rix
23f5d11926 Update gitcommit
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-03-13 05:07:43 -07:00
Tom Rix
dd353fd56b Remove papering over c++ assert problem.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-03-12 08:07:40 -07:00
Tom Rix
7569831b20 cmake version changed
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-03-01 07:52:12 -08:00
Tom Rix
2508009c1f Remove gold linker
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-02-26 07:32:31 -08:00
Tom Rix
9c39544f2c Remove rocm loop
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-02-17 05:15:28 -08:00
Tom Rix
3429048672 Rebuild
2025-01-31 04:02:13 -08:00
Tom Rix
c36179d420 Document the issue for c++ asserts in upstream
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-01-24 04:39:34 -08:00
Tom Rix
14a9d71118 triage build break
gcc 15 libstdc++ change broke things.

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-01-23 12:13:37 -08:00
Fedora Release Engineering
d8b4f3d9d9 Rebuilt for https://fedoraproject.org/wiki/Fedora_42_Mass_Rebuild
2025-01-18 20:11:05 +00:00
Tom Rix
749438b8bb Rebuild for onnx
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2025-01-01 16:54:46 -08:00
Tom Rix
3400ff6024 Remove many options
These options were not being tested so they are being removed.

Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-12-24 04:59:37 -08:00
Tom Rix
64906bb61c Obsolete caffe
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-12-23 10:12:09 -08:00
Tom Rix
91a938b757 Update to 2.5.1
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-12-21 07:04:12 -08:00
Tom Rix
0cf3ac43fb Update for 2.5.0
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-11-26 15:53:19 -08:00
Tom Rix
2819806afc Use rocmllvm_bindir
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-11-14 11:23:54 -08:00
Peter Robinson
0d47a8ae9c drop old versions of pytorch from sources
Saves quite a lot of unnecessary downloads when
building locally.
2024-10-31 11:21:08 +00:00
Peter Robinson
8dce5406c1 Add binutils-gold build dep
We explicitly set the USE_GOLD_LINKER option but don't
add the binutils-gold build dep, so the build errors;
add the dep to fix that.
2024-10-31 11:18:03 +00:00
Peter Robinson
f6e9c14785 Fix various Provides including the pytorch provides
The Provides need to be in the python3-torch package, as
the root package isn't one that users see; it's purely
the src.rpm. So move all the Provides to the right
location and drop the duplicate pytorch provides while
we're at it.

Fixes: RHBZ #2272064
2024-10-31 11:01:13 +00:00
Tom Rix
30effbf7a8 Use the new xnnpack
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-10-29 12:48:21 -07:00
Tom Rix
1c67f87710 Update gitcommit to v2.5.0-rc9
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-10-11 06:41:52 -07:00
Tom Rix
07745618d1 Update for llvm18
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-10-10 04:07:13 -07:00
Tom Rix
905814b8c0 Some help finding llvm18
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-10-07 12:27:39 -07:00
Tom Rix
b9295a009b Update gitcommit
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-09-29 17:22:33 -07:00
Tom Rix
93a8fc8c0d Simplify cuda versions
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-09-15 11:04:21 -07:00
Tom Rix
e6c59f2665 Update gitcommit
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-09-15 06:27:11 -07:00
Tom Rix
d6f80bf26e Update to 2.4.1
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-09-09 09:43:45 -07:00
Tom Rix
2904630b42 amdsmi is a runtime dependency for ROCm
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-09-03 05:30:39 -07:00
Tom Rix
9d8df35466 Update the gitcommit
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-08-30 15:31:17 -07:00
Tom Rix
fdb139d12b Start tracking 2.5
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-08-15 10:24:43 -07:00
Tom Rix
c5c84ab4b2 Disable fbgemm with rocm
With building python-torchvision, there is this link error.
trix@fedora:~/fedora/python-torchvision$ c++filt _ZN2at4cuda4blas9int8_gemmEbblllPKalS3_lPil
at::cuda::blas::int8_gemm(bool, bool, long, long, long, signed char const*, long, signed char const*, long,\
 int*, long)

So disable fbgemm when rocm is enabled.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
2024-08-07 11:13:43 -07:00
Tom Rix
548a9ad819 Enable hipblaslt
Remove gfx10,gfx11 - koji taking too long to build.

Signed-off-by: Tom Rix <trix@amd.com>
2024-08-05 05:02:57 -07:00
Tom Rix
da9c85c23d Remove the packages
2024-08-04 06:12:14 -07:00
Tom Rix
5b0238639f Simplify ROCm gpu list
gfx8 is obsolete
gfx110, gfx942 and gfx90a are redundant.

Signed-off-by: Tom Rix
2024-08-03 20:47:55 -07:00
Tom Rix
2efd1f0d79 Fbgemm not available on aarch64
Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-27 08:58:44 -04:00
Sérgio M. Basto
f30087e5e7 Rebuild for opencv 4.10.0
2024-07-25 23:51:21 +01:00
Tom Rix
86185b46a2 PyTorch 2.4
Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-25 16:33:22 -06:00
Tom Rix
2debc89ffd Fix USE_NUMA
Needed a BuildRequires: numactl-devel

Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-20 15:43:46 -06:00
Tom Rix
2d9b5647c3 Use fbgemm on 2.4
Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-20 11:41:10 -06:00
Tom Rix
f3030ddf1d Merge #3 Rebuilt for fmt 11
2024-07-20 12:14:57 +00:00
Fedora Release Engineering
9bb05d0f36 Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild
2024-07-19 16:35:07 +00:00
Kefu Chai
5a34ef7cfd Rebuilt for fmt 11
2024-07-16 08:02:17 +08:00
Tom Rix
6b67ee1804 Update to 2.4-rc8
Also change logic to use compat gcc
Add logic for the magma package

Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-10 06:35:10 -06:00
Tom Rix
9028e14b85 Switch from openblas to flexiblas (rhbz#2295953)
Suggested-by: Inaki Ucar
Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-05 10:20:14 -06:00
Tom Rix
d5247c7f63 Show use of hipblaslt package
PyTorch+ROCm requires hipblaslt.
Instead of patching to work around this requirement, use hipblaslt.

Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-04 09:35:32 -06:00
Tom Rix
854533551e Revisions of patches for 2.4
Signed-off-by: Tom Rix <trix@redhat.com>
2024-07-04 07:29:25 -06:00
Tom Rix
3f2aad9a21 Add a CUDA subpackage
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-26 09:46:41 -04:00
Tom Rix
3a008de5e5 Update gitcommit to v2.4.0-rc6
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-26 05:50:40 -06:00
Tom Rix
bcdd1822e3 Add CUDA BuildRequires
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-25 08:09:08 -04:00
Tom Rix
c7679453c9 Update gitcommit to 2.4.0-rc5
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-24 16:21:55 -04:00
Tom Rix
4e1bebf65f Update gitcommit to 2.4.0-rc3
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-21 05:35:17 -06:00
Benjamin A. Beasley
dc54ab1a71 Patch for sleef 3.6
2024-06-18 05:34:11 -06:00
Python Maint
5c8e5d96d2 Rebuilt for Python 3.13
2024-06-14 10:41:08 +02:00
Tom Rix
751813c99a Update gitcommit
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-13 15:49:35 -04:00
Tom Rix
1311b02e13 Use specific version of CUDA based on distro release
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-13 09:35:46 -04:00
Tom Rix
af5905971b Fix broken cpuinfo for aarch64
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 15:07:44 -06:00
Tom Rix
3c49a148a5 Reduce amd gpu list on F40
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 15:04:34 -06:00
Tom Rix
4189bb90d9 Start a readme for NVIDIA
For brave folks who will manually build their own rpms, here is some help.

Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-10 14:04:38 -04:00
Tom Rix
0073eb5260 Fix the normal build.
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-10 14:03:07 -04:00
Tom Rix
81b0cc6197 Update gitcommit
Fixup CUDA build on F39

Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-09 15:08:04 -04:00
Tom Rix
4f7937aa0d Update to 2.3.1
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-09 09:46:55 -06:00
Tom Rix
f33a5cb176 Add --with compat_gcc
Experimental gcc 13 on rawhide.

Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-08 13:43:02 -06:00
Tom Rix
cefcc3d9e0 Do not apply ROCm patches with CUDA build
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-08 12:04:20 -04:00
Tom Rix
a4bcccda60 Do not conditionally patch
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-06 18:17:37 -06:00
Tom Rix
abd06324b2 Update for ROCm 6.1.1
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-06 15:54:22 -06:00
Tom Rix
511c020e43 Update the ToT git commit
This broke the 0001-disable-submodule-serach.patch.
So convert patch into a sed command.

Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-05 14:10:18 -06:00
23 changed files with 1164 additions and 1563 deletions

.gitignore

@@ -11,9 +11,23 @@
/libnop-910b558.tar.gz
/pytorch-97ff6cf.tar.gz
/pytorch-v2.3.0.tar.gz
/xnnpack-fcbf55a.tar.gz
/FXdiv-63058ef.tar.gz
/FP16-0a92994.tar.gz
/psimd-072586a.tar.gz
/pthreadpool-4fe0e1e.tar.gz
/pocketfft-076cb3d.tar.gz
/pytorch-v2.3.1.tar.gz
/pytorch-v2.4.0.tar.gz
/v1.14.2.tar.gz
/cpp-httplib-3b6597b.tar.gz
/kineto-be13176.tar.gz
/pytorch-v2.4.1.tar.gz
/pytorch-v2.5.0.tar.gz
/pytorch-v2.5.1.tar.gz
/pytorch-v2.7.0.tar.gz
/v2.13.6.tar.gz
/pytorch-a1cb3cc.tar.gz
/v24.12.23.tar.gz
/kineto-5e75018.tar.gz
/pytorch-v2.8.0.tar.gz
/v1.18.0.tar.gz
/pytorch-715dca6.tar.gz
/pytorch-fd36458.tar.gz
/pytorch-0fabc3b.tar.gz
/pytorch-v2.9.0.tar.gz
/pytorch-v2.9.1.tar.gz


@@ -0,0 +1,202 @@
From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Mon, 21 Jul 2025 11:35:03 -0700
Subject: [PATCH] Add cmake variable USE_ROCM_CK
---
CMakeLists.txt | 1 +
aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++-----------------
aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++---------
cmake/Dependencies.cmake | 3 +++
4 files changed, 35 insertions(+), 31 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5d25e6afa0f..afc1b53efa64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -240,6 +240,7 @@ cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index c9cfd74b501e..59f6178218ee 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -373,26 +373,26 @@ if(USE_ROCM)
# is header only, so this should be ok, except that the CMake build generates
# a ck/config.h. We just do that part here. Without this, the ck.h from the
# ROCM SDK may get accidentally used instead.
- function(_pytorch_rocm_generate_ck_conf)
- set(CK_ENABLE_INT8 "ON")
- set(CK_ENABLE_FP16 "ON")
- set(CK_ENABLE_FP32 "ON")
- set(CK_ENABLE_FP64 "ON")
- set(CK_ENABLE_BF16 "ON")
- set(CK_ENABLE_FP8 "ON")
- set(CK_ENABLE_BF8 "ON")
- set(CK_USE_XDL "ON")
- set(CK_USE_WMMA "ON")
- configure_file(
- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
- )
- endfunction()
+# function(_pytorch_rocm_generate_ck_conf)
+# set(CK_ENABLE_INT8 "ON")
+# set(CK_ENABLE_FP16 "ON")
+# set(CK_ENABLE_FP32 "ON")
+# set(CK_ENABLE_FP64 "ON")
+# set(CK_ENABLE_BF16 "ON")
+# set(CK_ENABLE_FP8 "ON")
+# set(CK_ENABLE_BF8 "ON")
+# set(CK_USE_XDL "ON")
+# set(CK_USE_WMMA "ON")
+# configure_file(
+# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
+# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
+# )
+# endfunction()
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
- _pytorch_rocm_generate_ck_conf()
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
+# _pytorch_rocm_generate_ck_conf()
# Next two lines are needed because TunableOp uses third-party/fmt
list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
@@ -409,7 +409,7 @@ endif()
${native_quantized_hip_hip}
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
)
- if(WIN32) # Windows doesn't support Composable Kernels
+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
file(GLOB native_hip_ck "native/hip/ck*.hip")
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index 89350a11bea7..e5b7960177cf 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -752,7 +752,7 @@ template <>
void bgemm_internal<double>(CUDABLAS_BGEMM_ARGTYPES(double))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support double gemm yet
bgemm_internal_cublas<double>(CUDABLAS_BGEMM_ARGS(double));
#else
@@ -836,7 +836,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
bgemm_internal_cublas<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
@@ -1270,14 +1270,14 @@ template <>
void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support double gemm yet
gemm_internal_cublas<double>(CUDABLAS_GEMM_ARGS(double));
#else
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
#endif
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
}
@@ -1293,7 +1293,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
@@ -1311,7 +1311,7 @@ template <>
void gemm_internal<c10::complex<double>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<double>))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support complex gemm yet
gemm_internal_cublas<c10::complex<double>>(CUDABLAS_GEMM_ARGS(c10::complex<double>));
#else
@@ -1327,7 +1327,7 @@ template <>
void gemm_internal<c10::complex<float>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<float>))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support complex gemm yet
gemm_internal_cublas<c10::complex<float>>(CUDABLAS_GEMM_ARGS(c10::complex<float>));
#else
@@ -1345,7 +1345,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1361,7 +1361,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
@@ -1382,7 +1382,7 @@ void gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half,
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
}
@@ -1398,7 +1398,7 @@ void gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
}
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index a93386c27f8d..be1368999d38 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1031,6 +1031,9 @@ if(USE_ROCM)
if(HIPBLASLT_VEC_EXT)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
endif()
+ if(USE_ROCM_CK)
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
+ endif()
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
if(WIN32)
add_definitions(-DROCM_ON_WINDOWS)
--
2.49.0
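The key line in the patch above is `cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON)`: the option is user-settable (default ON) only while the `USE_ROCM` condition holds; otherwise it is pinned to the force value. A rough Python sketch of those semantics (function and parameter names are illustrative, not CMake API):

```python
def cmake_dependent_option(user_value, default, condition, force):
    """Approximate cmake_dependent_option semantics: the option is
    user-settable (falling back to `default`) only when `condition`
    holds; otherwise it is pinned to `force` regardless of the user."""
    if condition:
        return default if user_value is None else user_value
    return force


# USE_ROCM_CK defaults ON when USE_ROCM is ON...
assert cmake_dependent_option(None, True, condition=True, force=True) is True
# ...and a user can still switch it off while USE_ROCM is ON.
assert cmake_dependent_option(False, True, condition=True, force=True) is False
```

Note the patch passes ON as the force value as well (the usual idiom forces OFF), so as written the flag reads ON even when `USE_ROCM` is unset.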


@@ -0,0 +1,359 @@
From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001
From: albanD <desmaison.alban@gmail.com>
Date: Sat, 12 Jul 2025 03:42:33 -0400
Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14
Imported from
https://github.com/albanD/pytorch/tree/cpython314_build
commit 88bb9cdb72449f4277829e20d94ad8aec1894216
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
torch/_dynamo/bytecode_analysis.py | 2 +-
torch/ao/quantization/__init__.py | 5 +++-
torch/ao/quantization/qconfig.py | 4 ++-
torch/ao/quantization/utils.py | 7 +++--
torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++
torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++
torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++--------
torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++
torch/csrc/utils/python_compat.h | 1 +
torch/onnx/__init__.py | 1 -
torch/utils/weak.py | 29 +++++++++++++++++--
11 files changed, 111 insertions(+), 19 deletions(-)
diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py
index 3252ea91409f..2de74ee5bf8d 100644
--- a/torch/_dynamo/bytecode_analysis.py
+++ b/torch/_dynamo/bytecode_analysis.py
@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11):
TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"])
else:
TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"])
-if sys.version_info >= (3, 12):
+if (3, 12) <= sys.version_info < (3, 14):
TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"])
if sys.version_info >= (3, 13):
TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"])
diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py
index ffc1792fd23f..cf5a8b99a894 100644
--- a/torch/ao/quantization/__init__.py
+++ b/torch/ao/quantization/__init__.py
@@ -1,5 +1,6 @@
# mypy: allow-untyped-defs
+import sys
from typing import Callable, Optional, Union
import torch
@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403
# ensure __module__ is set correctly for public APIs
ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase]
-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
+if sys.version_info < (3, 14):
+ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
+
for _f in [
compare_results,
extract_results_from_loggers,
diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py
index efee5302ad42..d9a8fc78bab4 100644
--- a/torch/ao/quantization/qconfig.py
+++ b/torch/ao/quantization/qconfig.py
@@ -1,5 +1,6 @@
# mypy: allow-untyped-defs
import copy
+import sys
import warnings
from collections import namedtuple
from typing import Any, Optional, Union
@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N
QConfigAny = Optional[QConfig]
-QConfigAny.__module__ = "torch.ao.quantization.qconfig"
+if sys.version_info < (3, 14):
+ QConfigAny.__module__ = "torch.ao.quantization.qconfig"
def _add_module_to_qconfig_obs_ctr(
diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py
index 4ac3112ec072..3b1503e01701 100644
--- a/torch/ao/quantization/utils.py
+++ b/torch/ao/quantization/utils.py
@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph)
"""
import functools
+import sys
import warnings
from collections import OrderedDict
from inspect import getfullargspec, signature
@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized
NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any]
-NodePattern.__module__ = "torch.ao.quantization.utils"
+if sys.version_info < (3, 14):
+ NodePattern.__module__ = "torch.ao.quantization.utils"
# This is the Quantizer class instance from torch/quantization/fx/quantize.py.
# Define separately to prevent circular imports.
@@ -31,7 +33,8 @@ QuantizerCls = Any
Pattern = Union[
Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any
]
-Pattern.__module__ = "torch.ao.quantization.utils"
+if sys.version_info < (3, 14):
+ Pattern.__module__ = "torch.ao.quantization.utils"
# TODO: maybe rename this to MatchInputNode
diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c
index b68ef894aeaa..244d4165d5e8 100644
--- a/torch/csrc/dynamo/cpython_defs.c
+++ b/torch/csrc/dynamo/cpython_defs.c
@@ -2,6 +2,20 @@
#include <torch/csrc/dynamo/cpython_includes.h>
#include <torch/csrc/dynamo/debug_macros.h>
+#if IS_PYTHON_3_14_PLUS
+
+const uint8_t* THP_PyOpcode_Caches = NULL;
+const int THP_PyOpcode_Caches_size = 0;
+
+void
+THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
+{}
+void
+THP_PyFrame_Clear(_PyInterpreterFrame *frame)
+{}
+
+#else
+
#if IS_PYTHON_3_11_PLUS
#define Py_BUILD_CORE
@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL;
const int THP_PyOpcode_Caches_size = 0;
#endif
+
+#endif // IS_PYTHON_3_14_PLUS
\ No newline at end of file
diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h
index 6b99c1d5aec8..616be16563cf 100644
--- a/torch/csrc/dynamo/cpython_includes.h
+++ b/torch/csrc/dynamo/cpython_includes.h
@@ -21,6 +21,14 @@
#if IS_PYTHON_3_11_PLUS
#include <internal/pycore_frame.h>
+#if IS_PYTHON_3_14_PLUS
+#include <internal/pycore_interpframe_structs.h>
+#include <internal/pycore_stackref.h>
+#endif
+#endif
+
+#if IS_PYTHON_3_14_PLUS
+#include <internal/pycore_code.h>
#endif
#undef Py_BUILD_CORE
@@ -30,6 +38,13 @@
extern "C" {
#endif
+#if IS_PYTHON_3_14_PLUS
+
+#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable)
+#define PREV_INSTR(x) (x)->instr_ptr
+
+#else
+
#if IS_PYTHON_3_13_PLUS
#define F_CODE(x) ((PyCodeObject*)(x)->f_executable)
#define PREV_INSTR(x) (x)->instr_ptr
@@ -38,6 +53,8 @@ extern "C" {
#define PREV_INSTR(x) (x)->prev_instr
#endif
+#endif // IS_PYTHON_3_14_PLUS
+
#if IS_PYTHON_3_12_PLUS
#define FUNC(x) ((x)->f_funcobj)
#else
diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c
index f413782b2d30..72bb8839bac3 100644
--- a/torch/csrc/dynamo/eval_frame.c
+++ b/torch/csrc/dynamo/eval_frame.c
@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) {
return PyUnicode_AsUTF8(F_CODE(frame)->co_name);
}
-void clear_old_frame_if_python_312_plus(
- PyThreadState* tstate,
- THP_EVAL_API_FRAME_OBJECT* frame) {
-#if IS_PYTHON_3_12_PLUS
-
- THP_PyFrame_Clear(frame);
- THP_PyThreadState_PopFrame(tstate, frame);
-
-#endif
-}
-
static PyObject* dynamo_eval_custom_code_impl(
PyThreadState* tstate,
THP_EVAL_API_FRAME_OBJECT* frame,
@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim(
static void enable_eval_frame_shim(PyThreadState* tstate) {}
static void enable_eval_frame_default(PyThreadState* tstate) {}
+PyObject* dynamo_eval_custom_code(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame,
+ PyCodeObject* code,
+ const char* trace_annotation,
+ int throw_flag) {}
+THPPyInterpreterFrame* THPPyInterpreterFrame_New(
+ THP_EVAL_API_FRAME_OBJECT* frame) {}
+PyObject* dynamo_eval_frame_default(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame,
+ int throw_flag) {}
static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL};
@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = {
#endif // !(IS_PYTHON_3_14_PLUS)
+void clear_old_frame_if_python_312_plus(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame) {
+#if IS_PYTHON_3_12_PLUS
+
+ THP_PyFrame_Clear(frame);
+ THP_PyThreadState_PopFrame(tstate, frame);
+
+#endif
+}
+
static PyObject* increment_working_threads(
PyThreadState* tstate,
PyObject* module) {
diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp
index b839fb26fc91..c4ee36d87767 100644
--- a/torch/csrc/dynamo/framelocals_mapping.cpp
+++ b/torch/csrc/dynamo/framelocals_mapping.cpp
@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
PyCodeObject* co = F_CODE(frame);
_framelocals.resize(co->co_nlocalsplus, nullptr);
+#if IS_PYTHON_3_14_PLUS
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
if (!frame->stacktop) {
return;
}
+#endif
auto update_framelocals = [&](int i, PyObject* value) {
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
};
auto offset = co->co_nlocalsplus - co->co_nfreevars;
+#if IS_PYTHON_3_14_PLUS
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
for (int i = 0; i < offset; i++) {
update_framelocals(i, frame->localsplus[i]);
}
+#endif
+
// Get references to closure variables
+#if IS_PYTHON_3_14_PLUS
+ PyObject* closure;
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure;
+#endif
for (int i = 0; i < co->co_nfreevars; i++) {
update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i));
}
diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h
index a1537611cc47..16292e4fd030 100644
--- a/torch/csrc/utils/python_compat.h
+++ b/torch/csrc/utils/python_compat.h
@@ -13,6 +13,7 @@ extern "C" {
#define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000
#define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000
#define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000
+#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000
static inline int PyCode_GetNCellvars(PyCodeObject* code) {
// gh-26364 added co_ncellvars to Python 3.11.0rc1
diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py
index 345ffd2a065b..ceeadde5365b 100644
--- a/torch/onnx/__init__.py
+++ b/torch/onnx/__init__.py
@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx"
OnnxExporterError.__module__ = "torch.onnx"
_OrtBackend.__module__ = "torch.onnx"
_OrtBackendOptions.__module__ = "torch.onnx"
-_OrtExecutionProvider.__module__ = "torch.onnx"
enable_fake_mode.__module__ = "torch.onnx"
is_onnxrt_backend_supported.__module__ = "torch.onnx"
diff --git a/torch/utils/weak.py b/torch/utils/weak.py
index 8bf2ba5ed02b..9c7218cb2ad3 100644
--- a/torch/utils/weak.py
+++ b/torch/utils/weak.py
@@ -3,8 +3,6 @@ from __future__ import annotations
import collections.abc as _collections_abc
import weakref
-
-from _weakrefset import _IterationGuard # type: ignore[attr-defined]
from collections.abc import Mapping, MutableMapping
from weakref import ref
@@ -22,6 +20,33 @@ __all__ = [
]
+# TODO: make weakref properly thread safe following
+# https://github.com/python/cpython/pull/125325
+class _IterationGuard:
+ # This context manager registers itself in the current iterators of the
+ # weak container, such as to delay all removals until the context manager
+ # exits.
+ # This technique should be relatively thread-safe (since sets are).
+
+ def __init__(self, weakcontainer):
+ # Don't create cycles
+ self.weakcontainer = ref(weakcontainer)
+
+ def __enter__(self):
+ w = self.weakcontainer()
+ if w is not None:
+ w._iterating.add(self)
+ return self
+
+ def __exit__(self, e, t, b):
+ w = self.weakcontainer()
+ if w is not None:
+ s = w._iterating
+ s.remove(self)
+ if not s:
+ w._commit_removals()
+
+
# This file defines a variant of WeakKeyDictionary that overrides the hashing
# behavior of the key to use object identity, rather than the builtin
# __eq__/__hash__ functions. This is useful for Tensor weak keys, as their
--
2.49.0
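The `_IterationGuard` vendored in the patch above only works with a container that exposes an `_iterating` set and a `_commit_removals()` method. A minimal Python sketch of such a container (`TinyWeakSet` is an illustrative stand-in, not PyTorch code) shows how the guard defers removals while iteration is in progress:

```python
import weakref
from weakref import ref

# Compact copy of the guard from the patch above: registers itself with the
# container so removals are deferred until the last iterator exits.
class _IterationGuard:
    def __init__(self, weakcontainer):
        self.weakcontainer = ref(weakcontainer)  # don't create cycles

    def __enter__(self):
        w = self.weakcontainer()
        if w is not None:
            w._iterating.add(self)
        return self

    def __exit__(self, e, t, b):
        w = self.weakcontainer()
        if w is not None:
            s = w._iterating
            s.remove(self)
            if not s:
                w._commit_removals()

# TinyWeakSet is a made-up example container showing the two hooks
# (_iterating, _commit_removals) that the guard relies on.
class TinyWeakSet:
    def __init__(self):
        self.data = set()
        self._pending_removals = []
        self._iterating = set()

    def _commit_removals(self):
        while self._pending_removals:
            self.data.discard(self._pending_removals.pop())

    def add(self, item):
        def _remove(wr, self=self):
            if self._iterating:               # mid-iteration: defer removal
                self._pending_removals.append(wr)
            else:                             # safe to mutate immediately
                self.data.discard(wr)
        self.data.add(weakref.ref(item, _remove))

    def __iter__(self):
        with _IterationGuard(self):           # delay removals while iterating
            for wr in list(self.data):
                obj = wr()
                if obj is not None:
                    yield obj
```

If a referent dies while no iterator is active, the callback discards its weakref immediately; during iteration the removal lands in `_pending_removals` and is committed when the guard exits.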

@@ -1,262 +0,0 @@
From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 23 Feb 2024 08:27:30 -0500
Subject: [PATCH] Optionally use hipblaslt
The hipblaslt package is not available on Fedora.
Instead of requiring the package, make it optional.
If it is found, define the preprocessor variable HIPBLASLT
Convert the checks for ROCM_VERSION >= 50700 to HIPBLASLT checks
Signed-off-by: Tom Rix <trix@redhat.com>
---
aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++---
aten/src/ATen/cuda/CUDABlas.h | 2 +-
aten/src/ATen/cuda/CUDAContextLight.h | 4 ++--
aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++--
aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++---
aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------
cmake/Dependencies.cmake | 3 +++
cmake/public/LoadHIP.cmake | 4 ++--
8 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index d534ec5a178..e815463f630 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -14,7 +14,7 @@
#include <c10/util/irange.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 60000
+#ifdef HIPBLASLT
#include <hipblaslt/hipblaslt-ext.hpp>
#endif
// until hipblas has an API to accept flags, we must use rocblas here
@@ -781,7 +781,7 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
}
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000
// only for rocm 5.7 where we first supported hipblaslt, it was difficult
@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
};
} // namespace
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
template <typename Dtype>
void gemm_and_bias(
bool transpose_mat1,
@@ -1124,7 +1125,7 @@ template void gemm_and_bias(
at::BFloat16* result_ptr,
int64_t result_ld,
GEMMAndBiasActivationEpilogue activation);
-
+#endif
void scaled_gemm(
char transa,
char transb,
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
index eb12bb350c5..068607467dd 100644
--- a/aten/src/ATen/cuda/CUDABlas.h
+++ b/aten/src/ATen/cuda/CUDABlas.h
@@ -82,7 +82,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
template <>
void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
enum GEMMAndBiasActivationEpilogue {
None,
RELU,
diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h
index 4ec35f59a21..e28dc42034f 100644
--- a/aten/src/ATen/cuda/CUDAContextLight.h
+++ b/aten/src/ATen/cuda/CUDAContextLight.h
@@ -9,7 +9,7 @@
// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
// added bf16 support
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#include <cublasLt.h>
#endif
@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
/* Handles */
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
#endif
diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp
index 6913d2cd95e..3d4276be372 100644
--- a/aten/src/ATen/cuda/CublasHandlePool.cpp
+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp
@@ -29,7 +29,7 @@ namespace at::cuda {
namespace {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
void createCublasLtHandle(cublasLtHandle_t *handle) {
TORCH_CUDABLAS_CHECK(cublasLtCreate(handle));
}
@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() {
return handle;
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasLtHandle_t getCurrentCUDABlasLtHandle() {
#ifdef USE_ROCM
c10::DeviceIndex device = 0;
diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h
index 3ba0d761277..dde1870cfbf 100644
--- a/aten/src/ATen/cuda/tunable/TunableGemm.h
+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h
@@ -11,7 +11,7 @@
#include <ATen/cuda/tunable/GemmCommon.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 50700
+#ifdef HIPBLASLT
#include <ATen/cuda/tunable/GemmHipblaslt.h>
#endif
#include <ATen/cuda/tunable/GemmRocblas.h>
@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 29e5c5e3cf1..df56f3d7f1d 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -155,7 +155,7 @@ enum class Activation {
GELU,
};
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) {
switch (a) {
case Activation::None:
@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() {
#ifdef USE_ROCM
static bool isSupportedHipLtROCmArch(int index) {
+#if defined(HIPBLASLT)
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index);
std::string device_arch = prop->gcnArchName;
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) {
}
}
TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!");
+#endif
return false;
}
#endif
@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
at::ScalarType scalar_type = self.scalar_type();
c10::MaybeOwned<Tensor> self_;
if (&result != &self) {
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT)
// Strangely, if mat2 has only 1 row or column, we get
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
}
self__sizes = self_->sizes();
} else {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
useLtInterface = !disable_addmm_cuda_lt &&
result.dim() == 2 && result.is_contiguous() &&
isSupportedHipLtROCmArch(self.device().index()) &&
@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj());
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
if (useLtInterface) {
AT_DISPATCH_FLOATING_TYPES_AND2(
at::ScalarType::Half,
@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
at::native::resize_output(amax, {});
-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000)
+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasCommonArgs args(mat1, mat2, out);
const auto out_dtype_ = args.result->scalar_type();
TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt");
@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform.");
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 60000
+#if defined(USE_ROCM) && defined(HIPBLASLT)
// rocm's hipblaslt does not yet support amax, so calculate separately
auto out_float32 = out.to(kFloat);
out_float32.abs_();
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index b7ffbeb07dc..2b6c3678984 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1273,6 +1273,9 @@ if(USE_ROCM)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
endif()
+ if(hipblaslt_FOUND)
+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT)
+ endif()
if(HIPBLASLT_CUSTOM_DATA_TYPE)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE)
endif()
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index f6ca263c5e5..53eb0b63c1a 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -156,7 +156,7 @@ if(HIP_FOUND)
find_package_and_print_version(rocblas REQUIRED)
find_package_and_print_version(hipblas REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
- find_package_and_print_version(hipblaslt REQUIRED)
+ find_package_and_print_version(hipblaslt)
endif()
find_package_and_print_version(miopen REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
@@ -191,7 +191,7 @@ if(HIP_FOUND)
# roctx is part of roctracer
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)
- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
+ if(hipblaslt_FOUND)
# check whether hipblaslt is using its own datatype
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc")
file(WRITE ${file} ""
--
2.43.2

@@ -1,115 +0,0 @@
From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 4 Apr 2024 14:21:38 -0600
Subject: [PATCH] Reenable dim for python 3.12
In 3.12:
_PyArg_Parser gained a new element at the start of the structure,
so the existing positional initialization is now off by one. Switch to
designated (per-element) initialization.
_Py_CODEUNIT changed from an int to a union, but relevant_op
is passed an int (the return of decoder.opcode()), so the parameter
type is wrong; switch it to int.
The opcode PRECALL was removed, so restrict its handling to 3.11.
Signed-off-by: Tom Rix <trix@redhat.com>
---
functorch/csrc/dim/dim.cpp | 24 +++++-------------------
functorch/csrc/dim/minpybind.h | 4 ++--
2 files changed, 7 insertions(+), 21 deletions(-)
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
index 4cc027504c77..e48b0d58081f 100644
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
@@ -6,20 +6,6 @@
#include <torch/csrc/utils/python_compat.h>
-
-// Many APIs have changed/don't exist anymore
-#if IS_PYTHON_3_12_PLUS
-
-#include "dim.h"
-
-// Re-enable this some day
-PyObject* Dim_init() {
- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
- return nullptr;
-}
-
-#else
-
#include "minpybind.h"
#include <frameobject.h>
#include <opcode.h>
@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
PY_BEGIN
mpy::handle sizes;
static const char * const _keywords[] = {"sizes", nullptr};
- static _PyArg_Parser parser = {"O", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
return nullptr;
}
@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
PY_BEGIN
int size;
static const char * const _keywords[] = {"N", nullptr};
- static _PyArg_Parser parser = {"i", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
return nullptr;
}
@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
// dim() --------------------
-static bool relevant_op(_Py_CODEUNIT c) {
+static bool relevant_op(int c) {
switch(c) {
case STORE_NAME:
case STORE_GLOBAL:
@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
auto lasti = PyFrame_GetLasti(f.ptr());
auto decoder = PyInstDecoder(c.ptr(), lasti);
- #if IS_PYTHON_3_11_PLUS
+ #if IS_PYTHON_3_11_PLUS && !IS_PYTHON_3_12_PLUS
// When py3.11 adapts bytecode lasti points to the precall
// rather than the call instruction after it
if (decoder.opcode() == PRECALL) {
@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
}
}
-#endif
+
diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
index de82b5af95a4..d76d4828bf80 100644
--- a/functorch/csrc/dim/minpybind.h
+++ b/functorch/csrc/dim/minpybind.h
@@ -621,7 +621,7 @@ struct vector_args {
PyObject *dummy = NULL;
_PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
#else
- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
_PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
#endif
@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
#define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
FORALL_ARGS(MPY_ARGS_DECLARE) \
- static _PyArg_Parser parser = {fmt, kwlist, 0}; \
+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
throw mpy::exception_set(); \
}
--
2.44.0

@@ -1,39 +0,0 @@
From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:19:29 -0700
Subject: [PATCH] Regenerate flatbuffer header
For this error
torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41:
error: static assertion failed: Non-compatible flatbuffers version included
12 | FLATBUFFERS_VERSION_MINOR == 3 &&
PyTorch expects 23.3.3, which is what f38 ships.
Rawhide is at 23.5.26.
Regenerate with
flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
index cffe8bc7a6..83575e4c19 100644
--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h
+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
@@ -9,8 +9,8 @@
// Ensure the included flatbuffers.h is the same version as when this file was
// generated, otherwise it may not be compatible.
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
- FLATBUFFERS_VERSION_MINOR == 3 &&
- FLATBUFFERS_VERSION_REVISION == 3,
+ FLATBUFFERS_VERSION_MINOR == 5 &&
+ FLATBUFFERS_VERSION_REVISION == 26,
"Non-compatible flatbuffers version included");
namespace torch {
--
2.43.0
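The regenerated header works by pinning, via static_assert, the exact flatbuffers version it was produced with. The same guard can be sketched in Python (the function name and version-tuple layout here are illustrative, not the real flatbuffers API):

```python
# Sketch of the guard the regenerated header encodes via static_assert.
# GENERATED_WITH models the version flatc used to regenerate the header.
GENERATED_WITH = (23, 5, 26)

def check_flatbuffers_version(installed):
    """Raise if the installed library differs from the generating version."""
    if installed != GENERATED_WITH:
        raise RuntimeError(
            "Non-compatible flatbuffers version included: "
            f"header generated with {GENERATED_WITH}, found {installed}"
        )

check_flatbuffers_version((23, 5, 26))  # Rawhide's 23.5.26: passes
```

Before the patch, the header carried `GENERATED_WITH = (23, 3, 3)`, so building against Rawhide's 23.5.26 tripped the assertion; regenerating flips which side fails.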

@@ -1,73 +0,0 @@
From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:25:23 -0700
Subject: [PATCH] Stub in kineto ActivityType
There is an error when kineto is not used: the shim still
requires the ActivityType.h header to get the enum ActivityType.
So cut-and-paste just enough of the header in to provide it.
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index e92cbf003d..68985ab7d0 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,51 @@
#undef USE_KINETO
#endif
+#ifdef USE_KINETO
#include <ActivityType.h>
+#else
+namespace libkineto {
+// copied from header
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+ // Activity types enabled by default
+ CPU_OP = 0, // cpu side ops
+ USER_ANNOTATION,
+ GPU_USER_ANNOTATION,
+ GPU_MEMCPY,
+ GPU_MEMSET,
+ CONCURRENT_KERNEL, // on-device kernels
+ EXTERNAL_CORRELATION,
+ CUDA_RUNTIME, // host side cuda runtime events
+ CUDA_DRIVER, // host side cuda driver events
+ CPU_INSTANT_EVENT, // host side point-like events
+ PYTHON_FUNCTION,
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+ // Optional Activity types
+ CUDA_SYNC, // synchronization events between runtime and kernels
+ GLOW_RUNTIME, // host side glow runtime events
+ MTIA_RUNTIME, // host side MTIA runtime events
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
+};
+}
+
+#endif
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>
--
2.43.0
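The C++ change above follows a common pattern: when an optional dependency is missing, vendor a minimal stub of just the names the rest of the code touches. A hedged Python sketch of the same pattern (`kineto_stub` is a made-up module name, and the fallback mirrors only the first few enum values copied in the patch):

```python
import enum

# Stub-a-missing-dependency pattern: try the real module, otherwise define
# a stand-in with the same names, order, and starting value.
try:
    from kineto_stub import ActivityType  # hypothetical real dependency
except ImportError:
    class ActivityType(enum.IntEnum):
        # same order and starting value as the vendored C++ enum
        CPU_OP = 0                         # cpu side ops
        USER_ANNOTATION = enum.auto()
        GPU_USER_ANNOTATION = enum.auto()
        GPU_MEMCPY = enum.auto()
        GPU_MEMSET = enum.auto()
        # (remaining members elided from this sketch)
```

As in the C++ shim, the stub must keep the original ordering so the numeric values stay interchangeable with the real enum.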

@@ -1,25 +0,0 @@
From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 06:41:49 -0500
Subject: [PATCH] can not be used with C files
---
cmake/Dependencies.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 4dd8042058..5f91f3ffab 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1269,7 +1269,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
- list(APPEND HIP_CXX_FLAGS -std=c++17)
+# list(APPEND HIP_CXX_FLAGS -std=c++17)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
endif()
--
2.43.0

@@ -1,42 +0,0 @@
From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 1 Feb 2024 11:29:47 -0500
Subject: [PATCH] cuda - hip signatures
---
aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
index 1b85e7776e..bb6f88783a 100644
--- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
const char *src,
const char *name,
int numHeaders,
+#if !defined(USE_ROCM)
const char * const *headers,
const char * const *includeNames) {
+#else
+ const char **headers,
+ const char **includeNames) {
+#endif
auto fn = reinterpret_cast<decltype(&nvrtcCreateProgram)>(getNVRTCLibrary().sym(__func__));
if (!fn)
throw std::runtime_error("Can't get nvrtcCreateProgram");
@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *);
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *);
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *);
#endif
+#if !defined(USE_ROCM)
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *);
+#else
+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **);
+#endif
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult);
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*);
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *);
--
2.43.0

@@ -1,25 +0,0 @@
From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 17:21:13 -0700
Subject: [PATCH] disable submodule search
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 0fd886d945..e397df8fb6 100644
--- a/setup.py
+++ b/setup.py
@@ -458,7 +458,7 @@ def mirror_files_into_torchgen():
def build_deps():
report("-- Building version " + version)
- check_submodules()
+ # check_submodules()
check_pydep("yaml", "pyyaml")
build_caffe2(
--
2.43.0

@@ -1,46 +0,0 @@
From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 19 Mar 2024 11:32:37 -0400
Subject: [PATCH] disable use of aotriton
---
aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
index 96b839820efd..2d3dd0cb4b0f 100644
--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
@@ -21,9 +21,11 @@
#include <cmath>
#include <functional>
+#ifdef USE_FLASH_ATTENTION
#if USE_ROCM
#include <aotriton/flash.h>
#endif
+#endif
/**
* Note [SDPA Runtime Dispatch]
@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) {
}
bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) {
+#ifdef USE_FLASH_ATTENTION
// Check that the gpu is capable of running flash attention
using sm80 = SMVersion<8, 0>;
using sm90 = SMVersion<9, 0>;
@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug
}
#endif
return true;
+#else
+ return false;
+#endif
}
bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) {
--
2.44.0

@@ -1,226 +0,0 @@
From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 3 Apr 2024 17:58:46 -0700
Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo
unittests on 3.12 (#123216)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216
Approved by: https://github.com/jansel, https://github.com/malfet
---
test/dynamo/test_autograd_function.py | 3 ++
test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++
test/functorch/test_eager_transforms.py | 7 ++-
test/run_test.py | 3 --
torch/__init__.py | 5 +-
torch/_dynamo/eval_frame.py | 4 +-
torch/_dynamo/test_case.py | 8 +---
7 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py
index d23fec607afa..bc5ebc767038 100644
--- a/test/dynamo/test_autograd_function.py
+++ b/test/dynamo/test_autograd_function.py
@@ -2,6 +2,8 @@
import copy
import math
+import sys
+import unittest
import torch
@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase):
# I pulled all of these test cases from test_autograd.py
# In the future, we should make the Dynamo test suite actually
# run on test_autograd.py (it's disabled right now) and delete these.
+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+")
def test_smoke_from_test_autograd(self):
class Func(torch.autograd.Function):
@staticmethod
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index a73de8b1c7e9..8f54e0564e6b 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -9760,6 +9760,69 @@ fn
lambda mod: mod,
)
+ @xfailIfPy311
+ def test_outside_linear_module_free(self):
+ # Compared to test_linear_module_free, the linear
+ # layer is not the code object that is directly compiled.
+ def model_inp_ctr():
+ fc = torch.nn.Linear(100, 100)
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.fc_ref = fc
+
+ def forward(self, x):
+ return fc(x[0])
+
+ # return fc to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), fc)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref)
+
+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+")
+ def test_parameter_free(self):
+ def model_inp_ctr():
+ param = torch.nn.Parameter(torch.randn(100, 100))
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.param = param
+
+ def forward(self, x):
+ return self.param * x[0]
+
+ # return param to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), param)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param)
+
+ def test_raises_importerror1(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ try:
+ import some_module_that_surely_does_not_exist
+
+ return
+ except ImportError:
+ pass
+ return x.sin()
+
+ x = torch.randn(8)
+ self.assertEqual(fn(x), x.sin())
+
+ def test_raises_importerror2(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ import some_module_that_surely_does_not_exist
+
+ return x + 1
+
+ x = torch.randn(8)
+ with self.assertRaises(ImportError):
+ fn(x)
+
def test_dynamo_cache_move_to_front(self):
class Mod(torch.nn.Module):
def __init__(self):
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
index 09415cf8f48e..60790ec06059 100644
--- a/test/functorch/test_eager_transforms.py
+++ b/test/functorch/test_eager_transforms.py
@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase):
# Triton only supports GPU with SM70 or later.
@expectedFailureIf((IS_ARM64 and not IS_MACOS) or
IS_WINDOWS or
- (TEST_CUDA and not SM70OrLater) or
- (sys.version_info >= (3, 12)))
+ (TEST_CUDA and not SM70OrLater))
def test_compile_vmap_hessian(self, device):
# The model and inputs are a smaller version
# of code at benchmark repo:
@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase):
actual = opt_fn(params_and_buffers, x)
self.assertEqual(actual, expected)
- # torch.compile is not supported on Windows or on Python 3.12+
- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12)))
+ # torch.compile is not supported on Windows
+ @expectedFailureIf(IS_WINDOWS)
@torch._dynamo.config.patch(suppress_errors=False)
@torch._dynamo.config.patch(capture_func_transforms=True)
@skipIfTorchDynamo("Do not test torch.compile on top of torch.compile")
diff --git a/test/run_test.py b/test/run_test.py
index e86af9623042..ebb14df4167d 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT))
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
DISTRIBUTED_TEST_PREFIX = "distributed"
INDUCTOR_TEST_PREFIX = "inductor"
-DYNAMO_TEST_PREFIX = "dynamo"
# Note [ROCm parallel CI testing]
@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [
]
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)]
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]:
# these tests failing in Python 3.12 temporarily disabling
if sys.version_info >= (3, 12):
options.exclude.extend(INDUCTOR_TESTS)
- options.exclude.extend(DYNAMO_TESTS)
options.exclude.extend(
[
"functorch/test_dims",
diff --git a/torch/__init__.py b/torch/__init__.py
index d381712b4a35..26cdffe81d29 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *,
"""
_C._log_api_usage_once("torch.compile")
- # Temporary until we get proper support for python 3.12
- if sys.version_info >= (3, 12):
- raise RuntimeError("Dynamo is not supported on Python 3.12+")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Dynamo is not supported on Python 3.13+")
# Decorator mode
if model is None:
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
index 53ab0df3a947..0a80eeea99ed 100644
--- a/torch/_dynamo/eval_frame.py
+++ b/torch/_dynamo/eval_frame.py
@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
def check_if_dynamo_supported():
- if sys.version_info >= (3, 12):
- raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile")
def is_dynamo_supported():
diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py
index e3cbef09eaae..297ea6e2bc2a 100644
--- a/torch/_dynamo/test_case.py
+++ b/torch/_dynamo/test_case.py
@@ -1,7 +1,6 @@
import contextlib
import importlib
import logging
-import sys
import torch
import torch.testing
@@ -20,12 +19,7 @@ log = logging.getLogger(__name__)
def run_tests(needs=()):
from torch.testing._internal.common_utils import run_tests
- if (
- TEST_WITH_TORCHDYNAMO
- or IS_WINDOWS
- or TEST_WITH_CROSSREF
- or sys.version_info >= (3, 12)
- ):
+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF:
return # skip testing
if isinstance(needs, str):
--
2.44.0
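The core of the patch above is moving dynamo's interpreter gate from 3.12 to 3.13. A sketch of that gate (`check_if_dynamo_supported` matches the patched function's name; the `version` parameter is added here for testability and is not part of PyTorch):

```python
import sys

# The patch raises this ceiling from (3, 12) to (3, 13).
UNSUPPORTED_FROM = (3, 13)

def check_if_dynamo_supported(version=None):
    # Default to the running interpreter, as the real function does.
    version = version if version is not None else sys.version_info[:2]
    if version >= UNSUPPORTED_FROM:
        raise RuntimeError(
            f"Python {UNSUPPORTED_FROM[0]}.{UNSUPPORTED_FROM[1]}+ "
            "not yet supported for torch.compile"
        )

def is_dynamo_supported(version=None):
    try:
        check_if_dynamo_supported(version)
        return True
    except RuntimeError:
        return False
```

With the ceiling at (3, 13), 3.12 interpreters pass the check, which is why the 3.12-specific test skips elsewhere in the patch can be dropped.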

@@ -1,54 +0,0 @@
From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:20:28 -0500
Subject: [PATCH] no third_party FXdiv
---
caffe2/CMakeLists.txt | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index b2f3adbfae..80a5625c8d 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -110,15 +110,15 @@ endif()
# Note: the folders that are being commented out have not been properly
# addressed yet.
-if(NOT MSVC AND USE_XNNPACK)
- if(NOT TARGET fxdiv)
- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
- add_subdirectory(
- "${FXDIV_SOURCE_DIR}"
- "${CMAKE_BINARY_DIR}/FXdiv")
- endif()
-endif()
+#if(NOT MSVC AND USE_XNNPACK)
+# if(NOT TARGET fxdiv)
+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+# add_subdirectory(
+# "${FXDIV_SOURCE_DIR}"
+# "${CMAKE_BINARY_DIR}/FXdiv")
+# endif()
+#endif()
add_subdirectory(core)
add_subdirectory(serialize)
@@ -1081,9 +1081,9 @@ if(USE_XPU)
target_compile_definitions(torch_xpu PRIVATE USE_XPU)
endif()
-if(NOT MSVC AND USE_XNNPACK)
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
-endif()
+#if(NOT MSVC AND USE_XNNPACK)
+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+#endif()
# ==========================================================
# formerly-libtorch flags
--
2.43.0


@ -1,65 +0,0 @@
From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:16:04 -0500
Subject: [PATCH] no third_party fmt
---
c10/CMakeLists.txt | 2 +-
cmake/Dependencies.cmake | 6 +++---
torch/CMakeLists.txt | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
index 1f742f4c176..4fa08913bdd 100644
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@@ -87,7 +87,7 @@ endif()
if(C10_USE_GLOG)
target_link_libraries(c10 PUBLIC glog::glog)
endif()
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
+target_link_libraries(c10 PRIVATE fmt)
if(C10_USE_NUMA)
message(STATUS "NUMA paths:")
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 6f5a2d5feff..42fbf80f6e8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1837,7 +1837,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# Disable compiler feature checks for `fmt`.
#
@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks.
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
# ---[ Kineto
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 97a72eed55b..9e5014d1980 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
python::python
pybind::pybind11
shm
- fmt::fmt-header-only
+ fmt
ATEN_CPU_FILES_GEN_LIB)
if(USE_ASAN AND TARGET Sanitizer::address)
--
2.43.2


@ -1,36 +0,0 @@
From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:11:55 -0500
Subject: [PATCH] no third_party foxi
---
cmake/Dependencies.cmake | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 5f91f3ffab..8e1461af81 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
if(NOT USE_SYSTEM_ONNX)
@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
- include_directories(${FOXI_INCLUDE_DIRS})
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+# include_directories(${FOXI_INCLUDE_DIRS})
+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
--
2.43.0


@ -1,25 +0,0 @@
From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 22 Feb 2024 09:28:11 -0500
Subject: [PATCH] reenable foxi linking
---
cmake/Dependencies.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 42fbf80f6e8..bc3a2dc6fee 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
# include_directories(${FOXI_INCLUDE_DIRS})
-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
--
2.43.2


@ -1,25 +0,0 @@
From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 06:41:20 -0500
Subject: [PATCH] silence an assert
---
aten/src/ATen/native/cuda/IndexKernel.cu | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu
index 657c0c77b3..b406aa6687 100644
--- a/aten/src/ATen/native/cuda/IndexKernel.cu
+++ b/aten/src/ATen/native/cuda/IndexKernel.cu
@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind
gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) {
int64_t qvalue = static_cast<int64_t>(zero_point + nearbyintf(*(float*)in_data * inv_scale));
- qvalue = std::clamp(qvalue, qmin, qmax);
+ //qvalue = std::clamp(qvalue, qmin, qmax);
*(scalar_t*)(out_data + offset) = static_cast<scalar_t>(qvalue);
});
});
--
2.43.0
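For context on what the commented-out `std::clamp` was doing, here is a rough Python sketch of the quantization step (hypothetical `quantize` helper; the real kernel runs on the GPU with CUDA types):

```python
def quantize(value: float, inv_scale: float, zero_point: int,
             qmin: int, qmax: int) -> int:
    # Quantize a float, then saturate into the representable range --
    # the saturation is the part the patch disables in the CUDA kernel.
    q = round(zero_point + value * inv_scale)
    return max(qmin, min(q, qmax))

print(quantize(300.0, 1.0, 0, -128, 127))  # 127
```

Without the clamp, out-of-range values would wrap when cast to the narrow quantized type instead of saturating.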


@ -1,34 +0,0 @@
From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 15:01:28 -0500
Subject: [PATCH] use any hip
---
cmake/public/LoadHIP.cmake | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index 1abeb06228..28458c4146 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -30,7 +30,7 @@ endif()
message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
# Add HIP to the CMAKE Module Path
-set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH})
+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH})
macro(find_package_and_print_version PACKAGE_NAME)
find_package("${PACKAGE_NAME}" ${ARGN})
@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME)
endmacro()
# Find the HIP Package
-find_package_and_print_version(HIP 1.0)
+find_package_and_print_version(HIP MODULE)
if(HIP_FOUND)
set(PYTORCH_FOUND_HIP TRUE)
--
2.43.0

README.NVIDIA Normal file

@ -0,0 +1,15 @@
Some help for building this package for NVIDIA/CUDA
Review NVIDIA's documentation
https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
Review PyTorch documentation
https://github.com/pytorch/pytorch#from-source
Some convenient strings to cut and paste
F39
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
Building is done locally.
The build machine must have a supported GPU, with the drivers loaded and the CUDA SDK installed.
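Before attempting a local CUDA build, it can help to confirm the toolkit is actually on the PATH. A minimal preflight sketch (the `cuda_toolchain_present` helper is hypothetical, not part of this package):

```python
import shutil

def cuda_toolchain_present() -> bool:
    """Rough preflight check: is the CUDA compiler driver on PATH?"""
    return shutil.which("nvcc") is not None

print(cuda_toolchain_present())
```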


@ -0,0 +1,85 @@
From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Sun, 20 Jul 2025 12:47:58 -0700
Subject: [PATCH] Use horrible dynamo stub
Rawhide's update of python is too fast for dynamo,
so paper over the problem with a horrible stub that throws
runtime exceptions if dynamo is used.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
build_variables.bzl | 26 ++++++++++++----------
torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++
2 files changed, 30 insertions(+), 12 deletions(-)
create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp
diff --git a/build_variables.bzl b/build_variables.bzl
index b266c80e8843..a3be6893349b 100644
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -140,7 +140,8 @@ core_trainer_sources = [
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/utils/warnings.cpp",
"torch/csrc/autograd/jit_decomp_interface.cpp",
- "torch/csrc/dynamo/compiled_autograd.cpp",
+# "torch/csrc/dynamo/compiled_autograd.cpp",
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
"torch/csrc/jit/frontend/name_mangler.cpp",
"torch/csrc/jit/ir/type_hashing.cpp",
"torch/csrc/jit/serialization/pickler.cpp",
@@ -868,17 +869,18 @@ libtorch_python_core_sources = [
"torch/csrc/autograd/python_torch_functions_manual.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_variable_indexing.cpp",
- "torch/csrc/dynamo/python_compiled_autograd.cpp",
- "torch/csrc/dynamo/cache_entry.cpp",
- "torch/csrc/dynamo/cpp_shim.cpp",
- "torch/csrc/dynamo/cpython_defs.c",
- "torch/csrc/dynamo/eval_frame.c",
- "torch/csrc/dynamo/eval_frame_cpp.cpp",
- "torch/csrc/dynamo/extra_state.cpp",
- "torch/csrc/dynamo/framelocals_mapping.cpp",
- "torch/csrc/dynamo/guards.cpp",
- "torch/csrc/dynamo/utils.cpp",
- "torch/csrc/dynamo/init.cpp",
+# "torch/csrc/dynamo/python_compiled_autograd.cpp",
+# "torch/csrc/dynamo/cache_entry.cpp",
+# "torch/csrc/dynamo/cpp_shim.cpp",
+# "torch/csrc/dynamo/cpython_defs.c",
+# "torch/csrc/dynamo/eval_frame.c",
+# "torch/csrc/dynamo/eval_frame_cpp.cpp",
+# "torch/csrc/dynamo/extra_state.cpp",
+# "torch/csrc/dynamo/framelocals_mapping.cpp",
+# "torch/csrc/dynamo/guards.cpp",
+# "torch/csrc/dynamo/utils.cpp",
+# "torch/csrc/dynamo/init.cpp",
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
"torch/csrc/functorch/init.cpp",
"torch/csrc/fx/node.cpp",
"torch/csrc/mps/Module.cpp",
diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
new file mode 100644
index 000000000000..3ac1324d4557
--- /dev/null
+++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
@@ -0,0 +1,16 @@
+#include <torch/csrc/autograd/engine.h>
+#include <torch/csrc/dynamo/compiled_autograd.h>
+
+namespace torch::dynamo::autograd {
+const std::unique_ptr<PyCompilerInterface>& getPyCompilerInterface() {
+ throw std::runtime_error("Dynamo not supported");
+ return nullptr;
+}
+std::vector<std::optional<InputMetadata>> get_input_metadata(
+ const edge_list& edges) {
+ std::vector<std::optional<InputMetadata>> r;
+ throw std::runtime_error("Dynamo not supported");
+ return r;
+}
+
+}
--
2.49.0
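The idea behind the stub can be sketched in Python terms: every dynamo entry point is replaced by a function that raises at call time, so the package builds and imports cleanly and only fails if dynamo is actually used. The names below are illustrative, not the real symbols:

```python
# Each stubbed entry point raises at call time instead of at import time.
def _unsupported(*_args, **_kwargs):
    raise RuntimeError("Dynamo not supported")

get_py_compiler_interface = _unsupported
get_input_metadata = _unsupported

try:
    get_input_metadata([])
except RuntimeError as e:
    print(e)  # prints "Dynamo not supported"
```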


@ -1,46 +1,165 @@
# Package ######################################################################
[build-system]
requires = [
"setuptools",
"wheel",
"astunparse",
"numpy",
# 70.1.0: min version for integrated bdist_wheel command from wheel package
# 77.0.0: min version for SPDX expression support for project.license
"setuptools>=70.1.0,<80.0",
"cmake>=3.27",
"ninja",
"numpy",
"packaging",
"pyyaml",
"cmake",
"typing-extensions",
"requests",
"six", # dependency chain: NNPACK -> PeachPy -> six
"typing-extensions>=4.10.0",
]
# Use legacy backend to import local packages in setup.py
build-backend = "setuptools.build_meta:__legacy__"
build-backend = "setuptools.build_meta"
[dependency-groups]
dev = [
# This list should be kept in sync with the requirements-build.txt
# in PyTorch root until the project fully migrates to pyproject.toml
# after which this can be removed as it is already specified in the
# [build-system] section
"setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0
"cmake>=3.27",
"ninja",
"numpy",
"packaging",
"pyyaml",
"requests",
"six", # dependency chain: NNPACK -> PeachPy -> six
"typing-extensions>=4.10.0",
[tool.black]
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
# line-length = 120
target-version = ["py38", "py39", "py310", "py311"]
# This list should be kept in sync with the requirements.txt in
# PyTorch root until the project fully migrates to pyproject.toml
"build[uv]",
"expecttest>=0.3.0",
"filelock",
"fsspec>=0.8.5",
"hypothesis",
"jinja2",
"lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'",
"networkx>=2.5.1",
"optree>=0.13.0",
"psutil",
"sympy>=1.13.3",
"typing-extensions>=4.13.2",
"wheel",
]
[project]
name = "torch"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
readme = "README.md"
requires-python = ">=3.10"
# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77
# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment.
# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed
# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903
license = { text = "BSD-3-Clause" }
authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }]
keywords = ["pytorch", "machine learning"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: C++",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
dynamic = [
"entry-points",
"dependencies",
"scripts",
"version",
]
[project.urls]
Homepage = "https://pytorch.org"
Repository = "https://github.com/pytorch/pytorch"
Documentation = "https://pytorch.org/docs"
"Issue Tracker" = "https://github.com/pytorch/pytorch/issues"
Forum = "https://discuss.pytorch.org"
[project.optional-dependencies]
optree = ["optree>=0.13.0"]
opt-einsum = ["opt-einsum>=3.3"]
pyyaml = ["pyyaml"]
# Linter tools #################################################################
[tool.isort]
src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"]
extra_standard_library = ["typing_extensions"]
skip_gitignore = true
skip_glob = ["third_party/*"]
atomic = true
profile = "black"
indent = 4
line_length = 88
lines_after_imports = 2
multi_line_output = 3
include_trailing_comma = true
combine_as_imports = true
[tool.usort.known]
first_party = ["caffe2", "torch", "torchgen", "functorch", "test"]
standard_library = ["typing_extensions"]
[tool.ruff]
target-version = "py38"
line-length = 88
src = ["caffe2", "torch", "torchgen", "functorch", "test"]
[tool.ruff.format]
docstring-code-format = true
quote-style = "double"
[tool.ruff.lint]
# NOTE: Synchronize the ignores with .flake8
external = [
"B001",
"B902",
"B950",
"E121",
"E122",
"E128",
"E131",
"E704",
"E723",
"F723",
"F812",
"P201",
"P204",
"T484",
"TOR901",
]
ignore = [
# these ignores are from flake8-bugbear; please fix!
"B007", "B008", "B017",
"B018", # Useless expression
"B019",
"B023",
"B028", # No explicit `stacklevel` keyword argument found
"B904",
"E402",
"C408", # C408 ignored because we like the dict keyword argument syntax
"E501", # E501 is not flexible enough, we're using B950 instead
"E721",
"E731", # Assign lambda expression
"E741",
"EXE001",
"F405",
"F841",
"FURB122", # writelines
# these ignores are from flake8-logging-format; please fix!
"G101",
# these ignores are from ruff NPY; please fix!
@ -48,39 +167,49 @@ ignore = [
# these ignores are from ruff PERF; please fix!
"PERF203",
"PERF401",
"PERF403",
# these ignores are from PYI; please fix!
"PYI019",
"PYI024",
"PYI036",
"PYI041",
"PYI056",
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
"SIM108",
"SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression
"SIM110",
"SIM114", # Combine `if` branches using logical `or` operator
"SIM115",
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
"SIM117",
"SIM118",
"UP006", # keep-runtime-typing
"UP007", # keep-runtime-typing
"UP045", # keep-runtime-typing
"TC006",
# TODO: Remove Python-3.10 specific suppressions
"B905",
"UP035",
"UP036",
"UP038",
"UP041",
"FURB161",
]
line-length = 120
select = [
"B",
"B904", # Re-raised error without specifying the cause via the from keyword
"C4",
"G",
"E",
"EXE",
"F",
"SIM1",
"SIM911",
"W",
# Not included in flake8
"FURB",
"LOG",
"NPY",
"PERF",
"PGH004",
"PIE790",
"PIE794",
"PIE800",
"PIE804",
@ -89,40 +218,96 @@ select = [
"PLC0131", # type bivariance
"PLC0132", # type param mismatch
"PLC0205", # string as __slots__
"PLC3002", # unnecessary-direct-lambda-call
"PLE",
"PLR0133", # constant comparison
"PLR0206", # property with params
"PLR1722", # use sys exit
"PLR1736", # unnecessary list index
"PLW0129", # assert on string literal
"PLW0131", # named expr without context
"PLW0133", # useless exception statement
"PLW0245", # super without brackets
"PLW0406", # import self
"PLW0711", # binary op exception
"PLW1501", # bad open mode
"PLW1507", # shallow copy os.environ
"PLW1509", # preexec_fn not safe with threads
"PLW2101", # useless lock statement
"PLW3301", # nested min max
"PT006", # TODO: enable more PT rules
"PT014", # duplicate parameterize case
"PT022",
"PT023",
"PT024",
"PT025",
"PT026",
"PYI",
"Q003", # avoidable escaped quote
"Q004", # unnecessary escaped quote
"RSE",
"RUF008", # mutable dataclass default
"RUF013", # ban implicit optional
"RUF015", # access first ele in constant time
"RUF016", # type error non-integer index
"RUF017",
"TRY200",
"TRY302",
"RUF018", # no assignment in assert
"RUF019", # unnecessary-key-check
"RUF020", # never union
"RUF024", # from keys mutable
"RUF026", # default factory kwarg
"RUF030", # No print statement in assert
"RUF033", # default values __post_init__ dataclass
"RUF041", # simplify nested Literal
"RUF048", # properly parse `__version__`
"RUF200", # validate pyproject.toml
"S324", # for hashlib FIPS compliance
"SLOT",
"TC",
"TRY002", # ban vanilla raise (todo fix NOQAs)
"TRY203",
"TRY401", # verbose-log-message
"UP",
"YTT",
]
[tool.ruff.per-file-ignores]
[tool.ruff.lint.pyupgrade]
# Preserve types, even if a file imports `from __future__ import annotations`.
keep-runtime-typing = true
[tool.ruff.lint.per-file-ignores]
"__init__.py" = [
"F401",
]
"*.pyi" = [
"PYI011", # typed-argument-default-in-stub
"PYI021", # docstring-in-stub
"PYI053", # string-or-bytes-too-long
]
"functorch/notebooks/**" = [
"F401",
]
"test/export/**" = [
"PGH004"
]
"test/typing/**" = [
"PGH004"
]
"test/typing/reveal/**" = [
"F821",
]
"test/torch_np/numpy_tests/**" = [
"F821",
"NPY201",
]
"test/dynamo/test_bytecode_utils.py" = [
"F821",
]
"test/dynamo/test_debug_utils.py" = [
"UP037",
]
"test/dynamo/test_misc.py" = [
"PGH004",
]
"test/jit/**" = [
"PLR0133", # tests require this for JIT
@ -136,19 +321,33 @@ select = [
"RUF015",
"UP", # We don't want to modify the jit tests as they test specific syntax
]
"torch/onnx/**" = [
"UP037", # ONNX does runtime type checking
"test/inductor/s429861_repro.py" = [
"PGH004",
]
"test/inductor/test_torchinductor.py" = [
"UP037",
]
# autogenerated #TODO figure out why file level noqa is ignored
"torch/_appdirs.py" = ["PGH004"]
"torch/jit/_shape_functions.py" = ["PGH004"]
"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"]
"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"]
"torch/_inductor/codegen/**" = [
"PGH004"
]
"torchgen/api/types/__init__.py" = [
"F401",
"F403",
]
"torchgen/executorch/api/types/__init__.py" = [
"F401",
"F403",
]
"torch/utils/collect_env.py" = [
"UP", # collect_env.py needs to work with older versions of Python
]
"torch/_vendor/**" = [
"UP", # No need to mess with _vendor
]
"tools/linter/**" = [
"LOG015" # please fix
]
[tool.codespell]
ignore-words = "tools/linter/dictionary.txt"


@ -6,13 +6,20 @@
# So pre releases can be tried
%bcond_with gitcommit
%if %{with gitcommit}
# ToT
%global commit0 b36e01801b89a516f4271f796773d5f4b43f1186
# v2.9.0-rc9
%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
%global date0 20240521
%global pypi_version 2.4.0
%global date0 20251008
%global pypi_version 2.9.0
%global flatbuffers_version 24.12.23
%global miniz_version 3.0.2
%global pybind11_version 2.13.6
%global rc_tag -rc9
%else
%global pypi_version 2.3.0
%global pypi_version 2.9.1
%global flatbuffers_version 24.12.23
%global miniz_version 3.0.2
%global pybind11_version 2.13.6
%endif
# For -test subpackage
@ -23,75 +30,31 @@
%bcond_with test
%ifarch x86_64
# ROCm support came in F40
%if 0%{?fedora} > 39
%bcond_without rocm
%else
%bcond_with rocm
%endif
%endif
# hipblaslt is in development
%bcond_with hipblaslt
# Which GPU families to build for
%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11
%global rocm_default_gpu default
%bcond_without rocm_loop
# Caffe2 support came in F41
%if 0%{?fedora} > 40
%bcond_without caffe2
%else
%bcond_with caffe2
%endif
# Distributed support came in F41
%if 0%{?fedora} > 40
%bcond_without distributed
# For testing distributed+rccl etc.
%bcond_without rccl
%bcond_with gloo
%bcond_without mpi
%bcond_without tensorpipe
%else
%bcond_with distributed
%endif
# OpenCV support came in F41
%if 0%{?fedora} > 40
%bcond_without opencv
%else
%bcond_with opencv
%endif
# Do not confuse xnnpack versions
%if 0%{?fedora} > 40
%bcond_without xnnpack
%else
%bcond_with xnnpack
%endif
%if 0%{?fedora} > 39
%bcond_without pthreadpool
%else
%bcond_with pthreadpool
%endif
%if 0%{?fedora} > 39
%bcond_without pocketfft
%else
%bcond_with pocketfft
%endif
# For testing cuda
%ifarch x86_64
%bcond_with cuda
%endif
# Disable dwz with rocm because memory can be exhausted
%if %{with rocm}
%define _find_debuginfo_dwz_opts %{nil}
%endif
# These came in 2.4 and are not yet in Fedora
%bcond_with opentelemetry
%bcond_with httplib
%bcond_with kineto
%if 0%{?fedora}
%bcond_without onnx
%else
%bcond_with onnx
%endif
Name: python-%{pypi_name}
%if %{with gitcommit}
Version: %{pypi_version}^git%{date0}.%{shortcommit0}
@ -106,21 +69,13 @@ License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AN
URL: https://pytorch.org/
%if %{with gitcommit}
Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz
Source100: pyproject.toml
Source1000: pyproject.toml
%else
Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz
%endif
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz
Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz
Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz
%if %{with cuda}
%global cuf_ver 1.1.2
Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz
%global cul_ver 3.4.1
Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz
%endif
%if %{with tensorpipe}
# Development on tensorpipe has stopped; the repo was made read-only on July 1, 2023; this is the last commit
%global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e
%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7})
@ -131,120 +86,73 @@ Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz
%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281
%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7})
Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz
%if %{without opentelemetry}
%global ot_ver 1.14.2
Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz
%endif
%if %{without xnnpack}
%global xnn_commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2
%global xnn_scommit %(c=%{xnn_commit}; echo ${c:0:7})
Source30: https://github.com/google/xnnpack/archive/%{xnn_commit}/xnnpack-%{xnn_scommit}.tar.gz
%global fx_commit 63058eff77e11aa15bf531df5dd34395ec3017c8
%global fx_scommit %(c=%{fx_commit}; echo ${c:0:7})
Source31: https://github.com/Maratyszcza/fxdiv/archive/%{fx_commit}/FXdiv-%{fx_scommit}.tar.gz
%global fp_commit 0a92994d729ff76a58f692d3028ca1b64b145d91
%global fp_scommit %(c=%{fp_commit}; echo ${c:0:7})
Source32: https://github.com/Maratyszcza/FP16/archive/%{fp_commit}/FP16-%{fp_scommit}.tar.gz
%global ps_commit 072586a71b55b7f8c584153d223e95687148a900
%global ps_scommit %(c=%{ps_commit}; echo ${c:0:7})
Source33: https://github.com/Maratyszcza/psimd/archive/%{ps_commit}/psimd-%{ps_scommit}.tar.gz
%if %{without httplib}
%global hl_commit 3b6597bba913d51161383657829b7e644e59c006
%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7})
Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz
%endif
%if %{without pthreadpool}
%global pt_commit 4fe0e1e183925bf8cfa6aae24237e724a96479b8
%global pt_scommit %(c=%{pt_commit}; echo ${c:0:7})
Source40: https://github.com/Maratyszcza/pthreadpool/archive/%{pt_commit}/pthreadpool-%{pt_scommit}.tar.gz
%if %{without kineto}
%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658
%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7})
Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz
%endif
%if %{without pocketfft}
%global pf_commit 076cb3d2536b7c5d0629093ad886e10ac05f3623
%global pf_scommit %(c=%{pf_commit}; echo ${c:0:7})
Source50: https://github.com/mreineck/pocketfft/archive/%{pf_commit}/pocketfft-%{pf_scommit}.tar.gz
%endif
Patch0: 0001-no-third_party-foxi.patch
Patch3: 0001-Stub-in-kineto-ActivityType.patch
Patch5: 0001-disable-submodule-search.patch
%if %{with caffe2}
Patch6: 0001-reenable-foxi-linking.patch
%endif
# Bring some patches forward
%if %{without gitcommit}
# https://github.com/pytorch/pytorch/pull/123384
Patch7: 0001-Reenable-dim-for-python-3.12.patch
# Dynamo/Inductor on 3.12
Patch8: 0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch
%endif
%if %{with rocm}
# ROCm patches
# https://github.com/pytorch/pytorch/pull/120551
Patch100: 0001-Optionally-use-hipblaslt.patch
Patch101: 0001-cuda-hip-signatures.patch
Patch102: 0001-silence-an-assert.patch
Patch103: 0001-can-not-use-with-c-files.patch
Patch104: 0001-use-any-hip.patch
Patch105: 0001-disable-use-of-aotriton.patch
%endif
# Do not claim aarch64 on anything older than F41
%if 0%{?fedora} > 40
ExclusiveArch: x86_64 aarch64
%else
ExclusiveArch: x86_64
%endif
%global ox_ver 1.18.0
Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz
%global pt_arches x86_64 aarch64
ExclusiveArch: %pt_arches
%global toolchain gcc
%global _lto_cflags %nil
BuildRequires: cmake
BuildRequires: cpuinfo-devel
BuildRequires: eigen3-devel
BuildRequires: flexiblas-devel
BuildRequires: fmt-devel
%if %{with caffe2}
BuildRequires: foxi-devel
%endif
BuildRequires: gcc-c++
BuildRequires: gcc-gfortran
%if %{with distributed}
%if %{with gloo}
BuildRequires: gloo-devel
%endif
%endif
BuildRequires: ninja-build
BuildRequires: onnx-devel
BuildRequires: json-devel
BuildRequires: libomp-devel
%if %{with distributed}
BuildRequires: moodycamel-concurrentqueue-devel
BuildRequires: numactl-devel
BuildRequires: ninja-build
%if %{with onnx}
BuildRequires: onnx-devel
%endif
%if %{with mpi}
BuildRequires: openmpi-devel
%endif
%endif
BuildRequires: openblas-devel
BuildRequires: protobuf-devel
BuildRequires: sleef-devel
BuildRequires: valgrind-devel
%if %{with pocketfft}
BuildRequires: pocketfft-devel
%endif
%if %{with pthreadpool}
BuildRequires: pthreadpool-devel
%endif
%if %{with xnnpack}
BuildRequires: cpuinfo-devel
BuildRequires: FP16-devel
BuildRequires: fxdiv-devel
BuildRequires: psimd-devel
BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a
%endif
BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e
BuildRequires: python3-devel
BuildRequires: python3dist(filelock)
BuildRequires: python3dist(jinja2)
BuildRequires: python3dist(networkx)
BuildRequires: python3dist(numpy)
BuildRequires: python3dist(pip)
BuildRequires: python3dist(pyyaml)
BuildRequires: python3dist(setuptools)
BuildRequires: python3dist(sphinx)
@ -258,39 +166,37 @@ BuildRequires: python3dist(sympy)
%if %{with rocm}
BuildRequires: hipblas-devel
%if %{with hipblaslt}
BuildRequires: hipblaslt-devel
%endif
BuildRequires: hipcub-devel
BuildRequires: hipfft-devel
BuildRequires: hiprand-devel
BuildRequires: hipsparse-devel
BuildRequires: hipsparselt-devel
BuildRequires: hipsolver-devel
# Magma is broken on ROCm 7
# BuildRequires: magma-devel
BuildRequires: miopen-devel
BuildRequires: rocblas-devel
BuildRequires: rocrand-devel
BuildRequires: rocfft-devel
%if %{with distributed}
%if %{with rccl}
BuildRequires: rccl-devel
%endif
%endif
BuildRequires: rocprim-devel
BuildRequires: rocm-cmake
BuildRequires: rocm-comgr-devel
BuildRequires: rocm-compilersupport-macros
BuildRequires: rocm-core-devel
BuildRequires: rocm-hip-devel
BuildRequires: rocm-runtime-devel
BuildRequires: rocm-rpm-macros
BuildRequires: rocm-rpm-macros-modules
BuildRequires: rocsolver-devel
BuildRequires: rocm-smi-devel
BuildRequires: rocthrust-devel
BuildRequires: roctracer-devel
Requires: rocm-rpm-macros-modules
%endif
Requires: amdsmi
%if %{with opencv}
BuildRequires: opencv-devel
%endif
%if %{with test}
@ -298,49 +204,9 @@ BuildRequires: google-benchmark-devel
%endif
Requires: python3dist(dill)
Requires: python3dist(yaml)
# For convenience
Provides: pytorch
# Apache-2.0
Provides: bundled(flatbuffers) = 22.3.3
# MIT
Provides: bundled(miniz) = 2.1.0
Provides: bundled(pybind11) = 2.11.1
%if %{with tensorpipe}
# BSD-3-Clause
Provides: bundled(tensorpipe)
# Apache-2.0
Provides: bundled(libnop)
# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause
Provides: bundled(libuv) = 1.41.0
%endif
# These are already in Fedora
%if %{without xnnpack}
# BSD-3-Clause
Provides: bundled(xnnpack)
# MIT
Provides: bundled(FP16)
# MIT
Provides: bundled(fxdiv)
# MIT
Provides: bundled(psimd)
%endif
%if %{without pthreadpool}
# BSD-2-Clause
Provides: bundled(pthreadpool)
%endif
%if %{without pocketfft}
# BSD-3-Clause
Provides: bundled(pocketfft)
%endif
# For convenience
Provides: pytorch
Obsoletes: caffe = 1.0^git20200212.9b89154
%description
PyTorch is a Python package that provides two high-level features:
@ -354,6 +220,24 @@ and Cython to extend PyTorch when needed.
%package -n python3-%{pypi_name}
Summary: %{summary}
# For convenience
Provides: pytorch
# Apache-2.0
Provides: bundled(flatbuffers) = %{flatbuffers_version}
# MIT
Provides: bundled(miniz) = %{miniz_version}
Provides: bundled(pybind11) = %{pybind11_version}
%if %{with tensorpipe}
# BSD-3-Clause
Provides: bundled(tensorpipe)
# Apache-2.0
Provides: bundled(libnop)
# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause
Provides: bundled(libuv) = 1.41.0
%endif
%description -n python3-%{pypi_name}
PyTorch is a Python package that provides two high-level features:
@@ -363,33 +247,6 @@ PyTorch is a Python package that provides two high-level features:
You can reuse your favorite Python packages such as NumPy, SciPy,
and Cython to extend PyTorch when needed.
%if %{with rocm}
%package -n python3-%{pypi_name}-rocm-gfx8
Summary: %{name} for ROCm gfx8
%description -n python3-%{pypi_name}-rocm-gfx8
%{summary}
%package -n python3-%{pypi_name}-rocm-gfx9
Summary: %{name} for ROCm gfx9
%description -n python3-%{pypi_name}-rocm-gfx9
%{summary}
%package -n python3-%{pypi_name}-rocm-gfx10
Summary: %{name} for ROCm gfx10
%description -n python3-%{pypi_name}-rocm-gfx10
%{summary}
%package -n python3-%{pypi_name}-rocm-gfx11
Summary: %{name} for ROCm gfx11
%description -n python3-%{pypi_name}-rocm-gfx11
%{summary}
%endif
%if %{with test}
%package -n python3-%{pypi_name}-test
Summary: Tests for %{name}
@@ -405,7 +262,8 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release}
%if %{with gitcommit}
%autosetup -p1 -n pytorch-%{commit0}
# Overwrite with a git checkout of the pyproject.toml
cp %{SOURCE100} .
cp %{SOURCE1000} .
%else
%autosetup -p1 -n pytorch-v%{version}
%endif
@@ -415,20 +273,11 @@ rm -rf %{pypi_name}.egg-info
tar xf %{SOURCE1}
rm -rf third_party/flatbuffers/*
cp -r flatbuffers-23.3.3/* third_party/flatbuffers/
cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/
tar xf %{SOURCE2}
rm -rf third_party/pybind11/*
cp -r pybind11-2.11.1/* third_party/pybind11/
%if %{with cuda}
tar xf %{SOURCE10}
rm -rf third_party/cudnn_frontend/*
cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/
tar xf %{SOURCE11}
rm -rf third_party/cutlass/*
cp -r cutlass-%{cul_ver}/* third_party/cutlass/
%endif
cp -r pybind11-%{pybind11_version}/* third_party/pybind11/
%if %{with tensorpipe}
tar xf %{SOURCE20}
@@ -440,70 +289,90 @@ cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/
tar xf %{SOURCE22}
rm -rf third_party/tensorpipe/third_party/libnop/*
cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/
# gcc 15 requires an explicit include of <cstdint>
sed -i '/#include <tensorpipe.*/a#include <cstdint>' third_party/tensorpipe/tensorpipe/common/allocator.h
sed -i '/#include <tensorpipe.*/a#include <cstdint>' third_party/tensorpipe/tensorpipe/common/memory.h
%endif
%if %{without xnnpack}
tar xf %{SOURCE30}
rm -rf third_party/XNNPACK/*
cp -r XNNPACK-*/* third_party/XNNPACK/
tar xf %{SOURCE31}
rm -rf third_party/FXdiv/*
cp -r FXdiv-*/* third_party/FXdiv/
tar xf %{SOURCE32}
rm -rf third_party/FP16/*
cp -r FP16-*/* third_party/FP16/
tar xf %{SOURCE33}
rm -rf third_party/psimd/*
cp -r psimd-*/* third_party/psimd/
%if %{without opentelemtry}
tar xf %{SOURCE60}
rm -rf third_party/opentelemetry-cpp/*
cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/
%endif
%if %{without pthreadpool}
tar xf %{SOURCE40}
rm -rf third_party/pthreadpool/*
cp -r pthreadpool-*/* third_party/pthreadpool/
%if %{without httplib}
tar xf %{SOURCE70}
rm -rf third_party/cpp-httplib/*
cp -r cpp-httplib-*/* third_party/cpp-httplib/
%endif
%if %{without pocketfft}
tar xf %{SOURCE50}
rm -rf third_party/pocketfft/*
cp -r pocketfft-*/* third_party/pocketfft/
%if %{without kineto}
tar xf %{SOURCE80}
rm -rf third_party/kineto/*
cp -r kineto-*/* third_party/kineto/
%endif
%if %{with opencv}
%if %{without gitcommit}
# Relax requirements; the *_FOUND variables are not set
sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt
sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt
sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt
%endif
%if %{without onnx}
tar xf %{SOURCE90}
rm -rf third_party/onnx/*
cp -r onnx-*/* third_party/onnx/
%endif
# Adjust for the hipblaslt GPU targets we build
sed -i -e 's@"gfx90a", "gfx940", "gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp
%if 0%{?rhel}
# In RHEL but too old
sed -i -e '/typing-extensions/d' setup.py
# Need to pip these
sed -i -e '/sympy/d' setup.py
sed -i -e '/fsspec/d' setup.py
%else
# for 2.5.0
sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py
%endif
# A new dependency
# Tied to USE_FLASH_ATTENTION; since that is off, it is not needed
sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake
# Compress hip
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake
# Silence noisy warning
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake
# Use parallel jobs
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake
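Each of the seds above re-matches the literal `HIP_CLANG_FLAGS -fno-gpu-rdc` anchor left intact by the previous substitution, so the flags accumulate on one line (in reverse order of application). A minimal illustration against a throwaway file, not the real cmake/Dependencies.cmake:

```shell
# Show that repeated seds anchored on the same literal stack their flags.
# The file and its contents are a hypothetical stand-in for the real
# cmake/Dependencies.cmake line being edited above.
tmp=$(mktemp)
echo 'list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)' > "$tmp"
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' "$tmp"
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' "$tmp"
cat "$tmp"   # both flags are present; the later one lands closer to the anchor
rm -f "$tmp"
```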
# Need to link with librocm_smi64
sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake
# No third_party fmt, use system
sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt
sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt
sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt,INTERFACE_INCLUDE_DIRECTORIES>)@@' aten/src/ATen/CMakeLists.txt
sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt
sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt
sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt
sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake
sed -i -e 's@fmt::fmt-header-only@fmt@' caffe2/CMakeLists.txt
sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake
sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake
sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake
# No third_party FXdiv
%if %{with xnnpack}
sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt
sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt
%endif
# https://github.com/pytorch/pytorch/issues/149803
# Tries to checkout nccl
sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py
# Disable check_submodules() in setup.py; we build from a tarball, not a git repo
sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py
# Release comes fully loaded with third party src
# Remove what we can
@@ -513,7 +382,7 @@ sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARI
# the third_party dir to compile the file.
# miniz is licensed MIT
# https://github.com/richgel999/miniz/blob/master/LICENSE
mv third_party/miniz-2.1.0 .
mv third_party/miniz-%{miniz_version} .
#
# setup.py depends on this script
mv third_party/build_bundled.py .
@@ -523,28 +392,24 @@ mv third_party/flatbuffers .
mv third_party/pybind11 .
%if %{with cuda}
mv third_party/cudnn_frontend .
mv third_party/cutlass .
%endif
%if %{with tensorpipe}
mv third_party/tensorpipe .
%endif
%if %{without xnnpack}
mv third_party/XNNPACK .
mv third_party/FXdiv .
mv third_party/FP16 .
mv third_party/psimd .
%if %{without opentelemetry}
mv third_party/opentelemetry-cpp .
%endif
%if %{without pthreadpool}
mv third_party/pthreadpool .
%if %{without httplib}
mv third_party/cpp-httplib .
%endif
%if %{without pocketfft}
mv third_party/pocketfft .
%if %{without kineto}
mv third_party/kineto .
%endif
%if %{without onnx}
mv third_party/onnx .
%endif
%if %{with test}
@@ -555,54 +420,55 @@ mv third_party/googletest .
rm -rf third_party/*
# Put stuff back
mv build_bundled.py third_party
mv miniz-2.1.0 third_party
mv miniz-%{miniz_version} third_party
mv flatbuffers third_party
mv pybind11 third_party
%if %{with cuda}
mv cudnn_frontend third_party
mv cutlass third_party
%endif
%if %{with tensorpipe}
mv tensorpipe third_party
%endif
%if %{without xnnpack}
mv XNNPACK third_party
mv FXdiv third_party
mv FP16 third_party
mv psimd third_party
%if %{without opentelemetry}
mv opentelemetry-cpp third_party
%endif
%if %{without pthreadpool}
mv pthreadpool third_party
%if %{without httplib}
mv cpp-httplib third_party
%endif
%if %{without pocketfft}
mv pocketfft third_party
%if %{without kineto}
mv kineto third_party
%endif
%if %{without onnx}
mv onnx third_party
%endif
%if %{with test}
mv googletest third_party
%endif
%if %{with pocketfft}
#
# Fake out pocketfft so the system header will be used
mkdir third_party/pocketfft
%endif
cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/
#
# Use the system valgrind headers
mkdir third_party/valgrind-headers
cp %{_includedir}/valgrind/* third_party/valgrind-headers
%if %{without gitcommit}
# Remove unneeded OpenCL files that confuse the license scanner
rm caffe2/contrib/opencl/OpenCL/cl.hpp
rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h
rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp
# Fix installing to /usr/lib64
sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt
# disable foxi linking; foxi_loader is not linked
sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake
# cmake version changed
sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt
sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt
%if %{without opentelemtry}
sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt
%endif
%if %{with rocm}
@@ -610,32 +476,45 @@ rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp
./tools/amd_build/build_amd.py
# Fedora installs to /usr/include, not /usr/include/rocm-core
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h
# https://github.com/pytorch/pytorch/issues/149805
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake
# Fedora installs to /usr/include, not /usr/include/rocm-core
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp
# use any hip, correct CMAKE_MODULE_PATH
sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake
sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake
# silence an assert
# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu
%endif
%if %{with cuda}
# build complains about not being able to build -pie without -fPIC
sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@string(APPEND CMAKE_CUDA_FLAGS " -fPIC -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@' CMakeLists.txt
%endif
# moodycamel include path needs adjusting to use the system's
sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake
%build
# Export the arches
# echo "%%pytorch_arches %pt_arches" > macros.pytorch
#
# Control the number of jobs
#
# The build can fail when too many parallel jobs exceed physical memory,
# so count cores and memory and cap the job count so the build fits.
# Run at least one job, more if CPU and memory resources are available.
#
%ifarch x86_64
# Physical cores only, no hyperthreading
COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'`
%else
# cpuinfo format varies on other arches, fall back to nproc
COMPILE_JOBS=`nproc`
%endif
if [ ${COMPILE_JOBS}x = x ]; then
COMPILE_JOBS=1
fi
# Account for memory usage per job; do not thrash real memory
%if %{with cuda}
BUILD_MEM=4
%else
BUILD_MEM=2
%endif
MEM_KB=0
MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'`
MEM_MB=`eval "expr ${MEM_KB} / 1024"`
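The clamp this arithmetic feeds (the rest of the logic falls outside the hunk shown here) amounts to capping the job count by available memory. A hedged sketch with fixed example numbers standing in for the /proc probes above, assuming the spec's variable names:

```shell
# Hedged sketch of the job clamp: jobs are capped so that
# COMPILE_JOBS * BUILD_MEM (GB per job) fits in physical memory.
# Fixed example numbers stand in for the /proc probes.
COMPILE_JOBS=16      # e.g. detected CPU cores
BUILD_MEM=2          # GB budgeted per compile job (4 with CUDA)
MEM_MB=24576         # e.g. 24 GB of MemTotal, already converted from KB
MEM_GB=$((MEM_MB / 1024))
MAX_JOBS=$((MEM_GB / BUILD_MEM))
[ "$MAX_JOBS" -ge 1 ] || MAX_JOBS=1          # always run at least one job
[ "$COMPILE_JOBS" -le "$MAX_JOBS" ] || COMPILE_JOBS=$MAX_JOBS
echo "COMPILE_JOBS=$COMPILE_JOBS"            # 24 GB / 2 GB-per-job caps 16 cores at 12
```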
@@ -667,15 +546,17 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF
export INTERN_BUILD_MOBILE=OFF
export USE_DISTRIBUTED=OFF
export USE_CUDA=OFF
export USE_FAKELOWP=OFF
export USE_FBGEMM=OFF
export USE_FLASH_ATTENTION=OFF
export USE_GOLD_LINKER=OFF
export USE_GLOO=OFF
export USE_ITT=OFF
export USE_KINETO=OFF
export USE_KLEIDIAI=OFF
export USE_LITE_INTERPRETER_PROFILER=OFF
export USE_LITE_PROTO=OFF
export USE_MAGMA=OFF
export USE_MEM_EFF_ATTENTION=OFF
export USE_MKLDNN=OFF
export USE_MPI=OFF
export USE_NCCL=OFF
@@ -683,43 +564,25 @@ export USE_NNPACK=OFF
export USE_NUMPY=ON
export USE_OPENMP=ON
export USE_PYTORCH_QNNPACK=OFF
export USE_QNNPACK=OFF
export USE_ROCM=OFF
export USE_SYSTEM_CPUINFO=ON
export USE_SYSTEM_SLEEF=ON
export USE_SYSTEM_EIGEN_INSTALL=ON
%if %{with onnx}
export USE_SYSTEM_ONNX=ON
%endif
export USE_SYSTEM_PYBIND11=OFF
export USE_SYSTEM_LIBS=OFF
export USE_SYSTEM_NCCL=OFF
export USE_TENSORPIPE=OFF
export USE_XNNPACK=ON
%if %{with pthreadpool}
export USE_XNNPACK=OFF
export USE_XPU=OFF
export USE_SYSTEM_PTHREADPOOL=ON
%endif
%if %{with xnnpack}
export USE_SYSTEM_CPUINFO=ON
export USE_SYSTEM_FP16=ON
export USE_SYSTEM_FXDIV=ON
export USE_SYSTEM_PSIMD=ON
export USE_SYSTEM_XNNPACK=ON
%endif
export USE_SYSTEM_XNNPACK=OFF
%if %{with caffe2}
export BUILD_CAFFE2=ON
%endif
%if %{with cuda}
%if %{without rocm}
export CUDACXX=/usr/local/cuda/bin/nvcc
export CPLUS_INCLUDE_PATH=/usr/local/cuda/include
export USE_CUDA=ON
# The arches to build for
export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0"
%endif
%endif
%if %{with distributed}
export USE_DISTRIBUTED=ON
%if %{with tensorpipe}
export USE_TENSORPIPE=ON
@@ -733,11 +596,6 @@ export USE_SYSTEM_GLOO=ON
%if %{with mpi}
export USE_MPI=ON
%endif
%endif
%if %{with opencv}
export USE_OPENCV=ON
%endif
%if %{with test}
export BUILD_TEST=ON
@@ -752,112 +610,73 @@ export BUILD_TEST=ON
#
# See BZ 2244862
%if %{with rocm}
export USE_ROCM=ON
export USE_ROCM_CK_SDPA=OFF
export USE_ROCM_CK_GEMM=OFF
export USE_FBGEMM_GENAI=OFF
# Magma is broken on ROCm 7
# export USE_MAGMA=ON
export HIP_PATH=`hipconfig -p`
export ROCM_PATH=`hipconfig -R`
export HIP_CLANG_PATH=`hipconfig -l`
RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir`
export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir`
#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
gpu=%{rocm_default_gpu}
module load rocm/$gpu
export PYTORCH_ROCM_ARCH=$ROCM_GPUS
%py3_build
mv build build-${gpu}
module purge
# pytorch uses clang, not hipcc
export HIP_CLANG_PATH=%{rocmllvm_bindir}
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%if %{with rocm_loop}
for gpu in %{rocm_gpu_list}
do
module load rocm/$gpu
export PYTORCH_ROCM_ARCH=$ROCM_GPUS
%py3_build
mv build build-${gpu}
module purge
done
%endif
%if 0%{?fedora}
%pyproject_wheel
%else
%py3_build
%endif
%install
%if %{with rocm}
# pytorch rpm macros
# install -Dpm 644 macros.pytorch \
# %{buildroot}%{_rpmmacrodir}/macros.pytorch
%if %{with rocm}
export USE_ROCM=ON
export USE_ROCM_CK=OFF
export HIP_PATH=`hipconfig -p`
export ROCM_PATH=`hipconfig -R`
export HIP_CLANG_PATH=`hipconfig -l`
RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir`
export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir`
# export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
gpu=%{rocm_default_gpu}
module load rocm/$gpu
export PYTORCH_ROCM_ARCH=$ROCM_GPUS
mv build-${gpu} build
%py3_install
mv build build-${gpu}
module purge
# pytorch uses clang, not hipcc
export HIP_CLANG_PATH=%{rocmllvm_bindir}
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
%if %{with rocm_loop}
for gpu in %{rocm_gpu_list}
do
module load rocm/$gpu
export PYTORCH_ROCM_ARCH=$ROCM_GPUS
mv build-${gpu} build
# need to customize the install location, so replace py3_install
%{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*}
rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__
mv build build-${gpu}
module purge
done
%endif
%if 0%{?fedora}
%pyproject_install
%pyproject_save_files '*torch*'
%else
%py3_install
%endif
%check
# Not working yet
# pyproject_check_import torch
# Do not remove the empty files
%files -n python3-%{pypi_name}
%license LICENSE
%doc README.md
%{_bindir}/convert-caffe2-to-onnx
%{_bindir}/convert-onnx-to-caffe2
%{_bindir}/torchrun
%{python3_sitearch}/%{pypi_name}
%{python3_sitearch}/%{pypi_name}-*.egg-info
%{python3_sitearch}/%{pypi_name}*
%{python3_sitearch}/functorch
%{python3_sitearch}/torchgen
%if %{with caffe2}
%{python3_sitearch}/caffe2
%endif
%if %{with rocm}
%files -n python3-%{pypi_name}-rocm-gfx8
%{_libdir}/rocm/gfx8/bin/*
%{_libdir}/rocm/gfx8/lib64/*
%files -n python3-%{pypi_name}-rocm-gfx9
%{_libdir}/rocm/gfx9/bin/*
%{_libdir}/rocm/gfx9/lib64/*
%files -n python3-%{pypi_name}-rocm-gfx10
%{_libdir}/rocm/gfx10/bin/*
%{_libdir}/rocm/gfx10/lib64/*
%files -n python3-%{pypi_name}-rocm-gfx11
%{_libdir}/rocm/gfx11/bin/*
%{_libdir}/rocm/gfx11/lib64/*
%endif
%changelog
%autochangelog

sources

@@ -1,19 +1,19 @@
SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44
SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28
SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4
SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580
SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda
SHA512 (xnnpack-fcbf55a.tar.gz) = 8063e27686f7b71cfba05b0c004c46db4506638689ffb112f013b3886de58653b60ca5487978c3f96275c17bb1136883ca4c93ddb2241a2c31925a950cb51759
SHA512 (FXdiv-63058ef.tar.gz) = da33eab4d006645f383a1f24fc3e747db3aeb0613219297ec0ae69aa2617f07ba050ebd6a64a8cbde6d25481f176d0ec3b9753a95d1fbcead2136595f3e50e97
SHA512 (FP16-0a92994.tar.gz) = 3f094f242425ea37de274eb8539dc5f8ab0c13fd5325d14180ef12e9c04e6002a110d086c4c667f7c8054af337deab096d59482eb95cc8a632c3c412b48e89d1
SHA512 (psimd-072586a.tar.gz) = a18faea093423dd9fe19ece8b228e011dccce0a2a22222f777ea19b023a13173966d4a8aea01147e8fc58de5d39cffcedeb2221a1572ae52bd5aba1295f86a94
SHA512 (pthreadpool-4fe0e1e.tar.gz) = 764d81219f2bf1f056983b5c2576f377aeef37f0f2282e74f81bfe1eac5353e175603f80a6647c96165b24ebdcb7bc2189a376e8577ce4319d82679c33750451
SHA512 (pocketfft-076cb3d.tar.gz) = a5dc5348b2079377787384722bf31be0cc6eed3bfacbf8a7fc3a4bc5d65832deb0da47686c12795c7e925286a3b43f6b5368ee380ddbe839c36edd106f1321a9
SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7
SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55
SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a
SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95
SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1
SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0
SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28
SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d
SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907
SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc
SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341
SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023
SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a