diff --git a/.gitignore b/.gitignore index 3f2501f..c424df5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,33 @@ /pytorch-v2.1.0.tar.gz /pytorch-v2.1.2.tar.gz +/pytorch-975d428.tar.gz +/v23.3.3.tar.gz +/v2.11.1.tar.gz +/pytorch-6a89a75.tar.gz +/pytorch-74832f1.tar.gz +/pytorch-4bb5cb5.tar.gz +/tensorpipe-52791a2.tar.gz +/v1.41.0.tar.gz +/libnop-910b558.tar.gz +/pytorch-97ff6cf.tar.gz +/pytorch-v2.3.0.tar.gz +/pytorch-v2.3.1.tar.gz +/pytorch-v2.4.0.tar.gz +/v1.14.2.tar.gz +/cpp-httplib-3b6597b.tar.gz +/kineto-be13176.tar.gz +/pytorch-v2.4.1.tar.gz +/pytorch-v2.5.0.tar.gz +/pytorch-v2.5.1.tar.gz +/pytorch-v2.7.0.tar.gz +/v2.13.6.tar.gz +/pytorch-a1cb3cc.tar.gz +/v24.12.23.tar.gz +/kineto-5e75018.tar.gz +/pytorch-v2.8.0.tar.gz +/v1.18.0.tar.gz +/pytorch-715dca6.tar.gz +/pytorch-fd36458.tar.gz +/pytorch-0fabc3b.tar.gz +/pytorch-v2.9.0.tar.gz +/pytorch-v2.9.1.tar.gz diff --git a/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch new file mode 100644 index 0000000..925e03b --- /dev/null +++ b/0001-Add-cmake-variable-USE_ROCM_CK.patch @@ -0,0 +1,202 @@ +From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Mon, 21 Jul 2025 11:35:03 -0700 +Subject: [PATCH] Add cmake variable USE_ROCM_CK + +--- + CMakeLists.txt | 1 + + aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- + aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- + cmake/Dependencies.cmake | 3 +++ + 4 files changed, 35 insertions(+), 31 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index a5d25e6afa0f..afc1b53efa64 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -240,6 +240,7 @@ cmake_dependent_option( + BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON + "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) + cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) ++cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) + option(CAFFE2_STATIC_LINK_CUDA 
"Statically link CUDA libraries" OFF) + cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) + cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF +diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt +index c9cfd74b501e..59f6178218ee 100644 +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -373,26 +373,26 @@ if(USE_ROCM) + # is header only, so this should be ok, except that the CMake build generates + # a ck/config.h. We just do that part here. Without this, the ck.h from the + # ROCM SDK may get accidentally used instead. +- function(_pytorch_rocm_generate_ck_conf) +- set(CK_ENABLE_INT8 "ON") +- set(CK_ENABLE_FP16 "ON") +- set(CK_ENABLE_FP32 "ON") +- set(CK_ENABLE_FP64 "ON") +- set(CK_ENABLE_BF16 "ON") +- set(CK_ENABLE_FP8 "ON") +- set(CK_ENABLE_BF8 "ON") +- set(CK_USE_XDL "ON") +- set(CK_USE_WMMA "ON") +- configure_file( +- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" +- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" +- ) +- endfunction() ++# function(_pytorch_rocm_generate_ck_conf) ++# set(CK_ENABLE_INT8 "ON") ++# set(CK_ENABLE_FP16 "ON") ++# set(CK_ENABLE_FP32 "ON") ++# set(CK_ENABLE_FP64 "ON") ++# set(CK_ENABLE_BF16 "ON") ++# set(CK_ENABLE_FP8 "ON") ++# set(CK_ENABLE_BF8 "ON") ++# set(CK_USE_XDL "ON") ++# set(CK_USE_WMMA "ON") ++# configure_file( ++# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" ++# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" ++# ) ++# endfunction() + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) +- _pytorch_rocm_generate_ck_conf() ++# list(APPEND ATen_HIP_INCLUDE 
${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) ++# _pytorch_rocm_generate_ck_conf() + + # Next two lines are needed because TunableOp uses third-party/fmt + list(APPEND ATen_HIP_INCLUDE $) +@@ -409,7 +409,7 @@ endif() + ${native_quantized_hip_hip} + ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} + ) +- if(WIN32) # Windows doesn't support Composable Kernels ++ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels + file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") + file(GLOB native_hip_ck "native/hip/ck*.hip") + exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index 89350a11bea7..e5b7960177cf 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -752,7 +752,7 @@ template <> + void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); + #else +@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } +@@ -1270,14 +1270,14 @@ template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + 
gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); + #else + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); + #endif + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); + } +@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); +@@ -1311,7 +1311,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1327,7 +1327,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); + } +@@ 
-1361,7 +1361,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index a93386c27f8d..be1368999d38 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1031,6 +1031,9 @@ if(USE_ROCM) + if(HIPBLASLT_VEC_EXT) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) + endif() ++ if(USE_ROCM_CK) ++ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) ++ endif() + list(APPEND HIP_HIPCC_FLAGS --offload-compress) + if(WIN32) + add_definitions(-DROCM_ON_WINDOWS) +-- +2.49.0 + diff --git 
a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch new file mode 100644 index 0000000..b6a282c --- /dev/null +++ b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch @@ -0,0 +1,359 @@ +From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 +From: albanD +Date: Sat, 12 Jul 2025 03:42:33 -0400 +Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 + +Imported from +https://github.com/albanD/pytorch/tree/cpython314_build +commit 88bb9cdb72449f4277829e20d94ad8aec1894216 + +Signed-off-by: Tom Rix +--- + torch/_dynamo/bytecode_analysis.py | 2 +- + torch/ao/quantization/__init__.py | 5 +++- + torch/ao/quantization/qconfig.py | 4 ++- + torch/ao/quantization/utils.py | 7 +++-- + torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ + torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ + torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- + torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ + torch/csrc/utils/python_compat.h | 1 + + torch/onnx/__init__.py | 1 - + torch/utils/weak.py | 29 +++++++++++++++++-- + 11 files changed, 111 insertions(+), 19 deletions(-) + +diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py +index 3252ea91409f..2de74ee5bf8d 100644 +--- a/torch/_dynamo/bytecode_analysis.py ++++ b/torch/_dynamo/bytecode_analysis.py +@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): + TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) + else: + TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) +-if sys.version_info >= (3, 12): ++if (3, 12) <= sys.version_info < (3, 14): + TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) + if sys.version_info >= (3, 13): + TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) +diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py +index ffc1792fd23f..cf5a8b99a894 100644 +--- a/torch/ao/quantization/__init__.py ++++ 
b/torch/ao/quantization/__init__.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + ++import sys + from typing import Callable, Optional, Union + + import torch +@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 + + # ensure __module__ is set correctly for public APIs + ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase] +-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++if sys.version_info < (3, 14): ++ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++ + for _f in [ + compare_results, + extract_results_from_loggers, +diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py +index efee5302ad42..d9a8fc78bab4 100644 +--- a/torch/ao/quantization/qconfig.py ++++ b/torch/ao/quantization/qconfig.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + import copy ++import sys + import warnings + from collections import namedtuple + from typing import Any, Optional, Union +@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N + + + QConfigAny = Optional[QConfig] +-QConfigAny.__module__ = "torch.ao.quantization.qconfig" ++if sys.version_info < (3, 14): ++ QConfigAny.__module__ = "torch.ao.quantization.qconfig" + + + def _add_module_to_qconfig_obs_ctr( +diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py +index 4ac3112ec072..3b1503e01701 100644 +--- a/torch/ao/quantization/utils.py ++++ b/torch/ao/quantization/utils.py +@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) + """ + + import functools ++import sys + import warnings + from collections import OrderedDict + from inspect import getfullargspec, signature +@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized + + + NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] +-NodePattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ NodePattern.__module__ = "torch.ao.quantization.utils" + + # This 
is the Quantizer class instance from torch/quantization/fx/quantize.py. + # Define separately to prevent circular imports. +@@ -31,7 +33,8 @@ QuantizerCls = Any + Pattern = Union[ + Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any + ] +-Pattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ Pattern.__module__ = "torch.ao.quantization.utils" + + + # TODO: maybe rename this to MatchInputNode +diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c +index b68ef894aeaa..244d4165d5e8 100644 +--- a/torch/csrc/dynamo/cpython_defs.c ++++ b/torch/csrc/dynamo/cpython_defs.c +@@ -2,6 +2,20 @@ + #include + #include + ++#if IS_PYTHON_3_14_PLUS ++ ++const uint8_t* THP_PyOpcode_Caches = NULL; ++const int THP_PyOpcode_Caches_size = 0; ++ ++void ++THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) ++{} ++void ++THP_PyFrame_Clear(_PyInterpreterFrame *frame) ++{} ++ ++#else ++ + #if IS_PYTHON_3_11_PLUS + + #define Py_BUILD_CORE +@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; + const int THP_PyOpcode_Caches_size = 0; + + #endif ++ ++#endif // IS_PYTHON_3_14_PLUS +\ No newline at end of file +diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h +index 6b99c1d5aec8..616be16563cf 100644 +--- a/torch/csrc/dynamo/cpython_includes.h ++++ b/torch/csrc/dynamo/cpython_includes.h +@@ -21,6 +21,14 @@ + + #if IS_PYTHON_3_11_PLUS + #include ++#if IS_PYTHON_3_14_PLUS ++#include ++#include ++#endif ++#endif ++ ++#if IS_PYTHON_3_14_PLUS ++#include + #endif + + #undef Py_BUILD_CORE +@@ -30,6 +38,13 @@ + extern "C" { + #endif + ++#if IS_PYTHON_3_14_PLUS ++ ++#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) ++#define PREV_INSTR(x) (x)->instr_ptr ++ ++#else ++ + #if IS_PYTHON_3_13_PLUS + #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) + #define PREV_INSTR(x) (x)->instr_ptr +@@ -38,6 +53,8 @@ extern 
"C" { + #define PREV_INSTR(x) (x)->prev_instr + #endif + ++#endif // IS_PYTHON_3_14_PLUS ++ + #if IS_PYTHON_3_12_PLUS + #define FUNC(x) ((x)->f_funcobj) + #else +diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c +index f413782b2d30..72bb8839bac3 100644 +--- a/torch/csrc/dynamo/eval_frame.c ++++ b/torch/csrc/dynamo/eval_frame.c +@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { + return PyUnicode_AsUTF8(F_CODE(frame)->co_name); + } + +-void clear_old_frame_if_python_312_plus( +- PyThreadState* tstate, +- THP_EVAL_API_FRAME_OBJECT* frame) { +-#if IS_PYTHON_3_12_PLUS +- +- THP_PyFrame_Clear(frame); +- THP_PyThreadState_PopFrame(tstate, frame); +- +-#endif +-} +- + static PyObject* dynamo_eval_custom_code_impl( + PyThreadState* tstate, + THP_EVAL_API_FRAME_OBJECT* frame, +@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( + + static void enable_eval_frame_shim(PyThreadState* tstate) {} + static void enable_eval_frame_default(PyThreadState* tstate) {} ++PyObject* dynamo_eval_custom_code( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ PyCodeObject* code, ++ const char* trace_annotation, ++ int throw_flag) {} ++THPPyInterpreterFrame* THPPyInterpreterFrame_New( ++ THP_EVAL_API_FRAME_OBJECT* frame) {} ++PyObject* dynamo_eval_frame_default( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ int throw_flag) {} + + static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; + +@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { + + #endif // !(IS_PYTHON_3_14_PLUS) + ++void clear_old_frame_if_python_312_plus( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame) { ++#if IS_PYTHON_3_12_PLUS ++ ++ THP_PyFrame_Clear(frame); ++ THP_PyThreadState_PopFrame(tstate, frame); ++ ++#endif ++} ++ + static PyObject* increment_working_threads( + PyThreadState* tstate, + PyObject* module) { +diff --git 
a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp +index b839fb26fc91..c4ee36d87767 100644 +--- a/torch/csrc/dynamo/framelocals_mapping.cpp ++++ b/torch/csrc/dynamo/framelocals_mapping.cpp +@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + PyCodeObject* co = F_CODE(frame); + _framelocals.resize(co->co_nlocalsplus, nullptr); + ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + if (!frame->stacktop) { + return; + } ++#endif + + auto update_framelocals = [&](int i, PyObject* value) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); +@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + }; + + auto offset = co->co_nlocalsplus - co->co_nfreevars; ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + for (int i = 0; i < offset; i++) { + update_framelocals(i, frame->localsplus[i]); + } ++#endif ++ + // Get references to closure variables ++#if IS_PYTHON_3_14_PLUS ++ PyObject* closure; ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; ++#endif + for (int i = 0; i < co->co_nfreevars; i++) { + update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); + } +diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h +index a1537611cc47..16292e4fd030 100644 +--- a/torch/csrc/utils/python_compat.h ++++ b/torch/csrc/utils/python_compat.h +@@ -13,6 +13,7 @@ extern "C" { + #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 + #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 + #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 ++#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 + + static inline int PyCode_GetNCellvars(PyCodeObject* code) { + // gh-26364 added co_ncellvars to Python 3.11.0rc1 +diff --git a/torch/onnx/__init__.py 
b/torch/onnx/__init__.py +index 345ffd2a065b..ceeadde5365b 100644 +--- a/torch/onnx/__init__.py ++++ b/torch/onnx/__init__.py +@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx" + OnnxExporterError.__module__ = "torch.onnx" + _OrtBackend.__module__ = "torch.onnx" + _OrtBackendOptions.__module__ = "torch.onnx" +-_OrtExecutionProvider.__module__ = "torch.onnx" + enable_fake_mode.__module__ = "torch.onnx" + is_onnxrt_backend_supported.__module__ = "torch.onnx" + +diff --git a/torch/utils/weak.py b/torch/utils/weak.py +index 8bf2ba5ed02b..9c7218cb2ad3 100644 +--- a/torch/utils/weak.py ++++ b/torch/utils/weak.py +@@ -3,8 +3,6 @@ from __future__ import annotations + + import collections.abc as _collections_abc + import weakref +- +-from _weakrefset import _IterationGuard # type: ignore[attr-defined] + from collections.abc import Mapping, MutableMapping + from weakref import ref + +@@ -22,6 +20,33 @@ __all__ = [ + ] + + ++# TODO: make weakref properly thread safe following ++# https://github.com/python/cpython/pull/125325 ++class _IterationGuard: ++ # This context manager registers itself in the current iterators of the ++ # weak container, such as to delay all removals until the context manager ++ # exits. ++ # This technique should be relatively thread-safe (since sets are). ++ ++ def __init__(self, weakcontainer): ++ # Don't create cycles ++ self.weakcontainer = ref(weakcontainer) ++ ++ def __enter__(self): ++ w = self.weakcontainer() ++ if w is not None: ++ w._iterating.add(self) ++ return self ++ ++ def __exit__(self, e, t, b): ++ w = self.weakcontainer() ++ if w is not None: ++ s = w._iterating ++ s.remove(self) ++ if not s: ++ w._commit_removals() ++ ++ + # This file defines a variant of WeakKeyDictionary that overrides the hashing + # behavior of the key to use object identity, rather than the builtin + # __eq__/__hash__ functions. 
This is useful for Tensor weak keys, as their +-- +2.49.0 + diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch deleted file mode 100644 index 56434a7..0000000 --- a/0001-Optionally-use-hipblaslt.patch +++ /dev/null @@ -1,262 +0,0 @@ -From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 23 Feb 2024 08:27:30 -0500 -Subject: [PATCH] Optionally use hipblaslt - -The hipblaslt package is not available on Fedora. -Instead of requiring the package, make it optional. -If it is found, define the preprocessor variable HIPBLASLT -Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks - -Signed-off-by: Tom Rix ---- - aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- - aten/src/ATen/cuda/CUDABlas.h | 2 +- - aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- - aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- - aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- - aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ - cmake/Dependencies.cmake | 3 +++ - cmake/public/LoadHIP.cmake | 4 ++-- - 8 files changed, 25 insertions(+), 19 deletions(-) - -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index d534ec5a178..e815463f630 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -14,7 +14,7 @@ - #include - - #ifdef USE_ROCM --#if ROCM_VERSION >= 60000 -+#ifdef HIPBLASLT - #include - #endif - // until hipblas has an API to accept flags, we must use rocblas here -@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { - } - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - - #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 - // only for rocm 5.7 where we first supported hipblaslt, it was difficult -@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public 
CuBlasLtDescriptor< - }; - } // namespace - -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - template - void gemm_and_bias( - bool transpose_mat1, -@@ -1124,7 +1125,7 @@ template void gemm_and_bias( - at::BFloat16* result_ptr, - int64_t result_ld, - GEMMAndBiasActivationEpilogue activation); -- -+#endif - void scaled_gemm( - char transa, - char transb, -diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h -index eb12bb350c5..068607467dd 100644 ---- a/aten/src/ATen/cuda/CUDABlas.h -+++ b/aten/src/ATen/cuda/CUDABlas.h -@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); - template <> - void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - enum GEMMAndBiasActivationEpilogue { - None, - RELU, -diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h -index 4ec35f59a21..e28dc42034f 100644 ---- a/aten/src/ATen/cuda/CUDAContextLight.h -+++ b/aten/src/ATen/cuda/CUDAContextLight.h -@@ -9,7 +9,7 @@ - - // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also - // added bf16 support --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - #include - #endif - -@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); - /* Handles */ - TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); - TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - TORCH_CUDA_CPP_API cublasLtHandle_t 
getCurrentCUDABlasLtHandle(); - #endif - -diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp -index 6913d2cd95e..3d4276be372 100644 ---- a/aten/src/ATen/cuda/CublasHandlePool.cpp -+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp -@@ -29,7 +29,7 @@ namespace at::cuda { - - namespace { - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - void createCublasLtHandle(cublasLtHandle_t *handle) { - TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); - } -@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { - return handle; - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasLtHandle_t getCurrentCUDABlasLtHandle() { - #ifdef USE_ROCM - c10::DeviceIndex device = 0; -diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h -index 3ba0d761277..dde1870cfbf 100644 ---- a/aten/src/ATen/cuda/tunable/TunableGemm.h -+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h -@@ -11,7 +11,7 @@ - - #include - #ifdef USE_ROCM --#if ROCM_VERSION >= 50700 -+#ifdef HIPBLASLT - #include - #endif - #include -@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex -@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex 
-diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp -index 29e5c5e3cf1..df56f3d7f1d 100644 ---- a/aten/src/ATen/native/cuda/Blas.cpp -+++ b/aten/src/ATen/native/cuda/Blas.cpp -@@ -155,7 +155,7 @@ enum class Activation { - GELU, - }; - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { - switch (a) { - case Activation::None: -@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { - - #ifdef USE_ROCM - static bool isSupportedHipLtROCmArch(int index) { -+#if defined(HIPBLASLT) - hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); - std::string device_arch = prop->gcnArchName; - static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; -@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { - } - } - TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); -+#endif - return false; - } - #endif -@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - at::ScalarType scalar_type = self.scalar_type(); - c10::MaybeOwned self_; - if (&result != &self) { --#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) - // Strangely, if mat2 has only 1 row or column, we get - // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. 
- // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] -@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - } - self__sizes = self_->sizes(); - } else { --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - useLtInterface = !disable_addmm_cuda_lt && - result.dim() == 2 && result.is_contiguous() && - isSupportedHipLtROCmArch(self.device().index()) && -@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - - TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - if (useLtInterface) { - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, -@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); - at::native::resize_output(amax, {}); - --#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) -+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasCommonArgs args(mat1, mat2, out); - const auto out_dtype_ = args.result->scalar_type(); - TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); -@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 60000 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - // rocm's hipblaslt does not yet support amax, so calculate separately - auto out_float32 = out.to(kFloat); - out_float32.abs_(); -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 
b7ffbeb07dc..2b6c3678984 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1273,6 +1273,9 @@ if(USE_ROCM) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() -+ if(hipblast_FOUND) -+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) -+ endif() - if(HIPBLASLT_CUSTOM_DATA_TYPE) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) - endif() -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index f6ca263c5e5..53eb0b63c1a 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -156,7 +156,7 @@ if(HIP_FOUND) - find_package_and_print_version(rocblas REQUIRED) - find_package_and_print_version(hipblas REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -- find_package_and_print_version(hipblaslt REQUIRED) -+ find_package_and_print_version(hipblaslt) - endif() - find_package_and_print_version(miopen REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") -@@ -191,7 +191,7 @@ if(HIP_FOUND) - # roctx is part of roctracer - find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) - -- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -+ if(hipblastlt_FOUND) - # check whether hipblaslt is using its own datatype - set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") - file(WRITE ${file} "" --- -2.43.2 - diff --git a/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch deleted file mode 100644 index 4eec491..0000000 --- a/0001-Regenerate-flatbuffer-header.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:19:29 -0700 -Subject: [PATCH] Regenerate flatbuffer header - -For this error -torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: -error: static assertion failed: Non-compatible flatbuffers version included - 12 | FLATBUFFERS_VERSION_MINOR == 3 && - -PyTorch is expecting 23.3.3, what f38 has 
-Rawhide is at 23.5.26 - -Regenerate with -flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs - -Signed-off-by: Tom Rix ---- - torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -index cffe8bc7a6..83575e4c19 100644 ---- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h -+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -@@ -9,8 +9,8 @@ - // Ensure the included flatbuffers.h is the same version as when this file was - // generated, otherwise it may not be compatible. - static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && -- FLATBUFFERS_VERSION_MINOR == 3 && -- FLATBUFFERS_VERSION_REVISION == 3, -+ FLATBUFFERS_VERSION_MINOR == 5 && -+ FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - - namespace torch { --- -2.43.0 - diff --git a/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch deleted file mode 100644 index f088645..0000000 --- a/0001-Stub-in-kineto-ActivityType.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:25:23 -0700 -Subject: [PATCH] Stub in kineto ActivityType - -There is an error with kineto is not used, the shim still -requires the ActivityTYpe.h header to get the enum Activity type. -So cut-n-paste just enough of the header in to do this. 
- -Signed-off-by: Tom Rix ---- - torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index e92cbf003d..68985ab7d0 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,51 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
-+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include - #include --- -2.43.0 - diff --git a/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch deleted file mode 100644 index 719737c..0000000 --- a/0001-can-not-use-with-c-files.patch +++ /dev/null @@ -1,25 +0,0 @@ -From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:49 -0500 -Subject: [PATCH] can not use with c files - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 4dd8042058..5f91f3ffab 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1269,7 +1269,7 @@ if(USE_ROCM) - list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) - list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) - list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) -- list(APPEND HIP_CXX_FLAGS -std=c++17) -+# list(APPEND HIP_CXX_FLAGS -std=c++17) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() --- -2.43.0 - diff --git a/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch deleted file mode 100644 index a258737..0000000 --- a/0001-cuda-hip-signatures.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 1 Feb 2024 11:29:47 -0500 -Subject: [PATCH] cuda - hip signatures - ---- - aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -index 1b85e7776e..bb6f88783a 100644 ---- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, - const char *src, - const char *name, - int numHeaders, -+#if !defined(USE_ROCM) 
- const char * const *headers, - const char * const *includeNames) { -+#else -+ const char **headers, -+ const char **includeNames) { -+#endif - auto fn = reinterpret_cast(getNVRTCLibrary().sym(__func__)); - if (!fn) - throw std::runtime_error("Can't get nvrtcCreateProgram"); -@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *); - NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *); - NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *); - #endif -+#if !defined(USE_ROCM) - NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *); -+#else -+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **); -+#endif - _STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult); - NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*); - NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *); --- -2.43.0 - diff --git a/0001-disable-submodule-search.patch b/0001-disable-submodule-search.patch deleted file mode 100644 index b830fa6..0000000 --- a/0001-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 0fd886d945..e397df8fb6 100644 ---- a/setup.py -+++ b/setup.py -@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.43.0 - diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch deleted file mode 100644 index 34a1704..0000000 --- a/0001-disable-use-of-aotriton.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Tue, 19 Mar 2024 11:32:37 -0400 -Subject: [PATCH] disable use 
of aotriton - ---- - aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -index 96b839820efd..2d3dd0cb4b0f 100644 ---- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -@@ -21,9 +21,11 @@ - #include - #include - -+#ifdef USE_FLASH_ATTENTION - #if USE_ROCM - #include - #endif -+#endif - - /** - * Note [SDPA Runtime Dispatch] -@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { - } - - bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { -+#ifdef USE_FLASH_ATTENTION - // Check that the gpu is capable of running flash attention - using sm80 = SMVersion<8, 0>; - using sm90 = SMVersion<9, 0>; -@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug - } - #endif - return true; -+#else -+ return false; -+#endif - } - - bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { --- -2.44.0 - diff --git a/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch deleted file mode 100644 index 71404e3..0000000 --- a/0001-no-third_party-FXdiv.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:20:28 -0500 -Subject: [PATCH] no third_party FXdiv - ---- - caffe2/CMakeLists.txt | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index b2f3adbfae..80a5625c8d 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -110,15 +110,15 @@ endif() - # Note: the folders that are being commented out have not been properly - # addressed yet. 
- --if(NOT MSVC AND USE_XNNPACK) -- if(NOT TARGET fxdiv) -- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -- add_subdirectory( -- "${FXDIV_SOURCE_DIR}" -- "${CMAKE_BINARY_DIR}/FXdiv") -- endif() --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# if(NOT TARGET fxdiv) -+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -+# add_subdirectory( -+# "${FXDIV_SOURCE_DIR}" -+# "${CMAKE_BINARY_DIR}/FXdiv") -+# endif() -+#endif() - - add_subdirectory(core) - add_subdirectory(serialize) -@@ -1081,9 +1081,9 @@ if(USE_XPU) - target_compile_definitions(torch_xpu PRIVATE USE_XPU) - endif() - --if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) -+#endif() - - # ========================================================== - # formerly-libtorch flags --- -2.43.0 - diff --git a/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch deleted file mode 100644 index 6e82af2..0000000 --- a/0001-no-third_party-fmt.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:16:04 -0500 -Subject: [PATCH] no third_party fmt - ---- - c10/CMakeLists.txt | 2 +- - cmake/Dependencies.cmake | 6 +++--- - torch/CMakeLists.txt | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index 1f742f4c176..4fa08913bdd 100644 ---- a/c10/CMakeLists.txt -+++ b/c10/CMakeLists.txt -@@ -87,7 +87,7 @@ endif() - if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) - endif() --target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -+target_link_libraries(c10 PRIVATE fmt) - - if(C10_USE_NUMA) - message(STATUS "NUMA paths:") -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 6f5a2d5feff..42fbf80f6e8 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1837,7 +1837,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - - # Disable compiler feature checks for `fmt`. - # -@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know - # `fmt` is compatible with a superset of the compilers that PyTorch is, it - # shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") -+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") - --list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) -+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) - - # ---[ Kineto -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 97a72eed55b..9e5014d1980 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES - python::python - pybind::pybind11 - shm -- fmt::fmt-header-only -+ fmt - ATEN_CPU_FILES_GEN_LIB) - - if(USE_ASAN AND TARGET Sanitizer::address) --- -2.43.2 - diff --git a/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch deleted file mode 100644 index ba1ec40..0000000 --- a/0001-no-third_party-foxi.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:11:55 -0500 -Subject: [PATCH] no third_party foxi - ---- - cmake/Dependencies.cmake | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 5f91f3ffab..8e1461af81 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) - endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) -+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() -- include_directories(${FOXI_INCLUDE_DIRS}) -- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+# include_directories(${FOXI_INCLUDE_DIRS}) -+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.0 - diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch deleted file mode 100644 index 8e39795..0000000 --- a/0001-reenable-foxi-linking.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 22 Feb 2024 09:28:11 -0500 -Subject: [PATCH] reenable foxi linking - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 42fbf80f6e8..bc3a2dc6fee 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() - # include_directories(${FOXI_INCLUDE_DIRS}) --# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. 
- set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.2 - diff --git a/0001-silence-an-assert.patch b/0001-silence-an-assert.patch deleted file mode 100644 index 0b20dcf..0000000 --- a/0001-silence-an-assert.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:20 -0500 -Subject: [PATCH] silence an assert - ---- - aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu -index 657c0c77b3..b406aa6687 100644 ---- a/aten/src/ATen/native/cuda/IndexKernel.cu -+++ b/aten/src/ATen/native/cuda/IndexKernel.cu -@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind - - gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { - int64_t qvalue = static_cast(zero_point + nearbyintf(*(float*)in_data * inv_scale)); -- qvalue = std::clamp(qvalue, qmin, qmax); -+ //qvalue = std::clamp(qvalue, qmin, qmax); - *(scalar_t*)(out_data + offset) = static_cast(qvalue); - }); - }); --- -2.43.0 - diff --git a/0001-use-any-hip.patch b/0001-use-any-hip.patch deleted file mode 100644 index dca86ea..0000000 --- a/0001-use-any-hip.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 15:01:28 -0500 -Subject: [PATCH] use any hip - ---- - cmake/public/LoadHIP.cmake | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 1abeb06228..28458c4146 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -30,7 +30,7 @@ endif() - message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") - - # Add HIP to 
the CMAKE Module Path --set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH}) -+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH}) - - macro(find_package_and_print_version PACKAGE_NAME) - find_package("${PACKAGE_NAME}" ${ARGN}) -@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME) - endmacro() - - # Find the HIP Package --find_package_and_print_version(HIP 1.0) -+find_package_and_print_version(HIP MODULE) - - if(HIP_FOUND) - set(PYTORCH_FOUND_HIP TRUE) --- -2.43.0 - diff --git a/README.NVIDIA b/README.NVIDIA new file mode 100644 index 0000000..b927f47 --- /dev/null +++ b/README.NVIDIA @@ -0,0 +1,15 @@ +Some help for building this package for NVIDIA/CUDA + +Review NVIDIA's documenation +https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html + +Review PyTorch documentation +https://github.com/pytorch/pytorch#from-source + +Some convience strings to cut-n-paste + +F39 +dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo + +Building is local. +Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch new file mode 100644 index 0000000..1900519 --- /dev/null +++ b/next/0001-Use-horrible-dynamo-stub.patch @@ -0,0 +1,85 @@ +From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sun, 20 Jul 2025 12:47:58 -0700 +Subject: [PATCH] Use horrible dynamo stub + +Rawhide's update of python is too fast for dynamo +So paper of the problem with a horrible stub that throws +runtime exceptions if dynamo is used. 
+ +Signed-off-by: Tom Rix +--- + build_variables.bzl | 26 ++++++++++++---------- + torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ + 2 files changed, 30 insertions(+), 12 deletions(-) + create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp + +diff --git a/build_variables.bzl b/build_variables.bzl +index b266c80e8843..a3be6893349b 100644 +--- a/build_variables.bzl ++++ b/build_variables.bzl +@@ -140,7 +140,8 @@ core_trainer_sources = [ + "torch/csrc/autograd/variable.cpp", + "torch/csrc/autograd/utils/warnings.cpp", + "torch/csrc/autograd/jit_decomp_interface.cpp", +- "torch/csrc/dynamo/compiled_autograd.cpp", ++# "torch/csrc/dynamo/compiled_autograd.cpp", ++ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/jit/frontend/name_mangler.cpp", + "torch/csrc/jit/ir/type_hashing.cpp", + "torch/csrc/jit/serialization/pickler.cpp", +@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ + "torch/csrc/autograd/python_torch_functions_manual.cpp", + "torch/csrc/autograd/python_variable.cpp", + "torch/csrc/autograd/python_variable_indexing.cpp", +- "torch/csrc/dynamo/python_compiled_autograd.cpp", +- "torch/csrc/dynamo/cache_entry.cpp", +- "torch/csrc/dynamo/cpp_shim.cpp", +- "torch/csrc/dynamo/cpython_defs.c", +- "torch/csrc/dynamo/eval_frame.c", +- "torch/csrc/dynamo/eval_frame_cpp.cpp", +- "torch/csrc/dynamo/extra_state.cpp", +- "torch/csrc/dynamo/framelocals_mapping.cpp", +- "torch/csrc/dynamo/guards.cpp", +- "torch/csrc/dynamo/utils.cpp", +- "torch/csrc/dynamo/init.cpp", ++# "torch/csrc/dynamo/python_compiled_autograd.cpp", ++# "torch/csrc/dynamo/cache_entry.cpp", ++# "torch/csrc/dynamo/cpp_shim.cpp", ++# "torch/csrc/dynamo/cpython_defs.c", ++# "torch/csrc/dynamo/eval_frame.c", ++# "torch/csrc/dynamo/eval_frame_cpp.cpp", ++# "torch/csrc/dynamo/extra_state.cpp", ++# "torch/csrc/dynamo/framelocals_mapping.cpp", ++# "torch/csrc/dynamo/guards.cpp", ++# "torch/csrc/dynamo/utils.cpp", ++# "torch/csrc/dynamo/init.cpp", ++ 
"torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/functorch/init.cpp", + "torch/csrc/fx/node.cpp", + "torch/csrc/mps/Module.cpp", +diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +new file mode 100644 +index 000000000000..3ac1324d4557 +--- /dev/null ++++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +@@ -0,0 +1,16 @@ ++#include ++#include ++ ++namespace torch::dynamo::autograd { ++const std::unique_ptr& getPyCompilerInterface() { ++ throw std::runtime_error("Dynamo not supported"); ++ return nullptr; ++} ++std::vector> get_input_metadata( ++ const edge_list& edges) { ++ std::vector> r; ++ throw std::runtime_error("Dynamo not supported"); ++ return r; ++} ++ ++} +-- +2.49.0 + diff --git a/next/pyproject.toml b/next/pyproject.toml deleted file mode 100644 index 9508ad0..0000000 --- a/next/pyproject.toml +++ /dev/null @@ -1,154 +0,0 @@ -[build-system] -requires = [ - "setuptools", - "wheel", - "astunparse", - "numpy", - "ninja", - "pyyaml", - "cmake", - "typing-extensions", - "requests", -] -# Use legacy backend to import local packages in setup.py -build-backend = "setuptools.build_meta:__legacy__" - - -[tool.black] -# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 -# line-length = 120 -target-version = ["py38", "py39", "py310", "py311"] - - -[tool.ruff] -target-version = "py38" - -# NOTE: Synchoronize the ignores with .flake8 -ignore = [ - # these ignores are from flake8-bugbear; please fix! - "B007", "B008", "B017", - "B018", # Useless expression - "B019", - "B023", - "B028", # No explicit `stacklevel` keyword argument found - "B904", - "E402", - "C408", # C408 ignored because we like the dict keyword argument syntax - "E501", # E501 is not flexible enough, we're using B950 instead - "E721", - "E731", # Assign lambda expression - "E741", - "EXE001", - "F405", - "F841", - # these ignores are from flake8-logging-format; please fix! 
- "G101", - # these ignores are from ruff NPY; please fix! - "NPY002", - # these ignores are from ruff PERF; please fix! - "PERF203", - "PERF401", - "PERF403", - # these ignores are from PYI; please fix! - "PYI019", - "PYI024", - "PYI036", - "PYI041", - "PYI056", - "SIM102", "SIM103", "SIM112", # flake8-simplify code styles - "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason - "SIM108", - "SIM110", - "SIM114", # Combine `if` branches using logical `or` operator - "SIM115", - "SIM116", # Disable Use a dictionary instead of consecutive `if` statements - "SIM117", - "SIM118", - "UP006", # keep-runtime-typing - "UP007", # keep-runtime-typing -] -line-length = 120 -select = [ - "B", - "C4", - "G", - "E", - "EXE", - "F", - "SIM1", - "W", - # Not included in flake8 - "NPY", - "PERF", - "PGH004", - "PIE794", - "PIE800", - "PIE804", - "PIE807", - "PIE810", - "PLC0131", # type bivariance - "PLC0132", # type param mismatch - "PLC0205", # string as __slots__ - "PLE", - "PLR0133", # constant comparison - "PLR0206", # property with params - "PLR1722", # use sys exit - "PLW0129", # assert on string literal - "PLW0406", # import self - "PLW0711", # binary op exception - "PLW1509", # preexec_fn not safe with threads - "PLW3301", # nested min max - "PT006", # TODO: enable more PT rules - "PT022", - "PT023", - "PT024", - "PT025", - "PT026", - "PYI", - "RUF008", # mutable dataclass default - "RUF015", # access first ele in constant time - "RUF016", # type error non-integer index - "RUF017", - "TRY200", - "TRY302", - "UP", -] - -[tool.ruff.per-file-ignores] -"__init__.py" = [ - "F401", -] -"test/typing/reveal/**" = [ - "F821", -] -"test/torch_np/numpy_tests/**" = [ - "F821", -] -"test/jit/**" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't want to modify the jit test as they test specify syntax -] -"test/test_jit.py" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't 
want to modify the jit test as they test specify syntax -] - -"torch/onnx/**" = [ - "UP037", # ONNX does runtime type checking -] - -"torchgen/api/types/__init__.py" = [ - "F401", - "F403", -] -"torchgen/executorch/api/types/__init__.py" = [ - "F401", - "F403", -] -"torch/utils/collect_env.py" = [ - "UP", # collect_env.py needs to work with older versions of Python -] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..925742b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,353 @@ +# Package ###################################################################### + +[build-system] +requires = [ + # 70.1.0: min version for integrated bdist_wheel command from wheel package + # 77.0.0: min version for SPDX expression support for project.license + "setuptools>=70.1.0,<80.0", + "cmake>=3.27", + "ninja", + "numpy", + "packaging", + "pyyaml", + "requests", + "six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", +] +build-backend = "setuptools.build_meta" + +[dependency-groups] +dev = [ + # This list should be kept in sync with the requirements-build.txt + # in PyTorch root until the project fully migrates to pyproject.toml + # after which this can be removed as it is already specified in the + # [build-system] section + "setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0 + "cmake>=3.27", + "ninja", + "numpy", + "packaging", + "pyyaml", + "requests", + "six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", + + # This list should be kept in sync with the requirements.txt in + # PyTorch root until the project fully migrates to pyproject.toml + "build[uv]", + "expecttest>=0.3.0", + "filelock", + "fsspec>=0.8.5", + "hypothesis", + "jinja2", + "lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'", + "networkx>=2.5.1", + "optree>=0.13.0", + "psutil", + "sympy>=1.13.3", + "typing-extensions>=4.13.2", + "wheel", +] + +[project] +name = "torch" +description = 
"Tensors and Dynamic neural networks in Python with strong GPU acceleration" +readme = "README.md" +requires-python = ">=3.10" +# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77 +# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment. +# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed +# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903 +license = { text = "BSD-3-Clause" } +authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }] +keywords = ["pytorch", "machine learning"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: C++", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dynamic = [ + "entry-points", + "dependencies", + "scripts", + "version", +] + +[project.urls] +Homepage = "https://pytorch.org" +Repository = "https://github.com/pytorch/pytorch" +Documentation = "https://pytorch.org/docs" +"Issue Tracker" = "https://github.com/pytorch/pytorch/issues" +Forum = "https://discuss.pytorch.org" + +[project.optional-dependencies] +optree = ["optree>=0.13.0"] +opt-einsum = ["opt-einsum>=3.3"] +pyyaml = ["pyyaml"] + +# Linter tools ################################################################# + 
+[tool.isort] +src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"] +extra_standard_library = ["typing_extensions"] +skip_gitignore = true +skip_glob = ["third_party/*"] +atomic = true +profile = "black" +indent = 4 +line_length = 88 +lines_after_imports = 2 +multi_line_output = 3 +include_trailing_comma = true +combine_as_imports = true + +[tool.usort.known] +first_party = ["caffe2", "torch", "torchgen", "functorch", "test"] +standard_library = ["typing_extensions"] + +[tool.ruff] +line-length = 88 +src = ["caffe2", "torch", "torchgen", "functorch", "test"] + +[tool.ruff.format] +docstring-code-format = true +quote-style = "double" + +[tool.ruff.lint] +# NOTE: Synchoronize the ignores with .flake8 +external = [ + "B001", + "B902", + "B950", + "E121", + "E122", + "E128", + "E131", + "E704", + "E723", + "F723", + "F812", + "P201", + "P204", + "T484", + "TOR901", +] +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B023", + "B028", # No explicit `stacklevel` keyword argument found + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E741", + "EXE001", + "F405", + "FURB122", # writelines + # these ignores are from flake8-logging-format; please fix! + "G101", + # these ignores are from ruff NPY; please fix! + "NPY002", + # these ignores are from ruff PERF; please fix! + "PERF203", + "PERF401", + # these ignores are from PYI; please fix! + "PYI024", + "PYI036", + "PYI041", + "PYI056", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason + "SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", + "UP007", # keep-runtime-typing + "UP045", # keep-runtime-typing + "TC006", + # TODO: Remove Python-3.10 specific suppressions + "B905", + "UP035", + "UP036", + "UP038", + "UP041", + "FURB161", +] +select = [ + "B", + "B904", # Re-raised error without specifying the cause via the from keyword + "C4", + "G", + "E", + "EXE", + "F", + "SIM1", + "SIM911", + "W", + # Not included in flake8 + "FURB", + "LOG", + "NPY", + "PERF", + "PGH004", + "PIE790", + "PIE794", + "PIE800", + "PIE804", + "PIE807", + "PIE810", + "PLC0131", # type bivariance + "PLC0132", # type param mismatch + "PLC0205", # string as __slots__ + "PLC3002", # unnecessary-direct-lambda-call + "PLE", + "PLR0133", # constant comparison + "PLR0206", # property with params + "PLR1722", # use sys exit + "PLR1736", # unnecessary list index + "PLW0129", # assert on string literal + "PLW0131", # named expr without context + "PLW0133", # useless exception statement + "PLW0245", # super without brackets + "PLW0406", # import self + "PLW0711", # binary op exception + "PLW1501", # bad open mode + "PLW1507", # shallow copy os.environ + "PLW1509", # preexec_fn not safe with threads + "PLW2101", # useless lock statement + "PLW3301", # nested min max + "PT006", # TODO: enable more PT rules + "PT014", # duplicate parameterize case + "PT022", + "PT023", + "PT024", + "PT025", + "PT026", + "PYI", + "Q003", # avoidable escaped quote + "Q004", # unnecessary escaped quote + "RSE", + "RUF008", # mutable dataclass default + "RUF013", # ban implicit optional + "RUF015", # access first ele in constant time + "RUF016", # type error non-integer index + "RUF017", + "RUF018", # no assignment in assert + "RUF019", # 
unnecessary-key-check + "RUF020", # never union + "RUF024", # from keys mutable + "RUF026", # default factory kwarg + "RUF030", # No print statement in assert + "RUF033", # default values __post_init__ dataclass + "RUF041", # simplify nested Literal + "RUF048", # properly parse `__version__` + "RUF200", # validate pyproject.toml + "S324", # for hashlib FIPS compliance + "SLOT", + "TC", + "TRY002", # ban vanilla raise (todo fix NOQAs) + "TRY203", + "TRY401", # verbose-log-message + "UP", + "YTT", +] + +[tool.ruff.lint.pyupgrade] +# Preserve types, even if a file imports `from __future__ import annotations`. +keep-runtime-typing = true + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = [ + "F401", +] +"*.pyi" = [ + "PYI011", # typed-argument-default-in-stub + "PYI021", # docstring-in-stub + "PYI053", # string-or-bytes-too-long +] +"functorch/notebooks/**" = [ + "F401", +] +"test/export/**" = [ + "PGH004" +] +"test/typing/**" = [ + "PGH004" +] +"test/typing/reveal/**" = [ + "F821", +] +"test/torch_np/numpy_tests/**" = [ + "F821", + "NPY201", +] +"test/dynamo/test_bytecode_utils.py" = [ + "F821", +] +"test/dynamo/test_debug_utils.py" = [ + "UP037", +] +"test/dynamo/test_misc.py" = [ + "PGH004", +] +"test/jit/**" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/test_jit.py" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/inductor/s429861_repro.py" = [ + "PGH004", +] +"test/inductor/test_torchinductor.py" = [ + "UP037", +] +# autogenerated #TODO figure out why file level noqa is ignored +"torch/_appdirs.py" = ["PGH004"] +"torch/jit/_shape_functions.py" = ["PGH004"] +"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"] +"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"] +"torch/_inductor/codegen/**" = [ + "PGH004" +] 
+"torchgen/api/types/__init__.py" = [ + "F401", + "F403", +] +"torch/utils/collect_env.py" = [ + "UP", # collect_env.py needs to work with older versions of Python +] +"torch/_vendor/**" = [ + "UP", # No need to mess with _vendor +] +"tools/linter/**" = [ + "LOG015" # please fix +] + +[tool.codespell] +ignore-words = "tools/linter/dictionary.txt" diff --git a/python-torch.spec b/python-torch.spec index 07a47eb..d3c31d7 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -4,18 +4,23 @@ %global forgeurl https://github.com/pytorch/pytorch # So pre releases can be tried -%bcond_without gitcommit +%bcond_with gitcommit %if %{with gitcommit} -# git tag v2.3.0-rc2 -%global commit0 6a89a753b1556fe8558582c452fdba083f6ec01a +# v2.9.0-rc9 +%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240313 +%global date0 20251008 +%global pypi_version 2.9.0 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 +%global rc_tag -rc9 %else -%global commit0 975d4284250170602db60adfda5eb1664a3b8acc -%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240307 +%global pypi_version 2.9.1 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 %endif -%global pypi_version 2.3.0 # For -test subpackage # suitable only for local testing @@ -25,39 +30,37 @@ %bcond_with test %ifarch x86_64 -%if 0%{?fedora} %bcond_without rocm -%else -%bcond_with rocm %endif -%endif -# hipblaslt is in development -%bcond_with hipblaslt -# Which families gpu build for -%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 -%global rocm_default_gpu default -%bcond_without rocm_loop -# For testing caffe2 +# For testing distributed+rccl etc. 
+%bcond_without rccl +%bcond_with gloo +%bcond_without mpi +%bcond_without tensorpipe + +# Disable dwz with rocm because memory can be exhausted +%if %{with rocm} +%define _find_debuginfo_dwz_opts %{nil} +%endif + +# These came in 2.4 and not yet in Fedora +%bcond_with opentelemetry +%bcond_with httplib +%bcond_with kineto + %if 0%{?fedora} -%bcond_without caffe2 +%bcond_without onnx %else -%bcond_with caffe2 -%endif - -# For testing distributed -%bcond_with distributed - -# For testing openvs -%bcond_with opencv - -# For testing cuda -%ifarch x86_64 -%bcond_with cuda +%bcond_with onnx %endif Name: python-%{pypi_name} +%if %{with gitcommit} Version: %{pypi_version}^git%{date0}.%{shortcommit0} +%else +Version: %{pypi_version} +%endif Release: %autorelease Summary: PyTorch AI/ML framework # See license.txt for license details @@ -66,79 +69,94 @@ License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AN URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source100: pyproject.toml +Source1000: pyproject.toml %else -Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source100: pyproject.toml +Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif -Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz -Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz -%if %{with cuda} -%global cuf_ver 1.1.2 -Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz -%global cul_ver 3.4.1 -Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz +# Development on tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit +%global tp_commit 
52791a2fd214b2a9dc5759d36725909c1daa7f2e +%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) +Source20: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz +# The old libuv tensorpipe uses +Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz +# Development afaik on libnop has stopped, this is the last commit +%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 +%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) +Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz + +%if %{without opentelemetry} +%global ot_ver 1.14.2 +Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz %endif -Patch0: 0001-no-third_party-foxi.patch -Patch1: 0001-no-third_party-fmt.patch -Patch2: 0001-no-third_party-FXdiv.patch -Patch3: 0001-Stub-in-kineto-ActivityType.patch -Patch5: 0001-disable-submodule-search.patch - -%if %{with caffe2} -Patch6: 0001-reenable-foxi-linking.patch +%if %{without httplib} +%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 +%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) +Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz %endif -%if %{with rocm} -# https://github.com/pytorch/pytorch/pull/120551 -Patch100: 0001-Optionally-use-hipblaslt.patch -Patch101: 0001-cuda-hip-signatures.patch -Patch102: 0001-silence-an-assert.patch -Patch103: 0001-can-not-use-with-c-files.patch -Patch104: 0001-use-any-hip.patch -Patch105: 0001-disable-use-of-aotriton.patch +%if %{without kineto} +%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 +%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) +Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz %endif -ExclusiveArch: x86_64 aarch64 +%global ox_ver 1.18.0 +Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz + +%global pt_arches x86_64 aarch64 
+ExclusiveArch: %pt_arches %global toolchain gcc %global _lto_cflags %nil BuildRequires: cmake -BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel +BuildRequires: flexiblas-devel BuildRequires: fmt-devel BuildRequires: foxi-devel -BuildRequires: FP16-devel -BuildRequires: fxdiv-devel BuildRequires: gcc-c++ BuildRequires: gcc-gfortran -%if %{with distributed} + +%if %{with gloo} BuildRequires: gloo-devel %endif -BuildRequires: ninja-build -BuildRequires: onnx-devel +BuildRequires: json-devel + BuildRequires: libomp-devel -BuildRequires: openblas-devel -BuildRequires: pocketfft-devel +BuildRequires: moodycamel-concurrentqueue-devel +BuildRequires: numactl-devel +BuildRequires: ninja-build +%if %{with onnx} +BuildRequires: onnx-devel +%endif +%if %{with mpi} +BuildRequires: openmpi-devel +%endif BuildRequires: protobuf-devel -BuildRequires: pthreadpool-devel -BuildRequires: psimd-devel -BuildRequires: python3-numpy -BuildRequires: python3-pyyaml -BuildRequires: python3-typing-extensions BuildRequires: sleef-devel BuildRequires: valgrind-devel -BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a +BuildRequires: pocketfft-devel +BuildRequires: pthreadpool-devel + +BuildRequires: cpuinfo-devel +BuildRequires: FP16-devel +BuildRequires: fxdiv-devel +BuildRequires: psimd-devel +BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) +BuildRequires: python3dist(numpy) +BuildRequires: python3dist(pip) +BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) -BuildRequires: python3dist(typing-extensions) BuildRequires: python3dist(sphinx) +BuildRequires: python3dist(typing-extensions) %if 0%{?fedora} BuildRequires: python3-pybind11 @@ -148,50 +166,47 @@ BuildRequires: python3dist(sympy) %if %{with rocm} BuildRequires: hipblas-devel -%if %{with hipblaslt} BuildRequires: hipblaslt-devel -%endif BuildRequires: 
hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel +BuildRequires: hipsparselt-devel BuildRequires: hipsolver-devel +# Magma is broken on ROCm 7 +# BuildRequires: magma-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel BuildRequires: rocrand-devel BuildRequires: rocfft-devel -%if %{with distributed} +%if %{with rccl} BuildRequires: rccl-devel %endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel +BuildRequires: rocm-compilersupport-macros BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -BuildRequires: rocm-rpm-macros-modules +BuildRequires: rocsolver-devel +BuildRequires: rocm-smi-devel BuildRequires: rocthrust-devel BuildRequires: roctracer-devel -Requires: rocm-rpm-macros-modules -%endif +Requires: amdsmi -%if %{with opencv} -BuildRequires: opencv-devel %endif - %if %{with test} BuildRequires: google-benchmark-devel %endif -# Apache-2.0 -Provides: bundled(flatbuffers) = 22.3.3 -# MIT -Provides: bundled(miniz) = 2.1.0 -Provides: bundled(pybind11) = 2.11.1 +Requires: python3dist(dill) +Requires: python3dist(yaml) +Obsoletes: caffe = 1.0^git20200212.9b89154 %description PyTorch is a Python package that provides two high-level features: @@ -205,6 +220,24 @@ and Cython to extend PyTorch when needed. 
%package -n python3-%{pypi_name} Summary: %{summary} +# For convenience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = %{flatbuffers_version} +# MIT +Provides: bundled(miniz) = %{miniz_version} +Provides: bundled(pybind11) = %{pybind11_version} + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + %description -n python3-%{pypi_name} PyTorch is a Python package that provides two high-level features: @@ -214,22 +247,6 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. -%package -n python3-%{pypi_name}-devel -Summary: Libraries and headers for %{name} -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} - -%description -n python3-%{pypi_name}-devel -%{summary} - -%if %{with rocm} -%package -n python3-%{pypi_name}-rocm -Summary: %{name} for ROCm -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} - -%description -n python3-%{pypi_name}-rocm -%{summary} -%endif - %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -242,44 +259,120 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %prep +%if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE1000} . + +%else +%autosetup -p1 -n pytorch-v%{version} +%endif # Remove bundled egg-info rm -rf %{pypi_name}.egg-info -# Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE100} . 
tar xf %{SOURCE1} -cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ +rm -rf third_party/flatbuffers/* +cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/ tar xf %{SOURCE2} -cp -r pybind11-2.11.1/* third_party/pybind11/ +rm -rf third_party/pybind11/* +cp -r pybind11-%{pybind11_version}/* third_party/pybind11/ -%if %{with cuda} -tar xf %{SOURCE10} -cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ -tar xf %{SOURCE11} -cp -r cutlass-%{cul_ver}/* third_party/cutlass/ +%if %{with tensorpipe} +tar xf %{SOURCE20} +rm -rf third_party/tensorpipe/* +cp -r tensorpipe-*/* third_party/tensorpipe/ +tar xf %{SOURCE21} +rm -rf third_party/tensorpipe/third_party/libuv/* +cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ +tar xf %{SOURCE22} +rm -rf third_party/tensorpipe/third_party/libnop/* +cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ + +# gcc 15 include cstdint +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/allocator.h +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/memory.h %endif -%if %{with opencv} -# Reduce requirements, *FOUND is not set -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt -sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt +%if %{without opentelemetry} +tar xf %{SOURCE60} +rm -rf third_party/opentelemetry-cpp/* +cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ %endif +%if %{without httplib} +tar xf %{SOURCE70} +rm -rf third_party/cpp-httplib/* +cp -r cpp-httplib-*/* third_party/cpp-httplib/ +%endif + +%if %{without kineto} +tar xf %{SOURCE80} +rm -rf third_party/kineto/* +cp -r kineto-*/* third_party/kineto/ +%endif + +%if %{without onnx} +tar xf %{SOURCE90} +rm -rf third_party/onnx/* +cp -r onnx-*/* third_party/onnx/ +%endif + +# Adjust for the hipblaslt's we build +sed -i -e 's@"gfx90a", "gfx940", 
"gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp + %if 0%{?rhel} # In RHEL but too old sed -i -e '/typing-extensions/d' setup.py # Need to pip these sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py +%else +# for 2.5.0 +sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py %endif # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake +# Compress hip +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake +# Silence noisy warning +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake +# Use parallel jobs +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake +# Need to link with librocm_smi64 +sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake + +# No third_party fmt, use system +sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt +sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $)@@' aten/src/ATen/CMakeLists.txt + +sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake +sed -i -e 's@fmt::fmt-header-only@fmt@' 
caffe2/CMakeLists.txt + +sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake +sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake + +# No third_party FXdiv +sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt +sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt + +# https://github.com/pytorch/pytorch/issues/149803 +# Tries to checkout nccl +sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py + +# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo +sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # Release comes fully loaded with third party src # Remove what we can @@ -289,7 +382,7 @@ sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake # the third_party dir to compile the file. # mimiz is licensed MIT # https://github.com/richgel999/miniz/blob/master/LICENSE -mv third_party/miniz-2.1.0 . +mv third_party/miniz-%{miniz_version} . # # setup.py depends on this script mv third_party/build_bundled.py . @@ -299,9 +392,24 @@ mv third_party/flatbuffers . mv third_party/pybind11 . -%if %{with cuda} -mv third_party/cudnn_frontend . -mv third_party/cutlass . +%if %{with tensorpipe} +mv third_party/tensorpipe . +%endif + +%if %{without opentelemetry} +mv third_party/opentelemetry-cpp . +%endif + +%if %{without httplib} +mv third_party/cpp-httplib . +%endif + +%if %{without kineto} +mv third_party/kineto . +%endif + +%if %{without onnx} +mv third_party/onnx . 
%endif %if %{with test} @@ -312,13 +420,28 @@ mv third_party/googletest . rm -rf third_party/* # Put stuff back mv build_bundled.py third_party -mv miniz-2.1.0 third_party +mv miniz-%{miniz_version} third_party mv flatbuffers third_party mv pybind11 third_party -%if %{with cuda} -mv cudnn_frontend third_party -mv cutlass third_party +%if %{with tensorpipe} +mv tensorpipe third_party +%endif + +%if %{without opentelemetry} +mv opentelemetry-cpp third_party +%endif + +%if %{without httplib} +mv cpp-httplib third_party +%endif + +%if %{without kineto} +mv kineto third_party +%endif + +%if %{without onnx} +mv onnx third_party %endif %if %{with test} @@ -328,47 +451,70 @@ mv googletest third_party # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft +cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ + # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* third_party/valgrind-headers -# Remove unneeded OpenCL files that confuse the lincense scanner -rm caffe2/contrib/opencl/OpenCL/cl.hpp -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp +# Fix installing to /usr/lib64 +sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt + +# reenable foxi linking +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake + +# cmake version changed +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt +%if %{without opentelemetry} +sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt %endif %if %{with rocm} # 
hipify ./tools/amd_build/build_amd.py # Fedora installs to /usr/include, not /usr/include/rocm-core sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h +# https://github.com/pytorch/pytorch/issues/149805 +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake +# Fedora installs to /usr/include, not /usr/include/rocm-core +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp +# use any hip, correct CMAKE_MODULE_PATH +sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake +sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake +# silence an assert +# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu + %endif -%if %{with cuda} -# build complains about not being able to build -pie without -fPIC -sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@string(APPEND CMAKE_CUDA_FLAGS " -fPIC -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@' CMakeLists.txt -%endif +# moodycamel include path needs adjusting to use the system's +sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake %build +# Export the arches +# echo "%%pytorch_arches %pt_arches" > macros.pytorch + # # Control the number of jobs # # The build can fail if too many threads exceed the physical memory -# So count core and and memory and increase the build memory util the build succeeds +# Run at least one thread, more if CPU & memory resources are available. 
# +%ifarch x86_64 # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` +%else +# cpuinfo format varies on other arches, fall back to nproc +COMPILE_JOBS=`nproc` +%endif if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory -%if %{with cuda} -BUILD_MEM=4 -%else BUILD_MEM=2 -%endif MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` @@ -400,53 +546,55 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF +export USE_FAKELOWP=OFF export USE_FBGEMM=OFF export USE_FLASH_ATTENTION=OFF -export USE_GOLD_LINKER=OFF +export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF +export USE_KLEIDIAI=OFF export USE_LITE_INTERPRETER_PROFILER=OFF export USE_LITE_PROTO=OFF +export USE_MAGMA=OFF +export USE_MEM_EFF_ATTENTION=OFF export USE_MKLDNN=OFF +export USE_MPI=OFF export USE_NCCL=OFF export USE_NNPACK=OFF export USE_NUMPY=ON export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF -export USE_QNNPACK=OFF export USE_ROCM=OFF -export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_SLEEF=ON export USE_SYSTEM_EIGEN_INSTALL=ON -export USE_SYSTEM_FP16=ON -export USE_SYSTEM_PTHREADPOOL=ON -export USE_SYSTEM_PSIMD=ON -export USE_SYSTEM_FXDIV=ON +%if %{with onnx} export USE_SYSTEM_ONNX=ON -export USE_SYSTEM_XNNPACK=ON +%endif export USE_SYSTEM_PYBIND11=OFF export USE_SYSTEM_LIBS=OFF +export USE_SYSTEM_NCCL=OFF export USE_TENSORPIPE=OFF -export USE_XNNPACK=ON +export USE_XNNPACK=OFF +export USE_XPU=OFF +export USE_SYSTEM_PTHREADPOOL=ON +export USE_SYSTEM_CPUINFO=ON +export USE_SYSTEM_FP16=ON +export USE_SYSTEM_FXDIV=ON +export USE_SYSTEM_PSIMD=ON +export USE_SYSTEM_XNNPACK=OFF -%if %{with caffe2} -export BUILD_CAFFE2=ON -%endif - -%if %{with cuda} -%if %{without rocm} -export CUDACXX=/usr/local/cuda/bin/nvcc -export 
CPLUS_INCLUDE_PATH=/usr/local/cuda/include -export USE_CUDA=ON -%endif -%endif - -%if %{with distributed} export USE_DISTRIBUTED=ON +%if %{with tensorpipe} +export USE_TENSORPIPE=ON +export TP_BUILD_LIBUV=OFF %endif -%if %{with opencv} -export USE_OPENCV=ON +%if %{with gloo} +export USE_GLOO=ON +export USE_SYSTEM_GLOO=ON +%endif +%if %{with mpi} +export USE_MPI=ON %endif %if %{with test} @@ -462,100 +610,73 @@ export BUILD_TEST=ON # # See BZ 2244862 - %if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK_SDPA=OFF +export USE_ROCM_CK_GEMM=OFF +export USE_FBGEMM_GENAI=OFF + +# Magma is broken on ROCm 7 +# export USE_MAGMA=ON export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -%py3_build -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - %py3_build - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_wheel %else - %py3_build - %endif + %install -%if %{with rocm} +# pytorch rpm macros +# install -Dpm 644 macros.pytorch \ +# %{buildroot}%{_rpmmacrodir}/macros.pytorch +%if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK=OFF export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +# export 
DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -mv build-${gpu} build -%py3_install -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - mv build-${gpu} build - # need to customize the install location, so replace py3_install - %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} - rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_install +%pyproject_save_files '*torch*' %else %py3_install - %endif + +%check +# Not working yet +# pyproject_check_import torch + # Do not remote the empty files - -%files -n python3-%{pypi_name} +%files -n python3-%{pypi_name} %license LICENSE %doc README.md -%{_bindir}/convert-caffe2-to-onnx -%{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/%{pypi_name} -%{python3_sitearch}/%{pypi_name}-*.egg-info +%{python3_sitearch}/%{pypi_name}* %{python3_sitearch}/functorch -%{python3_sitearch}/torchgen -%if %{with caffe2} -%{python3_sitearch}/caffe2 -%endif -%if %{with rocm} -%if %{with rocm_loop} -%{_libdir}/rocm/gfx*/bin/* -%{_libdir}/rocm/gfx*/lib64/* -%endif -%endif %changelog %autochangelog diff --git a/sources b/sources index 90b1128..9a3681f 100644 --- a/sources +++ b/sources @@ -1,2 +1,19 @@ -SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 -SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 +SHA512 (pytorch-v2.7.0.tar.gz) = 
17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6 +SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 +SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a +SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e +SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 +SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 +SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 +SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 +SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a +SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 +SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 +SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0 +SHA512 (pytorch-v2.8.0.tar.gz) = 
791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28 +SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d +SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907 +SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc +SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341 +SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023 +SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a