From 61ccf033a8aece458e7df6573367bde478952636 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 24 Jul 2025 06:07:03 -0700 Subject: [PATCH] Update gitcommit to 2.8.0-rc8 Patch problem with 3.14 Start converting over py3 macros Handle new dependency on rocmsmi Signed-off-by: Tom Rix --- .../0001-Add-cmake-variable-USE_ROCM_CK.patch | 85 ++++- ...and-import-torch-issues-for-cpython-.patch | 359 ++++++++++++++++++ next/0001-Use-horrible-dynamo-stub.patch | 85 +++++ python-torch.spec | 60 ++- 4 files changed, 560 insertions(+), 29 deletions(-) create mode 100644 next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch create mode 100644 next/0001-Use-horrible-dynamo-stub.patch diff --git a/next/0001-Add-cmake-variable-USE_ROCM_CK.patch b/next/0001-Add-cmake-variable-USE_ROCM_CK.patch index 1afe692..925e03b 100644 --- a/next/0001-Add-cmake-variable-USE_ROCM_CK.patch +++ b/next/0001-Add-cmake-variable-USE_ROCM_CK.patch @@ -1,17 +1,17 @@ -From 4cc5d88dfe7a45ab245648dc874645d32a24b98b Mon Sep 17 00:00:00 2001 +From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 From: Tom Rix -Date: Fri, 27 Jun 2025 13:52:51 -0700 +Date: Mon, 21 Jul 2025 11:35:03 -0700 Subject: [PATCH] Add cmake variable USE_ROCM_CK --- CMakeLists.txt | 1 + aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- - aten/src/ATen/cuda/CUDABlas.cpp | 10 ++++----- + aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- cmake/Dependencies.cmake | 3 +++ - 4 files changed, 29 insertions(+), 25 deletions(-) + 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 99c0b9e0ea0c..4c632e42f531 100644 +index a5d25e6afa0f..afc1b53efa64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -240,6 +240,7 @@ cmake_dependent_option( @@ -82,7 +82,7 @@ index c9cfd74b501e..59f6178218ee 100644 file(GLOB native_hip_ck "native/hip/ck*.hip") exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index 89350a11bea7..33e5f2808057 100644 +index 89350a11bea7..e5b7960177cf 100644 --- a/aten/src/ATen/cuda/CUDABlas.cpp +++ b/aten/src/ATen/cuda/CUDABlas.cpp @@ -752,7 +752,7 @@ template <> @@ -94,16 +94,16 @@ index 89350a11bea7..33e5f2808057 100644 // hipblaslt does not support double gemm yet bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); #else -@@ -1103,7 +1103,7 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE( - void * beta_ptr = &fbeta; - _cublasAdjustLdLevel3(transa, transb, m, n, k, &lda, &ldb, &ldc); - GEMM_CHECK_ARGVALUES(at::Half); --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - int flag = 0; - #if USE_GEMM_FLAGS_FP16_ALT_IMPL - flag = at::ROCmBackwardPassGuard::is_backward_pass() ? rocblas_gemm_flags_fp16_alt_impl : 0; -@@ -1270,7 +1270,7 @@ template <> +@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } +@@ -1270,14 +1270,14 @@ template <> void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) { if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { @@ -112,6 +112,23 @@ index 89350a11bea7..33e5f2808057 100644 // hipblaslt does not support double gemm yet gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); #else + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); + #endif + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); + } +@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); @@ -1311,7 +1311,7 @@ template <> void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) { @@ -130,6 +147,42 @@ index 89350a11bea7..33e5f2808057 100644 // hipblaslt does not support complex gemm yet gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); #else +@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); + } +@@ -1361,7 +1361,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index a93386c27f8d..be1368999d38 100644 --- a/cmake/Dependencies.cmake diff --git a/next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch new file mode 100644 index 0000000..b6a282c --- /dev/null +++ b/next/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch @@ -0,0 +1,359 @@ +From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 +From: albanD +Date: Sat, 12 Jul 2025 03:42:33 -0400 +Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 + +Imported from +https://github.com/albanD/pytorch/tree/cpython314_build +commit 88bb9cdb72449f4277829e20d94ad8aec1894216 + +Signed-off-by: Tom Rix +--- + torch/_dynamo/bytecode_analysis.py | 2 +- + torch/ao/quantization/__init__.py | 5 +++- + torch/ao/quantization/qconfig.py | 4 ++- + torch/ao/quantization/utils.py | 7 +++-- + torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ + torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ + torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- + torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ + torch/csrc/utils/python_compat.h | 1 + + torch/onnx/__init__.py | 1 - + torch/utils/weak.py | 29 +++++++++++++++++-- + 11 files changed, 111 insertions(+), 19 deletions(-) + +diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py +index 3252ea91409f..2de74ee5bf8d 100644 +--- a/torch/_dynamo/bytecode_analysis.py ++++ b/torch/_dynamo/bytecode_analysis.py +@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): + TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) + else: + TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) +-if sys.version_info >= (3, 12): ++if (3, 12) <= sys.version_info < (3, 14): + TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) + if sys.version_info >= (3, 13): + TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) +diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py +index ffc1792fd23f..cf5a8b99a894 100644 +--- a/torch/ao/quantization/__init__.py ++++ b/torch/ao/quantization/__init__.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + ++import sys + from typing import Callable, Optional, Union + + import torch +@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 + + # ensure __module__ is set correctly for public APIs + ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase] +-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++if sys.version_info < (3, 14): ++ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++ + for _f in [ + compare_results, + extract_results_from_loggers, +diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py +index efee5302ad42..d9a8fc78bab4 100644 +--- a/torch/ao/quantization/qconfig.py ++++ b/torch/ao/quantization/qconfig.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + import copy ++import sys + import warnings + from collections import namedtuple + from typing import Any, Optional, Union +@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N + + + QConfigAny = Optional[QConfig] +-QConfigAny.__module__ = "torch.ao.quantization.qconfig" ++if sys.version_info < (3, 14): ++ QConfigAny.__module__ = "torch.ao.quantization.qconfig" + + + def _add_module_to_qconfig_obs_ctr( +diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py +index 4ac3112ec072..3b1503e01701 100644 +--- a/torch/ao/quantization/utils.py ++++ b/torch/ao/quantization/utils.py +@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) + """ + + import functools ++import sys + import warnings + from collections import OrderedDict + from inspect import getfullargspec, signature +@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized + + + NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] +-NodePattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ NodePattern.__module__ = "torch.ao.quantization.utils" + + # This is the Quantizer class instance from torch/quantization/fx/quantize.py. + # Define separately to prevent circular imports. +@@ -31,7 +33,8 @@ QuantizerCls = Any + Pattern = Union[ + Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any + ] +-Pattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ Pattern.__module__ = "torch.ao.quantization.utils" + + + # TODO: maybe rename this to MatchInputNode +diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c +index b68ef894aeaa..244d4165d5e8 100644 +--- a/torch/csrc/dynamo/cpython_defs.c ++++ b/torch/csrc/dynamo/cpython_defs.c +@@ -2,6 +2,20 @@ + #include + #include + ++#if IS_PYTHON_3_14_PLUS ++ ++const uint8_t* THP_PyOpcode_Caches = NULL; ++const int THP_PyOpcode_Caches_size = 0; ++ ++void ++THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) ++{} ++void ++THP_PyFrame_Clear(_PyInterpreterFrame *frame) ++{} ++ ++#else ++ + #if IS_PYTHON_3_11_PLUS + + #define Py_BUILD_CORE +@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; + const int THP_PyOpcode_Caches_size = 0; + + #endif ++ ++#endif // IS_PYTHON_3_14_PLUS +\ No newline at end of file +diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h +index 6b99c1d5aec8..616be16563cf 100644 +--- a/torch/csrc/dynamo/cpython_includes.h ++++ b/torch/csrc/dynamo/cpython_includes.h +@@ -21,6 +21,14 @@ + + #if IS_PYTHON_3_11_PLUS + #include ++#if IS_PYTHON_3_14_PLUS ++#include ++#include ++#endif ++#endif ++ ++#if IS_PYTHON_3_14_PLUS ++#include + #endif + + #undef Py_BUILD_CORE +@@ -30,6 +38,13 @@ + extern "C" { + #endif + ++#if IS_PYTHON_3_14_PLUS ++ ++#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) ++#define PREV_INSTR(x) (x)->instr_ptr ++ ++#else ++ + #if IS_PYTHON_3_13_PLUS + #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) + #define PREV_INSTR(x) (x)->instr_ptr +@@ -38,6 +53,8 @@ extern "C" { + #define PREV_INSTR(x) (x)->prev_instr + #endif + ++#endif // IS_PYTHON_3_14_PLUS ++ + #if IS_PYTHON_3_12_PLUS + #define FUNC(x) ((x)->f_funcobj) + #else +diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c +index f413782b2d30..72bb8839bac3 100644 +--- a/torch/csrc/dynamo/eval_frame.c ++++ b/torch/csrc/dynamo/eval_frame.c +@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { + return PyUnicode_AsUTF8(F_CODE(frame)->co_name); + } + +-void clear_old_frame_if_python_312_plus( +- PyThreadState* tstate, +- THP_EVAL_API_FRAME_OBJECT* frame) { +-#if IS_PYTHON_3_12_PLUS +- +- THP_PyFrame_Clear(frame); +- THP_PyThreadState_PopFrame(tstate, frame); +- +-#endif +-} +- + static PyObject* dynamo_eval_custom_code_impl( + PyThreadState* tstate, + THP_EVAL_API_FRAME_OBJECT* frame, +@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( + + static void enable_eval_frame_shim(PyThreadState* tstate) {} + static void enable_eval_frame_default(PyThreadState* tstate) {} ++PyObject* dynamo_eval_custom_code( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ PyCodeObject* code, ++ const char* trace_annotation, ++ int throw_flag) {} ++THPPyInterpreterFrame* THPPyInterpreterFrame_New( ++ THP_EVAL_API_FRAME_OBJECT* frame) {} ++PyObject* dynamo_eval_frame_default( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ int throw_flag) {} + + static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; + +@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { + + #endif // !(IS_PYTHON_3_14_PLUS) + ++void clear_old_frame_if_python_312_plus( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame) { ++#if IS_PYTHON_3_12_PLUS ++ ++ THP_PyFrame_Clear(frame); ++ THP_PyThreadState_PopFrame(tstate, frame); ++ ++#endif ++} ++ + static PyObject* increment_working_threads( + PyThreadState* tstate, + PyObject* module) { +diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp +index b839fb26fc91..c4ee36d87767 100644 +--- a/torch/csrc/dynamo/framelocals_mapping.cpp ++++ b/torch/csrc/dynamo/framelocals_mapping.cpp +@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + PyCodeObject* co = F_CODE(frame); + _framelocals.resize(co->co_nlocalsplus, nullptr); + ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + if (!frame->stacktop) { + return; + } ++#endif + + auto update_framelocals = [&](int i, PyObject* value) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); +@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + }; + + auto offset = co->co_nlocalsplus - co->co_nfreevars; ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + for (int i = 0; i < offset; i++) { + update_framelocals(i, frame->localsplus[i]); + } ++#endif ++ + // Get references to closure variables ++#if IS_PYTHON_3_14_PLUS ++ PyObject* closure; ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; ++#endif + for (int i = 0; i < co->co_nfreevars; i++) { + update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); + } +diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h +index a1537611cc47..16292e4fd030 100644 +--- a/torch/csrc/utils/python_compat.h ++++ b/torch/csrc/utils/python_compat.h +@@ -13,6 +13,7 @@ extern "C" { + #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 + #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 + #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 ++#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 + + static inline int PyCode_GetNCellvars(PyCodeObject* code) { + // gh-26364 added co_ncellvars to Python 3.11.0rc1 +diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py +index 345ffd2a065b..ceeadde5365b 100644 +--- a/torch/onnx/__init__.py ++++ b/torch/onnx/__init__.py +@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx" + OnnxExporterError.__module__ = "torch.onnx" + _OrtBackend.__module__ = "torch.onnx" + _OrtBackendOptions.__module__ = "torch.onnx" +-_OrtExecutionProvider.__module__ = "torch.onnx" + enable_fake_mode.__module__ = "torch.onnx" + is_onnxrt_backend_supported.__module__ = "torch.onnx" + +diff --git a/torch/utils/weak.py b/torch/utils/weak.py +index 8bf2ba5ed02b..9c7218cb2ad3 100644 +--- a/torch/utils/weak.py ++++ b/torch/utils/weak.py +@@ -3,8 +3,6 @@ from __future__ import annotations + + import collections.abc as _collections_abc + import weakref +- +-from _weakrefset import _IterationGuard # type: ignore[attr-defined] + from collections.abc import Mapping, MutableMapping + from weakref import ref + +@@ -22,6 +20,33 @@ __all__ = [ + ] + + ++# TODO: make weakref properly thread safe following ++# https://github.com/python/cpython/pull/125325 ++class _IterationGuard: ++ # This context manager registers itself in the current iterators of the ++ # weak container, such as to delay all removals until the context manager ++ # exits. ++ # This technique should be relatively thread-safe (since sets are). ++ ++ def __init__(self, weakcontainer): ++ # Don't create cycles ++ self.weakcontainer = ref(weakcontainer) ++ ++ def __enter__(self): ++ w = self.weakcontainer() ++ if w is not None: ++ w._iterating.add(self) ++ return self ++ ++ def __exit__(self, e, t, b): ++ w = self.weakcontainer() ++ if w is not None: ++ s = w._iterating ++ s.remove(self) ++ if not s: ++ w._commit_removals() ++ ++ + # This file defines a variant of WeakKeyDictionary that overrides the hashing + # behavior of the key to use object identity, rather than the builtin + # __eq__/__hash__ functions. This is useful for Tensor weak keys, as their +-- +2.49.0 + diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch new file mode 100644 index 0000000..1900519 --- /dev/null +++ b/next/0001-Use-horrible-dynamo-stub.patch @@ -0,0 +1,85 @@ +From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sun, 20 Jul 2025 12:47:58 -0700 +Subject: [PATCH] Use horrible dynamo stub + +Rawhide's update of python is too fast for dynamo +So paper of the problem with a horrible stub that throws +runtime exceptions if dynamo is used. + +Signed-off-by: Tom Rix +--- + build_variables.bzl | 26 ++++++++++++---------- + torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ + 2 files changed, 30 insertions(+), 12 deletions(-) + create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp + +diff --git a/build_variables.bzl b/build_variables.bzl +index b266c80e8843..a3be6893349b 100644 +--- a/build_variables.bzl ++++ b/build_variables.bzl +@@ -140,7 +140,8 @@ core_trainer_sources = [ + "torch/csrc/autograd/variable.cpp", + "torch/csrc/autograd/utils/warnings.cpp", + "torch/csrc/autograd/jit_decomp_interface.cpp", +- "torch/csrc/dynamo/compiled_autograd.cpp", ++# "torch/csrc/dynamo/compiled_autograd.cpp", ++ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/jit/frontend/name_mangler.cpp", + "torch/csrc/jit/ir/type_hashing.cpp", + "torch/csrc/jit/serialization/pickler.cpp", +@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ + "torch/csrc/autograd/python_torch_functions_manual.cpp", + "torch/csrc/autograd/python_variable.cpp", + "torch/csrc/autograd/python_variable_indexing.cpp", +- "torch/csrc/dynamo/python_compiled_autograd.cpp", +- "torch/csrc/dynamo/cache_entry.cpp", +- "torch/csrc/dynamo/cpp_shim.cpp", +- "torch/csrc/dynamo/cpython_defs.c", +- "torch/csrc/dynamo/eval_frame.c", +- "torch/csrc/dynamo/eval_frame_cpp.cpp", +- "torch/csrc/dynamo/extra_state.cpp", +- "torch/csrc/dynamo/framelocals_mapping.cpp", +- "torch/csrc/dynamo/guards.cpp", +- "torch/csrc/dynamo/utils.cpp", +- "torch/csrc/dynamo/init.cpp", ++# "torch/csrc/dynamo/python_compiled_autograd.cpp", ++# "torch/csrc/dynamo/cache_entry.cpp", ++# "torch/csrc/dynamo/cpp_shim.cpp", ++# "torch/csrc/dynamo/cpython_defs.c", ++# "torch/csrc/dynamo/eval_frame.c", ++# "torch/csrc/dynamo/eval_frame_cpp.cpp", ++# "torch/csrc/dynamo/extra_state.cpp", ++# "torch/csrc/dynamo/framelocals_mapping.cpp", ++# "torch/csrc/dynamo/guards.cpp", ++# "torch/csrc/dynamo/utils.cpp", ++# "torch/csrc/dynamo/init.cpp", ++ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/functorch/init.cpp", + "torch/csrc/fx/node.cpp", + "torch/csrc/mps/Module.cpp", +diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +new file mode 100644 +index 000000000000..3ac1324d4557 +--- /dev/null ++++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +@@ -0,0 +1,16 @@ ++#include ++#include ++ ++namespace torch::dynamo::autograd { ++const std::unique_ptr& getPyCompilerInterface() { ++ throw std::runtime_error("Dynamo not supported"); ++ return nullptr; ++} ++std::vector> get_input_metadata( ++ const edge_list& edges) { ++ std::vector> r; ++ throw std::runtime_error("Dynamo not supported"); ++ return r; ++} ++ ++} +-- +2.49.0 + diff --git a/python-torch.spec b/python-torch.spec index 91a82a9..44c1199 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -6,10 +6,10 @@ # So pre releases can be tried %bcond_with gitcommit %if %{with gitcommit} -# v2.8.0-rc6 -%global commit0 f2b69a083d15e3d0083bb304302a3fd0b5fb8705 +# v2.8.0-rc8 +%global commit0 a1cb3cc05d46d198467bebbb6e8fba50a325d4e7 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20250718 +%global date0 20250723 %global pypi_version 2.8.0 %global flatbuffers_version 24.12.23 %global miniz_version 3.0.2 @@ -33,7 +33,11 @@ %endif # For testing distributed+rccl etc. +%if %{with gitcommit} +%bcond_without rccl +%else %bcond_with rccl +%endif %bcond_with gloo %bcond_without mpi %bcond_without tensorpipe @@ -103,13 +107,13 @@ Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ %if %{without gitcommit} # https://github.com/pytorch/pytorch/issues/150187 -# The hack job -# Patch11: 0001-python-torch-disable-ck.patch -# Cleaned up hack job Patch11: 0001-Add-cmake-varaible-USE_ROCM_CK.patch - %else +# https://github.com/pytorch/pytorch/issues/150187 Patch11: 0001-Add-cmake-variable-USE_ROCM_CK.patch +# https://github.com/pytorch/pytorch/issues/156595 +# Patch12: 0001-Use-horrible-dynamo-stub.patch +Patch12: 0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch %endif ExclusiveArch: x86_64 aarch64 @@ -153,6 +157,9 @@ BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) BuildRequires: python3dist(numpy) +%if %{with gitcommit} +BuildRequires: python3dist(pip) +%endif BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) BuildRequires: python3dist(sphinx) @@ -171,6 +178,9 @@ BuildRequires: hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel +%if %{with gitcommit} +BuildRequires: hipsparselt-devel +%endif BuildRequires: hipsolver-devel BuildRequires: magma-devel BuildRequires: miopen-devel @@ -190,6 +200,7 @@ BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros %if %{with gitcommit} BuildRequires: rocsolver-devel +BuildRequires: rocm-smi-devel %endif BuildRequires: rocthrust-devel BuildRequires: roctracer-devel @@ -337,6 +348,10 @@ sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unus sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake # Use parallel jobs sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake +%if %{with gitcommit} +# Need to link with librocm_smi64 +sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake +%endif # No third_party fmt, use system sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt @@ -590,17 +605,21 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode # pytorch uses clang, not hipcc export HIP_CLANG_PATH=%{rocmllvm_bindir} -%if %{?fedora} <= 43 -export PYTORCH_ROCM_ARCH="gfx1100;gfx1201" -%else export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} + +%if %{with gitcommit} +%pyproject_wheel +%else +%py3_build %endif -%py3_build - %else +%if %{with gitcommit} +%pyproject_wheel +%else %py3_build +%endif %endif @@ -617,17 +636,32 @@ export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode # pytorch uses clang, not hipcc export HIP_CLANG_PATH=%{rocmllvm_bindir} export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} +%if %{with gitcommit} +%pyproject_install +%else %py3_install +%endif %else +%if %{with gitcommit} +%pyproject_install +%pyproject_save_files torch +%else %py3_install - +%endif %endif + + %check +%if %{with gitcommit} +# Not working yet +# pyproject_check_import torch +%else %py3_check_import torch +%endif # Do not remote the empty files