diff --git a/.gitignore b/.gitignore index 315fe1c..c424df5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,22 @@ /pytorch-97ff6cf.tar.gz /pytorch-v2.3.0.tar.gz /pytorch-v2.3.1.tar.gz +/pytorch-v2.4.0.tar.gz +/v1.14.2.tar.gz +/cpp-httplib-3b6597b.tar.gz +/kineto-be13176.tar.gz +/pytorch-v2.4.1.tar.gz +/pytorch-v2.5.0.tar.gz +/pytorch-v2.5.1.tar.gz +/pytorch-v2.7.0.tar.gz +/v2.13.6.tar.gz +/pytorch-a1cb3cc.tar.gz +/v24.12.23.tar.gz +/kineto-5e75018.tar.gz +/pytorch-v2.8.0.tar.gz +/v1.18.0.tar.gz +/pytorch-715dca6.tar.gz +/pytorch-fd36458.tar.gz +/pytorch-0fabc3b.tar.gz +/pytorch-v2.9.0.tar.gz +/pytorch-v2.9.1.tar.gz diff --git a/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch new file mode 100644 index 0000000..925e03b --- /dev/null +++ b/0001-Add-cmake-variable-USE_ROCM_CK.patch @@ -0,0 +1,202 @@ +From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Mon, 21 Jul 2025 11:35:03 -0700 +Subject: [PATCH] Add cmake variable USE_ROCM_CK + +--- + CMakeLists.txt | 1 + + aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- + aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- + cmake/Dependencies.cmake | 3 +++ + 4 files changed, 35 insertions(+), 31 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index a5d25e6afa0f..afc1b53efa64 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -240,6 +240,7 @@ cmake_dependent_option( + BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON + "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) + cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) ++cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) + option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) + cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) + cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF +diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt 
+index c9cfd74b501e..59f6178218ee 100644 +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -373,26 +373,26 @@ if(USE_ROCM) + # is header only, so this should be ok, except that the CMake build generates + # a ck/config.h. We just do that part here. Without this, the ck.h from the + # ROCM SDK may get accidentally used instead. +- function(_pytorch_rocm_generate_ck_conf) +- set(CK_ENABLE_INT8 "ON") +- set(CK_ENABLE_FP16 "ON") +- set(CK_ENABLE_FP32 "ON") +- set(CK_ENABLE_FP64 "ON") +- set(CK_ENABLE_BF16 "ON") +- set(CK_ENABLE_FP8 "ON") +- set(CK_ENABLE_BF8 "ON") +- set(CK_USE_XDL "ON") +- set(CK_USE_WMMA "ON") +- configure_file( +- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" +- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" +- ) +- endfunction() ++# function(_pytorch_rocm_generate_ck_conf) ++# set(CK_ENABLE_INT8 "ON") ++# set(CK_ENABLE_FP16 "ON") ++# set(CK_ENABLE_FP32 "ON") ++# set(CK_ENABLE_FP64 "ON") ++# set(CK_ENABLE_BF16 "ON") ++# set(CK_ENABLE_FP8 "ON") ++# set(CK_ENABLE_BF8 "ON") ++# set(CK_USE_XDL "ON") ++# set(CK_USE_WMMA "ON") ++# configure_file( ++# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" ++# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" ++# ) ++# endfunction() + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) +- _pytorch_rocm_generate_ck_conf() ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) ++# list(APPEND ATen_HIP_INCLUDE 
${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) ++# _pytorch_rocm_generate_ck_conf() + + # Next two lines are needed because TunableOp uses third-party/fmt + list(APPEND ATen_HIP_INCLUDE $) +@@ -409,7 +409,7 @@ endif() + ${native_quantized_hip_hip} + ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} + ) +- if(WIN32) # Windows doesn't support Composable Kernels ++ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels + file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") + file(GLOB native_hip_ck "native/hip/ck*.hip") + exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index 89350a11bea7..e5b7960177cf 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -752,7 +752,7 @@ template <> + void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); + #else +@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } +@@ -1270,14 +1270,14 @@ template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); + #else + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); + #endif + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if 
(at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); + } +@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); +@@ -1311,7 +1311,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1327,7 +1327,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); + } +@@ -1361,7 +1361,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if 
defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index a93386c27f8d..be1368999d38 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1031,6 +1031,9 @@ if(USE_ROCM) + if(HIPBLASLT_VEC_EXT) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) + endif() ++ if(USE_ROCM_CK) ++ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) ++ endif() + list(APPEND HIP_HIPCC_FLAGS --offload-compress) + if(WIN32) + add_definitions(-DROCM_ON_WINDOWS) +-- +2.49.0 + diff --git a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch new file mode 100644 index 0000000..b6a282c --- /dev/null +++ b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch @@ -0,0 +1,359 @@ +From 
f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 +From: albanD +Date: Sat, 12 Jul 2025 03:42:33 -0400 +Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 + +Imported from +https://github.com/albanD/pytorch/tree/cpython314_build +commit 88bb9cdb72449f4277829e20d94ad8aec1894216 + +Signed-off-by: Tom Rix +--- + torch/_dynamo/bytecode_analysis.py | 2 +- + torch/ao/quantization/__init__.py | 5 +++- + torch/ao/quantization/qconfig.py | 4 ++- + torch/ao/quantization/utils.py | 7 +++-- + torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ + torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ + torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- + torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ + torch/csrc/utils/python_compat.h | 1 + + torch/onnx/__init__.py | 1 - + torch/utils/weak.py | 29 +++++++++++++++++-- + 11 files changed, 111 insertions(+), 19 deletions(-) + +diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py +index 3252ea91409f..2de74ee5bf8d 100644 +--- a/torch/_dynamo/bytecode_analysis.py ++++ b/torch/_dynamo/bytecode_analysis.py +@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): + TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) + else: + TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) +-if sys.version_info >= (3, 12): ++if (3, 12) <= sys.version_info < (3, 14): + TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) + if sys.version_info >= (3, 13): + TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) +diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py +index ffc1792fd23f..cf5a8b99a894 100644 +--- a/torch/ao/quantization/__init__.py ++++ b/torch/ao/quantization/__init__.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + ++import sys + from typing import Callable, Optional, Union + + import torch +@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 + + # ensure __module__ is set correctly for public APIs + ObserverOrFakeQuantize = 
Union[ObserverBase, FakeQuantizeBase] +-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++if sys.version_info < (3, 14): ++ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++ + for _f in [ + compare_results, + extract_results_from_loggers, +diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py +index efee5302ad42..d9a8fc78bab4 100644 +--- a/torch/ao/quantization/qconfig.py ++++ b/torch/ao/quantization/qconfig.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + import copy ++import sys + import warnings + from collections import namedtuple + from typing import Any, Optional, Union +@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N + + + QConfigAny = Optional[QConfig] +-QConfigAny.__module__ = "torch.ao.quantization.qconfig" ++if sys.version_info < (3, 14): ++ QConfigAny.__module__ = "torch.ao.quantization.qconfig" + + + def _add_module_to_qconfig_obs_ctr( +diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py +index 4ac3112ec072..3b1503e01701 100644 +--- a/torch/ao/quantization/utils.py ++++ b/torch/ao/quantization/utils.py +@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) + """ + + import functools ++import sys + import warnings + from collections import OrderedDict + from inspect import getfullargspec, signature +@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized + + + NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] +-NodePattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ NodePattern.__module__ = "torch.ao.quantization.utils" + + # This is the Quantizer class instance from torch/quantization/fx/quantize.py. + # Define separately to prevent circular imports. 
+@@ -31,7 +33,8 @@ QuantizerCls = Any + Pattern = Union[ + Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any + ] +-Pattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ Pattern.__module__ = "torch.ao.quantization.utils" + + + # TODO: maybe rename this to MatchInputNode +diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c +index b68ef894aeaa..244d4165d5e8 100644 +--- a/torch/csrc/dynamo/cpython_defs.c ++++ b/torch/csrc/dynamo/cpython_defs.c +@@ -2,6 +2,20 @@ + #include + #include + ++#if IS_PYTHON_3_14_PLUS ++ ++const uint8_t* THP_PyOpcode_Caches = NULL; ++const int THP_PyOpcode_Caches_size = 0; ++ ++void ++THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) ++{} ++void ++THP_PyFrame_Clear(_PyInterpreterFrame *frame) ++{} ++ ++#else ++ + #if IS_PYTHON_3_11_PLUS + + #define Py_BUILD_CORE +@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; + const int THP_PyOpcode_Caches_size = 0; + + #endif ++ ++#endif // IS_PYTHON_3_14_PLUS +\ No newline at end of file +diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h +index 6b99c1d5aec8..616be16563cf 100644 +--- a/torch/csrc/dynamo/cpython_includes.h ++++ b/torch/csrc/dynamo/cpython_includes.h +@@ -21,6 +21,14 @@ + + #if IS_PYTHON_3_11_PLUS + #include ++#if IS_PYTHON_3_14_PLUS ++#include ++#include ++#endif ++#endif ++ ++#if IS_PYTHON_3_14_PLUS ++#include + #endif + + #undef Py_BUILD_CORE +@@ -30,6 +38,13 @@ + extern "C" { + #endif + ++#if IS_PYTHON_3_14_PLUS ++ ++#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) ++#define PREV_INSTR(x) (x)->instr_ptr ++ ++#else ++ + #if IS_PYTHON_3_13_PLUS + #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) + #define PREV_INSTR(x) (x)->instr_ptr +@@ -38,6 +53,8 @@ extern "C" { + #define PREV_INSTR(x) (x)->prev_instr + #endif + ++#endif // IS_PYTHON_3_14_PLUS ++ + #if IS_PYTHON_3_12_PLUS + 
#define FUNC(x) ((x)->f_funcobj) + #else +diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c +index f413782b2d30..72bb8839bac3 100644 +--- a/torch/csrc/dynamo/eval_frame.c ++++ b/torch/csrc/dynamo/eval_frame.c +@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { + return PyUnicode_AsUTF8(F_CODE(frame)->co_name); + } + +-void clear_old_frame_if_python_312_plus( +- PyThreadState* tstate, +- THP_EVAL_API_FRAME_OBJECT* frame) { +-#if IS_PYTHON_3_12_PLUS +- +- THP_PyFrame_Clear(frame); +- THP_PyThreadState_PopFrame(tstate, frame); +- +-#endif +-} +- + static PyObject* dynamo_eval_custom_code_impl( + PyThreadState* tstate, + THP_EVAL_API_FRAME_OBJECT* frame, +@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( + + static void enable_eval_frame_shim(PyThreadState* tstate) {} + static void enable_eval_frame_default(PyThreadState* tstate) {} ++PyObject* dynamo_eval_custom_code( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ PyCodeObject* code, ++ const char* trace_annotation, ++ int throw_flag) {} ++THPPyInterpreterFrame* THPPyInterpreterFrame_New( ++ THP_EVAL_API_FRAME_OBJECT* frame) {} ++PyObject* dynamo_eval_frame_default( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ int throw_flag) {} + + static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; + +@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { + + #endif // !(IS_PYTHON_3_14_PLUS) + ++void clear_old_frame_if_python_312_plus( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame) { ++#if IS_PYTHON_3_12_PLUS ++ ++ THP_PyFrame_Clear(frame); ++ THP_PyThreadState_PopFrame(tstate, frame); ++ ++#endif ++} ++ + static PyObject* increment_working_threads( + PyThreadState* tstate, + PyObject* module) { +diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp +index b839fb26fc91..c4ee36d87767 100644 +--- 
a/torch/csrc/dynamo/framelocals_mapping.cpp ++++ b/torch/csrc/dynamo/framelocals_mapping.cpp +@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + PyCodeObject* co = F_CODE(frame); + _framelocals.resize(co->co_nlocalsplus, nullptr); + ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + if (!frame->stacktop) { + return; + } ++#endif + + auto update_framelocals = [&](int i, PyObject* value) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); +@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + }; + + auto offset = co->co_nlocalsplus - co->co_nfreevars; ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + for (int i = 0; i < offset; i++) { + update_framelocals(i, frame->localsplus[i]); + } ++#endif ++ + // Get references to closure variables ++#if IS_PYTHON_3_14_PLUS ++ PyObject* closure; ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; ++#endif + for (int i = 0; i < co->co_nfreevars; i++) { + update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); + } +diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h +index a1537611cc47..16292e4fd030 100644 +--- a/torch/csrc/utils/python_compat.h ++++ b/torch/csrc/utils/python_compat.h +@@ -13,6 +13,7 @@ extern "C" { + #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 + #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 + #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 ++#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 + + static inline int PyCode_GetNCellvars(PyCodeObject* code) { + // gh-26364 added co_ncellvars to Python 3.11.0rc1 +diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py +index 345ffd2a065b..ceeadde5365b 100644 +--- a/torch/onnx/__init__.py ++++ b/torch/onnx/__init__.py +@@ -104,7 +104,6 @@ 
ONNXProgram.__module__ = "torch.onnx" + OnnxExporterError.__module__ = "torch.onnx" + _OrtBackend.__module__ = "torch.onnx" + _OrtBackendOptions.__module__ = "torch.onnx" +-_OrtExecutionProvider.__module__ = "torch.onnx" + enable_fake_mode.__module__ = "torch.onnx" + is_onnxrt_backend_supported.__module__ = "torch.onnx" + +diff --git a/torch/utils/weak.py b/torch/utils/weak.py +index 8bf2ba5ed02b..9c7218cb2ad3 100644 +--- a/torch/utils/weak.py ++++ b/torch/utils/weak.py +@@ -3,8 +3,6 @@ from __future__ import annotations + + import collections.abc as _collections_abc + import weakref +- +-from _weakrefset import _IterationGuard # type: ignore[attr-defined] + from collections.abc import Mapping, MutableMapping + from weakref import ref + +@@ -22,6 +20,33 @@ __all__ = [ + ] + + ++# TODO: make weakref properly thread safe following ++# https://github.com/python/cpython/pull/125325 ++class _IterationGuard: ++ # This context manager registers itself in the current iterators of the ++ # weak container, such as to delay all removals until the context manager ++ # exits. ++ # This technique should be relatively thread-safe (since sets are). ++ ++ def __init__(self, weakcontainer): ++ # Don't create cycles ++ self.weakcontainer = ref(weakcontainer) ++ ++ def __enter__(self): ++ w = self.weakcontainer() ++ if w is not None: ++ w._iterating.add(self) ++ return self ++ ++ def __exit__(self, e, t, b): ++ w = self.weakcontainer() ++ if w is not None: ++ s = w._iterating ++ s.remove(self) ++ if not s: ++ w._commit_removals() ++ ++ + # This file defines a variant of WeakKeyDictionary that overrides the hashing + # behavior of the key to use object identity, rather than the builtin + # __eq__/__hash__ functions. 
This is useful for Tensor weak keys, as their +-- +2.49.0 + diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch deleted file mode 100644 index 56434a7..0000000 --- a/0001-Optionally-use-hipblaslt.patch +++ /dev/null @@ -1,262 +0,0 @@ -From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 23 Feb 2024 08:27:30 -0500 -Subject: [PATCH] Optionally use hipblaslt - -The hipblaslt package is not available on Fedora. -Instead of requiring the package, make it optional. -If it is found, define the preprocessor variable HIPBLASLT -Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks - -Signed-off-by: Tom Rix ---- - aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- - aten/src/ATen/cuda/CUDABlas.h | 2 +- - aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- - aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- - aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- - aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ - cmake/Dependencies.cmake | 3 +++ - cmake/public/LoadHIP.cmake | 4 ++-- - 8 files changed, 25 insertions(+), 19 deletions(-) - -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index d534ec5a178..e815463f630 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -14,7 +14,7 @@ - #include - - #ifdef USE_ROCM --#if ROCM_VERSION >= 60000 -+#ifdef HIPBLASLT - #include - #endif - // until hipblas has an API to accept flags, we must use rocblas here -@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { - } - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - - #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 - // only for rocm 5.7 where we first supported hipblaslt, it was difficult -@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public 
CuBlasLtDescriptor< - }; - } // namespace - -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - template - void gemm_and_bias( - bool transpose_mat1, -@@ -1124,7 +1125,7 @@ template void gemm_and_bias( - at::BFloat16* result_ptr, - int64_t result_ld, - GEMMAndBiasActivationEpilogue activation); -- -+#endif - void scaled_gemm( - char transa, - char transb, -diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h -index eb12bb350c5..068607467dd 100644 ---- a/aten/src/ATen/cuda/CUDABlas.h -+++ b/aten/src/ATen/cuda/CUDABlas.h -@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); - template <> - void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - enum GEMMAndBiasActivationEpilogue { - None, - RELU, -diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h -index 4ec35f59a21..e28dc42034f 100644 ---- a/aten/src/ATen/cuda/CUDAContextLight.h -+++ b/aten/src/ATen/cuda/CUDAContextLight.h -@@ -9,7 +9,7 @@ - - // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also - // added bf16 support --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - #include - #endif - -@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); - /* Handles */ - TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); - TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - TORCH_CUDA_CPP_API cublasLtHandle_t 
getCurrentCUDABlasLtHandle(); - #endif - -diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp -index 6913d2cd95e..3d4276be372 100644 ---- a/aten/src/ATen/cuda/CublasHandlePool.cpp -+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp -@@ -29,7 +29,7 @@ namespace at::cuda { - - namespace { - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - void createCublasLtHandle(cublasLtHandle_t *handle) { - TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); - } -@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { - return handle; - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasLtHandle_t getCurrentCUDABlasLtHandle() { - #ifdef USE_ROCM - c10::DeviceIndex device = 0; -diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h -index 3ba0d761277..dde1870cfbf 100644 ---- a/aten/src/ATen/cuda/tunable/TunableGemm.h -+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h -@@ -11,7 +11,7 @@ - - #include - #ifdef USE_ROCM --#if ROCM_VERSION >= 50700 -+#ifdef HIPBLASLT - #include - #endif - #include -@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex -@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex 
-diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp -index 29e5c5e3cf1..df56f3d7f1d 100644 ---- a/aten/src/ATen/native/cuda/Blas.cpp -+++ b/aten/src/ATen/native/cuda/Blas.cpp -@@ -155,7 +155,7 @@ enum class Activation { - GELU, - }; - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { - switch (a) { - case Activation::None: -@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { - - #ifdef USE_ROCM - static bool isSupportedHipLtROCmArch(int index) { -+#if defined(HIPBLASLT) - hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); - std::string device_arch = prop->gcnArchName; - static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; -@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { - } - } - TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); -+#endif - return false; - } - #endif -@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - at::ScalarType scalar_type = self.scalar_type(); - c10::MaybeOwned self_; - if (&result != &self) { --#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) - // Strangely, if mat2 has only 1 row or column, we get - // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. 
- // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] -@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - } - self__sizes = self_->sizes(); - } else { --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - useLtInterface = !disable_addmm_cuda_lt && - result.dim() == 2 && result.is_contiguous() && - isSupportedHipLtROCmArch(self.device().index()) && -@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - - TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - if (useLtInterface) { - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, -@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); - at::native::resize_output(amax, {}); - --#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) -+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasCommonArgs args(mat1, mat2, out); - const auto out_dtype_ = args.result->scalar_type(); - TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); -@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 60000 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - // rocm's hipblaslt does not yet support amax, so calculate separately - auto out_float32 = out.to(kFloat); - out_float32.abs_(); -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 
b7ffbeb07dc..2b6c3678984 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1273,6 +1273,9 @@ if(USE_ROCM) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() -+ if(hipblast_FOUND) -+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) -+ endif() - if(HIPBLASLT_CUSTOM_DATA_TYPE) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) - endif() -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index f6ca263c5e5..53eb0b63c1a 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -156,7 +156,7 @@ if(HIP_FOUND) - find_package_and_print_version(rocblas REQUIRED) - find_package_and_print_version(hipblas REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -- find_package_and_print_version(hipblaslt REQUIRED) -+ find_package_and_print_version(hipblaslt) - endif() - find_package_and_print_version(miopen REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") -@@ -191,7 +191,7 @@ if(HIP_FOUND) - # roctx is part of roctracer - find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) - -- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -+ if(hipblastlt_FOUND) - # check whether hipblaslt is using its own datatype - set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") - file(WRITE ${file} "" --- -2.43.2 - diff --git a/0001-Reenable-dim-for-python-3.12.patch b/0001-Reenable-dim-for-python-3.12.patch deleted file mode 100644 index 138b5d4..0000000 --- a/0001-Reenable-dim-for-python-3.12.patch +++ /dev/null @@ -1,115 +0,0 @@ -From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 4 Apr 2024 14:21:38 -0600 -Subject: [PATCH] Reenable dim for python 3.12 - -In 3.12: - -_PyArg_Parser added an element to the start of the structure. -So existing positional initialization is off. Switch to element -initialization. 
- -_Py_CODEUNIT changed to from an int to a union, but relevant_op -is passed an int for the return of decoder.opcode, so the parameter -type is wrong, switch it to int. - -The opcode PRECALL was removed, so reduce its handling to 3.11 - -Signed-off-by: Tom Rix ---- - functorch/csrc/dim/dim.cpp | 24 +++++------------------- - functorch/csrc/dim/minpybind.h | 4 ++-- - 2 files changed, 7 insertions(+), 21 deletions(-) - -diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp -index 4cc027504c77..e48b0d58081f 100644 ---- a/functorch/csrc/dim/dim.cpp -+++ b/functorch/csrc/dim/dim.cpp -@@ -6,20 +6,6 @@ - - #include - -- --// Many APIs have changed/don't exist anymore --#if IS_PYTHON_3_12_PLUS -- --#include "dim.h" -- --// Re-enable this some day --PyObject* Dim_init() { -- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); -- return nullptr; --} -- --#else -- - #include "minpybind.h" - #include - #include -@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self, - PY_BEGIN - mpy::handle sizes; - static const char * const _keywords[] = {"sizes", nullptr}; -- static _PyArg_Parser parser = {"O", _keywords, 0}; -+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords}; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) { - return nullptr; - } -@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self, - PY_BEGIN - int size; - static const char * const _keywords[] = {"N", nullptr}; -- static _PyArg_Parser parser = {"i", _keywords, 0}; -+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords}; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) { - return nullptr; - } -@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = { - - // dim() -------------------- - --static bool relevant_op(_Py_CODEUNIT c) { -+static bool relevant_op(int c) { - switch(c) { - case STORE_NAME: - case STORE_GLOBAL: -@@ -1587,7 +1573,7 @@ static PyObject* 
_dims(PyObject *self, - auto c = mpy::obj::steal(PyFrame_GetCode(f.ptr())); - auto lasti = PyFrame_GetLasti(f.ptr()); - auto decoder = PyInstDecoder(c.ptr(), lasti); -- #if IS_PYTHON_3_11_PLUS -+ #if IS_PYTHON_3_11 - // When py3.11 adapts bytecode lasti points to the precall - // rather than the call instruction after it - if (decoder.opcode() == PRECALL) { -@@ -3268,4 +3254,4 @@ PyObject* Dim_init() { - } - } - --#endif -+ -diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h -index de82b5af95a4..d76d4828bf80 100644 ---- a/functorch/csrc/dim/minpybind.h -+++ b/functorch/csrc/dim/minpybind.h -@@ -621,7 +621,7 @@ struct vector_args { - PyObject *dummy = NULL; - _PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy); - #else -- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0}; -+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr}; - std::unique_ptr buf(new PyObject*[names.size()]); - _PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]); - #endif -@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) { - #define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \ - static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \ - FORALL_ARGS(MPY_ARGS_DECLARE) \ -- static _PyArg_Parser parser = {fmt, kwlist, 0}; \ -+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \ - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \ - throw mpy::exception_set(); \ - } --- -2.44.0 - diff --git a/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch deleted file mode 100644 index 4eec491..0000000 --- a/0001-Regenerate-flatbuffer-header.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 
5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:19:29 -0700 -Subject: [PATCH] Regenerate flatbuffer header - -For this error -torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: -error: static assertion failed: Non-compatible flatbuffers version included - 12 | FLATBUFFERS_VERSION_MINOR == 3 && - -PyTorch is expecting 23.3.3, what f38 has -Rawhide is at 23.5.26 - -Regenerate with -flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs - -Signed-off-by: Tom Rix ---- - torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -index cffe8bc7a6..83575e4c19 100644 ---- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h -+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -@@ -9,8 +9,8 @@ - // Ensure the included flatbuffers.h is the same version as when this file was - // generated, otherwise it may not be compatible. - static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && -- FLATBUFFERS_VERSION_MINOR == 3 && -- FLATBUFFERS_VERSION_REVISION == 3, -+ FLATBUFFERS_VERSION_MINOR == 5 && -+ FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - - namespace torch { --- -2.43.0 - diff --git a/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch deleted file mode 100644 index f088645..0000000 --- a/0001-Stub-in-kineto-ActivityType.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:25:23 -0700 -Subject: [PATCH] Stub in kineto ActivityType - -There is an error with kineto is not used, the shim still -requires the ActivityTYpe.h header to get the enum Activity type. -So cut-n-paste just enough of the header in to do this. 
- -Signed-off-by: Tom Rix ---- - torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index e92cbf003d..68985ab7d0 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,51 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
-+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include - #include --- -2.43.0 - diff --git a/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch deleted file mode 100644 index 719737c..0000000 --- a/0001-can-not-use-with-c-files.patch +++ /dev/null @@ -1,25 +0,0 @@ -From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:49 -0500 -Subject: [PATCH] can not use with c files - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 4dd8042058..5f91f3ffab 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1269,7 +1269,7 @@ if(USE_ROCM) - list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) - list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) - list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) -- list(APPEND HIP_CXX_FLAGS -std=c++17) -+# list(APPEND HIP_CXX_FLAGS -std=c++17) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() --- -2.43.0 - diff --git a/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch deleted file mode 100644 index a258737..0000000 --- a/0001-cuda-hip-signatures.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 1 Feb 2024 11:29:47 -0500 -Subject: [PATCH] cuda - hip signatures - ---- - aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -index 1b85e7776e..bb6f88783a 100644 ---- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, - const char *src, - const char *name, - int numHeaders, -+#if !defined(USE_ROCM) 
- const char * const *headers, - const char * const *includeNames) { -+#else -+ const char **headers, -+ const char **includeNames) { -+#endif - auto fn = reinterpret_cast(getNVRTCLibrary().sym(__func__)); - if (!fn) - throw std::runtime_error("Can't get nvrtcCreateProgram"); -@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *); - NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *); - NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *); - #endif -+#if !defined(USE_ROCM) - NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *); -+#else -+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **); -+#endif - _STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult); - NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*); - NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *); --- -2.43.0 - diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch deleted file mode 100644 index 34a1704..0000000 --- a/0001-disable-use-of-aotriton.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Tue, 19 Mar 2024 11:32:37 -0400 -Subject: [PATCH] disable use of aotriton - ---- - aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -index 96b839820efd..2d3dd0cb4b0f 100644 ---- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -@@ -21,9 +21,11 @@ - #include - #include - -+#ifdef USE_FLASH_ATTENTION - #if USE_ROCM - #include - #endif -+#endif - - /** - * Note [SDPA Runtime Dispatch] -@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { - } - - bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { -+#ifdef USE_FLASH_ATTENTION - // Check that the gpu is 
capable of running flash attention - using sm80 = SMVersion<8, 0>; - using sm90 = SMVersion<9, 0>; -@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug - } - #endif - return true; -+#else -+ return false; -+#endif - } - - bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { --- -2.44.0 - diff --git a/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch deleted file mode 100644 index 0ce5b1f..0000000 --- a/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch +++ /dev/null @@ -1,226 +0,0 @@ -From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001 -From: William Wen -Date: Wed, 3 Apr 2024 17:58:46 -0700 -Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo - unittests on 3.12 (#123216) - -Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216 -Approved by: https://github.com/jansel, https://github.com/malfet ---- - test/dynamo/test_autograd_function.py | 3 ++ - test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++ - test/functorch/test_eager_transforms.py | 7 ++- - test/run_test.py | 3 -- - torch/__init__.py | 5 +- - torch/_dynamo/eval_frame.py | 4 +- - torch/_dynamo/test_case.py | 8 +--- - 7 files changed, 74 insertions(+), 19 deletions(-) - -diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py -index d23fec607afa..bc5ebc767038 100644 ---- a/test/dynamo/test_autograd_function.py -+++ b/test/dynamo/test_autograd_function.py -@@ -2,6 +2,8 @@ - - import copy - import math -+import sys -+import unittest - - import torch - -@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase): - # I pulled all of these test cases from test_autograd.py - # In the future, we should make the Dynamo test suite actually - # run on test_autograd.py (it's disabled right now) and delete these. 
-+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+") - def test_smoke_from_test_autograd(self): - class Func(torch.autograd.Function): - @staticmethod -diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py -index a73de8b1c7e9..8f54e0564e6b 100644 ---- a/test/dynamo/test_misc.py -+++ b/test/dynamo/test_misc.py -@@ -9760,6 +9760,69 @@ fn - lambda mod: mod, - ) - -+ @xfailIfPy311 -+ def test_outside_linear_module_free(self): -+ # Compared to test_linear_module_free, the linear -+ # layer is not the code object that is directly compiled. -+ def model_inp_ctr(): -+ fc = torch.nn.Linear(100, 100) -+ -+ class Mod(torch.nn.Module): -+ def __init__(self): -+ super().__init__() -+ self.fc_ref = fc -+ -+ def forward(self, x): -+ return fc(x[0]) -+ -+ # return fc to keep it alive in _test_compile_model_free -+ return Mod(), (torch.randn(100, 100), fc) -+ -+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref) -+ -+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+") -+ def test_parameter_free(self): -+ def model_inp_ctr(): -+ param = torch.nn.Parameter(torch.randn(100, 100)) -+ -+ class Mod(torch.nn.Module): -+ def __init__(self): -+ super().__init__() -+ self.param = param -+ -+ def forward(self, x): -+ return self.param * x[0] -+ -+ # return param to keep it alive in _test_compile_model_free -+ return Mod(), (torch.randn(100, 100), param) -+ -+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param) -+ -+ def test_raises_importerror1(self): -+ @torch.compile(backend="eager") -+ def fn(x): -+ try: -+ import some_module_that_surely_does_not_exist -+ -+ return -+ except ImportError: -+ pass -+ return x.sin() -+ -+ x = torch.randn(8) -+ self.assertEqual(fn(x), x.sin()) -+ -+ def test_raises_importerror2(self): -+ @torch.compile(backend="eager") -+ def fn(x): -+ import some_module_that_surely_does_not_exist -+ -+ return x + 1 -+ -+ x = torch.randn(8) -+ with self.assertRaises(ImportError): -+ fn(x) -+ - def 
test_dynamo_cache_move_to_front(self): - class Mod(torch.nn.Module): - def __init__(self): -diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py -index 09415cf8f48e..60790ec06059 100644 ---- a/test/functorch/test_eager_transforms.py -+++ b/test/functorch/test_eager_transforms.py -@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase): - # Triton only supports GPU with SM70 or later. - @expectedFailureIf((IS_ARM64 and not IS_MACOS) or - IS_WINDOWS or -- (TEST_CUDA and not SM70OrLater) or -- (sys.version_info >= (3, 12))) -+ (TEST_CUDA and not SM70OrLater)) - def test_compile_vmap_hessian(self, device): - # The model and inputs are a smaller version - # of code at benchmark repo: -@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase): - actual = opt_fn(params_and_buffers, x) - self.assertEqual(actual, expected) - -- # torch.compile is not supported on Windows or on Python 3.12+ -- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12))) -+ # torch.compile is not supported on Windows -+ @expectedFailureIf(IS_WINDOWS) - @torch._dynamo.config.patch(suppress_errors=False) - @torch._dynamo.config.patch(capture_func_transforms=True) - @skipIfTorchDynamo("Do not test torch.compile on top of torch.compile") -diff --git a/test/run_test.py b/test/run_test.py -index e86af9623042..ebb14df4167d 100755 ---- a/test/run_test.py -+++ b/test/run_test.py -@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT)) - RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1" - DISTRIBUTED_TEST_PREFIX = "distributed" - INDUCTOR_TEST_PREFIX = "inductor" --DYNAMO_TEST_PREFIX = "dynamo" - - - # Note [ROCm parallel CI testing] -@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [ - ] - - INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)] --DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)] - DISTRIBUTED_TESTS = [test for test in TESTS if 
test.startswith(DISTRIBUTED_TEST_PREFIX)] - TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")] - FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")] -@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]: - # these tests failing in Python 3.12 temporarily disabling - if sys.version_info >= (3, 12): - options.exclude.extend(INDUCTOR_TESTS) -- options.exclude.extend(DYNAMO_TESTS) - options.exclude.extend( - [ - "functorch/test_dims", -diff --git a/torch/__init__.py b/torch/__init__.py -index d381712b4a35..26cdffe81d29 100644 ---- a/torch/__init__.py -+++ b/torch/__init__.py -@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *, - - """ - _C._log_api_usage_once("torch.compile") -- # Temporary until we get proper support for python 3.12 -- if sys.version_info >= (3, 12): -- raise RuntimeError("Dynamo is not supported on Python 3.12+") -+ if sys.version_info >= (3, 13): -+ raise RuntimeError("Dynamo is not supported on Python 3.13+") - - # Decorator mode - if model is None: -diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py -index 53ab0df3a947..0a80eeea99ed 100644 ---- a/torch/_dynamo/eval_frame.py -+++ b/torch/_dynamo/eval_frame.py -@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg] - - - def check_if_dynamo_supported(): -- if sys.version_info >= (3, 12): -- raise RuntimeError("Python 3.12+ not yet supported for torch.compile") -+ if sys.version_info >= (3, 13): -+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile") - - - def is_dynamo_supported(): -diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py -index e3cbef09eaae..297ea6e2bc2a 100644 ---- a/torch/_dynamo/test_case.py -+++ b/torch/_dynamo/test_case.py -@@ -1,7 +1,6 @@ - import contextlib - import importlib - import logging --import sys - - import torch - import torch.testing -@@ -20,12 +19,7 @@ log = logging.getLogger(__name__) - def 
run_tests(needs=()): - from torch.testing._internal.common_utils import run_tests - -- if ( -- TEST_WITH_TORCHDYNAMO -- or IS_WINDOWS -- or TEST_WITH_CROSSREF -- or sys.version_info >= (3, 12) -- ): -+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF: - return # skip testing - - if isinstance(needs, str): --- -2.44.0 - diff --git a/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch deleted file mode 100644 index 71404e3..0000000 --- a/0001-no-third_party-FXdiv.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:20:28 -0500 -Subject: [PATCH] no third_party FXdiv - ---- - caffe2/CMakeLists.txt | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index b2f3adbfae..80a5625c8d 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -110,15 +110,15 @@ endif() - # Note: the folders that are being commented out have not been properly - # addressed yet. 
- --if(NOT MSVC AND USE_XNNPACK) -- if(NOT TARGET fxdiv) -- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -- add_subdirectory( -- "${FXDIV_SOURCE_DIR}" -- "${CMAKE_BINARY_DIR}/FXdiv") -- endif() --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# if(NOT TARGET fxdiv) -+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -+# add_subdirectory( -+# "${FXDIV_SOURCE_DIR}" -+# "${CMAKE_BINARY_DIR}/FXdiv") -+# endif() -+#endif() - - add_subdirectory(core) - add_subdirectory(serialize) -@@ -1081,9 +1081,9 @@ if(USE_XPU) - target_compile_definitions(torch_xpu PRIVATE USE_XPU) - endif() - --if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) -+#endif() - - # ========================================================== - # formerly-libtorch flags --- -2.43.0 - diff --git a/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch deleted file mode 100644 index 6e82af2..0000000 --- a/0001-no-third_party-fmt.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:16:04 -0500 -Subject: [PATCH] no third_party fmt - ---- - c10/CMakeLists.txt | 2 +- - cmake/Dependencies.cmake | 6 +++--- - torch/CMakeLists.txt | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index 1f742f4c176..4fa08913bdd 100644 ---- a/c10/CMakeLists.txt -+++ b/c10/CMakeLists.txt -@@ -87,7 +87,7 @@ endif() - if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) - endif() --target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -+target_link_libraries(c10 PRIVATE fmt) - - if(C10_USE_NUMA) - message(STATUS "NUMA paths:") -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 6f5a2d5feff..42fbf80f6e8 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1837,7 +1837,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - - # Disable compiler feature checks for `fmt`. - # -@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know - # `fmt` is compatible with a superset of the compilers that PyTorch is, it - # shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") -+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") - --list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) -+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) - - # ---[ Kineto -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 97a72eed55b..9e5014d1980 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES - python::python - pybind::pybind11 - shm -- fmt::fmt-header-only -+ fmt - ATEN_CPU_FILES_GEN_LIB) - - if(USE_ASAN AND TARGET Sanitizer::address) --- -2.43.2 - diff --git a/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch deleted file mode 100644 index ba1ec40..0000000 --- a/0001-no-third_party-foxi.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:11:55 -0500 -Subject: [PATCH] no third_party foxi - ---- - cmake/Dependencies.cmake | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 5f91f3ffab..8e1461af81 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) - endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) -+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() -- include_directories(${FOXI_INCLUDE_DIRS}) -- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+# include_directories(${FOXI_INCLUDE_DIRS}) -+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.0 - diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch deleted file mode 100644 index 8e39795..0000000 --- a/0001-reenable-foxi-linking.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 22 Feb 2024 09:28:11 -0500 -Subject: [PATCH] reenable foxi linking - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 42fbf80f6e8..bc3a2dc6fee 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() - # include_directories(${FOXI_INCLUDE_DIRS}) --# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. 
- set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.2 - diff --git a/0001-silence-an-assert.patch b/0001-silence-an-assert.patch deleted file mode 100644 index 0b20dcf..0000000 --- a/0001-silence-an-assert.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:20 -0500 -Subject: [PATCH] silence an assert - ---- - aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu -index 657c0c77b3..b406aa6687 100644 ---- a/aten/src/ATen/native/cuda/IndexKernel.cu -+++ b/aten/src/ATen/native/cuda/IndexKernel.cu -@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind - - gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { - int64_t qvalue = static_cast(zero_point + nearbyintf(*(float*)in_data * inv_scale)); -- qvalue = std::clamp(qvalue, qmin, qmax); -+ //qvalue = std::clamp(qvalue, qmin, qmax); - *(scalar_t*)(out_data + offset) = static_cast(qvalue); - }); - }); --- -2.43.0 - diff --git a/0001-use-any-hip.patch b/0001-use-any-hip.patch deleted file mode 100644 index dca86ea..0000000 --- a/0001-use-any-hip.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 15:01:28 -0500 -Subject: [PATCH] use any hip - ---- - cmake/public/LoadHIP.cmake | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 1abeb06228..28458c4146 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -30,7 +30,7 @@ endif() - message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") - - # Add HIP to 
the CMAKE Module Path --set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH}) -+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH}) - - macro(find_package_and_print_version PACKAGE_NAME) - find_package("${PACKAGE_NAME}" ${ARGN}) -@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME) - endmacro() - - # Find the HIP Package --find_package_and_print_version(HIP 1.0) -+find_package_and_print_version(HIP MODULE) - - if(HIP_FOUND) - set(PYTORCH_FOUND_HIP TRUE) --- -2.43.0 - diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch new file mode 100644 index 0000000..1900519 --- /dev/null +++ b/next/0001-Use-horrible-dynamo-stub.patch @@ -0,0 +1,85 @@ +From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sun, 20 Jul 2025 12:47:58 -0700 +Subject: [PATCH] Use horrible dynamo stub + +Rawhide's update of python is too fast for dynamo +So paper of the problem with a horrible stub that throws +runtime exceptions if dynamo is used. 
+ +Signed-off-by: Tom Rix +--- + build_variables.bzl | 26 ++++++++++++---------- + torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ + 2 files changed, 30 insertions(+), 12 deletions(-) + create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp + +diff --git a/build_variables.bzl b/build_variables.bzl +index b266c80e8843..a3be6893349b 100644 +--- a/build_variables.bzl ++++ b/build_variables.bzl +@@ -140,7 +140,8 @@ core_trainer_sources = [ + "torch/csrc/autograd/variable.cpp", + "torch/csrc/autograd/utils/warnings.cpp", + "torch/csrc/autograd/jit_decomp_interface.cpp", +- "torch/csrc/dynamo/compiled_autograd.cpp", ++# "torch/csrc/dynamo/compiled_autograd.cpp", ++ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/jit/frontend/name_mangler.cpp", + "torch/csrc/jit/ir/type_hashing.cpp", + "torch/csrc/jit/serialization/pickler.cpp", +@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ + "torch/csrc/autograd/python_torch_functions_manual.cpp", + "torch/csrc/autograd/python_variable.cpp", + "torch/csrc/autograd/python_variable_indexing.cpp", +- "torch/csrc/dynamo/python_compiled_autograd.cpp", +- "torch/csrc/dynamo/cache_entry.cpp", +- "torch/csrc/dynamo/cpp_shim.cpp", +- "torch/csrc/dynamo/cpython_defs.c", +- "torch/csrc/dynamo/eval_frame.c", +- "torch/csrc/dynamo/eval_frame_cpp.cpp", +- "torch/csrc/dynamo/extra_state.cpp", +- "torch/csrc/dynamo/framelocals_mapping.cpp", +- "torch/csrc/dynamo/guards.cpp", +- "torch/csrc/dynamo/utils.cpp", +- "torch/csrc/dynamo/init.cpp", ++# "torch/csrc/dynamo/python_compiled_autograd.cpp", ++# "torch/csrc/dynamo/cache_entry.cpp", ++# "torch/csrc/dynamo/cpp_shim.cpp", ++# "torch/csrc/dynamo/cpython_defs.c", ++# "torch/csrc/dynamo/eval_frame.c", ++# "torch/csrc/dynamo/eval_frame_cpp.cpp", ++# "torch/csrc/dynamo/extra_state.cpp", ++# "torch/csrc/dynamo/framelocals_mapping.cpp", ++# "torch/csrc/dynamo/guards.cpp", ++# "torch/csrc/dynamo/utils.cpp", ++# "torch/csrc/dynamo/init.cpp", ++ 
"torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/functorch/init.cpp", + "torch/csrc/fx/node.cpp", + "torch/csrc/mps/Module.cpp", +diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +new file mode 100644 +index 000000000000..3ac1324d4557 +--- /dev/null ++++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +@@ -0,0 +1,16 @@ ++#include ++#include ++ ++namespace torch::dynamo::autograd { ++const std::unique_ptr& getPyCompilerInterface() { ++ throw std::runtime_error("Dynamo not supported"); ++ return nullptr; ++} ++std::vector> get_input_metadata( ++ const edge_list& edges) { ++ std::vector> r; ++ throw std::runtime_error("Dynamo not supported"); ++ return r; ++} ++ ++} +-- +2.49.0 + diff --git a/pyproject.toml b/pyproject.toml index 9508ad0..925742b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,46 +1,165 @@ +# Package ###################################################################### + [build-system] requires = [ - "setuptools", - "wheel", - "astunparse", - "numpy", + # 70.1.0: min version for integrated bdist_wheel command from wheel package + # 77.0.0: min version for SPDX expression support for project.license + "setuptools>=70.1.0,<80.0", + "cmake>=3.27", "ninja", + "numpy", + "packaging", "pyyaml", - "cmake", - "typing-extensions", "requests", + "six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", ] -# Use legacy backend to import local packages in setup.py -build-backend = "setuptools.build_meta:__legacy__" +build-backend = "setuptools.build_meta" +[dependency-groups] +dev = [ + # This list should be kept in sync with the requirements-build.txt + # in PyTorch root until the project fully migrates to pyproject.toml + # after which this can be removed as it is already specified in the + # [build-system] section + "setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0 + "cmake>=3.27", + "ninja", + "numpy", + "packaging", + "pyyaml", + "requests", + 
"six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", -[tool.black] -# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 -# line-length = 120 -target-version = ["py38", "py39", "py310", "py311"] + # This list should be kept in sync with the requirements.txt in + # PyTorch root until the project fully migrates to pyproject.toml + "build[uv]", + "expecttest>=0.3.0", + "filelock", + "fsspec>=0.8.5", + "hypothesis", + "jinja2", + "lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'", + "networkx>=2.5.1", + "optree>=0.13.0", + "psutil", + "sympy>=1.13.3", + "typing-extensions>=4.13.2", + "wheel", +] +[project] +name = "torch" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +readme = "README.md" +requires-python = ">=3.10" +# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77 +# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment. +# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed +# to an error on 2026.02.18. 
See also: https://github.com/pypa/setuptools/issues/4903 +license = { text = "BSD-3-Clause" } +authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }] +keywords = ["pytorch", "machine learning"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: C++", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dynamic = [ + "entry-points", + "dependencies", + "scripts", + "version", +] + +[project.urls] +Homepage = "https://pytorch.org" +Repository = "https://github.com/pytorch/pytorch" +Documentation = "https://pytorch.org/docs" +"Issue Tracker" = "https://github.com/pytorch/pytorch/issues" +Forum = "https://discuss.pytorch.org" + +[project.optional-dependencies] +optree = ["optree>=0.13.0"] +opt-einsum = ["opt-einsum>=3.3"] +pyyaml = ["pyyaml"] + +# Linter tools ################################################################# + +[tool.isort] +src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"] +extra_standard_library = ["typing_extensions"] +skip_gitignore = true +skip_glob = ["third_party/*"] +atomic = true +profile = "black" +indent = 4 +line_length = 88 +lines_after_imports = 2 +multi_line_output = 3 +include_trailing_comma = true +combine_as_imports = true + +[tool.usort.known] +first_party = ["caffe2", "torch", "torchgen", "functorch", "test"] +standard_library = 
["typing_extensions"] [tool.ruff] -target-version = "py38" +line-length = 88 +src = ["caffe2", "torch", "torchgen", "functorch", "test"] +[tool.ruff.format] +docstring-code-format = true +quote-style = "double" + +[tool.ruff.lint] # NOTE: Synchoronize the ignores with .flake8 +external = [ + "B001", + "B902", + "B950", + "E121", + "E122", + "E128", + "E131", + "E704", + "E723", + "F723", + "F812", + "P201", + "P204", + "T484", + "TOR901", +] ignore = [ # these ignores are from flake8-bugbear; please fix! "B007", "B008", "B017", "B018", # Useless expression - "B019", "B023", "B028", # No explicit `stacklevel` keyword argument found - "B904", "E402", "C408", # C408 ignored because we like the dict keyword argument syntax "E501", # E501 is not flexible enough, we're using B950 instead "E721", - "E731", # Assign lambda expression "E741", "EXE001", "F405", - "F841", + "FURB122", # writelines # these ignores are from flake8-logging-format; please fix! "G101", # these ignores are from ruff NPY; please fix! @@ -48,39 +167,49 @@ ignore = [ # these ignores are from ruff PERF; please fix! "PERF203", "PERF401", - "PERF403", # these ignores are from PYI; please fix! - "PYI019", "PYI024", "PYI036", "PYI041", "PYI056", "SIM102", "SIM103", "SIM112", # flake8-simplify code styles "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason - "SIM108", + "SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression "SIM110", "SIM114", # Combine `if` branches using logical `or` operator "SIM115", "SIM116", # Disable Use a dictionary instead of consecutive `if` statements "SIM117", "SIM118", - "UP006", # keep-runtime-typing "UP007", # keep-runtime-typing + "UP045", # keep-runtime-typing + "TC006", + # TODO: Remove Python-3.10 specific suppressions + "B905", + "UP035", + "UP036", + "UP038", + "UP041", + "FURB161", ] -line-length = 120 select = [ "B", + "B904", # Re-raised error without specifying the cause via the from keyword "C4", "G", "E", "EXE", "F", "SIM1", + "SIM911", "W", # Not included in flake8 + "FURB", + "LOG", "NPY", "PERF", "PGH004", + "PIE790", "PIE794", "PIE800", "PIE804", @@ -89,40 +218,96 @@ select = [ "PLC0131", # type bivariance "PLC0132", # type param mismatch "PLC0205", # string as __slots__ + "PLC3002", # unnecessary-direct-lambda-call "PLE", "PLR0133", # constant comparison "PLR0206", # property with params "PLR1722", # use sys exit + "PLR1736", # unnecessary list index "PLW0129", # assert on string literal + "PLW0131", # named expr without context + "PLW0133", # useless exception statement + "PLW0245", # super without brackets "PLW0406", # import self "PLW0711", # binary op exception + "PLW1501", # bad open mode + "PLW1507", # shallow copy os.environ "PLW1509", # preexec_fn not safe with threads + "PLW2101", # useless lock statement "PLW3301", # nested min max "PT006", # TODO: enable more PT rules + "PT014", # duplicate parameterize case "PT022", "PT023", "PT024", "PT025", "PT026", "PYI", + "Q003", # avoidable escaped quote + "Q004", # unnecessary escaped quote + "RSE", "RUF008", # mutable dataclass default + "RUF013", # ban implicit optional "RUF015", # access first ele in constant time "RUF016", # type error non-integer index "RUF017", - "TRY200", - "TRY302", + "RUF018", # no assignment in assert + "RUF019", # 
unnecessary-key-check + "RUF020", # never union + "RUF024", # from keys mutable + "RUF026", # default factory kwarg + "RUF030", # No print statement in assert + "RUF033", # default values __post_init__ dataclass + "RUF041", # simplify nested Literal + "RUF048", # properly parse `__version__` + "RUF200", # validate pyproject.toml + "S324", # for hashlib FIPS compliance + "SLOT", + "TC", + "TRY002", # ban vanilla raise (todo fix NOQAs) + "TRY203", + "TRY401", # verbose-log-message "UP", + "YTT", ] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.pyupgrade] +# Preserve types, even if a file imports `from __future__ import annotations`. +keep-runtime-typing = true + +[tool.ruff.lint.per-file-ignores] "__init__.py" = [ "F401", ] +"*.pyi" = [ + "PYI011", # typed-argument-default-in-stub + "PYI021", # docstring-in-stub + "PYI053", # string-or-bytes-too-long +] +"functorch/notebooks/**" = [ + "F401", +] +"test/export/**" = [ + "PGH004" +] +"test/typing/**" = [ + "PGH004" +] "test/typing/reveal/**" = [ "F821", ] "test/torch_np/numpy_tests/**" = [ "F821", + "NPY201", +] +"test/dynamo/test_bytecode_utils.py" = [ + "F821", +] +"test/dynamo/test_debug_utils.py" = [ + "UP037", +] +"test/dynamo/test_misc.py" = [ + "PGH004", ] "test/jit/**" = [ "PLR0133", # tests require this for JIT @@ -136,19 +321,33 @@ select = [ "RUF015", "UP", # We don't want to modify the jit test as they test specify syntax ] - -"torch/onnx/**" = [ - "UP037", # ONNX does runtime type checking +"test/inductor/s429861_repro.py" = [ + "PGH004", +] +"test/inductor/test_torchinductor.py" = [ + "UP037", +] +# autogenerated #TODO figure out why file level noqa is ignored +"torch/_appdirs.py" = ["PGH004"] +"torch/jit/_shape_functions.py" = ["PGH004"] +"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"] +"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"] +"torch/_inductor/codegen/**" = [ + "PGH004" ] - "torchgen/api/types/__init__.py" = [ "F401", "F403", ] 
-"torchgen/executorch/api/types/__init__.py" = [ - "F401", - "F403", -] "torch/utils/collect_env.py" = [ "UP", # collect_env.py needs to work with older versions of Python ] +"torch/_vendor/**" = [ + "UP", # No need to mess with _vendor +] +"tools/linter/**" = [ + "LOG015" # please fix +] + +[tool.codespell] +ignore-words = "tools/linter/dictionary.txt" diff --git a/python-torch.spec b/python-torch.spec index 6f102ca..d3c31d7 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -6,13 +6,20 @@ # So pre releases can be tried %bcond_with gitcommit %if %{with gitcommit} -# ToT -%global commit0 75b0720a97ac5d82e8a7a1a6ae7c5f7a87d7183d +# v2.9.0-rc9 +%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240609 -%global pypi_version 2.4.0 +%global date0 20251008 +%global pypi_version 2.9.0 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 +%global rc_tag -rc9 %else -%global pypi_version 2.3.1 +%global pypi_version 2.9.1 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 %endif # For -test subpackage @@ -23,94 +30,29 @@ %bcond_with test %ifarch x86_64 -# ROCm support came in F40 -%if 0%{?fedora} > 39 %bcond_without rocm -%else -%bcond_with rocm -%endif -%endif -# hipblaslt is in development -%bcond_with hipblaslt -# Which families gpu build for -%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 gfx90a gfx942 gfx1100 -%global rocm_default_gpu default -%bcond_without rocm_loop - -# Caffe2 support came in F41 -%if 0%{?fedora} > 40 -%bcond_without caffe2 -%else -%bcond_with caffe2 %endif -# Distributed support came in F41 -%if 0%{?fedora} > 40 -%bcond_without distributed # For testing distributed+rccl etc. 
%bcond_without rccl %bcond_with gloo %bcond_without mpi %bcond_without tensorpipe -%else -%bcond_with distributed -%endif - -# OpenCV support came in F41 -%if 0%{?fedora} > 40 -%bcond_without opencv -%else -%bcond_with opencv -%endif - -# Do no confuse xnnpack versions -%if 0%{?fedora} > 40 -%bcond_without xnnpack -%else -%bcond_with xnnpack -%endif - -%if 0%{?fedora} > 39 -%bcond_without pthreadpool -%else -%bcond_with pthreadpool -%endif - -%if 0%{?fedora} > 39 -%bcond_without pocketfft -%else -%bcond_with pocketfft -%endif - -# For testing cuda -%ifarch x86_64 -%bcond_with cuda -%endif - -# For testing compat-gcc -%global compat_gcc_major 13 -%bcond_with compat_gcc # Disable dwz with rocm because memory can be exhausted %if %{with rocm} %define _find_debuginfo_dwz_opts %{nil} %endif -%if %{with cuda} -# workaround problems with -pie -%global build_cxxflags %{nil} -%global build_ldflags %{nil} -%endif - # These came in 2.4 and not yet in Fedora -%if %{with gitcommit} %bcond_with opentelemetry %bcond_with httplib %bcond_with kineto + +%if 0%{?fedora} +%bcond_without onnx %else -%bcond_without opentelemetry -%bcond_without httplib -%bcond_without kineto +%bcond_with onnx %endif Name: python-%{pypi_name} @@ -131,17 +73,9 @@ Source1000: pyproject.toml %else Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif -Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz -Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz -%if %{with cuda} -%global cuf_ver 1.1.2 -Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz -%global cul_ver 3.4.1 -Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz -%endif - -%if %{with tensorpipe} # Developement on 
tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit %global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e %global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) @@ -152,36 +86,7 @@ Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz %global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 %global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz -%endif -%if %{without xnnpack} -%global xnn_commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2 -%global xnn_scommit %(c=%{xnn_commit}; echo ${c:0:7}) -Source30: https://github.com/google/xnnpack/archive/%{xnn_commit}/xnnpack-%{xnn_scommit}.tar.gz -%global fx_commit 63058eff77e11aa15bf531df5dd34395ec3017c8 -%global fx_scommit %(c=%{fx_commit}; echo ${c:0:7}) -Source31: https://github.com/Maratyszcza/fxdiv/archive/%{fx_commit}/FXdiv-%{fx_scommit}.tar.gz -%global fp_commit 0a92994d729ff76a58f692d3028ca1b64b145d91 -%global fp_scommit %(c=%{fp_commit}; echo ${c:0:7}) -Source32: https://github.com/Maratyszcza/FP16/archive/%{fp_commit}/FP16-%{fp_scommit}.tar.gz -%global ps_commit 072586a71b55b7f8c584153d223e95687148a900 -%global ps_scommit %(c=%{ps_commit}; echo ${c:0:7}) -Source33: https://github.com/Maratyszcza/psimd/archive/%{ps_commit}/psimd-%{ps_scommit}.tar.gz -%endif - -%if %{without pthreadpool} -%global pt_commit 4fe0e1e183925bf8cfa6aae24237e724a96479b8 -%global pt_scommit %(c=%{pt_commit}; echo ${c:0:7}) -Source40: https://github.com/Maratyszcza/pthreadpool/archive/%{pt_commit}/pthreadpool-%{pt_scommit}.tar.gz -%endif - -%if %{without pocketfft} -%global pf_commit 076cb3d2536b7c5d0629093ad886e10ac05f3623 -%global pf_scommit %(c=%{pf_commit}; echo ${c:0:7}) -Source50: https://github.com/mreineck/pocketfft/archive/%{pf_commit}/pocketfft-%{pf_scommit}.tar.gz -%endif - -%if %{with gitcommit} %if %{without opentelemetry} %global ot_ver 1.14.2 Source60: 
https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz @@ -194,105 +99,60 @@ Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp- %endif %if %{without kineto} -%global ki_commit be1317644c68b4bfc4646024a6b221066e430031 +%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 %global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz %endif -%endif -Patch0: 0001-no-third_party-foxi.patch +%global ox_ver 1.18.0 +Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz -%if %{without gitcommit} -Patch3: 0001-Stub-in-kineto-ActivityType.patch -%endif - -%if %{with caffe2} -Patch6: 0001-reenable-foxi-linking.patch -%endif - -# Bring some patches forward -%if %{without gitcommit} -# https://github.com/pytorch/pytorch/pull/123384 -Patch7: 0001-Reenable-dim-for-python-3.12.patch - -# Dynamo/Inductor on 3.12 -# Fails to apply on 2.3.1 -# Patch8: 0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch -%endif - -# ROCm patches -# Patches need to be refactored for ToT -%if %{without gitcommit} -# These are ROCm packages -%if %{without cuda} -# https://github.com/pytorch/pytorch/pull/120551 -Patch100: 0001-Optionally-use-hipblaslt.patch -Patch101: 0001-cuda-hip-signatures.patch -Patch102: 0001-silence-an-assert.patch -Patch103: 0001-can-not-use-with-c-files.patch -Patch104: 0001-use-any-hip.patch -Patch105: 0001-disable-use-of-aotriton.patch -%endif -%endif - -ExclusiveArch: x86_64 aarch64 +%global pt_arches x86_64 aarch64 +ExclusiveArch: %pt_arches %global toolchain gcc %global _lto_cflags %nil BuildRequires: cmake -BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel +BuildRequires: flexiblas-devel BuildRequires: fmt-devel -%if %{with caffe2} BuildRequires: foxi-devel -%endif - -%if %{with compat_gcc} -BuildRequires: compat-gcc-%{compat_gcc_major}-c++ -BuildRequires: 
compat-gcc-%{compat_gcc_major}-gfortran -%else BuildRequires: gcc-c++ BuildRequires: gcc-gfortran -%endif -%if %{with distributed} %if %{with gloo} BuildRequires: gloo-devel %endif -%endif -BuildRequires: ninja-build -BuildRequires: onnx-devel +BuildRequires: json-devel + BuildRequires: libomp-devel -%if %{with distributed} +BuildRequires: moodycamel-concurrentqueue-devel +BuildRequires: numactl-devel +BuildRequires: ninja-build +%if %{with onnx} +BuildRequires: onnx-devel +%endif %if %{with mpi} BuildRequires: openmpi-devel %endif -%endif -BuildRequires: openblas-devel BuildRequires: protobuf-devel BuildRequires: sleef-devel BuildRequires: valgrind-devel - -%if %{with pocketfft} BuildRequires: pocketfft-devel -%endif - -%if %{with pthreadpool} BuildRequires: pthreadpool-devel -%endif -%if %{with xnnpack} +BuildRequires: cpuinfo-devel BuildRequires: FP16-devel BuildRequires: fxdiv-devel BuildRequires: psimd-devel -BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a -%endif +BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) BuildRequires: python3dist(numpy) +BuildRequires: python3dist(pip) BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) BuildRequires: python3dist(sphinx) @@ -306,39 +166,37 @@ BuildRequires: python3dist(sympy) %if %{with rocm} BuildRequires: hipblas-devel -%if %{with hipblaslt} BuildRequires: hipblaslt-devel -%endif BuildRequires: hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel +BuildRequires: hipsparselt-devel BuildRequires: hipsolver-devel +# Magma is broken on ROCm 7 +# BuildRequires: magma-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel BuildRequires: rocrand-devel BuildRequires: rocfft-devel -%if %{with distributed} %if %{with rccl} BuildRequires: rccl-devel %endif -%endif BuildRequires: rocprim-devel 
BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel +BuildRequires: rocm-compilersupport-macros BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -BuildRequires: rocm-rpm-macros-modules +BuildRequires: rocsolver-devel +BuildRequires: rocm-smi-devel BuildRequires: rocthrust-devel BuildRequires: roctracer-devel -Requires: rocm-rpm-macros-modules -%endif +Requires: amdsmi -%if %{with opencv} -BuildRequires: opencv-devel %endif %if %{with test} @@ -346,49 +204,9 @@ BuildRequires: google-benchmark-devel %endif Requires: python3dist(dill) +Requires: python3dist(yaml) -# For convience -Provides: pytorch - -# Apache-2.0 -Provides: bundled(flatbuffers) = 22.3.3 -# MIT -Provides: bundled(miniz) = 2.1.0 -Provides: bundled(pybind11) = 2.11.1 - -%if %{with tensorpipe} -# BSD-3-Clause -Provides: bundled(tensorpipe) -# Apache-2.0 -Provides: bundled(libnop) -# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause -Provides: bundled(libuv) = 1.41.0 -%endif - -# These are already in Fedora -%if %{without xnnpack} -# BSD-3-Clause -Provides: bundled(xnnpack) -# MIT -Provides: bundled(FP16) -# MIT -Provides: bundled(fxdiv) -# MIT -Provides: bundled(psimd) -%endif - -%if %{without pthreadpool} -# BSD-2-Clause -Provides: bundled(pthreadpool) -%endif - -%if %{without pocketfft} -# BSD-3-Clause -Provides: bundled(pocketfft) -%endif - -# For convience -Provides: pytorch +Obsoletes: caffe = 1.0^git20200212.9b89154 %description PyTorch is a Python package that provides two high-level features: @@ -402,6 +220,24 @@ and Cython to extend PyTorch when needed. 
%package -n python3-%{pypi_name} Summary: %{summary} +# For convenience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = %{flatbuffers_version} +# MIT +Provides: bundled(miniz) = %{miniz_version} +Provides: bundled(pybind11) = %{pybind11_version} + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + %description -n python3-%{pypi_name} PyTorch is a Python package that provides two high-level features: @@ -411,48 +247,6 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. -%if %{with rocm} -%package -n python3-%{pypi_name}-rocm-gfx8 -Summary: %{name} for ROCm gfx8 - -%description -n python3-%{pypi_name}-rocm-gfx8 -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx9 -Summary: %{name} for ROCm gfx9 - -%description -n python3-%{pypi_name}-rocm-gfx9 -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx10 -Summary: %{name} for ROCm gfx10 - -%description -n python3-%{pypi_name}-rocm-gfx10 -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx11 -Summary: %{name} for ROCm gfx11 - -%description -n python3-%{pypi_name}-rocm-gfx11 -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx90a -Summary: %{name} for ROCm MI200 -%description -n python3-%{pypi_name}-rocm-gfx90a -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx942 -Summary: %{name} for ROCm MI300 -%description -n python3-%{pypi_name}-rocm-gfx942 -%{summary} - -%package -n python3-%{pypi_name}-rocm-gfx1100 -Summary: %{name} for W7900 -%description -n python3-%{pypi_name}-rocm-gfx1100 -%{summary} - -%endif - %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -469,6 +263,7 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %autosetup -p1 -n pytorch-%{commit0} # 
Overwrite with a git checkout of the pyproject.toml cp %{SOURCE1000} . + %else %autosetup -p1 -n pytorch-v%{version} %endif @@ -478,20 +273,11 @@ rm -rf %{pypi_name}.egg-info tar xf %{SOURCE1} rm -rf third_party/flatbuffers/* -cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ +cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/ tar xf %{SOURCE2} rm -rf third_party/pybind11/* -cp -r pybind11-2.11.1/* third_party/pybind11/ - -%if %{with cuda} -tar xf %{SOURCE10} -rm -rf third_party/cudnn_frontend/* -cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ -tar xf %{SOURCE11} -rm -rf third_party/cutlass/* -cp -r cutlass-%{cul_ver}/* third_party/cutlass/ -%endif +cp -r pybind11-%{pybind11_version}/* third_party/pybind11/ %if %{with tensorpipe} tar xf %{SOURCE20} @@ -503,36 +289,12 @@ cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ tar xf %{SOURCE22} rm -rf third_party/tensorpipe/third_party/libnop/* cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ + +# gcc 15 include cstdint +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/allocator.h +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/memory.h %endif -%if %{without xnnpack} -tar xf %{SOURCE30} -rm -rf third_party/XNNPACK/* -cp -r XNNPACK-*/* third_party/XNNPACK/ -tar xf %{SOURCE31} -rm -rf third_party/FXdiv/* -cp -r FXdiv-*/* third_party/FXdiv/ -tar xf %{SOURCE32} -rm -rf third_party/FP16/* -cp -r FP16-*/* third_party/FP16/ -tar xf %{SOURCE33} -rm -rf third_party/psimd/* -cp -r psimd-*/* third_party/psimd/ -%endif - -%if %{without pthreadpool} -tar xf %{SOURCE40} -rm -rf third_party/pthreadpool/* -cp -r pthreadpool-*/* third_party/pthreadpool/ -%endif - -%if %{without pocketfft} -tar xf %{SOURCE50} -rm -rf third_party/pocketfft/* -cp -r pocketfft-*/* third_party/pocketfft/ -%endif - -%if %{with gitcommit} %if %{without opentelemtry} tar xf %{SOURCE60} rm -rf third_party/opentelemetry-cpp/* @@ -550,16 +312,15 @@ tar xf %{SOURCE80} rm -rf 
third_party/kineto/* cp -r kineto-*/* third_party/kineto/ %endif + +%if %{without onnx} +tar xf %{SOURCE90} +rm -rf third_party/onnx/* +cp -r onnx-*/* third_party/onnx/ %endif -%if %{with opencv} -%if %{without gitcommit} -# Reduce requirements, *FOUND is not set -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt -sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt -%endif -%endif +# Adjust for the hipblaslt's we build +sed -i -e 's@"gfx90a", "gfx940", "gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp %if 0%{?rhel} # In RHEL but too old @@ -567,26 +328,48 @@ sed -i -e '/typing-extensions/d' setup.py # Need to pip these sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py +%else +# for 2.5.0 +sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py %endif # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake +# Compress hip +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake +# Silence noisy warning +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake +# Use parallel jobs +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake +# Need to link with librocm_smi64 
+sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake # No third_party fmt, use system sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt +sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $)@@' aten/src/ATen/CMakeLists.txt + +sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake +sed -i -e 's@fmt::fmt-header-only@fmt@' caffe2/CMakeLists.txt + sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake # No third_party FXdiv -%if %{with xnnpack} sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt -%endif + +# https://github.com/pytorch/pytorch/issues/149803 +# Tries to checkout nccl +sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py # Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py @@ -599,7 +382,7 @@ sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # the third_party dir to compile the file. # mimiz is licensed MIT # https://github.com/richgel999/miniz/blob/master/LICENSE -mv third_party/miniz-2.1.0 . 
+mv third_party/miniz-%{miniz_version} . # # setup.py depends on this script mv third_party/build_bundled.py . @@ -609,30 +392,10 @@ mv third_party/flatbuffers . mv third_party/pybind11 . -%if %{with cuda} -mv third_party/cudnn_frontend . -mv third_party/cutlass . -%endif - %if %{with tensorpipe} mv third_party/tensorpipe . %endif -%if %{without xnnpack} -mv third_party/XNNPACK . -mv third_party/FXdiv . -mv third_party/FP16 . -mv third_party/psimd . -%endif - -%if %{without pthreadpool} -mv third_party/pthreadpool . -%endif - -%if %{without pocketfft} -mv third_party/pocketfft . -%endif - %if %{without opentelemetry} mv third_party/opentelemetry-cpp . %endif @@ -645,6 +408,10 @@ mv third_party/cpp-httplib . mv third_party/kineto . %endif +%if %{without onnx} +mv third_party/onnx . +%endif + %if %{with test} mv third_party/googletest . %endif @@ -653,34 +420,14 @@ mv third_party/googletest . rm -rf third_party/* # Put stuff back mv build_bundled.py third_party -mv miniz-2.1.0 third_party +mv miniz-%{miniz_version} third_party mv flatbuffers third_party mv pybind11 third_party -%if %{with cuda} -mv cudnn_frontend third_party -mv cutlass third_party -%endif - %if %{with tensorpipe} mv tensorpipe third_party %endif -%if %{without xnnpack} -mv XNNPACK third_party -mv FXdiv third_party -mv FP16 third_party -mv psimd third_party -%endif - -%if %{without pthreadpool} -mv pthreadpool third_party -%endif - -%if %{without pocketfft} -mv pocketfft third_party -%endif - %if %{without opentelemetry} mv opentelemetry-cpp third_party %endif @@ -693,26 +440,35 @@ mv cpp-httplib third_party mv kineto third_party %endif +%if %{without onnx} +mv onnx third_party +%endif + %if %{with test} mv googletest third_party %endif -%if %{with pocketfft} # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft -%endif +cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ # # Use the system valgrind headers mkdir third_party/valgrind-headers cp 
%{_includedir}/valgrind/* third_party/valgrind-headers -%if %{without gitcommit} -# Remove unneeded OpenCL files that confuse the lincense scanner -rm caffe2/contrib/opencl/OpenCL/cl.hpp -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp +# Fix installing to /usr/lib64 +sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt + +# reenable foxi linking +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake + +# cmake version changed +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt +%if %{without opentelemetry} +sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt %endif %if %{with rocm} @@ -720,33 +476,45 @@ rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp ./tools/amd_build/build_amd.py # Fedora installs to /usr/include, not /usr/include/rocm-core sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h -%endif - -%if %{with cuda} - -# TBD +# https://github.com/pytorch/pytorch/issues/149805 +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake +# Fedora installs to /usr/include, not /usr/include/rocm-core +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp +# use any hip, correct CMAKE_MODULE_PATH +sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake +sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake +# silence an assert +# sed -i -e 
'/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu %endif +# moodycamel include path needs adjusting to use the system's +sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake + %build +# Export the arches +# echo "%%pytorch_arches %pt_arches" > macros.pytorch + # # Control the number of jobs # # The build can fail if too many threads exceed the physical memory -# So count core and and memory and increase the build memory util the build succeeds +# Run at least one thread, more if CPU & memory resources are available. # +%ifarch x86_64 # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` +%else +# cpuinfo format varies on other arches, fall back to nproc +COMPILE_JOBS=`nproc` +%endif if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory -%if %{with cuda} -BUILD_MEM=4 -%else BUILD_MEM=2 -%endif MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` @@ -757,12 +525,6 @@ if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then fi export MAX_JOBS=$COMPILE_JOBS -%if %{with compat_gcc} -export CC=%{_bindir}/gcc%{compat_gcc_major} -export CXX=%{_bindir}/g++%{compat_gcc_major} -export FC=%{_bindir}/gfortran%{compat_gcc_major} -%endif - # For debugging setup.py # export SETUPTOOLS_SCM_DEBUG=1 @@ -772,10 +534,7 @@ export FC=%{_bindir}/gfortran%{compat_gcc_major} # export CMAKE_SHARED_LINKER_FLAGS=-Wl,--verbose # Manually set this hardening flag -# CUDA is unhappy with pie, so do not use it -%if %{without cuda} export CMAKE_EXE_LINKER_FLAGS=-pie -%endif export BUILD_CUSTOM_PROTOBUF=OFF export BUILD_NVFUSER=OFF @@ -787,15 +546,17 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF +export USE_FAKELOWP=OFF export USE_FBGEMM=OFF export 
USE_FLASH_ATTENTION=OFF -export USE_GOLD_LINKER=OFF export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF +export USE_KLEIDIAI=OFF export USE_LITE_INTERPRETER_PROFILER=OFF export USE_LITE_PROTO=OFF export USE_MAGMA=OFF +export USE_MEM_EFF_ATTENTION=OFF export USE_MKLDNN=OFF export USE_MPI=OFF export USE_NCCL=OFF @@ -803,45 +564,25 @@ export USE_NNPACK=OFF export USE_NUMPY=ON export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF -%if %{without gitcommit} -export USE_QNNPACK=OFF -%endif export USE_ROCM=OFF -export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_SLEEF=ON export USE_SYSTEM_EIGEN_INSTALL=ON +%if %{with onnx} export USE_SYSTEM_ONNX=ON +%endif export USE_SYSTEM_PYBIND11=OFF export USE_SYSTEM_LIBS=OFF +export USE_SYSTEM_NCCL=OFF export USE_TENSORPIPE=OFF -export USE_XNNPACK=ON - -%if %{with pthreadpool} +export USE_XNNPACK=OFF +export USE_XPU=OFF export USE_SYSTEM_PTHREADPOOL=ON -%endif - -%if %{with xnnpack} +export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_FP16=ON export USE_SYSTEM_FXDIV=ON export USE_SYSTEM_PSIMD=ON -export USE_SYSTEM_XNNPACK=ON -%endif +export USE_SYSTEM_XNNPACK=OFF -%if %{with caffe2} -export BUILD_CAFFE2=ON -%endif - -%if %{with cuda} -%if %{without rocm} -export CUDACXX=/usr/local/cuda/bin/nvcc -export CPLUS_INCLUDE_PATH=/usr/local/cuda/include -export USE_CUDA=ON -# The arches to build for -export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" -%endif -%endif - -%if %{with distributed} export USE_DISTRIBUTED=ON %if %{with tensorpipe} export USE_TENSORPIPE=ON @@ -855,11 +596,6 @@ export USE_SYSTEM_GLOO=ON %if %{with mpi} export USE_MPI=ON %endif -%endif - -%if %{with opencv} -export USE_OPENCV=ON -%endif %if %{with test} export BUILD_TEST=ON @@ -874,130 +610,73 @@ export BUILD_TEST=ON # # See BZ 2244862 - %if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK_SDPA=OFF +export USE_ROCM_CK_GEMM=OFF +export USE_FBGEMM_GENAI=OFF + +# Magma is broken on ROCm 7 +# export USE_MAGMA=ON export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` 
-export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -%py3_build -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - %py3_build - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_wheel %else - %py3_build - %endif + %install -%if %{with compat_gcc} -export CC=%{_bindir}/gcc%{compat_gcc_major} -export CXX=%{_bindir}/g++%{compat_gcc_major} -export FC=%{_bindir}/gfortran%{compat_gcc_major} -%endif +# pytorch rpm macros +# install -Dpm 644 macros.pytorch \ +# %{buildroot}%{_rpmmacrodir}/macros.pytorch %if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK=OFF export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +# export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -mv build-${gpu} build -%py3_install -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - mv build-${gpu} build - # need to customize the install location, so replace py3_install - %{__python3} %{py_setup} 
%{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} - rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_install +%pyproject_save_files '*torch*' %else %py3_install - %endif + +%check +# Not working yet +# pyproject_check_import torch + # Do not remote the empty files - -%files -n python3-%{pypi_name} +%files -n python3-%{pypi_name} %license LICENSE %doc README.md -%{_bindir}/convert-caffe2-to-onnx -%{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/%{pypi_name} -%{python3_sitearch}/%{pypi_name}-*.egg-info +%{python3_sitearch}/%{pypi_name}* %{python3_sitearch}/functorch -%{python3_sitearch}/torchgen -%if %{with caffe2} -%{python3_sitearch}/caffe2 -%endif - -%if %{with rocm} -%files -n python3-%{pypi_name}-rocm-gfx8 -%{_libdir}/rocm/gfx8/bin/* -%{_libdir}/rocm/gfx8/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx9 -%{_libdir}/rocm/gfx9/bin/* -%{_libdir}/rocm/gfx9/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx10 -%{_libdir}/rocm/gfx10/bin/* -%{_libdir}/rocm/gfx10/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx11 -%{_libdir}/rocm/gfx11/bin/* -%{_libdir}/rocm/gfx11/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx90a -%{_libdir}/rocm/gfx90a/bin/* -%{_libdir}/rocm/gfx90a/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx942 -%{_libdir}/rocm/gfx942/bin/* -%{_libdir}/rocm/gfx942/lib64/* - -%files -n python3-%{pypi_name}-rocm-gfx1100 -%{_libdir}/rocm/gfx1100/bin/* -%{_libdir}/rocm/gfx1100/lib64/* - -%endif %changelog %autochangelog diff --git a/sources b/sources index 60cce58..9a3681f 100644 --- a/sources +++ b/sources @@ -1,14 +1,19 @@ -SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 -SHA512 (pytorch-v2.1.2.tar.gz) = 
b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 -SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4 +SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6 SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 -SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95 -SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f -SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683 -SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef +SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 -SHA512 (pytorch-97ff6cf.tar.gz) = 
105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580 -SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda -SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4 +SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 +SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 +SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a +SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 +SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 +SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0 +SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28 +SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d +SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907 +SHA512 (pytorch-fd36458.tar.gz) = 
acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc +SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341 +SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023 +SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a