diff --git a/.gitignore b/.gitignore index c424df5..31a05a6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,23 +11,9 @@ /libnop-910b558.tar.gz /pytorch-97ff6cf.tar.gz /pytorch-v2.3.0.tar.gz -/pytorch-v2.3.1.tar.gz -/pytorch-v2.4.0.tar.gz -/v1.14.2.tar.gz -/cpp-httplib-3b6597b.tar.gz -/kineto-be13176.tar.gz -/pytorch-v2.4.1.tar.gz -/pytorch-v2.5.0.tar.gz -/pytorch-v2.5.1.tar.gz -/pytorch-v2.7.0.tar.gz -/v2.13.6.tar.gz -/pytorch-a1cb3cc.tar.gz -/v24.12.23.tar.gz -/kineto-5e75018.tar.gz -/pytorch-v2.8.0.tar.gz -/v1.18.0.tar.gz -/pytorch-715dca6.tar.gz -/pytorch-fd36458.tar.gz -/pytorch-0fabc3b.tar.gz -/pytorch-v2.9.0.tar.gz -/pytorch-v2.9.1.tar.gz +/xnnpack-fcbf55a.tar.gz +/FXdiv-63058ef.tar.gz +/FP16-0a92994.tar.gz +/psimd-072586a.tar.gz +/pthreadpool-4fe0e1e.tar.gz +/pocketfft-076cb3d.tar.gz diff --git a/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch deleted file mode 100644 index 925e03b..0000000 --- a/0001-Add-cmake-variable-USE_ROCM_CK.patch +++ /dev/null @@ -1,202 +0,0 @@ -From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Mon, 21 Jul 2025 11:35:03 -0700 -Subject: [PATCH] Add cmake variable USE_ROCM_CK - ---- - CMakeLists.txt | 1 + - aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- - aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- - cmake/Dependencies.cmake | 3 +++ - 4 files changed, 35 insertions(+), 31 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index a5d25e6afa0f..afc1b53efa64 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -240,6 +240,7 @@ cmake_dependent_option( - BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON - "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) - cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) -+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) - option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) - cmake_dependent_option(USE_CUDNN 
"Use cuDNN" ON "USE_CUDA" OFF) - cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF -diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index c9cfd74b501e..59f6178218ee 100644 ---- a/aten/src/ATen/CMakeLists.txt -+++ b/aten/src/ATen/CMakeLists.txt -@@ -373,26 +373,26 @@ if(USE_ROCM) - # is header only, so this should be ok, except that the CMake build generates - # a ck/config.h. We just do that part here. Without this, the ck.h from the - # ROCM SDK may get accidentally used instead. -- function(_pytorch_rocm_generate_ck_conf) -- set(CK_ENABLE_INT8 "ON") -- set(CK_ENABLE_FP16 "ON") -- set(CK_ENABLE_FP32 "ON") -- set(CK_ENABLE_FP64 "ON") -- set(CK_ENABLE_BF16 "ON") -- set(CK_ENABLE_FP8 "ON") -- set(CK_ENABLE_BF8 "ON") -- set(CK_USE_XDL "ON") -- set(CK_USE_WMMA "ON") -- configure_file( -- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" -- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" -- ) -- endfunction() -+# function(_pytorch_rocm_generate_ck_conf) -+# set(CK_ENABLE_INT8 "ON") -+# set(CK_ENABLE_FP16 "ON") -+# set(CK_ENABLE_FP32 "ON") -+# set(CK_ENABLE_FP64 "ON") -+# set(CK_ENABLE_BF16 "ON") -+# set(CK_ENABLE_FP8 "ON") -+# set(CK_ENABLE_BF8 "ON") -+# set(CK_USE_XDL "ON") -+# set(CK_USE_WMMA "ON") -+# configure_file( -+# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" -+# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" -+# ) -+# endfunction() - list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) -- _pytorch_rocm_generate_ck_conf() -+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) 
-+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) -+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) -+# _pytorch_rocm_generate_ck_conf() - - # Next two lines are needed because TunableOp uses third-party/fmt - list(APPEND ATen_HIP_INCLUDE $) -@@ -409,7 +409,7 @@ endif() - ${native_quantized_hip_hip} - ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} - ) -- if(WIN32) # Windows doesn't support Composable Kernels -+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels - file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") - file(GLOB native_hip_ck "native/hip/ck*.hip") - exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index 89350a11bea7..e5b7960177cf 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -752,7 +752,7 @@ template <> - void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support double gemm yet - bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); - #else -@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) - bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } -@@ -1270,14 +1270,14 @@ template <> - void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support double gemm yet - gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); - #else - 
gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); - #endif - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); - } -@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); -@@ -1311,7 +1311,7 @@ template <> - void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support complex gemm yet - gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); - #else -@@ -1327,7 +1327,7 @@ template <> - void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support complex gemm yet - gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); - #else -@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); - } -@@ -1361,7 +1361,7 @@ void 
gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } -@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); - } -@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); - } -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index a93386c27f8d..be1368999d38 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1031,6 +1031,9 @@ if(USE_ROCM) - if(HIPBLASLT_VEC_EXT) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) - endif() -+ if(USE_ROCM_CK) -+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) -+ endif() - list(APPEND HIP_HIPCC_FLAGS --offload-compress) - if(WIN32) - add_definitions(-DROCM_ON_WINDOWS) --- -2.49.0 - diff --git a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch 
b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch deleted file mode 100644 index b6a282c..0000000 --- a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch +++ /dev/null @@ -1,359 +0,0 @@ -From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 -From: albanD -Date: Sat, 12 Jul 2025 03:42:33 -0400 -Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 - -Imported from -https://github.com/albanD/pytorch/tree/cpython314_build -commit 88bb9cdb72449f4277829e20d94ad8aec1894216 - -Signed-off-by: Tom Rix ---- - torch/_dynamo/bytecode_analysis.py | 2 +- - torch/ao/quantization/__init__.py | 5 +++- - torch/ao/quantization/qconfig.py | 4 ++- - torch/ao/quantization/utils.py | 7 +++-- - torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ - torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ - torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- - torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ - torch/csrc/utils/python_compat.h | 1 + - torch/onnx/__init__.py | 1 - - torch/utils/weak.py | 29 +++++++++++++++++-- - 11 files changed, 111 insertions(+), 19 deletions(-) - -diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py -index 3252ea91409f..2de74ee5bf8d 100644 ---- a/torch/_dynamo/bytecode_analysis.py -+++ b/torch/_dynamo/bytecode_analysis.py -@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): - TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) - else: - TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) --if sys.version_info >= (3, 12): -+if (3, 12) <= sys.version_info < (3, 14): - TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) - if sys.version_info >= (3, 13): - TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) -diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py -index ffc1792fd23f..cf5a8b99a894 100644 ---- a/torch/ao/quantization/__init__.py -+++ b/torch/ao/quantization/__init__.py -@@ -1,5 +1,6 @@ - # mypy: 
allow-untyped-defs - -+import sys - from typing import Callable, Optional, Union - - import torch -@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 - - # ensure __module__ is set correctly for public APIs - ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase] --ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" -+if sys.version_info < (3, 14): -+ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" -+ - for _f in [ - compare_results, - extract_results_from_loggers, -diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py -index efee5302ad42..d9a8fc78bab4 100644 ---- a/torch/ao/quantization/qconfig.py -+++ b/torch/ao/quantization/qconfig.py -@@ -1,5 +1,6 @@ - # mypy: allow-untyped-defs - import copy -+import sys - import warnings - from collections import namedtuple - from typing import Any, Optional, Union -@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N - - - QConfigAny = Optional[QConfig] --QConfigAny.__module__ = "torch.ao.quantization.qconfig" -+if sys.version_info < (3, 14): -+ QConfigAny.__module__ = "torch.ao.quantization.qconfig" - - - def _add_module_to_qconfig_obs_ctr( -diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py -index 4ac3112ec072..3b1503e01701 100644 ---- a/torch/ao/quantization/utils.py -+++ b/torch/ao/quantization/utils.py -@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) - """ - - import functools -+import sys - import warnings - from collections import OrderedDict - from inspect import getfullargspec, signature -@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized - - - NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] --NodePattern.__module__ = "torch.ao.quantization.utils" -+if sys.version_info < (3, 14): -+ NodePattern.__module__ = "torch.ao.quantization.utils" - - # This is the Quantizer class instance from 
torch/quantization/fx/quantize.py. - # Define separately to prevent circular imports. -@@ -31,7 +33,8 @@ QuantizerCls = Any - Pattern = Union[ - Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any - ] --Pattern.__module__ = "torch.ao.quantization.utils" -+if sys.version_info < (3, 14): -+ Pattern.__module__ = "torch.ao.quantization.utils" - - - # TODO: maybe rename this to MatchInputNode -diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c -index b68ef894aeaa..244d4165d5e8 100644 ---- a/torch/csrc/dynamo/cpython_defs.c -+++ b/torch/csrc/dynamo/cpython_defs.c -@@ -2,6 +2,20 @@ - #include - #include - -+#if IS_PYTHON_3_14_PLUS -+ -+const uint8_t* THP_PyOpcode_Caches = NULL; -+const int THP_PyOpcode_Caches_size = 0; -+ -+void -+THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) -+{} -+void -+THP_PyFrame_Clear(_PyInterpreterFrame *frame) -+{} -+ -+#else -+ - #if IS_PYTHON_3_11_PLUS - - #define Py_BUILD_CORE -@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; - const int THP_PyOpcode_Caches_size = 0; - - #endif -+ -+#endif // IS_PYTHON_3_14_PLUS -\ No newline at end of file -diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h -index 6b99c1d5aec8..616be16563cf 100644 ---- a/torch/csrc/dynamo/cpython_includes.h -+++ b/torch/csrc/dynamo/cpython_includes.h -@@ -21,6 +21,14 @@ - - #if IS_PYTHON_3_11_PLUS - #include -+#if IS_PYTHON_3_14_PLUS -+#include -+#include -+#endif -+#endif -+ -+#if IS_PYTHON_3_14_PLUS -+#include - #endif - - #undef Py_BUILD_CORE -@@ -30,6 +38,13 @@ - extern "C" { - #endif - -+#if IS_PYTHON_3_14_PLUS -+ -+#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) -+#define PREV_INSTR(x) (x)->instr_ptr -+ -+#else -+ - #if IS_PYTHON_3_13_PLUS - #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) - #define PREV_INSTR(x) (x)->instr_ptr -@@ -38,6 +53,8 @@ extern "C" { - #define PREV_INSTR(x) 
(x)->prev_instr - #endif - -+#endif // IS_PYTHON_3_14_PLUS -+ - #if IS_PYTHON_3_12_PLUS - #define FUNC(x) ((x)->f_funcobj) - #else -diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c -index f413782b2d30..72bb8839bac3 100644 ---- a/torch/csrc/dynamo/eval_frame.c -+++ b/torch/csrc/dynamo/eval_frame.c -@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { - return PyUnicode_AsUTF8(F_CODE(frame)->co_name); - } - --void clear_old_frame_if_python_312_plus( -- PyThreadState* tstate, -- THP_EVAL_API_FRAME_OBJECT* frame) { --#if IS_PYTHON_3_12_PLUS -- -- THP_PyFrame_Clear(frame); -- THP_PyThreadState_PopFrame(tstate, frame); -- --#endif --} -- - static PyObject* dynamo_eval_custom_code_impl( - PyThreadState* tstate, - THP_EVAL_API_FRAME_OBJECT* frame, -@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( - - static void enable_eval_frame_shim(PyThreadState* tstate) {} - static void enable_eval_frame_default(PyThreadState* tstate) {} -+PyObject* dynamo_eval_custom_code( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame, -+ PyCodeObject* code, -+ const char* trace_annotation, -+ int throw_flag) {} -+THPPyInterpreterFrame* THPPyInterpreterFrame_New( -+ THP_EVAL_API_FRAME_OBJECT* frame) {} -+PyObject* dynamo_eval_frame_default( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame, -+ int throw_flag) {} - - static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; - -@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { - - #endif // !(IS_PYTHON_3_14_PLUS) - -+void clear_old_frame_if_python_312_plus( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame) { -+#if IS_PYTHON_3_12_PLUS -+ -+ THP_PyFrame_Clear(frame); -+ THP_PyThreadState_PopFrame(tstate, frame); -+ -+#endif -+} -+ - static PyObject* increment_working_threads( - PyThreadState* tstate, - PyObject* module) { -diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp 
b/torch/csrc/dynamo/framelocals_mapping.cpp -index b839fb26fc91..c4ee36d87767 100644 ---- a/torch/csrc/dynamo/framelocals_mapping.cpp -+++ b/torch/csrc/dynamo/framelocals_mapping.cpp -@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) - PyCodeObject* co = F_CODE(frame); - _framelocals.resize(co->co_nlocalsplus, nullptr); - -+#if IS_PYTHON_3_14_PLUS -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - if (!frame->stacktop) { - return; - } -+#endif - - auto update_framelocals = [&](int i, PyObject* value) { - _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); -@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) - }; - - auto offset = co->co_nlocalsplus - co->co_nfreevars; -+#if IS_PYTHON_3_14_PLUS -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - for (int i = 0; i < offset; i++) { - update_framelocals(i, frame->localsplus[i]); - } -+#endif -+ - // Get references to closure variables -+#if IS_PYTHON_3_14_PLUS -+ PyObject* closure; -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; -+#endif - for (int i = 0; i < co->co_nfreevars; i++) { - update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); - } -diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h -index a1537611cc47..16292e4fd030 100644 ---- a/torch/csrc/utils/python_compat.h -+++ b/torch/csrc/utils/python_compat.h -@@ -13,6 +13,7 @@ extern "C" { - #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 - #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 - #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 -+#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 - - static inline int PyCode_GetNCellvars(PyCodeObject* code) { - // gh-26364 added co_ncellvars to Python 3.11.0rc1 -diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py -index 345ffd2a065b..ceeadde5365b 100644 
---- a/torch/onnx/__init__.py -+++ b/torch/onnx/__init__.py -@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx" - OnnxExporterError.__module__ = "torch.onnx" - _OrtBackend.__module__ = "torch.onnx" - _OrtBackendOptions.__module__ = "torch.onnx" --_OrtExecutionProvider.__module__ = "torch.onnx" - enable_fake_mode.__module__ = "torch.onnx" - is_onnxrt_backend_supported.__module__ = "torch.onnx" - -diff --git a/torch/utils/weak.py b/torch/utils/weak.py -index 8bf2ba5ed02b..9c7218cb2ad3 100644 ---- a/torch/utils/weak.py -+++ b/torch/utils/weak.py -@@ -3,8 +3,6 @@ from __future__ import annotations - - import collections.abc as _collections_abc - import weakref -- --from _weakrefset import _IterationGuard # type: ignore[attr-defined] - from collections.abc import Mapping, MutableMapping - from weakref import ref - -@@ -22,6 +20,33 @@ __all__ = [ - ] - - -+# TODO: make weakref properly thread safe following -+# https://github.com/python/cpython/pull/125325 -+class _IterationGuard: -+ # This context manager registers itself in the current iterators of the -+ # weak container, such as to delay all removals until the context manager -+ # exits. -+ # This technique should be relatively thread-safe (since sets are). -+ -+ def __init__(self, weakcontainer): -+ # Don't create cycles -+ self.weakcontainer = ref(weakcontainer) -+ -+ def __enter__(self): -+ w = self.weakcontainer() -+ if w is not None: -+ w._iterating.add(self) -+ return self -+ -+ def __exit__(self, e, t, b): -+ w = self.weakcontainer() -+ if w is not None: -+ s = w._iterating -+ s.remove(self) -+ if not s: -+ w._commit_removals() -+ -+ - # This file defines a variant of WeakKeyDictionary that overrides the hashing - # behavior of the key to use object identity, rather than the builtin - # __eq__/__hash__ functions. 
This is useful for Tensor weak keys, as their --- -2.49.0 - diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch new file mode 100644 index 0000000..56434a7 --- /dev/null +++ b/0001-Optionally-use-hipblaslt.patch @@ -0,0 +1,262 @@ +From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 23 Feb 2024 08:27:30 -0500 +Subject: [PATCH] Optionally use hipblaslt + +The hipblaslt package is not available on Fedora. +Instead of requiring the package, make it optional. +If it is found, define the preprocessor variable HIPBLASLT +Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks + +Signed-off-by: Tom Rix +--- + aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- + aten/src/ATen/cuda/CUDABlas.h | 2 +- + aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- + aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- + aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- + aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ + cmake/Dependencies.cmake | 3 +++ + cmake/public/LoadHIP.cmake | 4 ++-- + 8 files changed, 25 insertions(+), 19 deletions(-) + +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index d534ec5a178..e815463f630 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -14,7 +14,7 @@ + #include + + #ifdef USE_ROCM +-#if ROCM_VERSION >= 60000 ++#ifdef HIPBLASLT + #include + #endif + // until hipblas has an API to accept flags, we must use rocblas here +@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { + } + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + + #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 + // only for rocm 5.7 where we first supported hipblaslt, it was difficult +@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public 
CuBlasLtDescriptor< + }; + } // namespace + ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + template + void gemm_and_bias( + bool transpose_mat1, +@@ -1124,7 +1125,7 @@ template void gemm_and_bias( + at::BFloat16* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation); +- ++#endif + void scaled_gemm( + char transa, + char transb, +diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h +index eb12bb350c5..068607467dd 100644 +--- a/aten/src/ATen/cuda/CUDABlas.h ++++ b/aten/src/ATen/cuda/CUDABlas.h +@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); + template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + enum GEMMAndBiasActivationEpilogue { + None, + RELU, +diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h +index 4ec35f59a21..e28dc42034f 100644 +--- a/aten/src/ATen/cuda/CUDAContextLight.h ++++ b/aten/src/ATen/cuda/CUDAContextLight.h +@@ -9,7 +9,7 @@ + + // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also + // added bf16 support +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + #include + #endif + +@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + /* Handles */ + TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); + TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + TORCH_CUDA_CPP_API cublasLtHandle_t 
getCurrentCUDABlasLtHandle(); + #endif + +diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp +index 6913d2cd95e..3d4276be372 100644 +--- a/aten/src/ATen/cuda/CublasHandlePool.cpp ++++ b/aten/src/ATen/cuda/CublasHandlePool.cpp +@@ -29,7 +29,7 @@ namespace at::cuda { + + namespace { + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + void createCublasLtHandle(cublasLtHandle_t *handle) { + TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); + } +@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { + return handle; + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasLtHandle_t getCurrentCUDABlasLtHandle() { + #ifdef USE_ROCM + c10::DeviceIndex device = 0; +diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h +index 3ba0d761277..dde1870cfbf 100644 +--- a/aten/src/ATen/cuda/tunable/TunableGemm.h ++++ b/aten/src/ATen/cuda/tunable/TunableGemm.h +@@ -11,7 +11,7 @@ + + #include + #ifdef USE_ROCM +-#if ROCM_VERSION >= 50700 ++#ifdef HIPBLASLT + #include + #endif + #include +@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex +@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex 
+diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp +index 29e5c5e3cf1..df56f3d7f1d 100644 +--- a/aten/src/ATen/native/cuda/Blas.cpp ++++ b/aten/src/ATen/native/cuda/Blas.cpp +@@ -155,7 +155,7 @@ enum class Activation { + GELU, + }; + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { + switch (a) { + case Activation::None: +@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { + + #ifdef USE_ROCM + static bool isSupportedHipLtROCmArch(int index) { ++#if defined(HIPBLASLT) + hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); + std::string device_arch = prop->gcnArchName; + static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; +@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { + } + } + TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); ++#endif + return false; + } + #endif +@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + at::ScalarType scalar_type = self.scalar_type(); + c10::MaybeOwned self_; + if (&result != &self) { +-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) + // Strangely, if mat2 has only 1 row or column, we get + // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. 
+ // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] +@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + } + self__sizes = self_->sizes(); + } else { +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + useLtInterface = !disable_addmm_cuda_lt && + result.dim() == 2 && result.is_contiguous() && + isSupportedHipLtROCmArch(self.device().index()) && +@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + if (useLtInterface) { + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, +@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); + at::native::resize_output(amax, {}); + +-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) ++#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasCommonArgs args(mat1, mat2, out); + const auto out_dtype_ = args.result->scalar_type(); + TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); +@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 60000 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + // rocm's hipblaslt does not yet support amax, so calculate separately + auto out_float32 = out.to(kFloat); + out_float32.abs_(); +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 
b7ffbeb07dc..2b6c3678984 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1273,6 +1273,9 @@ if(USE_ROCM) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() ++ if(hipblast_FOUND) ++ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) ++ endif() + if(HIPBLASLT_CUSTOM_DATA_TYPE) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) + endif() +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index f6ca263c5e5..53eb0b63c1a 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -156,7 +156,7 @@ if(HIP_FOUND) + find_package_and_print_version(rocblas REQUIRED) + find_package_and_print_version(hipblas REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") +- find_package_and_print_version(hipblaslt REQUIRED) ++ find_package_and_print_version(hipblaslt) + endif() + find_package_and_print_version(miopen REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") +@@ -191,7 +191,7 @@ if(HIP_FOUND) + # roctx is part of roctracer + find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) + +- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") ++ if(hipblastlt_FOUND) + # check whether hipblaslt is using its own datatype + set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") + file(WRITE ${file} "" +-- +2.43.2 + diff --git a/0001-Reenable-dim-for-python-3.12.patch b/0001-Reenable-dim-for-python-3.12.patch new file mode 100644 index 0000000..138b5d4 --- /dev/null +++ b/0001-Reenable-dim-for-python-3.12.patch @@ -0,0 +1,115 @@ +From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 4 Apr 2024 14:21:38 -0600 +Subject: [PATCH] Reenable dim for python 3.12 + +In 3.12: + +_PyArg_Parser added an element to the start of the structure. +So existing positional initialization is off. Switch to element +initialization. 
+ +_Py_CODEUNIT changed from an int to a union, but relevant_op +is passed an int for the return of decoder.opcode, so the parameter +type is wrong, switch it to int. + +The opcode PRECALL was removed, so reduce its handling to 3.11 + +Signed-off-by: Tom Rix +--- + functorch/csrc/dim/dim.cpp | 24 +++++------------------- + functorch/csrc/dim/minpybind.h | 4 ++-- + 2 files changed, 7 insertions(+), 21 deletions(-) + +diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp +index 4cc027504c77..e48b0d58081f 100644 +--- a/functorch/csrc/dim/dim.cpp ++++ b/functorch/csrc/dim/dim.cpp +@@ -6,20 +6,6 @@ + + #include + +- +-// Many APIs have changed/don't exist anymore +-#if IS_PYTHON_3_12_PLUS +- +-#include "dim.h" +- +-// Re-enable this some day +-PyObject* Dim_init() { +- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); +- return nullptr; +-} +- +-#else +- + #include "minpybind.h" + #include + #include +@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self, + PY_BEGIN + mpy::handle sizes; + static const char * const _keywords[] = {"sizes", nullptr}; +- static _PyArg_Parser parser = {"O", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) { + return nullptr; + } +@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self, + PY_BEGIN + int size; + static const char * const _keywords[] = {"N", nullptr}; +- static _PyArg_Parser parser = {"i", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) { + return nullptr; + } +@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = { + + // dim() -------------------- + +-static bool relevant_op(_Py_CODEUNIT c) { ++static bool relevant_op(int c) { + switch(c) { + case STORE_NAME: + case STORE_GLOBAL: +@@ -1587,7 +1573,7 @@ static PyObject*
_dims(PyObject *self, + auto c = mpy::obj::steal(PyFrame_GetCode(f.ptr())); + auto lasti = PyFrame_GetLasti(f.ptr()); + auto decoder = PyInstDecoder(c.ptr(), lasti); +- #if IS_PYTHON_3_11_PLUS ++ #if IS_PYTHON_3_11 + // When py3.11 adapts bytecode lasti points to the precall + // rather than the call instruction after it + if (decoder.opcode() == PRECALL) { +@@ -3268,4 +3254,4 @@ PyObject* Dim_init() { + } + } + +-#endif ++ +diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h +index de82b5af95a4..d76d4828bf80 100644 +--- a/functorch/csrc/dim/minpybind.h ++++ b/functorch/csrc/dim/minpybind.h +@@ -621,7 +621,7 @@ struct vector_args { + PyObject *dummy = NULL; + _PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy); + #else +- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0}; ++ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr}; + std::unique_ptr buf(new PyObject*[names.size()]); + _PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]); + #endif +@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) { + #define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \ + static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \ + FORALL_ARGS(MPY_ARGS_DECLARE) \ +- static _PyArg_Parser parser = {fmt, kwlist, 0}; \ ++ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \ + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \ + throw mpy::exception_set(); \ + } +-- +2.44.0 + diff --git a/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch new file mode 100644 index 0000000..4eec491 --- /dev/null +++ b/0001-Regenerate-flatbuffer-header.patch @@ -0,0 +1,39 @@ +From 5b8e51b24513fa851eeff42f23d942bde301e321 
Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 06:19:29 -0700 +Subject: [PATCH] Regenerate flatbuffer header + +For this error +torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: +error: static assertion failed: Non-compatible flatbuffers version included + 12 | FLATBUFFERS_VERSION_MINOR == 3 && + +PyTorch is expecting 23.3.3, what f38 has +Rawhide is at 23.5.26 + +Regenerate with +flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs + +Signed-off-by: Tom Rix +--- + torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h +index cffe8bc7a6..83575e4c19 100644 +--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h ++++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h +@@ -9,8 +9,8 @@ + // Ensure the included flatbuffers.h is the same version as when this file was + // generated, otherwise it may not be compatible. + static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && +- FLATBUFFERS_VERSION_MINOR == 3 && +- FLATBUFFERS_VERSION_REVISION == 3, ++ FLATBUFFERS_VERSION_MINOR == 5 && ++ FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); + + namespace torch { +-- +2.43.0 + diff --git a/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch new file mode 100644 index 0000000..f088645 --- /dev/null +++ b/0001-Stub-in-kineto-ActivityType.patch @@ -0,0 +1,73 @@ +From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 06:25:23 -0700 +Subject: [PATCH] Stub in kineto ActivityType + +There is an error when kineto is not used: the shim still +requires the ActivityType.h header to get the enum ActivityType. +So cut-n-paste just enough of the header in to do this.
+ +Signed-off-by: Tom Rix +--- + torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ + 1 file changed, 44 insertions(+) + +diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h +index e92cbf003d..68985ab7d0 100644 +--- a/torch/csrc/profiler/kineto_shim.h ++++ b/torch/csrc/profiler/kineto_shim.h +@@ -12,7 +12,51 @@ + #undef USE_KINETO + #endif + ++#ifdef USE_KINETO + #include ++#else ++namespace libkineto { ++// copied from header ++/* ++ * Copyright (c) Meta Platforms, Inc. and affiliates. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of this source tree. ++ */ ++ ++// Note : All activity types are not enabled by default. Please add them ++// at correct position in the enum ++enum class ActivityType { ++ // Activity types enabled by default ++ CPU_OP = 0, // cpu side ops ++ USER_ANNOTATION, ++ GPU_USER_ANNOTATION, ++ GPU_MEMCPY, ++ GPU_MEMSET, ++ CONCURRENT_KERNEL, // on-device kernels ++ EXTERNAL_CORRELATION, ++ CUDA_RUNTIME, // host side cuda runtime events ++ CUDA_DRIVER, // host side cuda driver events ++ CPU_INSTANT_EVENT, // host side point-like events ++ PYTHON_FUNCTION, ++ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. ++ ++ // Optional Activity types ++ CUDA_SYNC, // synchronization events between runtime and kernels ++ GLOW_RUNTIME, // host side glow runtime events ++ MTIA_RUNTIME, // host side MTIA runtime events ++ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics ++ MTIA_CCP_EVENTS, // MTIA ondevice CCP events ++ HPU_OP, // HPU host side runtime event ++ XPU_RUNTIME, // host side xpu runtime events ++ ++ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
++ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, ++}; ++} ++ ++#endif + + #include + #include +-- +2.43.0 + diff --git a/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch new file mode 100644 index 0000000..719737c --- /dev/null +++ b/0001-can-not-use-with-c-files.patch @@ -0,0 +1,25 @@ +From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 06:41:49 -0500 +Subject: [PATCH] can not use with c files + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 4dd8042058..5f91f3ffab 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1269,7 +1269,7 @@ if(USE_ROCM) + list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) + list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) + list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) +- list(APPEND HIP_CXX_FLAGS -std=c++17) ++# list(APPEND HIP_CXX_FLAGS -std=c++17) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() +-- +2.43.0 + diff --git a/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch new file mode 100644 index 0000000..a258737 --- /dev/null +++ b/0001-cuda-hip-signatures.patch @@ -0,0 +1,42 @@ +From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 1 Feb 2024 11:29:47 -0500 +Subject: [PATCH] cuda - hip signatures + +--- + aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp +index 1b85e7776e..bb6f88783a 100644 +--- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp ++++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp +@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, + const char *src, + const char *name, + int numHeaders, ++#if !defined(USE_ROCM) + const 
char * const *headers, + const char * const *includeNames) { ++#else ++ const char **headers, ++ const char **includeNames) { ++#endif + auto fn = reinterpret_cast(getNVRTCLibrary().sym(__func__)); + if (!fn) + throw std::runtime_error("Can't get nvrtcCreateProgram"); +@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *); + NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *); + NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *); + #endif ++#if !defined(USE_ROCM) + NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *); ++#else ++NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **); ++#endif + _STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult); + NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*); + NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *); +-- +2.43.0 + diff --git a/0001-disable-submodule-search.patch b/0001-disable-submodule-search.patch new file mode 100644 index 0000000..b830fa6 --- /dev/null +++ b/0001-disable-submodule-search.patch @@ -0,0 +1,25 @@ +From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 17:21:13 -0700 +Subject: [PATCH] disable submodule search + +--- + setup.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/setup.py b/setup.py +index 0fd886d945..e397df8fb6 100644 +--- a/setup.py ++++ b/setup.py +@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): + def build_deps(): + report("-- Building version " + version) + +- check_submodules() ++ # check_submodules() + check_pydep("yaml", "pyyaml") + + build_caffe2( +-- +2.43.0 + diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch new file mode 100644 index 0000000..34a1704 --- /dev/null +++ b/0001-disable-use-of-aotriton.patch @@ -0,0 +1,46 @@ +From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Tue, 19 Mar 2024 11:32:37 -0400 +Subject: [PATCH] disable use of aotriton + 
+--- + aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +index 96b839820efd..2d3dd0cb4b0f 100644 +--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp ++++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +@@ -21,9 +21,11 @@ + #include + #include + ++#ifdef USE_FLASH_ATTENTION + #if USE_ROCM + #include + #endif ++#endif + + /** + * Note [SDPA Runtime Dispatch] +@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { + } + + bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { ++#ifdef USE_FLASH_ATTENTION + // Check that the gpu is capable of running flash attention + using sm80 = SMVersion<8, 0>; + using sm90 = SMVersion<9, 0>; +@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug + } + #endif + return true; ++#else ++ return false; ++#endif + } + + bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { +-- +2.44.0 + diff --git a/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch new file mode 100644 index 0000000..0ce5b1f --- /dev/null +++ b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch @@ -0,0 +1,226 @@ +From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001 +From: William Wen +Date: Wed, 3 Apr 2024 17:58:46 -0700 +Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo + unittests on 3.12 (#123216) + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216 +Approved by: https://github.com/jansel, https://github.com/malfet +--- + test/dynamo/test_autograd_function.py | 3 ++ + test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++ + test/functorch/test_eager_transforms.py | 7 ++- + test/run_test.py | 3 -- + 
torch/__init__.py | 5 +- + torch/_dynamo/eval_frame.py | 4 +- + torch/_dynamo/test_case.py | 8 +--- + 7 files changed, 74 insertions(+), 19 deletions(-) + +diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py +index d23fec607afa..bc5ebc767038 100644 +--- a/test/dynamo/test_autograd_function.py ++++ b/test/dynamo/test_autograd_function.py +@@ -2,6 +2,8 @@ + + import copy + import math ++import sys ++import unittest + + import torch + +@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase): + # I pulled all of these test cases from test_autograd.py + # In the future, we should make the Dynamo test suite actually + # run on test_autograd.py (it's disabled right now) and delete these. ++ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+") + def test_smoke_from_test_autograd(self): + class Func(torch.autograd.Function): + @staticmethod +diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py +index a73de8b1c7e9..8f54e0564e6b 100644 +--- a/test/dynamo/test_misc.py ++++ b/test/dynamo/test_misc.py +@@ -9760,6 +9760,69 @@ fn + lambda mod: mod, + ) + ++ @xfailIfPy311 ++ def test_outside_linear_module_free(self): ++ # Compared to test_linear_module_free, the linear ++ # layer is not the code object that is directly compiled. 
++ def model_inp_ctr(): ++ fc = torch.nn.Linear(100, 100) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.fc_ref = fc ++ ++ def forward(self, x): ++ return fc(x[0]) ++ ++ # return fc to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), fc) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref) ++ ++ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+") ++ def test_parameter_free(self): ++ def model_inp_ctr(): ++ param = torch.nn.Parameter(torch.randn(100, 100)) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.param = param ++ ++ def forward(self, x): ++ return self.param * x[0] ++ ++ # return param to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), param) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param) ++ ++ def test_raises_importerror1(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ try: ++ import some_module_that_surely_does_not_exist ++ ++ return ++ except ImportError: ++ pass ++ return x.sin() ++ ++ x = torch.randn(8) ++ self.assertEqual(fn(x), x.sin()) ++ ++ def test_raises_importerror2(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ import some_module_that_surely_does_not_exist ++ ++ return x + 1 ++ ++ x = torch.randn(8) ++ with self.assertRaises(ImportError): ++ fn(x) ++ + def test_dynamo_cache_move_to_front(self): + class Mod(torch.nn.Module): + def __init__(self): +diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py +index 09415cf8f48e..60790ec06059 100644 +--- a/test/functorch/test_eager_transforms.py ++++ b/test/functorch/test_eager_transforms.py +@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase): + # Triton only supports GPU with SM70 or later. 
+ @expectedFailureIf((IS_ARM64 and not IS_MACOS) or + IS_WINDOWS or +- (TEST_CUDA and not SM70OrLater) or +- (sys.version_info >= (3, 12))) ++ (TEST_CUDA and not SM70OrLater)) + def test_compile_vmap_hessian(self, device): + # The model and inputs are a smaller version + # of code at benchmark repo: +@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase): + actual = opt_fn(params_and_buffers, x) + self.assertEqual(actual, expected) + +- # torch.compile is not supported on Windows or on Python 3.12+ +- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12))) ++ # torch.compile is not supported on Windows ++ @expectedFailureIf(IS_WINDOWS) + @torch._dynamo.config.patch(suppress_errors=False) + @torch._dynamo.config.patch(capture_func_transforms=True) + @skipIfTorchDynamo("Do not test torch.compile on top of torch.compile") +diff --git a/test/run_test.py b/test/run_test.py +index e86af9623042..ebb14df4167d 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT)) + RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1" + DISTRIBUTED_TEST_PREFIX = "distributed" + INDUCTOR_TEST_PREFIX = "inductor" +-DYNAMO_TEST_PREFIX = "dynamo" + + + # Note [ROCm parallel CI testing] +@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [ + ] + + INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)] +-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)] + DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)] + TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")] + FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")] +@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]: + # these tests failing in Python 3.12 temporarily disabling + if sys.version_info >= (3, 12): + options.exclude.extend(INDUCTOR_TESTS) +- options.exclude.extend(DYNAMO_TESTS) + options.exclude.extend( 
+ [ + "functorch/test_dims", +diff --git a/torch/__init__.py b/torch/__init__.py +index d381712b4a35..26cdffe81d29 100644 +--- a/torch/__init__.py ++++ b/torch/__init__.py +@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *, + + """ + _C._log_api_usage_once("torch.compile") +- # Temporary until we get proper support for python 3.12 +- if sys.version_info >= (3, 12): +- raise RuntimeError("Dynamo is not supported on Python 3.12+") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Dynamo is not supported on Python 3.13+") + + # Decorator mode + if model is None: +diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py +index 53ab0df3a947..0a80eeea99ed 100644 +--- a/torch/_dynamo/eval_frame.py ++++ b/torch/_dynamo/eval_frame.py +@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg] + + + def check_if_dynamo_supported(): +- if sys.version_info >= (3, 12): +- raise RuntimeError("Python 3.12+ not yet supported for torch.compile") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Python 3.13+ not yet supported for torch.compile") + + + def is_dynamo_supported(): +diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py +index e3cbef09eaae..297ea6e2bc2a 100644 +--- a/torch/_dynamo/test_case.py ++++ b/torch/_dynamo/test_case.py +@@ -1,7 +1,6 @@ + import contextlib + import importlib + import logging +-import sys + + import torch + import torch.testing +@@ -20,12 +19,7 @@ log = logging.getLogger(__name__) + def run_tests(needs=()): + from torch.testing._internal.common_utils import run_tests + +- if ( +- TEST_WITH_TORCHDYNAMO +- or IS_WINDOWS +- or TEST_WITH_CROSSREF +- or sys.version_info >= (3, 12) +- ): ++ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF: + return # skip testing + + if isinstance(needs, str): +-- +2.44.0 + diff --git a/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch new file mode 100644 index 0000000..71404e3 --- /dev/null +++ 
b/0001-no-third_party-FXdiv.patch @@ -0,0 +1,54 @@ +From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:20:28 -0500 +Subject: [PATCH] no third_party FXdiv + +--- + caffe2/CMakeLists.txt | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt +index b2f3adbfae..80a5625c8d 100644 +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -110,15 +110,15 @@ endif() + # Note: the folders that are being commented out have not been properly + # addressed yet. + +-if(NOT MSVC AND USE_XNNPACK) +- if(NOT TARGET fxdiv) +- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") +- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") +- add_subdirectory( +- "${FXDIV_SOURCE_DIR}" +- "${CMAKE_BINARY_DIR}/FXdiv") +- endif() +-endif() ++#if(NOT MSVC AND USE_XNNPACK) ++# if(NOT TARGET fxdiv) ++# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") ++# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") ++# add_subdirectory( ++# "${FXDIV_SOURCE_DIR}" ++# "${CMAKE_BINARY_DIR}/FXdiv") ++# endif() ++#endif() + + add_subdirectory(core) + add_subdirectory(serialize) +@@ -1081,9 +1081,9 @@ if(USE_XPU) + target_compile_definitions(torch_xpu PRIVATE USE_XPU) + endif() + +-if(NOT MSVC AND USE_XNNPACK) +- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) +-endif() ++#if(NOT MSVC AND USE_XNNPACK) ++# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) ++#endif() + + # ========================================================== + # formerly-libtorch flags +-- +2.43.0 + diff --git a/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch new file mode 100644 index 0000000..6e82af2 --- /dev/null +++ b/0001-no-third_party-fmt.patch @@ -0,0 +1,65 @@ +From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:16:04 -0500 +Subject: [PATCH] no third_party fmt + +--- + c10/CMakeLists.txt | 2 +- + cmake/Dependencies.cmake | 6 +++--- + 
torch/CMakeLists.txt | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt +index 1f742f4c176..4fa08913bdd 100644 +--- a/c10/CMakeLists.txt ++++ b/c10/CMakeLists.txt +@@ -87,7 +87,7 @@ endif() + if(C10_USE_GLOG) + target_link_libraries(c10 PUBLIC glog::glog) + endif() +-target_link_libraries(c10 PRIVATE fmt::fmt-header-only) ++target_link_libraries(c10 PRIVATE fmt) + + if(C10_USE_NUMA) + message(STATUS "NUMA paths:") +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 6f5a2d5feff..42fbf80f6e8 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1837,7 +1837,7 @@ endif() + # + set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) +-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) ++# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) + + # Disable compiler feature checks for `fmt`. + # +@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) + # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know + # `fmt` is compatible with a superset of the compilers that PyTorch is, it + # shouldn't be too bad to just disable the checks. 
+-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") ++# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") + +-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) ++# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) + + # ---[ Kineto +diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt +index 97a72eed55b..9e5014d1980 100644 +--- a/torch/CMakeLists.txt ++++ b/torch/CMakeLists.txt +@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES + python::python + pybind::pybind11 + shm +- fmt::fmt-header-only ++ fmt + ATEN_CPU_FILES_GEN_LIB) + + if(USE_ASAN AND TARGET Sanitizer::address) +-- +2.43.2 + diff --git a/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch new file mode 100644 index 0000000..ba1ec40 --- /dev/null +++ b/0001-no-third_party-foxi.patch @@ -0,0 +1,36 @@ +From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:11:55 -0500 +Subject: [PATCH] no third_party foxi + +--- + cmake/Dependencies.cmake | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 5f91f3ffab..8e1461af81 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) + endif() + endif() +- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) ++ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) + + add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) + if(NOT USE_SYSTEM_ONNX) +@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") + list(APPEND Caffe2_DEPENDENCY_LIBS 
onnx_proto onnx) + endif() +- include_directories(${FOXI_INCLUDE_DIRS}) +- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++# include_directories(${FOXI_INCLUDE_DIRS}) ++# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.0 + diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch new file mode 100644 index 0000000..8e39795 --- /dev/null +++ b/0001-reenable-foxi-linking.patch @@ -0,0 +1,25 @@ +From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 22 Feb 2024 09:28:11 -0500 +Subject: [PATCH] reenable foxi linking + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 42fbf80f6e8..bc3a2dc6fee 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + endif() + # include_directories(${FOXI_INCLUDE_DIRS}) +-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. 
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.2 + diff --git a/0001-silence-an-assert.patch b/0001-silence-an-assert.patch new file mode 100644 index 0000000..0b20dcf --- /dev/null +++ b/0001-silence-an-assert.patch @@ -0,0 +1,25 @@ +From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 06:41:20 -0500 +Subject: [PATCH] silence an assert + +--- + aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu +index 657c0c77b3..b406aa6687 100644 +--- a/aten/src/ATen/native/cuda/IndexKernel.cu ++++ b/aten/src/ATen/native/cuda/IndexKernel.cu +@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind + + gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { + int64_t qvalue = static_cast(zero_point + nearbyintf(*(float*)in_data * inv_scale)); +- qvalue = std::clamp(qvalue, qmin, qmax); ++ //qvalue = std::clamp(qvalue, qmin, qmax); + *(scalar_t*)(out_data + offset) = static_cast(qvalue); + }); + }); +-- +2.43.0 + diff --git a/0001-use-any-hip.patch b/0001-use-any-hip.patch new file mode 100644 index 0000000..dca86ea --- /dev/null +++ b/0001-use-any-hip.patch @@ -0,0 +1,34 @@ +From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 15:01:28 -0500 +Subject: [PATCH] use any hip + +--- + cmake/public/LoadHIP.cmake | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index 1abeb06228..28458c4146 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -30,7 +30,7 @@ endif() + message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") + + # Add HIP to the 
CMAKE Module Path +-set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH}) ++set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH}) + + macro(find_package_and_print_version PACKAGE_NAME) + find_package("${PACKAGE_NAME}" ${ARGN}) +@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME) + endmacro() + + # Find the HIP Package +-find_package_and_print_version(HIP 1.0) ++find_package_and_print_version(HIP MODULE) + + if(HIP_FOUND) + set(PYTORCH_FOUND_HIP TRUE) +-- +2.43.0 + diff --git a/README.NVIDIA b/README.NVIDIA deleted file mode 100644 index b927f47..0000000 --- a/README.NVIDIA +++ /dev/null @@ -1,15 +0,0 @@ -Some help for building this package for NVIDIA/CUDA - -Review NVIDIA's documenation -https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html - -Review PyTorch documentation -https://github.com/pytorch/pytorch#from-source - -Some convience strings to cut-n-paste - -F39 -dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo - -Building is local. -Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch deleted file mode 100644 index 1900519..0000000 --- a/next/0001-Use-horrible-dynamo-stub.patch +++ /dev/null @@ -1,85 +0,0 @@ -From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sun, 20 Jul 2025 12:47:58 -0700 -Subject: [PATCH] Use horrible dynamo stub - -Rawhide's update of python is too fast for dynamo -So paper of the problem with a horrible stub that throws -runtime exceptions if dynamo is used. 
- -Signed-off-by: Tom Rix ---- - build_variables.bzl | 26 ++++++++++++---------- - torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ - 2 files changed, 30 insertions(+), 12 deletions(-) - create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp - -diff --git a/build_variables.bzl b/build_variables.bzl -index b266c80e8843..a3be6893349b 100644 ---- a/build_variables.bzl -+++ b/build_variables.bzl -@@ -140,7 +140,8 @@ core_trainer_sources = [ - "torch/csrc/autograd/variable.cpp", - "torch/csrc/autograd/utils/warnings.cpp", - "torch/csrc/autograd/jit_decomp_interface.cpp", -- "torch/csrc/dynamo/compiled_autograd.cpp", -+# "torch/csrc/dynamo/compiled_autograd.cpp", -+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", - "torch/csrc/jit/frontend/name_mangler.cpp", - "torch/csrc/jit/ir/type_hashing.cpp", - "torch/csrc/jit/serialization/pickler.cpp", -@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ - "torch/csrc/autograd/python_torch_functions_manual.cpp", - "torch/csrc/autograd/python_variable.cpp", - "torch/csrc/autograd/python_variable_indexing.cpp", -- "torch/csrc/dynamo/python_compiled_autograd.cpp", -- "torch/csrc/dynamo/cache_entry.cpp", -- "torch/csrc/dynamo/cpp_shim.cpp", -- "torch/csrc/dynamo/cpython_defs.c", -- "torch/csrc/dynamo/eval_frame.c", -- "torch/csrc/dynamo/eval_frame_cpp.cpp", -- "torch/csrc/dynamo/extra_state.cpp", -- "torch/csrc/dynamo/framelocals_mapping.cpp", -- "torch/csrc/dynamo/guards.cpp", -- "torch/csrc/dynamo/utils.cpp", -- "torch/csrc/dynamo/init.cpp", -+# "torch/csrc/dynamo/python_compiled_autograd.cpp", -+# "torch/csrc/dynamo/cache_entry.cpp", -+# "torch/csrc/dynamo/cpp_shim.cpp", -+# "torch/csrc/dynamo/cpython_defs.c", -+# "torch/csrc/dynamo/eval_frame.c", -+# "torch/csrc/dynamo/eval_frame_cpp.cpp", -+# "torch/csrc/dynamo/extra_state.cpp", -+# "torch/csrc/dynamo/framelocals_mapping.cpp", -+# "torch/csrc/dynamo/guards.cpp", -+# "torch/csrc/dynamo/utils.cpp", -+# "torch/csrc/dynamo/init.cpp", -+ 
"torch/csrc/dynamo/horrible_dynamo_stub.cpp", - "torch/csrc/functorch/init.cpp", - "torch/csrc/fx/node.cpp", - "torch/csrc/mps/Module.cpp", -diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp -new file mode 100644 -index 000000000000..3ac1324d4557 ---- /dev/null -+++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp -@@ -0,0 +1,16 @@ -+#include -+#include -+ -+namespace torch::dynamo::autograd { -+const std::unique_ptr& getPyCompilerInterface() { -+ throw std::runtime_error("Dynamo not supported"); -+ return nullptr; -+} -+std::vector> get_input_metadata( -+ const edge_list& edges) { -+ std::vector> r; -+ throw std::runtime_error("Dynamo not supported"); -+ return r; -+} -+ -+} --- -2.49.0 - diff --git a/pyproject.toml b/pyproject.toml index 925742b..9508ad0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,165 +1,46 @@ -# Package ###################################################################### - [build-system] requires = [ - # 70.1.0: min version for integrated bdist_wheel command from wheel package - # 77.0.0: min version for SPDX expression support for project.license - "setuptools>=70.1.0,<80.0", - "cmake>=3.27", - "ninja", - "numpy", - "packaging", - "pyyaml", - "requests", - "six", # dependency chain: NNPACK -> PeachPy -> six - "typing-extensions>=4.10.0", -] -build-backend = "setuptools.build_meta" - -[dependency-groups] -dev = [ - # This list should be kept in sync with the requirements-build.txt - # in PyTorch root until the project fully migrates to pyproject.toml - # after which this can be removed as it is already specified in the - # [build-system] section - "setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0 - "cmake>=3.27", - "ninja", - "numpy", - "packaging", - "pyyaml", - "requests", - "six", # dependency chain: NNPACK -> PeachPy -> six - "typing-extensions>=4.10.0", - - # This list should be kept in sync with the requirements.txt in - # PyTorch root until the project fully 
migrates to pyproject.toml - "build[uv]", - "expecttest>=0.3.0", - "filelock", - "fsspec>=0.8.5", - "hypothesis", - "jinja2", - "lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'", - "networkx>=2.5.1", - "optree>=0.13.0", - "psutil", - "sympy>=1.13.3", - "typing-extensions>=4.13.2", + "setuptools", "wheel", + "astunparse", + "numpy", + "ninja", + "pyyaml", + "cmake", + "typing-extensions", + "requests", ] +# Use legacy backend to import local packages in setup.py +build-backend = "setuptools.build_meta:__legacy__" -[project] -name = "torch" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -readme = "README.md" -requires-python = ">=3.10" -# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77 -# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment. -# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed -# to an error on 2026.02.18. 
See also: https://github.com/pypa/setuptools/issues/4903 -license = { text = "BSD-3-Clause" } -authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }] -keywords = ["pytorch", "machine learning"] -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - "Programming Language :: C++", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", -] -dynamic = [ - "entry-points", - "dependencies", - "scripts", - "version", -] -[project.urls] -Homepage = "https://pytorch.org" -Repository = "https://github.com/pytorch/pytorch" -Documentation = "https://pytorch.org/docs" -"Issue Tracker" = "https://github.com/pytorch/pytorch/issues" -Forum = "https://discuss.pytorch.org" +[tool.black] +# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 +# line-length = 120 +target-version = ["py38", "py39", "py310", "py311"] -[project.optional-dependencies] -optree = ["optree>=0.13.0"] -opt-einsum = ["opt-einsum>=3.3"] -pyyaml = ["pyyaml"] - -# Linter tools ################################################################# - -[tool.isort] -src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"] -extra_standard_library = ["typing_extensions"] -skip_gitignore = true -skip_glob = ["third_party/*"] -atomic = true -profile = "black" -indent = 4 -line_length = 88 -lines_after_imports = 2 -multi_line_output = 3 
-include_trailing_comma = true -combine_as_imports = true - -[tool.usort.known] -first_party = ["caffe2", "torch", "torchgen", "functorch", "test"] -standard_library = ["typing_extensions"] [tool.ruff] -line-length = 88 -src = ["caffe2", "torch", "torchgen", "functorch", "test"] +target-version = "py38" -[tool.ruff.format] -docstring-code-format = true -quote-style = "double" - -[tool.ruff.lint] # NOTE: Synchoronize the ignores with .flake8 -external = [ - "B001", - "B902", - "B950", - "E121", - "E122", - "E128", - "E131", - "E704", - "E723", - "F723", - "F812", - "P201", - "P204", - "T484", - "TOR901", -] ignore = [ # these ignores are from flake8-bugbear; please fix! "B007", "B008", "B017", "B018", # Useless expression + "B019", "B023", "B028", # No explicit `stacklevel` keyword argument found + "B904", "E402", "C408", # C408 ignored because we like the dict keyword argument syntax "E501", # E501 is not flexible enough, we're using B950 instead "E721", + "E731", # Assign lambda expression "E741", "EXE001", "F405", - "FURB122", # writelines + "F841", # these ignores are from flake8-logging-format; please fix! "G101", # these ignores are from ruff NPY; please fix! @@ -167,49 +48,39 @@ ignore = [ # these ignores are from ruff PERF; please fix! "PERF203", "PERF401", + "PERF403", # these ignores are from PYI; please fix! + "PYI019", "PYI024", "PYI036", "PYI041", "PYI056", "SIM102", "SIM103", "SIM112", # flake8-simplify code styles "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason - "SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression + "SIM108", "SIM110", "SIM114", # Combine `if` branches using logical `or` operator "SIM115", "SIM116", # Disable Use a dictionary instead of consecutive `if` statements "SIM117", "SIM118", + "UP006", # keep-runtime-typing "UP007", # keep-runtime-typing - "UP045", # keep-runtime-typing - "TC006", - # TODO: Remove Python-3.10 specific suppressions - "B905", - "UP035", - "UP036", - "UP038", - "UP041", - "FURB161", ] +line-length = 120 select = [ "B", - "B904", # Re-raised error without specifying the cause via the from keyword "C4", "G", "E", "EXE", "F", "SIM1", - "SIM911", "W", # Not included in flake8 - "FURB", - "LOG", "NPY", "PERF", "PGH004", - "PIE790", "PIE794", "PIE800", "PIE804", @@ -218,96 +89,40 @@ select = [ "PLC0131", # type bivariance "PLC0132", # type param mismatch "PLC0205", # string as __slots__ - "PLC3002", # unnecessary-direct-lambda-call "PLE", "PLR0133", # constant comparison "PLR0206", # property with params "PLR1722", # use sys exit - "PLR1736", # unnecessary list index "PLW0129", # assert on string literal - "PLW0131", # named expr without context - "PLW0133", # useless exception statement - "PLW0245", # super without brackets "PLW0406", # import self "PLW0711", # binary op exception - "PLW1501", # bad open mode - "PLW1507", # shallow copy os.environ "PLW1509", # preexec_fn not safe with threads - "PLW2101", # useless lock statement "PLW3301", # nested min max "PT006", # TODO: enable more PT rules - "PT014", # duplicate parameterize case "PT022", "PT023", "PT024", "PT025", "PT026", "PYI", - "Q003", # avoidable escaped quote - "Q004", # unnecessary escaped quote - "RSE", "RUF008", # mutable dataclass default - "RUF013", # ban implicit optional "RUF015", # access first ele in constant time "RUF016", # type error non-integer index "RUF017", - "RUF018", # no assignment in assert - "RUF019", # unnecessary-key-check - 
"RUF020", # never union - "RUF024", # from keys mutable - "RUF026", # default factory kwarg - "RUF030", # No print statement in assert - "RUF033", # default values __post_init__ dataclass - "RUF041", # simplify nested Literal - "RUF048", # properly parse `__version__` - "RUF200", # validate pyproject.toml - "S324", # for hashlib FIPS compliance - "SLOT", - "TC", - "TRY002", # ban vanilla raise (todo fix NOQAs) - "TRY203", - "TRY401", # verbose-log-message + "TRY200", + "TRY302", "UP", - "YTT", ] -[tool.ruff.lint.pyupgrade] -# Preserve types, even if a file imports `from __future__ import annotations`. -keep-runtime-typing = true - -[tool.ruff.lint.per-file-ignores] +[tool.ruff.per-file-ignores] "__init__.py" = [ "F401", ] -"*.pyi" = [ - "PYI011", # typed-argument-default-in-stub - "PYI021", # docstring-in-stub - "PYI053", # string-or-bytes-too-long -] -"functorch/notebooks/**" = [ - "F401", -] -"test/export/**" = [ - "PGH004" -] -"test/typing/**" = [ - "PGH004" -] "test/typing/reveal/**" = [ "F821", ] "test/torch_np/numpy_tests/**" = [ "F821", - "NPY201", -] -"test/dynamo/test_bytecode_utils.py" = [ - "F821", -] -"test/dynamo/test_debug_utils.py" = [ - "UP037", -] -"test/dynamo/test_misc.py" = [ - "PGH004", ] "test/jit/**" = [ "PLR0133", # tests require this for JIT @@ -321,33 +136,19 @@ keep-runtime-typing = true "RUF015", "UP", # We don't want to modify the jit test as they test specify syntax ] -"test/inductor/s429861_repro.py" = [ - "PGH004", -] -"test/inductor/test_torchinductor.py" = [ - "UP037", -] -# autogenerated #TODO figure out why file level noqa is ignored -"torch/_appdirs.py" = ["PGH004"] -"torch/jit/_shape_functions.py" = ["PGH004"] -"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"] -"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"] -"torch/_inductor/codegen/**" = [ - "PGH004" + +"torch/onnx/**" = [ + "UP037", # ONNX does runtime type checking ] + "torchgen/api/types/__init__.py" = [ "F401", "F403", ] 
+"torchgen/executorch/api/types/__init__.py" = [ + "F401", + "F403", +] "torch/utils/collect_env.py" = [ "UP", # collect_env.py needs to work with older versions of Python ] -"torch/_vendor/**" = [ - "UP", # No need to mess with _vendor -] -"tools/linter/**" = [ - "LOG015" # please fix -] - -[tool.codespell] -ignore-words = "tools/linter/dictionary.txt" diff --git a/python-torch.spec b/python-torch.spec index d3c31d7..e3f7f13 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -6,20 +6,13 @@ # So pre releases can be tried %bcond_with gitcommit %if %{with gitcommit} -# v2.9.0-rc9 -%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1 +# ToT +%global commit0 b36e01801b89a516f4271f796773d5f4b43f1186 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20251008 -%global pypi_version 2.9.0 -%global flatbuffers_version 24.12.23 -%global miniz_version 3.0.2 -%global pybind11_version 2.13.6 -%global rc_tag -rc9 +%global date0 20240521 +%global pypi_version 2.4.0 %else -%global pypi_version 2.9.1 -%global flatbuffers_version 24.12.23 -%global miniz_version 3.0.2 -%global pybind11_version 2.13.6 +%global pypi_version 2.3.0 %endif # For -test subpackage @@ -30,31 +23,75 @@ %bcond_with test %ifarch x86_64 +# ROCm support came in F40 +%if 0%{?fedora} > 39 %bcond_without rocm +%else +%bcond_with rocm +%endif +%endif +# hipblaslt is in development +%bcond_with hipblaslt +# Which families gpu build for +%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 +%global rocm_default_gpu default +%bcond_without rocm_loop + +# Caffe2 support came in F41 +%if 0%{?fedora} > 40 +%bcond_without caffe2 +%else +%bcond_with caffe2 %endif +# Distributed support came in F41 +%if 0%{?fedora} > 40 +%bcond_without distributed # For testing distributed+rccl etc. 
%bcond_without rccl %bcond_with gloo %bcond_without mpi %bcond_without tensorpipe +%else +%bcond_with distributed +%endif + +# OpenCV support came in F41 +%if 0%{?fedora} > 40 +%bcond_without opencv +%else +%bcond_with opencv +%endif + +# Do no confuse xnnpack versions +%if 0%{?fedora} > 40 +%bcond_without xnnpack +%else +%bcond_with xnnpack +%endif + +%if 0%{?fedora} > 39 +%bcond_without pthreadpool +%else +%bcond_with pthreadpool +%endif + +%if 0%{?fedora} > 39 +%bcond_without pocketfft +%else +%bcond_with pocketfft +%endif + +# For testing cuda +%ifarch x86_64 +%bcond_with cuda +%endif # Disable dwz with rocm because memory can be exhausted %if %{with rocm} %define _find_debuginfo_dwz_opts %{nil} %endif -# These came in 2.4 and not yet in Fedora -%bcond_with opentelemetry -%bcond_with httplib -%bcond_with kineto - -%if 0%{?fedora} -%bcond_without onnx -%else -%bcond_with onnx -%endif - Name: python-%{pypi_name} %if %{with gitcommit} Version: %{pypi_version}^git%{date0}.%{shortcommit0} @@ -69,13 +106,21 @@ License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AN URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source1000: pyproject.toml +Source100: pyproject.toml %else Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif -Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz -Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz +%if %{with cuda} +%global cuf_ver 1.1.2 +Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz +%global cul_ver 3.4.1 +Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz +%endif + +%if %{with tensorpipe} # Developement on 
tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit %global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e %global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) @@ -86,73 +131,120 @@ Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz %global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 %global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz - -%if %{without opentelemetry} -%global ot_ver 1.14.2 -Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz %endif -%if %{without httplib} -%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 -%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) -Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz +%if %{without xnnpack} +%global xnn_commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2 +%global xnn_scommit %(c=%{xnn_commit}; echo ${c:0:7}) +Source30: https://github.com/google/xnnpack/archive/%{xnn_commit}/xnnpack-%{xnn_scommit}.tar.gz +%global fx_commit 63058eff77e11aa15bf531df5dd34395ec3017c8 +%global fx_scommit %(c=%{fx_commit}; echo ${c:0:7}) +Source31: https://github.com/Maratyszcza/fxdiv/archive/%{fx_commit}/FXdiv-%{fx_scommit}.tar.gz +%global fp_commit 0a92994d729ff76a58f692d3028ca1b64b145d91 +%global fp_scommit %(c=%{fp_commit}; echo ${c:0:7}) +Source32: https://github.com/Maratyszcza/FP16/archive/%{fp_commit}/FP16-%{fp_scommit}.tar.gz +%global ps_commit 072586a71b55b7f8c584153d223e95687148a900 +%global ps_scommit %(c=%{ps_commit}; echo ${c:0:7}) +Source33: https://github.com/Maratyszcza/psimd/archive/%{ps_commit}/psimd-%{ps_scommit}.tar.gz %endif -%if %{without kineto} -%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 -%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) -Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz 
+%if %{without pthreadpool} +%global pt_commit 4fe0e1e183925bf8cfa6aae24237e724a96479b8 +%global pt_scommit %(c=%{pt_commit}; echo ${c:0:7}) +Source40: https://github.com/Maratyszcza/pthreadpool/archive/%{pt_commit}/pthreadpool-%{pt_scommit}.tar.gz %endif -%global ox_ver 1.18.0 -Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz +%if %{without pocketfft} +%global pf_commit 076cb3d2536b7c5d0629093ad886e10ac05f3623 +%global pf_scommit %(c=%{pf_commit}; echo ${c:0:7}) +Source50: https://github.com/mreineck/pocketfft/archive/%{pf_commit}/pocketfft-%{pf_scommit}.tar.gz +%endif + +Patch0: 0001-no-third_party-foxi.patch +Patch3: 0001-Stub-in-kineto-ActivityType.patch +Patch5: 0001-disable-submodule-search.patch + +%if %{with caffe2} +Patch6: 0001-reenable-foxi-linking.patch +%endif + +# Bring some patches forward +%if %{without gitcommit} +# https://github.com/pytorch/pytorch/pull/123384 +Patch7: 0001-Reenable-dim-for-python-3.12.patch + +# Dynamo/Inductor on 3.12 +Patch8: 0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch +%endif + +%if %{with rocm} +# ROCm patches +# https://github.com/pytorch/pytorch/pull/120551 +Patch100: 0001-Optionally-use-hipblaslt.patch +Patch101: 0001-cuda-hip-signatures.patch +Patch102: 0001-silence-an-assert.patch +Patch103: 0001-can-not-use-with-c-files.patch +Patch104: 0001-use-any-hip.patch +Patch105: 0001-disable-use-of-aotriton.patch +%endif + +# Do not claim aarch64 on anything newer than F41 +%if 0%{?fedora} > 40 +ExclusiveArch: x86_64 aarch64 +%else +ExclusiveArch: x86_64 +%endif -%global pt_arches x86_64 aarch64 -ExclusiveArch: %pt_arches %global toolchain gcc %global _lto_cflags %nil BuildRequires: cmake +BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel -BuildRequires: flexiblas-devel BuildRequires: fmt-devel +%if %{with caffe2} BuildRequires: foxi-devel +%endif BuildRequires: gcc-c++ BuildRequires: gcc-gfortran - +%if %{with distributed} %if %{with gloo} BuildRequires: gloo-devel %endif 
-BuildRequires: json-devel - -BuildRequires: libomp-devel -BuildRequires: moodycamel-concurrentqueue-devel -BuildRequires: numactl-devel -BuildRequires: ninja-build -%if %{with onnx} -BuildRequires: onnx-devel %endif +BuildRequires: ninja-build +BuildRequires: onnx-devel +BuildRequires: libomp-devel +%if %{with distributed} %if %{with mpi} BuildRequires: openmpi-devel %endif +%endif +BuildRequires: openblas-devel BuildRequires: protobuf-devel BuildRequires: sleef-devel BuildRequires: valgrind-devel -BuildRequires: pocketfft-devel -BuildRequires: pthreadpool-devel -BuildRequires: cpuinfo-devel +%if %{with pocketfft} +BuildRequires: pocketfft-devel +%endif + +%if %{with pthreadpool} +BuildRequires: pthreadpool-devel +%endif + +%if %{with xnnpack} BuildRequires: FP16-devel BuildRequires: fxdiv-devel BuildRequires: psimd-devel -BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e +BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a +%endif BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) BuildRequires: python3dist(numpy) -BuildRequires: python3dist(pip) BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) BuildRequires: python3dist(sphinx) @@ -166,37 +258,39 @@ BuildRequires: python3dist(sympy) %if %{with rocm} BuildRequires: hipblas-devel +%if %{with hipblaslt} BuildRequires: hipblaslt-devel +%endif BuildRequires: hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel -BuildRequires: hipsparselt-devel BuildRequires: hipsolver-devel -# Magma is broken on ROCm 7 -# BuildRequires: magma-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel BuildRequires: rocrand-devel BuildRequires: rocfft-devel +%if %{with distributed} %if %{with rccl} BuildRequires: rccl-devel %endif +%endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel -BuildRequires: 
rocm-compilersupport-macros BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -BuildRequires: rocsolver-devel -BuildRequires: rocm-smi-devel +BuildRequires: rocm-rpm-macros-modules BuildRequires: rocthrust-devel BuildRequires: roctracer-devel -Requires: amdsmi +Requires: rocm-rpm-macros-modules +%endif +%if %{with opencv} +BuildRequires: opencv-devel %endif %if %{with test} @@ -204,9 +298,49 @@ BuildRequires: google-benchmark-devel %endif Requires: python3dist(dill) -Requires: python3dist(yaml) -Obsoletes: caffe = 1.0^git20200212.9b89154 +# For convience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = 22.3.3 +# MIT +Provides: bundled(miniz) = 2.1.0 +Provides: bundled(pybind11) = 2.11.1 + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + +# These are already in Fedora +%if %{without xnnpack} +# BSD-3-Clause +Provides: bundled(xnnpack) +# MIT +Provides: bundled(FP16) +# MIT +Provides: bundled(fxdiv) +# MIT +Provides: bundled(psimd) +%endif + +%if %{without pthreadpool} +# BSD-2-Clause +Provides: bundled(pthreadpool) +%endif + +%if %{without pocketfft} +# BSD-3-Clause +Provides: bundled(pocketfft) +%endif + +# For convience +Provides: pytorch %description PyTorch is a Python package that provides two high-level features: @@ -220,24 +354,6 @@ and Cython to extend PyTorch when needed. 
%package -n python3-%{pypi_name} Summary: %{summary} -# For convience -Provides: pytorch - -# Apache-2.0 -Provides: bundled(flatbuffers) = %{flatbuffers_version} -# MIT -Provides: bundled(miniz) = %{miniz_version} -Provides: bundled(pybind11) = %{pybind11_version} - -%if %{with tensorpipe} -# BSD-3-Clause -Provides: bundled(tensorpipe) -# Apache-2.0 -Provides: bundled(libnop) -# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause -Provides: bundled(libuv) = 1.41.0 -%endif - %description -n python3-%{pypi_name} PyTorch is a Python package that provides two high-level features: @@ -247,6 +363,33 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. +%if %{with rocm} +%package -n python3-%{pypi_name}-rocm-gfx8 +Summary: %{name} for ROCm gfx8 + +%description -n python3-%{pypi_name}-rocm-gfx8 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx9 +Summary: %{name} for ROCm gfx9 + +%description -n python3-%{pypi_name}-rocm-gfx9 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx10 +Summary: %{name} for ROCm gfx10 + +%description -n python3-%{pypi_name}-rocm-gfx10 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx11 +Summary: %{name} for ROCm gfx11 + +%description -n python3-%{pypi_name}-rocm-gfx11 +%{summary} + +%endif + %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -262,8 +405,7 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} # Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE1000} . - +cp %{SOURCE100} . 
%else %autosetup -p1 -n pytorch-v%{version} %endif @@ -273,11 +415,20 @@ rm -rf %{pypi_name}.egg-info tar xf %{SOURCE1} rm -rf third_party/flatbuffers/* -cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/ +cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ tar xf %{SOURCE2} rm -rf third_party/pybind11/* -cp -r pybind11-%{pybind11_version}/* third_party/pybind11/ +cp -r pybind11-2.11.1/* third_party/pybind11/ + +%if %{with cuda} +tar xf %{SOURCE10} +rm -rf third_party/cudnn_frontend/* +cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ +tar xf %{SOURCE11} +rm -rf third_party/cutlass/* +cp -r cutlass-%{cul_ver}/* third_party/cutlass/ +%endif %if %{with tensorpipe} tar xf %{SOURCE20} @@ -289,38 +440,43 @@ cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ tar xf %{SOURCE22} rm -rf third_party/tensorpipe/third_party/libnop/* cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ - -# gcc 15 include cstdint -sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/allocator.h -sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/memory.h %endif -%if %{without opentelemtry} -tar xf %{SOURCE60} -rm -rf third_party/opentelemetry-cpp/* -cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ +%if %{without xnnpack} +tar xf %{SOURCE30} +rm -rf third_party/XNNPACK/* +cp -r XNNPACK-*/* third_party/XNNPACK/ +tar xf %{SOURCE31} +rm -rf third_party/FXdiv/* +cp -r FXdiv-*/* third_party/FXdiv/ +tar xf %{SOURCE32} +rm -rf third_party/FP16/* +cp -r FP16-*/* third_party/FP16/ +tar xf %{SOURCE33} +rm -rf third_party/psimd/* +cp -r psimd-*/* third_party/psimd/ %endif -%if %{without httplib} -tar xf %{SOURCE70} -rm -rf third_party/cpp-httplib/* -cp -r cpp-httplib-*/* third_party/cpp-httplib/ +%if %{without pthreadpool} +tar xf %{SOURCE40} +rm -rf third_party/pthreadpool/* +cp -r pthreadpool-*/* third_party/pthreadpool/ %endif -%if %{without kineto} -tar xf %{SOURCE80} -rm -rf third_party/kineto/* -cp -r kineto-*/* 
third_party/kineto/ +%if %{without pocketfft} +tar xf %{SOURCE50} +rm -rf third_party/pocketfft/* +cp -r pocketfft-*/* third_party/pocketfft/ %endif -%if %{without onnx} -tar xf %{SOURCE90} -rm -rf third_party/onnx/* -cp -r onnx-*/* third_party/onnx/ +%if %{with opencv} +%if %{without gitcommit} +# Reduce requirements, *FOUND is not set +sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt +sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt +sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt +%endif %endif - -# Adjust for the hipblaslt's we build -sed -i -e 's@"gfx90a", "gfx940", "gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp %if 0%{?rhel} # In RHEL but too old @@ -328,51 +484,26 @@ sed -i -e '/typing-extensions/d' setup.py # Need to pip these sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py -%else -# for 2.5.0 -sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py %endif # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake -# Compress hip -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake -# Silence noisy warning -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake -# Use parallel jobs -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc 
-parallel-jobs=4@' cmake/Dependencies.cmake -# Need to link with librocm_smi64 -sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake # No third_party fmt, use system sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt -sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt -sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $)@@' aten/src/ATen/CMakeLists.txt - -sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake -sed -i -e 's@fmt::fmt-header-only@fmt@' caffe2/CMakeLists.txt - sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake # No third_party FXdiv +%if %{with xnnpack} sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt - -# https://github.com/pytorch/pytorch/issues/149803 -# Tries to checkout nccl -sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py - -# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo -sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py +%endif # Release comes fully loaded with third party src # Remove what we can @@ -382,7 +513,7 @@ sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # the third_party dir to 
compile the file. # mimiz is licensed MIT # https://github.com/richgel999/miniz/blob/master/LICENSE -mv third_party/miniz-%{miniz_version} . +mv third_party/miniz-2.1.0 . # # setup.py depends on this script mv third_party/build_bundled.py . @@ -392,24 +523,28 @@ mv third_party/flatbuffers . mv third_party/pybind11 . +%if %{with cuda} +mv third_party/cudnn_frontend . +mv third_party/cutlass . +%endif + %if %{with tensorpipe} mv third_party/tensorpipe . %endif -%if %{without opentelemetry} -mv third_party/opentelemetry-cpp . +%if %{without xnnpack} +mv third_party/XNNPACK . +mv third_party/FXdiv . +mv third_party/FP16 . +mv third_party/psimd . %endif -%if %{without httplib} -mv third_party/cpp-httplib . +%if %{without pthreadpool} +mv third_party/pthreadpool . %endif -%if %{without kineto} -mv third_party/kineto . -%endif - -%if %{without onnx} -mv third_party/onnx . +%if %{without pocketfft} +mv third_party/pocketfft . %endif %if %{with test} @@ -420,55 +555,54 @@ mv third_party/googletest . 
rm -rf third_party/* # Put stuff back mv build_bundled.py third_party -mv miniz-%{miniz_version} third_party +mv miniz-2.1.0 third_party mv flatbuffers third_party mv pybind11 third_party +%if %{with cuda} +mv cudnn_frontend third_party +mv cutlass third_party +%endif + %if %{with tensorpipe} mv tensorpipe third_party %endif -%if %{without opentelemetry} -mv opentelemetry-cpp third_party +%if %{without xnnpack} +mv XNNPACK third_party +mv FXdiv third_party +mv FP16 third_party +mv psimd third_party %endif -%if %{without httplib} -mv cpp-httplib third_party +%if %{without pthreadpool} +mv pthreadpool third_party %endif -%if %{without kineto} -mv kineto third_party -%endif - -%if %{without onnx} -mv onnx third_party +%if %{without pocketfft} +mv pocketfft third_party %endif %if %{with test} mv googletest third_party %endif +%if %{with pocketfft} # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft -cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ +%endif # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* third_party/valgrind-headers -# Fix installing to /usr/lib64 -sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt - -# reenable foxi linking -sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake - -# cmake version changed -sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt -sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt -%if %{without opentelemtry} -sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt +%if %{without gitcommit} +# Remove unneeded OpenCL files that confuse the lincense scanner 
+rm caffe2/contrib/opencl/OpenCL/cl.hpp +rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h +rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp %endif %if %{with rocm} @@ -476,45 +610,32 @@ sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION ./tools/amd_build/build_amd.py # Fedora installs to /usr/include, not /usr/include/rocm-core sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h -# https://github.com/pytorch/pytorch/issues/149805 -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake -# Fedora installs to /usr/include, not /usr/include/rocm-core -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp -# use any hip, correct CMAKE_MODULE_PATH -sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake -sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake -# silence an assert -# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu - %endif -# moodycamel include path needs adjusting to use the system's -sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake +%if %{with cuda} +# build complains about not being able to build -pie without -fPIC +sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@string(APPEND CMAKE_CUDA_FLAGS " -fPIC -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@' CMakeLists.txt +%endif %build -# Export the arches -# echo "%%pytorch_arches %pt_arches" > macros.pytorch - # # Control the number of jobs # # The build can fail if too many threads exceed the physical memory -# Run at least one thread, more if CPU & memory resources are available.
+# So count cores and memory and increase the build memory until the build succeeds # -%ifarch x86_64 # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` -%else -# cpuinfo format varies on other arches, fall back to nproc -COMPILE_JOBS=`nproc` -%endif if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory +%if %{with cuda} +BUILD_MEM=4 +%else BUILD_MEM=2 +%endif MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` @@ -546,17 +667,15 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF -export USE_FAKELOWP=OFF export USE_FBGEMM=OFF export USE_FLASH_ATTENTION=OFF +export USE_GOLD_LINKER=OFF export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF -export USE_KLEIDIAI=OFF export USE_LITE_INTERPRETER_PROFILER=OFF export USE_LITE_PROTO=OFF export USE_MAGMA=OFF -export USE_MEM_EFF_ATTENTION=OFF export USE_MKLDNN=OFF export USE_MPI=OFF export USE_NCCL=OFF @@ -564,25 +683,43 @@ export USE_NNPACK=OFF export USE_NUMPY=ON export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF +export USE_QNNPACK=OFF export USE_ROCM=OFF +export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_SLEEF=ON export USE_SYSTEM_EIGEN_INSTALL=ON -%if %{with onnx} export USE_SYSTEM_ONNX=ON -%endif export USE_SYSTEM_PYBIND11=OFF export USE_SYSTEM_LIBS=OFF -export USE_SYSTEM_NCCL=OFF export USE_TENSORPIPE=OFF -export USE_XNNPACK=OFF -export USE_XPU=OFF +export USE_XNNPACK=ON + +%if %{with pthreadpool} export USE_SYSTEM_PTHREADPOOL=ON -export USE_SYSTEM_CPUINFO=ON +%endif + +%if %{with xnnpack} export USE_SYSTEM_FP16=ON export USE_SYSTEM_FXDIV=ON export USE_SYSTEM_PSIMD=ON -export USE_SYSTEM_XNNPACK=OFF +export USE_SYSTEM_XNNPACK=ON +%endif +%if %{with caffe2} +export BUILD_CAFFE2=ON +%endif + +%if %{with cuda} +%if %{without rocm} +export
CUDACXX=/usr/local/cuda/bin/nvcc +export CPLUS_INCLUDE_PATH=/usr/local/cuda/include +export USE_CUDA=ON +# The arches to build for +export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" +%endif +%endif + +%if %{with distributed} export USE_DISTRIBUTED=ON %if %{with tensorpipe} export USE_TENSORPIPE=ON @@ -596,6 +733,11 @@ export USE_SYSTEM_GLOO=ON %if %{with mpi} export USE_MPI=ON %endif +%endif + +%if %{with opencv} +export USE_OPENCV=ON +%endif %if %{with test} export BUILD_TEST=ON @@ -610,73 +752,112 @@ export BUILD_TEST=ON # # See BZ 2244862 + %if %{with rocm} export USE_ROCM=ON -export USE_ROCM_CK_SDPA=OFF -export USE_ROCM_CK_GEMM=OFF -export USE_FBGEMM_GENAI=OFF - -# Magma is broken on ROCm 7 -# export USE_MAGMA=ON export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` -#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -# pytorch uses clang, not hipcc -export HIP_CLANG_PATH=%{rocmllvm_bindir} -export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} - -%endif - -%if 0%{?fedora} -%pyproject_wheel -%else +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS %py3_build +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + %py3_build + mv build build-${gpu} + module purge +done %endif +%else + +%py3_build + +%endif %install -# pytorch rpm macros -# install -Dpm 644 macros.pytorch \ -# %{buildroot}%{_rpmmacrodir}/macros.pytorch - %if %{with rocm} + export USE_ROCM=ON -export USE_ROCM_CK=OFF export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` -# export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +export HIP_CLANG_PATH=`hipconfig -l`
+RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -# pytorch uses clang, not hipcc -export HIP_CLANG_PATH=%{rocmllvm_bindir} -export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +mv build-${gpu} build +%py3_install +mv build build-${gpu} +module purge +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + mv build-${gpu} build + # py3_install cannot take a per-GPU prefix, so run setup.py install by hand and clean up under that same prefix + %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} + rm -rfv %{buildroot}/usr/lib64/rocm/${gpu}/bin/__pycache__ + mv build build-${gpu} + module purge +done %endif -%if 0%{?fedora} -%pyproject_install -%pyproject_save_files '*torch*' %else %py3_install + %endif - -%check -# Not working yet -# pyproject_check_import torch - # Do not remote the empty files -%files -n python3-%{pypi_name} + +%files -n python3-%{pypi_name} %license LICENSE %doc README.md +%{_bindir}/convert-caffe2-to-onnx +%{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/%{pypi_name}* +%{python3_sitearch}/%{pypi_name} +%{python3_sitearch}/%{pypi_name}-*.egg-info %{python3_sitearch}/functorch +%{python3_sitearch}/torchgen +%if %{with caffe2} +%{python3_sitearch}/caffe2 +%endif + +%if %{with rocm} +%files -n python3-%{pypi_name}-rocm-gfx8 +%{_libdir}/rocm/gfx8/bin/* +%{_libdir}/rocm/gfx8/lib64/* + +%files -n python3-%{pypi_name}-rocm-gfx9 +%{_libdir}/rocm/gfx9/bin/* +%{_libdir}/rocm/gfx9/lib64/* + +%files -n python3-%{pypi_name}-rocm-gfx10 +%{_libdir}/rocm/gfx10/bin/* +%{_libdir}/rocm/gfx10/lib64/* + +%files -n python3-%{pypi_name}-rocm-gfx11 +%{_libdir}/rocm/gfx11/bin/* +%{_libdir}/rocm/gfx11/lib64/* +%endif %changelog %autochangelog diff --git a/sources b/sources index 9a3681f..0a8bb3b 100644 --- a/sources
+++ b/sources @@ -1,19 +1,19 @@ -SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6 +SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 +SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 +SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4 SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 -SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a +SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95 +SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f +SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683 +SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 SHA512 (libnop-910b558.tar.gz) = 
74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 -SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 -SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 -SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a -SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 -SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 -SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0 -SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28 -SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d -SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907 -SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc -SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341 -SHA512 (pytorch-v2.9.0.tar.gz) = 
ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023 -SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a +SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580 +SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda +SHA512 (xnnpack-fcbf55a.tar.gz) = 8063e27686f7b71cfba05b0c004c46db4506638689ffb112f013b3886de58653b60ca5487978c3f96275c17bb1136883ca4c93ddb2241a2c31925a950cb51759 +SHA512 (FXdiv-63058ef.tar.gz) = da33eab4d006645f383a1f24fc3e747db3aeb0613219297ec0ae69aa2617f07ba050ebd6a64a8cbde6d25481f176d0ec3b9753a95d1fbcead2136595f3e50e97 +SHA512 (FP16-0a92994.tar.gz) = 3f094f242425ea37de274eb8539dc5f8ab0c13fd5325d14180ef12e9c04e6002a110d086c4c667f7c8054af337deab096d59482eb95cc8a632c3c412b48e89d1 +SHA512 (psimd-072586a.tar.gz) = a18faea093423dd9fe19ece8b228e011dccce0a2a22222f777ea19b023a13173966d4a8aea01147e8fc58de5d39cffcedeb2221a1572ae52bd5aba1295f86a94 +SHA512 (pthreadpool-4fe0e1e.tar.gz) = 764d81219f2bf1f056983b5c2576f377aeef37f0f2282e74f81bfe1eac5353e175603f80a6647c96165b24ebdcb7bc2189a376e8577ce4319d82679c33750451 +SHA512 (pocketfft-076cb3d.tar.gz) = a5dc5348b2079377787384722bf31be0cc6eed3bfacbf8a7fc3a4bc5d65832deb0da47686c12795c7e925286a3b43f6b5368ee380ddbe839c36edd106f1321a9