diff --git a/.gitignore b/.gitignore index c424df5..3f2501f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,33 +1,2 @@ /pytorch-v2.1.0.tar.gz /pytorch-v2.1.2.tar.gz -/pytorch-975d428.tar.gz -/v23.3.3.tar.gz -/v2.11.1.tar.gz -/pytorch-6a89a75.tar.gz -/pytorch-74832f1.tar.gz -/pytorch-4bb5cb5.tar.gz -/tensorpipe-52791a2.tar.gz -/v1.41.0.tar.gz -/libnop-910b558.tar.gz -/pytorch-97ff6cf.tar.gz -/pytorch-v2.3.0.tar.gz -/pytorch-v2.3.1.tar.gz -/pytorch-v2.4.0.tar.gz -/v1.14.2.tar.gz -/cpp-httplib-3b6597b.tar.gz -/kineto-be13176.tar.gz -/pytorch-v2.4.1.tar.gz -/pytorch-v2.5.0.tar.gz -/pytorch-v2.5.1.tar.gz -/pytorch-v2.7.0.tar.gz -/v2.13.6.tar.gz -/pytorch-a1cb3cc.tar.gz -/v24.12.23.tar.gz -/kineto-5e75018.tar.gz -/pytorch-v2.8.0.tar.gz -/v1.18.0.tar.gz -/pytorch-715dca6.tar.gz -/pytorch-fd36458.tar.gz -/pytorch-0fabc3b.tar.gz -/pytorch-v2.9.0.tar.gz -/pytorch-v2.9.1.tar.gz diff --git a/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch deleted file mode 100644 index 925e03b..0000000 --- a/0001-Add-cmake-variable-USE_ROCM_CK.patch +++ /dev/null @@ -1,202 +0,0 @@ -From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Mon, 21 Jul 2025 11:35:03 -0700 -Subject: [PATCH] Add cmake variable USE_ROCM_CK - ---- - CMakeLists.txt | 1 + - aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- - aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- - cmake/Dependencies.cmake | 3 +++ - 4 files changed, 35 insertions(+), 31 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index a5d25e6afa0f..afc1b53efa64 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -240,6 +240,7 @@ cmake_dependent_option( - BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON - "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) - cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) -+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) - option(CAFFE2_STATIC_LINK_CUDA 
"Statically link CUDA libraries" OFF) - cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) - cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF -diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index c9cfd74b501e..59f6178218ee 100644 ---- a/aten/src/ATen/CMakeLists.txt -+++ b/aten/src/ATen/CMakeLists.txt -@@ -373,26 +373,26 @@ if(USE_ROCM) - # is header only, so this should be ok, except that the CMake build generates - # a ck/config.h. We just do that part here. Without this, the ck.h from the - # ROCM SDK may get accidentally used instead. -- function(_pytorch_rocm_generate_ck_conf) -- set(CK_ENABLE_INT8 "ON") -- set(CK_ENABLE_FP16 "ON") -- set(CK_ENABLE_FP32 "ON") -- set(CK_ENABLE_FP64 "ON") -- set(CK_ENABLE_BF16 "ON") -- set(CK_ENABLE_FP8 "ON") -- set(CK_ENABLE_BF8 "ON") -- set(CK_USE_XDL "ON") -- set(CK_USE_WMMA "ON") -- configure_file( -- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" -- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" -- ) -- endfunction() -+# function(_pytorch_rocm_generate_ck_conf) -+# set(CK_ENABLE_INT8 "ON") -+# set(CK_ENABLE_FP16 "ON") -+# set(CK_ENABLE_FP32 "ON") -+# set(CK_ENABLE_FP64 "ON") -+# set(CK_ENABLE_BF16 "ON") -+# set(CK_ENABLE_FP8 "ON") -+# set(CK_ENABLE_BF8 "ON") -+# set(CK_USE_XDL "ON") -+# set(CK_USE_WMMA "ON") -+# configure_file( -+# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" -+# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" -+# ) -+# endfunction() - list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) -- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) -- _pytorch_rocm_generate_ck_conf() -+# list(APPEND ATen_HIP_INCLUDE 
${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) -+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) -+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) -+# _pytorch_rocm_generate_ck_conf() - - # Next two lines are needed because TunableOp uses third-party/fmt - list(APPEND ATen_HIP_INCLUDE $) -@@ -409,7 +409,7 @@ endif() - ${native_quantized_hip_hip} - ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} - ) -- if(WIN32) # Windows doesn't support Composable Kernels -+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels - file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") - file(GLOB native_hip_ck "native/hip/ck*.hip") - exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index 89350a11bea7..e5b7960177cf 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -752,7 +752,7 @@ template <> - void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support double gemm yet - bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); - #else -@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) - bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); - } -@@ -1270,14 +1270,14 @@ template <> - void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support double gemm yet - 
gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); - #else - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); - #endif - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); - } -@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); -@@ -1311,7 +1311,7 @@ template <> - void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support complex gemm yet - gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); - #else -@@ -1327,7 +1327,7 @@ template <> - void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) - { - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { --#ifdef USE_ROCM -+#ifdef USE_ROCM_CK - // hipblaslt does not support complex gemm yet - gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); - #else -@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); - } -@@ 
-1361,7 +1361,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } -@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); - } -@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B - if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { - gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); - } --#if defined(USE_ROCM) && !defined(_MSC_VER) -+#if defined(USE_ROCM) && defined(USE_ROCM_CK) - else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { - TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); - } -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index a93386c27f8d..be1368999d38 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1031,6 +1031,9 @@ if(USE_ROCM) - if(HIPBLASLT_VEC_EXT) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) - endif() -+ if(USE_ROCM_CK) -+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) -+ endif() - list(APPEND HIP_HIPCC_FLAGS --offload-compress) - if(WIN32) - add_definitions(-DROCM_ON_WINDOWS) --- -2.49.0 - diff --git 
a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch deleted file mode 100644 index b6a282c..0000000 --- a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch +++ /dev/null @@ -1,359 +0,0 @@ -From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 -From: albanD -Date: Sat, 12 Jul 2025 03:42:33 -0400 -Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 - -Imported from -https://github.com/albanD/pytorch/tree/cpython314_build -commit 88bb9cdb72449f4277829e20d94ad8aec1894216 - -Signed-off-by: Tom Rix ---- - torch/_dynamo/bytecode_analysis.py | 2 +- - torch/ao/quantization/__init__.py | 5 +++- - torch/ao/quantization/qconfig.py | 4 ++- - torch/ao/quantization/utils.py | 7 +++-- - torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ - torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ - torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- - torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ - torch/csrc/utils/python_compat.h | 1 + - torch/onnx/__init__.py | 1 - - torch/utils/weak.py | 29 +++++++++++++++++-- - 11 files changed, 111 insertions(+), 19 deletions(-) - -diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py -index 3252ea91409f..2de74ee5bf8d 100644 ---- a/torch/_dynamo/bytecode_analysis.py -+++ b/torch/_dynamo/bytecode_analysis.py -@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): - TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) - else: - TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) --if sys.version_info >= (3, 12): -+if (3, 12) <= sys.version_info < (3, 14): - TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) - if sys.version_info >= (3, 13): - TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) -diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py -index ffc1792fd23f..cf5a8b99a894 100644 ---- a/torch/ao/quantization/__init__.py -+++ 
b/torch/ao/quantization/__init__.py -@@ -1,5 +1,6 @@ - # mypy: allow-untyped-defs - -+import sys - from typing import Callable, Optional, Union - - import torch -@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 - - # ensure __module__ is set correctly for public APIs - ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase] --ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" -+if sys.version_info < (3, 14): -+ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" -+ - for _f in [ - compare_results, - extract_results_from_loggers, -diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py -index efee5302ad42..d9a8fc78bab4 100644 ---- a/torch/ao/quantization/qconfig.py -+++ b/torch/ao/quantization/qconfig.py -@@ -1,5 +1,6 @@ - # mypy: allow-untyped-defs - import copy -+import sys - import warnings - from collections import namedtuple - from typing import Any, Optional, Union -@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N - - - QConfigAny = Optional[QConfig] --QConfigAny.__module__ = "torch.ao.quantization.qconfig" -+if sys.version_info < (3, 14): -+ QConfigAny.__module__ = "torch.ao.quantization.qconfig" - - - def _add_module_to_qconfig_obs_ctr( -diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py -index 4ac3112ec072..3b1503e01701 100644 ---- a/torch/ao/quantization/utils.py -+++ b/torch/ao/quantization/utils.py -@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) - """ - - import functools -+import sys - import warnings - from collections import OrderedDict - from inspect import getfullargspec, signature -@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized - - - NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] --NodePattern.__module__ = "torch.ao.quantization.utils" -+if sys.version_info < (3, 14): -+ NodePattern.__module__ = "torch.ao.quantization.utils" - - # This 
is the Quantizer class instance from torch/quantization/fx/quantize.py. - # Define separately to prevent circular imports. -@@ -31,7 +33,8 @@ QuantizerCls = Any - Pattern = Union[ - Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any - ] --Pattern.__module__ = "torch.ao.quantization.utils" -+if sys.version_info < (3, 14): -+ Pattern.__module__ = "torch.ao.quantization.utils" - - - # TODO: maybe rename this to MatchInputNode -diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c -index b68ef894aeaa..244d4165d5e8 100644 ---- a/torch/csrc/dynamo/cpython_defs.c -+++ b/torch/csrc/dynamo/cpython_defs.c -@@ -2,6 +2,20 @@ - #include - #include - -+#if IS_PYTHON_3_14_PLUS -+ -+const uint8_t* THP_PyOpcode_Caches = NULL; -+const int THP_PyOpcode_Caches_size = 0; -+ -+void -+THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) -+{} -+void -+THP_PyFrame_Clear(_PyInterpreterFrame *frame) -+{} -+ -+#else -+ - #if IS_PYTHON_3_11_PLUS - - #define Py_BUILD_CORE -@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; - const int THP_PyOpcode_Caches_size = 0; - - #endif -+ -+#endif // IS_PYTHON_3_14_PLUS -\ No newline at end of file -diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h -index 6b99c1d5aec8..616be16563cf 100644 ---- a/torch/csrc/dynamo/cpython_includes.h -+++ b/torch/csrc/dynamo/cpython_includes.h -@@ -21,6 +21,14 @@ - - #if IS_PYTHON_3_11_PLUS - #include -+#if IS_PYTHON_3_14_PLUS -+#include -+#include -+#endif -+#endif -+ -+#if IS_PYTHON_3_14_PLUS -+#include - #endif - - #undef Py_BUILD_CORE -@@ -30,6 +38,13 @@ - extern "C" { - #endif - -+#if IS_PYTHON_3_14_PLUS -+ -+#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) -+#define PREV_INSTR(x) (x)->instr_ptr -+ -+#else -+ - #if IS_PYTHON_3_13_PLUS - #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) - #define PREV_INSTR(x) (x)->instr_ptr -@@ -38,6 +53,8 @@ extern 
"C" { - #define PREV_INSTR(x) (x)->prev_instr - #endif - -+#endif // IS_PYTHON_3_14_PLUS -+ - #if IS_PYTHON_3_12_PLUS - #define FUNC(x) ((x)->f_funcobj) - #else -diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c -index f413782b2d30..72bb8839bac3 100644 ---- a/torch/csrc/dynamo/eval_frame.c -+++ b/torch/csrc/dynamo/eval_frame.c -@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { - return PyUnicode_AsUTF8(F_CODE(frame)->co_name); - } - --void clear_old_frame_if_python_312_plus( -- PyThreadState* tstate, -- THP_EVAL_API_FRAME_OBJECT* frame) { --#if IS_PYTHON_3_12_PLUS -- -- THP_PyFrame_Clear(frame); -- THP_PyThreadState_PopFrame(tstate, frame); -- --#endif --} -- - static PyObject* dynamo_eval_custom_code_impl( - PyThreadState* tstate, - THP_EVAL_API_FRAME_OBJECT* frame, -@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( - - static void enable_eval_frame_shim(PyThreadState* tstate) {} - static void enable_eval_frame_default(PyThreadState* tstate) {} -+PyObject* dynamo_eval_custom_code( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame, -+ PyCodeObject* code, -+ const char* trace_annotation, -+ int throw_flag) {} -+THPPyInterpreterFrame* THPPyInterpreterFrame_New( -+ THP_EVAL_API_FRAME_OBJECT* frame) {} -+PyObject* dynamo_eval_frame_default( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame, -+ int throw_flag) {} - - static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; - -@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { - - #endif // !(IS_PYTHON_3_14_PLUS) - -+void clear_old_frame_if_python_312_plus( -+ PyThreadState* tstate, -+ THP_EVAL_API_FRAME_OBJECT* frame) { -+#if IS_PYTHON_3_12_PLUS -+ -+ THP_PyFrame_Clear(frame); -+ THP_PyThreadState_PopFrame(tstate, frame); -+ -+#endif -+} -+ - static PyObject* increment_working_threads( - PyThreadState* tstate, - PyObject* module) { -diff --git 
a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp -index b839fb26fc91..c4ee36d87767 100644 ---- a/torch/csrc/dynamo/framelocals_mapping.cpp -+++ b/torch/csrc/dynamo/framelocals_mapping.cpp -@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) - PyCodeObject* co = F_CODE(frame); - _framelocals.resize(co->co_nlocalsplus, nullptr); - -+#if IS_PYTHON_3_14_PLUS -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - if (!frame->stacktop) { - return; - } -+#endif - - auto update_framelocals = [&](int i, PyObject* value) { - _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); -@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) - }; - - auto offset = co->co_nlocalsplus - co->co_nfreevars; -+#if IS_PYTHON_3_14_PLUS -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - for (int i = 0; i < offset; i++) { - update_framelocals(i, frame->localsplus[i]); - } -+#endif -+ - // Get references to closure variables -+#if IS_PYTHON_3_14_PLUS -+ PyObject* closure; -+ TORCH_CHECK(false, "Python 3.14+ not supported"); -+#else - PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; -+#endif - for (int i = 0; i < co->co_nfreevars; i++) { - update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); - } -diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h -index a1537611cc47..16292e4fd030 100644 ---- a/torch/csrc/utils/python_compat.h -+++ b/torch/csrc/utils/python_compat.h -@@ -13,6 +13,7 @@ extern "C" { - #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 - #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 - #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 -+#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 - - static inline int PyCode_GetNCellvars(PyCodeObject* code) { - // gh-26364 added co_ncellvars to Python 3.11.0rc1 -diff --git a/torch/onnx/__init__.py 
b/torch/onnx/__init__.py -index 345ffd2a065b..ceeadde5365b 100644 ---- a/torch/onnx/__init__.py -+++ b/torch/onnx/__init__.py -@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx" - OnnxExporterError.__module__ = "torch.onnx" - _OrtBackend.__module__ = "torch.onnx" - _OrtBackendOptions.__module__ = "torch.onnx" --_OrtExecutionProvider.__module__ = "torch.onnx" - enable_fake_mode.__module__ = "torch.onnx" - is_onnxrt_backend_supported.__module__ = "torch.onnx" - -diff --git a/torch/utils/weak.py b/torch/utils/weak.py -index 8bf2ba5ed02b..9c7218cb2ad3 100644 ---- a/torch/utils/weak.py -+++ b/torch/utils/weak.py -@@ -3,8 +3,6 @@ from __future__ import annotations - - import collections.abc as _collections_abc - import weakref -- --from _weakrefset import _IterationGuard # type: ignore[attr-defined] - from collections.abc import Mapping, MutableMapping - from weakref import ref - -@@ -22,6 +20,33 @@ __all__ = [ - ] - - -+# TODO: make weakref properly thread safe following -+# https://github.com/python/cpython/pull/125325 -+class _IterationGuard: -+ # This context manager registers itself in the current iterators of the -+ # weak container, such as to delay all removals until the context manager -+ # exits. -+ # This technique should be relatively thread-safe (since sets are). -+ -+ def __init__(self, weakcontainer): -+ # Don't create cycles -+ self.weakcontainer = ref(weakcontainer) -+ -+ def __enter__(self): -+ w = self.weakcontainer() -+ if w is not None: -+ w._iterating.add(self) -+ return self -+ -+ def __exit__(self, e, t, b): -+ w = self.weakcontainer() -+ if w is not None: -+ s = w._iterating -+ s.remove(self) -+ if not s: -+ w._commit_removals() -+ -+ - # This file defines a variant of WeakKeyDictionary that overrides the hashing - # behavior of the key to use object identity, rather than the builtin - # __eq__/__hash__ functions. 
This is useful for Tensor weak keys, as their --- -2.49.0 - diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch new file mode 100644 index 0000000..56434a7 --- /dev/null +++ b/0001-Optionally-use-hipblaslt.patch @@ -0,0 +1,262 @@ +From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 23 Feb 2024 08:27:30 -0500 +Subject: [PATCH] Optionally use hipblaslt + +The hipblaslt package is not available on Fedora. +Instead of requiring the package, make it optional. +If it is found, define the preprocessor variable HIPBLASLT +Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks + +Signed-off-by: Tom Rix +--- + aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- + aten/src/ATen/cuda/CUDABlas.h | 2 +- + aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- + aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- + aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- + aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ + cmake/Dependencies.cmake | 3 +++ + cmake/public/LoadHIP.cmake | 4 ++-- + 8 files changed, 25 insertions(+), 19 deletions(-) + +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index d534ec5a178..e815463f630 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -14,7 +14,7 @@ + #include + + #ifdef USE_ROCM +-#if ROCM_VERSION >= 60000 ++#ifdef HIPBLASLT + #include + #endif + // until hipblas has an API to accept flags, we must use rocblas here +@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { + } + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + + #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 + // only for rocm 5.7 where we first supported hipblaslt, it was difficult +@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public 
CuBlasLtDescriptor< + }; + } // namespace + ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + template + void gemm_and_bias( + bool transpose_mat1, +@@ -1124,7 +1125,7 @@ template void gemm_and_bias( + at::BFloat16* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation); +- ++#endif + void scaled_gemm( + char transa, + char transb, +diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h +index eb12bb350c5..068607467dd 100644 +--- a/aten/src/ATen/cuda/CUDABlas.h ++++ b/aten/src/ATen/cuda/CUDABlas.h +@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); + template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + enum GEMMAndBiasActivationEpilogue { + None, + RELU, +diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h +index 4ec35f59a21..e28dc42034f 100644 +--- a/aten/src/ATen/cuda/CUDAContextLight.h ++++ b/aten/src/ATen/cuda/CUDAContextLight.h +@@ -9,7 +9,7 @@ + + // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also + // added bf16 support +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + #include + #endif + +@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + /* Handles */ + TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); + TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + TORCH_CUDA_CPP_API cublasLtHandle_t 
getCurrentCUDABlasLtHandle(); + #endif + +diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp +index 6913d2cd95e..3d4276be372 100644 +--- a/aten/src/ATen/cuda/CublasHandlePool.cpp ++++ b/aten/src/ATen/cuda/CublasHandlePool.cpp +@@ -29,7 +29,7 @@ namespace at::cuda { + + namespace { + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + void createCublasLtHandle(cublasLtHandle_t *handle) { + TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); + } +@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { + return handle; + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasLtHandle_t getCurrentCUDABlasLtHandle() { + #ifdef USE_ROCM + c10::DeviceIndex device = 0; +diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h +index 3ba0d761277..dde1870cfbf 100644 +--- a/aten/src/ATen/cuda/tunable/TunableGemm.h ++++ b/aten/src/ATen/cuda/tunable/TunableGemm.h +@@ -11,7 +11,7 @@ + + #include + #ifdef USE_ROCM +-#if ROCM_VERSION >= 50700 ++#ifdef HIPBLASLT + #include + #endif + #include +@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex +@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex 
+diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp +index 29e5c5e3cf1..df56f3d7f1d 100644 +--- a/aten/src/ATen/native/cuda/Blas.cpp ++++ b/aten/src/ATen/native/cuda/Blas.cpp +@@ -155,7 +155,7 @@ enum class Activation { + GELU, + }; + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { + switch (a) { + case Activation::None: +@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { + + #ifdef USE_ROCM + static bool isSupportedHipLtROCmArch(int index) { ++#if defined(HIPBLASLT) + hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); + std::string device_arch = prop->gcnArchName; + static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; +@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { + } + } + TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); ++#endif + return false; + } + #endif +@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + at::ScalarType scalar_type = self.scalar_type(); + c10::MaybeOwned self_; + if (&result != &self) { +-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) + // Strangely, if mat2 has only 1 row or column, we get + // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. 
+ // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] +@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + } + self__sizes = self_->sizes(); + } else { +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + useLtInterface = !disable_addmm_cuda_lt && + result.dim() == 2 && result.is_contiguous() && + isSupportedHipLtROCmArch(self.device().index()) && +@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + if (useLtInterface) { + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, +@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); + at::native::resize_output(amax, {}); + +-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) ++#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasCommonArgs args(mat1, mat2, out); + const auto out_dtype_ = args.result->scalar_type(); + TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); +@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 60000 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + // rocm's hipblaslt does not yet support amax, so calculate separately + auto out_float32 = out.to(kFloat); + out_float32.abs_(); +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 
b7ffbeb07dc..2b6c3678984 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1273,6 +1273,9 @@ if(USE_ROCM) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() ++ if(hipblaslt_FOUND) ++ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) ++ endif() + if(HIPBLASLT_CUSTOM_DATA_TYPE) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) + endif() +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index f6ca263c5e5..53eb0b63c1a 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -156,7 +156,7 @@ if(HIP_FOUND) + find_package_and_print_version(rocblas REQUIRED) + find_package_and_print_version(hipblas REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") +- find_package_and_print_version(hipblaslt REQUIRED) ++ find_package_and_print_version(hipblaslt) + endif() + find_package_and_print_version(miopen REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") +@@ -191,7 +191,7 @@ if(HIP_FOUND) + # roctx is part of roctracer + find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) + +- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") ++ if(hipblaslt_FOUND) + # check whether hipblaslt is using its own datatype + set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") + file(WRITE ${file} "" +-- +2.43.2 + diff --git a/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch new file mode 100644 index 0000000..4eec491 --- /dev/null +++ b/0001-Regenerate-flatbuffer-header.patch @@ -0,0 +1,39 @@ +From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 06:19:29 -0700 +Subject: [PATCH] Regenerate flatbuffer header + +For this error +torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: +error: static assertion failed: Non-compatible flatbuffers version included + 12 | FLATBUFFERS_VERSION_MINOR == 3 && + +PyTorch is expecting 23.3.3, what f38 has +Rawhide 
is at 23.5.26 + +Regenerate with +flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs + +Signed-off-by: Tom Rix +--- + torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h +index cffe8bc7a6..83575e4c19 100644 +--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h ++++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h +@@ -9,8 +9,8 @@ + // Ensure the included flatbuffers.h is the same version as when this file was + // generated, otherwise it may not be compatible. + static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && +- FLATBUFFERS_VERSION_MINOR == 3 && +- FLATBUFFERS_VERSION_REVISION == 3, ++ FLATBUFFERS_VERSION_MINOR == 5 && ++ FLATBUFFERS_VERSION_REVISION == 26, + "Non-compatible flatbuffers version included"); + + namespace torch { +-- +2.43.0 + diff --git a/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch new file mode 100644 index 0000000..f088645 --- /dev/null +++ b/0001-Stub-in-kineto-ActivityType.patch @@ -0,0 +1,73 @@ +From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 06:25:23 -0700 +Subject: [PATCH] Stub in kineto ActivityType + +There is an error with kineto is not used, the shim still +requires the ActivityTYpe.h header to get the enum Activity type. +So cut-n-paste just enough of the header in to do this. 
+ +Signed-off-by: Tom Rix +--- + torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ + 1 file changed, 44 insertions(+) + +diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h +index e92cbf003d..68985ab7d0 100644 +--- a/torch/csrc/profiler/kineto_shim.h ++++ b/torch/csrc/profiler/kineto_shim.h +@@ -12,7 +12,51 @@ + #undef USE_KINETO + #endif + ++#ifdef USE_KINETO + #include ++#else ++namespace libkineto { ++// copied from header ++/* ++ * Copyright (c) Meta Platforms, Inc. and affiliates. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of this source tree. ++ */ ++ ++// Note : All activity types are not enabled by default. Please add them ++// at correct position in the enum ++enum class ActivityType { ++ // Activity types enabled by default ++ CPU_OP = 0, // cpu side ops ++ USER_ANNOTATION, ++ GPU_USER_ANNOTATION, ++ GPU_MEMCPY, ++ GPU_MEMSET, ++ CONCURRENT_KERNEL, // on-device kernels ++ EXTERNAL_CORRELATION, ++ CUDA_RUNTIME, // host side cuda runtime events ++ CUDA_DRIVER, // host side cuda driver events ++ CPU_INSTANT_EVENT, // host side point-like events ++ PYTHON_FUNCTION, ++ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. ++ ++ // Optional Activity types ++ CUDA_SYNC, // synchronization events between runtime and kernels ++ GLOW_RUNTIME, // host side glow runtime events ++ MTIA_RUNTIME, // host side MTIA runtime events ++ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics ++ MTIA_CCP_EVENTS, // MTIA ondevice CCP events ++ HPU_OP, // HPU host side runtime event ++ XPU_RUNTIME, // host side xpu runtime events ++ ++ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
++ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, ++}; ++} ++ ++#endif + + #include + #include +-- +2.43.0 + diff --git a/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch new file mode 100644 index 0000000..719737c --- /dev/null +++ b/0001-can-not-use-with-c-files.patch @@ -0,0 +1,25 @@ +From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 06:41:49 -0500 +Subject: [PATCH] can not use with c files + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 4dd8042058..5f91f3ffab 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1269,7 +1269,7 @@ if(USE_ROCM) + list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) + list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) + list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) +- list(APPEND HIP_CXX_FLAGS -std=c++17) ++# list(APPEND HIP_CXX_FLAGS -std=c++17) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() +-- +2.43.0 + diff --git a/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch new file mode 100644 index 0000000..a258737 --- /dev/null +++ b/0001-cuda-hip-signatures.patch @@ -0,0 +1,42 @@ +From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 1 Feb 2024 11:29:47 -0500 +Subject: [PATCH] cuda - hip signatures + +--- + aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp +index 1b85e7776e..bb6f88783a 100644 +--- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp ++++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp +@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, + const char *src, + const char *name, + int numHeaders, ++#if !defined(USE_ROCM) + const 
char * const *headers, + const char * const *includeNames) { ++#else ++ const char **headers, ++ const char **includeNames) { ++#endif + auto fn = reinterpret_cast(getNVRTCLibrary().sym(__func__)); + if (!fn) + throw std::runtime_error("Can't get nvrtcCreateProgram"); +@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *); + NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *); + NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *); + #endif ++#if !defined(USE_ROCM) + NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *); ++#else ++NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **); ++#endif + _STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult); + NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*); + NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *); +-- +2.43.0 + diff --git a/0001-disable-submodule-search.patch b/0001-disable-submodule-search.patch new file mode 100644 index 0000000..b830fa6 --- /dev/null +++ b/0001-disable-submodule-search.patch @@ -0,0 +1,25 @@ +From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 29 Sep 2023 17:21:13 -0700 +Subject: [PATCH] disable submodule search + +--- + setup.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/setup.py b/setup.py +index 0fd886d945..e397df8fb6 100644 +--- a/setup.py ++++ b/setup.py +@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): + def build_deps(): + report("-- Building version " + version) + +- check_submodules() ++ # check_submodules() + check_pydep("yaml", "pyyaml") + + build_caffe2( +-- +2.43.0 + diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch new file mode 100644 index 0000000..34a1704 --- /dev/null +++ b/0001-disable-use-of-aotriton.patch @@ -0,0 +1,46 @@ +From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Tue, 19 Mar 2024 11:32:37 -0400 +Subject: [PATCH] disable use of aotriton + 
+--- + aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +index 96b839820efd..2d3dd0cb4b0f 100644 +--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp ++++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +@@ -21,9 +21,11 @@ + #include + #include + ++#ifdef USE_FLASH_ATTENTION + #if USE_ROCM + #include + #endif ++#endif + + /** + * Note [SDPA Runtime Dispatch] +@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { + } + + bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { ++#ifdef USE_FLASH_ATTENTION + // Check that the gpu is capable of running flash attention + using sm80 = SMVersion<8, 0>; + using sm90 = SMVersion<9, 0>; +@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug + } + #endif + return true; ++#else ++ return false; ++#endif + } + + bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { +-- +2.44.0 + diff --git a/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch new file mode 100644 index 0000000..71404e3 --- /dev/null +++ b/0001-no-third_party-FXdiv.patch @@ -0,0 +1,54 @@ +From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:20:28 -0500 +Subject: [PATCH] no third_party FXdiv + +--- + caffe2/CMakeLists.txt | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt +index b2f3adbfae..80a5625c8d 100644 +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -110,15 +110,15 @@ endif() + # Note: the folders that are being commented out have not been properly + # addressed yet. 
+ +-if(NOT MSVC AND USE_XNNPACK) +- if(NOT TARGET fxdiv) +- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") +- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") +- add_subdirectory( +- "${FXDIV_SOURCE_DIR}" +- "${CMAKE_BINARY_DIR}/FXdiv") +- endif() +-endif() ++#if(NOT MSVC AND USE_XNNPACK) ++# if(NOT TARGET fxdiv) ++# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") ++# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") ++# add_subdirectory( ++# "${FXDIV_SOURCE_DIR}" ++# "${CMAKE_BINARY_DIR}/FXdiv") ++# endif() ++#endif() + + add_subdirectory(core) + add_subdirectory(serialize) +@@ -1081,9 +1081,9 @@ if(USE_XPU) + target_compile_definitions(torch_xpu PRIVATE USE_XPU) + endif() + +-if(NOT MSVC AND USE_XNNPACK) +- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) +-endif() ++#if(NOT MSVC AND USE_XNNPACK) ++# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) ++#endif() + + # ========================================================== + # formerly-libtorch flags +-- +2.43.0 + diff --git a/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch new file mode 100644 index 0000000..6e82af2 --- /dev/null +++ b/0001-no-third_party-fmt.patch @@ -0,0 +1,65 @@ +From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:16:04 -0500 +Subject: [PATCH] no third_party fmt + +--- + c10/CMakeLists.txt | 2 +- + cmake/Dependencies.cmake | 6 +++--- + torch/CMakeLists.txt | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt +index 1f742f4c176..4fa08913bdd 100644 +--- a/c10/CMakeLists.txt ++++ b/c10/CMakeLists.txt +@@ -87,7 +87,7 @@ endif() + if(C10_USE_GLOG) + target_link_libraries(c10 PUBLIC glog::glog) + endif() +-target_link_libraries(c10 PRIVATE fmt::fmt-header-only) ++target_link_libraries(c10 PRIVATE fmt) + + if(C10_USE_NUMA) + message(STATUS "NUMA paths:") +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 6f5a2d5feff..42fbf80f6e8 100644 +--- a/cmake/Dependencies.cmake 
++++ b/cmake/Dependencies.cmake +@@ -1837,7 +1837,7 @@ endif() + # + set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) +-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) ++# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) + + # Disable compiler feature checks for `fmt`. + # +@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) + # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know + # `fmt` is compatible with a superset of the compilers that PyTorch is, it + # shouldn't be too bad to just disable the checks. +-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") ++# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") + +-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) ++# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) + + # ---[ Kineto +diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt +index 97a72eed55b..9e5014d1980 100644 +--- a/torch/CMakeLists.txt ++++ b/torch/CMakeLists.txt +@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES + python::python + pybind::pybind11 + shm +- fmt::fmt-header-only ++ fmt + ATEN_CPU_FILES_GEN_LIB) + + if(USE_ASAN AND TARGET Sanitizer::address) +-- +2.43.2 + diff --git a/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch new file mode 100644 index 0000000..ba1ec40 --- /dev/null +++ b/0001-no-third_party-foxi.patch @@ -0,0 +1,36 @@ +From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 08:11:55 -0500 +Subject: [PATCH] no third_party foxi + +--- + cmake/Dependencies.cmake | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 5f91f3ffab..8e1461af81 100644 +--- a/cmake/Dependencies.cmake ++++ 
b/cmake/Dependencies.cmake +@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) + endif() + endif() +- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) ++ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) + + add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) + if(NOT USE_SYSTEM_ONNX) +@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + endif() +- include_directories(${FOXI_INCLUDE_DIRS}) +- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++# include_directories(${FOXI_INCLUDE_DIRS}) ++# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.0 + diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch new file mode 100644 index 0000000..8e39795 --- /dev/null +++ b/0001-reenable-foxi-linking.patch @@ -0,0 +1,25 @@ +From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 22 Feb 2024 09:28:11 -0500 +Subject: [PATCH] reenable foxi linking + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 42fbf80f6e8..bc3a2dc6fee 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + endif() + # include_directories(${FOXI_INCLUDE_DIRS}) +-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. 
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.2 + diff --git a/0001-silence-an-assert.patch b/0001-silence-an-assert.patch new file mode 100644 index 0000000..0b20dcf --- /dev/null +++ b/0001-silence-an-assert.patch @@ -0,0 +1,25 @@ +From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 06:41:20 -0500 +Subject: [PATCH] silence an assert + +--- + aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu +index 657c0c77b3..b406aa6687 100644 +--- a/aten/src/ATen/native/cuda/IndexKernel.cu ++++ b/aten/src/ATen/native/cuda/IndexKernel.cu +@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind + + gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { + int64_t qvalue = static_cast(zero_point + nearbyintf(*(float*)in_data * inv_scale)); +- qvalue = std::clamp(qvalue, qmin, qmax); ++ //qvalue = std::clamp(qvalue, qmin, qmax); + *(scalar_t*)(out_data + offset) = static_cast(qvalue); + }); + }); +-- +2.43.0 + diff --git a/0001-use-any-hip.patch b/0001-use-any-hip.patch new file mode 100644 index 0000000..dca86ea --- /dev/null +++ b/0001-use-any-hip.patch @@ -0,0 +1,34 @@ +From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sat, 3 Feb 2024 15:01:28 -0500 +Subject: [PATCH] use any hip + +--- + cmake/public/LoadHIP.cmake | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index 1abeb06228..28458c4146 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -30,7 +30,7 @@ endif() + message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") + + # Add HIP to the 
CMAKE Module Path +-set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH}) ++set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH}) + + macro(find_package_and_print_version PACKAGE_NAME) + find_package("${PACKAGE_NAME}" ${ARGN}) +@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME) + endmacro() + + # Find the HIP Package +-find_package_and_print_version(HIP 1.0) ++find_package_and_print_version(HIP MODULE) + + if(HIP_FOUND) + set(PYTORCH_FOUND_HIP TRUE) +-- +2.43.0 + diff --git a/README.NVIDIA b/README.NVIDIA deleted file mode 100644 index b927f47..0000000 --- a/README.NVIDIA +++ /dev/null @@ -1,15 +0,0 @@ -Some help for building this package for NVIDIA/CUDA - -Review NVIDIA's documenation -https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html - -Review PyTorch documentation -https://github.com/pytorch/pytorch#from-source - -Some convience strings to cut-n-paste - -F39 -dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo - -Building is local. -Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch deleted file mode 100644 index 1900519..0000000 --- a/next/0001-Use-horrible-dynamo-stub.patch +++ /dev/null @@ -1,85 +0,0 @@ -From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sun, 20 Jul 2025 12:47:58 -0700 -Subject: [PATCH] Use horrible dynamo stub - -Rawhide's update of python is too fast for dynamo -So paper of the problem with a horrible stub that throws -runtime exceptions if dynamo is used. 
- -Signed-off-by: Tom Rix ---- - build_variables.bzl | 26 ++++++++++++---------- - torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ - 2 files changed, 30 insertions(+), 12 deletions(-) - create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp - -diff --git a/build_variables.bzl b/build_variables.bzl -index b266c80e8843..a3be6893349b 100644 ---- a/build_variables.bzl -+++ b/build_variables.bzl -@@ -140,7 +140,8 @@ core_trainer_sources = [ - "torch/csrc/autograd/variable.cpp", - "torch/csrc/autograd/utils/warnings.cpp", - "torch/csrc/autograd/jit_decomp_interface.cpp", -- "torch/csrc/dynamo/compiled_autograd.cpp", -+# "torch/csrc/dynamo/compiled_autograd.cpp", -+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", - "torch/csrc/jit/frontend/name_mangler.cpp", - "torch/csrc/jit/ir/type_hashing.cpp", - "torch/csrc/jit/serialization/pickler.cpp", -@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ - "torch/csrc/autograd/python_torch_functions_manual.cpp", - "torch/csrc/autograd/python_variable.cpp", - "torch/csrc/autograd/python_variable_indexing.cpp", -- "torch/csrc/dynamo/python_compiled_autograd.cpp", -- "torch/csrc/dynamo/cache_entry.cpp", -- "torch/csrc/dynamo/cpp_shim.cpp", -- "torch/csrc/dynamo/cpython_defs.c", -- "torch/csrc/dynamo/eval_frame.c", -- "torch/csrc/dynamo/eval_frame_cpp.cpp", -- "torch/csrc/dynamo/extra_state.cpp", -- "torch/csrc/dynamo/framelocals_mapping.cpp", -- "torch/csrc/dynamo/guards.cpp", -- "torch/csrc/dynamo/utils.cpp", -- "torch/csrc/dynamo/init.cpp", -+# "torch/csrc/dynamo/python_compiled_autograd.cpp", -+# "torch/csrc/dynamo/cache_entry.cpp", -+# "torch/csrc/dynamo/cpp_shim.cpp", -+# "torch/csrc/dynamo/cpython_defs.c", -+# "torch/csrc/dynamo/eval_frame.c", -+# "torch/csrc/dynamo/eval_frame_cpp.cpp", -+# "torch/csrc/dynamo/extra_state.cpp", -+# "torch/csrc/dynamo/framelocals_mapping.cpp", -+# "torch/csrc/dynamo/guards.cpp", -+# "torch/csrc/dynamo/utils.cpp", -+# "torch/csrc/dynamo/init.cpp", -+ 
"torch/csrc/dynamo/horrible_dynamo_stub.cpp", - "torch/csrc/functorch/init.cpp", - "torch/csrc/fx/node.cpp", - "torch/csrc/mps/Module.cpp", -diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp -new file mode 100644 -index 000000000000..3ac1324d4557 ---- /dev/null -+++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp -@@ -0,0 +1,16 @@ -+#include -+#include -+ -+namespace torch::dynamo::autograd { -+const std::unique_ptr& getPyCompilerInterface() { -+ throw std::runtime_error("Dynamo not supported"); -+ return nullptr; -+} -+std::vector> get_input_metadata( -+ const edge_list& edges) { -+ std::vector> r; -+ throw std::runtime_error("Dynamo not supported"); -+ return r; -+} -+ -+} --- -2.49.0 - diff --git a/next/pyproject.toml b/next/pyproject.toml new file mode 100644 index 0000000..9508ad0 --- /dev/null +++ b/next/pyproject.toml @@ -0,0 +1,154 @@ +[build-system] +requires = [ + "setuptools", + "wheel", + "astunparse", + "numpy", + "ninja", + "pyyaml", + "cmake", + "typing-extensions", + "requests", +] +# Use legacy backend to import local packages in setup.py +build-backend = "setuptools.build_meta:__legacy__" + + +[tool.black] +# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 +# line-length = 120 +target-version = ["py38", "py39", "py310", "py311"] + + +[tool.ruff] +target-version = "py38" + +# NOTE: Synchoronize the ignores with .flake8 +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B019", + "B023", + "B028", # No explicit `stacklevel` keyword argument found + "B904", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F841", + # these ignores are from flake8-logging-format; please fix! 
+ "G101", + # these ignores are from ruff NPY; please fix! + "NPY002", + # these ignores are from ruff PERF; please fix! + "PERF203", + "PERF401", + "PERF403", + # these ignores are from PYI; please fix! + "PYI019", + "PYI024", + "PYI036", + "PYI041", + "PYI056", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", + "UP006", # keep-runtime-typing + "UP007", # keep-runtime-typing +] +line-length = 120 +select = [ + "B", + "C4", + "G", + "E", + "EXE", + "F", + "SIM1", + "W", + # Not included in flake8 + "NPY", + "PERF", + "PGH004", + "PIE794", + "PIE800", + "PIE804", + "PIE807", + "PIE810", + "PLC0131", # type bivariance + "PLC0132", # type param mismatch + "PLC0205", # string as __slots__ + "PLE", + "PLR0133", # constant comparison + "PLR0206", # property with params + "PLR1722", # use sys exit + "PLW0129", # assert on string literal + "PLW0406", # import self + "PLW0711", # binary op exception + "PLW1509", # preexec_fn not safe with threads + "PLW3301", # nested min max + "PT006", # TODO: enable more PT rules + "PT022", + "PT023", + "PT024", + "PT025", + "PT026", + "PYI", + "RUF008", # mutable dataclass default + "RUF015", # access first ele in constant time + "RUF016", # type error non-integer index + "RUF017", + "TRY200", + "TRY302", + "UP", +] + +[tool.ruff.per-file-ignores] +"__init__.py" = [ + "F401", +] +"test/typing/reveal/**" = [ + "F821", +] +"test/torch_np/numpy_tests/**" = [ + "F821", +] +"test/jit/**" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/test_jit.py" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't 
want to modify the jit test as they test specify syntax +] + +"torch/onnx/**" = [ + "UP037", # ONNX does runtime type checking +] + +"torchgen/api/types/__init__.py" = [ + "F401", + "F403", +] +"torchgen/executorch/api/types/__init__.py" = [ + "F401", + "F403", +] +"torch/utils/collect_env.py" = [ + "UP", # collect_env.py needs to work with older versions of Python +] diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 925742b..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,353 +0,0 @@ -# Package ###################################################################### - -[build-system] -requires = [ - # 70.1.0: min version for integrated bdist_wheel command from wheel package - # 77.0.0: min version for SPDX expression support for project.license - "setuptools>=70.1.0,<80.0", - "cmake>=3.27", - "ninja", - "numpy", - "packaging", - "pyyaml", - "requests", - "six", # dependency chain: NNPACK -> PeachPy -> six - "typing-extensions>=4.10.0", -] -build-backend = "setuptools.build_meta" - -[dependency-groups] -dev = [ - # This list should be kept in sync with the requirements-build.txt - # in PyTorch root until the project fully migrates to pyproject.toml - # after which this can be removed as it is already specified in the - # [build-system] section - "setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0 - "cmake>=3.27", - "ninja", - "numpy", - "packaging", - "pyyaml", - "requests", - "six", # dependency chain: NNPACK -> PeachPy -> six - "typing-extensions>=4.10.0", - - # This list should be kept in sync with the requirements.txt in - # PyTorch root until the project fully migrates to pyproject.toml - "build[uv]", - "expecttest>=0.3.0", - "filelock", - "fsspec>=0.8.5", - "hypothesis", - "jinja2", - "lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'", - "networkx>=2.5.1", - "optree>=0.13.0", - "psutil", - "sympy>=1.13.3", - "typing-extensions>=4.13.2", - "wheel", -] - -[project] -name = "torch" -description = 
"Tensors and Dynamic neural networks in Python with strong GPU acceleration" -readme = "README.md" -requires-python = ">=3.10" -# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77 -# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment. -# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed -# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903 -license = { text = "BSD-3-Clause" } -authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }] -keywords = ["pytorch", "machine learning"] -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - "Programming Language :: C++", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", -] -dynamic = [ - "entry-points", - "dependencies", - "scripts", - "version", -] - -[project.urls] -Homepage = "https://pytorch.org" -Repository = "https://github.com/pytorch/pytorch" -Documentation = "https://pytorch.org/docs" -"Issue Tracker" = "https://github.com/pytorch/pytorch/issues" -Forum = "https://discuss.pytorch.org" - -[project.optional-dependencies] -optree = ["optree>=0.13.0"] -opt-einsum = ["opt-einsum>=3.3"] -pyyaml = ["pyyaml"] - -# Linter tools ################################################################# - 
-[tool.isort] -src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"] -extra_standard_library = ["typing_extensions"] -skip_gitignore = true -skip_glob = ["third_party/*"] -atomic = true -profile = "black" -indent = 4 -line_length = 88 -lines_after_imports = 2 -multi_line_output = 3 -include_trailing_comma = true -combine_as_imports = true - -[tool.usort.known] -first_party = ["caffe2", "torch", "torchgen", "functorch", "test"] -standard_library = ["typing_extensions"] - -[tool.ruff] -line-length = 88 -src = ["caffe2", "torch", "torchgen", "functorch", "test"] - -[tool.ruff.format] -docstring-code-format = true -quote-style = "double" - -[tool.ruff.lint] -# NOTE: Synchoronize the ignores with .flake8 -external = [ - "B001", - "B902", - "B950", - "E121", - "E122", - "E128", - "E131", - "E704", - "E723", - "F723", - "F812", - "P201", - "P204", - "T484", - "TOR901", -] -ignore = [ - # these ignores are from flake8-bugbear; please fix! - "B007", "B008", "B017", - "B018", # Useless expression - "B023", - "B028", # No explicit `stacklevel` keyword argument found - "E402", - "C408", # C408 ignored because we like the dict keyword argument syntax - "E501", # E501 is not flexible enough, we're using B950 instead - "E721", - "E741", - "EXE001", - "F405", - "FURB122", # writelines - # these ignores are from flake8-logging-format; please fix! - "G101", - # these ignores are from ruff NPY; please fix! - "NPY002", - # these ignores are from ruff PERF; please fix! - "PERF203", - "PERF401", - # these ignores are from PYI; please fix! - "PYI024", - "PYI036", - "PYI041", - "PYI056", - "SIM102", "SIM103", "SIM112", # flake8-simplify code styles - "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason - "SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression - "SIM110", - "SIM114", # Combine `if` branches using logical `or` operator - "SIM115", - "SIM116", # Disable Use a dictionary instead of consecutive `if` statements - "SIM117", - "SIM118", - "UP007", # keep-runtime-typing - "UP045", # keep-runtime-typing - "TC006", - # TODO: Remove Python-3.10 specific suppressions - "B905", - "UP035", - "UP036", - "UP038", - "UP041", - "FURB161", -] -select = [ - "B", - "B904", # Re-raised error without specifying the cause via the from keyword - "C4", - "G", - "E", - "EXE", - "F", - "SIM1", - "SIM911", - "W", - # Not included in flake8 - "FURB", - "LOG", - "NPY", - "PERF", - "PGH004", - "PIE790", - "PIE794", - "PIE800", - "PIE804", - "PIE807", - "PIE810", - "PLC0131", # type bivariance - "PLC0132", # type param mismatch - "PLC0205", # string as __slots__ - "PLC3002", # unnecessary-direct-lambda-call - "PLE", - "PLR0133", # constant comparison - "PLR0206", # property with params - "PLR1722", # use sys exit - "PLR1736", # unnecessary list index - "PLW0129", # assert on string literal - "PLW0131", # named expr without context - "PLW0133", # useless exception statement - "PLW0245", # super without brackets - "PLW0406", # import self - "PLW0711", # binary op exception - "PLW1501", # bad open mode - "PLW1507", # shallow copy os.environ - "PLW1509", # preexec_fn not safe with threads - "PLW2101", # useless lock statement - "PLW3301", # nested min max - "PT006", # TODO: enable more PT rules - "PT014", # duplicate parameterize case - "PT022", - "PT023", - "PT024", - "PT025", - "PT026", - "PYI", - "Q003", # avoidable escaped quote - "Q004", # unnecessary escaped quote - "RSE", - "RUF008", # mutable dataclass default - "RUF013", # ban implicit optional - "RUF015", # access first ele in constant time - "RUF016", # type error non-integer index - "RUF017", - "RUF018", # no assignment in assert - "RUF019", # 
unnecessary-key-check - "RUF020", # never union - "RUF024", # from keys mutable - "RUF026", # default factory kwarg - "RUF030", # No print statement in assert - "RUF033", # default values __post_init__ dataclass - "RUF041", # simplify nested Literal - "RUF048", # properly parse `__version__` - "RUF200", # validate pyproject.toml - "S324", # for hashlib FIPS compliance - "SLOT", - "TC", - "TRY002", # ban vanilla raise (todo fix NOQAs) - "TRY203", - "TRY401", # verbose-log-message - "UP", - "YTT", -] - -[tool.ruff.lint.pyupgrade] -# Preserve types, even if a file imports `from __future__ import annotations`. -keep-runtime-typing = true - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = [ - "F401", -] -"*.pyi" = [ - "PYI011", # typed-argument-default-in-stub - "PYI021", # docstring-in-stub - "PYI053", # string-or-bytes-too-long -] -"functorch/notebooks/**" = [ - "F401", -] -"test/export/**" = [ - "PGH004" -] -"test/typing/**" = [ - "PGH004" -] -"test/typing/reveal/**" = [ - "F821", -] -"test/torch_np/numpy_tests/**" = [ - "F821", - "NPY201", -] -"test/dynamo/test_bytecode_utils.py" = [ - "F821", -] -"test/dynamo/test_debug_utils.py" = [ - "UP037", -] -"test/dynamo/test_misc.py" = [ - "PGH004", -] -"test/jit/**" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't want to modify the jit test as they test specify syntax -] -"test/test_jit.py" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't want to modify the jit test as they test specify syntax -] -"test/inductor/s429861_repro.py" = [ - "PGH004", -] -"test/inductor/test_torchinductor.py" = [ - "UP037", -] -# autogenerated #TODO figure out why file level noqa is ignored -"torch/_appdirs.py" = ["PGH004"] -"torch/jit/_shape_functions.py" = ["PGH004"] -"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"] -"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"] -"torch/_inductor/codegen/**" = [ - "PGH004" -] 
-"torchgen/api/types/__init__.py" = [ - "F401", - "F403", -] -"torch/utils/collect_env.py" = [ - "UP", # collect_env.py needs to work with older versions of Python -] -"torch/_vendor/**" = [ - "UP", # No need to mess with _vendor -] -"tools/linter/**" = [ - "LOG015" # please fix -] - -[tool.codespell] -ignore-words = "tools/linter/dictionary.txt" diff --git a/python-torch.spec b/python-torch.spec index d3c31d7..07a47eb 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -4,23 +4,18 @@ %global forgeurl https://github.com/pytorch/pytorch # So pre releases can be tried -%bcond_with gitcommit +%bcond_without gitcommit %if %{with gitcommit} -# v2.9.0-rc9 -%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1 +# git tag v2.3.0-rc2 +%global commit0 6a89a753b1556fe8558582c452fdba083f6ec01a %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20251008 -%global pypi_version 2.9.0 -%global flatbuffers_version 24.12.23 -%global miniz_version 3.0.2 -%global pybind11_version 2.13.6 -%global rc_tag -rc9 +%global date0 20240313 %else -%global pypi_version 2.9.1 -%global flatbuffers_version 24.12.23 -%global miniz_version 3.0.2 -%global pybind11_version 2.13.6 +%global commit0 975d4284250170602db60adfda5eb1664a3b8acc +%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) +%global date0 20240307 %endif +%global pypi_version 2.3.0 # For -test subpackage # suitable only for local testing @@ -30,37 +25,39 @@ %bcond_with test %ifarch x86_64 -%bcond_without rocm -%endif - -# For testing distributed+rccl etc. 
-%bcond_without rccl -%bcond_with gloo -%bcond_without mpi -%bcond_without tensorpipe - -# Disable dwz with rocm because memory can be exhausted -%if %{with rocm} -%define _find_debuginfo_dwz_opts %{nil} -%endif - -# These came in 2.4 and not yet in Fedora -%bcond_with opentelemetry -%bcond_with httplib -%bcond_with kineto - %if 0%{?fedora} -%bcond_without onnx +%bcond_without rocm %else -%bcond_with onnx +%bcond_with rocm +%endif +%endif +# hipblaslt is in development +%bcond_with hipblaslt +# Which families gpu build for +%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 +%global rocm_default_gpu default +%bcond_without rocm_loop + +# For testing caffe2 +%if 0%{?fedora} +%bcond_without caffe2 +%else +%bcond_with caffe2 +%endif + +# For testing distributed +%bcond_with distributed + +# For testing openvs +%bcond_with opencv + +# For testing cuda +%ifarch x86_64 +%bcond_with cuda %endif Name: python-%{pypi_name} -%if %{with gitcommit} Version: %{pypi_version}^git%{date0}.%{shortcommit0} -%else -Version: %{pypi_version} -%endif Release: %autorelease Summary: PyTorch AI/ML framework # See license.txt for license details @@ -69,94 +66,79 @@ License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AN URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source1000: pyproject.toml +Source100: pyproject.toml %else -Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz +Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz +Source100: pyproject.toml %endif -Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz -Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz -# Developement on tensorpipe has stopped, repo made read only 
July 1, 2023, this is the last commit -%global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e -%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) -Source20: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz -# The old libuv tensorpipe uses -Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz -# Developement afaik on libnop has stopped, this is the last commit -%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 -%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) -Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz - -%if %{without opentelemetry} -%global ot_ver 1.14.2 -Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz +%if %{with cuda} +%global cuf_ver 1.1.2 +Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz +%global cul_ver 3.4.1 +Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz %endif -%if %{without httplib} -%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 -%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) -Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz +Patch0: 0001-no-third_party-foxi.patch +Patch1: 0001-no-third_party-fmt.patch +Patch2: 0001-no-third_party-FXdiv.patch +Patch3: 0001-Stub-in-kineto-ActivityType.patch +Patch5: 0001-disable-submodule-search.patch + +%if %{with caffe2} +Patch6: 0001-reenable-foxi-linking.patch %endif -%if %{without kineto} -%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 -%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) -Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz +%if %{with rocm} +# https://github.com/pytorch/pytorch/pull/120551 +Patch100: 0001-Optionally-use-hipblaslt.patch +Patch101: 0001-cuda-hip-signatures.patch +Patch102: 0001-silence-an-assert.patch +Patch103: 
0001-can-not-use-with-c-files.patch +Patch104: 0001-use-any-hip.patch +Patch105: 0001-disable-use-of-aotriton.patch %endif -%global ox_ver 1.18.0 -Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz - -%global pt_arches x86_64 aarch64 -ExclusiveArch: %pt_arches +ExclusiveArch: x86_64 aarch64 %global toolchain gcc %global _lto_cflags %nil BuildRequires: cmake +BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel -BuildRequires: flexiblas-devel BuildRequires: fmt-devel BuildRequires: foxi-devel -BuildRequires: gcc-c++ -BuildRequires: gcc-gfortran - -%if %{with gloo} -BuildRequires: gloo-devel -%endif -BuildRequires: json-devel - -BuildRequires: libomp-devel -BuildRequires: moodycamel-concurrentqueue-devel -BuildRequires: numactl-devel -BuildRequires: ninja-build -%if %{with onnx} -BuildRequires: onnx-devel -%endif -%if %{with mpi} -BuildRequires: openmpi-devel -%endif -BuildRequires: protobuf-devel -BuildRequires: sleef-devel -BuildRequires: valgrind-devel -BuildRequires: pocketfft-devel -BuildRequires: pthreadpool-devel - -BuildRequires: cpuinfo-devel BuildRequires: FP16-devel BuildRequires: fxdiv-devel +BuildRequires: gcc-c++ +BuildRequires: gcc-gfortran +%if %{with distributed} +BuildRequires: gloo-devel +%endif +BuildRequires: ninja-build +BuildRequires: onnx-devel +BuildRequires: libomp-devel +BuildRequires: openblas-devel +BuildRequires: pocketfft-devel +BuildRequires: protobuf-devel +BuildRequires: pthreadpool-devel BuildRequires: psimd-devel -BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e +BuildRequires: python3-numpy +BuildRequires: python3-pyyaml +BuildRequires: python3-typing-extensions +BuildRequires: sleef-devel +BuildRequires: valgrind-devel +BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) -BuildRequires: python3dist(numpy) -BuildRequires: python3dist(pip) -BuildRequires: 
python3dist(pyyaml) BuildRequires: python3dist(setuptools) -BuildRequires: python3dist(sphinx) BuildRequires: python3dist(typing-extensions) +BuildRequires: python3dist(sphinx) %if 0%{?fedora} BuildRequires: python3-pybind11 @@ -166,47 +148,50 @@ BuildRequires: python3dist(sympy) %if %{with rocm} BuildRequires: hipblas-devel +%if %{with hipblaslt} BuildRequires: hipblaslt-devel +%endif BuildRequires: hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel -BuildRequires: hipsparselt-devel BuildRequires: hipsolver-devel -# Magma is broken on ROCm 7 -# BuildRequires: magma-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel BuildRequires: rocrand-devel BuildRequires: rocfft-devel -%if %{with rccl} +%if %{with distributed} BuildRequires: rccl-devel %endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel -BuildRequires: rocm-compilersupport-macros BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -BuildRequires: rocsolver-devel -BuildRequires: rocm-smi-devel +BuildRequires: rocm-rpm-macros-modules BuildRequires: rocthrust-devel BuildRequires: roctracer-devel -Requires: amdsmi - +Requires: rocm-rpm-macros-modules %endif +%if %{with opencv} +BuildRequires: opencv-devel +%endif + + %if %{with test} BuildRequires: google-benchmark-devel %endif -Requires: python3dist(dill) -Requires: python3dist(yaml) +# Apache-2.0 +Provides: bundled(flatbuffers) = 22.3.3 +# MIT +Provides: bundled(miniz) = 2.1.0 +Provides: bundled(pybind11) = 2.11.1 -Obsoletes: caffe = 1.0^git20200212.9b89154 %description PyTorch is a Python package that provides two high-level features: @@ -220,24 +205,6 @@ and Cython to extend PyTorch when needed. 
%package -n python3-%{pypi_name} Summary: %{summary} -# For convience -Provides: pytorch - -# Apache-2.0 -Provides: bundled(flatbuffers) = %{flatbuffers_version} -# MIT -Provides: bundled(miniz) = %{miniz_version} -Provides: bundled(pybind11) = %{pybind11_version} - -%if %{with tensorpipe} -# BSD-3-Clause -Provides: bundled(tensorpipe) -# Apache-2.0 -Provides: bundled(libnop) -# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause -Provides: bundled(libuv) = 1.41.0 -%endif - %description -n python3-%{pypi_name} PyTorch is a Python package that provides two high-level features: @@ -247,6 +214,22 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. +%package -n python3-%{pypi_name}-devel +Summary: Libraries and headers for %{name} +Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} + +%description -n python3-%{pypi_name}-devel +%{summary} + +%if %{with rocm} +%package -n python3-%{pypi_name}-rocm +Summary: %{name} for ROCm +Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} + +%description -n python3-%{pypi_name}-rocm +%{summary} +%endif + %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -259,120 +242,44 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %prep -%if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} -# Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE1000} . - -%else -%autosetup -p1 -n pytorch-v%{version} -%endif # Remove bundled egg-info rm -rf %{pypi_name}.egg-info +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE100} . 
tar xf %{SOURCE1} -rm -rf third_party/flatbuffers/* -cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/ +cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ tar xf %{SOURCE2} -rm -rf third_party/pybind11/* -cp -r pybind11-%{pybind11_version}/* third_party/pybind11/ +cp -r pybind11-2.11.1/* third_party/pybind11/ -%if %{with tensorpipe} -tar xf %{SOURCE20} -rm -rf third_party/tensorpipe/* -cp -r tensorpipe-*/* third_party/tensorpipe/ -tar xf %{SOURCE21} -rm -rf third_party/tensorpipe/third_party/libuv/* -cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ -tar xf %{SOURCE22} -rm -rf third_party/tensorpipe/third_party/libnop/* -cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ - -# gcc 15 include cstdint -sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/allocator.h -sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/memory.h +%if %{with cuda} +tar xf %{SOURCE10} +cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ +tar xf %{SOURCE11} +cp -r cutlass-%{cul_ver}/* third_party/cutlass/ %endif -%if %{without opentelemtry} -tar xf %{SOURCE60} -rm -rf third_party/opentelemetry-cpp/* -cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ +%if %{with opencv} +# Reduce requirements, *FOUND is not set +sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt +sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt +sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt %endif -%if %{without httplib} -tar xf %{SOURCE70} -rm -rf third_party/cpp-httplib/* -cp -r cpp-httplib-*/* third_party/cpp-httplib/ -%endif - -%if %{without kineto} -tar xf %{SOURCE80} -rm -rf third_party/kineto/* -cp -r kineto-*/* third_party/kineto/ -%endif - -%if %{without onnx} -tar xf %{SOURCE90} -rm -rf third_party/onnx/* -cp -r onnx-*/* third_party/onnx/ -%endif - -# Adjust for the hipblaslt's we build -sed -i -e 's@"gfx90a", "gfx940", 
"gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp - %if 0%{?rhel} # In RHEL but too old sed -i -e '/typing-extensions/d' setup.py # Need to pip these sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py -%else -# for 2.5.0 -sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py %endif # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake -# Compress hip -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake -# Silence noisy warning -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake -# Use parallel jobs -sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake -# Need to link with librocm_smi64 -sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake - -# No third_party fmt, use system -sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt -sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt -sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $)@@' aten/src/ATen/CMakeLists.txt - -sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt -sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt -sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt -sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake -sed -i -e 's@fmt::fmt-header-only@fmt@' 
caffe2/CMakeLists.txt - -sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake -sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake -sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake - -# No third_party FXdiv -sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt -sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt - -# https://github.com/pytorch/pytorch/issues/149803 -# Tries to checkout nccl -sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py - -# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo -sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # Release comes fully loaded with third party src # Remove what we can @@ -382,7 +289,7 @@ sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # the third_party dir to compile the file. # mimiz is licensed MIT # https://github.com/richgel999/miniz/blob/master/LICENSE -mv third_party/miniz-%{miniz_version} . +mv third_party/miniz-2.1.0 . # # setup.py depends on this script mv third_party/build_bundled.py . @@ -392,24 +299,9 @@ mv third_party/flatbuffers . mv third_party/pybind11 . -%if %{with tensorpipe} -mv third_party/tensorpipe . -%endif - -%if %{without opentelemetry} -mv third_party/opentelemetry-cpp . -%endif - -%if %{without httplib} -mv third_party/cpp-httplib . -%endif - -%if %{without kineto} -mv third_party/kineto . -%endif - -%if %{without onnx} -mv third_party/onnx . +%if %{with cuda} +mv third_party/cudnn_frontend . +mv third_party/cutlass . 
%endif %if %{with test} @@ -420,28 +312,13 @@ mv third_party/googletest . rm -rf third_party/* # Put stuff back mv build_bundled.py third_party -mv miniz-%{miniz_version} third_party +mv miniz-2.1.0 third_party mv flatbuffers third_party mv pybind11 third_party -%if %{with tensorpipe} -mv tensorpipe third_party -%endif - -%if %{without opentelemetry} -mv opentelemetry-cpp third_party -%endif - -%if %{without httplib} -mv cpp-httplib third_party -%endif - -%if %{without kineto} -mv kineto third_party -%endif - -%if %{without onnx} -mv onnx third_party +%if %{with cuda} +mv cudnn_frontend third_party +mv cutlass third_party %endif %if %{with test} @@ -451,70 +328,47 @@ mv googletest third_party # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft -cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ - # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* third_party/valgrind-headers -# Fix installing to /usr/lib64 -sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt - -# reenable foxi linking -sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake - -# cmake version changed -sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt -sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt -%if %{without opentelemtry} -sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt -%endif +# Remove unneeded OpenCL files that confuse the lincense scanner +rm caffe2/contrib/opencl/OpenCL/cl.hpp +rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h +rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp %if %{with rocm} # 
hipify ./tools/amd_build/build_amd.py # Fedora installs to /usr/include, not /usr/include/rocm-core sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h -# https://github.com/pytorch/pytorch/issues/149805 -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake -# Fedora installs to /usr/include, not /usr/include/rocm-core -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp -sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp -# use any hip, correct CMAKE_MODULE_PATH -sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake -sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake -# silence an assert -# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu - %endif -# moodycamel include path needs adjusting to use the system's -sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake +%if %{with cuda} +# build complains about not being able to build -pie without -fPIC +sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@string(APPEND CMAKE_CUDA_FLAGS " -fPIC -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@' CMakeLists.txt +%endif %build -# Export the arches -# echo "%%pytorch_arches %pt_arches" > macros.pytorch - # # Control the number of jobs # # The build can fail if too many threads exceed the physical memory -# Run at least one thread, more if CPU & memory resources are available. 
+# So count core and and memory and increase the build memory util the build succeeds # -%ifarch x86_64 # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` -%else -# cpuinfo format varies on other arches, fall back to nproc -COMPILE_JOBS=`nproc` -%endif if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory +%if %{with cuda} +BUILD_MEM=4 +%else BUILD_MEM=2 +%endif MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` @@ -546,55 +400,53 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF -export USE_FAKELOWP=OFF export USE_FBGEMM=OFF export USE_FLASH_ATTENTION=OFF -export USE_GLOO=OFF +export USE_GOLD_LINKER=OFF export USE_ITT=OFF export USE_KINETO=OFF -export USE_KLEIDIAI=OFF export USE_LITE_INTERPRETER_PROFILER=OFF export USE_LITE_PROTO=OFF -export USE_MAGMA=OFF -export USE_MEM_EFF_ATTENTION=OFF export USE_MKLDNN=OFF -export USE_MPI=OFF export USE_NCCL=OFF export USE_NNPACK=OFF export USE_NUMPY=ON export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF +export USE_QNNPACK=OFF export USE_ROCM=OFF +export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_SLEEF=ON export USE_SYSTEM_EIGEN_INSTALL=ON -%if %{with onnx} +export USE_SYSTEM_FP16=ON +export USE_SYSTEM_PTHREADPOOL=ON +export USE_SYSTEM_PSIMD=ON +export USE_SYSTEM_FXDIV=ON export USE_SYSTEM_ONNX=ON -%endif +export USE_SYSTEM_XNNPACK=ON export USE_SYSTEM_PYBIND11=OFF export USE_SYSTEM_LIBS=OFF -export USE_SYSTEM_NCCL=OFF export USE_TENSORPIPE=OFF -export USE_XNNPACK=OFF -export USE_XPU=OFF -export USE_SYSTEM_PTHREADPOOL=ON -export USE_SYSTEM_CPUINFO=ON -export USE_SYSTEM_FP16=ON -export USE_SYSTEM_FXDIV=ON -export USE_SYSTEM_PSIMD=ON -export USE_SYSTEM_XNNPACK=OFF +export USE_XNNPACK=ON +%if %{with caffe2} +export BUILD_CAFFE2=ON +%endif + +%if %{with cuda} +%if %{without rocm} 
+export CUDACXX=/usr/local/cuda/bin/nvcc +export CPLUS_INCLUDE_PATH=/usr/local/cuda/include +export USE_CUDA=ON +%endif +%endif + +%if %{with distributed} export USE_DISTRIBUTED=ON -%if %{with tensorpipe} -export USE_TENSORPIPE=ON -export TP_BUILD_LIBUV=OFF %endif -%if %{with gloo} -export USE_GLOO=ON -export USE_SYSTEM_GLOO=ON -%endif -%if %{with mpi} -export USE_MPI=ON +%if %{with opencv} +export USE_OPENCV=ON %endif %if %{with test} @@ -610,73 +462,100 @@ export BUILD_TEST=ON # # See BZ 2244862 + %if %{with rocm} export USE_ROCM=ON -export USE_ROCM_CK_SDPA=OFF -export USE_ROCM_CK_GEMM=OFF -export USE_FBGEMM_GENAI=OFF - -# Magma is broken on ROCm 7 -# export USE_MAGMA=ON export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` -#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -# pytorch uses clang, not hipcc -export HIP_CLANG_PATH=%{rocmllvm_bindir} -export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} - -%endif - -%if 0%{?fedora} -%pyproject_wheel -%else +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS %py3_build +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + %py3_build + mv build build-${gpu} + module purge +done %endif +%else + +%py3_build + +%endif %install -# pytorch rpm macros -# install -Dpm 644 macros.pytorch \ -# %{buildroot}%{_rpmmacrodir}/macros.pytorch - %if %{with rocm} + export USE_ROCM=ON -export USE_ROCM_CK=OFF export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` -# export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang 
-print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -# pytorch uses clang, not hipcc -export HIP_CLANG_PATH=%{rocmllvm_bindir} -export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +mv build-${gpu} build +%py3_install +mv build build-${gpu} +module purge +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + mv build-${gpu} build + # need to customize the install location, so replace py3_install + %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} + rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ + mv build build-${gpu} + module purge +done %endif -%if 0%{?fedora} -%pyproject_install -%pyproject_save_files '*torch*' %else %py3_install + %endif - -%check -# Not working yet -# pyproject_check_import torch - # Do not remote the empty files -%files -n python3-%{pypi_name} + +%files -n python3-%{pypi_name} %license LICENSE %doc README.md +%{_bindir}/convert-caffe2-to-onnx +%{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/%{pypi_name}* +%{python3_sitearch}/%{pypi_name} +%{python3_sitearch}/%{pypi_name}-*.egg-info %{python3_sitearch}/functorch +%{python3_sitearch}/torchgen +%if %{with caffe2} +%{python3_sitearch}/caffe2 +%endif +%if %{with rocm} +%if %{with rocm_loop} +%{_libdir}/rocm/gfx*/bin/* +%{_libdir}/rocm/gfx*/lib64/* +%endif +%endif %changelog %autochangelog diff --git a/sources b/sources index 9a3681f..90b1128 100644 --- a/sources +++ b/sources @@ -1,19 +1,2 @@ -SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6 -SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 -SHA512 
(v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a -SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e -SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 -SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 -SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 -SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 -SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a -SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 -SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 -SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0 -SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28 -SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d -SHA512 (pytorch-715dca6.tar.gz) = 
09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907 -SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc -SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341 -SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023 -SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a +SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 +SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28