diff --git a/.gitignore b/.gitignore index 3f2501f..c424df5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,33 @@ /pytorch-v2.1.0.tar.gz /pytorch-v2.1.2.tar.gz +/pytorch-975d428.tar.gz +/v23.3.3.tar.gz +/v2.11.1.tar.gz +/pytorch-6a89a75.tar.gz +/pytorch-74832f1.tar.gz +/pytorch-4bb5cb5.tar.gz +/tensorpipe-52791a2.tar.gz +/v1.41.0.tar.gz +/libnop-910b558.tar.gz +/pytorch-97ff6cf.tar.gz +/pytorch-v2.3.0.tar.gz +/pytorch-v2.3.1.tar.gz +/pytorch-v2.4.0.tar.gz +/v1.14.2.tar.gz +/cpp-httplib-3b6597b.tar.gz +/kineto-be13176.tar.gz +/pytorch-v2.4.1.tar.gz +/pytorch-v2.5.0.tar.gz +/pytorch-v2.5.1.tar.gz +/pytorch-v2.7.0.tar.gz +/v2.13.6.tar.gz +/pytorch-a1cb3cc.tar.gz +/v24.12.23.tar.gz +/kineto-5e75018.tar.gz +/pytorch-v2.8.0.tar.gz +/v1.18.0.tar.gz +/pytorch-715dca6.tar.gz +/pytorch-fd36458.tar.gz +/pytorch-0fabc3b.tar.gz +/pytorch-v2.9.0.tar.gz +/pytorch-v2.9.1.tar.gz diff --git a/0001-Add-cmake-variable-USE_ROCM_CK.patch b/0001-Add-cmake-variable-USE_ROCM_CK.patch new file mode 100644 index 0000000..925e03b --- /dev/null +++ b/0001-Add-cmake-variable-USE_ROCM_CK.patch @@ -0,0 +1,202 @@ +From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Mon, 21 Jul 2025 11:35:03 -0700 +Subject: [PATCH] Add cmake variable USE_ROCM_CK + +--- + CMakeLists.txt | 1 + + aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++----------------- + aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++--------- + cmake/Dependencies.cmake | 3 +++ + 4 files changed, 35 insertions(+), 31 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index a5d25e6afa0f..afc1b53efa64 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -240,6 +240,7 @@ cmake_dependent_option( + BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON + "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF) + cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF) ++cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON) + option(CAFFE2_STATIC_LINK_CUDA 
"Statically link CUDA libraries" OFF) + cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) + cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF +diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt +index c9cfd74b501e..59f6178218ee 100644 +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -373,26 +373,26 @@ if(USE_ROCM) + # is header only, so this should be ok, except that the CMake build generates + # a ck/config.h. We just do that part here. Without this, the ck.h from the + # ROCM SDK may get accidentally used instead. +- function(_pytorch_rocm_generate_ck_conf) +- set(CK_ENABLE_INT8 "ON") +- set(CK_ENABLE_FP16 "ON") +- set(CK_ENABLE_FP32 "ON") +- set(CK_ENABLE_FP64 "ON") +- set(CK_ENABLE_BF16 "ON") +- set(CK_ENABLE_FP8 "ON") +- set(CK_ENABLE_BF8 "ON") +- set(CK_USE_XDL "ON") +- set(CK_USE_WMMA "ON") +- configure_file( +- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" +- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" +- ) +- endfunction() ++# function(_pytorch_rocm_generate_ck_conf) ++# set(CK_ENABLE_INT8 "ON") ++# set(CK_ENABLE_FP16 "ON") ++# set(CK_ENABLE_FP32 "ON") ++# set(CK_ENABLE_FP64 "ON") ++# set(CK_ENABLE_BF16 "ON") ++# set(CK_ENABLE_FP8 "ON") ++# set(CK_ENABLE_BF8 "ON") ++# set(CK_USE_XDL "ON") ++# set(CK_USE_WMMA "ON") ++# configure_file( ++# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in" ++# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h" ++# ) ++# endfunction() + list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) +- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) +- _pytorch_rocm_generate_ck_conf() ++# list(APPEND ATen_HIP_INCLUDE 
${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include) ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include) ++# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel) ++# _pytorch_rocm_generate_ck_conf() + + # Next two lines are needed because TunableOp uses third-party/fmt + list(APPEND ATen_HIP_INCLUDE $) +@@ -409,7 +409,7 @@ endif() + ${native_quantized_hip_hip} + ${native_transformers_hip_hip} ${native_transformers_src_hip_hip} + ) +- if(WIN32) # Windows doesn't support Composable Kernels ++ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels + file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip") + file(GLOB native_hip_ck "native/hip/ck*.hip") + exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}" +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index 89350a11bea7..e5b7960177cf 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -752,7 +752,7 @@ template <> + void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(double)); + #else +@@ -836,7 +836,7 @@ void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) + bgemm_internal_cublas(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::bgemm_internal_ck(CUDABLAS_BGEMM_ARGS(at::BFloat16)); + } +@@ -1270,14 +1270,14 @@ template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support double gemm yet + 
gemm_internal_cublas(CUDABLAS_GEMM_ARGS(double)); + #else + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(double)); + #endif + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(double)); + } +@@ -1293,7 +1293,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100 + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(float)); +@@ -1311,7 +1311,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1327,7 +1327,7 @@ template <> + void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)) + { + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { +-#ifdef USE_ROCM ++#ifdef USE_ROCM_CK + // hipblaslt does not support complex gemm yet + gemm_internal_cublas>(CUDABLAS_GEMM_ARGS(c10::complex)); + #else +@@ -1345,7 +1345,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::Half)); + } +@@ 
-1361,7 +1361,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + at::native::gemm_internal_ck(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +@@ -1382,7 +1382,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::Half)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +@@ -1398,7 +1398,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B + if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) { + gemm_internal_cublaslt(CUDABLAS_GEMM_ARGS(at::BFloat16)); + } +-#if defined(USE_ROCM) && !defined(_MSC_VER) ++#if defined(USE_ROCM) && defined(USE_ROCM_CK) + else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) { + TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm"); + } +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index a93386c27f8d..be1368999d38 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1031,6 +1031,9 @@ if(USE_ROCM) + if(HIPBLASLT_VEC_EXT) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT) + endif() ++ if(USE_ROCM_CK) ++ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK) ++ endif() + list(APPEND HIP_HIPCC_FLAGS --offload-compress) + if(WIN32) + add_definitions(-DROCM_ON_WINDOWS) +-- +2.49.0 + diff --git 
a/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch new file mode 100644 index 0000000..b6a282c --- /dev/null +++ b/0001-Fix-compilation-and-import-torch-issues-for-cpython-.patch @@ -0,0 +1,359 @@ +From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001 +From: albanD +Date: Sat, 12 Jul 2025 03:42:33 -0400 +Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14 + +Imported from +https://github.com/albanD/pytorch/tree/cpython314_build +commit 88bb9cdb72449f4277829e20d94ad8aec1894216 + +Signed-off-by: Tom Rix +--- + torch/_dynamo/bytecode_analysis.py | 2 +- + torch/ao/quantization/__init__.py | 5 +++- + torch/ao/quantization/qconfig.py | 4 ++- + torch/ao/quantization/utils.py | 7 +++-- + torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++ + torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++ + torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++-------- + torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++ + torch/csrc/utils/python_compat.h | 1 + + torch/onnx/__init__.py | 1 - + torch/utils/weak.py | 29 +++++++++++++++++-- + 11 files changed, 111 insertions(+), 19 deletions(-) + +diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py +index 3252ea91409f..2de74ee5bf8d 100644 +--- a/torch/_dynamo/bytecode_analysis.py ++++ b/torch/_dynamo/bytecode_analysis.py +@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11): + TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"]) + else: + TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"]) +-if sys.version_info >= (3, 12): ++if (3, 12) <= sys.version_info < (3, 14): + TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"]) + if sys.version_info >= (3, 13): + TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"]) +diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py +index ffc1792fd23f..cf5a8b99a894 100644 +--- a/torch/ao/quantization/__init__.py ++++ 
b/torch/ao/quantization/__init__.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + ++import sys + from typing import Callable, Optional, Union + + import torch +@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403 + + # ensure __module__ is set correctly for public APIs + ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase] +-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++if sys.version_info < (3, 14): ++ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization" ++ + for _f in [ + compare_results, + extract_results_from_loggers, +diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py +index efee5302ad42..d9a8fc78bab4 100644 +--- a/torch/ao/quantization/qconfig.py ++++ b/torch/ao/quantization/qconfig.py +@@ -1,5 +1,6 @@ + # mypy: allow-untyped-defs + import copy ++import sys + import warnings + from collections import namedtuple + from typing import Any, Optional, Union +@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N + + + QConfigAny = Optional[QConfig] +-QConfigAny.__module__ = "torch.ao.quantization.qconfig" ++if sys.version_info < (3, 14): ++ QConfigAny.__module__ = "torch.ao.quantization.qconfig" + + + def _add_module_to_qconfig_obs_ctr( +diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py +index 4ac3112ec072..3b1503e01701 100644 +--- a/torch/ao/quantization/utils.py ++++ b/torch/ao/quantization/utils.py +@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph) + """ + + import functools ++import sys + import warnings + from collections import OrderedDict + from inspect import getfullargspec, signature +@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized + + + NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any] +-NodePattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ NodePattern.__module__ = "torch.ao.quantization.utils" + + # This 
is the Quantizer class instance from torch/quantization/fx/quantize.py. + # Define separately to prevent circular imports. +@@ -31,7 +33,8 @@ QuantizerCls = Any + Pattern = Union[ + Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any + ] +-Pattern.__module__ = "torch.ao.quantization.utils" ++if sys.version_info < (3, 14): ++ Pattern.__module__ = "torch.ao.quantization.utils" + + + # TODO: maybe rename this to MatchInputNode +diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c +index b68ef894aeaa..244d4165d5e8 100644 +--- a/torch/csrc/dynamo/cpython_defs.c ++++ b/torch/csrc/dynamo/cpython_defs.c +@@ -2,6 +2,20 @@ + #include + #include + ++#if IS_PYTHON_3_14_PLUS ++ ++const uint8_t* THP_PyOpcode_Caches = NULL; ++const int THP_PyOpcode_Caches_size = 0; ++ ++void ++THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) ++{} ++void ++THP_PyFrame_Clear(_PyInterpreterFrame *frame) ++{} ++ ++#else ++ + #if IS_PYTHON_3_11_PLUS + + #define Py_BUILD_CORE +@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL; + const int THP_PyOpcode_Caches_size = 0; + + #endif ++ ++#endif // IS_PYTHON_3_14_PLUS +\ No newline at end of file +diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h +index 6b99c1d5aec8..616be16563cf 100644 +--- a/torch/csrc/dynamo/cpython_includes.h ++++ b/torch/csrc/dynamo/cpython_includes.h +@@ -21,6 +21,14 @@ + + #if IS_PYTHON_3_11_PLUS + #include ++#if IS_PYTHON_3_14_PLUS ++#include ++#include ++#endif ++#endif ++ ++#if IS_PYTHON_3_14_PLUS ++#include + #endif + + #undef Py_BUILD_CORE +@@ -30,6 +38,13 @@ + extern "C" { + #endif + ++#if IS_PYTHON_3_14_PLUS ++ ++#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable) ++#define PREV_INSTR(x) (x)->instr_ptr ++ ++#else ++ + #if IS_PYTHON_3_13_PLUS + #define F_CODE(x) ((PyCodeObject*)(x)->f_executable) + #define PREV_INSTR(x) (x)->instr_ptr +@@ -38,6 +53,8 @@ extern 
"C" { + #define PREV_INSTR(x) (x)->prev_instr + #endif + ++#endif // IS_PYTHON_3_14_PLUS ++ + #if IS_PYTHON_3_12_PLUS + #define FUNC(x) ((x)->f_funcobj) + #else +diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c +index f413782b2d30..72bb8839bac3 100644 +--- a/torch/csrc/dynamo/eval_frame.c ++++ b/torch/csrc/dynamo/eval_frame.c +@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) { + return PyUnicode_AsUTF8(F_CODE(frame)->co_name); + } + +-void clear_old_frame_if_python_312_plus( +- PyThreadState* tstate, +- THP_EVAL_API_FRAME_OBJECT* frame) { +-#if IS_PYTHON_3_12_PLUS +- +- THP_PyFrame_Clear(frame); +- THP_PyThreadState_PopFrame(tstate, frame); +- +-#endif +-} +- + static PyObject* dynamo_eval_custom_code_impl( + PyThreadState* tstate, + THP_EVAL_API_FRAME_OBJECT* frame, +@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim( + + static void enable_eval_frame_shim(PyThreadState* tstate) {} + static void enable_eval_frame_default(PyThreadState* tstate) {} ++PyObject* dynamo_eval_custom_code( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ PyCodeObject* code, ++ const char* trace_annotation, ++ int throw_flag) {} ++THPPyInterpreterFrame* THPPyInterpreterFrame_New( ++ THP_EVAL_API_FRAME_OBJECT* frame) {} ++PyObject* dynamo_eval_frame_default( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame, ++ int throw_flag) {} + + static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL}; + +@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = { + + #endif // !(IS_PYTHON_3_14_PLUS) + ++void clear_old_frame_if_python_312_plus( ++ PyThreadState* tstate, ++ THP_EVAL_API_FRAME_OBJECT* frame) { ++#if IS_PYTHON_3_12_PLUS ++ ++ THP_PyFrame_Clear(frame); ++ THP_PyThreadState_PopFrame(tstate, frame); ++ ++#endif ++} ++ + static PyObject* increment_working_threads( + PyThreadState* tstate, + PyObject* module) { +diff --git 
a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp +index b839fb26fc91..c4ee36d87767 100644 +--- a/torch/csrc/dynamo/framelocals_mapping.cpp ++++ b/torch/csrc/dynamo/framelocals_mapping.cpp +@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + PyCodeObject* co = F_CODE(frame); + _framelocals.resize(co->co_nlocalsplus, nullptr); + ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + if (!frame->stacktop) { + return; + } ++#endif + + auto update_framelocals = [&](int i, PyObject* value) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); +@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame) + }; + + auto offset = co->co_nlocalsplus - co->co_nfreevars; ++#if IS_PYTHON_3_14_PLUS ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + for (int i = 0; i < offset; i++) { + update_framelocals(i, frame->localsplus[i]); + } ++#endif ++ + // Get references to closure variables ++#if IS_PYTHON_3_14_PLUS ++ PyObject* closure; ++ TORCH_CHECK(false, "Python 3.14+ not supported"); ++#else + PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure; ++#endif + for (int i = 0; i < co->co_nfreevars; i++) { + update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i)); + } +diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h +index a1537611cc47..16292e4fd030 100644 +--- a/torch/csrc/utils/python_compat.h ++++ b/torch/csrc/utils/python_compat.h +@@ -13,6 +13,7 @@ extern "C" { + #define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000 + #define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000 + #define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000 ++#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000 + + static inline int PyCode_GetNCellvars(PyCodeObject* code) { + // gh-26364 added co_ncellvars to Python 3.11.0rc1 +diff --git a/torch/onnx/__init__.py 
b/torch/onnx/__init__.py +index 345ffd2a065b..ceeadde5365b 100644 +--- a/torch/onnx/__init__.py ++++ b/torch/onnx/__init__.py +@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx" + OnnxExporterError.__module__ = "torch.onnx" + _OrtBackend.__module__ = "torch.onnx" + _OrtBackendOptions.__module__ = "torch.onnx" +-_OrtExecutionProvider.__module__ = "torch.onnx" + enable_fake_mode.__module__ = "torch.onnx" + is_onnxrt_backend_supported.__module__ = "torch.onnx" + +diff --git a/torch/utils/weak.py b/torch/utils/weak.py +index 8bf2ba5ed02b..9c7218cb2ad3 100644 +--- a/torch/utils/weak.py ++++ b/torch/utils/weak.py +@@ -3,8 +3,6 @@ from __future__ import annotations + + import collections.abc as _collections_abc + import weakref +- +-from _weakrefset import _IterationGuard # type: ignore[attr-defined] + from collections.abc import Mapping, MutableMapping + from weakref import ref + +@@ -22,6 +20,33 @@ __all__ = [ + ] + + ++# TODO: make weakref properly thread safe following ++# https://github.com/python/cpython/pull/125325 ++class _IterationGuard: ++ # This context manager registers itself in the current iterators of the ++ # weak container, such as to delay all removals until the context manager ++ # exits. ++ # This technique should be relatively thread-safe (since sets are). ++ ++ def __init__(self, weakcontainer): ++ # Don't create cycles ++ self.weakcontainer = ref(weakcontainer) ++ ++ def __enter__(self): ++ w = self.weakcontainer() ++ if w is not None: ++ w._iterating.add(self) ++ return self ++ ++ def __exit__(self, e, t, b): ++ w = self.weakcontainer() ++ if w is not None: ++ s = w._iterating ++ s.remove(self) ++ if not s: ++ w._commit_removals() ++ ++ + # This file defines a variant of WeakKeyDictionary that overrides the hashing + # behavior of the key to use object identity, rather than the builtin + # __eq__/__hash__ functions. 
This is useful for Tensor weak keys, as their +-- +2.49.0 + diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch deleted file mode 100644 index 56434a7..0000000 --- a/0001-Optionally-use-hipblaslt.patch +++ /dev/null @@ -1,262 +0,0 @@ -From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 23 Feb 2024 08:27:30 -0500 -Subject: [PATCH] Optionally use hipblaslt - -The hipblaslt package is not available on Fedora. -Instead of requiring the package, make it optional. -If it is found, define the preprocessor variable HIPBLASLT -Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks - -Signed-off-by: Tom Rix ---- - aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- - aten/src/ATen/cuda/CUDABlas.h | 2 +- - aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- - aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- - aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- - aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ - cmake/Dependencies.cmake | 3 +++ - cmake/public/LoadHIP.cmake | 4 ++-- - 8 files changed, 25 insertions(+), 19 deletions(-) - -diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp -index d534ec5a178..e815463f630 100644 ---- a/aten/src/ATen/cuda/CUDABlas.cpp -+++ b/aten/src/ATen/cuda/CUDABlas.cpp -@@ -14,7 +14,7 @@ - #include - - #ifdef USE_ROCM --#if ROCM_VERSION >= 60000 -+#ifdef HIPBLASLT - #include - #endif - // until hipblas has an API to accept flags, we must use rocblas here -@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { - } - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - - #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 - // only for rocm 5.7 where we first supported hipblaslt, it was difficult -@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public 
CuBlasLtDescriptor< - }; - } // namespace - -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - template - void gemm_and_bias( - bool transpose_mat1, -@@ -1124,7 +1125,7 @@ template void gemm_and_bias( - at::BFloat16* result_ptr, - int64_t result_ld, - GEMMAndBiasActivationEpilogue activation); -- -+#endif - void scaled_gemm( - char transa, - char transb, -diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h -index eb12bb350c5..068607467dd 100644 ---- a/aten/src/ATen/cuda/CUDABlas.h -+++ b/aten/src/ATen/cuda/CUDABlas.h -@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); - template <> - void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - enum GEMMAndBiasActivationEpilogue { - None, - RELU, -diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h -index 4ec35f59a21..e28dc42034f 100644 ---- a/aten/src/ATen/cuda/CUDAContextLight.h -+++ b/aten/src/ATen/cuda/CUDAContextLight.h -@@ -9,7 +9,7 @@ - - // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also - // added bf16 support --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - #include - #endif - -@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); - /* Handles */ - TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); - TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - TORCH_CUDA_CPP_API cublasLtHandle_t 
getCurrentCUDABlasLtHandle(); - #endif - -diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp -index 6913d2cd95e..3d4276be372 100644 ---- a/aten/src/ATen/cuda/CublasHandlePool.cpp -+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp -@@ -29,7 +29,7 @@ namespace at::cuda { - - namespace { - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - void createCublasLtHandle(cublasLtHandle_t *handle) { - TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); - } -@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { - return handle; - } - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasLtHandle_t getCurrentCUDABlasLtHandle() { - #ifdef USE_ROCM - c10::DeviceIndex device = 0; -diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h -index 3ba0d761277..dde1870cfbf 100644 ---- a/aten/src/ATen/cuda/tunable/TunableGemm.h -+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h -@@ -11,7 +11,7 @@ - - #include - #ifdef USE_ROCM --#if ROCM_VERSION >= 50700 -+#ifdef HIPBLASLT - #include - #endif - #include -@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex -@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp - } - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); - if (env == nullptr || strcmp(env, "1") == 0) { - // disallow tuning of hipblaslt with c10::complex 
-diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp -index 29e5c5e3cf1..df56f3d7f1d 100644 ---- a/aten/src/ATen/native/cuda/Blas.cpp -+++ b/aten/src/ATen/native/cuda/Blas.cpp -@@ -155,7 +155,7 @@ enum class Activation { - GELU, - }; - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { - switch (a) { - case Activation::None: -@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { - - #ifdef USE_ROCM - static bool isSupportedHipLtROCmArch(int index) { -+#if defined(HIPBLASLT) - hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); - std::string device_arch = prop->gcnArchName; - static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; -@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { - } - } - TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); -+#endif - return false; - } - #endif -@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - at::ScalarType scalar_type = self.scalar_type(); - c10::MaybeOwned self_; - if (&result != &self) { --#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) - // Strangely, if mat2 has only 1 row or column, we get - // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. 
- // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] -@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - } - self__sizes = self_->sizes(); - } else { --#if defined(USE_ROCM) && ROCM_VERSION >= 50700 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - useLtInterface = !disable_addmm_cuda_lt && - result.dim() == 2 && result.is_contiguous() && - isSupportedHipLtROCmArch(self.device().index()) && -@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma - - TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); - --#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) -+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) - if (useLtInterface) { - AT_DISPATCH_FLOATING_TYPES_AND2( - at::ScalarType::Half, -@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); - at::native::resize_output(amax, {}); - --#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) -+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) - cublasCommonArgs args(mat1, mat2, out); - const auto out_dtype_ = args.result->scalar_type(); - TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); -@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, - TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); - #endif - --#if defined(USE_ROCM) && ROCM_VERSION >= 60000 -+#if defined(USE_ROCM) && defined(HIPBLASLT) - // rocm's hipblaslt does not yet support amax, so calculate separately - auto out_float32 = out.to(kFloat); - out_float32.abs_(); -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 
b7ffbeb07dc..2b6c3678984 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1273,6 +1273,9 @@ if(USE_ROCM) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() -+ if(hipblast_FOUND) -+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) -+ endif() - if(HIPBLASLT_CUSTOM_DATA_TYPE) - list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) - endif() -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index f6ca263c5e5..53eb0b63c1a 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -156,7 +156,7 @@ if(HIP_FOUND) - find_package_and_print_version(rocblas REQUIRED) - find_package_and_print_version(hipblas REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -- find_package_and_print_version(hipblaslt REQUIRED) -+ find_package_and_print_version(hipblaslt) - endif() - find_package_and_print_version(miopen REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") -@@ -191,7 +191,7 @@ if(HIP_FOUND) - # roctx is part of roctracer - find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) - -- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") -+ if(hipblastlt_FOUND) - # check whether hipblaslt is using its own datatype - set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") - file(WRITE ${file} "" --- -2.43.2 - diff --git a/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch deleted file mode 100644 index 4eec491..0000000 --- a/0001-Regenerate-flatbuffer-header.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:19:29 -0700 -Subject: [PATCH] Regenerate flatbuffer header - -For this error -torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: -error: static assertion failed: Non-compatible flatbuffers version included - 12 | FLATBUFFERS_VERSION_MINOR == 3 && - -PyTorch is expecting 23.3.3, what f38 has 
-Rawhide is at 23.5.26 - -Regenerate with -flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs - -Signed-off-by: Tom Rix ---- - torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -index cffe8bc7a6..83575e4c19 100644 ---- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h -+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -@@ -9,8 +9,8 @@ - // Ensure the included flatbuffers.h is the same version as when this file was - // generated, otherwise it may not be compatible. - static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && -- FLATBUFFERS_VERSION_MINOR == 3 && -- FLATBUFFERS_VERSION_REVISION == 3, -+ FLATBUFFERS_VERSION_MINOR == 5 && -+ FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - - namespace torch { --- -2.43.0 - diff --git a/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch deleted file mode 100644 index f088645..0000000 --- a/0001-Stub-in-kineto-ActivityType.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:25:23 -0700 -Subject: [PATCH] Stub in kineto ActivityType - -There is an error with kineto is not used, the shim still -requires the ActivityTYpe.h header to get the enum Activity type. -So cut-n-paste just enough of the header in to do this. 
- -Signed-off-by: Tom Rix ---- - torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index e92cbf003d..68985ab7d0 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,51 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
-+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include - #include --- -2.43.0 - diff --git a/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch deleted file mode 100644 index 719737c..0000000 --- a/0001-can-not-use-with-c-files.patch +++ /dev/null @@ -1,25 +0,0 @@ -From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:49 -0500 -Subject: [PATCH] can not use with c files - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 4dd8042058..5f91f3ffab 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1269,7 +1269,7 @@ if(USE_ROCM) - list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) - list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) - list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) -- list(APPEND HIP_CXX_FLAGS -std=c++17) -+# list(APPEND HIP_CXX_FLAGS -std=c++17) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") - list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) - endif() --- -2.43.0 - diff --git a/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch deleted file mode 100644 index a258737..0000000 --- a/0001-cuda-hip-signatures.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 1 Feb 2024 11:29:47 -0500 -Subject: [PATCH] cuda - hip signatures - ---- - aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -index 1b85e7776e..bb6f88783a 100644 ---- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp -@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, - const char *src, - const char *name, - int numHeaders, -+#if !defined(USE_ROCM) 
- const char * const *headers, - const char * const *includeNames) { -+#else -+ const char **headers, -+ const char **includeNames) { -+#endif - auto fn = reinterpret_cast(getNVRTCLibrary().sym(__func__)); - if (!fn) - throw std::runtime_error("Can't get nvrtcCreateProgram"); -@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *); - NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *); - NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *); - #endif -+#if !defined(USE_ROCM) - NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *); -+#else -+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **); -+#endif - _STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult); - NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*); - NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *); --- -2.43.0 - diff --git a/0001-disable-submodule-search.patch b/0001-disable-submodule-search.patch deleted file mode 100644 index b830fa6..0000000 --- a/0001-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 0fd886d945..e397df8fb6 100644 ---- a/setup.py -+++ b/setup.py -@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.43.0 - diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch deleted file mode 100644 index 34a1704..0000000 --- a/0001-disable-use-of-aotriton.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Tue, 19 Mar 2024 11:32:37 -0400 -Subject: [PATCH] disable use 
of aotriton - ---- - aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -index 96b839820efd..2d3dd0cb4b0f 100644 ---- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -@@ -21,9 +21,11 @@ - #include - #include - -+#ifdef USE_FLASH_ATTENTION - #if USE_ROCM - #include - #endif -+#endif - - /** - * Note [SDPA Runtime Dispatch] -@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { - } - - bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { -+#ifdef USE_FLASH_ATTENTION - // Check that the gpu is capable of running flash attention - using sm80 = SMVersion<8, 0>; - using sm90 = SMVersion<9, 0>; -@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug - } - #endif - return true; -+#else -+ return false; -+#endif - } - - bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { --- -2.44.0 - diff --git a/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch deleted file mode 100644 index 71404e3..0000000 --- a/0001-no-third_party-FXdiv.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:20:28 -0500 -Subject: [PATCH] no third_party FXdiv - ---- - caffe2/CMakeLists.txt | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index b2f3adbfae..80a5625c8d 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -110,15 +110,15 @@ endif() - # Note: the folders that are being commented out have not been properly - # addressed yet. 
- --if(NOT MSVC AND USE_XNNPACK) -- if(NOT TARGET fxdiv) -- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -- add_subdirectory( -- "${FXDIV_SOURCE_DIR}" -- "${CMAKE_BINARY_DIR}/FXdiv") -- endif() --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# if(NOT TARGET fxdiv) -+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") -+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -+# add_subdirectory( -+# "${FXDIV_SOURCE_DIR}" -+# "${CMAKE_BINARY_DIR}/FXdiv") -+# endif() -+#endif() - - add_subdirectory(core) - add_subdirectory(serialize) -@@ -1081,9 +1081,9 @@ if(USE_XPU) - target_compile_definitions(torch_xpu PRIVATE USE_XPU) - endif() - --if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) --endif() -+#if(NOT MSVC AND USE_XNNPACK) -+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) -+#endif() - - # ========================================================== - # formerly-libtorch flags --- -2.43.0 - diff --git a/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch deleted file mode 100644 index 6e82af2..0000000 --- a/0001-no-third_party-fmt.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:16:04 -0500 -Subject: [PATCH] no third_party fmt - ---- - c10/CMakeLists.txt | 2 +- - cmake/Dependencies.cmake | 6 +++--- - torch/CMakeLists.txt | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index 1f742f4c176..4fa08913bdd 100644 ---- a/c10/CMakeLists.txt -+++ b/c10/CMakeLists.txt -@@ -87,7 +87,7 @@ endif() - if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) - endif() --target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -+target_link_libraries(c10 PRIVATE fmt) - - if(C10_USE_NUMA) - message(STATUS "NUMA paths:") -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 6f5a2d5feff..42fbf80f6e8 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1837,7 +1837,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - - # Disable compiler feature checks for `fmt`. - # -@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know - # `fmt` is compatible with a superset of the compilers that PyTorch is, it - # shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") -+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") - --list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) -+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) - - # ---[ Kineto -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 97a72eed55b..9e5014d1980 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES - python::python - pybind::pybind11 - shm -- fmt::fmt-header-only -+ fmt - ATEN_CPU_FILES_GEN_LIB) - - if(USE_ASAN AND TARGET Sanitizer::address) --- -2.43.2 - diff --git a/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch deleted file mode 100644 index ba1ec40..0000000 --- a/0001-no-third_party-foxi.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 08:11:55 -0500 -Subject: [PATCH] no third_party foxi - ---- - cmake/Dependencies.cmake | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 5f91f3ffab..8e1461af81 100644 ---- 
a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) - endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) -+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() -- include_directories(${FOXI_INCLUDE_DIRS}) -- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+# include_directories(${FOXI_INCLUDE_DIRS}) -+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.0 - diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch deleted file mode 100644 index 8e39795..0000000 --- a/0001-reenable-foxi-linking.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Thu, 22 Feb 2024 09:28:11 -0500 -Subject: [PATCH] reenable foxi linking - ---- - cmake/Dependencies.cmake | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 42fbf80f6e8..bc3a2dc6fee 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() - # include_directories(${FOXI_INCLUDE_DIRS}) --# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. 
- set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() --- -2.43.2 - diff --git a/0001-silence-an-assert.patch b/0001-silence-an-assert.patch deleted file mode 100644 index 0b20dcf..0000000 --- a/0001-silence-an-assert.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 06:41:20 -0500 -Subject: [PATCH] silence an assert - ---- - aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu -index 657c0c77b3..b406aa6687 100644 ---- a/aten/src/ATen/native/cuda/IndexKernel.cu -+++ b/aten/src/ATen/native/cuda/IndexKernel.cu -@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind - - gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { - int64_t qvalue = static_cast(zero_point + nearbyintf(*(float*)in_data * inv_scale)); -- qvalue = std::clamp(qvalue, qmin, qmax); -+ //qvalue = std::clamp(qvalue, qmin, qmax); - *(scalar_t*)(out_data + offset) = static_cast(qvalue); - }); - }); --- -2.43.0 - diff --git a/0001-use-any-hip.patch b/0001-use-any-hip.patch deleted file mode 100644 index dca86ea..0000000 --- a/0001-use-any-hip.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 3 Feb 2024 15:01:28 -0500 -Subject: [PATCH] use any hip - ---- - cmake/public/LoadHIP.cmake | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 1abeb06228..28458c4146 100644 ---- a/cmake/public/LoadHIP.cmake -+++ b/cmake/public/LoadHIP.cmake -@@ -30,7 +30,7 @@ endif() - message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") - - # Add HIP to 
the CMAKE Module Path --set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH}) -+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH}) - - macro(find_package_and_print_version PACKAGE_NAME) - find_package("${PACKAGE_NAME}" ${ARGN}) -@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME) - endmacro() - - # Find the HIP Package --find_package_and_print_version(HIP 1.0) -+find_package_and_print_version(HIP MODULE) - - if(HIP_FOUND) - set(PYTORCH_FOUND_HIP TRUE) --- -2.43.0 - diff --git a/README.NVIDIA b/README.NVIDIA new file mode 100644 index 0000000..b927f47 --- /dev/null +++ b/README.NVIDIA @@ -0,0 +1,15 @@ +Some help for building this package for NVIDIA/CUDA + +Review NVIDIA's documenation +https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html + +Review PyTorch documentation +https://github.com/pytorch/pytorch#from-source + +Some convience strings to cut-n-paste + +F39 +dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo + +Building is local. +Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. diff --git a/next/0001-Use-horrible-dynamo-stub.patch b/next/0001-Use-horrible-dynamo-stub.patch new file mode 100644 index 0000000..1900519 --- /dev/null +++ b/next/0001-Use-horrible-dynamo-stub.patch @@ -0,0 +1,85 @@ +From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Sun, 20 Jul 2025 12:47:58 -0700 +Subject: [PATCH] Use horrible dynamo stub + +Rawhide's update of python is too fast for dynamo +So paper of the problem with a horrible stub that throws +runtime exceptions if dynamo is used. 
+ +Signed-off-by: Tom Rix +--- + build_variables.bzl | 26 ++++++++++++---------- + torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++ + 2 files changed, 30 insertions(+), 12 deletions(-) + create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp + +diff --git a/build_variables.bzl b/build_variables.bzl +index b266c80e8843..a3be6893349b 100644 +--- a/build_variables.bzl ++++ b/build_variables.bzl +@@ -140,7 +140,8 @@ core_trainer_sources = [ + "torch/csrc/autograd/variable.cpp", + "torch/csrc/autograd/utils/warnings.cpp", + "torch/csrc/autograd/jit_decomp_interface.cpp", +- "torch/csrc/dynamo/compiled_autograd.cpp", ++# "torch/csrc/dynamo/compiled_autograd.cpp", ++ "torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/jit/frontend/name_mangler.cpp", + "torch/csrc/jit/ir/type_hashing.cpp", + "torch/csrc/jit/serialization/pickler.cpp", +@@ -868,17 +869,18 @@ libtorch_python_core_sources = [ + "torch/csrc/autograd/python_torch_functions_manual.cpp", + "torch/csrc/autograd/python_variable.cpp", + "torch/csrc/autograd/python_variable_indexing.cpp", +- "torch/csrc/dynamo/python_compiled_autograd.cpp", +- "torch/csrc/dynamo/cache_entry.cpp", +- "torch/csrc/dynamo/cpp_shim.cpp", +- "torch/csrc/dynamo/cpython_defs.c", +- "torch/csrc/dynamo/eval_frame.c", +- "torch/csrc/dynamo/eval_frame_cpp.cpp", +- "torch/csrc/dynamo/extra_state.cpp", +- "torch/csrc/dynamo/framelocals_mapping.cpp", +- "torch/csrc/dynamo/guards.cpp", +- "torch/csrc/dynamo/utils.cpp", +- "torch/csrc/dynamo/init.cpp", ++# "torch/csrc/dynamo/python_compiled_autograd.cpp", ++# "torch/csrc/dynamo/cache_entry.cpp", ++# "torch/csrc/dynamo/cpp_shim.cpp", ++# "torch/csrc/dynamo/cpython_defs.c", ++# "torch/csrc/dynamo/eval_frame.c", ++# "torch/csrc/dynamo/eval_frame_cpp.cpp", ++# "torch/csrc/dynamo/extra_state.cpp", ++# "torch/csrc/dynamo/framelocals_mapping.cpp", ++# "torch/csrc/dynamo/guards.cpp", ++# "torch/csrc/dynamo/utils.cpp", ++# "torch/csrc/dynamo/init.cpp", ++ 
"torch/csrc/dynamo/horrible_dynamo_stub.cpp", + "torch/csrc/functorch/init.cpp", + "torch/csrc/fx/node.cpp", + "torch/csrc/mps/Module.cpp", +diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +new file mode 100644 +index 000000000000..3ac1324d4557 +--- /dev/null ++++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp +@@ -0,0 +1,16 @@ ++#include ++#include ++ ++namespace torch::dynamo::autograd { ++const std::unique_ptr& getPyCompilerInterface() { ++ throw std::runtime_error("Dynamo not supported"); ++ return nullptr; ++} ++std::vector> get_input_metadata( ++ const edge_list& edges) { ++ std::vector> r; ++ throw std::runtime_error("Dynamo not supported"); ++ return r; ++} ++ ++} +-- +2.49.0 + diff --git a/next/pyproject.toml b/next/pyproject.toml deleted file mode 100644 index 9508ad0..0000000 --- a/next/pyproject.toml +++ /dev/null @@ -1,154 +0,0 @@ -[build-system] -requires = [ - "setuptools", - "wheel", - "astunparse", - "numpy", - "ninja", - "pyyaml", - "cmake", - "typing-extensions", - "requests", -] -# Use legacy backend to import local packages in setup.py -build-backend = "setuptools.build_meta:__legacy__" - - -[tool.black] -# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 -# line-length = 120 -target-version = ["py38", "py39", "py310", "py311"] - - -[tool.ruff] -target-version = "py38" - -# NOTE: Synchoronize the ignores with .flake8 -ignore = [ - # these ignores are from flake8-bugbear; please fix! - "B007", "B008", "B017", - "B018", # Useless expression - "B019", - "B023", - "B028", # No explicit `stacklevel` keyword argument found - "B904", - "E402", - "C408", # C408 ignored because we like the dict keyword argument syntax - "E501", # E501 is not flexible enough, we're using B950 instead - "E721", - "E731", # Assign lambda expression - "E741", - "EXE001", - "F405", - "F841", - # these ignores are from flake8-logging-format; please fix! 
- "G101", - # these ignores are from ruff NPY; please fix! - "NPY002", - # these ignores are from ruff PERF; please fix! - "PERF203", - "PERF401", - "PERF403", - # these ignores are from PYI; please fix! - "PYI019", - "PYI024", - "PYI036", - "PYI041", - "PYI056", - "SIM102", "SIM103", "SIM112", # flake8-simplify code styles - "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason - "SIM108", - "SIM110", - "SIM114", # Combine `if` branches using logical `or` operator - "SIM115", - "SIM116", # Disable Use a dictionary instead of consecutive `if` statements - "SIM117", - "SIM118", - "UP006", # keep-runtime-typing - "UP007", # keep-runtime-typing -] -line-length = 120 -select = [ - "B", - "C4", - "G", - "E", - "EXE", - "F", - "SIM1", - "W", - # Not included in flake8 - "NPY", - "PERF", - "PGH004", - "PIE794", - "PIE800", - "PIE804", - "PIE807", - "PIE810", - "PLC0131", # type bivariance - "PLC0132", # type param mismatch - "PLC0205", # string as __slots__ - "PLE", - "PLR0133", # constant comparison - "PLR0206", # property with params - "PLR1722", # use sys exit - "PLW0129", # assert on string literal - "PLW0406", # import self - "PLW0711", # binary op exception - "PLW1509", # preexec_fn not safe with threads - "PLW3301", # nested min max - "PT006", # TODO: enable more PT rules - "PT022", - "PT023", - "PT024", - "PT025", - "PT026", - "PYI", - "RUF008", # mutable dataclass default - "RUF015", # access first ele in constant time - "RUF016", # type error non-integer index - "RUF017", - "TRY200", - "TRY302", - "UP", -] - -[tool.ruff.per-file-ignores] -"__init__.py" = [ - "F401", -] -"test/typing/reveal/**" = [ - "F821", -] -"test/torch_np/numpy_tests/**" = [ - "F821", -] -"test/jit/**" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't want to modify the jit test as they test specify syntax -] -"test/test_jit.py" = [ - "PLR0133", # tests require this for JIT - "PYI", - "RUF015", - "UP", # We don't 
want to modify the jit test as they test specify syntax -] - -"torch/onnx/**" = [ - "UP037", # ONNX does runtime type checking -] - -"torchgen/api/types/__init__.py" = [ - "F401", - "F403", -] -"torchgen/executorch/api/types/__init__.py" = [ - "F401", - "F403", -] -"torch/utils/collect_env.py" = [ - "UP", # collect_env.py needs to work with older versions of Python -] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..925742b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,353 @@ +# Package ###################################################################### + +[build-system] +requires = [ + # 70.1.0: min version for integrated bdist_wheel command from wheel package + # 77.0.0: min version for SPDX expression support for project.license + "setuptools>=70.1.0,<80.0", + "cmake>=3.27", + "ninja", + "numpy", + "packaging", + "pyyaml", + "requests", + "six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", +] +build-backend = "setuptools.build_meta" + +[dependency-groups] +dev = [ + # This list should be kept in sync with the requirements-build.txt + # in PyTorch root until the project fully migrates to pyproject.toml + # after which this can be removed as it is already specified in the + # [build-system] section + "setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0 + "cmake>=3.27", + "ninja", + "numpy", + "packaging", + "pyyaml", + "requests", + "six", # dependency chain: NNPACK -> PeachPy -> six + "typing-extensions>=4.10.0", + + # This list should be kept in sync with the requirements.txt in + # PyTorch root until the project fully migrates to pyproject.toml + "build[uv]", + "expecttest>=0.3.0", + "filelock", + "fsspec>=0.8.5", + "hypothesis", + "jinja2", + "lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'", + "networkx>=2.5.1", + "optree>=0.13.0", + "psutil", + "sympy>=1.13.3", + "typing-extensions>=4.13.2", + "wheel", +] + +[project] +name = "torch" +description = 
"Tensors and Dynamic neural networks in Python with strong GPU acceleration" +readme = "README.md" +requires-python = ">=3.10" +# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77 +# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment. +# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed +# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903 +license = { text = "BSD-3-Clause" } +authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }] +keywords = ["pytorch", "machine learning"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: C++", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dynamic = [ + "entry-points", + "dependencies", + "scripts", + "version", +] + +[project.urls] +Homepage = "https://pytorch.org" +Repository = "https://github.com/pytorch/pytorch" +Documentation = "https://pytorch.org/docs" +"Issue Tracker" = "https://github.com/pytorch/pytorch/issues" +Forum = "https://discuss.pytorch.org" + +[project.optional-dependencies] +optree = ["optree>=0.13.0"] +opt-einsum = ["opt-einsum>=3.3"] +pyyaml = ["pyyaml"] + +# Linter tools ################################################################# + 
+[tool.isort] +src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"] +extra_standard_library = ["typing_extensions"] +skip_gitignore = true +skip_glob = ["third_party/*"] +atomic = true +profile = "black" +indent = 4 +line_length = 88 +lines_after_imports = 2 +multi_line_output = 3 +include_trailing_comma = true +combine_as_imports = true + +[tool.usort.known] +first_party = ["caffe2", "torch", "torchgen", "functorch", "test"] +standard_library = ["typing_extensions"] + +[tool.ruff] +line-length = 88 +src = ["caffe2", "torch", "torchgen", "functorch", "test"] + +[tool.ruff.format] +docstring-code-format = true +quote-style = "double" + +[tool.ruff.lint] +# NOTE: Synchoronize the ignores with .flake8 +external = [ + "B001", + "B902", + "B950", + "E121", + "E122", + "E128", + "E131", + "E704", + "E723", + "F723", + "F812", + "P201", + "P204", + "T484", + "TOR901", +] +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B023", + "B028", # No explicit `stacklevel` keyword argument found + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E741", + "EXE001", + "F405", + "FURB122", # writelines + # these ignores are from flake8-logging-format; please fix! + "G101", + # these ignores are from ruff NPY; please fix! + "NPY002", + # these ignores are from ruff PERF; please fix! + "PERF203", + "PERF401", + # these ignores are from PYI; please fix! + "PYI024", + "PYI036", + "PYI041", + "PYI056", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason + "SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", + "UP007", # keep-runtime-typing + "UP045", # keep-runtime-typing + "TC006", + # TODO: Remove Python-3.10 specific suppressions + "B905", + "UP035", + "UP036", + "UP038", + "UP041", + "FURB161", +] +select = [ + "B", + "B904", # Re-raised error without specifying the cause via the from keyword + "C4", + "G", + "E", + "EXE", + "F", + "SIM1", + "SIM911", + "W", + # Not included in flake8 + "FURB", + "LOG", + "NPY", + "PERF", + "PGH004", + "PIE790", + "PIE794", + "PIE800", + "PIE804", + "PIE807", + "PIE810", + "PLC0131", # type bivariance + "PLC0132", # type param mismatch + "PLC0205", # string as __slots__ + "PLC3002", # unnecessary-direct-lambda-call + "PLE", + "PLR0133", # constant comparison + "PLR0206", # property with params + "PLR1722", # use sys exit + "PLR1736", # unnecessary list index + "PLW0129", # assert on string literal + "PLW0131", # named expr without context + "PLW0133", # useless exception statement + "PLW0245", # super without brackets + "PLW0406", # import self + "PLW0711", # binary op exception + "PLW1501", # bad open mode + "PLW1507", # shallow copy os.environ + "PLW1509", # preexec_fn not safe with threads + "PLW2101", # useless lock statement + "PLW3301", # nested min max + "PT006", # TODO: enable more PT rules + "PT014", # duplicate parameterize case + "PT022", + "PT023", + "PT024", + "PT025", + "PT026", + "PYI", + "Q003", # avoidable escaped quote + "Q004", # unnecessary escaped quote + "RSE", + "RUF008", # mutable dataclass default + "RUF013", # ban implicit optional + "RUF015", # access first ele in constant time + "RUF016", # type error non-integer index + "RUF017", + "RUF018", # no assignment in assert + "RUF019", # 
unnecessary-key-check + "RUF020", # never union + "RUF024", # from keys mutable + "RUF026", # default factory kwarg + "RUF030", # No print statement in assert + "RUF033", # default values __post_init__ dataclass + "RUF041", # simplify nested Literal + "RUF048", # properly parse `__version__` + "RUF200", # validate pyproject.toml + "S324", # for hashlib FIPS compliance + "SLOT", + "TC", + "TRY002", # ban vanilla raise (todo fix NOQAs) + "TRY203", + "TRY401", # verbose-log-message + "UP", + "YTT", +] + +[tool.ruff.lint.pyupgrade] +# Preserve types, even if a file imports `from __future__ import annotations`. +keep-runtime-typing = true + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = [ + "F401", +] +"*.pyi" = [ + "PYI011", # typed-argument-default-in-stub + "PYI021", # docstring-in-stub + "PYI053", # string-or-bytes-too-long +] +"functorch/notebooks/**" = [ + "F401", +] +"test/export/**" = [ + "PGH004" +] +"test/typing/**" = [ + "PGH004" +] +"test/typing/reveal/**" = [ + "F821", +] +"test/torch_np/numpy_tests/**" = [ + "F821", + "NPY201", +] +"test/dynamo/test_bytecode_utils.py" = [ + "F821", +] +"test/dynamo/test_debug_utils.py" = [ + "UP037", +] +"test/dynamo/test_misc.py" = [ + "PGH004", +] +"test/jit/**" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/test_jit.py" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/inductor/s429861_repro.py" = [ + "PGH004", +] +"test/inductor/test_torchinductor.py" = [ + "UP037", +] +# autogenerated #TODO figure out why file level noqa is ignored +"torch/_appdirs.py" = ["PGH004"] +"torch/jit/_shape_functions.py" = ["PGH004"] +"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"] +"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"] +"torch/_inductor/codegen/**" = [ + "PGH004" +] 
+"torchgen/api/types/__init__.py" = [ + "F401", + "F403", +] +"torch/utils/collect_env.py" = [ + "UP", # collect_env.py needs to work with older versions of Python +] +"torch/_vendor/**" = [ + "UP", # No need to mess with _vendor +] +"tools/linter/**" = [ + "LOG015" # please fix +] + +[tool.codespell] +ignore-words = "tools/linter/dictionary.txt" diff --git a/python-torch.spec b/python-torch.spec index 07a47eb..d3c31d7 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -4,18 +4,23 @@ %global forgeurl https://github.com/pytorch/pytorch # So pre releases can be tried -%bcond_without gitcommit +%bcond_with gitcommit %if %{with gitcommit} -# git tag v2.3.0-rc2 -%global commit0 6a89a753b1556fe8558582c452fdba083f6ec01a +# v2.9.0-rc9 +%global commit0 0fabc3ba44823f257e70ce397d989c8de5e362c1 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240313 +%global date0 20251008 +%global pypi_version 2.9.0 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 +%global rc_tag -rc9 %else -%global commit0 975d4284250170602db60adfda5eb1664a3b8acc -%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240307 +%global pypi_version 2.9.1 +%global flatbuffers_version 24.12.23 +%global miniz_version 3.0.2 +%global pybind11_version 2.13.6 %endif -%global pypi_version 2.3.0 # For -test subpackage # suitable only for local testing @@ -25,39 +30,37 @@ %bcond_with test %ifarch x86_64 -%if 0%{?fedora} %bcond_without rocm -%else -%bcond_with rocm %endif -%endif -# hipblaslt is in development -%bcond_with hipblaslt -# Which families gpu build for -%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 -%global rocm_default_gpu default -%bcond_without rocm_loop -# For testing caffe2 +# For testing distributed+rccl etc. 
+%bcond_without rccl +%bcond_with gloo +%bcond_without mpi +%bcond_without tensorpipe + +# Disable dwz with rocm because memory can be exhausted +%if %{with rocm} +%define _find_debuginfo_dwz_opts %{nil} +%endif + +# These came in 2.4 and not yet in Fedora +%bcond_with opentelemetry +%bcond_with httplib +%bcond_with kineto + %if 0%{?fedora} -%bcond_without caffe2 +%bcond_without onnx %else -%bcond_with caffe2 -%endif - -# For testing distributed -%bcond_with distributed - -# For testing openvs -%bcond_with opencv - -# For testing cuda -%ifarch x86_64 -%bcond_with cuda +%bcond_with onnx %endif Name: python-%{pypi_name} +%if %{with gitcommit} Version: %{pypi_version}^git%{date0}.%{shortcommit0} +%else +Version: %{pypi_version} +%endif Release: %autorelease Summary: PyTorch AI/ML framework # See license.txt for license details @@ -66,79 +69,94 @@ License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AN URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source100: pyproject.toml +Source1000: pyproject.toml %else -Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source100: pyproject.toml +Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif -Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz -Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v%{pybind11_version}.tar.gz -%if %{with cuda} -%global cuf_ver 1.1.2 -Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz -%global cul_ver 3.4.1 -Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz +# Development on tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit +%global tp_commit 
52791a2fd214b2a9dc5759d36725909c1daa7f2e +%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) +Source20: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz +# The old libuv tensorpipe uses +Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz +# Development afaik on libnop has stopped, this is the last commit +%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 +%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) +Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz + +%if %{without opentelemetry} +%global ot_ver 1.14.2 +Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz %endif -Patch0: 0001-no-third_party-foxi.patch -Patch1: 0001-no-third_party-fmt.patch -Patch2: 0001-no-third_party-FXdiv.patch -Patch3: 0001-Stub-in-kineto-ActivityType.patch -Patch5: 0001-disable-submodule-search.patch - -%if %{with caffe2} -Patch6: 0001-reenable-foxi-linking.patch +%if %{without httplib} +%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 +%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) +Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz %endif -%if %{with rocm} -# https://github.com/pytorch/pytorch/pull/120551 -Patch100: 0001-Optionally-use-hipblaslt.patch -Patch101: 0001-cuda-hip-signatures.patch -Patch102: 0001-silence-an-assert.patch -Patch103: 0001-can-not-use-with-c-files.patch -Patch104: 0001-use-any-hip.patch -Patch105: 0001-disable-use-of-aotriton.patch +%if %{without kineto} +%global ki_commit 5e7501833f1021ce6f618572d3baf657b6319658 +%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) +Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz %endif -ExclusiveArch: x86_64 aarch64 +%global ox_ver 1.18.0 +Source90: https://github.com/onnx/onnx/archive/refs/tags/v%{ox_ver}.tar.gz + +%global pt_arches x86_64 aarch64 
+ExclusiveArch: %pt_arches %global toolchain gcc %global _lto_cflags %nil BuildRequires: cmake -BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel +BuildRequires: flexiblas-devel BuildRequires: fmt-devel BuildRequires: foxi-devel -BuildRequires: FP16-devel -BuildRequires: fxdiv-devel BuildRequires: gcc-c++ BuildRequires: gcc-gfortran -%if %{with distributed} + +%if %{with gloo} BuildRequires: gloo-devel %endif -BuildRequires: ninja-build -BuildRequires: onnx-devel +BuildRequires: json-devel + BuildRequires: libomp-devel -BuildRequires: openblas-devel -BuildRequires: pocketfft-devel +BuildRequires: moodycamel-concurrentqueue-devel +BuildRequires: numactl-devel +BuildRequires: ninja-build +%if %{with onnx} +BuildRequires: onnx-devel +%endif +%if %{with mpi} +BuildRequires: openmpi-devel +%endif BuildRequires: protobuf-devel -BuildRequires: pthreadpool-devel -BuildRequires: psimd-devel -BuildRequires: python3-numpy -BuildRequires: python3-pyyaml -BuildRequires: python3-typing-extensions BuildRequires: sleef-devel BuildRequires: valgrind-devel -BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a +BuildRequires: pocketfft-devel +BuildRequires: pthreadpool-devel + +BuildRequires: cpuinfo-devel +BuildRequires: FP16-devel +BuildRequires: fxdiv-devel +BuildRequires: psimd-devel +BuildRequires: xnnpack-devel = 0.0^git20240814.312eb7e BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) +BuildRequires: python3dist(numpy) +BuildRequires: python3dist(pip) +BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) -BuildRequires: python3dist(typing-extensions) BuildRequires: python3dist(sphinx) +BuildRequires: python3dist(typing-extensions) %if 0%{?fedora} BuildRequires: python3-pybind11 @@ -148,50 +166,47 @@ BuildRequires: python3dist(sympy) %if %{with rocm} BuildRequires: hipblas-devel -%if %{with hipblaslt} BuildRequires: hipblaslt-devel -%endif BuildRequires: 
hipcub-devel BuildRequires: hipfft-devel BuildRequires: hiprand-devel BuildRequires: hipsparse-devel +BuildRequires: hipsparselt-devel BuildRequires: hipsolver-devel +# Magma is broken on ROCm 7 +# BuildRequires: magma-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel BuildRequires: rocrand-devel BuildRequires: rocfft-devel -%if %{with distributed} +%if %{with rccl} BuildRequires: rccl-devel %endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel +BuildRequires: rocm-compilersupport-macros BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros -BuildRequires: rocm-rpm-macros-modules +BuildRequires: rocsolver-devel +BuildRequires: rocm-smi-devel BuildRequires: rocthrust-devel BuildRequires: roctracer-devel -Requires: rocm-rpm-macros-modules -%endif +Requires: amdsmi -%if %{with opencv} -BuildRequires: opencv-devel %endif - %if %{with test} BuildRequires: google-benchmark-devel %endif -# Apache-2.0 -Provides: bundled(flatbuffers) = 22.3.3 -# MIT -Provides: bundled(miniz) = 2.1.0 -Provides: bundled(pybind11) = 2.11.1 +Requires: python3dist(dill) +Requires: python3dist(yaml) +Obsoletes: caffe = 1.0^git20200212.9b89154 %description PyTorch is a Python package that provides two high-level features: @@ -205,6 +220,24 @@ and Cython to extend PyTorch when needed. 
%package -n python3-%{pypi_name} Summary: %{summary} +# For convenience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = %{flatbuffers_version} +# MIT +Provides: bundled(miniz) = %{miniz_version} +Provides: bundled(pybind11) = %{pybind11_version} + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + %description -n python3-%{pypi_name} PyTorch is a Python package that provides two high-level features: @@ -214,22 +247,6 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. -%package -n python3-%{pypi_name}-devel -Summary: Libraries and headers for %{name} -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} - -%description -n python3-%{pypi_name}-devel -%{summary} - -%if %{with rocm} -%package -n python3-%{pypi_name}-rocm -Summary: %{name} for ROCm -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} - -%description -n python3-%{pypi_name}-rocm -%{summary} -%endif - %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -242,44 +259,120 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %prep +%if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE1000} . + +%else +%autosetup -p1 -n pytorch-v%{version} +%endif # Remove bundled egg-info rm -rf %{pypi_name}.egg-info -# Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE100} . 
tar xf %{SOURCE1} -cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ +rm -rf third_party/flatbuffers/* +cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/ tar xf %{SOURCE2} -cp -r pybind11-2.11.1/* third_party/pybind11/ +rm -rf third_party/pybind11/* +cp -r pybind11-%{pybind11_version}/* third_party/pybind11/ -%if %{with cuda} -tar xf %{SOURCE10} -cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ -tar xf %{SOURCE11} -cp -r cutlass-%{cul_ver}/* third_party/cutlass/ +%if %{with tensorpipe} +tar xf %{SOURCE20} +rm -rf third_party/tensorpipe/* +cp -r tensorpipe-*/* third_party/tensorpipe/ +tar xf %{SOURCE21} +rm -rf third_party/tensorpipe/third_party/libuv/* +cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ +tar xf %{SOURCE22} +rm -rf third_party/tensorpipe/third_party/libnop/* +cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ + +# gcc 15 include cstdint +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/allocator.h +sed -i '/#include ' third_party/tensorpipe/tensorpipe/common/memory.h %endif -%if %{with opencv} -# Reduce requirements, *FOUND is not set -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt -sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt -sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt +%if %{without opentelemetry} +tar xf %{SOURCE60} +rm -rf third_party/opentelemetry-cpp/* +cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ %endif +%if %{without httplib} +tar xf %{SOURCE70} +rm -rf third_party/cpp-httplib/* +cp -r cpp-httplib-*/* third_party/cpp-httplib/ +%endif + +%if %{without kineto} +tar xf %{SOURCE80} +rm -rf third_party/kineto/* +cp -r kineto-*/* third_party/kineto/ +%endif + +%if %{without onnx} +tar xf %{SOURCE90} +rm -rf third_party/onnx/* +cp -r onnx-*/* third_party/onnx/ +%endif + +# Adjust for the hipblaslt's we build +sed -i -e 's@"gfx90a", "gfx940", 
"gfx941", "gfx942"@"gfx90a", "gfx1103", "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1200", "gfx1201"@' aten/src/ATen/native/cuda/Blas.cpp + %if 0%{?rhel} # In RHEL but too old sed -i -e '/typing-extensions/d' setup.py # Need to pip these sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py +%else +# for 2.5.0 +sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py %endif # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake +# Compress hip +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake +# Silence noisy warning +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake +# Use parallel jobs +sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -parallel-jobs=4@' cmake/Dependencies.cmake +# Need to link with librocm_smi64 +sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake + +# No third_party fmt, use system +sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt +sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $)@@' aten/src/ATen/CMakeLists.txt + +sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake +sed -i -e 's@fmt::fmt-header-only@fmt@' 
caffe2/CMakeLists.txt + +sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake +sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake + +# No third_party FXdiv +sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt +sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt + +# https://github.com/pytorch/pytorch/issues/149803 +# Tries to checkout nccl +sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py + +# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo +sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # Release comes fully loaded with third party src # Remove what we can @@ -289,7 +382,7 @@ sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake # the third_party dir to compile the file. # mimiz is licensed MIT # https://github.com/richgel999/miniz/blob/master/LICENSE -mv third_party/miniz-2.1.0 . +mv third_party/miniz-%{miniz_version} . # # setup.py depends on this script mv third_party/build_bundled.py . @@ -299,9 +392,24 @@ mv third_party/flatbuffers . mv third_party/pybind11 . -%if %{with cuda} -mv third_party/cudnn_frontend . -mv third_party/cutlass . +%if %{with tensorpipe} +mv third_party/tensorpipe . +%endif + +%if %{without opentelemetry} +mv third_party/opentelemetry-cpp . +%endif + +%if %{without httplib} +mv third_party/cpp-httplib . +%endif + +%if %{without kineto} +mv third_party/kineto . +%endif + +%if %{without onnx} +mv third_party/onnx . 
%endif %if %{with test} @@ -312,13 +420,28 @@ mv third_party/googletest . rm -rf third_party/* # Put stuff back mv build_bundled.py third_party -mv miniz-2.1.0 third_party +mv miniz-%{miniz_version} third_party mv flatbuffers third_party mv pybind11 third_party -%if %{with cuda} -mv cudnn_frontend third_party -mv cutlass third_party +%if %{with tensorpipe} +mv tensorpipe third_party +%endif + +%if %{without opentelemetry} +mv opentelemetry-cpp third_party +%endif + +%if %{without httplib} +mv cpp-httplib third_party +%endif + +%if %{without kineto} +mv kineto third_party +%endif + +%if %{without onnx} +mv onnx third_party %endif %if %{with test} @@ -328,47 +451,70 @@ mv googletest third_party # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft +cp /usr/include/pocketfft_hdronly.h third_party/pocketfft/ + # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* third_party/valgrind-headers -# Remove unneeded OpenCL files that confuse the lincense scanner -rm caffe2/contrib/opencl/OpenCL/cl.hpp -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h -rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp +# Fix installing to /usr/lib64 +sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt + +# reenable foxi linking +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake + +# cmake version changed +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt +sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt +%if %{without opentelemetry} +sed -i -e 's@cmake_minimum_required(VERSION 3.1)@cmake_minimum_required(VERSION 3.5)@' third_party/opentelemetry-cpp/CMakeLists.txt %endif %if %{with rocm} # 
hipify ./tools/amd_build/build_amd.py # Fedora installs to /usr/include, not /usr/include/rocm-core sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h +# https://github.com/pytorch/pytorch/issues/149805 +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' cmake/public/LoadHIP.cmake +# Fedora installs to /usr/include, not /usr/include/rocm-core +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/Tunable.cpp +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/cuda/tunable/Tunable.cpp +# use any hip, correct CMAKE_MODULE_PATH +sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake +sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake +# silence an assert +# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu + %endif -%if %{with cuda} -# build complains about not being able to build -pie without -fPIC -sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@string(APPEND CMAKE_CUDA_FLAGS " -fPIC -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")@' CMakeLists.txt -%endif +# moodycamel include path needs adjusting to use the system's +sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@/usr/include/concurrentqueue@' cmake/Dependencies.cmake %build +# Export the arches +# echo "%%pytorch_arches %pt_arches" > macros.pytorch + # # Control the number of jobs # # The build can fail if too many threads exceed the physical memory -# So count core and and memory and increase the build memory util the build succeeds +# Run at least one thread, more if CPU & memory resources are available. 
# +%ifarch x86_64 # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` +%else +# cpuinfo format varies on other arches, fall back to nproc +COMPILE_JOBS=`nproc` +%endif if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory -%if %{with cuda} -BUILD_MEM=4 -%else BUILD_MEM=2 -%endif MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` @@ -400,53 +546,55 @@ export CAFFE2_LINK_LOCAL_PROTOBUF=OFF export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF +export USE_FAKELOWP=OFF export USE_FBGEMM=OFF export USE_FLASH_ATTENTION=OFF -export USE_GOLD_LINKER=OFF +export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF +export USE_KLEIDIAI=OFF export USE_LITE_INTERPRETER_PROFILER=OFF export USE_LITE_PROTO=OFF +export USE_MAGMA=OFF +export USE_MEM_EFF_ATTENTION=OFF export USE_MKLDNN=OFF +export USE_MPI=OFF export USE_NCCL=OFF export USE_NNPACK=OFF export USE_NUMPY=ON export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF -export USE_QNNPACK=OFF export USE_ROCM=OFF -export USE_SYSTEM_CPUINFO=ON export USE_SYSTEM_SLEEF=ON export USE_SYSTEM_EIGEN_INSTALL=ON -export USE_SYSTEM_FP16=ON -export USE_SYSTEM_PTHREADPOOL=ON -export USE_SYSTEM_PSIMD=ON -export USE_SYSTEM_FXDIV=ON +%if %{with onnx} export USE_SYSTEM_ONNX=ON -export USE_SYSTEM_XNNPACK=ON +%endif export USE_SYSTEM_PYBIND11=OFF export USE_SYSTEM_LIBS=OFF +export USE_SYSTEM_NCCL=OFF export USE_TENSORPIPE=OFF -export USE_XNNPACK=ON +export USE_XNNPACK=OFF +export USE_XPU=OFF +export USE_SYSTEM_PTHREADPOOL=ON +export USE_SYSTEM_CPUINFO=ON +export USE_SYSTEM_FP16=ON +export USE_SYSTEM_FXDIV=ON +export USE_SYSTEM_PSIMD=ON +export USE_SYSTEM_XNNPACK=OFF -%if %{with caffe2} -export BUILD_CAFFE2=ON -%endif - -%if %{with cuda} -%if %{without rocm} -export CUDACXX=/usr/local/cuda/bin/nvcc -export 
CPLUS_INCLUDE_PATH=/usr/local/cuda/include -export USE_CUDA=ON -%endif -%endif - -%if %{with distributed} export USE_DISTRIBUTED=ON +%if %{with tensorpipe} +export USE_TENSORPIPE=ON +export TP_BUILD_LIBUV=OFF %endif -%if %{with opencv} -export USE_OPENCV=ON +%if %{with gloo} +export USE_GLOO=ON +export USE_SYSTEM_GLOO=ON +%endif +%if %{with mpi} +export USE_MPI=ON %endif %if %{with test} @@ -462,100 +610,73 @@ export BUILD_TEST=ON # # See BZ 2244862 - %if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK_SDPA=OFF +export USE_ROCM_CK_GEMM=OFF +export USE_FBGEMM_GENAI=OFF + +# Magma is broken on ROCm 7 +# export USE_MAGMA=ON export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +#RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +#export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -%py3_build -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - %py3_build - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_wheel %else - %py3_build - %endif + %install -%if %{with rocm} +# pytorch rpm macros +# install -Dpm 644 macros.pytorch \ +# %{buildroot}%{_rpmmacrodir}/macros.pytorch +%if %{with rocm} export USE_ROCM=ON +export USE_ROCM_CK=OFF export HIP_PATH=`hipconfig -p` export ROCM_PATH=`hipconfig -R` -export HIP_CLANG_PATH=`hipconfig -l` -RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` -export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode +# RESOURCE_DIR=`%{rocmllvm_bindir}/clang -print-resource-dir` +# export 
DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode -gpu=%{rocm_default_gpu} -module load rocm/$gpu -export PYTORCH_ROCM_ARCH=$ROCM_GPUS -mv build-${gpu} build -%py3_install -mv build build-${gpu} -module purge +# pytorch uses clang, not hipcc +export HIP_CLANG_PATH=%{rocmllvm_bindir} +export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default} -%if %{with rocm_loop} -for gpu in %{rocm_gpu_list} -do - module load rocm/$gpu - export PYTORCH_ROCM_ARCH=$ROCM_GPUS - mv build-${gpu} build - # need to customize the install location, so replace py3_install - %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root %{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} - rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ - mv build build-${gpu} - module purge -done %endif +%if 0%{?fedora} +%pyproject_install +%pyproject_save_files '*torch*' %else %py3_install - %endif + +%check +# Not working yet +# pyproject_check_import torch + # Do not remote the empty files - -%files -n python3-%{pypi_name} +%files -n python3-%{pypi_name} %license LICENSE %doc README.md -%{_bindir}/convert-caffe2-to-onnx -%{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/%{pypi_name} -%{python3_sitearch}/%{pypi_name}-*.egg-info +%{python3_sitearch}/%{pypi_name}* %{python3_sitearch}/functorch -%{python3_sitearch}/torchgen -%if %{with caffe2} -%{python3_sitearch}/caffe2 -%endif -%if %{with rocm} -%if %{with rocm_loop} -%{_libdir}/rocm/gfx*/bin/* -%{_libdir}/rocm/gfx*/lib64/* -%endif -%endif %changelog %autochangelog diff --git a/sources b/sources index 90b1128..9a3681f 100644 --- a/sources +++ b/sources @@ -1,2 +1,19 @@ -SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 -SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 +SHA512 (pytorch-v2.7.0.tar.gz) = 
17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6 +SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 +SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a +SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e +SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 +SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 +SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7 +SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55 +SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a +SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95 +SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1 +SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0 +SHA512 (pytorch-v2.8.0.tar.gz) = 
791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28 +SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d +SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907 +SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc +SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341 +SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023 +SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a