Compare commits

..

10 commits

Author SHA1 Message Date
Tom Rix
a85b3ec712 Fix broken cpuinfo for aarch64
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 14:39:24 -06:00
Tom Rix
0a219010a7 Update sources
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 12:05:23 -06:00
Tom Rix
93593886f9 Reduce amd gpu list on F40
Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 11:58:31 -06:00
Tom Rix
e492aac499 Merge branch 'rawhide' into f40
Update to 2.3.1

Signed-off-by: Tom Rix <trix@redhat.com>
2024-06-11 05:22:31 -06:00
Tom Rix
7afc241f03 Revert "Provide pytorch as a convience"
This reverts commit 4d6bb6e14a.
2024-06-11 05:22:14 -06:00
Tom Rix
6f71567388 Revert "Add a few missing files to install"
This reverts commit f75a38ed41.
2024-06-11 05:21:43 -06:00
Tom Rix
11bcbe02b2 Revert "Rebuild for libonnx"
This reverts commit 4f4ba3564b.
2024-06-11 05:21:07 -06:00
Tom Rix
4f4ba3564b Rebuild for libonnx
Signed-off-by: Tom Rix <trix@redhat.com>
2024-04-15 13:35:06 -06:00
Tom Rix
f75a38ed41 Add a few missing files to install
Signed-off-by: Tom Rix <trix@redhat.com>
2024-03-31 19:55:46 -06:00
Tom Rix
4d6bb6e14a Provide pytorch as a convience
Signed-off-by: Tom Rix <trix@redhat.com>
2024-03-31 13:57:54 -06:00
21 changed files with 1652 additions and 1104 deletions

24
.gitignore vendored
View file

@ -12,22 +12,8 @@
/pytorch-97ff6cf.tar.gz
/pytorch-v2.3.0.tar.gz
/pytorch-v2.3.1.tar.gz
/pytorch-v2.4.0.tar.gz
/v1.14.2.tar.gz
/cpp-httplib-3b6597b.tar.gz
/kineto-be13176.tar.gz
/pytorch-v2.4.1.tar.gz
/pytorch-v2.5.0.tar.gz
/pytorch-v2.5.1.tar.gz
/pytorch-v2.7.0.tar.gz
/v2.13.6.tar.gz
/pytorch-a1cb3cc.tar.gz
/v24.12.23.tar.gz
/kineto-5e75018.tar.gz
/pytorch-v2.8.0.tar.gz
/v1.18.0.tar.gz
/pytorch-715dca6.tar.gz
/pytorch-fd36458.tar.gz
/pytorch-0fabc3b.tar.gz
/pytorch-v2.9.0.tar.gz
/pytorch-v2.9.1.tar.gz
/xnnpack-fcbf55a.tar.gz
/FXdiv-63058ef.tar.gz
/FP16-0a92994.tar.gz
/psimd-072586a.tar.gz
/cpuinfo-d6860c4.tar.gz

View file

@ -1,202 +0,0 @@
From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Mon, 21 Jul 2025 11:35:03 -0700
Subject: [PATCH] Add cmake variable USE_ROCM_CK
---
CMakeLists.txt | 1 +
aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++-----------------
aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++---------
cmake/Dependencies.cmake | 3 +++
4 files changed, 35 insertions(+), 31 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5d25e6afa0f..afc1b53efa64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -240,6 +240,7 @@ cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" OFF)
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index c9cfd74b501e..59f6178218ee 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -373,26 +373,26 @@ if(USE_ROCM)
# is header only, so this should be ok, except that the CMake build generates
# a ck/config.h. We just do that part here. Without this, the ck.h from the
# ROCM SDK may get accidentally used instead.
- function(_pytorch_rocm_generate_ck_conf)
- set(CK_ENABLE_INT8 "ON")
- set(CK_ENABLE_FP16 "ON")
- set(CK_ENABLE_FP32 "ON")
- set(CK_ENABLE_FP64 "ON")
- set(CK_ENABLE_BF16 "ON")
- set(CK_ENABLE_FP8 "ON")
- set(CK_ENABLE_BF8 "ON")
- set(CK_USE_XDL "ON")
- set(CK_USE_WMMA "ON")
- configure_file(
- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
- )
- endfunction()
+# function(_pytorch_rocm_generate_ck_conf)
+# set(CK_ENABLE_INT8 "ON")
+# set(CK_ENABLE_FP16 "ON")
+# set(CK_ENABLE_FP32 "ON")
+# set(CK_ENABLE_FP64 "ON")
+# set(CK_ENABLE_BF16 "ON")
+# set(CK_ENABLE_FP8 "ON")
+# set(CK_ENABLE_BF8 "ON")
+# set(CK_USE_XDL "ON")
+# set(CK_USE_WMMA "ON")
+# configure_file(
+# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
+# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
+# )
+# endfunction()
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
- _pytorch_rocm_generate_ck_conf()
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
+# _pytorch_rocm_generate_ck_conf()
# Next two lines are needed because TunableOp uses third-party/fmt
list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
@@ -409,7 +409,7 @@ endif()
${native_quantized_hip_hip}
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
)
- if(WIN32) # Windows doesn't support Composable Kernels
+ if(NOT USE_ROCM_CK) # Composable Kernel support disabled (e.g. Windows)
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
file(GLOB native_hip_ck "native/hip/ck*.hip")
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index 89350a11bea7..e5b7960177cf 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -752,7 +752,7 @@ template <>
void bgemm_internal<double>(CUDABLAS_BGEMM_ARGTYPES(double))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support double gemm yet
bgemm_internal_cublas<double>(CUDABLAS_BGEMM_ARGS(double));
#else
@@ -836,7 +836,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
bgemm_internal_cublas<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
}
@@ -1270,14 +1270,14 @@ template <>
void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support double gemm yet
gemm_internal_cublas<double>(CUDABLAS_GEMM_ARGS(double));
#else
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
#endif
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
}
@@ -1293,7 +1293,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
@@ -1311,7 +1311,7 @@ template <>
void gemm_internal<c10::complex<double>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<double>))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support complex gemm yet
gemm_internal_cublas<c10::complex<double>>(CUDABLAS_GEMM_ARGS(c10::complex<double>));
#else
@@ -1327,7 +1327,7 @@ template <>
void gemm_internal<c10::complex<float>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<float>))
{
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-#ifdef USE_ROCM
+#ifdef USE_ROCM_CK
// hipblaslt does not support complex gemm yet
gemm_internal_cublas<c10::complex<float>>(CUDABLAS_GEMM_ARGS(c10::complex<float>));
#else
@@ -1345,7 +1345,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1361,7 +1361,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
@@ -1382,7 +1382,7 @@ void gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half,
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
}
@@ -1398,7 +1398,7 @@ void gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
gemm_internal_cublaslt<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
}
-#if defined(USE_ROCM) && !defined(_MSC_VER)
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
}
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index a93386c27f8d..be1368999d38 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1031,6 +1031,9 @@ if(USE_ROCM)
if(HIPBLASLT_VEC_EXT)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
endif()
+ if(USE_ROCM_CK)
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
+ endif()
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
if(WIN32)
add_definitions(-DROCM_ON_WINDOWS)
--
2.49.0

View file

@ -1,359 +0,0 @@
From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001
From: albanD <desmaison.alban@gmail.com>
Date: Sat, 12 Jul 2025 03:42:33 -0400
Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14
Imported from
https://github.com/albanD/pytorch/tree/cpython314_build
commit 88bb9cdb72449f4277829e20d94ad8aec1894216
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
torch/_dynamo/bytecode_analysis.py | 2 +-
torch/ao/quantization/__init__.py | 5 +++-
torch/ao/quantization/qconfig.py | 4 ++-
torch/ao/quantization/utils.py | 7 +++--
torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++
torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++
torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++--------
torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++
torch/csrc/utils/python_compat.h | 1 +
torch/onnx/__init__.py | 1 -
torch/utils/weak.py | 29 +++++++++++++++++--
11 files changed, 111 insertions(+), 19 deletions(-)
diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py
index 3252ea91409f..2de74ee5bf8d 100644
--- a/torch/_dynamo/bytecode_analysis.py
+++ b/torch/_dynamo/bytecode_analysis.py
@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11):
TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"])
else:
TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"])
-if sys.version_info >= (3, 12):
+if (3, 12) <= sys.version_info < (3, 14):
TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"])
if sys.version_info >= (3, 13):
TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"])
diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py
index ffc1792fd23f..cf5a8b99a894 100644
--- a/torch/ao/quantization/__init__.py
+++ b/torch/ao/quantization/__init__.py
@@ -1,5 +1,6 @@
# mypy: allow-untyped-defs
+import sys
from typing import Callable, Optional, Union
import torch
@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403
# ensure __module__ is set correctly for public APIs
ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase]
-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
+if sys.version_info < (3, 14):
+ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
+
for _f in [
compare_results,
extract_results_from_loggers,
diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py
index efee5302ad42..d9a8fc78bab4 100644
--- a/torch/ao/quantization/qconfig.py
+++ b/torch/ao/quantization/qconfig.py
@@ -1,5 +1,6 @@
# mypy: allow-untyped-defs
import copy
+import sys
import warnings
from collections import namedtuple
from typing import Any, Optional, Union
@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N
QConfigAny = Optional[QConfig]
-QConfigAny.__module__ = "torch.ao.quantization.qconfig"
+if sys.version_info < (3, 14):
+ QConfigAny.__module__ = "torch.ao.quantization.qconfig"
def _add_module_to_qconfig_obs_ctr(
diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py
index 4ac3112ec072..3b1503e01701 100644
--- a/torch/ao/quantization/utils.py
+++ b/torch/ao/quantization/utils.py
@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph)
"""
import functools
+import sys
import warnings
from collections import OrderedDict
from inspect import getfullargspec, signature
@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized
NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any]
-NodePattern.__module__ = "torch.ao.quantization.utils"
+if sys.version_info < (3, 14):
+ NodePattern.__module__ = "torch.ao.quantization.utils"
# This is the Quantizer class instance from torch/quantization/fx/quantize.py.
# Define separately to prevent circular imports.
@@ -31,7 +33,8 @@ QuantizerCls = Any
Pattern = Union[
Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any
]
-Pattern.__module__ = "torch.ao.quantization.utils"
+if sys.version_info < (3, 14):
+ Pattern.__module__ = "torch.ao.quantization.utils"
# TODO: maybe rename this to MatchInputNode
diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c
index b68ef894aeaa..244d4165d5e8 100644
--- a/torch/csrc/dynamo/cpython_defs.c
+++ b/torch/csrc/dynamo/cpython_defs.c
@@ -2,6 +2,20 @@
#include <torch/csrc/dynamo/cpython_includes.h>
#include <torch/csrc/dynamo/debug_macros.h>
+#if IS_PYTHON_3_14_PLUS
+
+const uint8_t* THP_PyOpcode_Caches = NULL;
+const int THP_PyOpcode_Caches_size = 0;
+
+void
+THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
+{}
+void
+THP_PyFrame_Clear(_PyInterpreterFrame *frame)
+{}
+
+#else
+
#if IS_PYTHON_3_11_PLUS
#define Py_BUILD_CORE
@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL;
const int THP_PyOpcode_Caches_size = 0;
#endif
+
+#endif // IS_PYTHON_3_14_PLUS
\ No newline at end of file
diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h
index 6b99c1d5aec8..616be16563cf 100644
--- a/torch/csrc/dynamo/cpython_includes.h
+++ b/torch/csrc/dynamo/cpython_includes.h
@@ -21,6 +21,14 @@
#if IS_PYTHON_3_11_PLUS
#include <internal/pycore_frame.h>
+#if IS_PYTHON_3_14_PLUS
+#include <internal/pycore_interpframe_structs.h>
+#include <internal/pycore_stackref.h>
+#endif
+#endif
+
+#if IS_PYTHON_3_14_PLUS
+#include <internal/pycore_code.h>
#endif
#undef Py_BUILD_CORE
@@ -30,6 +38,13 @@
extern "C" {
#endif
+#if IS_PYTHON_3_14_PLUS
+
+#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable)
+#define PREV_INSTR(x) (x)->instr_ptr
+
+#else
+
#if IS_PYTHON_3_13_PLUS
#define F_CODE(x) ((PyCodeObject*)(x)->f_executable)
#define PREV_INSTR(x) (x)->instr_ptr
@@ -38,6 +53,8 @@ extern "C" {
#define PREV_INSTR(x) (x)->prev_instr
#endif
+#endif // IS_PYTHON_3_14_PLUS
+
#if IS_PYTHON_3_12_PLUS
#define FUNC(x) ((x)->f_funcobj)
#else
diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c
index f413782b2d30..72bb8839bac3 100644
--- a/torch/csrc/dynamo/eval_frame.c
+++ b/torch/csrc/dynamo/eval_frame.c
@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) {
return PyUnicode_AsUTF8(F_CODE(frame)->co_name);
}
-void clear_old_frame_if_python_312_plus(
- PyThreadState* tstate,
- THP_EVAL_API_FRAME_OBJECT* frame) {
-#if IS_PYTHON_3_12_PLUS
-
- THP_PyFrame_Clear(frame);
- THP_PyThreadState_PopFrame(tstate, frame);
-
-#endif
-}
-
static PyObject* dynamo_eval_custom_code_impl(
PyThreadState* tstate,
THP_EVAL_API_FRAME_OBJECT* frame,
@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim(
static void enable_eval_frame_shim(PyThreadState* tstate) {}
static void enable_eval_frame_default(PyThreadState* tstate) {}
+PyObject* dynamo_eval_custom_code(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame,
+ PyCodeObject* code,
+ const char* trace_annotation,
+ int throw_flag) {}
+THPPyInterpreterFrame* THPPyInterpreterFrame_New(
+ THP_EVAL_API_FRAME_OBJECT* frame) {}
+PyObject* dynamo_eval_frame_default(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame,
+ int throw_flag) {}
static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL};
@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = {
#endif // !(IS_PYTHON_3_14_PLUS)
+void clear_old_frame_if_python_312_plus(
+ PyThreadState* tstate,
+ THP_EVAL_API_FRAME_OBJECT* frame) {
+#if IS_PYTHON_3_12_PLUS
+
+ THP_PyFrame_Clear(frame);
+ THP_PyThreadState_PopFrame(tstate, frame);
+
+#endif
+}
+
static PyObject* increment_working_threads(
PyThreadState* tstate,
PyObject* module) {
diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp
index b839fb26fc91..c4ee36d87767 100644
--- a/torch/csrc/dynamo/framelocals_mapping.cpp
+++ b/torch/csrc/dynamo/framelocals_mapping.cpp
@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
PyCodeObject* co = F_CODE(frame);
_framelocals.resize(co->co_nlocalsplus, nullptr);
+#if IS_PYTHON_3_14_PLUS
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
if (!frame->stacktop) {
return;
}
+#endif
auto update_framelocals = [&](int i, PyObject* value) {
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
};
auto offset = co->co_nlocalsplus - co->co_nfreevars;
+#if IS_PYTHON_3_14_PLUS
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
for (int i = 0; i < offset; i++) {
update_framelocals(i, frame->localsplus[i]);
}
+#endif
+
// Get references to closure variables
+#if IS_PYTHON_3_14_PLUS
+ PyObject* closure;
+ TORCH_CHECK(false, "Python 3.14+ not supported");
+#else
PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure;
+#endif
for (int i = 0; i < co->co_nfreevars; i++) {
update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i));
}
diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h
index a1537611cc47..16292e4fd030 100644
--- a/torch/csrc/utils/python_compat.h
+++ b/torch/csrc/utils/python_compat.h
@@ -13,6 +13,7 @@ extern "C" {
#define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000
#define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000
#define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000
+#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000
static inline int PyCode_GetNCellvars(PyCodeObject* code) {
// gh-26364 added co_ncellvars to Python 3.11.0rc1
diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py
index 345ffd2a065b..ceeadde5365b 100644
--- a/torch/onnx/__init__.py
+++ b/torch/onnx/__init__.py
@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx"
OnnxExporterError.__module__ = "torch.onnx"
_OrtBackend.__module__ = "torch.onnx"
_OrtBackendOptions.__module__ = "torch.onnx"
-_OrtExecutionProvider.__module__ = "torch.onnx"
enable_fake_mode.__module__ = "torch.onnx"
is_onnxrt_backend_supported.__module__ = "torch.onnx"
diff --git a/torch/utils/weak.py b/torch/utils/weak.py
index 8bf2ba5ed02b..9c7218cb2ad3 100644
--- a/torch/utils/weak.py
+++ b/torch/utils/weak.py
@@ -3,8 +3,6 @@ from __future__ import annotations
import collections.abc as _collections_abc
import weakref
-
-from _weakrefset import _IterationGuard # type: ignore[attr-defined]
from collections.abc import Mapping, MutableMapping
from weakref import ref
@@ -22,6 +20,33 @@ __all__ = [
]
+# TODO: make weakref properly thread safe following
+# https://github.com/python/cpython/pull/125325
+class _IterationGuard:
+ # This context manager registers itself in the current iterators of the
+ # weak container, such as to delay all removals until the context manager
+ # exits.
+ # This technique should be relatively thread-safe (since sets are).
+
+ def __init__(self, weakcontainer):
+ # Don't create cycles
+ self.weakcontainer = ref(weakcontainer)
+
+ def __enter__(self):
+ w = self.weakcontainer()
+ if w is not None:
+ w._iterating.add(self)
+ return self
+
+ def __exit__(self, e, t, b):
+ w = self.weakcontainer()
+ if w is not None:
+ s = w._iterating
+ s.remove(self)
+ if not s:
+ w._commit_removals()
+
+
# This file defines a variant of WeakKeyDictionary that overrides the hashing
# behavior of the key to use object identity, rather than the builtin
# __eq__/__hash__ functions. This is useful for Tensor weak keys, as their
--
2.49.0

View file

@ -0,0 +1,262 @@
From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 23 Feb 2024 08:27:30 -0500
Subject: [PATCH] Optionally use hipblaslt
The hipblaslt package is not available on Fedora.
Instead of requiring the package, make it optional.
If it is found, define the preprocessor variable HIPBLASLT
Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks
Signed-off-by: Tom Rix <trix@redhat.com>
---
aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++---
aten/src/ATen/cuda/CUDABlas.h | 2 +-
aten/src/ATen/cuda/CUDAContextLight.h | 4 ++--
aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++--
aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++---
aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------
cmake/Dependencies.cmake | 3 +++
cmake/public/LoadHIP.cmake | 4 ++--
8 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index d534ec5a178..e815463f630 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -14,7 +14,7 @@
#include <c10/util/irange.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 60000
+#ifdef HIPBLASLT
#include <hipblaslt/hipblaslt-ext.hpp>
#endif
// until hipblas has an API to accept flags, we must use rocblas here
@@ -781,7 +781,7 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
}
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000
// only for rocm 5.7 where we first supported hipblaslt, it was difficult
@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
};
} // namespace
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
template <typename Dtype>
void gemm_and_bias(
bool transpose_mat1,
@@ -1124,7 +1125,7 @@ template void gemm_and_bias(
at::BFloat16* result_ptr,
int64_t result_ld,
GEMMAndBiasActivationEpilogue activation);
-
+#endif
void scaled_gemm(
char transa,
char transb,
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
index eb12bb350c5..068607467dd 100644
--- a/aten/src/ATen/cuda/CUDABlas.h
+++ b/aten/src/ATen/cuda/CUDABlas.h
@@ -82,7 +82,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
template <>
void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
enum GEMMAndBiasActivationEpilogue {
None,
RELU,
diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h
index 4ec35f59a21..e28dc42034f 100644
--- a/aten/src/ATen/cuda/CUDAContextLight.h
+++ b/aten/src/ATen/cuda/CUDAContextLight.h
@@ -9,7 +9,7 @@
// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
// added bf16 support
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#include <cublasLt.h>
#endif
@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
/* Handles */
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
#endif
diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp
index 6913d2cd95e..3d4276be372 100644
--- a/aten/src/ATen/cuda/CublasHandlePool.cpp
+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp
@@ -29,7 +29,7 @@ namespace at::cuda {
namespace {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
void createCublasLtHandle(cublasLtHandle_t *handle) {
TORCH_CUDABLAS_CHECK(cublasLtCreate(handle));
}
@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() {
return handle;
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasLtHandle_t getCurrentCUDABlasLtHandle() {
#ifdef USE_ROCM
c10::DeviceIndex device = 0;
diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h
index 3ba0d761277..dde1870cfbf 100644
--- a/aten/src/ATen/cuda/tunable/TunableGemm.h
+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h
@@ -11,7 +11,7 @@
#include <ATen/cuda/tunable/GemmCommon.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 50700
+#ifdef HIPBLASLT
#include <ATen/cuda/tunable/GemmHipblaslt.h>
#endif
#include <ATen/cuda/tunable/GemmRocblas.h>
@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 29e5c5e3cf1..df56f3d7f1d 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -155,7 +155,7 @@ enum class Activation {
GELU,
};
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) {
switch (a) {
case Activation::None:
@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() {
#ifdef USE_ROCM
static bool isSupportedHipLtROCmArch(int index) {
+#if defined(HIPBLASLT)
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index);
std::string device_arch = prop->gcnArchName;
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) {
}
}
TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!");
+#endif
return false;
}
#endif
@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
at::ScalarType scalar_type = self.scalar_type();
c10::MaybeOwned<Tensor> self_;
if (&result != &self) {
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT)
// Strangely, if mat2 has only 1 row or column, we get
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
}
self__sizes = self_->sizes();
} else {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
useLtInterface = !disable_addmm_cuda_lt &&
result.dim() == 2 && result.is_contiguous() &&
isSupportedHipLtROCmArch(self.device().index()) &&
@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj());
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
if (useLtInterface) {
AT_DISPATCH_FLOATING_TYPES_AND2(
at::ScalarType::Half,
@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
at::native::resize_output(amax, {});
-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000)
+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasCommonArgs args(mat1, mat2, out);
const auto out_dtype_ = args.result->scalar_type();
TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt");
@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform.");
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 60000
+#if defined(USE_ROCM) && defined(HIPBLASLT)
// rocm's hipblaslt does not yet support amax, so calculate separately
auto out_float32 = out.to(kFloat);
out_float32.abs_();
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index b7ffbeb07dc..2b6c3678984 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1273,6 +1273,9 @@ if(USE_ROCM)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
endif()
+ if(hipblaslt_FOUND)
+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT)
+ endif()
if(HIPBLASLT_CUSTOM_DATA_TYPE)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE)
endif()
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index f6ca263c5e5..53eb0b63c1a 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -156,7 +156,7 @@ if(HIP_FOUND)
find_package_and_print_version(rocblas REQUIRED)
find_package_and_print_version(hipblas REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
- find_package_and_print_version(hipblaslt REQUIRED)
+ find_package_and_print_version(hipblaslt)
endif()
find_package_and_print_version(miopen REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
@@ -191,7 +191,7 @@ if(HIP_FOUND)
# roctx is part of roctracer
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)
- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
+  if(hipblaslt_FOUND)
# check whether hipblaslt is using its own datatype
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc")
file(WRITE ${file} ""
--
2.43.2

View file

@ -0,0 +1,115 @@
From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 4 Apr 2024 14:21:38 -0600
Subject: [PATCH] Reenable dim for python 3.12
In 3.12:
_PyArg_Parser added an element to the start of the structure.
So existing positional initialization is off. Switch to element
initialization.
_Py_CODEUNIT changed from an int to a union, but relevant_op
is passed an int for the return of decoder.opcode, so the parameter
type is wrong, switch it to int.
The opcode PRECALL was removed, so reduce its handling to 3.11
Signed-off-by: Tom Rix <trix@redhat.com>
---
functorch/csrc/dim/dim.cpp | 24 +++++-------------------
functorch/csrc/dim/minpybind.h | 4 ++--
2 files changed, 7 insertions(+), 21 deletions(-)
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
index 4cc027504c77..e48b0d58081f 100644
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
@@ -6,20 +6,6 @@
#include <torch/csrc/utils/python_compat.h>
-
-// Many APIs have changed/don't exist anymore
-#if IS_PYTHON_3_12_PLUS
-
-#include "dim.h"
-
-// Re-enable this some day
-PyObject* Dim_init() {
- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
- return nullptr;
-}
-
-#else
-
#include "minpybind.h"
#include <frameobject.h>
#include <opcode.h>
@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
PY_BEGIN
mpy::handle sizes;
static const char * const _keywords[] = {"sizes", nullptr};
- static _PyArg_Parser parser = {"O", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
return nullptr;
}
@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
PY_BEGIN
int size;
static const char * const _keywords[] = {"N", nullptr};
- static _PyArg_Parser parser = {"i", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
return nullptr;
}
@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
// dim() --------------------
-static bool relevant_op(_Py_CODEUNIT c) {
+static bool relevant_op(int c) {
switch(c) {
case STORE_NAME:
case STORE_GLOBAL:
@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
auto lasti = PyFrame_GetLasti(f.ptr());
auto decoder = PyInstDecoder(c.ptr(), lasti);
- #if IS_PYTHON_3_11_PLUS
+ #if IS_PYTHON_3_11
// When py3.11 adapts bytecode lasti points to the precall
// rather than the call instruction after it
if (decoder.opcode() == PRECALL) {
@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
}
}
-#endif
+
diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
index de82b5af95a4..d76d4828bf80 100644
--- a/functorch/csrc/dim/minpybind.h
+++ b/functorch/csrc/dim/minpybind.h
@@ -621,7 +621,7 @@ struct vector_args {
PyObject *dummy = NULL;
_PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
#else
- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
_PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
#endif
@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
#define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
FORALL_ARGS(MPY_ARGS_DECLARE) \
- static _PyArg_Parser parser = {fmt, kwlist, 0}; \
+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
throw mpy::exception_set(); \
}
--
2.44.0

View file

@ -0,0 +1,39 @@
From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:19:29 -0700
Subject: [PATCH] Regenerate flatbuffer header
For this error
torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41:
error: static assertion failed: Non-compatible flatbuffers version included
12 | FLATBUFFERS_VERSION_MINOR == 3 &&
PyTorch is expecting 23.3.3, what f38 has
Rawhide is at 23.5.26
Regenerate with
flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
index cffe8bc7a6..83575e4c19 100644
--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h
+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
@@ -9,8 +9,8 @@
// Ensure the included flatbuffers.h is the same version as when this file was
// generated, otherwise it may not be compatible.
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
- FLATBUFFERS_VERSION_MINOR == 3 &&
- FLATBUFFERS_VERSION_REVISION == 3,
+ FLATBUFFERS_VERSION_MINOR == 5 &&
+ FLATBUFFERS_VERSION_REVISION == 26,
"Non-compatible flatbuffers version included");
namespace torch {
--
2.43.0

View file

@ -0,0 +1,73 @@
From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:25:23 -0700
Subject: [PATCH] Stub in kineto ActivityType
There is an error when kineto is not used; the shim still
requires the ActivityType.h header to get the enum ActivityType.
So cut-n-paste just enough of the header in to do this.
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index e92cbf003d..68985ab7d0 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,51 @@
#undef USE_KINETO
#endif
+#ifdef USE_KINETO
#include <ActivityType.h>
+#else
+namespace libkineto {
+// copied from header
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+ // Activity types enabled by default
+ CPU_OP = 0, // cpu side ops
+ USER_ANNOTATION,
+ GPU_USER_ANNOTATION,
+ GPU_MEMCPY,
+ GPU_MEMSET,
+ CONCURRENT_KERNEL, // on-device kernels
+ EXTERNAL_CORRELATION,
+ CUDA_RUNTIME, // host side cuda runtime events
+ CUDA_DRIVER, // host side cuda driver events
+ CPU_INSTANT_EVENT, // host side point-like events
+ PYTHON_FUNCTION,
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+ // Optional Activity types
+ CUDA_SYNC, // synchronization events between runtime and kernels
+ GLOW_RUNTIME, // host side glow runtime events
+ MTIA_RUNTIME, // host side MTIA runtime events
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
+};
+}
+
+#endif
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>
--
2.43.0

View file

@ -0,0 +1,25 @@
From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 06:41:49 -0500
Subject: [PATCH] can not use with c files
---
cmake/Dependencies.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 4dd8042058..5f91f3ffab 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1269,7 +1269,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
- list(APPEND HIP_CXX_FLAGS -std=c++17)
+# list(APPEND HIP_CXX_FLAGS -std=c++17)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
endif()
--
2.43.0

View file

@ -0,0 +1,42 @@
From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 1 Feb 2024 11:29:47 -0500
Subject: [PATCH] cuda - hip signatures
---
aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
index 1b85e7776e..bb6f88783a 100644
--- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
const char *src,
const char *name,
int numHeaders,
+#if !defined(USE_ROCM)
const char * const *headers,
const char * const *includeNames) {
+#else
+ const char **headers,
+ const char **includeNames) {
+#endif
auto fn = reinterpret_cast<decltype(&nvrtcCreateProgram)>(getNVRTCLibrary().sym(__func__));
if (!fn)
throw std::runtime_error("Can't get nvrtcCreateProgram");
@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *);
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *);
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *);
#endif
+#if !defined(USE_ROCM)
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *);
+#else
+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **);
+#endif
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult);
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*);
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *);
--
2.43.0

View file

@ -0,0 +1,46 @@
From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 19 Mar 2024 11:32:37 -0400
Subject: [PATCH] disable use of aotriton
---
aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
index 96b839820efd..2d3dd0cb4b0f 100644
--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
@@ -21,9 +21,11 @@
#include <cmath>
#include <functional>
+#ifdef USE_FLASH_ATTENTION
#if USE_ROCM
#include <aotriton/flash.h>
#endif
+#endif
/**
* Note [SDPA Runtime Dispatch]
@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) {
}
bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) {
+#ifdef USE_FLASH_ATTENTION
// Check that the gpu is capable of running flash attention
using sm80 = SMVersion<8, 0>;
using sm90 = SMVersion<9, 0>;
@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug
}
#endif
return true;
+#else
+ return false;
+#endif
}
bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) {
--
2.44.0

View file

@ -0,0 +1,226 @@
From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 3 Apr 2024 17:58:46 -0700
Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo
unittests on 3.12 (#123216)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216
Approved by: https://github.com/jansel, https://github.com/malfet
---
test/dynamo/test_autograd_function.py | 3 ++
test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++
test/functorch/test_eager_transforms.py | 7 ++-
test/run_test.py | 3 --
torch/__init__.py | 5 +-
torch/_dynamo/eval_frame.py | 4 +-
torch/_dynamo/test_case.py | 8 +---
7 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py
index d23fec607afa..bc5ebc767038 100644
--- a/test/dynamo/test_autograd_function.py
+++ b/test/dynamo/test_autograd_function.py
@@ -2,6 +2,8 @@
import copy
import math
+import sys
+import unittest
import torch
@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase):
# I pulled all of these test cases from test_autograd.py
# In the future, we should make the Dynamo test suite actually
# run on test_autograd.py (it's disabled right now) and delete these.
+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+")
def test_smoke_from_test_autograd(self):
class Func(torch.autograd.Function):
@staticmethod
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index a73de8b1c7e9..8f54e0564e6b 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -9760,6 +9760,69 @@ fn
lambda mod: mod,
)
+ @xfailIfPy311
+ def test_outside_linear_module_free(self):
+ # Compared to test_linear_module_free, the linear
+ # layer is not the code object that is directly compiled.
+ def model_inp_ctr():
+ fc = torch.nn.Linear(100, 100)
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.fc_ref = fc
+
+ def forward(self, x):
+ return fc(x[0])
+
+ # return fc to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), fc)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref)
+
+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+")
+ def test_parameter_free(self):
+ def model_inp_ctr():
+ param = torch.nn.Parameter(torch.randn(100, 100))
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.param = param
+
+ def forward(self, x):
+ return self.param * x[0]
+
+ # return param to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), param)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param)
+
+ def test_raises_importerror1(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ try:
+ import some_module_that_surely_does_not_exist
+
+ return
+ except ImportError:
+ pass
+ return x.sin()
+
+ x = torch.randn(8)
+ self.assertEqual(fn(x), x.sin())
+
+ def test_raises_importerror2(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ import some_module_that_surely_does_not_exist
+
+ return x + 1
+
+ x = torch.randn(8)
+ with self.assertRaises(ImportError):
+ fn(x)
+
def test_dynamo_cache_move_to_front(self):
class Mod(torch.nn.Module):
def __init__(self):
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
index 09415cf8f48e..60790ec06059 100644
--- a/test/functorch/test_eager_transforms.py
+++ b/test/functorch/test_eager_transforms.py
@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase):
# Triton only supports GPU with SM70 or later.
@expectedFailureIf((IS_ARM64 and not IS_MACOS) or
IS_WINDOWS or
- (TEST_CUDA and not SM70OrLater) or
- (sys.version_info >= (3, 12)))
+ (TEST_CUDA and not SM70OrLater))
def test_compile_vmap_hessian(self, device):
# The model and inputs are a smaller version
# of code at benchmark repo:
@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase):
actual = opt_fn(params_and_buffers, x)
self.assertEqual(actual, expected)
- # torch.compile is not supported on Windows or on Python 3.12+
- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12)))
+ # torch.compile is not supported on Windows
+ @expectedFailureIf(IS_WINDOWS)
@torch._dynamo.config.patch(suppress_errors=False)
@torch._dynamo.config.patch(capture_func_transforms=True)
@skipIfTorchDynamo("Do not test torch.compile on top of torch.compile")
diff --git a/test/run_test.py b/test/run_test.py
index e86af9623042..ebb14df4167d 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT))
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
DISTRIBUTED_TEST_PREFIX = "distributed"
INDUCTOR_TEST_PREFIX = "inductor"
-DYNAMO_TEST_PREFIX = "dynamo"
# Note [ROCm parallel CI testing]
@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [
]
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)]
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]:
# these tests failing in Python 3.12 temporarily disabling
if sys.version_info >= (3, 12):
options.exclude.extend(INDUCTOR_TESTS)
- options.exclude.extend(DYNAMO_TESTS)
options.exclude.extend(
[
"functorch/test_dims",
diff --git a/torch/__init__.py b/torch/__init__.py
index d381712b4a35..26cdffe81d29 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *,
"""
_C._log_api_usage_once("torch.compile")
- # Temporary until we get proper support for python 3.12
- if sys.version_info >= (3, 12):
- raise RuntimeError("Dynamo is not supported on Python 3.12+")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Dynamo is not supported on Python 3.13+")
# Decorator mode
if model is None:
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
index 53ab0df3a947..0a80eeea99ed 100644
--- a/torch/_dynamo/eval_frame.py
+++ b/torch/_dynamo/eval_frame.py
@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
def check_if_dynamo_supported():
- if sys.version_info >= (3, 12):
- raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile")
def is_dynamo_supported():
diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py
index e3cbef09eaae..297ea6e2bc2a 100644
--- a/torch/_dynamo/test_case.py
+++ b/torch/_dynamo/test_case.py
@@ -1,7 +1,6 @@
import contextlib
import importlib
import logging
-import sys
import torch
import torch.testing
@@ -20,12 +19,7 @@ log = logging.getLogger(__name__)
def run_tests(needs=()):
from torch.testing._internal.common_utils import run_tests
- if (
- TEST_WITH_TORCHDYNAMO
- or IS_WINDOWS
- or TEST_WITH_CROSSREF
- or sys.version_info >= (3, 12)
- ):
+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF:
return # skip testing
if isinstance(needs, str):
--
2.44.0

View file

@ -0,0 +1,54 @@
From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:20:28 -0500
Subject: [PATCH] no third_party FXdiv
---
caffe2/CMakeLists.txt | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index b2f3adbfae..80a5625c8d 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -110,15 +110,15 @@ endif()
# Note: the folders that are being commented out have not been properly
# addressed yet.
-if(NOT MSVC AND USE_XNNPACK)
- if(NOT TARGET fxdiv)
- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
- add_subdirectory(
- "${FXDIV_SOURCE_DIR}"
- "${CMAKE_BINARY_DIR}/FXdiv")
- endif()
-endif()
+#if(NOT MSVC AND USE_XNNPACK)
+# if(NOT TARGET fxdiv)
+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+# add_subdirectory(
+# "${FXDIV_SOURCE_DIR}"
+# "${CMAKE_BINARY_DIR}/FXdiv")
+# endif()
+#endif()
add_subdirectory(core)
add_subdirectory(serialize)
@@ -1081,9 +1081,9 @@ if(USE_XPU)
target_compile_definitions(torch_xpu PRIVATE USE_XPU)
endif()
-if(NOT MSVC AND USE_XNNPACK)
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
-endif()
+#if(NOT MSVC AND USE_XNNPACK)
+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+#endif()
# ==========================================================
# formerly-libtorch flags
--
2.43.0

View file

@ -0,0 +1,65 @@
From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:16:04 -0500
Subject: [PATCH] no third_party fmt
---
c10/CMakeLists.txt | 2 +-
cmake/Dependencies.cmake | 6 +++---
torch/CMakeLists.txt | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
index 1f742f4c176..4fa08913bdd 100644
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@@ -87,7 +87,7 @@ endif()
if(C10_USE_GLOG)
target_link_libraries(c10 PUBLIC glog::glog)
endif()
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
+target_link_libraries(c10 PRIVATE fmt)
if(C10_USE_NUMA)
message(STATUS "NUMA paths:")
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 6f5a2d5feff..42fbf80f6e8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1837,7 +1837,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# Disable compiler feature checks for `fmt`.
#
@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks.
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
# ---[ Kineto
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 97a72eed55b..9e5014d1980 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
python::python
pybind::pybind11
shm
- fmt::fmt-header-only
+ fmt
ATEN_CPU_FILES_GEN_LIB)
if(USE_ASAN AND TARGET Sanitizer::address)
--
2.43.2

View file

@ -0,0 +1,36 @@
From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:11:55 -0500
Subject: [PATCH] no third_party foxi
---
cmake/Dependencies.cmake | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 5f91f3ffab..8e1461af81 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
if(NOT USE_SYSTEM_ONNX)
@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
- include_directories(${FOXI_INCLUDE_DIRS})
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+# include_directories(${FOXI_INCLUDE_DIRS})
+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
--
2.43.0

View file

@ -0,0 +1,25 @@
From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 22 Feb 2024 09:28:11 -0500
Subject: [PATCH] reenable foxi linking
---
cmake/Dependencies.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 42fbf80f6e8..bc3a2dc6fee 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
# include_directories(${FOXI_INCLUDE_DIRS})
-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
--
2.43.2

View file

@ -0,0 +1,25 @@
From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 06:41:20 -0500
Subject: [PATCH] silence an assert
---
aten/src/ATen/native/cuda/IndexKernel.cu | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu
index 657c0c77b3..b406aa6687 100644
--- a/aten/src/ATen/native/cuda/IndexKernel.cu
+++ b/aten/src/ATen/native/cuda/IndexKernel.cu
@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind
gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) {
int64_t qvalue = static_cast<int64_t>(zero_point + nearbyintf(*(float*)in_data * inv_scale));
- qvalue = std::clamp(qvalue, qmin, qmax);
+ //qvalue = std::clamp(qvalue, qmin, qmax);
*(scalar_t*)(out_data + offset) = static_cast<scalar_t>(qvalue);
});
});
--
2.43.0

34
0001-use-any-hip.patch Normal file
View file

@ -0,0 +1,34 @@
From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 15:01:28 -0500
Subject: [PATCH] use any hip
---
cmake/public/LoadHIP.cmake | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index 1abeb06228..28458c4146 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -30,7 +30,7 @@ endif()
message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
# Add HIP to the CMAKE Module Path
-set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH})
+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH})
macro(find_package_and_print_version PACKAGE_NAME)
find_package("${PACKAGE_NAME}" ${ARGN})
@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME)
endmacro()
# Find the HIP Package
-find_package_and_print_version(HIP 1.0)
+find_package_and_print_version(HIP MODULE)
if(HIP_FOUND)
set(PYTORCH_FOUND_HIP TRUE)
--
2.43.0

View file

@ -1,85 +0,0 @@
From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Sun, 20 Jul 2025 12:47:58 -0700
Subject: [PATCH] Use horrible dynamo stub
Rawhide's update of python is too fast for dynamo
So paper over the problem with a horrible stub that throws
runtime exceptions if dynamo is used.
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
build_variables.bzl | 26 ++++++++++++----------
torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++
2 files changed, 30 insertions(+), 12 deletions(-)
create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp
diff --git a/build_variables.bzl b/build_variables.bzl
index b266c80e8843..a3be6893349b 100644
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -140,7 +140,8 @@ core_trainer_sources = [
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/utils/warnings.cpp",
"torch/csrc/autograd/jit_decomp_interface.cpp",
- "torch/csrc/dynamo/compiled_autograd.cpp",
+# "torch/csrc/dynamo/compiled_autograd.cpp",
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
"torch/csrc/jit/frontend/name_mangler.cpp",
"torch/csrc/jit/ir/type_hashing.cpp",
"torch/csrc/jit/serialization/pickler.cpp",
@@ -868,17 +869,18 @@ libtorch_python_core_sources = [
"torch/csrc/autograd/python_torch_functions_manual.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_variable_indexing.cpp",
- "torch/csrc/dynamo/python_compiled_autograd.cpp",
- "torch/csrc/dynamo/cache_entry.cpp",
- "torch/csrc/dynamo/cpp_shim.cpp",
- "torch/csrc/dynamo/cpython_defs.c",
- "torch/csrc/dynamo/eval_frame.c",
- "torch/csrc/dynamo/eval_frame_cpp.cpp",
- "torch/csrc/dynamo/extra_state.cpp",
- "torch/csrc/dynamo/framelocals_mapping.cpp",
- "torch/csrc/dynamo/guards.cpp",
- "torch/csrc/dynamo/utils.cpp",
- "torch/csrc/dynamo/init.cpp",
+# "torch/csrc/dynamo/python_compiled_autograd.cpp",
+# "torch/csrc/dynamo/cache_entry.cpp",
+# "torch/csrc/dynamo/cpp_shim.cpp",
+# "torch/csrc/dynamo/cpython_defs.c",
+# "torch/csrc/dynamo/eval_frame.c",
+# "torch/csrc/dynamo/eval_frame_cpp.cpp",
+# "torch/csrc/dynamo/extra_state.cpp",
+# "torch/csrc/dynamo/framelocals_mapping.cpp",
+# "torch/csrc/dynamo/guards.cpp",
+# "torch/csrc/dynamo/utils.cpp",
+# "torch/csrc/dynamo/init.cpp",
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
"torch/csrc/functorch/init.cpp",
"torch/csrc/fx/node.cpp",
"torch/csrc/mps/Module.cpp",
diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
new file mode 100644
index 000000000000..3ac1324d4557
--- /dev/null
+++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
@@ -0,0 +1,16 @@
+#include <torch/csrc/autograd/engine.h>
+#include <torch/csrc/dynamo/compiled_autograd.h>
+
+namespace torch::dynamo::autograd {
+const std::unique_ptr<PyCompilerInterface>& getPyCompilerInterface() {
+ throw std::runtime_error("Dynamo not supported");
+ return nullptr;
+}
+std::vector<std::optional<InputMetadata>> get_input_metadata(
+ const edge_list& edges) {
+ std::vector<std::optional<InputMetadata>> r;
+ throw std::runtime_error("Dynamo not supported");
+ return r;
+}
+
+}
--
2.49.0

View file

@ -1,165 +1,46 @@
# Package ######################################################################
[build-system]
requires = [
# 70.1.0: min version for integrated bdist_wheel command from wheel package
# 77.0.0: min version for SPDX expression support for project.license
"setuptools>=70.1.0,<80.0",
"cmake>=3.27",
"ninja",
"numpy",
"packaging",
"pyyaml",
"requests",
"six", # dependency chain: NNPACK -> PeachPy -> six
"typing-extensions>=4.10.0",
]
build-backend = "setuptools.build_meta"
[dependency-groups]
dev = [
# This list should be kept in sync with the requirements-build.txt
# in PyTorch root until the project fully migrates to pyproject.toml
# after which this can be removed as it is already specified in the
# [build-system] section
"setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0
"cmake>=3.27",
"ninja",
"numpy",
"packaging",
"pyyaml",
"requests",
"six", # dependency chain: NNPACK -> PeachPy -> six
"typing-extensions>=4.10.0",
# This list should be kept in sync with the requirements.txt in
# PyTorch root until the project fully migrates to pyproject.toml
"build[uv]",
"expecttest>=0.3.0",
"filelock",
"fsspec>=0.8.5",
"hypothesis",
"jinja2",
"lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'",
"networkx>=2.5.1",
"optree>=0.13.0",
"psutil",
"sympy>=1.13.3",
"typing-extensions>=4.13.2",
"setuptools",
"wheel",
"astunparse",
"numpy",
"ninja",
"pyyaml",
"cmake",
"typing-extensions",
"requests",
]
# Use legacy backend to import local packages in setup.py
build-backend = "setuptools.build_meta:__legacy__"
[project]
name = "torch"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
readme = "README.md"
requires-python = ">=3.10"
# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77
# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment.
# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed
# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903
license = { text = "BSD-3-Clause" }
authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }]
keywords = ["pytorch", "machine learning"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: C++",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
dynamic = [
"entry-points",
"dependencies",
"scripts",
"version",
]
[project.urls]
Homepage = "https://pytorch.org"
Repository = "https://github.com/pytorch/pytorch"
Documentation = "https://pytorch.org/docs"
"Issue Tracker" = "https://github.com/pytorch/pytorch/issues"
Forum = "https://discuss.pytorch.org"
[tool.black]
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
# line-length = 120
target-version = ["py38", "py39", "py310", "py311"]
[project.optional-dependencies]
optree = ["optree>=0.13.0"]
opt-einsum = ["opt-einsum>=3.3"]
pyyaml = ["pyyaml"]
# Linter tools #################################################################
[tool.isort]
src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"]
extra_standard_library = ["typing_extensions"]
skip_gitignore = true
skip_glob = ["third_party/*"]
atomic = true
profile = "black"
indent = 4
line_length = 88
lines_after_imports = 2
multi_line_output = 3
include_trailing_comma = true
combine_as_imports = true
[tool.usort.known]
first_party = ["caffe2", "torch", "torchgen", "functorch", "test"]
standard_library = ["typing_extensions"]
[tool.ruff]
line-length = 88
src = ["caffe2", "torch", "torchgen", "functorch", "test"]
target-version = "py38"
[tool.ruff.format]
docstring-code-format = true
quote-style = "double"
[tool.ruff.lint]
# NOTE: Synchoronize the ignores with .flake8
external = [
"B001",
"B902",
"B950",
"E121",
"E122",
"E128",
"E131",
"E704",
"E723",
"F723",
"F812",
"P201",
"P204",
"T484",
"TOR901",
]
ignore = [
# these ignores are from flake8-bugbear; please fix!
"B007", "B008", "B017",
"B018", # Useless expression
"B019",
"B023",
"B028", # No explicit `stacklevel` keyword argument found
"B904",
"E402",
"C408", # C408 ignored because we like the dict keyword argument syntax
"E501", # E501 is not flexible enough, we're using B950 instead
"E721",
"E731", # Assign lambda expression
"E741",
"EXE001",
"F405",
"FURB122", # writelines
"F841",
# these ignores are from flake8-logging-format; please fix!
"G101",
# these ignores are from ruff NPY; please fix!
@ -167,49 +48,39 @@ ignore = [
# these ignores are from ruff PERF; please fix!
"PERF203",
"PERF401",
"PERF403",
# these ignores are from PYI; please fix!
"PYI019",
"PYI024",
"PYI036",
"PYI041",
"PYI056",
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
"SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression
"SIM108",
"SIM110",
"SIM114", # Combine `if` branches using logical `or` operator
"SIM115",
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
"SIM117",
"SIM118",
"UP006", # keep-runtime-typing
"UP007", # keep-runtime-typing
"UP045", # keep-runtime-typing
"TC006",
# TODO: Remove Python-3.10 specific suppressions
"B905",
"UP035",
"UP036",
"UP038",
"UP041",
"FURB161",
]
line-length = 120
select = [
"B",
"B904", # Re-raised error without specifying the cause via the from keyword
"C4",
"G",
"E",
"EXE",
"F",
"SIM1",
"SIM911",
"W",
# Not included in flake8
"FURB",
"LOG",
"NPY",
"PERF",
"PGH004",
"PIE790",
"PIE794",
"PIE800",
"PIE804",
@ -218,96 +89,40 @@ select = [
"PLC0131", # type bivariance
"PLC0132", # type param mismatch
"PLC0205", # string as __slots__
"PLC3002", # unnecessary-direct-lambda-call
"PLE",
"PLR0133", # constant comparison
"PLR0206", # property with params
"PLR1722", # use sys exit
"PLR1736", # unnecessary list index
"PLW0129", # assert on string literal
"PLW0131", # named expr without context
"PLW0133", # useless exception statement
"PLW0245", # super without brackets
"PLW0406", # import self
"PLW0711", # binary op exception
"PLW1501", # bad open mode
"PLW1507", # shallow copy os.environ
"PLW1509", # preexec_fn not safe with threads
"PLW2101", # useless lock statement
"PLW3301", # nested min max
"PT006", # TODO: enable more PT rules
"PT014", # duplicate parameterize case
"PT022",
"PT023",
"PT024",
"PT025",
"PT026",
"PYI",
"Q003", # avoidable escaped quote
"Q004", # unnecessary escaped quote
"RSE",
"RUF008", # mutable dataclass default
"RUF013", # ban implicit optional
"RUF015", # access first ele in constant time
"RUF016", # type error non-integer index
"RUF017",
"RUF018", # no assignment in assert
"RUF019", # unnecessary-key-check
"RUF020", # never union
"RUF024", # from keys mutable
"RUF026", # default factory kwarg
"RUF030", # No print statement in assert
"RUF033", # default values __post_init__ dataclass
"RUF041", # simplify nested Literal
"RUF048", # properly parse `__version__`
"RUF200", # validate pyproject.toml
"S324", # for hashlib FIPS compliance
"SLOT",
"TC",
"TRY002", # ban vanilla raise (todo fix NOQAs)
"TRY203",
"TRY401", # verbose-log-message
"TRY200",
"TRY302",
"UP",
"YTT",
]
[tool.ruff.lint.pyupgrade]
# Preserve types, even if a file imports `from __future__ import annotations`.
keep-runtime-typing = true
[tool.ruff.lint.per-file-ignores]
[tool.ruff.per-file-ignores]
"__init__.py" = [
"F401",
]
"*.pyi" = [
"PYI011", # typed-argument-default-in-stub
"PYI021", # docstring-in-stub
"PYI053", # string-or-bytes-too-long
]
"functorch/notebooks/**" = [
"F401",
]
"test/export/**" = [
"PGH004"
]
"test/typing/**" = [
"PGH004"
]
"test/typing/reveal/**" = [
"F821",
]
"test/torch_np/numpy_tests/**" = [
"F821",
"NPY201",
]
"test/dynamo/test_bytecode_utils.py" = [
"F821",
]
"test/dynamo/test_debug_utils.py" = [
"UP037",
]
"test/dynamo/test_misc.py" = [
"PGH004",
]
"test/jit/**" = [
"PLR0133", # tests require this for JIT
@ -321,33 +136,19 @@ keep-runtime-typing = true
"RUF015",
"UP", # We don't want to modify the jit test as they test specify syntax
]
"test/inductor/s429861_repro.py" = [
"PGH004",
]
"test/inductor/test_torchinductor.py" = [
"UP037",
]
# autogenerated #TODO figure out why file level noqa is ignored
"torch/_appdirs.py" = ["PGH004"]
"torch/jit/_shape_functions.py" = ["PGH004"]
"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"]
"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"]
"torch/_inductor/codegen/**" = [
"PGH004"
"torch/onnx/**" = [
"UP037", # ONNX does runtime type checking
]
"torchgen/api/types/__init__.py" = [
"F401",
"F403",
]
"torchgen/executorch/api/types/__init__.py" = [
"F401",
"F403",
]
"torch/utils/collect_env.py" = [
"UP", # collect_env.py needs to work with older versions of Python
]
"torch/_vendor/**" = [
"UP", # No need to mess with _vendor
]
"tools/linter/**" = [
"LOG015" # please fix
]
[tool.codespell]
ignore-words = "tools/linter/dictionary.txt"

File diff suppressed because it is too large Load diff

30
sources
View file

@ -1,19 +1,19 @@
SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6
SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44
SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28
SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a
SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7
SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55
SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a
SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95
SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1
SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0
SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28
SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d
SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907
SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc
SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341
SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023
SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a
SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580
SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda
SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4
SHA512 (xnnpack-fcbf55a.tar.gz) = 8063e27686f7b71cfba05b0c004c46db4506638689ffb112f013b3886de58653b60ca5487978c3f96275c17bb1136883ca4c93ddb2241a2c31925a950cb51759
SHA512 (FXdiv-63058ef.tar.gz) = da33eab4d006645f383a1f24fc3e747db3aeb0613219297ec0ae69aa2617f07ba050ebd6a64a8cbde6d25481f176d0ec3b9753a95d1fbcead2136595f3e50e97
SHA512 (FP16-0a92994.tar.gz) = 3f094f242425ea37de274eb8539dc5f8ab0c13fd5325d14180ef12e9c04e6002a110d086c4c667f7c8054af337deab096d59482eb95cc8a632c3c412b48e89d1
SHA512 (psimd-072586a.tar.gz) = a18faea093423dd9fe19ece8b228e011dccce0a2a22222f777ea19b023a13173966d4a8aea01147e8fc58de5d39cffcedeb2221a1572ae52bd5aba1295f86a94
SHA512 (cpuinfo-d6860c4.tar.gz) = 02dd70f0b95c2cb6d8af4e33072f63f5d72d2314796033ae68bb6b37cb7db18d43dd2cdfedafc896dec0614dbeec9ab507f765f3d958fbda8c0ab3e3a191a87c