Compare commits
10 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a85b3ec712 | ||
|
|
0a219010a7 | ||
|
|
93593886f9 | ||
|
|
e492aac499 | ||
|
|
7afc241f03 | ||
|
|
6f71567388 | ||
|
|
11bcbe02b2 | ||
|
|
4f4ba3564b | ||
|
|
f75a38ed41 | ||
|
|
4d6bb6e14a |
21 changed files with 1652 additions and 1104 deletions
24
.gitignore
vendored
24
.gitignore
vendored
|
|
@ -12,22 +12,8 @@
|
||||||
/pytorch-97ff6cf.tar.gz
|
/pytorch-97ff6cf.tar.gz
|
||||||
/pytorch-v2.3.0.tar.gz
|
/pytorch-v2.3.0.tar.gz
|
||||||
/pytorch-v2.3.1.tar.gz
|
/pytorch-v2.3.1.tar.gz
|
||||||
/pytorch-v2.4.0.tar.gz
|
/xnnpack-fcbf55a.tar.gz
|
||||||
/v1.14.2.tar.gz
|
/FXdiv-63058ef.tar.gz
|
||||||
/cpp-httplib-3b6597b.tar.gz
|
/FP16-0a92994.tar.gz
|
||||||
/kineto-be13176.tar.gz
|
/psimd-072586a.tar.gz
|
||||||
/pytorch-v2.4.1.tar.gz
|
/cpuinfo-d6860c4.tar.gz
|
||||||
/pytorch-v2.5.0.tar.gz
|
|
||||||
/pytorch-v2.5.1.tar.gz
|
|
||||||
/pytorch-v2.7.0.tar.gz
|
|
||||||
/v2.13.6.tar.gz
|
|
||||||
/pytorch-a1cb3cc.tar.gz
|
|
||||||
/v24.12.23.tar.gz
|
|
||||||
/kineto-5e75018.tar.gz
|
|
||||||
/pytorch-v2.8.0.tar.gz
|
|
||||||
/v1.18.0.tar.gz
|
|
||||||
/pytorch-715dca6.tar.gz
|
|
||||||
/pytorch-fd36458.tar.gz
|
|
||||||
/pytorch-0fabc3b.tar.gz
|
|
||||||
/pytorch-v2.9.0.tar.gz
|
|
||||||
/pytorch-v2.9.1.tar.gz
|
|
||||||
|
|
|
||||||
|
|
@ -1,202 +0,0 @@
|
||||||
From 193854993cd939de186de19589c1add4c4b2cf66 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Tom Rix <Tom.Rix@amd.com>
|
|
||||||
Date: Mon, 21 Jul 2025 11:35:03 -0700
|
|
||||||
Subject: [PATCH] Add cmake variable USE_ROCM_CK
|
|
||||||
|
|
||||||
---
|
|
||||||
CMakeLists.txt | 1 +
|
|
||||||
aten/src/ATen/CMakeLists.txt | 40 ++++++++++++++++-----------------
|
|
||||||
aten/src/ATen/cuda/CUDABlas.cpp | 22 +++++++++---------
|
|
||||||
cmake/Dependencies.cmake | 3 +++
|
|
||||||
4 files changed, 35 insertions(+), 31 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
||||||
index a5d25e6afa0f..afc1b53efa64 100644
|
|
||||||
--- a/CMakeLists.txt
|
|
||||||
+++ b/CMakeLists.txt
|
|
||||||
@@ -240,6 +240,7 @@ cmake_dependent_option(
|
|
||||||
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
|
|
||||||
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
|
|
||||||
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
|
|
||||||
+cmake_dependent_option(USE_ROCM_CK "Use ROCm Composable Kernel" ON "USE_ROCM" ON)
|
|
||||||
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
|
|
||||||
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
|
|
||||||
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
|
|
||||||
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
|
|
||||||
index c9cfd74b501e..59f6178218ee 100644
|
|
||||||
--- a/aten/src/ATen/CMakeLists.txt
|
|
||||||
+++ b/aten/src/ATen/CMakeLists.txt
|
|
||||||
@@ -373,26 +373,26 @@ if(USE_ROCM)
|
|
||||||
# is header only, so this should be ok, except that the CMake build generates
|
|
||||||
# a ck/config.h. We just do that part here. Without this, the ck.h from the
|
|
||||||
# ROCM SDK may get accidentally used instead.
|
|
||||||
- function(_pytorch_rocm_generate_ck_conf)
|
|
||||||
- set(CK_ENABLE_INT8 "ON")
|
|
||||||
- set(CK_ENABLE_FP16 "ON")
|
|
||||||
- set(CK_ENABLE_FP32 "ON")
|
|
||||||
- set(CK_ENABLE_FP64 "ON")
|
|
||||||
- set(CK_ENABLE_BF16 "ON")
|
|
||||||
- set(CK_ENABLE_FP8 "ON")
|
|
||||||
- set(CK_ENABLE_BF8 "ON")
|
|
||||||
- set(CK_USE_XDL "ON")
|
|
||||||
- set(CK_USE_WMMA "ON")
|
|
||||||
- configure_file(
|
|
||||||
- "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
|
|
||||||
- "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
|
|
||||||
- )
|
|
||||||
- endfunction()
|
|
||||||
+# function(_pytorch_rocm_generate_ck_conf)
|
|
||||||
+# set(CK_ENABLE_INT8 "ON")
|
|
||||||
+# set(CK_ENABLE_FP16 "ON")
|
|
||||||
+# set(CK_ENABLE_FP32 "ON")
|
|
||||||
+# set(CK_ENABLE_FP64 "ON")
|
|
||||||
+# set(CK_ENABLE_BF16 "ON")
|
|
||||||
+# set(CK_ENABLE_FP8 "ON")
|
|
||||||
+# set(CK_ENABLE_BF8 "ON")
|
|
||||||
+# set(CK_USE_XDL "ON")
|
|
||||||
+# set(CK_USE_WMMA "ON")
|
|
||||||
+# configure_file(
|
|
||||||
+# "${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
|
|
||||||
+# "${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
|
|
||||||
+# )
|
|
||||||
+# endfunction()
|
|
||||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
|
|
||||||
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
|
|
||||||
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
|
|
||||||
- list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
|
|
||||||
- _pytorch_rocm_generate_ck_conf()
|
|
||||||
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
|
|
||||||
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
|
|
||||||
+# list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
|
|
||||||
+# _pytorch_rocm_generate_ck_conf()
|
|
||||||
|
|
||||||
# Next two lines are needed because TunableOp uses third-party/fmt
|
|
||||||
list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
|
|
||||||
@@ -409,7 +409,7 @@ endif()
|
|
||||||
${native_quantized_hip_hip}
|
|
||||||
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
|
|
||||||
)
|
|
||||||
- if(WIN32) # Windows doesn't support Composable Kernels
|
|
||||||
+ if(NOT USE_ROCM_CK) # Windows doesn't support Composable Kernels
|
|
||||||
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
|
|
||||||
file(GLOB native_hip_ck "native/hip/ck*.hip")
|
|
||||||
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
|
||||||
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
|
|
||||||
index 89350a11bea7..e5b7960177cf 100644
|
|
||||||
--- a/aten/src/ATen/cuda/CUDABlas.cpp
|
|
||||||
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
|
|
||||||
@@ -752,7 +752,7 @@ template <>
|
|
||||||
void bgemm_internal<double>(CUDABLAS_BGEMM_ARGTYPES(double))
|
|
||||||
{
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
-#ifdef USE_ROCM
|
|
||||||
+#ifdef USE_ROCM_CK
|
|
||||||
// hipblaslt does not support double gemm yet
|
|
||||||
bgemm_internal_cublas<double>(CUDABLAS_BGEMM_ARGS(double));
|
|
||||||
#else
|
|
||||||
@@ -836,7 +836,7 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
|
|
||||||
bgemm_internal_cublas<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
at::native::bgemm_internal_ck<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
@@ -1270,14 +1270,14 @@ template <>
|
|
||||||
void gemm_internal<double>(CUDABLAS_GEMM_ARGTYPES(double))
|
|
||||||
{
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
-#ifdef USE_ROCM
|
|
||||||
+#ifdef USE_ROCM_CK
|
|
||||||
// hipblaslt does not support double gemm yet
|
|
||||||
gemm_internal_cublas<double>(CUDABLAS_GEMM_ARGS(double));
|
|
||||||
#else
|
|
||||||
gemm_internal_cublaslt<double>(CUDABLAS_GEMM_ARGS(double));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
at::native::gemm_internal_ck<double>(CUDABLAS_GEMM_ARGS(double));
|
|
||||||
}
|
|
||||||
@@ -1293,7 +1293,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100
|
|
||||||
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
|
|
||||||
@@ -1311,7 +1311,7 @@ template <>
|
|
||||||
void gemm_internal<c10::complex<double>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<double>))
|
|
||||||
{
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
-#ifdef USE_ROCM
|
|
||||||
+#ifdef USE_ROCM_CK
|
|
||||||
// hipblaslt does not support complex gemm yet
|
|
||||||
gemm_internal_cublas<c10::complex<double>>(CUDABLAS_GEMM_ARGS(c10::complex<double>));
|
|
||||||
#else
|
|
||||||
@@ -1327,7 +1327,7 @@ template <>
|
|
||||||
void gemm_internal<c10::complex<float>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<float>))
|
|
||||||
{
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
-#ifdef USE_ROCM
|
|
||||||
+#ifdef USE_ROCM_CK
|
|
||||||
// hipblaslt does not support complex gemm yet
|
|
||||||
gemm_internal_cublas<c10::complex<float>>(CUDABLAS_GEMM_ARGS(c10::complex<float>));
|
|
||||||
#else
|
|
||||||
@@ -1345,7 +1345,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half))
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
gemm_internal_cublaslt<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
at::native::gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
|
||||||
}
|
|
||||||
@@ -1361,7 +1361,7 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
gemm_internal_cublaslt<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
at::native::gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
@@ -1382,7 +1382,7 @@ void gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half,
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
gemm_internal_cublaslt<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
|
|
||||||
}
|
|
||||||
@@ -1398,7 +1398,7 @@ void gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::B
|
|
||||||
if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
|
|
||||||
gemm_internal_cublaslt<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
-#if defined(USE_ROCM) && !defined(_MSC_VER)
|
|
||||||
+#if defined(USE_ROCM) && defined(USE_ROCM_CK)
|
|
||||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
|
||||||
TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
|
|
||||||
}
|
|
||||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
|
||||||
index a93386c27f8d..be1368999d38 100644
|
|
||||||
--- a/cmake/Dependencies.cmake
|
|
||||||
+++ b/cmake/Dependencies.cmake
|
|
||||||
@@ -1031,6 +1031,9 @@ if(USE_ROCM)
|
|
||||||
if(HIPBLASLT_VEC_EXT)
|
|
||||||
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_VEC_EXT)
|
|
||||||
endif()
|
|
||||||
+ if(USE_ROCM_CK)
|
|
||||||
+ list(APPEND HIP_CXX_FLAGS -DUSE_ROCM_CK)
|
|
||||||
+ endif()
|
|
||||||
list(APPEND HIP_HIPCC_FLAGS --offload-compress)
|
|
||||||
if(WIN32)
|
|
||||||
add_definitions(-DROCM_ON_WINDOWS)
|
|
||||||
--
|
|
||||||
2.49.0
|
|
||||||
|
|
||||||
|
|
@ -1,359 +0,0 @@
|
||||||
From f2a544b2e3a5bdc04985f6e06223c0c1700120a0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: albanD <desmaison.alban@gmail.com>
|
|
||||||
Date: Sat, 12 Jul 2025 03:42:33 -0400
|
|
||||||
Subject: [PATCH] Fix compilation and "import torch" issues for cpython 3.14
|
|
||||||
|
|
||||||
Imported from
|
|
||||||
https://github.com/albanD/pytorch/tree/cpython314_build
|
|
||||||
commit 88bb9cdb72449f4277829e20d94ad8aec1894216
|
|
||||||
|
|
||||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
|
||||||
---
|
|
||||||
torch/_dynamo/bytecode_analysis.py | 2 +-
|
|
||||||
torch/ao/quantization/__init__.py | 5 +++-
|
|
||||||
torch/ao/quantization/qconfig.py | 4 ++-
|
|
||||||
torch/ao/quantization/utils.py | 7 +++--
|
|
||||||
torch/csrc/dynamo/cpython_defs.c | 16 +++++++++++
|
|
||||||
torch/csrc/dynamo/cpython_includes.h | 17 ++++++++++++
|
|
||||||
torch/csrc/dynamo/eval_frame.c | 34 +++++++++++++++--------
|
|
||||||
torch/csrc/dynamo/framelocals_mapping.cpp | 14 ++++++++++
|
|
||||||
torch/csrc/utils/python_compat.h | 1 +
|
|
||||||
torch/onnx/__init__.py | 1 -
|
|
||||||
torch/utils/weak.py | 29 +++++++++++++++++--
|
|
||||||
11 files changed, 111 insertions(+), 19 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/torch/_dynamo/bytecode_analysis.py b/torch/_dynamo/bytecode_analysis.py
|
|
||||||
index 3252ea91409f..2de74ee5bf8d 100644
|
|
||||||
--- a/torch/_dynamo/bytecode_analysis.py
|
|
||||||
+++ b/torch/_dynamo/bytecode_analysis.py
|
|
||||||
@@ -33,7 +33,7 @@ if sys.version_info >= (3, 11):
|
|
||||||
TERMINAL_OPCODES.add(dis.opmap["JUMP_FORWARD"])
|
|
||||||
else:
|
|
||||||
TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"])
|
|
||||||
-if sys.version_info >= (3, 12):
|
|
||||||
+if (3, 12) <= sys.version_info < (3, 14):
|
|
||||||
TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"])
|
|
||||||
if sys.version_info >= (3, 13):
|
|
||||||
TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD_NO_INTERRUPT"])
|
|
||||||
diff --git a/torch/ao/quantization/__init__.py b/torch/ao/quantization/__init__.py
|
|
||||||
index ffc1792fd23f..cf5a8b99a894 100644
|
|
||||||
--- a/torch/ao/quantization/__init__.py
|
|
||||||
+++ b/torch/ao/quantization/__init__.py
|
|
||||||
@@ -1,5 +1,6 @@
|
|
||||||
# mypy: allow-untyped-defs
|
|
||||||
|
|
||||||
+import sys
|
|
||||||
from typing import Callable, Optional, Union
|
|
||||||
|
|
||||||
import torch
|
|
||||||
@@ -33,7 +34,9 @@ from .stubs import * # noqa: F403
|
|
||||||
|
|
||||||
# ensure __module__ is set correctly for public APIs
|
|
||||||
ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase]
|
|
||||||
-ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
|
|
||||||
+if sys.version_info < (3, 14):
|
|
||||||
+ ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
|
|
||||||
+
|
|
||||||
for _f in [
|
|
||||||
compare_results,
|
|
||||||
extract_results_from_loggers,
|
|
||||||
diff --git a/torch/ao/quantization/qconfig.py b/torch/ao/quantization/qconfig.py
|
|
||||||
index efee5302ad42..d9a8fc78bab4 100644
|
|
||||||
--- a/torch/ao/quantization/qconfig.py
|
|
||||||
+++ b/torch/ao/quantization/qconfig.py
|
|
||||||
@@ -1,5 +1,6 @@
|
|
||||||
# mypy: allow-untyped-defs
|
|
||||||
import copy
|
|
||||||
+import sys
|
|
||||||
import warnings
|
|
||||||
from collections import namedtuple
|
|
||||||
from typing import Any, Optional, Union
|
|
||||||
@@ -568,7 +569,8 @@ def _assert_valid_qconfig(qconfig: Optional[QConfig], mod: torch.nn.Module) -> N
|
|
||||||
|
|
||||||
|
|
||||||
QConfigAny = Optional[QConfig]
|
|
||||||
-QConfigAny.__module__ = "torch.ao.quantization.qconfig"
|
|
||||||
+if sys.version_info < (3, 14):
|
|
||||||
+ QConfigAny.__module__ = "torch.ao.quantization.qconfig"
|
|
||||||
|
|
||||||
|
|
||||||
def _add_module_to_qconfig_obs_ctr(
|
|
||||||
diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py
|
|
||||||
index 4ac3112ec072..3b1503e01701 100644
|
|
||||||
--- a/torch/ao/quantization/utils.py
|
|
||||||
+++ b/torch/ao/quantization/utils.py
|
|
||||||
@@ -4,6 +4,7 @@ Utils shared by different modes of quantization (eager/graph)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import functools
|
|
||||||
+import sys
|
|
||||||
import warnings
|
|
||||||
from collections import OrderedDict
|
|
||||||
from inspect import getfullargspec, signature
|
|
||||||
@@ -16,7 +17,8 @@ from torch.nn.utils.parametrize import is_parametrized
|
|
||||||
|
|
||||||
|
|
||||||
NodePattern = Union[tuple[Node, Node], tuple[Node, tuple[Node, Node]], Any]
|
|
||||||
-NodePattern.__module__ = "torch.ao.quantization.utils"
|
|
||||||
+if sys.version_info < (3, 14):
|
|
||||||
+ NodePattern.__module__ = "torch.ao.quantization.utils"
|
|
||||||
|
|
||||||
# This is the Quantizer class instance from torch/quantization/fx/quantize.py.
|
|
||||||
# Define separately to prevent circular imports.
|
|
||||||
@@ -31,7 +33,8 @@ QuantizerCls = Any
|
|
||||||
Pattern = Union[
|
|
||||||
Callable, tuple[Callable, Callable], tuple[Callable, tuple[Callable, Callable]], Any
|
|
||||||
]
|
|
||||||
-Pattern.__module__ = "torch.ao.quantization.utils"
|
|
||||||
+if sys.version_info < (3, 14):
|
|
||||||
+ Pattern.__module__ = "torch.ao.quantization.utils"
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: maybe rename this to MatchInputNode
|
|
||||||
diff --git a/torch/csrc/dynamo/cpython_defs.c b/torch/csrc/dynamo/cpython_defs.c
|
|
||||||
index b68ef894aeaa..244d4165d5e8 100644
|
|
||||||
--- a/torch/csrc/dynamo/cpython_defs.c
|
|
||||||
+++ b/torch/csrc/dynamo/cpython_defs.c
|
|
||||||
@@ -2,6 +2,20 @@
|
|
||||||
#include <torch/csrc/dynamo/cpython_includes.h>
|
|
||||||
#include <torch/csrc/dynamo/debug_macros.h>
|
|
||||||
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+
|
|
||||||
+const uint8_t* THP_PyOpcode_Caches = NULL;
|
|
||||||
+const int THP_PyOpcode_Caches_size = 0;
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
|
|
||||||
+{}
|
|
||||||
+void
|
|
||||||
+THP_PyFrame_Clear(_PyInterpreterFrame *frame)
|
|
||||||
+{}
|
|
||||||
+
|
|
||||||
+#else
|
|
||||||
+
|
|
||||||
#if IS_PYTHON_3_11_PLUS
|
|
||||||
|
|
||||||
#define Py_BUILD_CORE
|
|
||||||
@@ -360,3 +374,5 @@ const uint8_t* THP_PyOpcode_Caches = NULL;
|
|
||||||
const int THP_PyOpcode_Caches_size = 0;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
+
|
|
||||||
+#endif // IS_PYTHON_3_14_PLUS
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/torch/csrc/dynamo/cpython_includes.h b/torch/csrc/dynamo/cpython_includes.h
|
|
||||||
index 6b99c1d5aec8..616be16563cf 100644
|
|
||||||
--- a/torch/csrc/dynamo/cpython_includes.h
|
|
||||||
+++ b/torch/csrc/dynamo/cpython_includes.h
|
|
||||||
@@ -21,6 +21,14 @@
|
|
||||||
|
|
||||||
#if IS_PYTHON_3_11_PLUS
|
|
||||||
#include <internal/pycore_frame.h>
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+#include <internal/pycore_interpframe_structs.h>
|
|
||||||
+#include <internal/pycore_stackref.h>
|
|
||||||
+#endif
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+#include <internal/pycore_code.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef Py_BUILD_CORE
|
|
||||||
@@ -30,6 +38,13 @@
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+
|
|
||||||
+#define F_CODE(x) (PyCodeObject*)PyStackRef_AsPyObjectBorrow(x->f_executable)
|
|
||||||
+#define PREV_INSTR(x) (x)->instr_ptr
|
|
||||||
+
|
|
||||||
+#else
|
|
||||||
+
|
|
||||||
#if IS_PYTHON_3_13_PLUS
|
|
||||||
#define F_CODE(x) ((PyCodeObject*)(x)->f_executable)
|
|
||||||
#define PREV_INSTR(x) (x)->instr_ptr
|
|
||||||
@@ -38,6 +53,8 @@ extern "C" {
|
|
||||||
#define PREV_INSTR(x) (x)->prev_instr
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#endif // IS_PYTHON_3_14_PLUS
|
|
||||||
+
|
|
||||||
#if IS_PYTHON_3_12_PLUS
|
|
||||||
#define FUNC(x) ((x)->f_funcobj)
|
|
||||||
#else
|
|
||||||
diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c
|
|
||||||
index f413782b2d30..72bb8839bac3 100644
|
|
||||||
--- a/torch/csrc/dynamo/eval_frame.c
|
|
||||||
+++ b/torch/csrc/dynamo/eval_frame.c
|
|
||||||
@@ -224,17 +224,6 @@ const char* get_frame_name(THP_EVAL_API_FRAME_OBJECT* frame) {
|
|
||||||
return PyUnicode_AsUTF8(F_CODE(frame)->co_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
-void clear_old_frame_if_python_312_plus(
|
|
||||||
- PyThreadState* tstate,
|
|
||||||
- THP_EVAL_API_FRAME_OBJECT* frame) {
|
|
||||||
-#if IS_PYTHON_3_12_PLUS
|
|
||||||
-
|
|
||||||
- THP_PyFrame_Clear(frame);
|
|
||||||
- THP_PyThreadState_PopFrame(tstate, frame);
|
|
||||||
-
|
|
||||||
-#endif
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
static PyObject* dynamo_eval_custom_code_impl(
|
|
||||||
PyThreadState* tstate,
|
|
||||||
THP_EVAL_API_FRAME_OBJECT* frame,
|
|
||||||
@@ -485,6 +474,18 @@ static PyObject* dynamo__custom_eval_frame_shim(
|
|
||||||
|
|
||||||
static void enable_eval_frame_shim(PyThreadState* tstate) {}
|
|
||||||
static void enable_eval_frame_default(PyThreadState* tstate) {}
|
|
||||||
+PyObject* dynamo_eval_custom_code(
|
|
||||||
+ PyThreadState* tstate,
|
|
||||||
+ THP_EVAL_API_FRAME_OBJECT* frame,
|
|
||||||
+ PyCodeObject* code,
|
|
||||||
+ const char* trace_annotation,
|
|
||||||
+ int throw_flag) {}
|
|
||||||
+THPPyInterpreterFrame* THPPyInterpreterFrame_New(
|
|
||||||
+ THP_EVAL_API_FRAME_OBJECT* frame) {}
|
|
||||||
+PyObject* dynamo_eval_frame_default(
|
|
||||||
+ PyThreadState* tstate,
|
|
||||||
+ THP_EVAL_API_FRAME_OBJECT* frame,
|
|
||||||
+ int throw_flag) {}
|
|
||||||
|
|
||||||
static struct PyGetSetDef THPPyInterpreterFrame_properties[] = {NULL};
|
|
||||||
|
|
||||||
@@ -498,6 +499,17 @@ static PyTypeObject THPPyInterpreterFrameType = {
|
|
||||||
|
|
||||||
#endif // !(IS_PYTHON_3_14_PLUS)
|
|
||||||
|
|
||||||
+void clear_old_frame_if_python_312_plus(
|
|
||||||
+ PyThreadState* tstate,
|
|
||||||
+ THP_EVAL_API_FRAME_OBJECT* frame) {
|
|
||||||
+#if IS_PYTHON_3_12_PLUS
|
|
||||||
+
|
|
||||||
+ THP_PyFrame_Clear(frame);
|
|
||||||
+ THP_PyThreadState_PopFrame(tstate, frame);
|
|
||||||
+
|
|
||||||
+#endif
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static PyObject* increment_working_threads(
|
|
||||||
PyThreadState* tstate,
|
|
||||||
PyObject* module) {
|
|
||||||
diff --git a/torch/csrc/dynamo/framelocals_mapping.cpp b/torch/csrc/dynamo/framelocals_mapping.cpp
|
|
||||||
index b839fb26fc91..c4ee36d87767 100644
|
|
||||||
--- a/torch/csrc/dynamo/framelocals_mapping.cpp
|
|
||||||
+++ b/torch/csrc/dynamo/framelocals_mapping.cpp
|
|
||||||
@@ -26,9 +26,13 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
|
|
||||||
PyCodeObject* co = F_CODE(frame);
|
|
||||||
_framelocals.resize(co->co_nlocalsplus, nullptr);
|
|
||||||
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+ TORCH_CHECK(false, "Python 3.14+ not supported");
|
|
||||||
+#else
|
|
||||||
if (!frame->stacktop) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
auto update_framelocals = [&](int i, PyObject* value) {
|
|
||||||
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
|
|
||||||
@@ -53,11 +57,21 @@ FrameLocalsMapping::FrameLocalsMapping(FrameLocalsFrameType* frame)
|
|
||||||
};
|
|
||||||
|
|
||||||
auto offset = co->co_nlocalsplus - co->co_nfreevars;
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+ TORCH_CHECK(false, "Python 3.14+ not supported");
|
|
||||||
+#else
|
|
||||||
for (int i = 0; i < offset; i++) {
|
|
||||||
update_framelocals(i, frame->localsplus[i]);
|
|
||||||
}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
// Get references to closure variables
|
|
||||||
+#if IS_PYTHON_3_14_PLUS
|
|
||||||
+ PyObject* closure;
|
|
||||||
+ TORCH_CHECK(false, "Python 3.14+ not supported");
|
|
||||||
+#else
|
|
||||||
PyObject* closure = ((PyFunctionObject*)FUNC(frame))->func_closure;
|
|
||||||
+#endif
|
|
||||||
for (int i = 0; i < co->co_nfreevars; i++) {
|
|
||||||
update_framelocals(offset + i, PyTuple_GET_ITEM(closure, i));
|
|
||||||
}
|
|
||||||
diff --git a/torch/csrc/utils/python_compat.h b/torch/csrc/utils/python_compat.h
|
|
||||||
index a1537611cc47..16292e4fd030 100644
|
|
||||||
--- a/torch/csrc/utils/python_compat.h
|
|
||||||
+++ b/torch/csrc/utils/python_compat.h
|
|
||||||
@@ -13,6 +13,7 @@ extern "C" {
|
|
||||||
#define IS_PYTHON_3_12_PLUS PY_VERSION_HEX >= 0x030C0000
|
|
||||||
#define IS_PYTHON_3_13_PLUS PY_VERSION_HEX >= 0x030D0000
|
|
||||||
#define IS_PYTHON_3_14_PLUS PY_VERSION_HEX >= 0x030E0000
|
|
||||||
+#define IS_PYTHON_3_15_PLUS PY_VERSION_HEX >= 0x030F0000
|
|
||||||
|
|
||||||
static inline int PyCode_GetNCellvars(PyCodeObject* code) {
|
|
||||||
// gh-26364 added co_ncellvars to Python 3.11.0rc1
|
|
||||||
diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py
|
|
||||||
index 345ffd2a065b..ceeadde5365b 100644
|
|
||||||
--- a/torch/onnx/__init__.py
|
|
||||||
+++ b/torch/onnx/__init__.py
|
|
||||||
@@ -104,7 +104,6 @@ ONNXProgram.__module__ = "torch.onnx"
|
|
||||||
OnnxExporterError.__module__ = "torch.onnx"
|
|
||||||
_OrtBackend.__module__ = "torch.onnx"
|
|
||||||
_OrtBackendOptions.__module__ = "torch.onnx"
|
|
||||||
-_OrtExecutionProvider.__module__ = "torch.onnx"
|
|
||||||
enable_fake_mode.__module__ = "torch.onnx"
|
|
||||||
is_onnxrt_backend_supported.__module__ = "torch.onnx"
|
|
||||||
|
|
||||||
diff --git a/torch/utils/weak.py b/torch/utils/weak.py
|
|
||||||
index 8bf2ba5ed02b..9c7218cb2ad3 100644
|
|
||||||
--- a/torch/utils/weak.py
|
|
||||||
+++ b/torch/utils/weak.py
|
|
||||||
@@ -3,8 +3,6 @@ from __future__ import annotations
|
|
||||||
|
|
||||||
import collections.abc as _collections_abc
|
|
||||||
import weakref
|
|
||||||
-
|
|
||||||
-from _weakrefset import _IterationGuard # type: ignore[attr-defined]
|
|
||||||
from collections.abc import Mapping, MutableMapping
|
|
||||||
from weakref import ref
|
|
||||||
|
|
||||||
@@ -22,6 +20,33 @@ __all__ = [
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
+# TODO: make weakref properly thread safe following
|
|
||||||
+# https://github.com/python/cpython/pull/125325
|
|
||||||
+class _IterationGuard:
|
|
||||||
+ # This context manager registers itself in the current iterators of the
|
|
||||||
+ # weak container, such as to delay all removals until the context manager
|
|
||||||
+ # exits.
|
|
||||||
+ # This technique should be relatively thread-safe (since sets are).
|
|
||||||
+
|
|
||||||
+ def __init__(self, weakcontainer):
|
|
||||||
+ # Don't create cycles
|
|
||||||
+ self.weakcontainer = ref(weakcontainer)
|
|
||||||
+
|
|
||||||
+ def __enter__(self):
|
|
||||||
+ w = self.weakcontainer()
|
|
||||||
+ if w is not None:
|
|
||||||
+ w._iterating.add(self)
|
|
||||||
+ return self
|
|
||||||
+
|
|
||||||
+ def __exit__(self, e, t, b):
|
|
||||||
+ w = self.weakcontainer()
|
|
||||||
+ if w is not None:
|
|
||||||
+ s = w._iterating
|
|
||||||
+ s.remove(self)
|
|
||||||
+ if not s:
|
|
||||||
+ w._commit_removals()
|
|
||||||
+
|
|
||||||
+
|
|
||||||
# This file defines a variant of WeakKeyDictionary that overrides the hashing
|
|
||||||
# behavior of the key to use object identity, rather than the builtin
|
|
||||||
# __eq__/__hash__ functions. This is useful for Tensor weak keys, as their
|
|
||||||
--
|
|
||||||
2.49.0
|
|
||||||
|
|
||||||
262
0001-Optionally-use-hipblaslt.patch
Normal file
262
0001-Optionally-use-hipblaslt.patch
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Fri, 23 Feb 2024 08:27:30 -0500
|
||||||
|
Subject: [PATCH] Optionally use hipblaslt
|
||||||
|
|
||||||
|
The hipblaslt package is not available on Fedora.
|
||||||
|
Instead of requiring the package, make it optional.
|
||||||
|
If it is found, define the preprocessor variable HIPBLASLT
|
||||||
|
Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks
|
||||||
|
|
||||||
|
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||||
|
---
|
||||||
|
aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++---
|
||||||
|
aten/src/ATen/cuda/CUDABlas.h | 2 +-
|
||||||
|
aten/src/ATen/cuda/CUDAContextLight.h | 4 ++--
|
||||||
|
aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++--
|
||||||
|
aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++---
|
||||||
|
aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------
|
||||||
|
cmake/Dependencies.cmake | 3 +++
|
||||||
|
cmake/public/LoadHIP.cmake | 4 ++--
|
||||||
|
8 files changed, 25 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||||
|
index d534ec5a178..e815463f630 100644
|
||||||
|
--- a/aten/src/ATen/cuda/CUDABlas.cpp
|
||||||
|
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||||
|
@@ -14,7 +14,7 @@
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
|
#ifdef USE_ROCM
|
||||||
|
-#if ROCM_VERSION >= 60000
|
||||||
|
+#ifdef HIPBLASLT
|
||||||
|
#include <hipblaslt/hipblaslt-ext.hpp>
|
||||||
|
#endif
|
||||||
|
// until hipblas has an API to accept flags, we must use rocblas here
|
||||||
|
@@ -781,7 +781,7 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
|
||||||
|
#if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000
|
||||||
|
// only for rocm 5.7 where we first supported hipblaslt, it was difficult
|
||||||
|
@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
template <typename Dtype>
|
||||||
|
void gemm_and_bias(
|
||||||
|
bool transpose_mat1,
|
||||||
|
@@ -1124,7 +1125,7 @@ template void gemm_and_bias(
|
||||||
|
at::BFloat16* result_ptr,
|
||||||
|
int64_t result_ld,
|
||||||
|
GEMMAndBiasActivationEpilogue activation);
|
||||||
|
-
|
||||||
|
+#endif
|
||||||
|
void scaled_gemm(
|
||||||
|
char transa,
|
||||||
|
char transb,
|
||||||
|
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
|
||||||
|
index eb12bb350c5..068607467dd 100644
|
||||||
|
--- a/aten/src/ATen/cuda/CUDABlas.h
|
||||||
|
+++ b/aten/src/ATen/cuda/CUDABlas.h
|
||||||
|
@@ -82,7 +82,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
|
||||||
|
template <>
|
||||||
|
void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
|
||||||
|
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
enum GEMMAndBiasActivationEpilogue {
|
||||||
|
None,
|
||||||
|
RELU,
|
||||||
|
diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h
|
||||||
|
index 4ec35f59a21..e28dc42034f 100644
|
||||||
|
--- a/aten/src/ATen/cuda/CUDAContextLight.h
|
||||||
|
+++ b/aten/src/ATen/cuda/CUDAContextLight.h
|
||||||
|
@@ -9,7 +9,7 @@
|
||||||
|
|
||||||
|
// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
|
||||||
|
// added bf16 support
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
#include <cublasLt.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
|
||||||
|
/* Handles */
|
||||||
|
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
|
||||||
|
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||||
|
index 6913d2cd95e..3d4276be372 100644
|
||||||
|
--- a/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||||
|
+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||||
|
@@ -29,7 +29,7 @@ namespace at::cuda {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||||
|
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
void createCublasLtHandle(cublasLtHandle_t *handle) {
|
||||||
|
TORCH_CUDABLAS_CHECK(cublasLtCreate(handle));
|
||||||
|
}
|
||||||
|
@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() {
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
cublasLtHandle_t getCurrentCUDABlasLtHandle() {
|
||||||
|
#ifdef USE_ROCM
|
||||||
|
c10::DeviceIndex device = 0;
|
||||||
|
diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||||
|
index 3ba0d761277..dde1870cfbf 100644
|
||||||
|
--- a/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||||
|
+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||||
|
@@ -11,7 +11,7 @@
|
||||||
|
|
||||||
|
#include <ATen/cuda/tunable/GemmCommon.h>
|
||||||
|
#ifdef USE_ROCM
|
||||||
|
-#if ROCM_VERSION >= 50700
|
||||||
|
+#ifdef HIPBLASLT
|
||||||
|
#include <ATen/cuda/tunable/GemmHipblaslt.h>
|
||||||
|
#endif
|
||||||
|
#include <ATen/cuda/tunable/GemmRocblas.h>
|
||||||
|
@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||||
|
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||||
|
if (env == nullptr || strcmp(env, "1") == 0) {
|
||||||
|
// disallow tuning of hipblaslt with c10::complex
|
||||||
|
@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||||
|
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||||
|
if (env == nullptr || strcmp(env, "1") == 0) {
|
||||||
|
// disallow tuning of hipblaslt with c10::complex
|
||||||
|
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
|
||||||
|
index 29e5c5e3cf1..df56f3d7f1d 100644
|
||||||
|
--- a/aten/src/ATen/native/cuda/Blas.cpp
|
||||||
|
+++ b/aten/src/ATen/native/cuda/Blas.cpp
|
||||||
|
@@ -155,7 +155,7 @@ enum class Activation {
|
||||||
|
GELU,
|
||||||
|
};
|
||||||
|
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) {
|
||||||
|
switch (a) {
|
||||||
|
case Activation::None:
|
||||||
|
@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() {
|
||||||
|
|
||||||
|
#ifdef USE_ROCM
|
||||||
|
static bool isSupportedHipLtROCmArch(int index) {
|
||||||
|
+#if defined(HIPBLASLT)
|
||||||
|
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index);
|
||||||
|
std::string device_arch = prop->gcnArchName;
|
||||||
|
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
|
||||||
|
@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!");
|
||||||
|
+#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||||
|
at::ScalarType scalar_type = self.scalar_type();
|
||||||
|
c10::MaybeOwned<Tensor> self_;
|
||||||
|
if (&result != &self) {
|
||||||
|
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||||
|
+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
// Strangely, if mat2 has only 1 row or column, we get
|
||||||
|
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
|
||||||
|
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
|
||||||
|
@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||||
|
}
|
||||||
|
self__sizes = self_->sizes();
|
||||||
|
} else {
|
||||||
|
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||||
|
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
useLtInterface = !disable_addmm_cuda_lt &&
|
||||||
|
result.dim() == 2 && result.is_contiguous() &&
|
||||||
|
isSupportedHipLtROCmArch(self.device().index()) &&
|
||||||
|
@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||||
|
|
||||||
|
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj());
|
||||||
|
|
||||||
|
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||||
|
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
if (useLtInterface) {
|
||||||
|
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||||
|
at::ScalarType::Half,
|
||||||
|
@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
|
||||||
|
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
|
||||||
|
at::native::resize_output(amax, {});
|
||||||
|
|
||||||
|
-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000)
|
||||||
|
+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||||
|
cublasCommonArgs args(mat1, mat2, out);
|
||||||
|
const auto out_dtype_ = args.result->scalar_type();
|
||||||
|
TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt");
|
||||||
|
@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
|
||||||
|
TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#if defined(USE_ROCM) && ROCM_VERSION >= 60000
|
||||||
|
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||||
|
// rocm's hipblaslt does not yet support amax, so calculate separately
|
||||||
|
auto out_float32 = out.to(kFloat);
|
||||||
|
out_float32.abs_();
|
||||||
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||||
|
index b7ffbeb07dc..2b6c3678984 100644
|
||||||
|
--- a/cmake/Dependencies.cmake
|
||||||
|
+++ b/cmake/Dependencies.cmake
|
||||||
|
@@ -1273,6 +1273,9 @@ if(USE_ROCM)
|
||||||
|
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
|
||||||
|
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
|
||||||
|
endif()
|
||||||
|
+ if(hipblaslt_FOUND)
|
||||||
|
+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT)
|
||||||
|
+ endif()
|
||||||
|
if(HIPBLASLT_CUSTOM_DATA_TYPE)
|
||||||
|
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE)
|
||||||
|
endif()
|
||||||
|
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
|
||||||
|
index f6ca263c5e5..53eb0b63c1a 100644
|
||||||
|
--- a/cmake/public/LoadHIP.cmake
|
||||||
|
+++ b/cmake/public/LoadHIP.cmake
|
||||||
|
@@ -156,7 +156,7 @@ if(HIP_FOUND)
|
||||||
|
find_package_and_print_version(rocblas REQUIRED)
|
||||||
|
find_package_and_print_version(hipblas REQUIRED)
|
||||||
|
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
|
||||||
|
- find_package_and_print_version(hipblaslt REQUIRED)
|
||||||
|
+ find_package_and_print_version(hipblaslt)
|
||||||
|
endif()
|
||||||
|
find_package_and_print_version(miopen REQUIRED)
|
||||||
|
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
|
||||||
|
@@ -191,7 +191,7 @@ if(HIP_FOUND)
|
||||||
|
# roctx is part of roctracer
|
||||||
|
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)
|
||||||
|
|
||||||
|
- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
|
||||||
|
+ if(hipblaslt_FOUND)
|
||||||
|
# check whether hipblaslt is using its own datatype
|
||||||
|
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc")
|
||||||
|
file(WRITE ${file} ""
|
||||||
|
--
|
||||||
|
2.43.2
|
||||||
|
|
||||||
115
0001-Reenable-dim-for-python-3.12.patch
Normal file
115
0001-Reenable-dim-for-python-3.12.patch
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Thu, 4 Apr 2024 14:21:38 -0600
|
||||||
|
Subject: [PATCH] Reenable dim for python 3.12
|
||||||
|
|
||||||
|
In 3.12:
|
||||||
|
|
||||||
|
_PyArg_Parser added an element to the start of the structure.
|
||||||
|
So existing positional initialization is off. Switch to element
|
||||||
|
initialization.
|
||||||
|
|
||||||
|
_Py_CODEUNIT changed from an int to a union, but relevant_op
|
||||||
|
is passed an int for the return of decoder.opcode, so the parameter
|
||||||
|
type is wrong, switch it to int.
|
||||||
|
|
||||||
|
The opcode PRECALL was removed, so reduce its handling to 3.11
|
||||||
|
|
||||||
|
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||||
|
---
|
||||||
|
functorch/csrc/dim/dim.cpp | 24 +++++-------------------
|
||||||
|
functorch/csrc/dim/minpybind.h | 4 ++--
|
||||||
|
2 files changed, 7 insertions(+), 21 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
|
||||||
|
index 4cc027504c77..e48b0d58081f 100644
|
||||||
|
--- a/functorch/csrc/dim/dim.cpp
|
||||||
|
+++ b/functorch/csrc/dim/dim.cpp
|
||||||
|
@@ -6,20 +6,6 @@
|
||||||
|
|
||||||
|
#include <torch/csrc/utils/python_compat.h>
|
||||||
|
|
||||||
|
-
|
||||||
|
-// Many APIs have changed/don't exist anymore
|
||||||
|
-#if IS_PYTHON_3_12_PLUS
|
||||||
|
-
|
||||||
|
-#include "dim.h"
|
||||||
|
-
|
||||||
|
-// Re-enable this some day
|
||||||
|
-PyObject* Dim_init() {
|
||||||
|
- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
|
||||||
|
- return nullptr;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-#else
|
||||||
|
-
|
||||||
|
#include "minpybind.h"
|
||||||
|
#include <frameobject.h>
|
||||||
|
#include <opcode.h>
|
||||||
|
@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
|
||||||
|
PY_BEGIN
|
||||||
|
mpy::handle sizes;
|
||||||
|
static const char * const _keywords[] = {"sizes", nullptr};
|
||||||
|
- static _PyArg_Parser parser = {"O", _keywords, 0};
|
||||||
|
+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
|
||||||
|
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
|
||||||
|
PY_BEGIN
|
||||||
|
int size;
|
||||||
|
static const char * const _keywords[] = {"N", nullptr};
|
||||||
|
- static _PyArg_Parser parser = {"i", _keywords, 0};
|
||||||
|
+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
|
||||||
|
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
|
||||||
|
|
||||||
|
// dim() --------------------
|
||||||
|
|
||||||
|
-static bool relevant_op(_Py_CODEUNIT c) {
|
||||||
|
+static bool relevant_op(int c) {
|
||||||
|
switch(c) {
|
||||||
|
case STORE_NAME:
|
||||||
|
case STORE_GLOBAL:
|
||||||
|
@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
|
||||||
|
auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
|
||||||
|
auto lasti = PyFrame_GetLasti(f.ptr());
|
||||||
|
auto decoder = PyInstDecoder(c.ptr(), lasti);
|
||||||
|
- #if IS_PYTHON_3_11_PLUS
|
||||||
|
+ #if IS_PYTHON_3_11_PLUS && !IS_PYTHON_3_12_PLUS
|
||||||
|
// When py3.11 adapts bytecode lasti points to the precall
|
||||||
|
// rather than the call instruction after it
|
||||||
|
if (decoder.opcode() == PRECALL) {
|
||||||
|
@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-#endif
|
||||||
|
+
|
||||||
|
diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
|
||||||
|
index de82b5af95a4..d76d4828bf80 100644
|
||||||
|
--- a/functorch/csrc/dim/minpybind.h
|
||||||
|
+++ b/functorch/csrc/dim/minpybind.h
|
||||||
|
@@ -621,7 +621,7 @@ struct vector_args {
|
||||||
|
PyObject *dummy = NULL;
|
||||||
|
_PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
|
||||||
|
#else
|
||||||
|
- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
|
||||||
|
+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
|
||||||
|
std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
|
||||||
|
_PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
|
||||||
|
#endif
|
||||||
|
@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
|
||||||
|
#define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
|
||||||
|
static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
|
||||||
|
FORALL_ARGS(MPY_ARGS_DECLARE) \
|
||||||
|
- static _PyArg_Parser parser = {fmt, kwlist, 0}; \
|
||||||
|
+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
|
||||||
|
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
|
||||||
|
throw mpy::exception_set(); \
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.44.0
|
||||||
|
|
||||||
39
0001-Regenerate-flatbuffer-header.patch
Normal file
39
0001-Regenerate-flatbuffer-header.patch
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
From 5b8e51b24513fa851eeff42f23d942bde301e321 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Fri, 29 Sep 2023 06:19:29 -0700
|
||||||
|
Subject: [PATCH] Regenerate flatbuffer header
|
||||||
|
|
||||||
|
For this error
|
||||||
|
torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41:
|
||||||
|
error: static assertion failed: Non-compatible flatbuffers version included
|
||||||
|
12 | FLATBUFFERS_VERSION_MINOR == 3 &&
|
||||||
|
|
||||||
|
PyTorch is expecting 23.3.3, which is what f38 has.
|
||||||
|
Rawhide is at 23.5.26
|
||||||
|
|
||||||
|
Regenerate with
|
||||||
|
flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs
|
||||||
|
|
||||||
|
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||||
|
---
|
||||||
|
torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||||
|
index cffe8bc7a6..83575e4c19 100644
|
||||||
|
--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||||
|
+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||||
|
@@ -9,8 +9,8 @@
|
||||||
|
// Ensure the included flatbuffers.h is the same version as when this file was
|
||||||
|
// generated, otherwise it may not be compatible.
|
||||||
|
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
|
||||||
|
- FLATBUFFERS_VERSION_MINOR == 3 &&
|
||||||
|
- FLATBUFFERS_VERSION_REVISION == 3,
|
||||||
|
+ FLATBUFFERS_VERSION_MINOR == 5 &&
|
||||||
|
+ FLATBUFFERS_VERSION_REVISION == 26,
|
||||||
|
"Non-compatible flatbuffers version included");
|
||||||
|
|
||||||
|
namespace torch {
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
73
0001-Stub-in-kineto-ActivityType.patch
Normal file
73
0001-Stub-in-kineto-ActivityType.patch
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
From 3ef82b814179da571b2478f61d4279717ab0b23a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Fri, 29 Sep 2023 06:25:23 -0700
|
||||||
|
Subject: [PATCH] Stub in kineto ActivityType
|
||||||
|
|
||||||
|
There is an error when kineto is not used: the shim still
|
||||||
|
requires the ActivityType.h header to get the enum ActivityType.
|
||||||
|
So cut-n-paste just enough of the header in to do this.
|
||||||
|
|
||||||
|
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||||
|
---
|
||||||
|
torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++
|
||||||
|
1 file changed, 44 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
|
||||||
|
index e92cbf003d..68985ab7d0 100644
|
||||||
|
--- a/torch/csrc/profiler/kineto_shim.h
|
||||||
|
+++ b/torch/csrc/profiler/kineto_shim.h
|
||||||
|
@@ -12,7 +12,51 @@
|
||||||
|
#undef USE_KINETO
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifdef USE_KINETO
|
||||||
|
#include <ActivityType.h>
|
||||||
|
+#else
|
||||||
|
+namespace libkineto {
|
||||||
|
+// copied from header
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
+ * All rights reserved.
|
||||||
|
+ *
|
||||||
|
+ * This source code is licensed under the BSD-style license found in the
|
||||||
|
+ * LICENSE file in the root directory of this source tree.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+// Note : All activity types are not enabled by default. Please add them
|
||||||
|
+// at correct position in the enum
|
||||||
|
+enum class ActivityType {
|
||||||
|
+ // Activity types enabled by default
|
||||||
|
+ CPU_OP = 0, // cpu side ops
|
||||||
|
+ USER_ANNOTATION,
|
||||||
|
+ GPU_USER_ANNOTATION,
|
||||||
|
+ GPU_MEMCPY,
|
||||||
|
+ GPU_MEMSET,
|
||||||
|
+ CONCURRENT_KERNEL, // on-device kernels
|
||||||
|
+ EXTERNAL_CORRELATION,
|
||||||
|
+ CUDA_RUNTIME, // host side cuda runtime events
|
||||||
|
+ CUDA_DRIVER, // host side cuda driver events
|
||||||
|
+ CPU_INSTANT_EVENT, // host side point-like events
|
||||||
|
+ PYTHON_FUNCTION,
|
||||||
|
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
|
||||||
|
+
|
||||||
|
+ // Optional Activity types
|
||||||
|
+ CUDA_SYNC, // synchronization events between runtime and kernels
|
||||||
|
+ GLOW_RUNTIME, // host side glow runtime events
|
||||||
|
+ MTIA_RUNTIME, // host side MTIA runtime events
|
||||||
|
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
|
||||||
|
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
|
||||||
|
+ HPU_OP, // HPU host side runtime event
|
||||||
|
+ XPU_RUNTIME, // host side xpu runtime events
|
||||||
|
+
|
||||||
|
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
|
||||||
|
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
|
||||||
|
+};
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#include <torch/csrc/Export.h>
|
||||||
|
#include <torch/csrc/profiler/api.h>
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
25
0001-can-not-use-with-c-files.patch
Normal file
25
0001-can-not-use-with-c-files.patch
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
From a5dff521691a17701b5a02ec75e84cfe1bf605f7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 06:41:49 -0500
|
||||||
|
Subject: [PATCH] can not use with c files
|
||||||
|
|
||||||
|
---
|
||||||
|
cmake/Dependencies.cmake | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||||
|
index 4dd8042058..5f91f3ffab 100644
|
||||||
|
--- a/cmake/Dependencies.cmake
|
||||||
|
+++ b/cmake/Dependencies.cmake
|
||||||
|
@@ -1269,7 +1269,7 @@ if(USE_ROCM)
|
||||||
|
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
|
||||||
|
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
|
||||||
|
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
|
||||||
|
- list(APPEND HIP_CXX_FLAGS -std=c++17)
|
||||||
|
+# list(APPEND HIP_CXX_FLAGS -std=c++17)
|
||||||
|
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
|
||||||
|
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
|
||||||
|
endif()
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
42
0001-cuda-hip-signatures.patch
Normal file
42
0001-cuda-hip-signatures.patch
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
From 214dc959acc809e1959643272c344ee5335d5a69 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Thu, 1 Feb 2024 11:29:47 -0500
|
||||||
|
Subject: [PATCH] cuda - hip signatures
|
||||||
|
|
||||||
|
---
|
||||||
|
aten/src/ATen/cuda/detail/LazyNVRTC.cpp | 9 +++++++++
|
||||||
|
1 file changed, 9 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
|
||||||
|
index 1b85e7776e..bb6f88783a 100644
|
||||||
|
--- a/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
|
||||||
|
+++ b/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
|
||||||
|
@@ -134,8 +134,13 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
||||||
|
const char *src,
|
||||||
|
const char *name,
|
||||||
|
int numHeaders,
|
||||||
|
+#if !defined(USE_ROCM)
|
||||||
|
const char * const *headers,
|
||||||
|
const char * const *includeNames) {
|
||||||
|
+#else
|
||||||
|
+ const char **headers,
|
||||||
|
+ const char **includeNames) {
|
||||||
|
+#endif
|
||||||
|
auto fn = reinterpret_cast<decltype(&nvrtcCreateProgram)>(getNVRTCLibrary().sym(__func__));
|
||||||
|
if (!fn)
|
||||||
|
throw std::runtime_error("Can't get nvrtcCreateProgram");
|
||||||
|
@@ -150,7 +155,11 @@ NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *);
|
||||||
|
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *);
|
||||||
|
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *);
|
||||||
|
#endif
|
||||||
|
+#if !defined(USE_ROCM)
|
||||||
|
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *);
|
||||||
|
+#else
|
||||||
|
+NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char **);
|
||||||
|
+#endif
|
||||||
|
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult);
|
||||||
|
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*);
|
||||||
|
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *);
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
46
0001-disable-use-of-aotriton.patch
Normal file
46
0001-disable-use-of-aotriton.patch
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Tue, 19 Mar 2024 11:32:37 -0400
|
||||||
|
Subject: [PATCH] disable use of aotriton
|
||||||
|
|
||||||
|
---
|
||||||
|
aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||||
|
index 96b839820efd..2d3dd0cb4b0f 100644
|
||||||
|
--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||||
|
+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||||
|
@@ -21,9 +21,11 @@
|
||||||
|
#include <cmath>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
+#ifdef USE_FLASH_ATTENTION
|
||||||
|
#if USE_ROCM
|
||||||
|
#include <aotriton/flash.h>
|
||||||
|
#endif
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note [SDPA Runtime Dispatch]
|
||||||
|
@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) {
|
||||||
|
+#ifdef USE_FLASH_ATTENTION
|
||||||
|
// Check that the gpu is capable of running flash attention
|
||||||
|
using sm80 = SMVersion<8, 0>;
|
||||||
|
using sm90 = SMVersion<9, 0>;
|
||||||
|
@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
+#else
|
||||||
|
+ return false;
|
||||||
|
+#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) {
|
||||||
|
--
|
||||||
|
2.44.0
|
||||||
|
|
||||||
226
0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch
Normal file
226
0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001
|
||||||
|
From: William Wen <williamwen@meta.com>
|
||||||
|
Date: Wed, 3 Apr 2024 17:58:46 -0700
|
||||||
|
Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo
|
||||||
|
unittests on 3.12 (#123216)
|
||||||
|
|
||||||
|
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216
|
||||||
|
Approved by: https://github.com/jansel, https://github.com/malfet
|
||||||
|
---
|
||||||
|
test/dynamo/test_autograd_function.py | 3 ++
|
||||||
|
test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++
|
||||||
|
test/functorch/test_eager_transforms.py | 7 ++-
|
||||||
|
test/run_test.py | 3 --
|
||||||
|
torch/__init__.py | 5 +-
|
||||||
|
torch/_dynamo/eval_frame.py | 4 +-
|
||||||
|
torch/_dynamo/test_case.py | 8 +---
|
||||||
|
7 files changed, 74 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py
|
||||||
|
index d23fec607afa..bc5ebc767038 100644
|
||||||
|
--- a/test/dynamo/test_autograd_function.py
|
||||||
|
+++ b/test/dynamo/test_autograd_function.py
|
||||||
|
@@ -2,6 +2,8 @@
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import math
|
||||||
|
+import sys
|
||||||
|
+import unittest
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase):
|
||||||
|
# I pulled all of these test cases from test_autograd.py
|
||||||
|
# In the future, we should make the Dynamo test suite actually
|
||||||
|
# run on test_autograd.py (it's disabled right now) and delete these.
|
||||||
|
+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+")
|
||||||
|
def test_smoke_from_test_autograd(self):
|
||||||
|
class Func(torch.autograd.Function):
|
||||||
|
@staticmethod
|
||||||
|
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
|
||||||
|
index a73de8b1c7e9..8f54e0564e6b 100644
|
||||||
|
--- a/test/dynamo/test_misc.py
|
||||||
|
+++ b/test/dynamo/test_misc.py
|
||||||
|
@@ -9760,6 +9760,69 @@ fn
|
||||||
|
lambda mod: mod,
|
||||||
|
)
|
||||||
|
|
||||||
|
+ @xfailIfPy311
|
||||||
|
+ def test_outside_linear_module_free(self):
|
||||||
|
+ # Compared to test_linear_module_free, the linear
|
||||||
|
+ # layer is not the code object that is directly compiled.
|
||||||
|
+ def model_inp_ctr():
|
||||||
|
+ fc = torch.nn.Linear(100, 100)
|
||||||
|
+
|
||||||
|
+ class Mod(torch.nn.Module):
|
||||||
|
+ def __init__(self):
|
||||||
|
+ super().__init__()
|
||||||
|
+ self.fc_ref = fc
|
||||||
|
+
|
||||||
|
+ def forward(self, x):
|
||||||
|
+ return fc(x[0])
|
||||||
|
+
|
||||||
|
+ # return fc to keep it alive in _test_compile_model_free
|
||||||
|
+ return Mod(), (torch.randn(100, 100), fc)
|
||||||
|
+
|
||||||
|
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref)
|
||||||
|
+
|
||||||
|
+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+")
|
||||||
|
+ def test_parameter_free(self):
|
||||||
|
+ def model_inp_ctr():
|
||||||
|
+ param = torch.nn.Parameter(torch.randn(100, 100))
|
||||||
|
+
|
||||||
|
+ class Mod(torch.nn.Module):
|
||||||
|
+ def __init__(self):
|
||||||
|
+ super().__init__()
|
||||||
|
+ self.param = param
|
||||||
|
+
|
||||||
|
+ def forward(self, x):
|
||||||
|
+ return self.param * x[0]
|
||||||
|
+
|
||||||
|
+ # return param to keep it alive in _test_compile_model_free
|
||||||
|
+ return Mod(), (torch.randn(100, 100), param)
|
||||||
|
+
|
||||||
|
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param)
|
||||||
|
+
|
||||||
|
+ def test_raises_importerror1(self):
|
||||||
|
+ @torch.compile(backend="eager")
|
||||||
|
+ def fn(x):
|
||||||
|
+ try:
|
||||||
|
+ import some_module_that_surely_does_not_exist
|
||||||
|
+
|
||||||
|
+ return
|
||||||
|
+ except ImportError:
|
||||||
|
+ pass
|
||||||
|
+ return x.sin()
|
||||||
|
+
|
||||||
|
+ x = torch.randn(8)
|
||||||
|
+ self.assertEqual(fn(x), x.sin())
|
||||||
|
+
|
||||||
|
+ def test_raises_importerror2(self):
|
||||||
|
+ @torch.compile(backend="eager")
|
||||||
|
+ def fn(x):
|
||||||
|
+ import some_module_that_surely_does_not_exist
|
||||||
|
+
|
||||||
|
+ return x + 1
|
||||||
|
+
|
||||||
|
+ x = torch.randn(8)
|
||||||
|
+ with self.assertRaises(ImportError):
|
||||||
|
+ fn(x)
|
||||||
|
+
|
||||||
|
def test_dynamo_cache_move_to_front(self):
|
||||||
|
class Mod(torch.nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
|
||||||
|
index 09415cf8f48e..60790ec06059 100644
|
||||||
|
--- a/test/functorch/test_eager_transforms.py
|
||||||
|
+++ b/test/functorch/test_eager_transforms.py
|
||||||
|
@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase):
|
||||||
|
# Triton only supports GPU with SM70 or later.
|
||||||
|
@expectedFailureIf((IS_ARM64 and not IS_MACOS) or
|
||||||
|
IS_WINDOWS or
|
||||||
|
- (TEST_CUDA and not SM70OrLater) or
|
||||||
|
- (sys.version_info >= (3, 12)))
|
||||||
|
+ (TEST_CUDA and not SM70OrLater))
|
||||||
|
def test_compile_vmap_hessian(self, device):
|
||||||
|
# The model and inputs are a smaller version
|
||||||
|
# of code at benchmark repo:
|
||||||
|
@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase):
|
||||||
|
actual = opt_fn(params_and_buffers, x)
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
- # torch.compile is not supported on Windows or on Python 3.12+
|
||||||
|
- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12)))
|
||||||
|
+ # torch.compile is not supported on Windows
|
||||||
|
+ @expectedFailureIf(IS_WINDOWS)
|
||||||
|
@torch._dynamo.config.patch(suppress_errors=False)
|
||||||
|
@torch._dynamo.config.patch(capture_func_transforms=True)
|
||||||
|
@skipIfTorchDynamo("Do not test torch.compile on top of torch.compile")
|
||||||
|
diff --git a/test/run_test.py b/test/run_test.py
|
||||||
|
index e86af9623042..ebb14df4167d 100755
|
||||||
|
--- a/test/run_test.py
|
||||||
|
+++ b/test/run_test.py
|
||||||
|
@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT))
|
||||||
|
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
|
||||||
|
DISTRIBUTED_TEST_PREFIX = "distributed"
|
||||||
|
INDUCTOR_TEST_PREFIX = "inductor"
|
||||||
|
-DYNAMO_TEST_PREFIX = "dynamo"
|
||||||
|
|
||||||
|
|
||||||
|
# Note [ROCm parallel CI testing]
|
||||||
|
@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [
|
||||||
|
]
|
||||||
|
|
||||||
|
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
|
||||||
|
-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)]
|
||||||
|
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
|
||||||
|
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
|
||||||
|
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
|
||||||
|
@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]:
|
||||||
|
# these tests failing in Python 3.12 temporarily disabling
|
||||||
|
if sys.version_info >= (3, 12):
|
||||||
|
options.exclude.extend(INDUCTOR_TESTS)
|
||||||
|
- options.exclude.extend(DYNAMO_TESTS)
|
||||||
|
options.exclude.extend(
|
||||||
|
[
|
||||||
|
"functorch/test_dims",
|
||||||
|
diff --git a/torch/__init__.py b/torch/__init__.py
|
||||||
|
index d381712b4a35..26cdffe81d29 100644
|
||||||
|
--- a/torch/__init__.py
|
||||||
|
+++ b/torch/__init__.py
|
||||||
|
@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *,
|
||||||
|
|
||||||
|
"""
|
||||||
|
_C._log_api_usage_once("torch.compile")
|
||||||
|
- # Temporary until we get proper support for python 3.12
|
||||||
|
- if sys.version_info >= (3, 12):
|
||||||
|
- raise RuntimeError("Dynamo is not supported on Python 3.12+")
|
||||||
|
+ if sys.version_info >= (3, 13):
|
||||||
|
+ raise RuntimeError("Dynamo is not supported on Python 3.13+")
|
||||||
|
|
||||||
|
# Decorator mode
|
||||||
|
if model is None:
|
||||||
|
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
|
||||||
|
index 53ab0df3a947..0a80eeea99ed 100644
|
||||||
|
--- a/torch/_dynamo/eval_frame.py
|
||||||
|
+++ b/torch/_dynamo/eval_frame.py
|
||||||
|
@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
|
||||||
|
|
||||||
|
|
||||||
|
def check_if_dynamo_supported():
|
||||||
|
- if sys.version_info >= (3, 12):
|
||||||
|
- raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
|
||||||
|
+ if sys.version_info >= (3, 13):
|
||||||
|
+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile")
|
||||||
|
|
||||||
|
|
||||||
|
def is_dynamo_supported():
|
||||||
|
diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py
|
||||||
|
index e3cbef09eaae..297ea6e2bc2a 100644
|
||||||
|
--- a/torch/_dynamo/test_case.py
|
||||||
|
+++ b/torch/_dynamo/test_case.py
|
||||||
|
@@ -1,7 +1,6 @@
|
||||||
|
import contextlib
|
||||||
|
import importlib
|
||||||
|
import logging
|
||||||
|
-import sys
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.testing
|
||||||
|
@@ -20,12 +19,7 @@ log = logging.getLogger(__name__)
|
||||||
|
def run_tests(needs=()):
|
||||||
|
from torch.testing._internal.common_utils import run_tests
|
||||||
|
|
||||||
|
- if (
|
||||||
|
- TEST_WITH_TORCHDYNAMO
|
||||||
|
- or IS_WINDOWS
|
||||||
|
- or TEST_WITH_CROSSREF
|
||||||
|
- or sys.version_info >= (3, 12)
|
||||||
|
- ):
|
||||||
|
+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF:
|
||||||
|
return # skip testing
|
||||||
|
|
||||||
|
if isinstance(needs, str):
|
||||||
|
--
|
||||||
|
2.44.0
|
||||||
|
|
||||||
54
0001-no-third_party-FXdiv.patch
Normal file
54
0001-no-third_party-FXdiv.patch
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
From b3b307add5724ee5730f161e16594fa702f34a19 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 08:20:28 -0500
|
||||||
|
Subject: [PATCH] no third_party FXdiv
|
||||||
|
|
||||||
|
---
|
||||||
|
caffe2/CMakeLists.txt | 24 ++++++++++++------------
|
||||||
|
1 file changed, 12 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
|
||||||
|
index b2f3adbfae..80a5625c8d 100644
|
||||||
|
--- a/caffe2/CMakeLists.txt
|
||||||
|
+++ b/caffe2/CMakeLists.txt
|
||||||
|
@@ -110,15 +110,15 @@ endif()
|
||||||
|
# Note: the folders that are being commented out have not been properly
|
||||||
|
# addressed yet.
|
||||||
|
|
||||||
|
-if(NOT MSVC AND USE_XNNPACK)
|
||||||
|
- if(NOT TARGET fxdiv)
|
||||||
|
- set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
|
||||||
|
- set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||||
|
- add_subdirectory(
|
||||||
|
- "${FXDIV_SOURCE_DIR}"
|
||||||
|
- "${CMAKE_BINARY_DIR}/FXdiv")
|
||||||
|
- endif()
|
||||||
|
-endif()
|
||||||
|
+#if(NOT MSVC AND USE_XNNPACK)
|
||||||
|
+# if(NOT TARGET fxdiv)
|
||||||
|
+# set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
|
||||||
|
+# set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||||
|
+# add_subdirectory(
|
||||||
|
+# "${FXDIV_SOURCE_DIR}"
|
||||||
|
+# "${CMAKE_BINARY_DIR}/FXdiv")
|
||||||
|
+# endif()
|
||||||
|
+#endif()
|
||||||
|
|
||||||
|
add_subdirectory(core)
|
||||||
|
add_subdirectory(serialize)
|
||||||
|
@@ -1081,9 +1081,9 @@ if(USE_XPU)
|
||||||
|
target_compile_definitions(torch_xpu PRIVATE USE_XPU)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
-if(NOT MSVC AND USE_XNNPACK)
|
||||||
|
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
|
||||||
|
-endif()
|
||||||
|
+#if(NOT MSVC AND USE_XNNPACK)
|
||||||
|
+# TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
|
||||||
|
+#endif()
|
||||||
|
|
||||||
|
# ==========================================================
|
||||||
|
# formerly-libtorch flags
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
65
0001-no-third_party-fmt.patch
Normal file
65
0001-no-third_party-fmt.patch
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 08:16:04 -0500
|
||||||
|
Subject: [PATCH] no third_party fmt
|
||||||
|
|
||||||
|
---
|
||||||
|
c10/CMakeLists.txt | 2 +-
|
||||||
|
cmake/Dependencies.cmake | 6 +++---
|
||||||
|
torch/CMakeLists.txt | 2 +-
|
||||||
|
3 files changed, 5 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
|
||||||
|
index 1f742f4c176..4fa08913bdd 100644
|
||||||
|
--- a/c10/CMakeLists.txt
|
||||||
|
+++ b/c10/CMakeLists.txt
|
||||||
|
@@ -87,7 +87,7 @@ endif()
|
||||||
|
if(C10_USE_GLOG)
|
||||||
|
target_link_libraries(c10 PUBLIC glog::glog)
|
||||||
|
endif()
|
||||||
|
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
|
||||||
|
+target_link_libraries(c10 PRIVATE fmt)
|
||||||
|
|
||||||
|
if(C10_USE_NUMA)
|
||||||
|
message(STATUS "NUMA paths:")
|
||||||
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||||
|
index 6f5a2d5feff..42fbf80f6e8 100644
|
||||||
|
--- a/cmake/Dependencies.cmake
|
||||||
|
+++ b/cmake/Dependencies.cmake
|
||||||
|
@@ -1837,7 +1837,7 @@ endif()
|
||||||
|
#
|
||||||
|
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
||||||
|
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
|
||||||
|
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||||
|
+# add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||||
|
|
||||||
|
# Disable compiler feature checks for `fmt`.
|
||||||
|
#
|
||||||
|
@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||||
|
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
|
||||||
|
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
|
||||||
|
# shouldn't be too bad to just disable the checks.
|
||||||
|
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
|
||||||
|
+# set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
|
||||||
|
|
||||||
|
-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
|
||||||
|
+# list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
|
||||||
|
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
|
||||||
|
|
||||||
|
# ---[ Kineto
|
||||||
|
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
|
||||||
|
index 97a72eed55b..9e5014d1980 100644
|
||||||
|
--- a/torch/CMakeLists.txt
|
||||||
|
+++ b/torch/CMakeLists.txt
|
||||||
|
@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
|
||||||
|
python::python
|
||||||
|
pybind::pybind11
|
||||||
|
shm
|
||||||
|
- fmt::fmt-header-only
|
||||||
|
+ fmt
|
||||||
|
ATEN_CPU_FILES_GEN_LIB)
|
||||||
|
|
||||||
|
if(USE_ASAN AND TARGET Sanitizer::address)
|
||||||
|
--
|
||||||
|
2.43.2
|
||||||
|
|
||||||
36
0001-no-third_party-foxi.patch
Normal file
36
0001-no-third_party-foxi.patch
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
From 8cb61cf9282102ac225645fcc9fb4a1bb7cb15a2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 08:11:55 -0500
|
||||||
|
Subject: [PATCH] no third_party foxi
|
||||||
|
|
||||||
|
---
|
||||||
|
cmake/Dependencies.cmake | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||||
|
index 5f91f3ffab..8e1461af81 100644
|
||||||
|
--- a/cmake/Dependencies.cmake
|
||||||
|
+++ b/cmake/Dependencies.cmake
|
||||||
|
@@ -1567,7 +1567,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||||
|
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
|
||||||
|
+ # add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
|
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
|
||||||
|
if(NOT USE_SYSTEM_ONNX)
|
||||||
|
@@ -1600,8 +1600,8 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||||
|
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
|
||||||
|
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
|
||||||
|
endif()
|
||||||
|
- include_directories(${FOXI_INCLUDE_DIRS})
|
||||||
|
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||||
|
+# include_directories(${FOXI_INCLUDE_DIRS})
|
||||||
|
+# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||||
|
# Recover the build shared libs option.
|
||||||
|
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
|
||||||
|
endif()
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
25
0001-reenable-foxi-linking.patch
Normal file
25
0001-reenable-foxi-linking.patch
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Thu, 22 Feb 2024 09:28:11 -0500
|
||||||
|
Subject: [PATCH] reenable foxi linking
|
||||||
|
|
||||||
|
---
|
||||||
|
cmake/Dependencies.cmake | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||||
|
index 42fbf80f6e8..bc3a2dc6fee 100644
|
||||||
|
--- a/cmake/Dependencies.cmake
|
||||||
|
+++ b/cmake/Dependencies.cmake
|
||||||
|
@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||||
|
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
|
||||||
|
endif()
|
||||||
|
# include_directories(${FOXI_INCLUDE_DIRS})
|
||||||
|
-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||||
|
+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||||
|
# Recover the build shared libs option.
|
||||||
|
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
|
||||||
|
endif()
|
||||||
|
--
|
||||||
|
2.43.2
|
||||||
|
|
||||||
25
0001-silence-an-assert.patch
Normal file
25
0001-silence-an-assert.patch
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
From 04dd33db93b852fdfd7ea408813080b2e2026650 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 06:41:20 -0500
|
||||||
|
Subject: [PATCH] silence an assert
|
||||||
|
|
||||||
|
---
|
||||||
|
aten/src/ATen/native/cuda/IndexKernel.cu | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu
|
||||||
|
index 657c0c77b3..b406aa6687 100644
|
||||||
|
--- a/aten/src/ATen/native/cuda/IndexKernel.cu
|
||||||
|
+++ b/aten/src/ATen/native/cuda/IndexKernel.cu
|
||||||
|
@@ -249,7 +249,7 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind
|
||||||
|
|
||||||
|
gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) {
|
||||||
|
int64_t qvalue = static_cast<int64_t>(zero_point + nearbyintf(*(float*)in_data * inv_scale));
|
||||||
|
- qvalue = std::clamp(qvalue, qmin, qmax);
|
||||||
|
+ //qvalue = std::clamp(qvalue, qmin, qmax);
|
||||||
|
*(scalar_t*)(out_data + offset) = static_cast<scalar_t>(qvalue);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
34
0001-use-any-hip.patch
Normal file
34
0001-use-any-hip.patch
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
From 4248211ce9a9de81bb3ade5d421ba709b19ead08 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Rix <trix@redhat.com>
|
||||||
|
Date: Sat, 3 Feb 2024 15:01:28 -0500
|
||||||
|
Subject: [PATCH] use any hip
|
||||||
|
|
||||||
|
---
|
||||||
|
cmake/public/LoadHIP.cmake | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
|
||||||
|
index 1abeb06228..28458c4146 100644
|
||||||
|
--- a/cmake/public/LoadHIP.cmake
|
||||||
|
+++ b/cmake/public/LoadHIP.cmake
|
||||||
|
@@ -30,7 +30,7 @@ endif()
|
||||||
|
message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
|
||||||
|
|
||||||
|
# Add HIP to the CMAKE Module Path
|
||||||
|
-set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH})
|
||||||
|
+set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib64/cmake/hip ${CMAKE_MODULE_PATH})
|
||||||
|
|
||||||
|
macro(find_package_and_print_version PACKAGE_NAME)
|
||||||
|
find_package("${PACKAGE_NAME}" ${ARGN})
|
||||||
|
@@ -38,7 +38,7 @@ macro(find_package_and_print_version PACKAGE_NAME)
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
# Find the HIP Package
|
||||||
|
-find_package_and_print_version(HIP 1.0)
|
||||||
|
+find_package_and_print_version(HIP MODULE)
|
||||||
|
|
||||||
|
if(HIP_FOUND)
|
||||||
|
set(PYTORCH_FOUND_HIP TRUE)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
From fd535f7bf44f2034cca2a66b4cc7d68d962341df Mon Sep 17 00:00:00 2001
|
|
||||||
From: Tom Rix <Tom.Rix@amd.com>
|
|
||||||
Date: Sun, 20 Jul 2025 12:47:58 -0700
|
|
||||||
Subject: [PATCH] Use horrible dynamo stub
|
|
||||||
|
|
||||||
Rawhide's update of python is too fast for dynamo
|
|
||||||
So paper of the problem with a horrible stub that throws
|
|
||||||
runtime exceptions if dynamo is used.
|
|
||||||
|
|
||||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
|
||||||
---
|
|
||||||
build_variables.bzl | 26 ++++++++++++----------
|
|
||||||
torch/csrc/dynamo/horrible_dynamo_stub.cpp | 16 +++++++++++++
|
|
||||||
2 files changed, 30 insertions(+), 12 deletions(-)
|
|
||||||
create mode 100644 torch/csrc/dynamo/horrible_dynamo_stub.cpp
|
|
||||||
|
|
||||||
diff --git a/build_variables.bzl b/build_variables.bzl
|
|
||||||
index b266c80e8843..a3be6893349b 100644
|
|
||||||
--- a/build_variables.bzl
|
|
||||||
+++ b/build_variables.bzl
|
|
||||||
@@ -140,7 +140,8 @@ core_trainer_sources = [
|
|
||||||
"torch/csrc/autograd/variable.cpp",
|
|
||||||
"torch/csrc/autograd/utils/warnings.cpp",
|
|
||||||
"torch/csrc/autograd/jit_decomp_interface.cpp",
|
|
||||||
- "torch/csrc/dynamo/compiled_autograd.cpp",
|
|
||||||
+# "torch/csrc/dynamo/compiled_autograd.cpp",
|
|
||||||
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
|
|
||||||
"torch/csrc/jit/frontend/name_mangler.cpp",
|
|
||||||
"torch/csrc/jit/ir/type_hashing.cpp",
|
|
||||||
"torch/csrc/jit/serialization/pickler.cpp",
|
|
||||||
@@ -868,17 +869,18 @@ libtorch_python_core_sources = [
|
|
||||||
"torch/csrc/autograd/python_torch_functions_manual.cpp",
|
|
||||||
"torch/csrc/autograd/python_variable.cpp",
|
|
||||||
"torch/csrc/autograd/python_variable_indexing.cpp",
|
|
||||||
- "torch/csrc/dynamo/python_compiled_autograd.cpp",
|
|
||||||
- "torch/csrc/dynamo/cache_entry.cpp",
|
|
||||||
- "torch/csrc/dynamo/cpp_shim.cpp",
|
|
||||||
- "torch/csrc/dynamo/cpython_defs.c",
|
|
||||||
- "torch/csrc/dynamo/eval_frame.c",
|
|
||||||
- "torch/csrc/dynamo/eval_frame_cpp.cpp",
|
|
||||||
- "torch/csrc/dynamo/extra_state.cpp",
|
|
||||||
- "torch/csrc/dynamo/framelocals_mapping.cpp",
|
|
||||||
- "torch/csrc/dynamo/guards.cpp",
|
|
||||||
- "torch/csrc/dynamo/utils.cpp",
|
|
||||||
- "torch/csrc/dynamo/init.cpp",
|
|
||||||
+# "torch/csrc/dynamo/python_compiled_autograd.cpp",
|
|
||||||
+# "torch/csrc/dynamo/cache_entry.cpp",
|
|
||||||
+# "torch/csrc/dynamo/cpp_shim.cpp",
|
|
||||||
+# "torch/csrc/dynamo/cpython_defs.c",
|
|
||||||
+# "torch/csrc/dynamo/eval_frame.c",
|
|
||||||
+# "torch/csrc/dynamo/eval_frame_cpp.cpp",
|
|
||||||
+# "torch/csrc/dynamo/extra_state.cpp",
|
|
||||||
+# "torch/csrc/dynamo/framelocals_mapping.cpp",
|
|
||||||
+# "torch/csrc/dynamo/guards.cpp",
|
|
||||||
+# "torch/csrc/dynamo/utils.cpp",
|
|
||||||
+# "torch/csrc/dynamo/init.cpp",
|
|
||||||
+ "torch/csrc/dynamo/horrible_dynamo_stub.cpp",
|
|
||||||
"torch/csrc/functorch/init.cpp",
|
|
||||||
"torch/csrc/fx/node.cpp",
|
|
||||||
"torch/csrc/mps/Module.cpp",
|
|
||||||
diff --git a/torch/csrc/dynamo/horrible_dynamo_stub.cpp b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000000..3ac1324d4557
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/torch/csrc/dynamo/horrible_dynamo_stub.cpp
|
|
||||||
@@ -0,0 +1,16 @@
|
|
||||||
+#include <torch/csrc/autograd/engine.h>
|
|
||||||
+#include <torch/csrc/dynamo/compiled_autograd.h>
|
|
||||||
+
|
|
||||||
+namespace torch::dynamo::autograd {
|
|
||||||
+const std::unique_ptr<PyCompilerInterface>& getPyCompilerInterface() {
|
|
||||||
+ throw std::runtime_error("Dynamo not supported");
|
|
||||||
+ return nullptr;
|
|
||||||
+}
|
|
||||||
+std::vector<std::optional<InputMetadata>> get_input_metadata(
|
|
||||||
+ const edge_list& edges) {
|
|
||||||
+ std::vector<std::optional<InputMetadata>> r;
|
|
||||||
+ throw std::runtime_error("Dynamo not supported");
|
|
||||||
+ return r;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.49.0
|
|
||||||
|
|
||||||
269
pyproject.toml
269
pyproject.toml
|
|
@ -1,165 +1,46 @@
|
||||||
# Package ######################################################################
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = [
|
requires = [
|
||||||
# 70.1.0: min version for integrated bdist_wheel command from wheel package
|
"setuptools",
|
||||||
# 77.0.0: min version for SPDX expression support for project.license
|
|
||||||
"setuptools>=70.1.0,<80.0",
|
|
||||||
"cmake>=3.27",
|
|
||||||
"ninja",
|
|
||||||
"numpy",
|
|
||||||
"packaging",
|
|
||||||
"pyyaml",
|
|
||||||
"requests",
|
|
||||||
"six", # dependency chain: NNPACK -> PeachPy -> six
|
|
||||||
"typing-extensions>=4.10.0",
|
|
||||||
]
|
|
||||||
build-backend = "setuptools.build_meta"
|
|
||||||
|
|
||||||
[dependency-groups]
|
|
||||||
dev = [
|
|
||||||
# This list should be kept in sync with the requirements-build.txt
|
|
||||||
# in PyTorch root until the project fully migrates to pyproject.toml
|
|
||||||
# after which this can be removed as it is already specified in the
|
|
||||||
# [build-system] section
|
|
||||||
"setuptools>=70.1.0,<80.0", # setuptools develop deprecated on 80.0
|
|
||||||
"cmake>=3.27",
|
|
||||||
"ninja",
|
|
||||||
"numpy",
|
|
||||||
"packaging",
|
|
||||||
"pyyaml",
|
|
||||||
"requests",
|
|
||||||
"six", # dependency chain: NNPACK -> PeachPy -> six
|
|
||||||
"typing-extensions>=4.10.0",
|
|
||||||
|
|
||||||
# This list should be kept in sync with the requirements.txt in
|
|
||||||
# PyTorch root until the project fully migrates to pyproject.toml
|
|
||||||
"build[uv]",
|
|
||||||
"expecttest>=0.3.0",
|
|
||||||
"filelock",
|
|
||||||
"fsspec>=0.8.5",
|
|
||||||
"hypothesis",
|
|
||||||
"jinja2",
|
|
||||||
"lintrunner; platform_machine != 's390x' and platform_machine != 'riscv64'",
|
|
||||||
"networkx>=2.5.1",
|
|
||||||
"optree>=0.13.0",
|
|
||||||
"psutil",
|
|
||||||
"sympy>=1.13.3",
|
|
||||||
"typing-extensions>=4.13.2",
|
|
||||||
"wheel",
|
"wheel",
|
||||||
|
"astunparse",
|
||||||
|
"numpy",
|
||||||
|
"ninja",
|
||||||
|
"pyyaml",
|
||||||
|
"cmake",
|
||||||
|
"typing-extensions",
|
||||||
|
"requests",
|
||||||
]
|
]
|
||||||
|
# Use legacy backend to import local packages in setup.py
|
||||||
|
build-backend = "setuptools.build_meta:__legacy__"
|
||||||
|
|
||||||
[project]
|
|
||||||
name = "torch"
|
|
||||||
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
|
||||||
readme = "README.md"
|
|
||||||
requires-python = ">=3.10"
|
|
||||||
# TODO: change to `license = "BSD-3-Clause"` and enable PEP 639 after pinning setuptools>=77
|
|
||||||
# FIXME: As of 2025.06.20, it is hard to ensure the minimum version of setuptools in our CI environment.
|
|
||||||
# TOML-table-based license deprecated in setuptools>=77, and the deprecation warning will be changed
|
|
||||||
# to an error on 2026.02.18. See also: https://github.com/pypa/setuptools/issues/4903
|
|
||||||
license = { text = "BSD-3-Clause" }
|
|
||||||
authors = [{ name = "PyTorch Team", email = "packages@pytorch.org" }]
|
|
||||||
keywords = ["pytorch", "machine learning"]
|
|
||||||
classifiers = [
|
|
||||||
"Development Status :: 5 - Production/Stable",
|
|
||||||
"Intended Audience :: Developers",
|
|
||||||
"Intended Audience :: Education",
|
|
||||||
"Intended Audience :: Science/Research",
|
|
||||||
"Topic :: Scientific/Engineering",
|
|
||||||
"Topic :: Scientific/Engineering :: Mathematics",
|
|
||||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
||||||
"Topic :: Software Development",
|
|
||||||
"Topic :: Software Development :: Libraries",
|
|
||||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
||||||
"Programming Language :: C++",
|
|
||||||
"Programming Language :: Python :: 3 :: Only",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"Programming Language :: Python :: 3.11",
|
|
||||||
"Programming Language :: Python :: 3.12",
|
|
||||||
"Programming Language :: Python :: 3.13",
|
|
||||||
"Programming Language :: Python :: 3.14",
|
|
||||||
]
|
|
||||||
dynamic = [
|
|
||||||
"entry-points",
|
|
||||||
"dependencies",
|
|
||||||
"scripts",
|
|
||||||
"version",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.urls]
|
[tool.black]
|
||||||
Homepage = "https://pytorch.org"
|
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
|
||||||
Repository = "https://github.com/pytorch/pytorch"
|
# line-length = 120
|
||||||
Documentation = "https://pytorch.org/docs"
|
target-version = ["py38", "py39", "py310", "py311"]
|
||||||
"Issue Tracker" = "https://github.com/pytorch/pytorch/issues"
|
|
||||||
Forum = "https://discuss.pytorch.org"
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
|
||||||
optree = ["optree>=0.13.0"]
|
|
||||||
opt-einsum = ["opt-einsum>=3.3"]
|
|
||||||
pyyaml = ["pyyaml"]
|
|
||||||
|
|
||||||
# Linter tools #################################################################
|
|
||||||
|
|
||||||
[tool.isort]
|
|
||||||
src_paths = ["caffe2", "torch", "torchgen", "functorch", "test"]
|
|
||||||
extra_standard_library = ["typing_extensions"]
|
|
||||||
skip_gitignore = true
|
|
||||||
skip_glob = ["third_party/*"]
|
|
||||||
atomic = true
|
|
||||||
profile = "black"
|
|
||||||
indent = 4
|
|
||||||
line_length = 88
|
|
||||||
lines_after_imports = 2
|
|
||||||
multi_line_output = 3
|
|
||||||
include_trailing_comma = true
|
|
||||||
combine_as_imports = true
|
|
||||||
|
|
||||||
[tool.usort.known]
|
|
||||||
first_party = ["caffe2", "torch", "torchgen", "functorch", "test"]
|
|
||||||
standard_library = ["typing_extensions"]
|
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
line-length = 88
|
target-version = "py38"
|
||||||
src = ["caffe2", "torch", "torchgen", "functorch", "test"]
|
|
||||||
|
|
||||||
[tool.ruff.format]
|
|
||||||
docstring-code-format = true
|
|
||||||
quote-style = "double"
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
# NOTE: Synchoronize the ignores with .flake8
|
# NOTE: Synchoronize the ignores with .flake8
|
||||||
external = [
|
|
||||||
"B001",
|
|
||||||
"B902",
|
|
||||||
"B950",
|
|
||||||
"E121",
|
|
||||||
"E122",
|
|
||||||
"E128",
|
|
||||||
"E131",
|
|
||||||
"E704",
|
|
||||||
"E723",
|
|
||||||
"F723",
|
|
||||||
"F812",
|
|
||||||
"P201",
|
|
||||||
"P204",
|
|
||||||
"T484",
|
|
||||||
"TOR901",
|
|
||||||
]
|
|
||||||
ignore = [
|
ignore = [
|
||||||
# these ignores are from flake8-bugbear; please fix!
|
# these ignores are from flake8-bugbear; please fix!
|
||||||
"B007", "B008", "B017",
|
"B007", "B008", "B017",
|
||||||
"B018", # Useless expression
|
"B018", # Useless expression
|
||||||
|
"B019",
|
||||||
"B023",
|
"B023",
|
||||||
"B028", # No explicit `stacklevel` keyword argument found
|
"B028", # No explicit `stacklevel` keyword argument found
|
||||||
|
"B904",
|
||||||
"E402",
|
"E402",
|
||||||
"C408", # C408 ignored because we like the dict keyword argument syntax
|
"C408", # C408 ignored because we like the dict keyword argument syntax
|
||||||
"E501", # E501 is not flexible enough, we're using B950 instead
|
"E501", # E501 is not flexible enough, we're using B950 instead
|
||||||
"E721",
|
"E721",
|
||||||
|
"E731", # Assign lambda expression
|
||||||
"E741",
|
"E741",
|
||||||
"EXE001",
|
"EXE001",
|
||||||
"F405",
|
"F405",
|
||||||
"FURB122", # writelines
|
"F841",
|
||||||
# these ignores are from flake8-logging-format; please fix!
|
# these ignores are from flake8-logging-format; please fix!
|
||||||
"G101",
|
"G101",
|
||||||
# these ignores are from ruff NPY; please fix!
|
# these ignores are from ruff NPY; please fix!
|
||||||
|
|
@ -167,49 +48,39 @@ ignore = [
|
||||||
# these ignores are from ruff PERF; please fix!
|
# these ignores are from ruff PERF; please fix!
|
||||||
"PERF203",
|
"PERF203",
|
||||||
"PERF401",
|
"PERF401",
|
||||||
|
"PERF403",
|
||||||
# these ignores are from PYI; please fix!
|
# these ignores are from PYI; please fix!
|
||||||
|
"PYI019",
|
||||||
"PYI024",
|
"PYI024",
|
||||||
"PYI036",
|
"PYI036",
|
||||||
"PYI041",
|
"PYI041",
|
||||||
"PYI056",
|
"PYI056",
|
||||||
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
|
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
|
||||||
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
|
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||||
"SIM108", # SIM108 ignored because we prefer if-else-block instead of ternary expression
|
"SIM108",
|
||||||
"SIM110",
|
"SIM110",
|
||||||
"SIM114", # Combine `if` branches using logical `or` operator
|
"SIM114", # Combine `if` branches using logical `or` operator
|
||||||
"SIM115",
|
"SIM115",
|
||||||
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
|
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
|
||||||
"SIM117",
|
"SIM117",
|
||||||
"SIM118",
|
"SIM118",
|
||||||
|
"UP006", # keep-runtime-typing
|
||||||
"UP007", # keep-runtime-typing
|
"UP007", # keep-runtime-typing
|
||||||
"UP045", # keep-runtime-typing
|
|
||||||
"TC006",
|
|
||||||
# TODO: Remove Python-3.10 specific suppressions
|
|
||||||
"B905",
|
|
||||||
"UP035",
|
|
||||||
"UP036",
|
|
||||||
"UP038",
|
|
||||||
"UP041",
|
|
||||||
"FURB161",
|
|
||||||
]
|
]
|
||||||
|
line-length = 120
|
||||||
select = [
|
select = [
|
||||||
"B",
|
"B",
|
||||||
"B904", # Re-raised error without specifying the cause via the from keyword
|
|
||||||
"C4",
|
"C4",
|
||||||
"G",
|
"G",
|
||||||
"E",
|
"E",
|
||||||
"EXE",
|
"EXE",
|
||||||
"F",
|
"F",
|
||||||
"SIM1",
|
"SIM1",
|
||||||
"SIM911",
|
|
||||||
"W",
|
"W",
|
||||||
# Not included in flake8
|
# Not included in flake8
|
||||||
"FURB",
|
|
||||||
"LOG",
|
|
||||||
"NPY",
|
"NPY",
|
||||||
"PERF",
|
"PERF",
|
||||||
"PGH004",
|
"PGH004",
|
||||||
"PIE790",
|
|
||||||
"PIE794",
|
"PIE794",
|
||||||
"PIE800",
|
"PIE800",
|
||||||
"PIE804",
|
"PIE804",
|
||||||
|
|
@ -218,96 +89,40 @@ select = [
|
||||||
"PLC0131", # type bivariance
|
"PLC0131", # type bivariance
|
||||||
"PLC0132", # type param mismatch
|
"PLC0132", # type param mismatch
|
||||||
"PLC0205", # string as __slots__
|
"PLC0205", # string as __slots__
|
||||||
"PLC3002", # unnecessary-direct-lambda-call
|
|
||||||
"PLE",
|
"PLE",
|
||||||
"PLR0133", # constant comparison
|
"PLR0133", # constant comparison
|
||||||
"PLR0206", # property with params
|
"PLR0206", # property with params
|
||||||
"PLR1722", # use sys exit
|
"PLR1722", # use sys exit
|
||||||
"PLR1736", # unnecessary list index
|
|
||||||
"PLW0129", # assert on string literal
|
"PLW0129", # assert on string literal
|
||||||
"PLW0131", # named expr without context
|
|
||||||
"PLW0133", # useless exception statement
|
|
||||||
"PLW0245", # super without brackets
|
|
||||||
"PLW0406", # import self
|
"PLW0406", # import self
|
||||||
"PLW0711", # binary op exception
|
"PLW0711", # binary op exception
|
||||||
"PLW1501", # bad open mode
|
|
||||||
"PLW1507", # shallow copy os.environ
|
|
||||||
"PLW1509", # preexec_fn not safe with threads
|
"PLW1509", # preexec_fn not safe with threads
|
||||||
"PLW2101", # useless lock statement
|
|
||||||
"PLW3301", # nested min max
|
"PLW3301", # nested min max
|
||||||
"PT006", # TODO: enable more PT rules
|
"PT006", # TODO: enable more PT rules
|
||||||
"PT014", # duplicate parameterize case
|
|
||||||
"PT022",
|
"PT022",
|
||||||
"PT023",
|
"PT023",
|
||||||
"PT024",
|
"PT024",
|
||||||
"PT025",
|
"PT025",
|
||||||
"PT026",
|
"PT026",
|
||||||
"PYI",
|
"PYI",
|
||||||
"Q003", # avoidable escaped quote
|
|
||||||
"Q004", # unnecessary escaped quote
|
|
||||||
"RSE",
|
|
||||||
"RUF008", # mutable dataclass default
|
"RUF008", # mutable dataclass default
|
||||||
"RUF013", # ban implicit optional
|
|
||||||
"RUF015", # access first ele in constant time
|
"RUF015", # access first ele in constant time
|
||||||
"RUF016", # type error non-integer index
|
"RUF016", # type error non-integer index
|
||||||
"RUF017",
|
"RUF017",
|
||||||
"RUF018", # no assignment in assert
|
"TRY200",
|
||||||
"RUF019", # unnecessary-key-check
|
"TRY302",
|
||||||
"RUF020", # never union
|
|
||||||
"RUF024", # from keys mutable
|
|
||||||
"RUF026", # default factory kwarg
|
|
||||||
"RUF030", # No print statement in assert
|
|
||||||
"RUF033", # default values __post_init__ dataclass
|
|
||||||
"RUF041", # simplify nested Literal
|
|
||||||
"RUF048", # properly parse `__version__`
|
|
||||||
"RUF200", # validate pyproject.toml
|
|
||||||
"S324", # for hashlib FIPS compliance
|
|
||||||
"SLOT",
|
|
||||||
"TC",
|
|
||||||
"TRY002", # ban vanilla raise (todo fix NOQAs)
|
|
||||||
"TRY203",
|
|
||||||
"TRY401", # verbose-log-message
|
|
||||||
"UP",
|
"UP",
|
||||||
"YTT",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff.lint.pyupgrade]
|
[tool.ruff.per-file-ignores]
|
||||||
# Preserve types, even if a file imports `from __future__ import annotations`.
|
|
||||||
keep-runtime-typing = true
|
|
||||||
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
|
||||||
"__init__.py" = [
|
"__init__.py" = [
|
||||||
"F401",
|
"F401",
|
||||||
]
|
]
|
||||||
"*.pyi" = [
|
|
||||||
"PYI011", # typed-argument-default-in-stub
|
|
||||||
"PYI021", # docstring-in-stub
|
|
||||||
"PYI053", # string-or-bytes-too-long
|
|
||||||
]
|
|
||||||
"functorch/notebooks/**" = [
|
|
||||||
"F401",
|
|
||||||
]
|
|
||||||
"test/export/**" = [
|
|
||||||
"PGH004"
|
|
||||||
]
|
|
||||||
"test/typing/**" = [
|
|
||||||
"PGH004"
|
|
||||||
]
|
|
||||||
"test/typing/reveal/**" = [
|
"test/typing/reveal/**" = [
|
||||||
"F821",
|
"F821",
|
||||||
]
|
]
|
||||||
"test/torch_np/numpy_tests/**" = [
|
"test/torch_np/numpy_tests/**" = [
|
||||||
"F821",
|
"F821",
|
||||||
"NPY201",
|
|
||||||
]
|
|
||||||
"test/dynamo/test_bytecode_utils.py" = [
|
|
||||||
"F821",
|
|
||||||
]
|
|
||||||
"test/dynamo/test_debug_utils.py" = [
|
|
||||||
"UP037",
|
|
||||||
]
|
|
||||||
"test/dynamo/test_misc.py" = [
|
|
||||||
"PGH004",
|
|
||||||
]
|
]
|
||||||
"test/jit/**" = [
|
"test/jit/**" = [
|
||||||
"PLR0133", # tests require this for JIT
|
"PLR0133", # tests require this for JIT
|
||||||
|
|
@ -321,33 +136,19 @@ keep-runtime-typing = true
|
||||||
"RUF015",
|
"RUF015",
|
||||||
"UP", # We don't want to modify the jit test as they test specify syntax
|
"UP", # We don't want to modify the jit test as they test specify syntax
|
||||||
]
|
]
|
||||||
"test/inductor/s429861_repro.py" = [
|
|
||||||
"PGH004",
|
"torch/onnx/**" = [
|
||||||
]
|
"UP037", # ONNX does runtime type checking
|
||||||
"test/inductor/test_torchinductor.py" = [
|
|
||||||
"UP037",
|
|
||||||
]
|
|
||||||
# autogenerated #TODO figure out why file level noqa is ignored
|
|
||||||
"torch/_appdirs.py" = ["PGH004"]
|
|
||||||
"torch/jit/_shape_functions.py" = ["PGH004"]
|
|
||||||
"torch/_inductor/fx_passes/serialized_patterns/**" = ["F401", "F501"]
|
|
||||||
"torch/_inductor/autoheuristic/artifacts/**" = ["F401", "F501"]
|
|
||||||
"torch/_inductor/codegen/**" = [
|
|
||||||
"PGH004"
|
|
||||||
]
|
]
|
||||||
|
|
||||||
"torchgen/api/types/__init__.py" = [
|
"torchgen/api/types/__init__.py" = [
|
||||||
"F401",
|
"F401",
|
||||||
"F403",
|
"F403",
|
||||||
]
|
]
|
||||||
|
"torchgen/executorch/api/types/__init__.py" = [
|
||||||
|
"F401",
|
||||||
|
"F403",
|
||||||
|
]
|
||||||
"torch/utils/collect_env.py" = [
|
"torch/utils/collect_env.py" = [
|
||||||
"UP", # collect_env.py needs to work with older versions of Python
|
"UP", # collect_env.py needs to work with older versions of Python
|
||||||
]
|
]
|
||||||
"torch/_vendor/**" = [
|
|
||||||
"UP", # No need to mess with _vendor
|
|
||||||
]
|
|
||||||
"tools/linter/**" = [
|
|
||||||
"LOG015" # please fix
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.codespell]
|
|
||||||
ignore-words = "tools/linter/dictionary.txt"
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
30
sources
30
sources
|
|
@ -1,19 +1,19 @@
|
||||||
SHA512 (pytorch-v2.7.0.tar.gz) = 17e875a66f1669901f5f770c9d829ba5bfa3967296cfb71550e8a92507181db742548eaf7cc9a2c478c4b91e366f27cc480e2e1bbb328db8501d30e1649839e6
|
SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44
|
||||||
|
SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28
|
||||||
|
SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4
|
||||||
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
|
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
|
||||||
SHA512 (v2.13.6.tar.gz) = 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a
|
SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
|
||||||
|
SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
|
||||||
|
SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
|
||||||
|
SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
|
||||||
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
|
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
|
||||||
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
|
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
|
||||||
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
|
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
|
||||||
SHA512 (v1.14.2.tar.gz) = 97635bbaf6dd567c201451dfaf7815b2052fe50d9bccc97aade86cfa4a92651374d167296a5453031b2681dc302806a289bca011a9e79ddc381a17d6118971d7
|
SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580
|
||||||
SHA512 (cpp-httplib-3b6597b.tar.gz) = 8f1090658c498d04f14fec5c2f301847b1f3360bf92b18d82927643ee04ab61a6b274733a01c7850f9c030205120d674d1d961358d49fdd15636736fb8704f55
|
SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda
|
||||||
SHA512 (kineto-be13176.tar.gz) = 41a08c7da9eea7d12402f80a5550c9d4df79798719cc52b12a507828c8c896ba28a37c35d8adf809ca72589e1d84965d5ef6dd01f3f8dc1c803c5ed67b03a43a
|
SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4
|
||||||
SHA512 (pytorch-a1cb3cc.tar.gz) = 92bf8b2c2ef0b459406b60169ecebdc50652c75943e3d6087e4d261f6e308dbad365529561e0f07ea3f0b71790efb68b5e4ab2f44e270462097208d924dc2d95
|
SHA512 (xnnpack-fcbf55a.tar.gz) = 8063e27686f7b71cfba05b0c004c46db4506638689ffb112f013b3886de58653b60ca5487978c3f96275c17bb1136883ca4c93ddb2241a2c31925a950cb51759
|
||||||
SHA512 (v24.12.23.tar.gz) = f97762ba41b9cfef648e93932fd789324c6bb6ebc5b7aeca8185c9ef602294b67d73aea7ae371035579a1419cbfbeba7c3e88b31b5a5848db98f5e8a03b982b1
|
SHA512 (FXdiv-63058ef.tar.gz) = da33eab4d006645f383a1f24fc3e747db3aeb0613219297ec0ae69aa2617f07ba050ebd6a64a8cbde6d25481f176d0ec3b9753a95d1fbcead2136595f3e50e97
|
||||||
SHA512 (kineto-5e75018.tar.gz) = 921b96a56e01d69895b79e67582d8977ed6f873573ab41557c5d026ada5d1f6365e4ed0a0c6804057c52e92510749fc58619f554a164c1ba9d8cd13e789bebd0
|
SHA512 (FP16-0a92994.tar.gz) = 3f094f242425ea37de274eb8539dc5f8ab0c13fd5325d14180ef12e9c04e6002a110d086c4c667f7c8054af337deab096d59482eb95cc8a632c3c412b48e89d1
|
||||||
SHA512 (pytorch-v2.8.0.tar.gz) = 791e658eab87fb957f025558cb9f925078d2426ab7b6f60771d9841dfb691f67d905ba1330a800008efe7c938b6c69bdc52232bccfe8d4860e795a532cd69d28
|
SHA512 (psimd-072586a.tar.gz) = a18faea093423dd9fe19ece8b228e011dccce0a2a22222f777ea19b023a13173966d4a8aea01147e8fc58de5d39cffcedeb2221a1572ae52bd5aba1295f86a94
|
||||||
SHA512 (v1.18.0.tar.gz) = 2f38664947c8d1efc40620a7c1b1953d2aa4b0a37b67c4886b86e77c1d697363c26413413ddda8eabc545892fb1bcb43afc7e93e62f0901527524a2727e1ea8d
|
SHA512 (cpuinfo-d6860c4.tar.gz) = 02dd70f0b95c2cb6d8af4e33072f63f5d72d2314796033ae68bb6b37cb7db18d43dd2cdfedafc896dec0614dbeec9ab507f765f3d958fbda8c0ab3e3a191a87c
|
||||||
SHA512 (pytorch-715dca6.tar.gz) = 09c9aae54fab3eb17901fc3226fece1c13f41cb8e45a2cb066021823abeb8d27c340993088e01d8e55bb37ed5f94334ec31e6c539cddfacbad157abd27c5e907
|
|
||||||
SHA512 (pytorch-fd36458.tar.gz) = acbb7475b92ad4a8e8d779f3745da22d8438e4c5ef2d6e76d71c987789f2752c8aef7022c87c9a74640fe4f9c1f1a61a3f12a796f63b1e6be24da8e5aacf37dc
|
|
||||||
SHA512 (pytorch-0fabc3b.tar.gz) = 2e87975de0bf6f3dcede168b379e1928712bca16170c2a8ee7d63459f53086c01baac05e0763e4d5d28cdaf1c7d8912225ee06adeff96ead4f6f456ee174b341
|
|
||||||
SHA512 (pytorch-v2.9.0.tar.gz) = ae989e3a7fe30f9ea90944dc25e21ca92f2a94ee40d8de974a168c292d82c16ee8920624eff91a85755469ad05473dce0f85893e3ed7794ec5c6bdd89cbd2023
|
|
||||||
SHA512 (pytorch-v2.9.1.tar.gz) = 88de0289fa2760abd69bef505b5ae3b6d7ff176b415cbb31bbc89ce5476a3800b322a97c4490f270f8b89657aff931bf9a5516202b268e0bb8b1f63dbb87b34a
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue