Merge branch 'rawhide' into f40

Update to 2.3.1

Signed-off-by: Tom Rix <trix@redhat.com>
This commit is contained in:
Tom Rix 2024-06-11 05:22:31 -06:00
commit e492aac499
32 changed files with 1954 additions and 1199 deletions

12
.gitignore vendored
View file

@ -1,2 +1,14 @@
/pytorch-v2.1.0.tar.gz
/pytorch-v2.1.2.tar.gz
/pytorch-975d428.tar.gz
/v23.3.3.tar.gz
/v2.11.1.tar.gz
/pytorch-6a89a75.tar.gz
/pytorch-74832f1.tar.gz
/pytorch-4bb5cb5.tar.gz
/tensorpipe-52791a2.tar.gz
/v1.41.0.tar.gz
/libnop-910b558.tar.gz
/pytorch-97ff6cf.tar.gz
/pytorch-v2.3.0.tar.gz
/pytorch-v2.3.1.tar.gz

View file

@ -0,0 +1,262 @@
From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 23 Feb 2024 08:27:30 -0500
Subject: [PATCH] Optionally use hipblaslt
The hipblaslt package is not available on Fedora.
Instead of requiring the package, make it optional.
If it is found, define the preprocessor variable HIPBLASLT
Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks
Signed-off-by: Tom Rix <trix@redhat.com>
---
aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++---
aten/src/ATen/cuda/CUDABlas.h | 2 +-
aten/src/ATen/cuda/CUDAContextLight.h | 4 ++--
aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++--
aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++---
aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------
cmake/Dependencies.cmake | 3 +++
cmake/public/LoadHIP.cmake | 4 ++--
8 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index d534ec5a178..e815463f630 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -14,7 +14,7 @@
#include <c10/util/irange.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 60000
+#ifdef HIPBLASLT
#include <hipblaslt/hipblaslt-ext.hpp>
#endif
// until hipblas has an API to accept flags, we must use rocblas here
@@ -781,7 +781,7 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
}
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000
// only for rocm 5.7 where we first supported hipblaslt, it was difficult
@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
};
} // namespace
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
template <typename Dtype>
void gemm_and_bias(
bool transpose_mat1,
@@ -1124,7 +1125,7 @@ template void gemm_and_bias(
at::BFloat16* result_ptr,
int64_t result_ld,
GEMMAndBiasActivationEpilogue activation);
-
+#endif
void scaled_gemm(
char transa,
char transb,
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
index eb12bb350c5..068607467dd 100644
--- a/aten/src/ATen/cuda/CUDABlas.h
+++ b/aten/src/ATen/cuda/CUDABlas.h
@@ -82,7 +82,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
template <>
void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
enum GEMMAndBiasActivationEpilogue {
None,
RELU,
diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h
index 4ec35f59a21..e28dc42034f 100644
--- a/aten/src/ATen/cuda/CUDAContextLight.h
+++ b/aten/src/ATen/cuda/CUDAContextLight.h
@@ -9,7 +9,7 @@
// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
// added bf16 support
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
#include <cublasLt.h>
#endif
@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
/* Handles */
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
#endif
diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp
index 6913d2cd95e..3d4276be372 100644
--- a/aten/src/ATen/cuda/CublasHandlePool.cpp
+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp
@@ -29,7 +29,7 @@ namespace at::cuda {
namespace {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
void createCublasLtHandle(cublasLtHandle_t *handle) {
TORCH_CUDABLAS_CHECK(cublasLtCreate(handle));
}
@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() {
return handle;
}
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasLtHandle_t getCurrentCUDABlasLtHandle() {
#ifdef USE_ROCM
c10::DeviceIndex device = 0;
diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h
index 3ba0d761277..dde1870cfbf 100644
--- a/aten/src/ATen/cuda/tunable/TunableGemm.h
+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h
@@ -11,7 +11,7 @@
#include <ATen/cuda/tunable/GemmCommon.h>
#ifdef USE_ROCM
-#if ROCM_VERSION >= 50700
+#ifdef HIPBLASLT
#include <ATen/cuda/tunable/GemmHipblaslt.h>
#endif
#include <ATen/cuda/tunable/GemmRocblas.h>
@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
}
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
if (env == nullptr || strcmp(env, "1") == 0) {
// disallow tuning of hipblaslt with c10::complex
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 29e5c5e3cf1..df56f3d7f1d 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -155,7 +155,7 @@ enum class Activation {
GELU,
};
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) {
switch (a) {
case Activation::None:
@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() {
#ifdef USE_ROCM
static bool isSupportedHipLtROCmArch(int index) {
+#if defined(HIPBLASLT)
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index);
std::string device_arch = prop->gcnArchName;
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) {
}
}
TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!");
+#endif
return false;
}
#endif
@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
at::ScalarType scalar_type = self.scalar_type();
c10::MaybeOwned<Tensor> self_;
if (&result != &self) {
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT)
// Strangely, if mat2 has only 1 row or column, we get
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
}
self__sizes = self_->sizes();
} else {
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
+#if defined(USE_ROCM) && defined(HIPBLASLT)
useLtInterface = !disable_addmm_cuda_lt &&
result.dim() == 2 && result.is_contiguous() &&
isSupportedHipLtROCmArch(self.device().index()) &&
@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj());
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
if (useLtInterface) {
AT_DISPATCH_FLOATING_TYPES_AND2(
at::ScalarType::Half,
@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
at::native::resize_output(amax, {});
-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000)
+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT))
cublasCommonArgs args(mat1, mat2, out);
const auto out_dtype_ = args.result->scalar_type();
TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt");
@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform.");
#endif
-#if defined(USE_ROCM) && ROCM_VERSION >= 60000
+#if defined(USE_ROCM) && defined(HIPBLASLT)
// rocm's hipblaslt does not yet support amax, so calculate separately
auto out_float32 = out.to(kFloat);
out_float32.abs_();
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index b7ffbeb07dc..2b6c3678984 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1273,6 +1273,9 @@ if(USE_ROCM)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
endif()
+ if(hipblast_FOUND)
+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT)
+ endif()
if(HIPBLASLT_CUSTOM_DATA_TYPE)
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE)
endif()
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index f6ca263c5e5..53eb0b63c1a 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -156,7 +156,7 @@ if(HIP_FOUND)
find_package_and_print_version(rocblas REQUIRED)
find_package_and_print_version(hipblas REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
- find_package_and_print_version(hipblaslt REQUIRED)
+ find_package_and_print_version(hipblaslt)
endif()
find_package_and_print_version(miopen REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
@@ -191,7 +191,7 @@ if(HIP_FOUND)
# roctx is part of roctracer
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)
- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
+ if(hipblastlt_FOUND)
# check whether hipblaslt is using its own datatype
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc")
file(WRITE ${file} ""
--
2.43.2

View file

@ -1,169 +0,0 @@
From 24cf0294a67d89ad70367940eea872162b44482c Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 23 Sep 2023 10:18:52 -0700
Subject: [PATCH] Prepare pytorch cmake for fedora
Use the system fmt
Remove foxi use
Remove warnings/errors for clang 17
fxdiv is not a library
build type is RelWithDebInfo
use system pthreadpool
Signed-off-by: Tom Rix <trix@redhat.com>
---
CMakeLists.txt | 6 +++---
.../native/quantized/cpu/qnnpack/CMakeLists.txt | 3 ---
c10/CMakeLists.txt | 2 +-
caffe2/CMakeLists.txt | 6 +-----
cmake/Dependencies.cmake | 16 +---------------
test/cpp/tensorexpr/CMakeLists.txt | 2 +-
torch/CMakeLists.txt | 2 +-
7 files changed, 8 insertions(+), 29 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a48eaf4e2..902ee70fd1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -682,7 +682,7 @@ set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should
# Set default build type
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "Build type not set - defaulting to Release")
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE)
+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE)
endif()
# The below means we are cross compiling for arm64 or x86_64 on MacOSX
@@ -917,8 +917,8 @@ if(NOT MSVC)
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
- append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS)
+# append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
+# append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS)
else()
# skip unwanted includes from windows.h
add_compile_definitions(WIN32_LEAN_AND_MEAN)
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
index fd6b7ff551..218c8e9b2a 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
@@ -393,10 +393,7 @@ elseif(NOT TARGET fxdiv AND USE_SYSTEM_FXDIV)
if(NOT FXDIV_HDR)
message(FATAL_ERROR "Cannot find fxdiv")
endif()
- add_library(fxdiv STATIC "${FXDIV_HDR}")
- set_property(TARGET fxdiv PROPERTY LINKER_LANGUAGE C)
endif()
-target_link_libraries(pytorch_qnnpack PRIVATE fxdiv)
# ---[ Configure psimd
if(NOT TARGET psimd AND NOT USE_SYSTEM_PSIMD)
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
index feebad7cbb..7c029cd88d 100644
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@@ -87,7 +87,7 @@ endif()
if(${USE_GLOG})
target_link_libraries(c10 PUBLIC glog::glog)
endif()
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
+target_link_libraries(c10 PRIVATE fmt)
find_package(Backtrace)
if(Backtrace_FOUND)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 74d0d55719..b975d388a7 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -107,7 +107,7 @@ endif()
# Note: the folders that are being commented out have not been properly
# addressed yet.
-if(NOT MSVC AND USE_XNNPACK)
+if(NOT MSVC AND USE_XNNPACK AND NOT USE_SYSTEM_FXDIV)
if(NOT TARGET fxdiv)
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -1022,10 +1022,6 @@ elseif(USE_CUDA)
endif()
endif()
-if(NOT MSVC AND USE_XNNPACK)
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
-endif()
-
# ==========================================================
# formerly-libtorch flags
# ==========================================================
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index c3abce52e4..21b40f3a88 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1555,7 +1555,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
if(NOT USE_SYSTEM_ONNX)
@@ -1588,8 +1587,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
- include_directories(${FOXI_INCLUDE_DIRS})
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
@@ -1834,18 +1831,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
-
-# Disable compiler feature checks for `fmt`.
-#
-# CMake compiles a little program to check compiler features. Some of our build
-# configurations (notably the mobile build analyzer) will populate
-# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
-# `fmt` is compatible with a superset of the compilers that PyTorch is, it
-# shouldn't be too bad to just disable the checks.
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
-
-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
+list(APPEND Caffe2_DEPENDENCY_LIBS fmt)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
# ---[ Kineto
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
index 7dff70630d..90b1003591 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt
@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
# pthreadpool header. For some build environment we need add the dependency
# explicitly.
if(USE_PTHREADPOOL)
- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface)
+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
endif()
if(USE_CUDA)
target_link_libraries(test_tensorexpr PRIVATE
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 62ee4c12a9..8d5375f320 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -84,7 +84,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
python::python
pybind::pybind11
shm
- fmt::fmt-header-only
+ fmt
ATEN_CPU_FILES_GEN_LIB)
if(USE_ASAN AND TARGET Sanitizer::address)
--
2.42.1

View file

@ -0,0 +1,115 @@
From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 4 Apr 2024 14:21:38 -0600
Subject: [PATCH] Reenable dim for python 3.12
In 3.12:
_PyArg_Parser added an element to the start of the structure.
So existing positional initialization is off. Switch to element
initialization.
_Py_CODEUNIT changed to from an int to a union, but relevant_op
is passed an int for the return of decoder.opcode, so the parameter
type is wrong, switch it to int.
The opcode PRECALL was removed, so reduce its handling to 3.11
Signed-off-by: Tom Rix <trix@redhat.com>
---
functorch/csrc/dim/dim.cpp | 24 +++++-------------------
functorch/csrc/dim/minpybind.h | 4 ++--
2 files changed, 7 insertions(+), 21 deletions(-)
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
index 4cc027504c77..e48b0d58081f 100644
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
@@ -6,20 +6,6 @@
#include <torch/csrc/utils/python_compat.h>
-
-// Many APIs have changed/don't exist anymore
-#if IS_PYTHON_3_12_PLUS
-
-#include "dim.h"
-
-// Re-enable this some day
-PyObject* Dim_init() {
- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
- return nullptr;
-}
-
-#else
-
#include "minpybind.h"
#include <frameobject.h>
#include <opcode.h>
@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
PY_BEGIN
mpy::handle sizes;
static const char * const _keywords[] = {"sizes", nullptr};
- static _PyArg_Parser parser = {"O", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
return nullptr;
}
@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
PY_BEGIN
int size;
static const char * const _keywords[] = {"N", nullptr};
- static _PyArg_Parser parser = {"i", _keywords, 0};
+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
return nullptr;
}
@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
// dim() --------------------
-static bool relevant_op(_Py_CODEUNIT c) {
+static bool relevant_op(int c) {
switch(c) {
case STORE_NAME:
case STORE_GLOBAL:
@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
auto lasti = PyFrame_GetLasti(f.ptr());
auto decoder = PyInstDecoder(c.ptr(), lasti);
- #if IS_PYTHON_3_11_PLUS
+ #if IS_PYTHON_3_11
// When py3.11 adapts bytecode lasti points to the precall
// rather than the call instruction after it
if (decoder.opcode() == PRECALL) {
@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
}
}
-#endif
+
diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
index de82b5af95a4..d76d4828bf80 100644
--- a/functorch/csrc/dim/minpybind.h
+++ b/functorch/csrc/dim/minpybind.h
@@ -621,7 +621,7 @@ struct vector_args {
PyObject *dummy = NULL;
_PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
#else
- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
_PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
#endif
@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
#define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
FORALL_ARGS(MPY_ARGS_DECLARE) \
- static _PyArg_Parser parser = {fmt, kwlist, 0}; \
+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
throw mpy::exception_set(); \
}
--
2.44.0

View file

@ -1,36 +0,0 @@
From 1d35a0b1f5cb39fd0c44a486157dc739a02c71b6 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Wed, 20 Dec 2023 11:23:18 -0500
Subject: [PATCH] add rocm_version fallback
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/utils/hipify/cuda_to_hip_mappings.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
index 73586440e7..9354057a39 100644
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
@@ -57,6 +57,18 @@ if os.path.isfile(rocm_version_h):
if match:
patch = int(match.group(1))
rocm_version = (major, minor, patch)
+else:
+ try:
+ hip_version = subprocess.check_output(["hipconfig", "--version"]).decode("utf-8")
+ hip_split = hip_version.split('.')
+ rocm_version = (int(hip_split[0]), int(hip_split[1]), 0)
+ except subprocess.CalledProcessError:
+ print(f"Warning: hipconfig --version failed")
+ except (FileNotFoundError, PermissionError, NotADirectoryError):
+ # Do not print warning. This is okay. This file can also be imported for non-ROCm builds.
+ pass
+
+
# List of math functions that should be replaced inside device code only.
MATH_TRANSPILATIONS = collections.OrderedDict(
--
2.43.0

View file

@ -1,28 +0,0 @@
From 06499575b177a218846f0e43ff4bc77d245f207f Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 1 Dec 2023 09:38:05 -0500
Subject: [PATCH] disable as-needed for libtorch
Signed-off-by: Tom Rix <trix@redhat.com>
---
caffe2/CMakeLists.txt | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index b975d388a7..5e9fd3b3f3 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -914,6 +914,10 @@ if(HAVE_SOVERSION)
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
endif()
+# Disable global as-needed
+set_target_properties(torch PROPERTIES LINK_FLAGS -Wl,--no-as-needed)
+
+
if(USE_ROCM)
filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$")
set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
--
2.42.1

View file

@ -0,0 +1,46 @@
From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 19 Mar 2024 11:32:37 -0400
Subject: [PATCH] disable use of aotriton
---
aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
index 96b839820efd..2d3dd0cb4b0f 100644
--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
@@ -21,9 +21,11 @@
#include <cmath>
#include <functional>
+#ifdef USE_FLASH_ATTENTION
#if USE_ROCM
#include <aotriton/flash.h>
#endif
+#endif
/**
* Note [SDPA Runtime Dispatch]
@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) {
}
bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) {
+#ifdef USE_FLASH_ATTENTION
// Check that the gpu is capable of running flash attention
using sm80 = SMVersion<8, 0>;
using sm90 = SMVersion<9, 0>;
@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug
}
#endif
return true;
+#else
+ return false;
+#endif
}
bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) {
--
2.44.0

View file

@ -0,0 +1,226 @@
From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 3 Apr 2024 17:58:46 -0700
Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo
unittests on 3.12 (#123216)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216
Approved by: https://github.com/jansel, https://github.com/malfet
---
test/dynamo/test_autograd_function.py | 3 ++
test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++
test/functorch/test_eager_transforms.py | 7 ++-
test/run_test.py | 3 --
torch/__init__.py | 5 +-
torch/_dynamo/eval_frame.py | 4 +-
torch/_dynamo/test_case.py | 8 +---
7 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py
index d23fec607afa..bc5ebc767038 100644
--- a/test/dynamo/test_autograd_function.py
+++ b/test/dynamo/test_autograd_function.py
@@ -2,6 +2,8 @@
import copy
import math
+import sys
+import unittest
import torch
@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase):
# I pulled all of these test cases from test_autograd.py
# In the future, we should make the Dynamo test suite actually
# run on test_autograd.py (it's disabled right now) and delete these.
+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+")
def test_smoke_from_test_autograd(self):
class Func(torch.autograd.Function):
@staticmethod
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index a73de8b1c7e9..8f54e0564e6b 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -9760,6 +9760,69 @@ fn
lambda mod: mod,
)
+ @xfailIfPy311
+ def test_outside_linear_module_free(self):
+ # Compared to test_linear_module_free, the linear
+ # layer is not the code object that is directly compiled.
+ def model_inp_ctr():
+ fc = torch.nn.Linear(100, 100)
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.fc_ref = fc
+
+ def forward(self, x):
+ return fc(x[0])
+
+ # return fc to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), fc)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref)
+
+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+")
+ def test_parameter_free(self):
+ def model_inp_ctr():
+ param = torch.nn.Parameter(torch.randn(100, 100))
+
+ class Mod(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.param = param
+
+ def forward(self, x):
+ return self.param * x[0]
+
+ # return param to keep it alive in _test_compile_model_free
+ return Mod(), (torch.randn(100, 100), param)
+
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param)
+
+ def test_raises_importerror1(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ try:
+ import some_module_that_surely_does_not_exist
+
+ return
+ except ImportError:
+ pass
+ return x.sin()
+
+ x = torch.randn(8)
+ self.assertEqual(fn(x), x.sin())
+
+ def test_raises_importerror2(self):
+ @torch.compile(backend="eager")
+ def fn(x):
+ import some_module_that_surely_does_not_exist
+
+ return x + 1
+
+ x = torch.randn(8)
+ with self.assertRaises(ImportError):
+ fn(x)
+
def test_dynamo_cache_move_to_front(self):
class Mod(torch.nn.Module):
def __init__(self):
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
index 09415cf8f48e..60790ec06059 100644
--- a/test/functorch/test_eager_transforms.py
+++ b/test/functorch/test_eager_transforms.py
@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase):
# Triton only supports GPU with SM70 or later.
@expectedFailureIf((IS_ARM64 and not IS_MACOS) or
IS_WINDOWS or
- (TEST_CUDA and not SM70OrLater) or
- (sys.version_info >= (3, 12)))
+ (TEST_CUDA and not SM70OrLater))
def test_compile_vmap_hessian(self, device):
# The model and inputs are a smaller version
# of code at benchmark repo:
@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase):
actual = opt_fn(params_and_buffers, x)
self.assertEqual(actual, expected)
- # torch.compile is not supported on Windows or on Python 3.12+
- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12)))
+ # torch.compile is not supported on Windows
+ @expectedFailureIf(IS_WINDOWS)
@torch._dynamo.config.patch(suppress_errors=False)
@torch._dynamo.config.patch(capture_func_transforms=True)
@skipIfTorchDynamo("Do not test torch.compile on top of torch.compile")
diff --git a/test/run_test.py b/test/run_test.py
index e86af9623042..ebb14df4167d 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT))
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
DISTRIBUTED_TEST_PREFIX = "distributed"
INDUCTOR_TEST_PREFIX = "inductor"
-DYNAMO_TEST_PREFIX = "dynamo"
# Note [ROCm parallel CI testing]
@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [
]
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)]
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]:
# these tests failing in Python 3.12 temporarily disabling
if sys.version_info >= (3, 12):
options.exclude.extend(INDUCTOR_TESTS)
- options.exclude.extend(DYNAMO_TESTS)
options.exclude.extend(
[
"functorch/test_dims",
diff --git a/torch/__init__.py b/torch/__init__.py
index d381712b4a35..26cdffe81d29 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *,
"""
_C._log_api_usage_once("torch.compile")
- # Temporary until we get proper support for python 3.12
- if sys.version_info >= (3, 12):
- raise RuntimeError("Dynamo is not supported on Python 3.12+")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Dynamo is not supported on Python 3.13+")
# Decorator mode
if model is None:
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
index 53ab0df3a947..0a80eeea99ed 100644
--- a/torch/_dynamo/eval_frame.py
+++ b/torch/_dynamo/eval_frame.py
@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
def check_if_dynamo_supported():
- if sys.version_info >= (3, 12):
- raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
+ if sys.version_info >= (3, 13):
+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile")
def is_dynamo_supported():
diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py
index e3cbef09eaae..297ea6e2bc2a 100644
--- a/torch/_dynamo/test_case.py
+++ b/torch/_dynamo/test_case.py
@@ -1,7 +1,6 @@
import contextlib
import importlib
import logging
-import sys
import torch
import torch.testing
@@ -20,12 +19,7 @@ log = logging.getLogger(__name__)
def run_tests(needs=()):
from torch.testing._internal.common_utils import run_tests
- if (
- TEST_WITH_TORCHDYNAMO
- or IS_WINDOWS
- or TEST_WITH_CROSSREF
- or sys.version_info >= (3, 12)
- ):
+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF:
return # skip testing
if isinstance(needs, str):
--
2.44.0

View file

@ -1,4 +1,4 @@
From c46146dc31ed3dc0ebb6ca28c01330db8ba5d4f2 Mon Sep 17 00:00:00 2001
From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Feb 2024 08:16:04 -0500
Subject: [PATCH] no third_party fmt
@ -10,23 +10,23 @@ Subject: [PATCH] no third_party fmt
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
index cb81556ff2..7529b2aec9 100644
index 1f742f4c176..4fa08913bdd 100644
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@@ -87,7 +87,7 @@ endif()
if(C10_USE_GLOG)
target_link_libraries(c10 PUBLIC glog::glog)
target_link_libraries(c10 PUBLIC glog::glog)
endif()
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
+target_link_libraries(c10 PRIVATE fmt)
if(C10_USE_NUMA)
target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR})
message(STATUS "NUMA paths:")
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 8310f29e01..c99d0d762a 100644
index 6f5a2d5feff..42fbf80f6e8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1834,7 +1834,7 @@ endif()
@@ -1837,7 +1837,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@ -35,7 +35,7 @@ index 8310f29e01..c99d0d762a 100644
# Disable compiler feature checks for `fmt`.
#
@@ -1843,9 +1843,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks.
@ -48,7 +48,7 @@ index 8310f29e01..c99d0d762a 100644
# ---[ Kineto
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 24903a207e..3a7751dc00 100644
index 97a72eed55b..9e5014d1980 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
@ -61,5 +61,5 @@ index 24903a207e..3a7751dc00 100644
if(USE_ASAN AND TARGET Sanitizer::address)
--
2.43.0
2.43.2

View file

@ -1,27 +0,0 @@
From cef92207b79ad53e3fcc1b0e22ba91cb9422968c Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 18 Nov 2023 09:38:52 -0500
Subject: [PATCH] python-torch link with python
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/CMakeLists.txt | 3 +++
1 file changed, 3 insertions(+)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 8d5375f320..6f8c7b65c4 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -312,6 +312,9 @@ add_dependencies(torch_python torch_python_stubs)
add_dependencies(torch_python flatbuffers)
+# Unresolved syms in -lpython
+target_link_libraries(torch_python PUBLIC ${PYTHON_LIBRARIES})
+
if(USE_PRECOMPILED_HEADERS)
target_precompile_headers(torch_python PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
--
2.42.1

View file

@ -1,33 +0,0 @@
From f70ef37d0b3c780fd17be199e66a81ffa679f93e Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 18 Nov 2023 12:05:43 -0500
Subject: [PATCH] python-torch remove ubuntu specific linking
Signed-off-by: Tom Rix <trix@redhat.com>
---
CMakeLists.txt | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86c34984b2..f7c4a7b05f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -479,9 +479,12 @@ option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
# This is a fix for a rare build issue on Ubuntu:
# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
-if(LINUX)
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
-endif()
+
+# This is not ubuntu!
+# if(LINUX)
+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
+# endif()
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--as-needed")
if(MSVC)
set(CMAKE_NINJA_CMCLDEPS_RC OFF)
--
2.42.1

View file

@ -1,26 +0,0 @@
From 527d1ce24a06a14788ca5fc2411985d7c1cb2923 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 13 Oct 2023 05:35:19 -0700
Subject: [PATCH] pytorch use SO version by default
Signed-off-by: Tom Rix <trix@redhat.com>
---
CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 902ee70fd1..86c34984b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -340,7 +340,7 @@ option(USE_TBB "Use TBB (Deprecated)" OFF)
cmake_dependent_option(
USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
-option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
+option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" ON)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
option(WERROR "Build with -Werror supported by the compiler" OFF)
--
2.42.1

View file

@ -0,0 +1,25 @@
From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 22 Feb 2024 09:28:11 -0500
Subject: [PATCH] reenable foxi linking
---
cmake/Dependencies.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 42fbf80f6e8..bc3a2dc6fee 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
endif()
# include_directories(${FOXI_INCLUDE_DIRS})
-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()
--
2.43.2

View file

@ -1,32 +0,0 @@
From c47c6e202d60ccac15aa36698bd4788415a9416b Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 25 Nov 2023 16:46:17 -0500
Subject: [PATCH] torch sane version
---
tools/generate_torch_version.py | 1 +
version.txt | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/generate_torch_version.py b/tools/generate_torch_version.py
index d90d3646ab..11d5bbeba5 100644
--- a/tools/generate_torch_version.py
+++ b/tools/generate_torch_version.py
@@ -42,6 +42,7 @@ def get_tag(pytorch_root: Union[str, Path]) -> str:
def get_torch_version(sha: Optional[str] = None) -> str:
pytorch_root = Path(__file__).parent.parent
version = open(pytorch_root / "version.txt").read().strip()
+ return version
if os.getenv("PYTORCH_BUILD_VERSION"):
assert os.getenv("PYTORCH_BUILD_NUMBER") is not None
diff --git a/version.txt b/version.txt
index ecaf4eea7c..7ec1d6db40 100644
--- a/version.txt
+++ b/version.txt
@@ -1,1 +1,1 @@
-2.1.0a0
+2.1.2
--
2.42.1

View file

@ -1,39 +0,0 @@
From 587a8b10bd3f7a68275356ee6eb6bb43ed711ba2 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:19:29 -0700
Subject: [PATCH 2/6] Regenerate flatbuffer header
For this error
torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41:
error: static assertion failed: Non-compatible flatbuffers version included
12 | FLATBUFFERS_VERSION_MINOR == 3 &&
PyTorch is expecting 23.3.3, what f38 has
Rawhide is at 23.5.26
Regenerate with
flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
index cffe8bc7a6..83575e4c19 100644
--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h
+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
@@ -9,8 +9,8 @@
// Ensure the included flatbuffers.h is the same version as when this file was
// generated, otherwise it may not be compatible.
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
- FLATBUFFERS_VERSION_MINOR == 3 &&
- FLATBUFFERS_VERSION_REVISION == 3,
+ FLATBUFFERS_VERSION_MINOR == 5 &&
+ FLATBUFFERS_VERSION_REVISION == 26,
"Non-compatible flatbuffers version included");
namespace torch {
--
2.41.0

View file

@ -1,73 +0,0 @@
From bb52aeacc6dfab2355249b7b5beb72c2761ec319 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 06:25:23 -0700
Subject: [PATCH 3/6] Stub in kineto ActivityType
There is an error with kineto is not used, the shim still
requires the ActivityTYpe.h header to get the enum Activity type.
So cut-n-paste just enough of the header in to do this.
Signed-off-by: Tom Rix <trix@redhat.com>
---
torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index 2a410719a1..7d6525befd 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,51 @@
#undef USE_KINETO
#endif
+#ifdef USE_KINETO
#include <ActivityType.h>
+#else
+namespace libkineto {
+// copied from header
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+ // Activity types enabled by default
+ CPU_OP = 0, // cpu side ops
+ USER_ANNOTATION,
+ GPU_USER_ANNOTATION,
+ GPU_MEMCPY,
+ GPU_MEMSET,
+ CONCURRENT_KERNEL, // on-device kernels
+ EXTERNAL_CORRELATION,
+ CUDA_RUNTIME, // host side cuda runtime events
+ CUDA_DRIVER, // host side cuda driver events
+ CPU_INSTANT_EVENT, // host side point-like events
+ PYTHON_FUNCTION,
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+ // Optional Activity types
+ CUDA_SYNC, // synchronization events between runtime and kernels
+ GLOW_RUNTIME, // host side glow runtime events
+ MTIA_RUNTIME, // host side MTIA runtime events
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
+};
+}
+
+#endif
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>
--
2.41.0

View file

@ -1,45 +0,0 @@
From fcf3cd70229cdc729d05ddab081ac886c9db6bd7 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 13:58:28 -0700
Subject: [PATCH] torch python 3.12 changes
Signed-off-by: Tom Rix <trix@redhat.com>
---
functorch/csrc/dim/dim.cpp | 6 ++++++
torch/csrc/dynamo/cpython_defs.h | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
index b611dc3e8c2..c7009478aee 100644
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
@@ -10,7 +10,13 @@
// Many APIs have changed/don't exist anymore
#if IS_PYTHON_3_12_PLUS
+#include "dim.h"
+
// Re-enable this some day
+PyObject* Dim_init() {
+ PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
+ return nullptr;
+}
#else
diff --git a/torch/csrc/dynamo/cpython_defs.h b/torch/csrc/dynamo/cpython_defs.h
index f0a0e1a88e2..f58becd246e 100644
--- a/torch/csrc/dynamo/cpython_defs.h
+++ b/torch/csrc/dynamo/cpython_defs.h
@@ -6,7 +6,7 @@
// should go in cpython_defs.c. Copying is required when, e.g.,
// we need to call internal CPython functions that are not exposed.
-#if IS_PYTHON_3_11_PLUS && !(IS_PYTHON_3_12_PLUS)
+#if IS_PYTHON_3_11_PLUS
#include <internal/pycore_frame.h>
--
2.43.0

View file

@ -1,25 +0,0 @@
From fc0d4ce06fecbd2bcd10fb13c515dc6625260870 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 17:21:13 -0700
Subject: [PATCH 5/6] disable submodule search
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 17bf16b89a..b8c8ae5506 100644
--- a/setup.py
+++ b/setup.py
@@ -452,7 +452,7 @@ def mirror_files_into_torchgen():
def build_deps():
report("-- Building version " + version)
- check_submodules()
+ # check_submodules()
check_pydep("yaml", "pyyaml")
build_caffe2(
--
2.41.0

15
README.NVIDIA Normal file
View file

@ -0,0 +1,15 @@
Some help for building this package for NVIDIA/CUDA
Review NVIDIA's documenation
https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
Review PyTorch documentation
https://github.com/pytorch/pytorch#from-source
Some convience strings to cut-n-paste
F39
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
Building is local.
Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed.

350
license.txt Normal file
View file

@ -0,0 +1,350 @@
#
# License Details
# Main license BSD 3-Clause
#
# Apache-2.0
# android/libs/fbjni/LICENSE
# android/libs/fbjni/CMakeLists.txt
# android/libs/fbjni/build.gradle
# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp
# android/libs/fbjni/cxx/fbjni/ByteBuffer.h
# android/libs/fbjni/cxx/fbjni/Context.h
# android/libs/fbjni/cxx/fbjni/File.h
# android/libs/fbjni/cxx/fbjni/JThread.h
# android/libs/fbjni/cxx/fbjni/NativeRunnable.h
# android/libs/fbjni/cxx/fbjni/OnLoad.cpp
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h
# android/libs/fbjni/cxx/fbjni/detail/Boxed.h
# android/libs/fbjni/cxx/fbjni/detail/Common.h
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h
# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp
# android/libs/fbjni/cxx/fbjni/detail/Environment.h
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h
# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h
# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h
# android/libs/fbjni/cxx/fbjni/detail/Iterator.h
# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h
# android/libs/fbjni/cxx/fbjni/detail/Log.h
# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h
# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h
# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp
# android/libs/fbjni/cxx/fbjni/detail/Meta.h
# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h
# android/libs/fbjni/cxx/fbjni/detail/References-forward.h
# android/libs/fbjni/cxx/fbjni/detail/References-inl.h
# android/libs/fbjni/cxx/fbjni/detail/References.cpp
# android/libs/fbjni/cxx/fbjni/detail/References.h
# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h
# android/libs/fbjni/cxx/fbjni/detail/Registration.h
# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h
# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h
# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp
# android/libs/fbjni/cxx/fbjni/detail/utf8.h
# android/libs/fbjni/cxx/fbjni/fbjni.cpp
# android/libs/fbjni/cxx/fbjni/fbjni.h
# android/libs/fbjni/cxx/lyra/cxa_throw.cpp
# android/libs/fbjni/cxx/lyra/lyra.cpp
# android/libs/fbjni/cxx/lyra/lyra.h
# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp
# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp
# android/libs/fbjni/cxx/lyra/lyra_exceptions.h
# android/libs/fbjni/gradle.properties
# android/libs/fbjni/gradle/android-tasks.gradle
# android/libs/fbjni/gradle/release.gradle
# android/libs/fbjni/gradlew
# android/libs/fbjni/gradlew.bat
# android/libs/fbjni/host.gradle
# android/libs/fbjni/java/com/facebook/jni/CppException.java
# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java
# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java
# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java
# android/libs/fbjni/java/com/facebook/jni/HybridData.java
# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java
# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java
# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java
# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java
# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java
# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java
# android/libs/fbjni/scripts/android-setup.sh
# android/libs/fbjni/scripts/run-host-tests.sh
# android/libs/fbjni/settings.gradle
# android/libs/fbjni/test/BaseFBJniTests.java
# android/libs/fbjni/test/ByteBufferTests.java
# android/libs/fbjni/test/DocTests.java
# android/libs/fbjni/test/FBJniTests.java
# android/libs/fbjni/test/HybridTests.java
# android/libs/fbjni/test/IteratorTests.java
# android/libs/fbjni/test/PrimitiveArrayTests.java
# android/libs/fbjni/test/ReadableByteChannelTests.java
# android/libs/fbjni/test/jni/CMakeLists.txt
# android/libs/fbjni/test/jni/byte_buffer_tests.cpp
# android/libs/fbjni/test/jni/doc_tests.cpp
# android/libs/fbjni/test/jni/expect.h
# android/libs/fbjni/test/jni/fbjni_onload.cpp
# android/libs/fbjni/test/jni/fbjni_tests.cpp
# android/libs/fbjni/test/jni/hybrid_tests.cpp
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h
# android/libs/fbjni/test/jni/iterator_tests.cpp
# android/libs/fbjni/test/jni/modified_utf8_test.cpp
# android/libs/fbjni/test/jni/no_rtti.cpp
# android/libs/fbjni/test/jni/no_rtti.h
# android/libs/fbjni/test/jni/primitive_array_tests.cpp
# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp
# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp
# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp
# android/pytorch_android/host/build.gradle
# aten/src/ATen/cuda/llvm_basic.cpp
# aten/src/ATen/cuda/llvm_complex.cpp
# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h
# aten/src/ATen/nnapi/codegen.py
# aten/src/ATen/nnapi/NeuralNetworks.h
# aten/src/ATen/nnapi/nnapi_wrapper.cpp
# aten/src/ATen/nnapi/nnapi_wrapper.h
# binaries/benchmark_args.h
# binaries/benchmark_helper.cc
# binaries/benchmark_helper.h
# binaries/compare_models_torch.cc
# binaries/convert_and_benchmark.cc
# binaries/convert_caffe_image_db.cc
# binaries/convert_db.cc
# binaries/convert_encoded_to_raw_leveldb.cc
# binaries/convert_image_to_tensor.cc
# binaries/core_overhead_benchmark.cc
# binaries/core_overhead_benchmark_gpu.cc
# binaries/db_throughput.cc
# binaries/dump_operator_names.cc
# binaries/inspect_gpu.cc
# binaries/load_benchmark_torch.cc
# binaries/make_cifar_db.cc
# binaries/make_image_db.cc
# binaries/make_mnist_db.cc
# binaries/optimize_for_mobile.cc
# binaries/parallel_info.cc
# binaries/predictor_verifier.cc
# binaries/print_core_object_sizes_gpu.cc
# binaries/print_registered_core_operators.cc
# binaries/run_plan.cc
# binaries/run_plan_mpi.cc
# binaries/speed_benchmark.cc
# binaries/speed_benchmark_torch.cc
# binaries/split_db.cc
# binaries/tsv_2_proto.cc
# binaries/tutorial_blob.cc
# binaries/zmq_feeder.cc
# c10/test/util/small_vector_test.cpp
# c10/util/FunctionRef.h
# c10/util/SmallVector.cpp
# c10/util/SmallVector.h
# c10/util/llvmMathExtras.h
# c10/util/sparse_bitset.h
# caffe2/contrib/aten/gen_op.py
# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc
# caffe2/contrib/fakelowp/fp16_fc_acc_op.h
# caffe2/contrib/gloo/allgather_ops.cc
# caffe2/contrib/gloo/allgather_ops.h
# caffe2/contrib/gloo/reduce_scatter_ops.cc
# caffe2/contrib/gloo/reduce_scatter_ops.h
# caffe2/core/hip/common_miopen.h
# caffe2/core/hip/common_miopen.hip
# caffe2/core/net_async_tracing.cc
# caffe2/core/net_async_tracing.h
# caffe2/core/net_async_tracing_test.cc
# caffe2/experiments/operators/fully_connected_op_decomposition.cc
# caffe2/experiments/operators/fully_connected_op_decomposition.h
# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc
# caffe2/experiments/operators/fully_connected_op_prune.cc
# caffe2/experiments/operators/fully_connected_op_prune.h
# caffe2/experiments/operators/fully_connected_op_sparse.cc
# caffe2/experiments/operators/fully_connected_op_sparse.h
# caffe2/experiments/operators/funhash_op.cc
# caffe2/experiments/operators/funhash_op.h
# caffe2/experiments/operators/sparse_funhash_op.cc
# caffe2/experiments/operators/sparse_funhash_op.h
# caffe2/experiments/operators/sparse_matrix_reshape_op.cc
# caffe2/experiments/operators/sparse_matrix_reshape_op.h
# caffe2/experiments/operators/tt_contraction_op.cc
# caffe2/experiments/operators/tt_contraction_op.h
# caffe2/experiments/operators/tt_contraction_op_gpu.cc
# caffe2/experiments/operators/tt_pad_op.cc
# caffe2/experiments/operators/tt_pad_op.h
# caffe2/experiments/python/SparseTransformer.py
# caffe2/experiments/python/convnet_benchmarks.py
# caffe2/experiments/python/device_reduce_sum_bench.py
# caffe2/experiments/python/funhash_op_test.py
# caffe2/experiments/python/net_construct_bench.py
# caffe2/experiments/python/sparse_funhash_op_test.py
# caffe2/experiments/python/sparse_reshape_op_test.py
# caffe2/experiments/python/tt_contraction_op_test.py
# caffe2/experiments/python/tt_pad_op_test.py
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h
# caffe2/mobile/contrib/nnapi/NeuralNetworks.h
# caffe2/mobile/contrib/nnapi/dlnnapi.c
# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc
# caffe2/observers/profile_observer.cc
# caffe2/observers/profile_observer.h
# caffe2/operators/hip/conv_op_miopen.hip
# caffe2/operators/hip/local_response_normalization_op_miopen.hip
# caffe2/operators/hip/pool_op_miopen.hip
# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip
# caffe2/operators/quantized/int8_utils.h
# caffe2/operators/stump_func_op.cc
# caffe2/operators/stump_func_op.cu
# caffe2/operators/stump_func_op.h
# caffe2/operators/unique_ops.cc
# caffe2/operators/unique_ops.cu
# caffe2/operators/unique_ops.h
# caffe2/operators/upsample_op.cc
# caffe2/operators/upsample_op.h
# caffe2/opt/fusion.h
# caffe2/python/layers/label_smooth.py
# caffe2/python/mint/static/css/simple-sidebar.css
# caffe2/python/modeling/get_entry_from_blobs.py
# caffe2/python/modeling/get_entry_from_blobs_test.py
# caffe2/python/modeling/gradient_clipping_test.py
# caffe2/python/operator_test/unique_ops_test.py
# caffe2/python/operator_test/upsample_op_test.py
# caffe2/python/operator_test/weight_scale_test.py
# caffe2/python/pybind_state_int8.cc
# caffe2/python/transformations.py
# caffe2/python/transformations_test.py
# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
# caffe2/quantization/server/batch_matmul_dnnlowp_op.h
# caffe2/quantization/server/compute_equalization_scale_test.py
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h
# caffe2/quantization/server/elementwise_sum_relu_op.cc
# caffe2/quantization/server/fb_fc_packed_op.cc
# caffe2/quantization/server/fb_fc_packed_op.h
# caffe2/quantization/server/fbgemm_fp16_pack_op.cc
# caffe2/quantization/server/fbgemm_fp16_pack_op.h
# caffe2/quantization/server/fully_connected_fake_lowp_op.cc
# caffe2/quantization/server/fully_connected_fake_lowp_op.h
# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py
# caffe2/quantization/server/int8_gen_quant_params_test.py
# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py
# caffe2/quantization/server/spatial_batch_norm_relu_op.cc
# caffe2/sgd/weight_scale_op.cc
# caffe2/sgd/weight_scale_op.h
# caffe2/utils/bench_utils.h
# functorch/examples/maml_omniglot/maml-omniglot-higher.py
# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py
# functorch/examples/maml_omniglot/maml-omniglot-transforms.py
# functorch/examples/maml_omniglot/support/omniglot_loaders.py
# modules/detectron/group_spatial_softmax_op.cc
# modules/detectron/group_spatial_softmax_op.cu
# modules/detectron/group_spatial_softmax_op.h
# modules/detectron/ps_roi_pool_op.cc
# modules/detectron/ps_roi_pool_op.h
# modules/detectron/roi_pool_f_op.cc
# modules/detectron/roi_pool_f_op.cu
# modules/detectron/roi_pool_f_op.h
# modules/detectron/sample_as_op.cc
# modules/detectron/sample_as_op.cu
# modules/detectron/sample_as_op.h
# modules/detectron/select_smooth_l1_loss_op.cc
# modules/detectron/select_smooth_l1_loss_op.cu
# modules/detectron/select_smooth_l1_loss_op.h
# modules/detectron/sigmoid_cross_entropy_loss_op.cc
# modules/detectron/sigmoid_cross_entropy_loss_op.cu
# modules/detectron/sigmoid_cross_entropy_loss_op.h
# modules/detectron/sigmoid_focal_loss_op.cc
# modules/detectron/sigmoid_focal_loss_op.cu
# modules/detectron/sigmoid_focal_loss_op.h
# modules/detectron/smooth_l1_loss_op.cc
# modules/detectron/smooth_l1_loss_op.cu
# modules/detectron/smooth_l1_loss_op.h
# modules/detectron/softmax_focal_loss_op.cc
# modules/detectron/softmax_focal_loss_op.cu
# modules/detectron/softmax_focal_loss_op.h
# modules/detectron/spatial_narrow_as_op.cc
# modules/detectron/spatial_narrow_as_op.cu
# modules/detectron/spatial_narrow_as_op.h
# modules/detectron/upsample_nearest_op.cc
# modules/detectron/upsample_nearest_op.h
# modules/module_test/module_test_dynamic.cc
# modules/rocksdb/rocksdb.cc
# scripts/apache_header.txt
# scripts/apache_python.txt
# torch/distributions/lkj_cholesky.py
#
# Apache 2.0 AND BSD 2-Clause
# caffe2/operators/deform_conv_op.cu
#
# Apache 2.0 AND BSD 2-Clause AND MIT
# modules/detectron/ps_roi_pool_op.cu
#
# Apache 2.0 AND BSD 2-Clause
# modules/detectron/upsample_nearest_op.cu
#
# BSD 0-Clause
# torch/csrc/utils/pythoncapi_compat.h
#
# BSD 2-Clause
# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE
# caffe2/image/transform_gpu.cu
# caffe2/image/transform_gpu.h
#
# BSL-1.0
# c10/util/flat_hash_map.h
# c10/util/hash.h
# c10/util/Optional.h
# c10/util/order_preserving_flat_hash_map.h
# c10/util/strong_type.h
# c10/util/variant.h
#
# GPL-3.0-or-later AND MIT
# c10/util/reverse_iterator.h
#
# Khronos
# These files are for OpenCL, an unused option
# Replace them later, as-needed with the opencl-headers.rpm
#
# caffe2/contrib/opencl/OpenCL/cl.hpp
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h
# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h
#
# MIT
# android/libs/fbjni/googletest-CMakeLists.txt.in
# c10/util/BFloat16-math.h
# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h
# caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c
# caffe2/onnx/torch_ops/defs.cc
# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake
# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake
# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake
# functorch/einops/_parsing.py
# test/functorch/test_parsing.py
# test/functorch/test_rearrange.py
# third_party/miniz-2.1.0/LICENSE
# third_party/miniz-2.1.0/miniz.c
# tools/coverage_plugins_package/setup.py
# torch/_appdirs.py
# torch/utils/hipify/hipify_python.py
#
# Public Domain
# caffe2/mobile/contrib/libopencl-stub/LICENSE
# caffe2/utils/murmur_hash3.cc
# caffe2/utils/murmur_hash3.h
#
# Zlib
# aten/src/ATen/native/cpu/avx_mathfun.h

View file

@ -1,25 +0,0 @@
From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 29 Sep 2023 17:21:13 -0700
Subject: [PATCH] disable submodule search
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 0fd886d945..e397df8fb6 100644
--- a/setup.py
+++ b/setup.py
@@ -458,7 +458,7 @@ def mirror_files_into_torchgen():
def build_deps():
report("-- Building version " + version)
- check_submodules()
+ # check_submodules()
check_pydep("yaml", "pyyaml")
build_caffe2(
--
2.43.0

154
pyproject.toml Normal file
View file

@ -0,0 +1,154 @@
[build-system]
requires = [
"setuptools",
"wheel",
"astunparse",
"numpy",
"ninja",
"pyyaml",
"cmake",
"typing-extensions",
"requests",
]
# Use legacy backend to import local packages in setup.py
build-backend = "setuptools.build_meta:__legacy__"
[tool.black]
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
# line-length = 120
target-version = ["py38", "py39", "py310", "py311"]
[tool.ruff]
target-version = "py38"
# NOTE: Synchoronize the ignores with .flake8
ignore = [
# these ignores are from flake8-bugbear; please fix!
"B007", "B008", "B017",
"B018", # Useless expression
"B019",
"B023",
"B028", # No explicit `stacklevel` keyword argument found
"B904",
"E402",
"C408", # C408 ignored because we like the dict keyword argument syntax
"E501", # E501 is not flexible enough, we're using B950 instead
"E721",
"E731", # Assign lambda expression
"E741",
"EXE001",
"F405",
"F841",
# these ignores are from flake8-logging-format; please fix!
"G101",
# these ignores are from ruff NPY; please fix!
"NPY002",
# these ignores are from ruff PERF; please fix!
"PERF203",
"PERF401",
"PERF403",
# these ignores are from PYI; please fix!
"PYI019",
"PYI024",
"PYI036",
"PYI041",
"PYI056",
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
"SIM108",
"SIM110",
"SIM114", # Combine `if` branches using logical `or` operator
"SIM115",
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
"SIM117",
"SIM118",
"UP006", # keep-runtime-typing
"UP007", # keep-runtime-typing
]
line-length = 120
select = [
"B",
"C4",
"G",
"E",
"EXE",
"F",
"SIM1",
"W",
# Not included in flake8
"NPY",
"PERF",
"PGH004",
"PIE794",
"PIE800",
"PIE804",
"PIE807",
"PIE810",
"PLC0131", # type bivariance
"PLC0132", # type param mismatch
"PLC0205", # string as __slots__
"PLE",
"PLR0133", # constant comparison
"PLR0206", # property with params
"PLR1722", # use sys exit
"PLW0129", # assert on string literal
"PLW0406", # import self
"PLW0711", # binary op exception
"PLW1509", # preexec_fn not safe with threads
"PLW3301", # nested min max
"PT006", # TODO: enable more PT rules
"PT022",
"PT023",
"PT024",
"PT025",
"PT026",
"PYI",
"RUF008", # mutable dataclass default
"RUF015", # access first ele in constant time
"RUF016", # type error non-integer index
"RUF017",
"TRY200",
"TRY302",
"UP",
]
[tool.ruff.per-file-ignores]
"__init__.py" = [
"F401",
]
"test/typing/reveal/**" = [
"F821",
]
"test/torch_np/numpy_tests/**" = [
"F821",
]
"test/jit/**" = [
"PLR0133", # tests require this for JIT
"PYI",
"RUF015",
"UP", # We don't want to modify the jit test as they test specify syntax
]
"test/test_jit.py" = [
"PLR0133", # tests require this for JIT
"PYI",
"RUF015",
"UP", # We don't want to modify the jit test as they test specify syntax
]
"torch/onnx/**" = [
"UP037", # ONNX does runtime type checking
]
"torchgen/api/types/__init__.py" = [
"F401",
"F403",
]
"torchgen/executorch/api/types/__init__.py" = [
"F401",
"F403",
]
"torch/utils/collect_env.py" = [
"UP", # collect_env.py needs to work with older versions of Python
]

File diff suppressed because it is too large Load diff

12
sources
View file

@ -1,2 +1,14 @@
SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44
SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28
SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580
SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda
SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4