Merge branch 'rawhide' into f40
Update to 2.3.1 Signed-off-by: Tom Rix <trix@redhat.com>
This commit is contained in:
commit
e492aac499
32 changed files with 1954 additions and 1199 deletions
12
.gitignore
vendored
12
.gitignore
vendored
|
|
@ -1,2 +1,14 @@
|
|||
/pytorch-v2.1.0.tar.gz
|
||||
/pytorch-v2.1.2.tar.gz
|
||||
/pytorch-975d428.tar.gz
|
||||
/v23.3.3.tar.gz
|
||||
/v2.11.1.tar.gz
|
||||
/pytorch-6a89a75.tar.gz
|
||||
/pytorch-74832f1.tar.gz
|
||||
/pytorch-4bb5cb5.tar.gz
|
||||
/tensorpipe-52791a2.tar.gz
|
||||
/v1.41.0.tar.gz
|
||||
/libnop-910b558.tar.gz
|
||||
/pytorch-97ff6cf.tar.gz
|
||||
/pytorch-v2.3.0.tar.gz
|
||||
/pytorch-v2.3.1.tar.gz
|
||||
|
|
|
|||
262
0001-Optionally-use-hipblaslt.patch
Normal file
262
0001-Optionally-use-hipblaslt.patch
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 23 Feb 2024 08:27:30 -0500
|
||||
Subject: [PATCH] Optionally use hipblaslt
|
||||
|
||||
The hipblaslt package is not available on Fedora.
|
||||
Instead of requiring the package, make it optional.
|
||||
If it is found, define the preprocessor variable HIPBLASLT
|
||||
Convert the checks for ROCM_VERSION >= 50700 to HIPBLASLT checks
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++---
|
||||
aten/src/ATen/cuda/CUDABlas.h | 2 +-
|
||||
aten/src/ATen/cuda/CUDAContextLight.h | 4 ++--
|
||||
aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++--
|
||||
aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++---
|
||||
aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------
|
||||
cmake/Dependencies.cmake | 3 +++
|
||||
cmake/public/LoadHIP.cmake | 4 ++--
|
||||
8 files changed, 25 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
index d534ec5a178..e815463f630 100644
|
||||
--- a/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
|
||||
@@ -14,7 +14,7 @@
|
||||
#include <c10/util/irange.h>
|
||||
|
||||
#ifdef USE_ROCM
|
||||
-#if ROCM_VERSION >= 60000
|
||||
+#ifdef HIPBLASLT
|
||||
#include <hipblaslt/hipblaslt-ext.hpp>
|
||||
#endif
|
||||
// until hipblas has an API to accept flags, we must use rocblas here
|
||||
@@ -781,7 +781,7 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
||||
}
|
||||
}
|
||||
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
|
||||
#if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000
|
||||
// only for rocm 5.7 where we first supported hipblaslt, it was difficult
|
||||
@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
|
||||
};
|
||||
} // namespace
|
||||
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
template <typename Dtype>
|
||||
void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
@@ -1124,7 +1125,7 @@ template void gemm_and_bias(
|
||||
at::BFloat16* result_ptr,
|
||||
int64_t result_ld,
|
||||
GEMMAndBiasActivationEpilogue activation);
|
||||
-
|
||||
+#endif
|
||||
void scaled_gemm(
|
||||
char transa,
|
||||
char transb,
|
||||
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
|
||||
index eb12bb350c5..068607467dd 100644
|
||||
--- a/aten/src/ATen/cuda/CUDABlas.h
|
||||
+++ b/aten/src/ATen/cuda/CUDABlas.h
|
||||
@@ -82,7 +82,7 @@ void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
|
||||
template <>
|
||||
void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
|
||||
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
enum GEMMAndBiasActivationEpilogue {
|
||||
None,
|
||||
RELU,
|
||||
diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h
|
||||
index 4ec35f59a21..e28dc42034f 100644
|
||||
--- a/aten/src/ATen/cuda/CUDAContextLight.h
|
||||
+++ b/aten/src/ATen/cuda/CUDAContextLight.h
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
|
||||
// added bf16 support
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
#include <cublasLt.h>
|
||||
#endif
|
||||
|
||||
@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
|
||||
/* Handles */
|
||||
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
|
||||
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
|
||||
#endif
|
||||
|
||||
diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||
index 6913d2cd95e..3d4276be372 100644
|
||||
--- a/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||
+++ b/aten/src/ATen/cuda/CublasHandlePool.cpp
|
||||
@@ -29,7 +29,7 @@ namespace at::cuda {
|
||||
|
||||
namespace {
|
||||
|
||||
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
void createCublasLtHandle(cublasLtHandle_t *handle) {
|
||||
TORCH_CUDABLAS_CHECK(cublasLtCreate(handle));
|
||||
}
|
||||
@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() {
|
||||
return handle;
|
||||
}
|
||||
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
cublasLtHandle_t getCurrentCUDABlasLtHandle() {
|
||||
#ifdef USE_ROCM
|
||||
c10::DeviceIndex device = 0;
|
||||
diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||
index 3ba0d761277..dde1870cfbf 100644
|
||||
--- a/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||
+++ b/aten/src/ATen/cuda/tunable/TunableGemm.h
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include <ATen/cuda/tunable/GemmCommon.h>
|
||||
#ifdef USE_ROCM
|
||||
-#if ROCM_VERSION >= 50700
|
||||
+#ifdef HIPBLASLT
|
||||
#include <ATen/cuda/tunable/GemmHipblaslt.h>
|
||||
#endif
|
||||
#include <ATen/cuda/tunable/GemmRocblas.h>
|
||||
@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
|
||||
}
|
||||
#endif
|
||||
|
||||
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (env == nullptr || strcmp(env, "1") == 0) {
|
||||
// disallow tuning of hipblaslt with c10::complex
|
||||
@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
|
||||
}
|
||||
#endif
|
||||
|
||||
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (env == nullptr || strcmp(env, "1") == 0) {
|
||||
// disallow tuning of hipblaslt with c10::complex
|
||||
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
|
||||
index 29e5c5e3cf1..df56f3d7f1d 100644
|
||||
--- a/aten/src/ATen/native/cuda/Blas.cpp
|
||||
+++ b/aten/src/ATen/native/cuda/Blas.cpp
|
||||
@@ -155,7 +155,7 @@ enum class Activation {
|
||||
GELU,
|
||||
};
|
||||
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) {
|
||||
switch (a) {
|
||||
case Activation::None:
|
||||
@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() {
|
||||
|
||||
#ifdef USE_ROCM
|
||||
static bool isSupportedHipLtROCmArch(int index) {
|
||||
+#if defined(HIPBLASLT)
|
||||
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index);
|
||||
std::string device_arch = prop->gcnArchName;
|
||||
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
|
||||
@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) {
|
||||
}
|
||||
}
|
||||
TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!");
|
||||
+#endif
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||
at::ScalarType scalar_type = self.scalar_type();
|
||||
c10::MaybeOwned<Tensor> self_;
|
||||
if (&result != &self) {
|
||||
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||
+#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
// Strangely, if mat2 has only 1 row or column, we get
|
||||
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
|
||||
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
|
||||
@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||
}
|
||||
self__sizes = self_->sizes();
|
||||
} else {
|
||||
-#if defined(USE_ROCM) && ROCM_VERSION >= 50700
|
||||
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
useLtInterface = !disable_addmm_cuda_lt &&
|
||||
result.dim() == 2 && result.is_contiguous() &&
|
||||
isSupportedHipLtROCmArch(self.device().index()) &&
|
||||
@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
||||
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj());
|
||||
|
||||
-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
|
||||
+#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
if (useLtInterface) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
at::ScalarType::Half,
|
||||
@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
|
||||
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
|
||||
at::native::resize_output(amax, {});
|
||||
|
||||
-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000)
|
||||
+#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT))
|
||||
cublasCommonArgs args(mat1, mat2, out);
|
||||
const auto out_dtype_ = args.result->scalar_type();
|
||||
TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt");
|
||||
@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
|
||||
TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform.");
|
||||
#endif
|
||||
|
||||
-#if defined(USE_ROCM) && ROCM_VERSION >= 60000
|
||||
+#if defined(USE_ROCM) && defined(HIPBLASLT)
|
||||
// rocm's hipblaslt does not yet support amax, so calculate separately
|
||||
auto out_float32 = out.to(kFloat);
|
||||
out_float32.abs_();
|
||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||
index b7ffbeb07dc..2b6c3678984 100644
|
||||
--- a/cmake/Dependencies.cmake
|
||||
+++ b/cmake/Dependencies.cmake
|
||||
@@ -1273,6 +1273,9 @@ if(USE_ROCM)
|
||||
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0")
|
||||
list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2)
|
||||
endif()
|
||||
+ if(hipblaslt_FOUND) # result of the optional hipblaslt package lookup in LoadHIP.cmake
|
||||
+ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT)
|
||||
+ endif()
|
||||
if(HIPBLASLT_CUSTOM_DATA_TYPE)
|
||||
list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE)
|
||||
endif()
|
||||
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
|
||||
index f6ca263c5e5..53eb0b63c1a 100644
|
||||
--- a/cmake/public/LoadHIP.cmake
|
||||
+++ b/cmake/public/LoadHIP.cmake
|
||||
@@ -156,7 +156,7 @@ if(HIP_FOUND)
|
||||
find_package_and_print_version(rocblas REQUIRED)
|
||||
find_package_and_print_version(hipblas REQUIRED)
|
||||
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
|
||||
- find_package_and_print_version(hipblaslt REQUIRED)
|
||||
+ find_package_and_print_version(hipblaslt)
|
||||
endif()
|
||||
find_package_and_print_version(miopen REQUIRED)
|
||||
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
|
||||
@@ -191,7 +191,7 @@ if(HIP_FOUND)
|
||||
# roctx is part of roctracer
|
||||
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)
|
||||
|
||||
- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
|
||||
+ if(hipblaslt_FOUND)
|
||||
# check whether hipblaslt is using its own datatype
|
||||
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc")
|
||||
file(WRITE ${file} ""
|
||||
--
|
||||
2.43.2
|
||||
|
||||
|
|
@ -1,169 +0,0 @@
|
|||
From 24cf0294a67d89ad70367940eea872162b44482c Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 23 Sep 2023 10:18:52 -0700
|
||||
Subject: [PATCH] Prepare pytorch cmake for fedora
|
||||
|
||||
Use the system fmt
|
||||
Remove foxi use
|
||||
Remove warnings/errors for clang 17
|
||||
fxdiv is not a library
|
||||
build type is RelWithDebInfo
|
||||
use system pthreadpool
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
CMakeLists.txt | 6 +++---
|
||||
.../native/quantized/cpu/qnnpack/CMakeLists.txt | 3 ---
|
||||
c10/CMakeLists.txt | 2 +-
|
||||
caffe2/CMakeLists.txt | 6 +-----
|
||||
cmake/Dependencies.cmake | 16 +---------------
|
||||
test/cpp/tensorexpr/CMakeLists.txt | 2 +-
|
||||
torch/CMakeLists.txt | 2 +-
|
||||
7 files changed, 8 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 3a48eaf4e2..902ee70fd1 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -682,7 +682,7 @@ set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should
|
||||
# Set default build type
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "Build type not set - defaulting to Release")
|
||||
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE)
|
||||
+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE)
|
||||
endif()
|
||||
|
||||
# The below means we are cross compiling for arm64 or x86_64 on MacOSX
|
||||
@@ -917,8 +917,8 @@ if(NOT MSVC)
|
||||
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
|
||||
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
|
||||
- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
||||
- append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS)
|
||||
+# append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
||||
+# append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS)
|
||||
else()
|
||||
# skip unwanted includes from windows.h
|
||||
add_compile_definitions(WIN32_LEAN_AND_MEAN)
|
||||
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
|
||||
index fd6b7ff551..218c8e9b2a 100644
|
||||
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
|
||||
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
|
||||
@@ -393,10 +393,7 @@ elseif(NOT TARGET fxdiv AND USE_SYSTEM_FXDIV)
|
||||
if(NOT FXDIV_HDR)
|
||||
message(FATAL_ERROR "Cannot find fxdiv")
|
||||
endif()
|
||||
- add_library(fxdiv STATIC "${FXDIV_HDR}")
|
||||
- set_property(TARGET fxdiv PROPERTY LINKER_LANGUAGE C)
|
||||
endif()
|
||||
-target_link_libraries(pytorch_qnnpack PRIVATE fxdiv)
|
||||
|
||||
# ---[ Configure psimd
|
||||
if(NOT TARGET psimd AND NOT USE_SYSTEM_PSIMD)
|
||||
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
|
||||
index feebad7cbb..7c029cd88d 100644
|
||||
--- a/c10/CMakeLists.txt
|
||||
+++ b/c10/CMakeLists.txt
|
||||
@@ -87,7 +87,7 @@ endif()
|
||||
if(${USE_GLOG})
|
||||
target_link_libraries(c10 PUBLIC glog::glog)
|
||||
endif()
|
||||
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
|
||||
+target_link_libraries(c10 PRIVATE fmt)
|
||||
|
||||
find_package(Backtrace)
|
||||
if(Backtrace_FOUND)
|
||||
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
|
||||
index 74d0d55719..b975d388a7 100644
|
||||
--- a/caffe2/CMakeLists.txt
|
||||
+++ b/caffe2/CMakeLists.txt
|
||||
@@ -107,7 +107,7 @@ endif()
|
||||
# Note: the folders that are being commented out have not been properly
|
||||
# addressed yet.
|
||||
|
||||
-if(NOT MSVC AND USE_XNNPACK)
|
||||
+if(NOT MSVC AND USE_XNNPACK AND NOT USE_SYSTEM_FXDIV)
|
||||
if(NOT TARGET fxdiv)
|
||||
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||
@@ -1022,10 +1022,6 @@ elseif(USE_CUDA)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
-if(NOT MSVC AND USE_XNNPACK)
|
||||
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
|
||||
-endif()
|
||||
-
|
||||
# ==========================================================
|
||||
# formerly-libtorch flags
|
||||
# ==========================================================
|
||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||
index c3abce52e4..21b40f3a88 100644
|
||||
--- a/cmake/Dependencies.cmake
|
||||
+++ b/cmake/Dependencies.cmake
|
||||
@@ -1555,7 +1555,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
|
||||
endif()
|
||||
endif()
|
||||
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
|
||||
|
||||
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
|
||||
if(NOT USE_SYSTEM_ONNX)
|
||||
@@ -1588,8 +1587,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
|
||||
endif()
|
||||
- include_directories(${FOXI_INCLUDE_DIRS})
|
||||
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||
# Recover the build shared libs option.
|
||||
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
|
||||
endif()
|
||||
@@ -1834,18 +1831,7 @@ endif()
|
||||
#
|
||||
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
|
||||
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||
-
|
||||
-# Disable compiler feature checks for `fmt`.
|
||||
-#
|
||||
-# CMake compiles a little program to check compiler features. Some of our build
|
||||
-# configurations (notably the mobile build analyzer) will populate
|
||||
-# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
|
||||
-# `fmt` is compatible with a superset of the compilers that PyTorch is, it
|
||||
-# shouldn't be too bad to just disable the checks.
|
||||
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
|
||||
-
|
||||
-list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
|
||||
+list(APPEND Caffe2_DEPENDENCY_LIBS fmt)
|
||||
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
|
||||
|
||||
# ---[ Kineto
|
||||
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
|
||||
index 7dff70630d..90b1003591 100644
|
||||
--- a/test/cpp/tensorexpr/CMakeLists.txt
|
||||
+++ b/test/cpp/tensorexpr/CMakeLists.txt
|
||||
@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
|
||||
# pthreadpool header. For some build environment we need add the dependency
|
||||
# explicitly.
|
||||
if(USE_PTHREADPOOL)
|
||||
- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface)
|
||||
+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
|
||||
endif()
|
||||
if(USE_CUDA)
|
||||
target_link_libraries(test_tensorexpr PRIVATE
|
||||
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
|
||||
index 62ee4c12a9..8d5375f320 100644
|
||||
--- a/torch/CMakeLists.txt
|
||||
+++ b/torch/CMakeLists.txt
|
||||
@@ -84,7 +84,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
|
||||
python::python
|
||||
pybind::pybind11
|
||||
shm
|
||||
- fmt::fmt-header-only
|
||||
+ fmt
|
||||
ATEN_CPU_FILES_GEN_LIB)
|
||||
|
||||
if(USE_ASAN AND TARGET Sanitizer::address)
|
||||
--
|
||||
2.42.1
|
||||
|
||||
115
0001-Reenable-dim-for-python-3.12.patch
Normal file
115
0001-Reenable-dim-for-python-3.12.patch
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Thu, 4 Apr 2024 14:21:38 -0600
|
||||
Subject: [PATCH] Reenable dim for python 3.12
|
||||
|
||||
In 3.12:
|
||||
|
||||
_PyArg_Parser added an element to the start of the structure.
|
||||
So existing positional initialization is off. Switch to element
|
||||
initialization.
|
||||
|
||||
_Py_CODEUNIT changed from an int to a union, but relevant_op
|
||||
is passed an int for the return of decoder.opcode, so the parameter
|
||||
type is wrong, switch it to int.
|
||||
|
||||
The opcode PRECALL was removed, so reduce its handling to 3.11
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
functorch/csrc/dim/dim.cpp | 24 +++++-------------------
|
||||
functorch/csrc/dim/minpybind.h | 4 ++--
|
||||
2 files changed, 7 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
|
||||
index 4cc027504c77..e48b0d58081f 100644
|
||||
--- a/functorch/csrc/dim/dim.cpp
|
||||
+++ b/functorch/csrc/dim/dim.cpp
|
||||
@@ -6,20 +6,6 @@
|
||||
|
||||
#include <torch/csrc/utils/python_compat.h>
|
||||
|
||||
-
|
||||
-// Many APIs have changed/don't exist anymore
|
||||
-#if IS_PYTHON_3_12_PLUS
|
||||
-
|
||||
-#include "dim.h"
|
||||
-
|
||||
-// Re-enable this some day
|
||||
-PyObject* Dim_init() {
|
||||
- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
|
||||
- return nullptr;
|
||||
-}
|
||||
-
|
||||
-#else
|
||||
-
|
||||
#include "minpybind.h"
|
||||
#include <frameobject.h>
|
||||
#include <opcode.h>
|
||||
@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
|
||||
PY_BEGIN
|
||||
mpy::handle sizes;
|
||||
static const char * const _keywords[] = {"sizes", nullptr};
|
||||
- static _PyArg_Parser parser = {"O", _keywords, 0};
|
||||
+ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
|
||||
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
|
||||
PY_BEGIN
|
||||
int size;
|
||||
static const char * const _keywords[] = {"N", nullptr};
|
||||
- static _PyArg_Parser parser = {"i", _keywords, 0};
|
||||
+ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
|
||||
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
|
||||
|
||||
// dim() --------------------
|
||||
|
||||
-static bool relevant_op(_Py_CODEUNIT c) {
|
||||
+static bool relevant_op(int c) {
|
||||
switch(c) {
|
||||
case STORE_NAME:
|
||||
case STORE_GLOBAL:
|
||||
@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
|
||||
auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
|
||||
auto lasti = PyFrame_GetLasti(f.ptr());
|
||||
auto decoder = PyInstDecoder(c.ptr(), lasti);
|
||||
- #if IS_PYTHON_3_11_PLUS
|
||||
+ #if (IS_PYTHON_3_11_PLUS) && !(IS_PYTHON_3_12_PLUS)
|
||||
// When py3.11 adapts bytecode lasti points to the precall
|
||||
// rather than the call instruction after it
|
||||
if (decoder.opcode() == PRECALL) {
|
||||
@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
|
||||
}
|
||||
}
|
||||
|
||||
-#endif
|
||||
+
|
||||
diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
|
||||
index de82b5af95a4..d76d4828bf80 100644
|
||||
--- a/functorch/csrc/dim/minpybind.h
|
||||
+++ b/functorch/csrc/dim/minpybind.h
|
||||
@@ -621,7 +621,7 @@ struct vector_args {
|
||||
PyObject *dummy = NULL;
|
||||
_PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
|
||||
#else
|
||||
- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
|
||||
+ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
|
||||
std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
|
||||
_PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
|
||||
#endif
|
||||
@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
|
||||
#define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
|
||||
static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
|
||||
FORALL_ARGS(MPY_ARGS_DECLARE) \
|
||||
- static _PyArg_Parser parser = {fmt, kwlist, 0}; \
|
||||
+ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
|
||||
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
|
||||
throw mpy::exception_set(); \
|
||||
}
|
||||
--
|
||||
2.44.0
|
||||
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
From 1d35a0b1f5cb39fd0c44a486157dc739a02c71b6 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Wed, 20 Dec 2023 11:23:18 -0500
|
||||
Subject: [PATCH] add rocm_version fallback
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
torch/utils/hipify/cuda_to_hip_mappings.py | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
index 73586440e7..9354057a39 100644
|
||||
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
|
||||
@@ -57,6 +57,18 @@ if os.path.isfile(rocm_version_h):
|
||||
if match:
|
||||
patch = int(match.group(1))
|
||||
rocm_version = (major, minor, patch)
|
||||
+else:
|
||||
+ try:
|
||||
+ hip_version = subprocess.check_output(["hipconfig", "--version"]).decode("utf-8")
|
||||
+ hip_split = hip_version.split('.')
|
||||
+ rocm_version = (int(hip_split[0]), int(hip_split[1]), 0)
|
||||
+ except subprocess.CalledProcessError:
|
||||
+ print(f"Warning: hipconfig --version failed")
|
||||
+ except (FileNotFoundError, PermissionError, NotADirectoryError):
|
||||
+ # Do not print warning. This is okay. This file can also be imported for non-ROCm builds.
|
||||
+ pass
|
||||
+
|
||||
+
|
||||
|
||||
# List of math functions that should be replaced inside device code only.
|
||||
MATH_TRANSPILATIONS = collections.OrderedDict(
|
||||
--
|
||||
2.43.0
|
||||
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
From 06499575b177a218846f0e43ff4bc77d245f207f Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 1 Dec 2023 09:38:05 -0500
|
||||
Subject: [PATCH] disable as-needed for libtorch
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
caffe2/CMakeLists.txt | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
|
||||
index b975d388a7..5e9fd3b3f3 100644
|
||||
--- a/caffe2/CMakeLists.txt
|
||||
+++ b/caffe2/CMakeLists.txt
|
||||
@@ -914,6 +914,10 @@ if(HAVE_SOVERSION)
|
||||
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
|
||||
endif()
|
||||
|
||||
+# Disable global as-needed
|
||||
+set_target_properties(torch PROPERTIES LINK_FLAGS -Wl,--no-as-needed)
|
||||
+
|
||||
+
|
||||
if(USE_ROCM)
|
||||
filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$")
|
||||
set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
--
|
||||
2.42.1
|
||||
|
||||
46
0001-disable-use-of-aotriton.patch
Normal file
46
0001-disable-use-of-aotriton.patch
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Tue, 19 Mar 2024 11:32:37 -0400
|
||||
Subject: [PATCH] disable use of aotriton
|
||||
|
||||
---
|
||||
aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||
index 96b839820efd..2d3dd0cb4b0f 100644
|
||||
--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||
+++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp
|
||||
@@ -21,9 +21,11 @@
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
|
||||
+#ifdef USE_FLASH_ATTENTION
|
||||
#if USE_ROCM
|
||||
#include <aotriton/flash.h>
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
/**
|
||||
* Note [SDPA Runtime Dispatch]
|
||||
@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) {
|
||||
}
|
||||
|
||||
bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) {
|
||||
+#ifdef USE_FLASH_ATTENTION
|
||||
// Check that the gpu is capable of running flash attention
|
||||
using sm80 = SMVersion<8, 0>;
|
||||
using sm90 = SMVersion<9, 0>;
|
||||
@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
+#else
|
||||
+ return false;
|
||||
+#endif
|
||||
}
|
||||
|
||||
bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) {
|
||||
--
|
||||
2.44.0
|
||||
|
||||
226
0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch
Normal file
226
0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001
|
||||
From: William Wen <williamwen@meta.com>
|
||||
Date: Wed, 3 Apr 2024 17:58:46 -0700
|
||||
Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo
|
||||
unittests on 3.12 (#123216)
|
||||
|
||||
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216
|
||||
Approved by: https://github.com/jansel, https://github.com/malfet
|
||||
---
|
||||
test/dynamo/test_autograd_function.py | 3 ++
|
||||
test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++
|
||||
test/functorch/test_eager_transforms.py | 7 ++-
|
||||
test/run_test.py | 3 --
|
||||
torch/__init__.py | 5 +-
|
||||
torch/_dynamo/eval_frame.py | 4 +-
|
||||
torch/_dynamo/test_case.py | 8 +---
|
||||
7 files changed, 74 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py
|
||||
index d23fec607afa..bc5ebc767038 100644
|
||||
--- a/test/dynamo/test_autograd_function.py
|
||||
+++ b/test/dynamo/test_autograd_function.py
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
import copy
|
||||
import math
|
||||
+import sys
|
||||
+import unittest
|
||||
|
||||
import torch
|
||||
|
||||
@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase):
|
||||
# I pulled all of these test cases from test_autograd.py
|
||||
# In the future, we should make the Dynamo test suite actually
|
||||
# run on test_autograd.py (it's disabled right now) and delete these.
|
||||
+ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+")
|
||||
def test_smoke_from_test_autograd(self):
|
||||
class Func(torch.autograd.Function):
|
||||
@staticmethod
|
||||
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
|
||||
index a73de8b1c7e9..8f54e0564e6b 100644
|
||||
--- a/test/dynamo/test_misc.py
|
||||
+++ b/test/dynamo/test_misc.py
|
||||
@@ -9760,6 +9760,69 @@ fn
|
||||
lambda mod: mod,
|
||||
)
|
||||
|
||||
+ @xfailIfPy311
|
||||
+ def test_outside_linear_module_free(self):
|
||||
+ # Compared to test_linear_module_free, the linear
|
||||
+ # layer is not the code object that is directly compiled.
|
||||
+ def model_inp_ctr():
|
||||
+ fc = torch.nn.Linear(100, 100)
|
||||
+
|
||||
+ class Mod(torch.nn.Module):
|
||||
+ def __init__(self):
|
||||
+ super().__init__()
|
||||
+ self.fc_ref = fc
|
||||
+
|
||||
+ def forward(self, x):
|
||||
+ return fc(x[0])
|
||||
+
|
||||
+ # return fc to keep it alive in _test_compile_model_free
|
||||
+ return Mod(), (torch.randn(100, 100), fc)
|
||||
+
|
||||
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref)
|
||||
+
|
||||
+ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+")
|
||||
+ def test_parameter_free(self):
|
||||
+ def model_inp_ctr():
|
||||
+ param = torch.nn.Parameter(torch.randn(100, 100))
|
||||
+
|
||||
+ class Mod(torch.nn.Module):
|
||||
+ def __init__(self):
|
||||
+ super().__init__()
|
||||
+ self.param = param
|
||||
+
|
||||
+ def forward(self, x):
|
||||
+ return self.param * x[0]
|
||||
+
|
||||
+ # return param to keep it alive in _test_compile_model_free
|
||||
+ return Mod(), (torch.randn(100, 100), param)
|
||||
+
|
||||
+ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param)
|
||||
+
|
||||
+ def test_raises_importerror1(self):
|
||||
+ @torch.compile(backend="eager")
|
||||
+ def fn(x):
|
||||
+ try:
|
||||
+ import some_module_that_surely_does_not_exist
|
||||
+
|
||||
+ return
|
||||
+ except ImportError:
|
||||
+ pass
|
||||
+ return x.sin()
|
||||
+
|
||||
+ x = torch.randn(8)
|
||||
+ self.assertEqual(fn(x), x.sin())
|
||||
+
|
||||
+ def test_raises_importerror2(self):
|
||||
+ @torch.compile(backend="eager")
|
||||
+ def fn(x):
|
||||
+ import some_module_that_surely_does_not_exist
|
||||
+
|
||||
+ return x + 1
|
||||
+
|
||||
+ x = torch.randn(8)
|
||||
+ with self.assertRaises(ImportError):
|
||||
+ fn(x)
|
||||
+
|
||||
def test_dynamo_cache_move_to_front(self):
|
||||
class Mod(torch.nn.Module):
|
||||
def __init__(self):
|
||||
diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py
|
||||
index 09415cf8f48e..60790ec06059 100644
|
||||
--- a/test/functorch/test_eager_transforms.py
|
||||
+++ b/test/functorch/test_eager_transforms.py
|
||||
@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase):
|
||||
# Triton only supports GPU with SM70 or later.
|
||||
@expectedFailureIf((IS_ARM64 and not IS_MACOS) or
|
||||
IS_WINDOWS or
|
||||
- (TEST_CUDA and not SM70OrLater) or
|
||||
- (sys.version_info >= (3, 12)))
|
||||
+ (TEST_CUDA and not SM70OrLater))
|
||||
def test_compile_vmap_hessian(self, device):
|
||||
# The model and inputs are a smaller version
|
||||
# of code at benchmark repo:
|
||||
@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase):
|
||||
actual = opt_fn(params_and_buffers, x)
|
||||
self.assertEqual(actual, expected)
|
||||
|
||||
- # torch.compile is not supported on Windows or on Python 3.12+
|
||||
- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12)))
|
||||
+ # torch.compile is not supported on Windows
|
||||
+ @expectedFailureIf(IS_WINDOWS)
|
||||
@torch._dynamo.config.patch(suppress_errors=False)
|
||||
@torch._dynamo.config.patch(capture_func_transforms=True)
|
||||
@skipIfTorchDynamo("Do not test torch.compile on top of torch.compile")
|
||||
diff --git a/test/run_test.py b/test/run_test.py
|
||||
index e86af9623042..ebb14df4167d 100755
|
||||
--- a/test/run_test.py
|
||||
+++ b/test/run_test.py
|
||||
@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT))
|
||||
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
|
||||
DISTRIBUTED_TEST_PREFIX = "distributed"
|
||||
INDUCTOR_TEST_PREFIX = "inductor"
|
||||
-DYNAMO_TEST_PREFIX = "dynamo"
|
||||
|
||||
|
||||
# Note [ROCm parallel CI testing]
|
||||
@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [
|
||||
]
|
||||
|
||||
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
|
||||
-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)]
|
||||
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
|
||||
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
|
||||
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
|
||||
@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]:
|
||||
# these tests failing in Python 3.12 temporarily disabling
|
||||
if sys.version_info >= (3, 12):
|
||||
options.exclude.extend(INDUCTOR_TESTS)
|
||||
- options.exclude.extend(DYNAMO_TESTS)
|
||||
options.exclude.extend(
|
||||
[
|
||||
"functorch/test_dims",
|
||||
diff --git a/torch/__init__.py b/torch/__init__.py
|
||||
index d381712b4a35..26cdffe81d29 100644
|
||||
--- a/torch/__init__.py
|
||||
+++ b/torch/__init__.py
|
||||
@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *,
|
||||
|
||||
"""
|
||||
_C._log_api_usage_once("torch.compile")
|
||||
- # Temporary until we get proper support for python 3.12
|
||||
- if sys.version_info >= (3, 12):
|
||||
- raise RuntimeError("Dynamo is not supported on Python 3.12+")
|
||||
+ if sys.version_info >= (3, 13):
|
||||
+ raise RuntimeError("Dynamo is not supported on Python 3.13+")
|
||||
|
||||
# Decorator mode
|
||||
if model is None:
|
||||
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
|
||||
index 53ab0df3a947..0a80eeea99ed 100644
|
||||
--- a/torch/_dynamo/eval_frame.py
|
||||
+++ b/torch/_dynamo/eval_frame.py
|
||||
@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
|
||||
|
||||
|
||||
def check_if_dynamo_supported():
|
||||
- if sys.version_info >= (3, 12):
|
||||
- raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
|
||||
+ if sys.version_info >= (3, 13):
|
||||
+ raise RuntimeError("Python 3.13+ not yet supported for torch.compile")
|
||||
|
||||
|
||||
def is_dynamo_supported():
|
||||
diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py
|
||||
index e3cbef09eaae..297ea6e2bc2a 100644
|
||||
--- a/torch/_dynamo/test_case.py
|
||||
+++ b/torch/_dynamo/test_case.py
|
||||
@@ -1,7 +1,6 @@
|
||||
import contextlib
|
||||
import importlib
|
||||
import logging
|
||||
-import sys
|
||||
|
||||
import torch
|
||||
import torch.testing
|
||||
@@ -20,12 +19,7 @@ log = logging.getLogger(__name__)
|
||||
def run_tests(needs=()):
|
||||
from torch.testing._internal.common_utils import run_tests
|
||||
|
||||
- if (
|
||||
- TEST_WITH_TORCHDYNAMO
|
||||
- or IS_WINDOWS
|
||||
- or TEST_WITH_CROSSREF
|
||||
- or sys.version_info >= (3, 12)
|
||||
- ):
|
||||
+ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF:
|
||||
return # skip testing
|
||||
|
||||
if isinstance(needs, str):
|
||||
--
|
||||
2.44.0
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
From c46146dc31ed3dc0ebb6ca28c01330db8ba5d4f2 Mon Sep 17 00:00:00 2001
|
||||
From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 3 Feb 2024 08:16:04 -0500
|
||||
Subject: [PATCH] no third_party fmt
|
||||
|
|
@ -10,23 +10,23 @@ Subject: [PATCH] no third_party fmt
|
|||
3 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
|
||||
index cb81556ff2..7529b2aec9 100644
|
||||
index 1f742f4c176..4fa08913bdd 100644
|
||||
--- a/c10/CMakeLists.txt
|
||||
+++ b/c10/CMakeLists.txt
|
||||
@@ -87,7 +87,7 @@ endif()
|
||||
if(C10_USE_GLOG)
|
||||
target_link_libraries(c10 PUBLIC glog::glog)
|
||||
target_link_libraries(c10 PUBLIC glog::glog)
|
||||
endif()
|
||||
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
|
||||
+target_link_libraries(c10 PRIVATE fmt)
|
||||
|
||||
if(C10_USE_NUMA)
|
||||
target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR})
|
||||
message(STATUS "NUMA paths:")
|
||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||
index 8310f29e01..c99d0d762a 100644
|
||||
index 6f5a2d5feff..42fbf80f6e8 100644
|
||||
--- a/cmake/Dependencies.cmake
|
||||
+++ b/cmake/Dependencies.cmake
|
||||
@@ -1834,7 +1834,7 @@ endif()
|
||||
@@ -1837,7 +1837,7 @@ endif()
|
||||
#
|
||||
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
|
||||
|
|
@ -35,7 +35,7 @@ index 8310f29e01..c99d0d762a 100644
|
|||
|
||||
# Disable compiler feature checks for `fmt`.
|
||||
#
|
||||
@@ -1843,9 +1843,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||
@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
|
||||
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
|
||||
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
|
||||
# shouldn't be too bad to just disable the checks.
|
||||
|
|
@ -48,7 +48,7 @@ index 8310f29e01..c99d0d762a 100644
|
|||
|
||||
# ---[ Kineto
|
||||
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
|
||||
index 24903a207e..3a7751dc00 100644
|
||||
index 97a72eed55b..9e5014d1980 100644
|
||||
--- a/torch/CMakeLists.txt
|
||||
+++ b/torch/CMakeLists.txt
|
||||
@@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES
|
||||
|
|
@ -61,5 +61,5 @@ index 24903a207e..3a7751dc00 100644
|
|||
|
||||
if(USE_ASAN AND TARGET Sanitizer::address)
|
||||
--
|
||||
2.43.0
|
||||
2.43.2
|
||||
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
From cef92207b79ad53e3fcc1b0e22ba91cb9422968c Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 18 Nov 2023 09:38:52 -0500
|
||||
Subject: [PATCH] python-torch link with python
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
torch/CMakeLists.txt | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
|
||||
index 8d5375f320..6f8c7b65c4 100644
|
||||
--- a/torch/CMakeLists.txt
|
||||
+++ b/torch/CMakeLists.txt
|
||||
@@ -312,6 +312,9 @@ add_dependencies(torch_python torch_python_stubs)
|
||||
add_dependencies(torch_python flatbuffers)
|
||||
|
||||
|
||||
+# Unresolved syms in -lpython
|
||||
+target_link_libraries(torch_python PUBLIC ${PYTHON_LIBRARIES})
|
||||
+
|
||||
if(USE_PRECOMPILED_HEADERS)
|
||||
target_precompile_headers(torch_python PRIVATE
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
|
||||
--
|
||||
2.42.1
|
||||
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
From f70ef37d0b3c780fd17be199e66a81ffa679f93e Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 18 Nov 2023 12:05:43 -0500
|
||||
Subject: [PATCH] python-torch remove ubuntu specific linking
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
CMakeLists.txt | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 86c34984b2..f7c4a7b05f 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -479,9 +479,12 @@ option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
|
||||
# This is a fix for a rare build issue on Ubuntu:
|
||||
# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk
|
||||
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
|
||||
-if(LINUX)
|
||||
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
|
||||
-endif()
|
||||
+
|
||||
+# This is not ubuntu!
|
||||
+# if(LINUX)
|
||||
+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
|
||||
+# endif()
|
||||
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--as-needed")
|
||||
|
||||
if(MSVC)
|
||||
set(CMAKE_NINJA_CMCLDEPS_RC OFF)
|
||||
--
|
||||
2.42.1
|
||||
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
From 527d1ce24a06a14788ca5fc2411985d7c1cb2923 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 13 Oct 2023 05:35:19 -0700
|
||||
Subject: [PATCH] pytorch use SO version by default
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 902ee70fd1..86c34984b2 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -340,7 +340,7 @@ option(USE_TBB "Use TBB (Deprecated)" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF)
|
||||
option(ONNX_ML "Enable traditional ONNX ML API." ON)
|
||||
-option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
|
||||
+option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" ON)
|
||||
option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
|
||||
cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
|
||||
option(WERROR "Build with -Werror supported by the compiler" OFF)
|
||||
--
|
||||
2.42.1
|
||||
|
||||
25
0001-reenable-foxi-linking.patch
Normal file
25
0001-reenable-foxi-linking.patch
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Thu, 22 Feb 2024 09:28:11 -0500
|
||||
Subject: [PATCH] reenable foxi linking
|
||||
|
||||
---
|
||||
cmake/Dependencies.cmake | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
|
||||
index 42fbf80f6e8..bc3a2dc6fee 100644
|
||||
--- a/cmake/Dependencies.cmake
|
||||
+++ b/cmake/Dependencies.cmake
|
||||
@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
|
||||
endif()
|
||||
# include_directories(${FOXI_INCLUDE_DIRS})
|
||||
-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||
+ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
|
||||
# Recover the build shared libs option.
|
||||
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
|
||||
endif()
|
||||
--
|
||||
2.43.2
|
||||
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
From c47c6e202d60ccac15aa36698bd4788415a9416b Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 25 Nov 2023 16:46:17 -0500
|
||||
Subject: [PATCH] torch sane version
|
||||
|
||||
---
|
||||
tools/generate_torch_version.py | 1 +
|
||||
version.txt | 2 +-
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tools/generate_torch_version.py b/tools/generate_torch_version.py
|
||||
index d90d3646ab..11d5bbeba5 100644
|
||||
--- a/tools/generate_torch_version.py
|
||||
+++ b/tools/generate_torch_version.py
|
||||
@@ -42,6 +42,7 @@ def get_tag(pytorch_root: Union[str, Path]) -> str:
|
||||
def get_torch_version(sha: Optional[str] = None) -> str:
|
||||
pytorch_root = Path(__file__).parent.parent
|
||||
version = open(pytorch_root / "version.txt").read().strip()
|
||||
+ return version
|
||||
|
||||
if os.getenv("PYTORCH_BUILD_VERSION"):
|
||||
assert os.getenv("PYTORCH_BUILD_NUMBER") is not None
|
||||
diff --git a/version.txt b/version.txt
|
||||
index ecaf4eea7c..7ec1d6db40 100644
|
||||
--- a/version.txt
|
||||
+++ b/version.txt
|
||||
@@ -1,1 +1,1 @@
|
||||
-2.1.0a0
|
||||
+2.1.2
|
||||
--
|
||||
2.42.1
|
||||
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
From 587a8b10bd3f7a68275356ee6eb6bb43ed711ba2 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 29 Sep 2023 06:19:29 -0700
|
||||
Subject: [PATCH 2/6] Regenerate flatbuffer header
|
||||
|
||||
For this error
|
||||
torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41:
|
||||
error: static assertion failed: Non-compatible flatbuffers version included
|
||||
12 | FLATBUFFERS_VERSION_MINOR == 3 &&
|
||||
|
||||
PyTorch is expecting 23.3.3, what f38 has
|
||||
Rawhide is at 23.5.26
|
||||
|
||||
Regenerate with
|
||||
flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||
index cffe8bc7a6..83575e4c19 100644
|
||||
--- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||
+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h
|
||||
@@ -9,8 +9,8 @@
|
||||
// Ensure the included flatbuffers.h is the same version as when this file was
|
||||
// generated, otherwise it may not be compatible.
|
||||
static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
|
||||
- FLATBUFFERS_VERSION_MINOR == 3 &&
|
||||
- FLATBUFFERS_VERSION_REVISION == 3,
|
||||
+ FLATBUFFERS_VERSION_MINOR == 5 &&
|
||||
+ FLATBUFFERS_VERSION_REVISION == 26,
|
||||
"Non-compatible flatbuffers version included");
|
||||
|
||||
namespace torch {
|
||||
--
|
||||
2.41.0
|
||||
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
From bb52aeacc6dfab2355249b7b5beb72c2761ec319 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 29 Sep 2023 06:25:23 -0700
|
||||
Subject: [PATCH 3/6] Stub in kineto ActivityType
|
||||
|
||||
There is an error with kineto is not used, the shim still
|
||||
requires the ActivityTYpe.h header to get the enum Activity type.
|
||||
So cut-n-paste just enough of the header in to do this.
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++
|
||||
1 file changed, 44 insertions(+)
|
||||
|
||||
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
|
||||
index 2a410719a1..7d6525befd 100644
|
||||
--- a/torch/csrc/profiler/kineto_shim.h
|
||||
+++ b/torch/csrc/profiler/kineto_shim.h
|
||||
@@ -12,7 +12,51 @@
|
||||
#undef USE_KINETO
|
||||
#endif
|
||||
|
||||
+#ifdef USE_KINETO
|
||||
#include <ActivityType.h>
|
||||
+#else
|
||||
+namespace libkineto {
|
||||
+// copied from header
|
||||
+/*
|
||||
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
+ * All rights reserved.
|
||||
+ *
|
||||
+ * This source code is licensed under the BSD-style license found in the
|
||||
+ * LICENSE file in the root directory of this source tree.
|
||||
+ */
|
||||
+
|
||||
+// Note : All activity types are not enabled by default. Please add them
|
||||
+// at correct position in the enum
|
||||
+enum class ActivityType {
|
||||
+ // Activity types enabled by default
|
||||
+ CPU_OP = 0, // cpu side ops
|
||||
+ USER_ANNOTATION,
|
||||
+ GPU_USER_ANNOTATION,
|
||||
+ GPU_MEMCPY,
|
||||
+ GPU_MEMSET,
|
||||
+ CONCURRENT_KERNEL, // on-device kernels
|
||||
+ EXTERNAL_CORRELATION,
|
||||
+ CUDA_RUNTIME, // host side cuda runtime events
|
||||
+ CUDA_DRIVER, // host side cuda driver events
|
||||
+ CPU_INSTANT_EVENT, // host side point-like events
|
||||
+ PYTHON_FUNCTION,
|
||||
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
|
||||
+
|
||||
+ // Optional Activity types
|
||||
+ CUDA_SYNC, // synchronization events between runtime and kernels
|
||||
+ GLOW_RUNTIME, // host side glow runtime events
|
||||
+ MTIA_RUNTIME, // host side MTIA runtime events
|
||||
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
|
||||
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
|
||||
+ HPU_OP, // HPU host side runtime event
|
||||
+ XPU_RUNTIME, // host side xpu runtime events
|
||||
+
|
||||
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
|
||||
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
|
||||
+};
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
|
||||
#include <torch/csrc/Export.h>
|
||||
#include <torch/csrc/profiler/api.h>
|
||||
--
|
||||
2.41.0
|
||||
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
From fcf3cd70229cdc729d05ddab081ac886c9db6bd7 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 29 Sep 2023 13:58:28 -0700
|
||||
Subject: [PATCH] torch python 3.12 changes
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
functorch/csrc/dim/dim.cpp | 6 ++++++
|
||||
torch/csrc/dynamo/cpython_defs.h | 2 +-
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
|
||||
index b611dc3e8c2..c7009478aee 100644
|
||||
--- a/functorch/csrc/dim/dim.cpp
|
||||
+++ b/functorch/csrc/dim/dim.cpp
|
||||
@@ -10,7 +10,13 @@
|
||||
// Many APIs have changed/don't exist anymore
|
||||
#if IS_PYTHON_3_12_PLUS
|
||||
|
||||
+#include "dim.h"
|
||||
+
|
||||
// Re-enable this some day
|
||||
+PyObject* Dim_init() {
|
||||
+ PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
|
||||
+ return nullptr;
|
||||
+}
|
||||
|
||||
#else
|
||||
|
||||
diff --git a/torch/csrc/dynamo/cpython_defs.h b/torch/csrc/dynamo/cpython_defs.h
|
||||
index f0a0e1a88e2..f58becd246e 100644
|
||||
--- a/torch/csrc/dynamo/cpython_defs.h
|
||||
+++ b/torch/csrc/dynamo/cpython_defs.h
|
||||
@@ -6,7 +6,7 @@
|
||||
// should go in cpython_defs.c. Copying is required when, e.g.,
|
||||
// we need to call internal CPython functions that are not exposed.
|
||||
|
||||
-#if IS_PYTHON_3_11_PLUS && !(IS_PYTHON_3_12_PLUS)
|
||||
+#if IS_PYTHON_3_11_PLUS
|
||||
|
||||
#include <internal/pycore_frame.h>
|
||||
|
||||
--
|
||||
2.43.0
|
||||
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
From fc0d4ce06fecbd2bcd10fb13c515dc6625260870 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 29 Sep 2023 17:21:13 -0700
|
||||
Subject: [PATCH 5/6] disable submodule search
|
||||
|
||||
---
|
||||
setup.py | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/setup.py b/setup.py
|
||||
index 17bf16b89a..b8c8ae5506 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -452,7 +452,7 @@ def mirror_files_into_torchgen():
|
||||
def build_deps():
|
||||
report("-- Building version " + version)
|
||||
|
||||
- check_submodules()
|
||||
+ # check_submodules()
|
||||
check_pydep("yaml", "pyyaml")
|
||||
|
||||
build_caffe2(
|
||||
--
|
||||
2.41.0
|
||||
|
||||
15
README.NVIDIA
Normal file
15
README.NVIDIA
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
Some help for building this package for NVIDIA/CUDA
|
||||
|
||||
Review NVIDIA's documentation
|
||||
https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
|
||||
|
||||
Review PyTorch documentation
|
||||
https://github.com/pytorch/pytorch#from-source
|
||||
|
||||
Some convenience strings to cut-n-paste
|
||||
|
||||
F39
|
||||
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
|
||||
|
||||
Building is local.
|
||||
Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed.
|
||||
350
license.txt
Normal file
350
license.txt
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
#
|
||||
# License Details
|
||||
# Main license BSD 3-Clause
|
||||
#
|
||||
# Apache-2.0
|
||||
# android/libs/fbjni/LICENSE
|
||||
# android/libs/fbjni/CMakeLists.txt
|
||||
# android/libs/fbjni/build.gradle
|
||||
# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ByteBuffer.h
|
||||
# android/libs/fbjni/cxx/fbjni/Context.h
|
||||
# android/libs/fbjni/cxx/fbjni/File.h
|
||||
# android/libs/fbjni/cxx/fbjni/JThread.h
|
||||
# android/libs/fbjni/cxx/fbjni/NativeRunnable.h
|
||||
# android/libs/fbjni/cxx/fbjni/OnLoad.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Boxed.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Common.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Environment.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Iterator.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Log.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References-forward.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Registration.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/utf8.h
|
||||
# android/libs/fbjni/cxx/fbjni/fbjni.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/fbjni.h
|
||||
# android/libs/fbjni/cxx/lyra/cxa_throw.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra.h
|
||||
# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra_exceptions.h
|
||||
# android/libs/fbjni/gradle.properties
|
||||
# android/libs/fbjni/gradle/android-tasks.gradle
|
||||
# android/libs/fbjni/gradle/release.gradle
|
||||
# android/libs/fbjni/gradlew
|
||||
# android/libs/fbjni/gradlew.bat
|
||||
# android/libs/fbjni/host.gradle
|
||||
# android/libs/fbjni/java/com/facebook/jni/CppException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/HybridData.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java
|
||||
# android/libs/fbjni/scripts/android-setup.sh
|
||||
# android/libs/fbjni/scripts/run-host-tests.sh
|
||||
# android/libs/fbjni/settings.gradle
|
||||
# android/libs/fbjni/test/BaseFBJniTests.java
|
||||
# android/libs/fbjni/test/ByteBufferTests.java
|
||||
# android/libs/fbjni/test/DocTests.java
|
||||
# android/libs/fbjni/test/FBJniTests.java
|
||||
# android/libs/fbjni/test/HybridTests.java
|
||||
# android/libs/fbjni/test/IteratorTests.java
|
||||
# android/libs/fbjni/test/PrimitiveArrayTests.java
|
||||
# android/libs/fbjni/test/ReadableByteChannelTests.java
|
||||
# android/libs/fbjni/test/jni/CMakeLists.txt
|
||||
# android/libs/fbjni/test/jni/byte_buffer_tests.cpp
|
||||
# android/libs/fbjni/test/jni/doc_tests.cpp
|
||||
# android/libs/fbjni/test/jni/expect.h
|
||||
# android/libs/fbjni/test/jni/fbjni_onload.cpp
|
||||
# android/libs/fbjni/test/jni/fbjni_tests.cpp
|
||||
# android/libs/fbjni/test/jni/hybrid_tests.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h
|
||||
# android/libs/fbjni/test/jni/iterator_tests.cpp
|
||||
# android/libs/fbjni/test/jni/modified_utf8_test.cpp
|
||||
# android/libs/fbjni/test/jni/no_rtti.cpp
|
||||
# android/libs/fbjni/test/jni/no_rtti.h
|
||||
# android/libs/fbjni/test/jni/primitive_array_tests.cpp
|
||||
# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp
|
||||
# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp
|
||||
# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp
|
||||
# android/pytorch_android/host/build.gradle
|
||||
# aten/src/ATen/cuda/llvm_basic.cpp
|
||||
# aten/src/ATen/cuda/llvm_complex.cpp
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h
|
||||
# aten/src/ATen/nnapi/codegen.py
|
||||
# aten/src/ATen/nnapi/NeuralNetworks.h
|
||||
# aten/src/ATen/nnapi/nnapi_wrapper.cpp
|
||||
# aten/src/ATen/nnapi/nnapi_wrapper.h
|
||||
# binaries/benchmark_args.h
|
||||
# binaries/benchmark_helper.cc
|
||||
# binaries/benchmark_helper.h
|
||||
# binaries/compare_models_torch.cc
|
||||
# binaries/convert_and_benchmark.cc
|
||||
# binaries/convert_caffe_image_db.cc
|
||||
# binaries/convert_db.cc
|
||||
# binaries/convert_encoded_to_raw_leveldb.cc
|
||||
# binaries/convert_image_to_tensor.cc
|
||||
# binaries/core_overhead_benchmark.cc
|
||||
# binaries/core_overhead_benchmark_gpu.cc
|
||||
# binaries/db_throughput.cc
|
||||
# binaries/dump_operator_names.cc
|
||||
# binaries/inspect_gpu.cc
|
||||
# binaries/load_benchmark_torch.cc
|
||||
# binaries/make_cifar_db.cc
|
||||
# binaries/make_image_db.cc
|
||||
# binaries/make_mnist_db.cc
|
||||
# binaries/optimize_for_mobile.cc
|
||||
# binaries/parallel_info.cc
|
||||
# binaries/predictor_verifier.cc
|
||||
# binaries/print_core_object_sizes_gpu.cc
|
||||
# binaries/print_registered_core_operators.cc
|
||||
# binaries/run_plan.cc
|
||||
# binaries/run_plan_mpi.cc
|
||||
# binaries/speed_benchmark.cc
|
||||
# binaries/speed_benchmark_torch.cc
|
||||
# binaries/split_db.cc
|
||||
# binaries/tsv_2_proto.cc
|
||||
# binaries/tutorial_blob.cc
|
||||
# binaries/zmq_feeder.cc
|
||||
# c10/test/util/small_vector_test.cpp
|
||||
# c10/util/FunctionRef.h
|
||||
# c10/util/SmallVector.cpp
|
||||
# c10/util/SmallVector.h
|
||||
# c10/util/llvmMathExtras.h
|
||||
# c10/util/sparse_bitset.h
|
||||
# caffe2/contrib/aten/gen_op.py
|
||||
# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc
|
||||
# caffe2/contrib/fakelowp/fp16_fc_acc_op.h
|
||||
# caffe2/contrib/gloo/allgather_ops.cc
|
||||
# caffe2/contrib/gloo/allgather_ops.h
|
||||
# caffe2/contrib/gloo/reduce_scatter_ops.cc
|
||||
# caffe2/contrib/gloo/reduce_scatter_ops.h
|
||||
# caffe2/core/hip/common_miopen.h
|
||||
# caffe2/core/hip/common_miopen.hip
|
||||
# caffe2/core/net_async_tracing.cc
|
||||
# caffe2/core/net_async_tracing.h
|
||||
# caffe2/core/net_async_tracing_test.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition.h
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_prune.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_prune.h
|
||||
# caffe2/experiments/operators/fully_connected_op_sparse.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_sparse.h
|
||||
# caffe2/experiments/operators/funhash_op.cc
|
||||
# caffe2/experiments/operators/funhash_op.h
|
||||
# caffe2/experiments/operators/sparse_funhash_op.cc
|
||||
# caffe2/experiments/operators/sparse_funhash_op.h
|
||||
# caffe2/experiments/operators/sparse_matrix_reshape_op.cc
|
||||
# caffe2/experiments/operators/sparse_matrix_reshape_op.h
|
||||
# caffe2/experiments/operators/tt_contraction_op.cc
|
||||
# caffe2/experiments/operators/tt_contraction_op.h
|
||||
# caffe2/experiments/operators/tt_contraction_op_gpu.cc
|
||||
# caffe2/experiments/operators/tt_pad_op.cc
|
||||
# caffe2/experiments/operators/tt_pad_op.h
|
||||
# caffe2/experiments/python/SparseTransformer.py
|
||||
# caffe2/experiments/python/convnet_benchmarks.py
|
||||
# caffe2/experiments/python/device_reduce_sum_bench.py
|
||||
# caffe2/experiments/python/funhash_op_test.py
|
||||
# caffe2/experiments/python/net_construct_bench.py
|
||||
# caffe2/experiments/python/sparse_funhash_op_test.py
|
||||
# caffe2/experiments/python/sparse_reshape_op_test.py
|
||||
# caffe2/experiments/python/tt_contraction_op_test.py
|
||||
# caffe2/experiments/python/tt_pad_op_test.py
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h
|
||||
# caffe2/mobile/contrib/nnapi/NeuralNetworks.h
|
||||
# caffe2/mobile/contrib/nnapi/dlnnapi.c
|
||||
# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc
|
||||
# caffe2/observers/profile_observer.cc
|
||||
# caffe2/observers/profile_observer.h
|
||||
# caffe2/operators/hip/conv_op_miopen.hip
|
||||
# caffe2/operators/hip/local_response_normalization_op_miopen.hip
|
||||
# caffe2/operators/hip/pool_op_miopen.hip
|
||||
# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip
|
||||
# caffe2/operators/quantized/int8_utils.h
|
||||
# caffe2/operators/stump_func_op.cc
|
||||
# caffe2/operators/stump_func_op.cu
|
||||
# caffe2/operators/stump_func_op.h
|
||||
# caffe2/operators/unique_ops.cc
|
||||
# caffe2/operators/unique_ops.cu
|
||||
# caffe2/operators/unique_ops.h
|
||||
# caffe2/operators/upsample_op.cc
|
||||
# caffe2/operators/upsample_op.h
|
||||
# caffe2/opt/fusion.h
|
||||
# caffe2/python/layers/label_smooth.py
|
||||
# caffe2/python/mint/static/css/simple-sidebar.css
|
||||
# caffe2/python/modeling/get_entry_from_blobs.py
|
||||
# caffe2/python/modeling/get_entry_from_blobs_test.py
|
||||
# caffe2/python/modeling/gradient_clipping_test.py
|
||||
# caffe2/python/operator_test/unique_ops_test.py
|
||||
# caffe2/python/operator_test/upsample_op_test.py
|
||||
# caffe2/python/operator_test/weight_scale_test.py
|
||||
# caffe2/python/pybind_state_int8.cc
|
||||
# caffe2/python/transformations.py
|
||||
# caffe2/python/transformations_test.py
|
||||
# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
|
||||
# caffe2/quantization/server/batch_matmul_dnnlowp_op.h
|
||||
# caffe2/quantization/server/compute_equalization_scale_test.py
|
||||
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
|
||||
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h
|
||||
# caffe2/quantization/server/elementwise_sum_relu_op.cc
|
||||
# caffe2/quantization/server/fb_fc_packed_op.cc
|
||||
# caffe2/quantization/server/fb_fc_packed_op.h
|
||||
# caffe2/quantization/server/fbgemm_fp16_pack_op.cc
|
||||
# caffe2/quantization/server/fbgemm_fp16_pack_op.h
|
||||
# caffe2/quantization/server/fully_connected_fake_lowp_op.cc
|
||||
# caffe2/quantization/server/fully_connected_fake_lowp_op.h
|
||||
# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py
|
||||
# caffe2/quantization/server/int8_gen_quant_params_test.py
|
||||
# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py
|
||||
# caffe2/quantization/server/spatial_batch_norm_relu_op.cc
|
||||
# caffe2/sgd/weight_scale_op.cc
|
||||
# caffe2/sgd/weight_scale_op.h
|
||||
# caffe2/utils/bench_utils.h
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-higher.py
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-transforms.py
|
||||
# functorch/examples/maml_omniglot/support/omniglot_loaders.py
|
||||
# modules/detectron/group_spatial_softmax_op.cc
|
||||
# modules/detectron/group_spatial_softmax_op.cu
|
||||
# modules/detectron/group_spatial_softmax_op.h
|
||||
# modules/detectron/ps_roi_pool_op.cc
|
||||
# modules/detectron/ps_roi_pool_op.h
|
||||
# modules/detectron/roi_pool_f_op.cc
|
||||
# modules/detectron/roi_pool_f_op.cu
|
||||
# modules/detectron/roi_pool_f_op.h
|
||||
# modules/detectron/sample_as_op.cc
|
||||
# modules/detectron/sample_as_op.cu
|
||||
# modules/detectron/sample_as_op.h
|
||||
# modules/detectron/select_smooth_l1_loss_op.cc
|
||||
# modules/detectron/select_smooth_l1_loss_op.cu
|
||||
# modules/detectron/select_smooth_l1_loss_op.h
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.cc
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.cu
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.h
|
||||
# modules/detectron/sigmoid_focal_loss_op.cc
|
||||
# modules/detectron/sigmoid_focal_loss_op.cu
|
||||
# modules/detectron/sigmoid_focal_loss_op.h
|
||||
# modules/detectron/smooth_l1_loss_op.cc
|
||||
# modules/detectron/smooth_l1_loss_op.cu
|
||||
# modules/detectron/smooth_l1_loss_op.h
|
||||
# modules/detectron/softmax_focal_loss_op.cc
|
||||
# modules/detectron/softmax_focal_loss_op.cu
|
||||
# modules/detectron/softmax_focal_loss_op.h
|
||||
# modules/detectron/spatial_narrow_as_op.cc
|
||||
# modules/detectron/spatial_narrow_as_op.cu
|
||||
# modules/detectron/spatial_narrow_as_op.h
|
||||
# modules/detectron/upsample_nearest_op.cc
|
||||
# modules/detectron/upsample_nearest_op.h
|
||||
# modules/module_test/module_test_dynamic.cc
|
||||
# modules/rocksdb/rocksdb.cc
|
||||
# scripts/apache_header.txt
|
||||
# scripts/apache_python.txt
|
||||
# torch/distributions/lkj_cholesky.py
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause
|
||||
# caffe2/operators/deform_conv_op.cu
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause AND MIT
|
||||
# modules/detectron/ps_roi_pool_op.cu
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause
|
||||
# modules/detectron/upsample_nearest_op.cu
|
||||
#
|
||||
# BSD 0-Clause
|
||||
# torch/csrc/utils/pythoncapi_compat.h
|
||||
#
|
||||
# BSD 2-Clause
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE
|
||||
# caffe2/image/transform_gpu.cu
|
||||
# caffe2/image/transform_gpu.h
|
||||
#
|
||||
# BSL-1.0
|
||||
# c10/util/flat_hash_map.h
|
||||
# c10/util/hash.h
|
||||
# c10/util/Optional.h
|
||||
# c10/util/order_preserving_flat_hash_map.h
|
||||
# c10/util/strong_type.h
|
||||
# c10/util/variant.h
|
||||
#
|
||||
# GPL-3.0-or-later AND MIT
|
||||
# c10/util/reverse_iterator.h
|
||||
#
|
||||
# Khronos
|
||||
# These files are for OpenCL, an unused option
|
||||
# Replace them later, as-needed with the opencl-headers.rpm
|
||||
#
|
||||
# caffe2/contrib/opencl/OpenCL/cl.hpp
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h
|
||||
#
|
||||
# MIT
|
||||
# android/libs/fbjni/googletest-CMakeLists.txt.in
|
||||
# c10/util/BFloat16-math.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c
|
||||
# caffe2/onnx/torch_ops/defs.cc
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake
|
||||
# functorch/einops/_parsing.py
|
||||
# test/functorch/test_parsing.py
|
||||
# test/functorch/test_rearrange.py
|
||||
# third_party/miniz-2.1.0/LICENSE
|
||||
# third_party/miniz-2.1.0/miniz.c
|
||||
# tools/coverage_plugins_package/setup.py
|
||||
# torch/_appdirs.py
|
||||
# torch/utils/hipify/hipify_python.py
|
||||
#
|
||||
# Public Domain
|
||||
# caffe2/mobile/contrib/libopencl-stub/LICENSE
|
||||
# caffe2/utils/murmur_hash3.cc
|
||||
# caffe2/utils/murmur_hash3.h
|
||||
#
|
||||
# Zlib
|
||||
# aten/src/ATen/native/cpu/avx_mathfun.h
|
||||
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Fri, 29 Sep 2023 17:21:13 -0700
|
||||
Subject: [PATCH] disable submodule search
|
||||
|
||||
---
|
||||
setup.py | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/setup.py b/setup.py
|
||||
index 0fd886d945..e397df8fb6 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -458,7 +458,7 @@ def mirror_files_into_torchgen():
|
||||
def build_deps():
|
||||
report("-- Building version " + version)
|
||||
|
||||
- check_submodules()
|
||||
+ # check_submodules()
|
||||
check_pydep("yaml", "pyyaml")
|
||||
|
||||
build_caffe2(
|
||||
--
|
||||
2.43.0
|
||||
|
||||
154
pyproject.toml
Normal file
154
pyproject.toml
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
[build-system]
|
||||
requires = [
|
||||
"setuptools",
|
||||
"wheel",
|
||||
"astunparse",
|
||||
"numpy",
|
||||
"ninja",
|
||||
"pyyaml",
|
||||
"cmake",
|
||||
"typing-extensions",
|
||||
"requests",
|
||||
]
|
||||
# Use legacy backend to import local packages in setup.py
|
||||
build-backend = "setuptools.build_meta:__legacy__"
|
||||
|
||||
|
||||
[tool.black]
|
||||
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
|
||||
# line-length = 120
|
||||
target-version = ["py38", "py39", "py310", "py311"]
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py38"
|
||||
|
||||
# NOTE: Synchronize the ignores with .flake8
|
||||
ignore = [
|
||||
# these ignores are from flake8-bugbear; please fix!
|
||||
"B007", "B008", "B017",
|
||||
"B018", # Useless expression
|
||||
"B019",
|
||||
"B023",
|
||||
"B028", # No explicit `stacklevel` keyword argument found
|
||||
"B904",
|
||||
"E402",
|
||||
"C408", # C408 ignored because we like the dict keyword argument syntax
|
||||
"E501", # E501 is not flexible enough, we're using B950 instead
|
||||
"E721",
|
||||
"E731", # Assign lambda expression
|
||||
"E741",
|
||||
"EXE001",
|
||||
"F405",
|
||||
"F841",
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
"G101",
|
||||
# these ignores are from ruff NPY; please fix!
|
||||
"NPY002",
|
||||
# these ignores are from ruff PERF; please fix!
|
||||
"PERF203",
|
||||
"PERF401",
|
||||
"PERF403",
|
||||
# these ignores are from PYI; please fix!
|
||||
"PYI019",
|
||||
"PYI024",
|
||||
"PYI036",
|
||||
"PYI041",
|
||||
"PYI056",
|
||||
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
|
||||
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||
"SIM108",
|
||||
"SIM110",
|
||||
"SIM114", # Combine `if` branches using logical `or` operator
|
||||
"SIM115",
|
||||
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
|
||||
"SIM117",
|
||||
"SIM118",
|
||||
"UP006", # keep-runtime-typing
|
||||
"UP007", # keep-runtime-typing
|
||||
]
|
||||
line-length = 120
|
||||
select = [
|
||||
"B",
|
||||
"C4",
|
||||
"G",
|
||||
"E",
|
||||
"EXE",
|
||||
"F",
|
||||
"SIM1",
|
||||
"W",
|
||||
# Not included in flake8
|
||||
"NPY",
|
||||
"PERF",
|
||||
"PGH004",
|
||||
"PIE794",
|
||||
"PIE800",
|
||||
"PIE804",
|
||||
"PIE807",
|
||||
"PIE810",
|
||||
"PLC0131", # type bivariance
|
||||
"PLC0132", # type param mismatch
|
||||
"PLC0205", # string as __slots__
|
||||
"PLE",
|
||||
"PLR0133", # constant comparison
|
||||
"PLR0206", # property with params
|
||||
"PLR1722", # use sys exit
|
||||
"PLW0129", # assert on string literal
|
||||
"PLW0406", # import self
|
||||
"PLW0711", # binary op exception
|
||||
"PLW1509", # preexec_fn not safe with threads
|
||||
"PLW3301", # nested min max
|
||||
"PT006", # TODO: enable more PT rules
|
||||
"PT022",
|
||||
"PT023",
|
||||
"PT024",
|
||||
"PT025",
|
||||
"PT026",
|
||||
"PYI",
|
||||
"RUF008", # mutable dataclass default
|
||||
"RUF015", # access first ele in constant time
|
||||
"RUF016", # type error non-integer index
|
||||
"RUF017",
|
||||
"TRY200",
|
||||
"TRY302",
|
||||
"UP",
|
||||
]
|
||||
|
||||
[tool.ruff.per-file-ignores]
|
||||
"__init__.py" = [
|
||||
"F401",
|
||||
]
|
||||
"test/typing/reveal/**" = [
|
||||
"F821",
|
||||
]
|
||||
"test/torch_np/numpy_tests/**" = [
|
||||
"F821",
|
||||
]
|
||||
"test/jit/**" = [
|
||||
"PLR0133", # tests require this for JIT
|
||||
"PYI",
|
||||
"RUF015",
|
||||
"UP", # We don't want to modify the jit tests as they test specific syntax
|
||||
]
|
||||
"test/test_jit.py" = [
|
||||
"PLR0133", # tests require this for JIT
|
||||
"PYI",
|
||||
"RUF015",
|
||||
"UP", # We don't want to modify the jit tests as they test specific syntax
|
||||
]
|
||||
|
||||
"torch/onnx/**" = [
|
||||
"UP037", # ONNX does runtime type checking
|
||||
]
|
||||
|
||||
"torchgen/api/types/__init__.py" = [
|
||||
"F401",
|
||||
"F403",
|
||||
]
|
||||
"torchgen/executorch/api/types/__init__.py" = [
|
||||
"F401",
|
||||
"F403",
|
||||
]
|
||||
"torch/utils/collect_env.py" = [
|
||||
"UP", # collect_env.py needs to work with older versions of Python
|
||||
]
|
||||
1360
python-torch.spec
1360
python-torch.spec
File diff suppressed because it is too large
Load diff
12
sources
12
sources
|
|
@ -1,2 +1,14 @@
|
|||
SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44
|
||||
SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28
|
||||
SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4
|
||||
SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0
|
||||
SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
|
||||
SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
|
||||
SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
|
||||
SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
|
||||
SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
|
||||
SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
|
||||
SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36
|
||||
SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580
|
||||
SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda
|
||||
SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue