diff --git a/.gitignore b/.gitignore index 3f2501f..315fe1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,14 @@ /pytorch-v2.1.0.tar.gz /pytorch-v2.1.2.tar.gz +/pytorch-975d428.tar.gz +/v23.3.3.tar.gz +/v2.11.1.tar.gz +/pytorch-6a89a75.tar.gz +/pytorch-74832f1.tar.gz +/pytorch-4bb5cb5.tar.gz +/tensorpipe-52791a2.tar.gz +/v1.41.0.tar.gz +/libnop-910b558.tar.gz +/pytorch-97ff6cf.tar.gz +/pytorch-v2.3.0.tar.gz +/pytorch-v2.3.1.tar.gz diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch new file mode 100644 index 0000000..56434a7 --- /dev/null +++ b/0001-Optionally-use-hipblaslt.patch @@ -0,0 +1,262 @@ +From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 23 Feb 2024 08:27:30 -0500 +Subject: [PATCH] Optionally use hipblaslt + +The hipblaslt package is not available on Fedora. +Instead of requiring the package, make it optional. +If it is found, define the preprocessor variable HIPBLASLT +Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks + +Signed-off-by: Tom Rix +--- + aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- + aten/src/ATen/cuda/CUDABlas.h | 2 +- + aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- + aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- + aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- + aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ + cmake/Dependencies.cmake | 3 +++ + cmake/public/LoadHIP.cmake | 4 ++-- + 8 files changed, 25 insertions(+), 19 deletions(-) + +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index d534ec5a178..e815463f630 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -14,7 +14,7 @@ + #include + + #ifdef USE_ROCM +-#if ROCM_VERSION >= 60000 ++#ifdef HIPBLASLT + #include + #endif + // until hipblas has an API to accept flags, we must use rocblas here +@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { + } + } + +-#if (!defined(USE_ROCM) && 
!defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + + #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 + // only for rocm 5.7 where we first supported hipblaslt, it was difficult +@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor< + }; + } // namespace + ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + template + void gemm_and_bias( + bool transpose_mat1, +@@ -1124,7 +1125,7 @@ template void gemm_and_bias( + at::BFloat16* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation); +- ++#endif + void scaled_gemm( + char transa, + char transb, +diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h +index eb12bb350c5..068607467dd 100644 +--- a/aten/src/ATen/cuda/CUDABlas.h ++++ b/aten/src/ATen/cuda/CUDABlas.h +@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); + template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + enum GEMMAndBiasActivationEpilogue { + None, + RELU, +diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h +index 4ec35f59a21..e28dc42034f 100644 +--- a/aten/src/ATen/cuda/CUDAContextLight.h ++++ b/aten/src/ATen/cuda/CUDAContextLight.h +@@ -9,7 +9,7 @@ + + // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also + // added bf16 support +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + #include + #endif + +@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + 
/* Handles */ + TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); + TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle(); + #endif + +diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp +index 6913d2cd95e..3d4276be372 100644 +--- a/aten/src/ATen/cuda/CublasHandlePool.cpp ++++ b/aten/src/ATen/cuda/CublasHandlePool.cpp +@@ -29,7 +29,7 @@ namespace at::cuda { + + namespace { + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + void createCublasLtHandle(cublasLtHandle_t *handle) { + TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); + } +@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { + return handle; + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasLtHandle_t getCurrentCUDABlasLtHandle() { + #ifdef USE_ROCM + c10::DeviceIndex device = 0; +diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h +index 3ba0d761277..dde1870cfbf 100644 +--- a/aten/src/ATen/cuda/tunable/TunableGemm.h ++++ b/aten/src/ATen/cuda/tunable/TunableGemm.h +@@ -11,7 +11,7 @@ + + #include + #ifdef USE_ROCM +-#if ROCM_VERSION >= 50700 ++#ifdef HIPBLASLT + #include + #endif + #include +@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with 
c10::complex +@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex +diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp +index 29e5c5e3cf1..df56f3d7f1d 100644 +--- a/aten/src/ATen/native/cuda/Blas.cpp ++++ b/aten/src/ATen/native/cuda/Blas.cpp +@@ -155,7 +155,7 @@ enum class Activation { + GELU, + }; + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { + switch (a) { + case Activation::None: +@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { + + #ifdef USE_ROCM + static bool isSupportedHipLtROCmArch(int index) { ++#if defined(HIPBLASLT) + hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); + std::string device_arch = prop->gcnArchName; + static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; +@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { + } + } + TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); ++#endif + return false; + } + #endif +@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + at::ScalarType scalar_type = self.scalar_type(); + c10::MaybeOwned self_; + if (&result != &self) { +-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) + // Strangely, if mat2 has only 1 row 
or column, we get + // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. + // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] +@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + } + self__sizes = self_->sizes(); + } else { +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + useLtInterface = !disable_addmm_cuda_lt && + result.dim() == 2 && result.is_contiguous() && + isSupportedHipLtROCmArch(self.device().index()) && +@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + if (useLtInterface) { + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, +@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); + at::native::resize_output(amax, {}); + +-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) ++#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasCommonArgs args(mat1, mat2, out); + const auto out_dtype_ = args.result->scalar_type(); + TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); +@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 60000 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + // rocm's hipblaslt does not yet support amax, so calculate separately + auto out_float32 = out.to(kFloat); + 
out_float32.abs_(); +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index b7ffbeb07dc..2b6c3678984 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1273,6 +1273,9 @@ if(USE_ROCM) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() ++ if(hipblaslt_FOUND) ++ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) ++ endif() + if(HIPBLASLT_CUSTOM_DATA_TYPE) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) + endif() +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index f6ca263c5e5..53eb0b63c1a 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -156,7 +156,7 @@ if(HIP_FOUND) + find_package_and_print_version(rocblas REQUIRED) + find_package_and_print_version(hipblas REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") +- find_package_and_print_version(hipblaslt REQUIRED) ++ find_package_and_print_version(hipblaslt) + endif() + find_package_and_print_version(miopen REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") +@@ -191,7 +191,7 @@ if(HIP_FOUND) + # roctx is part of roctracer + find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) + +- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") ++ if(hipblaslt_FOUND) + # check whether hipblaslt is using its own datatype + set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") + file(WRITE ${file} "" +-- +2.43.2 + diff --git a/0001-Prepare-pytorch-cmake-for-fedora.patch b/0001-Prepare-pytorch-cmake-for-fedora.patch deleted file mode 100644 index cf1e843..0000000 --- a/0001-Prepare-pytorch-cmake-for-fedora.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 24cf0294a67d89ad70367940eea872162b44482c Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 23 Sep 2023 10:18:52 -0700 -Subject: [PATCH] Prepare pytorch cmake for fedora - -Use the system fmt -Remove foxi use -Remove warnings/errors for clang 17 -fxdiv is not a library -build type is RelWithDebInfo -use 
system pthreadpool - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 6 +++--- - .../native/quantized/cpu/qnnpack/CMakeLists.txt | 3 --- - c10/CMakeLists.txt | 2 +- - caffe2/CMakeLists.txt | 6 +----- - cmake/Dependencies.cmake | 16 +--------------- - test/cpp/tensorexpr/CMakeLists.txt | 2 +- - torch/CMakeLists.txt | 2 +- - 7 files changed, 8 insertions(+), 29 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 3a48eaf4e2..902ee70fd1 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -682,7 +682,7 @@ set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should - # Set default build type - if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - defaulting to Release") -- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) -+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) - endif() - - # The below means we are cross compiling for arm64 or x86_64 on MacOSX -@@ -917,8 +917,8 @@ if(NOT MSVC) - string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0") - append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS) -- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS) -- append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS) -+# append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS) -+# append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS) - else() - # skip unwanted includes from windows.h - add_compile_definitions(WIN32_LEAN_AND_MEAN) -diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -index fd6b7ff551..218c8e9b2a 100644 ---- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -+++ 
b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -@@ -393,10 +393,7 @@ elseif(NOT TARGET fxdiv AND USE_SYSTEM_FXDIV) - if(NOT FXDIV_HDR) - message(FATAL_ERROR "Cannot find fxdiv") - endif() -- add_library(fxdiv STATIC "${FXDIV_HDR}") -- set_property(TARGET fxdiv PROPERTY LINKER_LANGUAGE C) - endif() --target_link_libraries(pytorch_qnnpack PRIVATE fxdiv) - - # ---[ Configure psimd - if(NOT TARGET psimd AND NOT USE_SYSTEM_PSIMD) -diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index feebad7cbb..7c029cd88d 100644 ---- a/c10/CMakeLists.txt -+++ b/c10/CMakeLists.txt -@@ -87,7 +87,7 @@ endif() - if(${USE_GLOG}) - target_link_libraries(c10 PUBLIC glog::glog) - endif() --target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -+target_link_libraries(c10 PRIVATE fmt) - - find_package(Backtrace) - if(Backtrace_FOUND) -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 74d0d55719..b975d388a7 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -107,7 +107,7 @@ endif() - # Note: the folders that are being commented out have not been properly - # addressed yet. 
- --if(NOT MSVC AND USE_XNNPACK) -+if(NOT MSVC AND USE_XNNPACK AND NOT USE_SYSTEM_FXDIV) - if(NOT TARGET fxdiv) - set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") - set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -@@ -1022,10 +1022,6 @@ elseif(USE_CUDA) - endif() - endif() - --if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) --endif() -- - # ========================================================== - # formerly-libtorch flags - # ========================================================== -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index c3abce52e4..21b40f3a88 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1555,7 +1555,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) - endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1588,8 +1587,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() -- include_directories(${FOXI_INCLUDE_DIRS}) -- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() -@@ -1834,18 +1831,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -- --# Disable compiler feature checks for `fmt`. --# --# CMake compiles a little program to check compiler features. Some of our build --# configurations (notably the mobile build analyzer) will populate --# CMAKE_CXX_FLAGS in ways that break feature checks. 
Since we already know --# `fmt` is compatible with a superset of the compilers that PyTorch is, it --# shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") -- --list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) -+list(APPEND Caffe2_DEPENDENCY_LIBS fmt) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) - - # ---[ Kineto -diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 7dff70630d..90b1003591 100644 ---- a/test/cpp/tensorexpr/CMakeLists.txt -+++ b/test/cpp/tensorexpr/CMakeLists.txt -@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) - # pthreadpool header. For some build environment we need add the dependency - # explicitly. - if(USE_PTHREADPOOL) -- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface) -+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool) - endif() - if(USE_CUDA) - target_link_libraries(test_tensorexpr PRIVATE -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 62ee4c12a9..8d5375f320 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -84,7 +84,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES - python::python - pybind::pybind11 - shm -- fmt::fmt-header-only -+ fmt - ATEN_CPU_FILES_GEN_LIB) - - if(USE_ASAN AND TARGET Sanitizer::address) --- -2.42.1 - diff --git a/0001-Reenable-dim-for-python-3.12.patch b/0001-Reenable-dim-for-python-3.12.patch new file mode 100644 index 0000000..138b5d4 --- /dev/null +++ b/0001-Reenable-dim-for-python-3.12.patch @@ -0,0 +1,115 @@ +From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 4 Apr 2024 14:21:38 -0600 +Subject: [PATCH] Reenable dim for python 3.12 + +In 3.12: + +_PyArg_Parser added an element to the start of the structure. +So existing positional initialization is off. Switch to element +initialization. 
+ +_Py_CODEUNIT changed to from an int to a union, but relevant_op +is passed an int for the return of decoder.opcode, so the parameter +type is wrong, switch it to int. + +The opcode PRECALL was removed, so reduce its handling to 3.11 + +Signed-off-by: Tom Rix +--- + functorch/csrc/dim/dim.cpp | 24 +++++------------------- + functorch/csrc/dim/minpybind.h | 4 ++-- + 2 files changed, 7 insertions(+), 21 deletions(-) + +diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp +index 4cc027504c77..e48b0d58081f 100644 +--- a/functorch/csrc/dim/dim.cpp ++++ b/functorch/csrc/dim/dim.cpp +@@ -6,20 +6,6 @@ + + #include + +- +-// Many APIs have changed/don't exist anymore +-#if IS_PYTHON_3_12_PLUS +- +-#include "dim.h" +- +-// Re-enable this some day +-PyObject* Dim_init() { +- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); +- return nullptr; +-} +- +-#else +- + #include "minpybind.h" + #include + #include +@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self, + PY_BEGIN + mpy::handle sizes; + static const char * const _keywords[] = {"sizes", nullptr}; +- static _PyArg_Parser parser = {"O", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) { + return nullptr; + } +@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self, + PY_BEGIN + int size; + static const char * const _keywords[] = {"N", nullptr}; +- static _PyArg_Parser parser = {"i", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) { + return nullptr; + } +@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = { + + // dim() -------------------- + +-static bool relevant_op(_Py_CODEUNIT c) { ++static bool relevant_op(int c) { + switch(c) { + case STORE_NAME: + case STORE_GLOBAL: +@@ -1587,7 +1573,7 @@ static PyObject* 
_dims(PyObject *self, + auto c = mpy::obj::steal(PyFrame_GetCode(f.ptr())); + auto lasti = PyFrame_GetLasti(f.ptr()); + auto decoder = PyInstDecoder(c.ptr(), lasti); +- #if IS_PYTHON_3_11_PLUS ++ #if IS_PYTHON_3_11 + // When py3.11 adapts bytecode lasti points to the precall + // rather than the call instruction after it + if (decoder.opcode() == PRECALL) { +@@ -3268,4 +3254,4 @@ PyObject* Dim_init() { + } + } + +-#endif ++ +diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h +index de82b5af95a4..d76d4828bf80 100644 +--- a/functorch/csrc/dim/minpybind.h ++++ b/functorch/csrc/dim/minpybind.h +@@ -621,7 +621,7 @@ struct vector_args { + PyObject *dummy = NULL; + _PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy); + #else +- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0}; ++ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr}; + std::unique_ptr buf(new PyObject*[names.size()]); + _PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]); + #endif +@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) { + #define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \ + static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \ + FORALL_ARGS(MPY_ARGS_DECLARE) \ +- static _PyArg_Parser parser = {fmt, kwlist, 0}; \ ++ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \ + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \ + throw mpy::exception_set(); \ + } +-- +2.44.0 + diff --git a/next/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch similarity index 100% rename from next/0001-Regenerate-flatbuffer-header.patch rename to 0001-Regenerate-flatbuffer-header.patch diff --git 
a/next/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch similarity index 100% rename from next/0001-Stub-in-kineto-ActivityType.patch rename to 0001-Stub-in-kineto-ActivityType.patch diff --git a/0001-add-rocm_version-fallback.patch b/0001-add-rocm_version-fallback.patch deleted file mode 100644 index 25a0c67..0000000 --- a/0001-add-rocm_version-fallback.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 1d35a0b1f5cb39fd0c44a486157dc739a02c71b6 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Wed, 20 Dec 2023 11:23:18 -0500 -Subject: [PATCH] add rocm_version fallback - -Signed-off-by: Tom Rix ---- - torch/utils/hipify/cuda_to_hip_mappings.py | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py -index 73586440e7..9354057a39 100644 ---- a/torch/utils/hipify/cuda_to_hip_mappings.py -+++ b/torch/utils/hipify/cuda_to_hip_mappings.py -@@ -57,6 +57,18 @@ if os.path.isfile(rocm_version_h): - if match: - patch = int(match.group(1)) - rocm_version = (major, minor, patch) -+else: -+ try: -+ hip_version = subprocess.check_output(["hipconfig", "--version"]).decode("utf-8") -+ hip_split = hip_version.split('.') -+ rocm_version = (int(hip_split[0]), int(hip_split[1]), 0) -+ except subprocess.CalledProcessError: -+ print(f"Warning: hipconfig --version failed") -+ except (FileNotFoundError, PermissionError, NotADirectoryError): -+ # Do not print warning. This is okay. This file can also be imported for non-ROCm builds. -+ pass -+ -+ - - # List of math functions that should be replaced inside device code only. 
- MATH_TRANSPILATIONS = collections.OrderedDict( --- -2.43.0 - diff --git a/next/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch similarity index 100% rename from next/0001-can-not-use-with-c-files.patch rename to 0001-can-not-use-with-c-files.patch diff --git a/next/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch similarity index 100% rename from next/0001-cuda-hip-signatures.patch rename to 0001-cuda-hip-signatures.patch diff --git a/0001-disable-as-needed-for-libtorch.patch b/0001-disable-as-needed-for-libtorch.patch deleted file mode 100644 index 392140a..0000000 --- a/0001-disable-as-needed-for-libtorch.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 06499575b177a218846f0e43ff4bc77d245f207f Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 1 Dec 2023 09:38:05 -0500 -Subject: [PATCH] disable as-needed for libtorch - -Signed-off-by: Tom Rix ---- - caffe2/CMakeLists.txt | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index b975d388a7..5e9fd3b3f3 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -914,6 +914,10 @@ if(HAVE_SOVERSION) - VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) - endif() - -+# Disable global as-needed -+set_target_properties(torch PROPERTIES LINK_FLAGS -Wl,--no-as-needed) -+ -+ - if(USE_ROCM) - filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$") - set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) --- -2.42.1 - diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch new file mode 100644 index 0000000..34a1704 --- /dev/null +++ b/0001-disable-use-of-aotriton.patch @@ -0,0 +1,46 @@ +From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Tue, 19 Mar 2024 11:32:37 -0400 +Subject: [PATCH] disable use of aotriton + +--- + aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ + 1 file changed, 6 
insertions(+) + +diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +index 96b839820efd..2d3dd0cb4b0f 100644 +--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp ++++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +@@ -21,9 +21,11 @@ + #include + #include + ++#ifdef USE_FLASH_ATTENTION + #if USE_ROCM + #include + #endif ++#endif + + /** + * Note [SDPA Runtime Dispatch] +@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { + } + + bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { ++#ifdef USE_FLASH_ATTENTION + // Check that the gpu is capable of running flash attention + using sm80 = SMVersion<8, 0>; + using sm90 = SMVersion<9, 0>; +@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug + } + #endif + return true; ++#else ++ return false; ++#endif + } + + bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { +-- +2.44.0 + diff --git a/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch new file mode 100644 index 0000000..0ce5b1f --- /dev/null +++ b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch @@ -0,0 +1,226 @@ +From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001 +From: William Wen +Date: Wed, 3 Apr 2024 17:58:46 -0700 +Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo + unittests on 3.12 (#123216) + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216 +Approved by: https://github.com/jansel, https://github.com/malfet +--- + test/dynamo/test_autograd_function.py | 3 ++ + test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++ + test/functorch/test_eager_transforms.py | 7 ++- + test/run_test.py | 3 -- + torch/__init__.py | 5 +- + torch/_dynamo/eval_frame.py | 4 +- + torch/_dynamo/test_case.py | 8 +--- + 7 
files changed, 74 insertions(+), 19 deletions(-) + +diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py +index d23fec607afa..bc5ebc767038 100644 +--- a/test/dynamo/test_autograd_function.py ++++ b/test/dynamo/test_autograd_function.py +@@ -2,6 +2,8 @@ + + import copy + import math ++import sys ++import unittest + + import torch + +@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase): + # I pulled all of these test cases from test_autograd.py + # In the future, we should make the Dynamo test suite actually + # run on test_autograd.py (it's disabled right now) and delete these. ++ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+") + def test_smoke_from_test_autograd(self): + class Func(torch.autograd.Function): + @staticmethod +diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py +index a73de8b1c7e9..8f54e0564e6b 100644 +--- a/test/dynamo/test_misc.py ++++ b/test/dynamo/test_misc.py +@@ -9760,6 +9760,69 @@ fn + lambda mod: mod, + ) + ++ @xfailIfPy311 ++ def test_outside_linear_module_free(self): ++ # Compared to test_linear_module_free, the linear ++ # layer is not the code object that is directly compiled. 
++ def model_inp_ctr(): ++ fc = torch.nn.Linear(100, 100) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.fc_ref = fc ++ ++ def forward(self, x): ++ return fc(x[0]) ++ ++ # return fc to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), fc) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref) ++ ++ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+") ++ def test_parameter_free(self): ++ def model_inp_ctr(): ++ param = torch.nn.Parameter(torch.randn(100, 100)) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.param = param ++ ++ def forward(self, x): ++ return self.param * x[0] ++ ++ # return param to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), param) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param) ++ ++ def test_raises_importerror1(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ try: ++ import some_module_that_surely_does_not_exist ++ ++ return ++ except ImportError: ++ pass ++ return x.sin() ++ ++ x = torch.randn(8) ++ self.assertEqual(fn(x), x.sin()) ++ ++ def test_raises_importerror2(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ import some_module_that_surely_does_not_exist ++ ++ return x + 1 ++ ++ x = torch.randn(8) ++ with self.assertRaises(ImportError): ++ fn(x) ++ + def test_dynamo_cache_move_to_front(self): + class Mod(torch.nn.Module): + def __init__(self): +diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py +index 09415cf8f48e..60790ec06059 100644 +--- a/test/functorch/test_eager_transforms.py ++++ b/test/functorch/test_eager_transforms.py +@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase): + # Triton only supports GPU with SM70 or later. 
+ @expectedFailureIf((IS_ARM64 and not IS_MACOS) or + IS_WINDOWS or +- (TEST_CUDA and not SM70OrLater) or +- (sys.version_info >= (3, 12))) ++ (TEST_CUDA and not SM70OrLater)) + def test_compile_vmap_hessian(self, device): + # The model and inputs are a smaller version + # of code at benchmark repo: +@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase): + actual = opt_fn(params_and_buffers, x) + self.assertEqual(actual, expected) + +- # torch.compile is not supported on Windows or on Python 3.12+ +- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12))) ++ # torch.compile is not supported on Windows ++ @expectedFailureIf(IS_WINDOWS) + @torch._dynamo.config.patch(suppress_errors=False) + @torch._dynamo.config.patch(capture_func_transforms=True) + @skipIfTorchDynamo("Do not test torch.compile on top of torch.compile") +diff --git a/test/run_test.py b/test/run_test.py +index e86af9623042..ebb14df4167d 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT)) + RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1" + DISTRIBUTED_TEST_PREFIX = "distributed" + INDUCTOR_TEST_PREFIX = "inductor" +-DYNAMO_TEST_PREFIX = "dynamo" + + + # Note [ROCm parallel CI testing] +@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [ + ] + + INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)] +-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)] + DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)] + TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")] + FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")] +@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]: + # these tests failing in Python 3.12 temporarily disabling + if sys.version_info >= (3, 12): + options.exclude.extend(INDUCTOR_TESTS) +- options.exclude.extend(DYNAMO_TESTS) + options.exclude.extend( 
+ [ + "functorch/test_dims", +diff --git a/torch/__init__.py b/torch/__init__.py +index d381712b4a35..26cdffe81d29 100644 +--- a/torch/__init__.py ++++ b/torch/__init__.py +@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *, + + """ + _C._log_api_usage_once("torch.compile") +- # Temporary until we get proper support for python 3.12 +- if sys.version_info >= (3, 12): +- raise RuntimeError("Dynamo is not supported on Python 3.12+") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Dynamo is not supported on Python 3.13+") + + # Decorator mode + if model is None: +diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py +index 53ab0df3a947..0a80eeea99ed 100644 +--- a/torch/_dynamo/eval_frame.py ++++ b/torch/_dynamo/eval_frame.py +@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg] + + + def check_if_dynamo_supported(): +- if sys.version_info >= (3, 12): +- raise RuntimeError("Python 3.12+ not yet supported for torch.compile") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Python 3.13+ not yet supported for torch.compile") + + + def is_dynamo_supported(): +diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py +index e3cbef09eaae..297ea6e2bc2a 100644 +--- a/torch/_dynamo/test_case.py ++++ b/torch/_dynamo/test_case.py +@@ -1,7 +1,6 @@ + import contextlib + import importlib + import logging +-import sys + + import torch + import torch.testing +@@ -20,12 +19,7 @@ log = logging.getLogger(__name__) + def run_tests(needs=()): + from torch.testing._internal.common_utils import run_tests + +- if ( +- TEST_WITH_TORCHDYNAMO +- or IS_WINDOWS +- or TEST_WITH_CROSSREF +- or sys.version_info >= (3, 12) +- ): ++ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF: + return # skip testing + + if isinstance(needs, str): +-- +2.44.0 + diff --git a/next/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch similarity index 100% rename from 
next/0001-no-third_party-FXdiv.patch rename to 0001-no-third_party-FXdiv.patch diff --git a/next/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch similarity index 83% rename from next/0001-no-third_party-fmt.patch rename to 0001-no-third_party-fmt.patch index f2a82f8..6e82af2 100644 --- a/next/0001-no-third_party-fmt.patch +++ b/0001-no-third_party-fmt.patch @@ -1,4 +1,4 @@ -From c46146dc31ed3dc0ebb6ca28c01330db8ba5d4f2 Mon Sep 17 00:00:00 2001 +From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 3 Feb 2024 08:16:04 -0500 Subject: [PATCH] no third_party fmt @@ -10,23 +10,23 @@ Subject: [PATCH] no third_party fmt 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index cb81556ff2..7529b2aec9 100644 +index 1f742f4c176..4fa08913bdd 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -87,7 +87,7 @@ endif() if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) + target_link_libraries(c10 PUBLIC glog::glog) endif() -target_link_libraries(c10 PRIVATE fmt::fmt-header-only) +target_link_libraries(c10 PRIVATE fmt) if(C10_USE_NUMA) - target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) + message(STATUS "NUMA paths:") diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 8310f29e01..c99d0d762a 100644 +index 6f5a2d5feff..42fbf80f6e8 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -1834,7 +1834,7 @@ endif() +@@ -1837,7 +1837,7 @@ endif() # set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) @@ -35,7 +35,7 @@ index 8310f29e01..c99d0d762a 100644 # Disable compiler feature checks for `fmt`. # -@@ -1843,9 +1843,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) +@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) # CMAKE_CXX_FLAGS in ways that break feature checks. 
Since we already know # `fmt` is compatible with a superset of the compilers that PyTorch is, it # shouldn't be too bad to just disable the checks. @@ -48,7 +48,7 @@ index 8310f29e01..c99d0d762a 100644 # ---[ Kineto diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 24903a207e..3a7751dc00 100644 +index 97a72eed55b..9e5014d1980 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES @@ -61,5 +61,5 @@ index 24903a207e..3a7751dc00 100644 if(USE_ASAN AND TARGET Sanitizer::address) -- -2.43.0 +2.43.2 diff --git a/next/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch similarity index 100% rename from next/0001-no-third_party-foxi.patch rename to 0001-no-third_party-foxi.patch diff --git a/0001-python-torch-link-with-python.patch b/0001-python-torch-link-with-python.patch deleted file mode 100644 index d52f034..0000000 --- a/0001-python-torch-link-with-python.patch +++ /dev/null @@ -1,27 +0,0 @@ -From cef92207b79ad53e3fcc1b0e22ba91cb9422968c Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 18 Nov 2023 09:38:52 -0500 -Subject: [PATCH] python-torch link with python - -Signed-off-by: Tom Rix ---- - torch/CMakeLists.txt | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 8d5375f320..6f8c7b65c4 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -312,6 +312,9 @@ add_dependencies(torch_python torch_python_stubs) - add_dependencies(torch_python flatbuffers) - - -+# Unresolved syms in -lpython -+target_link_libraries(torch_python PUBLIC ${PYTHON_LIBRARIES}) -+ - if(USE_PRECOMPILED_HEADERS) - target_precompile_headers(torch_python PRIVATE - "$<$:ATen/ATen.h>") --- -2.42.1 - diff --git a/0001-python-torch-remove-ubuntu-specific-linking.patch b/0001-python-torch-remove-ubuntu-specific-linking.patch deleted file mode 100644 index 6d9bd24..0000000 --- a/0001-python-torch-remove-ubuntu-specific-linking.patch +++ /dev/null @@ -1,33 
+0,0 @@ -From f70ef37d0b3c780fd17be199e66a81ffa679f93e Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 18 Nov 2023 12:05:43 -0500 -Subject: [PATCH] python-torch remove ubuntu specific linking - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 86c34984b2..f7c4a7b05f 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -479,9 +479,12 @@ option(BUILD_EXECUTORCH "Master flag to build Executorch" ON) - # This is a fix for a rare build issue on Ubuntu: - # symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk - # https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu --if(LINUX) -- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") --endif() -+ -+# This is not ubuntu! -+# if(LINUX) -+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") -+# endif() -+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--as-needed") - - if(MSVC) - set(CMAKE_NINJA_CMCLDEPS_RC OFF) --- -2.42.1 - diff --git a/0001-pytorch-use-SO-version-by-default.patch b/0001-pytorch-use-SO-version-by-default.patch deleted file mode 100644 index b70d6ec..0000000 --- a/0001-pytorch-use-SO-version-by-default.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 527d1ce24a06a14788ca5fc2411985d7c1cb2923 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 13 Oct 2023 05:35:19 -0700 -Subject: [PATCH] pytorch use SO version by default - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 902ee70fd1..86c34984b2 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -340,7 +340,7 @@ option(USE_TBB "Use TBB (Deprecated)" OFF) - cmake_dependent_option( - USE_SYSTEM_TBB "Use system-provided Intel TBB." 
OFF "USE_TBB" OFF) - option(ONNX_ML "Enable traditional ONNX ML API." ON) --option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF) -+option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" ON) - option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF) - cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) - option(WERROR "Build with -Werror supported by the compiler" OFF) --- -2.42.1 - diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch new file mode 100644 index 0000000..8e39795 --- /dev/null +++ b/0001-reenable-foxi-linking.patch @@ -0,0 +1,25 @@ +From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 22 Feb 2024 09:28:11 -0500 +Subject: [PATCH] reenable foxi linking + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 42fbf80f6e8..bc3a2dc6fee 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + endif() + # include_directories(${FOXI_INCLUDE_DIRS}) +-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. 
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.2 + diff --git a/next/0001-silence-an-assert.patch b/0001-silence-an-assert.patch similarity index 100% rename from next/0001-silence-an-assert.patch rename to 0001-silence-an-assert.patch diff --git a/0001-torch-sane-version.patch b/0001-torch-sane-version.patch deleted file mode 100644 index cb1211a..0000000 --- a/0001-torch-sane-version.patch +++ /dev/null @@ -1,32 +0,0 @@ -From c47c6e202d60ccac15aa36698bd4788415a9416b Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 25 Nov 2023 16:46:17 -0500 -Subject: [PATCH] torch sane version - ---- - tools/generate_torch_version.py | 1 + - version.txt | 2 +- - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tools/generate_torch_version.py b/tools/generate_torch_version.py -index d90d3646ab..11d5bbeba5 100644 ---- a/tools/generate_torch_version.py -+++ b/tools/generate_torch_version.py -@@ -42,6 +42,7 @@ def get_tag(pytorch_root: Union[str, Path]) -> str: - def get_torch_version(sha: Optional[str] = None) -> str: - pytorch_root = Path(__file__).parent.parent - version = open(pytorch_root / "version.txt").read().strip() -+ return version - - if os.getenv("PYTORCH_BUILD_VERSION"): - assert os.getenv("PYTORCH_BUILD_NUMBER") is not None -diff --git a/version.txt b/version.txt -index ecaf4eea7c..7ec1d6db40 100644 ---- a/version.txt -+++ b/version.txt -@@ -1,1 +1,1 @@ --2.1.0a0 -+2.1.2 --- -2.42.1 - diff --git a/next/0001-use-any-hip.patch b/0001-use-any-hip.patch similarity index 100% rename from next/0001-use-any-hip.patch rename to 0001-use-any-hip.patch diff --git a/0002-Regenerate-flatbuffer-header.patch b/0002-Regenerate-flatbuffer-header.patch deleted file mode 100644 index 662a7c5..0000000 --- a/0002-Regenerate-flatbuffer-header.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 587a8b10bd3f7a68275356ee6eb6bb43ed711ba2 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:19:29 -0700 -Subject: [PATCH 2/6] Regenerate flatbuffer 
header - -For this error -torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: -error: static assertion failed: Non-compatible flatbuffers version included - 12 | FLATBUFFERS_VERSION_MINOR == 3 && - -PyTorch is expecting 23.3.3, what f38 has -Rawhide is at 23.5.26 - -Regenerate with -flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs - -Signed-off-by: Tom Rix ---- - torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -index cffe8bc7a6..83575e4c19 100644 ---- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h -+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -@@ -9,8 +9,8 @@ - // Ensure the included flatbuffers.h is the same version as when this file was - // generated, otherwise it may not be compatible. - static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && -- FLATBUFFERS_VERSION_MINOR == 3 && -- FLATBUFFERS_VERSION_REVISION == 3, -+ FLATBUFFERS_VERSION_MINOR == 5 && -+ FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - - namespace torch { --- -2.41.0 - diff --git a/0003-Stub-in-kineto-ActivityType.patch b/0003-Stub-in-kineto-ActivityType.patch deleted file mode 100644 index 0823acb..0000000 --- a/0003-Stub-in-kineto-ActivityType.patch +++ /dev/null @@ -1,73 +0,0 @@ -From bb52aeacc6dfab2355249b7b5beb72c2761ec319 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:25:23 -0700 -Subject: [PATCH 3/6] Stub in kineto ActivityType - -There is an error with kineto is not used, the shim still -requires the ActivityTYpe.h header to get the enum Activity type. -So cut-n-paste just enough of the header in to do this. 
- -Signed-off-by: Tom Rix ---- - torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index 2a410719a1..7d6525befd 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,51 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
-+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include - #include --- -2.41.0 - diff --git a/0004-torch-python-3.12-changes.patch b/0004-torch-python-3.12-changes.patch deleted file mode 100644 index bdcec7f..0000000 --- a/0004-torch-python-3.12-changes.patch +++ /dev/null @@ -1,45 +0,0 @@ -From fcf3cd70229cdc729d05ddab081ac886c9db6bd7 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 13:58:28 -0700 -Subject: [PATCH] torch python 3.12 changes - -Signed-off-by: Tom Rix ---- - functorch/csrc/dim/dim.cpp | 6 ++++++ - torch/csrc/dynamo/cpython_defs.h | 2 +- - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp -index b611dc3e8c2..c7009478aee 100644 ---- a/functorch/csrc/dim/dim.cpp -+++ b/functorch/csrc/dim/dim.cpp -@@ -10,7 +10,13 @@ - // Many APIs have changed/don't exist anymore - #if IS_PYTHON_3_12_PLUS - -+#include "dim.h" -+ - // Re-enable this some day -+PyObject* Dim_init() { -+ PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); -+ return nullptr; -+} - - #else - -diff --git a/torch/csrc/dynamo/cpython_defs.h b/torch/csrc/dynamo/cpython_defs.h -index f0a0e1a88e2..f58becd246e 100644 ---- a/torch/csrc/dynamo/cpython_defs.h -+++ b/torch/csrc/dynamo/cpython_defs.h -@@ -6,7 +6,7 @@ - // should go in cpython_defs.c. Copying is required when, e.g., - // we need to call internal CPython functions that are not exposed. 
- --#if IS_PYTHON_3_11_PLUS && !(IS_PYTHON_3_12_PLUS) -+#if IS_PYTHON_3_11_PLUS - - #include - --- -2.43.0 - diff --git a/0005-disable-submodule-search.patch b/0005-disable-submodule-search.patch deleted file mode 100644 index 1b5509f..0000000 --- a/0005-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From fc0d4ce06fecbd2bcd10fb13c515dc6625260870 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH 5/6] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 17bf16b89a..b8c8ae5506 100644 ---- a/setup.py -+++ b/setup.py -@@ -452,7 +452,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.41.0 - diff --git a/README.NVIDIA b/README.NVIDIA new file mode 100644 index 0000000..b927f47 --- /dev/null +++ b/README.NVIDIA @@ -0,0 +1,15 @@ +Some help for building this package for NVIDIA/CUDA + +Review NVIDIA's documenation +https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html + +Review PyTorch documentation +https://github.com/pytorch/pytorch#from-source + +Some convience strings to cut-n-paste + +F39 +dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo + +Building is local. +Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. 
diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..0503793 --- /dev/null +++ b/license.txt @@ -0,0 +1,350 @@ +# +# License Details +# Main license BSD 3-Clause +# +# Apache-2.0 +# android/libs/fbjni/LICENSE +# android/libs/fbjni/CMakeLists.txt +# android/libs/fbjni/build.gradle +# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp +# android/libs/fbjni/cxx/fbjni/ByteBuffer.h +# android/libs/fbjni/cxx/fbjni/Context.h +# android/libs/fbjni/cxx/fbjni/File.h +# android/libs/fbjni/cxx/fbjni/JThread.h +# android/libs/fbjni/cxx/fbjni/NativeRunnable.h +# android/libs/fbjni/cxx/fbjni/OnLoad.cpp +# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp +# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h +# android/libs/fbjni/cxx/fbjni/detail/Boxed.h +# android/libs/fbjni/cxx/fbjni/detail/Common.h +# android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h +# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h +# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp +# android/libs/fbjni/cxx/fbjni/detail/Environment.h +# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp +# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h +# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h +# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp +# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h +# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Iterator.h +# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h +# android/libs/fbjni/cxx/fbjni/detail/Log.h +# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h +# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp +# android/libs/fbjni/cxx/fbjni/detail/Meta.h +# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h +# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h +# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h +# android/libs/fbjni/cxx/fbjni/detail/References-forward.h +# android/libs/fbjni/cxx/fbjni/detail/References-inl.h +# 
android/libs/fbjni/cxx/fbjni/detail/References.cpp +# android/libs/fbjni/cxx/fbjni/detail/References.h +# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Registration.h +# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h +# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h +# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp +# android/libs/fbjni/cxx/fbjni/detail/utf8.h +# android/libs/fbjni/cxx/fbjni/fbjni.cpp +# android/libs/fbjni/cxx/fbjni/fbjni.h +# android/libs/fbjni/cxx/lyra/cxa_throw.cpp +# android/libs/fbjni/cxx/lyra/lyra.cpp +# android/libs/fbjni/cxx/lyra/lyra.h +# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp +# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp +# android/libs/fbjni/cxx/lyra/lyra_exceptions.h +# android/libs/fbjni/gradle.properties +# android/libs/fbjni/gradle/android-tasks.gradle +# android/libs/fbjni/gradle/release.gradle +# android/libs/fbjni/gradlew +# android/libs/fbjni/gradlew.bat +# android/libs/fbjni/host.gradle +# android/libs/fbjni/java/com/facebook/jni/CppException.java +# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java +# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java +# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java +# android/libs/fbjni/java/com/facebook/jni/HybridData.java +# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java +# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java +# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java +# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java +# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java +# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java +# android/libs/fbjni/scripts/android-setup.sh +# android/libs/fbjni/scripts/run-host-tests.sh +# android/libs/fbjni/settings.gradle +# android/libs/fbjni/test/BaseFBJniTests.java +# android/libs/fbjni/test/ByteBufferTests.java +# 
android/libs/fbjni/test/DocTests.java +# android/libs/fbjni/test/FBJniTests.java +# android/libs/fbjni/test/HybridTests.java +# android/libs/fbjni/test/IteratorTests.java +# android/libs/fbjni/test/PrimitiveArrayTests.java +# android/libs/fbjni/test/ReadableByteChannelTests.java +# android/libs/fbjni/test/jni/CMakeLists.txt +# android/libs/fbjni/test/jni/byte_buffer_tests.cpp +# android/libs/fbjni/test/jni/doc_tests.cpp +# android/libs/fbjni/test/jni/expect.h +# android/libs/fbjni/test/jni/fbjni_onload.cpp +# android/libs/fbjni/test/jni/fbjni_tests.cpp +# android/libs/fbjni/test/jni/hybrid_tests.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h +# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h +# android/libs/fbjni/test/jni/iterator_tests.cpp +# android/libs/fbjni/test/jni/modified_utf8_test.cpp +# android/libs/fbjni/test/jni/no_rtti.cpp +# android/libs/fbjni/test/jni/no_rtti.h +# android/libs/fbjni/test/jni/primitive_array_tests.cpp +# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp +# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp +# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp +# android/pytorch_android/host/build.gradle +# aten/src/ATen/cuda/llvm_basic.cpp +# aten/src/ATen/cuda/llvm_complex.cpp +# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h +# aten/src/ATen/nnapi/codegen.py +# aten/src/ATen/nnapi/NeuralNetworks.h +# aten/src/ATen/nnapi/nnapi_wrapper.cpp +# aten/src/ATen/nnapi/nnapi_wrapper.h +# binaries/benchmark_args.h +# binaries/benchmark_helper.cc +# binaries/benchmark_helper.h +# binaries/compare_models_torch.cc +# 
binaries/convert_and_benchmark.cc +# binaries/convert_caffe_image_db.cc +# binaries/convert_db.cc +# binaries/convert_encoded_to_raw_leveldb.cc +# binaries/convert_image_to_tensor.cc +# binaries/core_overhead_benchmark.cc +# binaries/core_overhead_benchmark_gpu.cc +# binaries/db_throughput.cc +# binaries/dump_operator_names.cc +# binaries/inspect_gpu.cc +# binaries/load_benchmark_torch.cc +# binaries/make_cifar_db.cc +# binaries/make_image_db.cc +# binaries/make_mnist_db.cc +# binaries/optimize_for_mobile.cc +# binaries/parallel_info.cc +# binaries/predictor_verifier.cc +# binaries/print_core_object_sizes_gpu.cc +# binaries/print_registered_core_operators.cc +# binaries/run_plan.cc +# binaries/run_plan_mpi.cc +# binaries/speed_benchmark.cc +# binaries/speed_benchmark_torch.cc +# binaries/split_db.cc +# binaries/tsv_2_proto.cc +# binaries/tutorial_blob.cc +# binaries/zmq_feeder.cc +# c10/test/util/small_vector_test.cpp +# c10/util/FunctionRef.h +# c10/util/SmallVector.cpp +# c10/util/SmallVector.h +# c10/util/llvmMathExtras.h +# c10/util/sparse_bitset.h +# caffe2/contrib/aten/gen_op.py +# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc +# caffe2/contrib/fakelowp/fp16_fc_acc_op.h +# caffe2/contrib/gloo/allgather_ops.cc +# caffe2/contrib/gloo/allgather_ops.h +# caffe2/contrib/gloo/reduce_scatter_ops.cc +# caffe2/contrib/gloo/reduce_scatter_ops.h +# caffe2/core/hip/common_miopen.h +# caffe2/core/hip/common_miopen.hip +# caffe2/core/net_async_tracing.cc +# caffe2/core/net_async_tracing.h +# caffe2/core/net_async_tracing_test.cc +# caffe2/experiments/operators/fully_connected_op_decomposition.cc +# caffe2/experiments/operators/fully_connected_op_decomposition.h +# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc +# caffe2/experiments/operators/fully_connected_op_prune.cc +# caffe2/experiments/operators/fully_connected_op_prune.h +# caffe2/experiments/operators/fully_connected_op_sparse.cc +# caffe2/experiments/operators/fully_connected_op_sparse.h +# 
caffe2/experiments/operators/funhash_op.cc +# caffe2/experiments/operators/funhash_op.h +# caffe2/experiments/operators/sparse_funhash_op.cc +# caffe2/experiments/operators/sparse_funhash_op.h +# caffe2/experiments/operators/sparse_matrix_reshape_op.cc +# caffe2/experiments/operators/sparse_matrix_reshape_op.h +# caffe2/experiments/operators/tt_contraction_op.cc +# caffe2/experiments/operators/tt_contraction_op.h +# caffe2/experiments/operators/tt_contraction_op_gpu.cc +# caffe2/experiments/operators/tt_pad_op.cc +# caffe2/experiments/operators/tt_pad_op.h +# caffe2/experiments/python/SparseTransformer.py +# caffe2/experiments/python/convnet_benchmarks.py +# caffe2/experiments/python/device_reduce_sum_bench.py +# caffe2/experiments/python/funhash_op_test.py +# caffe2/experiments/python/net_construct_bench.py +# caffe2/experiments/python/sparse_funhash_op_test.py +# caffe2/experiments/python/sparse_reshape_op_test.py +# caffe2/experiments/python/tt_contraction_op_test.py +# caffe2/experiments/python/tt_pad_op_test.py +# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h +# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h +# caffe2/mobile/contrib/nnapi/NeuralNetworks.h +# caffe2/mobile/contrib/nnapi/dlnnapi.c +# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc +# caffe2/observers/profile_observer.cc +# caffe2/observers/profile_observer.h +# caffe2/operators/hip/conv_op_miopen.hip +# caffe2/operators/hip/local_response_normalization_op_miopen.hip +# caffe2/operators/hip/pool_op_miopen.hip +# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip +# caffe2/operators/quantized/int8_utils.h +# caffe2/operators/stump_func_op.cc +# caffe2/operators/stump_func_op.cu +# caffe2/operators/stump_func_op.h +# caffe2/operators/unique_ops.cc +# caffe2/operators/unique_ops.cu +# caffe2/operators/unique_ops.h +# caffe2/operators/upsample_op.cc +# caffe2/operators/upsample_op.h +# caffe2/opt/fusion.h +# caffe2/python/layers/label_smooth.py +# 
caffe2/python/mint/static/css/simple-sidebar.css +# caffe2/python/modeling/get_entry_from_blobs.py +# caffe2/python/modeling/get_entry_from_blobs_test.py +# caffe2/python/modeling/gradient_clipping_test.py +# caffe2/python/operator_test/unique_ops_test.py +# caffe2/python/operator_test/upsample_op_test.py +# caffe2/python/operator_test/weight_scale_test.py +# caffe2/python/pybind_state_int8.cc +# caffe2/python/transformations.py +# caffe2/python/transformations_test.py +# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc +# caffe2/quantization/server/batch_matmul_dnnlowp_op.h +# caffe2/quantization/server/compute_equalization_scale_test.py +# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc +# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h +# caffe2/quantization/server/elementwise_sum_relu_op.cc +# caffe2/quantization/server/fb_fc_packed_op.cc +# caffe2/quantization/server/fb_fc_packed_op.h +# caffe2/quantization/server/fbgemm_fp16_pack_op.cc +# caffe2/quantization/server/fbgemm_fp16_pack_op.h +# caffe2/quantization/server/fully_connected_fake_lowp_op.cc +# caffe2/quantization/server/fully_connected_fake_lowp_op.h +# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py +# caffe2/quantization/server/int8_gen_quant_params_test.py +# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py +# caffe2/quantization/server/spatial_batch_norm_relu_op.cc +# caffe2/sgd/weight_scale_op.cc +# caffe2/sgd/weight_scale_op.h +# caffe2/utils/bench_utils.h +# functorch/examples/maml_omniglot/maml-omniglot-higher.py +# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py +# functorch/examples/maml_omniglot/maml-omniglot-transforms.py +# functorch/examples/maml_omniglot/support/omniglot_loaders.py +# modules/detectron/group_spatial_softmax_op.cc +# modules/detectron/group_spatial_softmax_op.cu +# modules/detectron/group_spatial_softmax_op.h +# modules/detectron/ps_roi_pool_op.cc +# modules/detectron/ps_roi_pool_op.h +# 
modules/detectron/roi_pool_f_op.cc +# modules/detectron/roi_pool_f_op.cu +# modules/detectron/roi_pool_f_op.h +# modules/detectron/sample_as_op.cc +# modules/detectron/sample_as_op.cu +# modules/detectron/sample_as_op.h +# modules/detectron/select_smooth_l1_loss_op.cc +# modules/detectron/select_smooth_l1_loss_op.cu +# modules/detectron/select_smooth_l1_loss_op.h +# modules/detectron/sigmoid_cross_entropy_loss_op.cc +# modules/detectron/sigmoid_cross_entropy_loss_op.cu +# modules/detectron/sigmoid_cross_entropy_loss_op.h +# modules/detectron/sigmoid_focal_loss_op.cc +# modules/detectron/sigmoid_focal_loss_op.cu +# modules/detectron/sigmoid_focal_loss_op.h +# modules/detectron/smooth_l1_loss_op.cc +# modules/detectron/smooth_l1_loss_op.cu +# modules/detectron/smooth_l1_loss_op.h +# modules/detectron/softmax_focal_loss_op.cc +# modules/detectron/softmax_focal_loss_op.cu +# modules/detectron/softmax_focal_loss_op.h +# modules/detectron/spatial_narrow_as_op.cc +# modules/detectron/spatial_narrow_as_op.cu +# modules/detectron/spatial_narrow_as_op.h +# modules/detectron/upsample_nearest_op.cc +# modules/detectron/upsample_nearest_op.h +# modules/module_test/module_test_dynamic.cc +# modules/rocksdb/rocksdb.cc +# scripts/apache_header.txt +# scripts/apache_python.txt +# torch/distributions/lkj_cholesky.py +# +# Apache 2.0 AND BSD 2-Clause +# caffe2/operators/deform_conv_op.cu +# +# Apache 2.0 AND BSD 2-Clause AND MIT +# modules/detectron/ps_roi_pool_op.cu +# +# Apache 2.0 AND BSD 2-Clause +# modules/detectron/upsample_nearest_op.cu +# +# BSD 0-Clause +# torch/csrc/utils/pythoncapi_compat.h +# +# BSD 2-Clause +# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE +# caffe2/image/transform_gpu.cu +# caffe2/image/transform_gpu.h +# +# BSL-1.0 +# c10/util/flat_hash_map.h +# c10/util/hash.h +# c10/util/Optional.h +# c10/util/order_preserving_flat_hash_map.h +# c10/util/strong_type.h +# c10/util/variant.h +# +# GPL-3.0-or-later AND MIT +# c10/util/reverse_iterator.h +# 
+# Khronos +# These files are for OpenCL, an unused option +# Replace them later, as-needed with the opencl-headers.rpm +# +# caffe2/contrib/opencl/OpenCL/cl.hpp +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h +# +# MIT +# android/libs/fbjni/googletest-CMakeLists.txt.in +# c10/util/BFloat16-math.h +# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h +# caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c +# caffe2/onnx/torch_ops/defs.cc +# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake +# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake +# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake +# functorch/einops/_parsing.py +# test/functorch/test_parsing.py +# test/functorch/test_rearrange.py +# third_party/miniz-2.1.0/LICENSE +# third_party/miniz-2.1.0/miniz.c +# tools/coverage_plugins_package/setup.py +# torch/_appdirs.py +# torch/utils/hipify/hipify_python.py +# +# Public Domain +# caffe2/mobile/contrib/libopencl-stub/LICENSE +# caffe2/utils/murmur_hash3.cc +# caffe2/utils/murmur_hash3.h +# +# Zlib +# aten/src/ATen/native/cpu/avx_mathfun.h + diff --git a/next/0001-disable-submodule-search.patch b/next/0001-disable-submodule-search.patch deleted file mode 100644 index b830fa6..0000000 --- a/next/0001-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 
0fd886d945..e397df8fb6 100644 ---- a/setup.py -+++ b/setup.py -@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.43.0 - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9508ad0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,154 @@ +[build-system] +requires = [ + "setuptools", + "wheel", + "astunparse", + "numpy", + "ninja", + "pyyaml", + "cmake", + "typing-extensions", + "requests", +] +# Use legacy backend to import local packages in setup.py +build-backend = "setuptools.build_meta:__legacy__" + + +[tool.black] +# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 +# line-length = 120 +target-version = ["py38", "py39", "py310", "py311"] + + +[tool.ruff] +target-version = "py38" + +# NOTE: Synchoronize the ignores with .flake8 +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B019", + "B023", + "B028", # No explicit `stacklevel` keyword argument found + "B904", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F841", + # these ignores are from flake8-logging-format; please fix! + "G101", + # these ignores are from ruff NPY; please fix! + "NPY002", + # these ignores are from ruff PERF; please fix! + "PERF203", + "PERF401", + "PERF403", + # these ignores are from PYI; please fix! + "PYI019", + "PYI024", + "PYI036", + "PYI041", + "PYI056", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", + "UP006", # keep-runtime-typing + "UP007", # keep-runtime-typing +] +line-length = 120 +select = [ + "B", + "C4", + "G", + "E", + "EXE", + "F", + "SIM1", + "W", + # Not included in flake8 + "NPY", + "PERF", + "PGH004", + "PIE794", + "PIE800", + "PIE804", + "PIE807", + "PIE810", + "PLC0131", # type bivariance + "PLC0132", # type param mismatch + "PLC0205", # string as __slots__ + "PLE", + "PLR0133", # constant comparison + "PLR0206", # property with params + "PLR1722", # use sys exit + "PLW0129", # assert on string literal + "PLW0406", # import self + "PLW0711", # binary op exception + "PLW1509", # preexec_fn not safe with threads + "PLW3301", # nested min max + "PT006", # TODO: enable more PT rules + "PT022", + "PT023", + "PT024", + "PT025", + "PT026", + "PYI", + "RUF008", # mutable dataclass default + "RUF015", # access first ele in constant time + "RUF016", # type error non-integer index + "RUF017", + "TRY200", + "TRY302", + "UP", +] + +[tool.ruff.per-file-ignores] +"__init__.py" = [ + "F401", +] +"test/typing/reveal/**" = [ + "F821", +] +"test/torch_np/numpy_tests/**" = [ + "F821", +] +"test/jit/**" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/test_jit.py" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] + +"torch/onnx/**" = [ + "UP037", # ONNX does runtime type checking +] + +"torchgen/api/types/__init__.py" = [ + "F401", + "F403", +] +"torchgen/executorch/api/types/__init__.py" = [ + "F401", + "F403", +] +"torch/utils/collect_env.py" = [ + "UP", # collect_env.py needs to work with older versions of Python +] diff --git 
a/python-torch.spec b/python-torch.spec index 5f4e2e0..6f102ca 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -6,14 +6,13 @@ # So pre releases can be tried %bcond_with gitcommit %if %{with gitcommit} -# The top of tree ~2/6/24 -%global commit0 064610d8ac53f3f5916a1dc8b43acbeeb2469c11 +# ToT +%global commit0 75b0720a97ac5d82e8a7a1a6ae7c5f7a87d7183d %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) - -%global pypi_version 2.3.0 +%global date0 20240609 +%global pypi_version 2.4.0 %else -%global pypi_version 2.1.2 - +%global pypi_version 2.3.1 %endif # For -test subpackage @@ -23,161 +22,373 @@ # /usr/lib64/python3.12/site-packages/torch/bin/test_api, test_lazy %bcond_with test -# For testing rocm -# Not viable on 2.1.2, use --with gitcommit +%ifarch x86_64 +# ROCm support came in F40 +%if 0%{?fedora} > 39 +%bcond_without rocm +%else %bcond_with rocm +%endif +%endif +# hipblaslt is in development +%bcond_with hipblaslt +# Which families gpu build for +%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 gfx90a gfx942 gfx1100 +%global rocm_default_gpu default +%bcond_without rocm_loop -# For testing openmp -%bcond_with openmp - -# For testing caffe2 +# Caffe2 support came in F41 +%if 0%{?fedora} > 40 +%bcond_without caffe2 +%else %bcond_with caffe2 +%endif -# For testing distributed +# Distributed support came in F41 +%if 0%{?fedora} > 40 +%bcond_without distributed +# For testing distributed+rccl etc. 
+%bcond_without rccl +%bcond_with gloo +%bcond_without mpi +%bcond_without tensorpipe +%else %bcond_with distributed +%endif + +# OpenCV support came in F41 +%if 0%{?fedora} > 40 +%bcond_without opencv +%else +%bcond_with opencv +%endif + +# Do no confuse xnnpack versions +%if 0%{?fedora} > 40 +%bcond_without xnnpack +%else +%bcond_with xnnpack +%endif + +%if 0%{?fedora} > 39 +%bcond_without pthreadpool +%else +%bcond_with pthreadpool +%endif + +%if 0%{?fedora} > 39 +%bcond_without pocketfft +%else +%bcond_with pocketfft +%endif + +# For testing cuda +%ifarch x86_64 +%bcond_with cuda +%endif + +# For testing compat-gcc +%global compat_gcc_major 13 +%bcond_with compat_gcc + +# Disable dwz with rocm because memory can be exhausted +%if %{with rocm} +%define _find_debuginfo_dwz_opts %{nil} +%endif + +%if %{with cuda} +# workaround problems with -pie +%global build_cxxflags %{nil} +%global build_ldflags %{nil} +%endif + +# These came in 2.4 and not yet in Fedora +%if %{with gitcommit} +%bcond_with opentelemetry +%bcond_with httplib +%bcond_with kineto +%else +%bcond_without opentelemetry +%bcond_without httplib +%bcond_without kineto +%endif Name: python-%{pypi_name} +%if %{with gitcommit} +Version: %{pypi_version}^git%{date0}.%{shortcommit0} +%else Version: %{pypi_version} +%endif Release: %autorelease Summary: PyTorch AI/ML framework -# See below for details +# See license.txt for license details License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AND BSL-1.0 AND GPL-3.0-or-later AND Zlib URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source1: pyproject.toml +Source1000: pyproject.toml %else Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz + +%if %{with cuda} +%global cuf_ver 1.1.2 
+Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz +%global cul_ver 3.4.1 +Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz +%endif + +%if %{with tensorpipe} +# Developement on tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit +%global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e +%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) +Source20: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz +# The old libuv tensorpipe uses +Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz +# Developement afaik on libnop has stopped, this is the last commit +%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 +%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) +Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz +%endif + +%if %{without xnnpack} +%global xnn_commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2 +%global xnn_scommit %(c=%{xnn_commit}; echo ${c:0:7}) +Source30: https://github.com/google/xnnpack/archive/%{xnn_commit}/xnnpack-%{xnn_scommit}.tar.gz +%global fx_commit 63058eff77e11aa15bf531df5dd34395ec3017c8 +%global fx_scommit %(c=%{fx_commit}; echo ${c:0:7}) +Source31: https://github.com/Maratyszcza/fxdiv/archive/%{fx_commit}/FXdiv-%{fx_scommit}.tar.gz +%global fp_commit 0a92994d729ff76a58f692d3028ca1b64b145d91 +%global fp_scommit %(c=%{fp_commit}; echo ${c:0:7}) +Source32: https://github.com/Maratyszcza/FP16/archive/%{fp_commit}/FP16-%{fp_scommit}.tar.gz +%global ps_commit 072586a71b55b7f8c584153d223e95687148a900 +%global ps_scommit %(c=%{ps_commit}; echo ${c:0:7}) +Source33: https://github.com/Maratyszcza/psimd/archive/%{ps_commit}/psimd-%{ps_scommit}.tar.gz +%endif + +%if %{without pthreadpool} +%global pt_commit 4fe0e1e183925bf8cfa6aae24237e724a96479b8 +%global pt_scommit %(c=%{pt_commit}; echo ${c:0:7}) +Source40: 
https://github.com/Maratyszcza/pthreadpool/archive/%{pt_commit}/pthreadpool-%{pt_scommit}.tar.gz +%endif + +%if %{without pocketfft} +%global pf_commit 076cb3d2536b7c5d0629093ad886e10ac05f3623 +%global pf_scommit %(c=%{pf_commit}; echo ${c:0:7}) +Source50: https://github.com/mreineck/pocketfft/archive/%{pf_commit}/pocketfft-%{pf_scommit}.tar.gz +%endif %if %{with gitcommit} +%if %{without opentelemetry} +%global ot_ver 1.14.2 +Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz +%endif + +%if %{without httplib} +%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 +%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) +Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz +%endif + +%if %{without kineto} +%global ki_commit be1317644c68b4bfc4646024a6b221066e430031 +%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) +Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz +%endif +%endif Patch0: 0001-no-third_party-foxi.patch -Patch1: 0001-no-third_party-fmt.patch -Patch2: 0001-no-third_party-FXdiv.patch + +%if %{without gitcommit} Patch3: 0001-Stub-in-kineto-ActivityType.patch -Patch4: 0001-Regenerate-flatbuffer-header.patch -Patch5: 0001-disable-submodule-search.patch - -%if %{with rocm} -Patch100: 0001-cuda-hip-signatures.patch -Patch101: 0001-silence-an-assert.patch -Patch102: 0001-can-not-use-with-c-files.patch -Patch103: 0001-use-any-hip.patch %endif -%else -# Misc cmake changes that would be difficult to upstream -# * Use the system fmt -# * Remove foxi use -# * Remove warnings/errors for clang 17 -# * fxdiv is not a library on Fedora -Patch0: 0001-Prepare-pytorch-cmake-for-fedora.patch -# Use Fedora's fmt -Patch1: 0002-Regenerate-flatbuffer-header.patch -# https://github.com/pytorch/pytorch/pull/111048 -Patch2: 0003-Stub-in-kineto-ActivityType.patch -# PyTorch has not fully baked 3.12 support because 3.12 is so new -Patch3: 
0004-torch-python-3.12-changes.patch -# Short circuit looking for things that can not be downloade by mock -Patch4: 0005-disable-submodule-search.patch -# libtorch_python.so: undefined symbols: Py* -Patch6: 0001-python-torch-link-with-python.patch -# E: unused-direct-shlib-dependency libshm.so.2.1.0 libtorch.so.2.1 -# turn on as-needed globally -Patch7: 0001-python-torch-remove-ubuntu-specific-linking.patch -# Tries to use git and is confused by tarball -Patch8: 0001-torch-sane-version.patch -# libtorch is a wrapper so turn off as-needed locally -# resolves this rpmlint -# E: shared-library-without-dependency-information libtorch.so.2.1.0 -# causes these -# E: unused-direct-shlib-dependency libtorch.so.2.1.0 libtorch_cpu.so.2.1 -# etc. -# As a wrapper library, this should be the expected behavior. -Patch9: 0001-disable-as-needed-for-libtorch.patch +%if %{with caffe2} +Patch6: 0001-reenable-foxi-linking.patch +%endif + +# Bring some patches forward +%if %{without gitcommit} +# https://github.com/pytorch/pytorch/pull/123384 +Patch7: 0001-Reenable-dim-for-python-3.12.patch + +# Dynamo/Inductor on 3.12 +# Fails to apply on 2.3.1 +# Patch8: 0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch +%endif + +# ROCm patches +# Patches need to be refactored for ToT +%if %{without gitcommit} +# These are ROCm packages +%if %{without cuda} +# https://github.com/pytorch/pytorch/pull/120551 +Patch100: 0001-Optionally-use-hipblaslt.patch +Patch101: 0001-cuda-hip-signatures.patch +Patch102: 0001-silence-an-assert.patch +Patch103: 0001-can-not-use-with-c-files.patch +Patch104: 0001-use-any-hip.patch +Patch105: 0001-disable-use-of-aotriton.patch +%endif %endif -# Limit to these because they are well behaved with clang ExclusiveArch: x86_64 aarch64 -%global toolchain clang +%global toolchain gcc +%global _lto_cflags %nil -BuildRequires: clang-devel BuildRequires: cmake BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel BuildRequires: fmt-devel -BuildRequires: 
flatbuffers-devel -BuildRequires: FP16-devel -BuildRequires: fxdiv-devel +%if %{with caffe2} +BuildRequires: foxi-devel +%endif + +%if %{with compat_gcc} +BuildRequires: compat-gcc-%{compat_gcc_major}-c++ +BuildRequires: compat-gcc-%{compat_gcc_major}-gfortran +%else BuildRequires: gcc-c++ BuildRequires: gcc-gfortran +%endif + %if %{with distributed} +%if %{with gloo} BuildRequires: gloo-devel %endif +%endif BuildRequires: ninja-build BuildRequires: onnx-devel -BuildRequires: openblas-devel -BuildRequires: pocketfft-devel -%if %{with caffe2} -BuildRequires: protobuf-lite-devel -%else -BuildRequires: protobuf-devel +BuildRequires: libomp-devel +%if %{with distributed} +%if %{with mpi} +BuildRequires: openmpi-devel %endif -BuildRequires: pthreadpool-devel -BuildRequires: psimd-devel -BuildRequires: python3-numpy -BuildRequires: python3-pybind11 -BuildRequires: python3-pyyaml -BuildRequires: python3-typing-extensions +%endif +BuildRequires: openblas-devel +BuildRequires: protobuf-devel BuildRequires: sleef-devel BuildRequires: valgrind-devel -BuildRequires: xnnpack-devel = 0.0^git20221221.51a9875 + +%if %{with pocketfft} +BuildRequires: pocketfft-devel +%endif + +%if %{with pthreadpool} +BuildRequires: pthreadpool-devel +%endif + +%if %{with xnnpack} +BuildRequires: FP16-devel +BuildRequires: fxdiv-devel +BuildRequires: psimd-devel +BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a +%endif BuildRequires: python3-devel BuildRequires: python3dist(filelock) -BuildRequires: python3dist(fsspec) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) +BuildRequires: python3dist(numpy) +BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) -BuildRequires: python3dist(sympy) -BuildRequires: python3dist(typing-extensions) BuildRequires: python3dist(sphinx) +BuildRequires: python3dist(typing-extensions) + +%if 0%{?fedora} +BuildRequires: python3-pybind11 +BuildRequires: python3dist(fsspec) +BuildRequires: python3dist(sympy) +%endif %if 
%{with rocm} BuildRequires: hipblas-devel +%if %{with hipblaslt} BuildRequires: hipblaslt-devel +%endif BuildRequires: hipcub-devel BuildRequires: hipfft-devel +BuildRequires: hiprand-devel BuildRequires: hipsparse-devel BuildRequires: hipsolver-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel +BuildRequires: rocrand-devel +BuildRequires: rocfft-devel %if %{with distributed} +%if %{with rccl} BuildRequires: rccl-devel %endif +%endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel +BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros BuildRequires: rocm-rpm-macros-modules BuildRequires: rocthrust-devel +BuildRequires: roctracer-devel Requires: rocm-rpm-macros-modules %endif -%if %{with caffe2} -BuildRequires: foxi-devel +%if %{with opencv} +BuildRequires: opencv-devel %endif %if %{with test} BuildRequires: google-benchmark-devel %endif +Requires: python3dist(dill) + +# For convience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = 22.3.3 +# MIT Provides: bundled(miniz) = 2.1.0 +Provides: bundled(pybind11) = 2.11.1 + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + +# These are already in Fedora +%if %{without xnnpack} +# BSD-3-Clause +Provides: bundled(xnnpack) +# MIT +Provides: bundled(FP16) +# MIT +Provides: bundled(fxdiv) +# MIT +Provides: bundled(psimd) +%endif + +%if %{without pthreadpool} +# BSD-2-Clause +Provides: bundled(pthreadpool) +%endif + +%if %{without pocketfft} +# BSD-3-Clause +Provides: bundled(pocketfft) +%endif + +# For convience +Provides: pytorch %description PyTorch is a Python package that provides two high-level features: @@ -200,13 +411,48 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python 
packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. -%package -n python3-%{pypi_name}-devel -Summary: Libraries and headers for %{name} -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} +%if %{with rocm} +%package -n python3-%{pypi_name}-rocm-gfx8 +Summary: %{name} for ROCm gfx8 -%description -n python3-%{pypi_name}-devel +%description -n python3-%{pypi_name}-rocm-gfx8 %{summary} +%package -n python3-%{pypi_name}-rocm-gfx9 +Summary: %{name} for ROCm gfx9 + +%description -n python3-%{pypi_name}-rocm-gfx9 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx10 +Summary: %{name} for ROCm gfx10 + +%description -n python3-%{pypi_name}-rocm-gfx10 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx11 +Summary: %{name} for ROCm gfx11 + +%description -n python3-%{pypi_name}-rocm-gfx11 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx90a +Summary: %{name} for ROCm MI200 +%description -n python3-%{pypi_name}-rocm-gfx90a +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx942 +Summary: %{name} for ROCm MI300 +%description -n python3-%{pypi_name}-rocm-gfx942 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx1100 +Summary: %{name} for W7900 +%description -n python3-%{pypi_name}-rocm-gfx1100 +%{summary} + +%endif + %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -218,29 +464,132 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %prep + %if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE1000} . +%else +%autosetup -p1 -n pytorch-v%{version} +%endif # Remove bundled egg-info rm -rf %{pypi_name}.egg-info -# Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE1} . 
-%if %{with rocm} -# hipify -./tools/amd_build/build_amd.py +tar xf %{SOURCE1} +rm -rf third_party/flatbuffers/* +cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ + +tar xf %{SOURCE2} +rm -rf third_party/pybind11/* +cp -r pybind11-2.11.1/* third_party/pybind11/ + +%if %{with cuda} +tar xf %{SOURCE10} +rm -rf third_party/cudnn_frontend/* +cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ +tar xf %{SOURCE11} +rm -rf third_party/cutlass/* +cp -r cutlass-%{cul_ver}/* third_party/cutlass/ %endif -%else -%autosetup -p1 -n pytorch-v%{version} +%if %{with tensorpipe} +tar xf %{SOURCE20} +rm -rf third_party/tensorpipe/* +cp -r tensorpipe-*/* third_party/tensorpipe/ +tar xf %{SOURCE21} +rm -rf third_party/tensorpipe/third_party/libuv/* +cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ +tar xf %{SOURCE22} +rm -rf third_party/tensorpipe/third_party/libnop/* +cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ +%endif + +%if %{without xnnpack} +tar xf %{SOURCE30} +rm -rf third_party/XNNPACK/* +cp -r XNNPACK-*/* third_party/XNNPACK/ +tar xf %{SOURCE31} +rm -rf third_party/FXdiv/* +cp -r FXdiv-*/* third_party/FXdiv/ +tar xf %{SOURCE32} +rm -rf third_party/FP16/* +cp -r FP16-*/* third_party/FP16/ +tar xf %{SOURCE33} +rm -rf third_party/psimd/* +cp -r psimd-*/* third_party/psimd/ +%endif + +%if %{without pthreadpool} +tar xf %{SOURCE40} +rm -rf third_party/pthreadpool/* +cp -r pthreadpool-*/* third_party/pthreadpool/ +%endif + +%if %{without pocketfft} +tar xf %{SOURCE50} +rm -rf third_party/pocketfft/* +cp -r pocketfft-*/* third_party/pocketfft/ +%endif + +%if %{with gitcommit} +%if %{without opentelemtry} +tar xf %{SOURCE60} +rm -rf third_party/opentelemetry-cpp/* +cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ +%endif + +%if %{without httplib} +tar xf %{SOURCE70} +rm -rf third_party/cpp-httplib/* +cp -r cpp-httplib-*/* third_party/cpp-httplib/ +%endif + +%if %{without kineto} +tar xf %{SOURCE80} +rm -rf third_party/kineto/* +cp -r 
kineto-*/* third_party/kineto/ +%endif +%endif %if %{with opencv} +%if %{without gitcommit} # Reduce requirements, *FOUND is not set sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt -cat caffe2/image/CMakeLists.txt %endif +%endif + +%if 0%{?rhel} +# In RHEL but too old +sed -i -e '/typing-extensions/d' setup.py +# Need to pip these +sed -i -e '/sympy/d' setup.py +sed -i -e '/fsspec/d' setup.py +%endif + +# A new dependency +# Connected to USE_FLASH_ATTENTION, since this is off, do not need it +sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake + +# No third_party fmt, use system +sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake +sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake +sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake + +# No third_party FXdiv +%if %{with xnnpack} +sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt +sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt +%endif + +# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo +sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # 
Release comes fully loaded with third party src # Remove what we can @@ -255,6 +604,47 @@ mv third_party/miniz-2.1.0 . # setup.py depends on this script mv third_party/build_bundled.py . +# Need the just untarred flatbuffers/flatbuffers.h +mv third_party/flatbuffers . + +mv third_party/pybind11 . + +%if %{with cuda} +mv third_party/cudnn_frontend . +mv third_party/cutlass . +%endif + +%if %{with tensorpipe} +mv third_party/tensorpipe . +%endif + +%if %{without xnnpack} +mv third_party/XNNPACK . +mv third_party/FXdiv . +mv third_party/FP16 . +mv third_party/psimd . +%endif + +%if %{without pthreadpool} +mv third_party/pthreadpool . +%endif + +%if %{without pocketfft} +mv third_party/pocketfft . +%endif + +%if %{without opentelemetry} +mv third_party/opentelemetry-cpp . +%endif + +%if %{without httplib} +mv third_party/cpp-httplib . +%endif + +%if %{without kineto} +mv third_party/kineto . +%endif + %if %{with test} mv third_party/googletest . %endif @@ -264,26 +654,115 @@ rm -rf third_party/* # Put stuff back mv build_bundled.py third_party mv miniz-2.1.0 third_party +mv flatbuffers third_party +mv pybind11 third_party + +%if %{with cuda} +mv cudnn_frontend third_party +mv cutlass third_party +%endif + +%if %{with tensorpipe} +mv tensorpipe third_party +%endif + +%if %{without xnnpack} +mv XNNPACK third_party +mv FXdiv third_party +mv FP16 third_party +mv psimd third_party +%endif + +%if %{without pthreadpool} +mv pthreadpool third_party +%endif + +%if %{without pocketfft} +mv pocketfft third_party +%endif + +%if %{without opentelemetry} +mv opentelemetry-cpp third_party +%endif + +%if %{without httplib} +mv cpp-httplib third_party +%endif + +%if %{without kineto} +mv kineto third_party +%endif + %if %{with test} mv googletest third_party %endif + +%if %{with pocketfft} # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft +%endif + # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* 
third_party/valgrind-headers +%if %{without gitcommit} # Remove unneeded OpenCL files that confuse the lincense scanner rm caffe2/contrib/opencl/OpenCL/cl.hpp rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp +%endif + +%if %{with rocm} +# hipify +./tools/amd_build/build_amd.py +# Fedora installs to /usr/include, not /usr/include/rocm-core +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h +%endif + +%if %{with cuda} + +# TBD %endif %build +# +# Control the number of jobs +# +# The build can fail if too many threads exceed the physical memory +# So count core and and memory and increase the build memory util the build succeeds +# +# Real cores, No hyperthreading +COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` +if [ ${COMPILE_JOBS}x = x ]; then + COMPILE_JOBS=1 +fi +# Take into account memmory usage per core, do not thrash real memory +%if %{with cuda} +BUILD_MEM=4 +%else +BUILD_MEM=2 +%endif +MEM_KB=0 +MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` +MEM_MB=`eval "expr ${MEM_KB} / 1024"` +MEM_GB=`eval "expr ${MEM_MB} / 1024"` +COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"` +if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then + COMPILE_JOBS=$COMPILE_JOBS_MEM +fi +export MAX_JOBS=$COMPILE_JOBS + +%if %{with compat_gcc} +export CC=%{_bindir}/gcc%{compat_gcc_major} +export CXX=%{_bindir}/g++%{compat_gcc_major} +export FC=%{_bindir}/gfortran%{compat_gcc_major} +%endif + # For debugging setup.py # export SETUPTOOLS_SCM_DEBUG=1 @@ -293,63 +772,97 @@ rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp # export CMAKE_SHARED_LINKER_FLAGS=-Wl,--verbose # Manually set this hardening flag +# CUDA is unhappy with pie, so do not use it +%if %{without cuda} export CMAKE_EXE_LINKER_FLAGS=-pie +%endif export BUILD_CUSTOM_PROTOBUF=OFF +export BUILD_NVFUSER=OFF export BUILD_SHARED_LIBS=ON +export BUILD_TEST=OFF 
export CMAKE_BUILD_TYPE=RelWithDebInfo export CMAKE_FIND_PACKAGE_PREFER_CONFIG=ON export CAFFE2_LINK_LOCAL_PROTOBUF=OFF +export INTERN_BUILD_MOBILE=OFF +export USE_DISTRIBUTED=OFF export USE_CUDA=OFF export USE_FBGEMM=OFF +export USE_FLASH_ATTENTION=OFF export USE_GOLD_LINKER=OFF +export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF export USE_LITE_INTERPRETER_PROFILER=OFF +export USE_LITE_PROTO=OFF +export USE_MAGMA=OFF export USE_MKLDNN=OFF +export USE_MPI=OFF +export USE_NCCL=OFF export USE_NNPACK=OFF export USE_NUMPY=ON - +export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF +%if %{without gitcommit} export USE_QNNPACK=OFF -export USE_SYSTEM_LIBS=ON +%endif +export USE_ROCM=OFF +export USE_SYSTEM_CPUINFO=ON +export USE_SYSTEM_SLEEF=ON +export USE_SYSTEM_EIGEN_INSTALL=ON +export USE_SYSTEM_ONNX=ON +export USE_SYSTEM_PYBIND11=OFF +export USE_SYSTEM_LIBS=OFF export USE_TENSORPIPE=OFF export USE_XNNPACK=ON +%if %{with pthreadpool} +export USE_SYSTEM_PTHREADPOOL=ON +%endif + +%if %{with xnnpack} +export USE_SYSTEM_FP16=ON +export USE_SYSTEM_FXDIV=ON +export USE_SYSTEM_PSIMD=ON +export USE_SYSTEM_XNNPACK=ON +%endif + %if %{with caffe2} export BUILD_CAFFE2=ON -export INTERN_BUILD_MOBILE=OFF -export USE_LITE_PROTO=ON +%endif + +%if %{with cuda} +%if %{without rocm} +export CUDACXX=/usr/local/cuda/bin/nvcc +export CPLUS_INCLUDE_PATH=/usr/local/cuda/include +export USE_CUDA=ON +# The arches to build for +export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" +%endif %endif %if %{with distributed} export USE_DISTRIBUTED=ON -%else -export USE_DISTRIBUTED=OFF +%if %{with tensorpipe} +export USE_TENSORPIPE=ON +export TP_BUILD_LIBUV=OFF %endif - -%if %{with openmp} -export USE_OPENMP=ON -%else -export USE_OPENMP=OFF +%if %{with gloo} +export USE_GLOO=ON +export USE_SYSTEM_GLOO=ON +%endif +%if %{with mpi} +export USE_MPI=ON +%endif %endif -%if %{with rocm} -export USE_ROCM=ON -export USE_NCCL=OFF -export BUILD_NVFUSER=OFF -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} 
-export DEVICE_LIB_PATH=/usr/lib/clang/17/amdgcn/bitcode -%else -export USE_ROCM=OFF +%if %{with opencv} +export USE_OPENCV=ON %endif %if %{with test} export BUILD_TEST=ON -%else -export BUILD_TEST=OFF %endif # Why we are using py3_ vs pyproject_ @@ -360,549 +873,132 @@ export BUILD_TEST=OFF # Adding pip to build requires does not fix # # See BZ 2244862 -%py3_build -%install + %if %{with rocm} + export USE_ROCM=ON -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} -export DEVICE_LIB_PATH=/usr/lib/clang/17/amdgcn/bitcode +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode + +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +%py3_build +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + %py3_build + mv build build-${gpu} + module purge +done %endif +%else + +%py3_build + +%endif + +%install + +%if %{with compat_gcc} +export CC=%{_bindir}/gcc%{compat_gcc_major} +export CXX=%{_bindir}/g++%{compat_gcc_major} +export FC=%{_bindir}/gfortran%{compat_gcc_major} +%endif + +%if %{with rocm} +export USE_ROCM=ON +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode + +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +mv build-${gpu} build +%py3_install +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + mv build-${gpu} build + # need to customize the install location, so replace py3_install + %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root 
%{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} + rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ + mv build build-${gpu} + module purge +done +%endif + +%else %py3_install -# empty files -rm %{buildroot}%{python3_sitearch}/torch/py.typed -rm %{buildroot}%{python3_sitearch}/torch/ao/quantization/backend_config/observation_type.py -rm %{buildroot}%{python3_sitearch}/torch/ao/quantization/backend_config/__pycache__/observation_type.*.pyc -rm %{buildroot}%{python3_sitearch}/torch/cuda/error.py -rm %{buildroot}%{python3_sitearch}/torch/cuda/__pycache__/error.*.pyc -rm %{buildroot}%{python3_sitearch}/torch/include/ATen/cudnn/Exceptions.h +%endif -# exec permission -for f in `find %{buildroot}%{python3_sitearch} -name '*.py'`; do - if [ ! -x $f ]; then - sed -i '1{\@^#!/usr/bin@d}' $f - fi -done +# Do not remote the empty files -# shebangs -%py3_shebang_fix %{buildroot}%{python3_sitearch} - -# Programatically create the list of dirs -echo "s|%{buildroot}%{python3_sitearch}|%%dir %%{python3_sitearch}|g" > br.sed -find %{buildroot}%{python3_sitearch} -mindepth 1 -type d > dirs.files -sed -i -f br.sed dirs.files -cat dirs.files > main.files - -# Similar for the python files -find %{buildroot}%{python3_sitearch} -type f -name "*.py" -o -name "*.pyc" -o -name "*.pyi" > py.files -echo "s|%{buildroot}%{python3_sitearch}|%%{python3_sitearch}|g" > br.sed -sed -i -f br.sed py.files -cat py.files >> main.files - -# devel files, headers and such -find %{buildroot}%{python3_sitearch} -type f -name "*.h" -o -name "*.hpp" -o -name "*.cuh" -o -name "*.cpp" -o -name "*.cu" > devel.files -sed -i -f br.sed devel.files - -# -# Main package -##% dir % {python3_sitearch}/torch*.egg-info - -%files -n python3-%{pypi_name} -f main.files +%files -n python3-%{pypi_name} %license LICENSE -%doc README.md - -# bins +%doc README.md %{_bindir}/convert-caffe2-to-onnx %{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/torch/bin/torch_shm_manager 
+%{python3_sitearch}/%{pypi_name} +%{python3_sitearch}/%{pypi_name}-*.egg-info +%{python3_sitearch}/functorch +%{python3_sitearch}/torchgen +%if %{with caffe2} +%{python3_sitearch}/caffe2 +%endif -# libs -%{python3_sitearch}/functorch/_C.cpython*.so -%{python3_sitearch}/torch/_C.cpython*.so -%{python3_sitearch}/torch/lib/libc10.so -%{python3_sitearch}/torch/lib/libshm.so -%{python3_sitearch}/torch/lib/libtorch.so -%{python3_sitearch}/torch/lib/libtorch_cpu.so -%{python3_sitearch}/torch/lib/libtorch_global_deps.so -%{python3_sitearch}/torch/lib/libtorch_python.so %if %{with rocm} -%{python3_sitearch}/torch/lib/libc10_hip.so -%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so -%{python3_sitearch}/torch/lib/libtorch_hip.so -%endif +%files -n python3-%{pypi_name}-rocm-gfx8 +%{_libdir}/rocm/gfx8/bin/* +%{_libdir}/rocm/gfx8/lib64/* -# misc -%{python3_sitearch}/torch/utils/model_dump/{*.js,*.mjs,*.html} -%{python3_sitearch}/torchgen/packaged/ATen/native/*.yaml -%{python3_sitearch}/torchgen/packaged/autograd/{*.md,*.yaml} -%if %{with gitcommit} -%{python3_sitearch}/torch/_export/serde/schema.yaml -%{python3_sitearch}/torch/distributed/pipeline/sync/_balance/py.typed -%{python3_sitearch}/torch/distributed/pipeline/sync/py.typed -%endif +%files -n python3-%{pypi_name}-rocm-gfx9 +%{_libdir}/rocm/gfx9/bin/* +%{_libdir}/rocm/gfx9/lib64/* -# egg -%{python3_sitearch}/torch*.egg-info/* +%files -n python3-%{pypi_name}-rocm-gfx10 +%{_libdir}/rocm/gfx10/bin/* +%{_libdir}/rocm/gfx10/lib64/* -# excludes -# bazel build cruft -%exclude %{python3_sitearch}/torchgen/packaged/autograd/{BUILD.bazel,build.bzl} +%files -n python3-%{pypi_name}-rocm-gfx11 +%{_libdir}/rocm/gfx11/bin/* +%{_libdir}/rocm/gfx11/lib64/* -# -# devel package -# -%files -n python3-%{pypi_name}-devel -f devel.files +%files -n python3-%{pypi_name}-rocm-gfx90a +%{_libdir}/rocm/gfx90a/bin/* +%{_libdir}/rocm/gfx90a/lib64/* -# devel cmake -%{python3_sitearch}/torch/share/cmake/{ATen,Caffe2,Torch}/*.cmake 
-%{python3_sitearch}/torch/share/cmake/Caffe2/public/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/*.cmake +%files -n python3-%{pypi_name}-rocm-gfx942 +%{_libdir}/rocm/gfx942/bin/* +%{_libdir}/rocm/gfx942/lib64/* -# devel misc -%{python3_sitearch}/torchgen/packaged/ATen/templates/RegisterDispatchDefinitions.ini -%{python3_sitearch}/torchgen/packaged/autograd/templates/annotated_fn_args.py.in - -%if %{with test} -%files -n python3-%{pypi_name}-test - -# test bins -%{python3_sitearch}/torch/bin/test_api -%{python3_sitearch}/torch/bin/test_edge_op_registration -%{python3_sitearch}/torch/bin/test_jit -%{python3_sitearch}/torch/bin/test_lazy -%{python3_sitearch}/torch/bin/test_tensorexpr -%{python3_sitearch}/torch/bin/tutorial_tensorexpr - -# test libs -# Unversioned - not ment for release -%{python3_sitearch}/torch/lib/libbackend_with_compiler.so -%{python3_sitearch}/torch/lib/libjitbackend_test.so -%{python3_sitearch}/torch/lib/libtorchbind_test.so - -# tests -%{python3_sitearch}/torch/test/* +%files -n python3-%{pypi_name}-rocm-gfx1100 +%{_libdir}/rocm/gfx1100/bin/* +%{_libdir}/rocm/gfx1100/lib64/* %endif -# -# License Details -# Main license BSD 3-Clause -# -# Apache-2.0 -# android/libs/fbjni/LICENSE -# android/libs/fbjni/CMakeLists.txt -# android/libs/fbjni/build.gradle -# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp -# android/libs/fbjni/cxx/fbjni/ByteBuffer.h -# android/libs/fbjni/cxx/fbjni/Context.h -# android/libs/fbjni/cxx/fbjni/File.h -# android/libs/fbjni/cxx/fbjni/JThread.h -# android/libs/fbjni/cxx/fbjni/NativeRunnable.h -# android/libs/fbjni/cxx/fbjni/OnLoad.cpp -# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp -# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h -# android/libs/fbjni/cxx/fbjni/detail/Boxed.h -# android/libs/fbjni/cxx/fbjni/detail/Common.h -# 
android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h -# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h -# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp -# android/libs/fbjni/cxx/fbjni/detail/Environment.h -# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp -# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h -# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h -# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp -# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h -# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Iterator.h -# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h -# android/libs/fbjni/cxx/fbjni/detail/Log.h -# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h -# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp -# android/libs/fbjni/cxx/fbjni/detail/Meta.h -# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h -# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h -# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h -# android/libs/fbjni/cxx/fbjni/detail/References-forward.h -# android/libs/fbjni/cxx/fbjni/detail/References-inl.h -# android/libs/fbjni/cxx/fbjni/detail/References.cpp -# android/libs/fbjni/cxx/fbjni/detail/References.h -# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Registration.h -# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h -# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h -# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp -# android/libs/fbjni/cxx/fbjni/detail/utf8.h -# android/libs/fbjni/cxx/fbjni/fbjni.cpp -# android/libs/fbjni/cxx/fbjni/fbjni.h -# android/libs/fbjni/cxx/lyra/cxa_throw.cpp -# android/libs/fbjni/cxx/lyra/lyra.cpp -# android/libs/fbjni/cxx/lyra/lyra.h -# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp -# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp -# android/libs/fbjni/cxx/lyra/lyra_exceptions.h -# android/libs/fbjni/gradle.properties -# 
android/libs/fbjni/gradle/android-tasks.gradle -# android/libs/fbjni/gradle/release.gradle -# android/libs/fbjni/gradlew -# android/libs/fbjni/gradlew.bat -# android/libs/fbjni/host.gradle -# android/libs/fbjni/java/com/facebook/jni/CppException.java -# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java -# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java -# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java -# android/libs/fbjni/java/com/facebook/jni/HybridData.java -# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java -# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java -# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java -# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java -# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java -# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java -# android/libs/fbjni/scripts/android-setup.sh -# android/libs/fbjni/scripts/run-host-tests.sh -# android/libs/fbjni/settings.gradle -# android/libs/fbjni/test/BaseFBJniTests.java -# android/libs/fbjni/test/ByteBufferTests.java -# android/libs/fbjni/test/DocTests.java -# android/libs/fbjni/test/FBJniTests.java -# android/libs/fbjni/test/HybridTests.java -# android/libs/fbjni/test/IteratorTests.java -# android/libs/fbjni/test/PrimitiveArrayTests.java -# android/libs/fbjni/test/ReadableByteChannelTests.java -# android/libs/fbjni/test/jni/CMakeLists.txt -# android/libs/fbjni/test/jni/byte_buffer_tests.cpp -# android/libs/fbjni/test/jni/doc_tests.cpp -# android/libs/fbjni/test/jni/expect.h -# android/libs/fbjni/test/jni/fbjni_onload.cpp -# android/libs/fbjni/test/jni/fbjni_tests.cpp -# android/libs/fbjni/test/jni/hybrid_tests.cpp -# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp -# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h -# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp -# 
android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h -# android/libs/fbjni/test/jni/iterator_tests.cpp -# android/libs/fbjni/test/jni/modified_utf8_test.cpp -# android/libs/fbjni/test/jni/no_rtti.cpp -# android/libs/fbjni/test/jni/no_rtti.h -# android/libs/fbjni/test/jni/primitive_array_tests.cpp -# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp -# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp -# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp -# android/pytorch_android/host/build.gradle -# aten/src/ATen/cuda/llvm_basic.cpp -# aten/src/ATen/cuda/llvm_complex.cpp -# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h -# aten/src/ATen/nnapi/codegen.py -# aten/src/ATen/nnapi/NeuralNetworks.h -# aten/src/ATen/nnapi/nnapi_wrapper.cpp -# aten/src/ATen/nnapi/nnapi_wrapper.h -# binaries/benchmark_args.h -# binaries/benchmark_helper.cc -# binaries/benchmark_helper.h -# binaries/compare_models_torch.cc -# binaries/convert_and_benchmark.cc -# binaries/convert_caffe_image_db.cc -# binaries/convert_db.cc -# binaries/convert_encoded_to_raw_leveldb.cc -# binaries/convert_image_to_tensor.cc -# binaries/core_overhead_benchmark.cc -# binaries/core_overhead_benchmark_gpu.cc -# binaries/db_throughput.cc -# binaries/dump_operator_names.cc -# binaries/inspect_gpu.cc -# binaries/load_benchmark_torch.cc -# binaries/make_cifar_db.cc -# binaries/make_image_db.cc -# binaries/make_mnist_db.cc -# binaries/optimize_for_mobile.cc -# binaries/parallel_info.cc -# binaries/predictor_verifier.cc -# binaries/print_core_object_sizes_gpu.cc -# binaries/print_registered_core_operators.cc -# binaries/run_plan.cc -# binaries/run_plan_mpi.cc -# binaries/speed_benchmark.cc -# binaries/speed_benchmark_torch.cc -# 
binaries/split_db.cc -# binaries/tsv_2_proto.cc -# binaries/tutorial_blob.cc -# binaries/zmq_feeder.cc -# c10/test/util/small_vector_test.cpp -# c10/util/FunctionRef.h -# c10/util/SmallVector.cpp -# c10/util/SmallVector.h -# c10/util/llvmMathExtras.h -# c10/util/sparse_bitset.h -# caffe2/contrib/aten/gen_op.py -# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc -# caffe2/contrib/fakelowp/fp16_fc_acc_op.h -# caffe2/contrib/gloo/allgather_ops.cc -# caffe2/contrib/gloo/allgather_ops.h -# caffe2/contrib/gloo/reduce_scatter_ops.cc -# caffe2/contrib/gloo/reduce_scatter_ops.h -# caffe2/core/hip/common_miopen.h -# caffe2/core/hip/common_miopen.hip -# caffe2/core/net_async_tracing.cc -# caffe2/core/net_async_tracing.h -# caffe2/core/net_async_tracing_test.cc -# caffe2/experiments/operators/fully_connected_op_decomposition.cc -# caffe2/experiments/operators/fully_connected_op_decomposition.h -# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc -# caffe2/experiments/operators/fully_connected_op_prune.cc -# caffe2/experiments/operators/fully_connected_op_prune.h -# caffe2/experiments/operators/fully_connected_op_sparse.cc -# caffe2/experiments/operators/fully_connected_op_sparse.h -# caffe2/experiments/operators/funhash_op.cc -# caffe2/experiments/operators/funhash_op.h -# caffe2/experiments/operators/sparse_funhash_op.cc -# caffe2/experiments/operators/sparse_funhash_op.h -# caffe2/experiments/operators/sparse_matrix_reshape_op.cc -# caffe2/experiments/operators/sparse_matrix_reshape_op.h -# caffe2/experiments/operators/tt_contraction_op.cc -# caffe2/experiments/operators/tt_contraction_op.h -# caffe2/experiments/operators/tt_contraction_op_gpu.cc -# caffe2/experiments/operators/tt_pad_op.cc -# caffe2/experiments/operators/tt_pad_op.h -# caffe2/experiments/python/SparseTransformer.py -# caffe2/experiments/python/convnet_benchmarks.py -# caffe2/experiments/python/device_reduce_sum_bench.py -# caffe2/experiments/python/funhash_op_test.py -# 
caffe2/experiments/python/net_construct_bench.py -# caffe2/experiments/python/sparse_funhash_op_test.py -# caffe2/experiments/python/sparse_reshape_op_test.py -# caffe2/experiments/python/tt_contraction_op_test.py -# caffe2/experiments/python/tt_pad_op_test.py -# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h -# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h -# caffe2/mobile/contrib/nnapi/NeuralNetworks.h -# caffe2/mobile/contrib/nnapi/dlnnapi.c -# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc -# caffe2/observers/profile_observer.cc -# caffe2/observers/profile_observer.h -# caffe2/operators/hip/conv_op_miopen.hip -# caffe2/operators/hip/local_response_normalization_op_miopen.hip -# caffe2/operators/hip/pool_op_miopen.hip -# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip -# caffe2/operators/quantized/int8_utils.h -# caffe2/operators/stump_func_op.cc -# caffe2/operators/stump_func_op.cu -# caffe2/operators/stump_func_op.h -# caffe2/operators/unique_ops.cc -# caffe2/operators/unique_ops.cu -# caffe2/operators/unique_ops.h -# caffe2/operators/upsample_op.cc -# caffe2/operators/upsample_op.h -# caffe2/opt/fusion.h -# caffe2/python/layers/label_smooth.py -# caffe2/python/mint/static/css/simple-sidebar.css -# caffe2/python/modeling/get_entry_from_blobs.py -# caffe2/python/modeling/get_entry_from_blobs_test.py -# caffe2/python/modeling/gradient_clipping_test.py -# caffe2/python/operator_test/unique_ops_test.py -# caffe2/python/operator_test/upsample_op_test.py -# caffe2/python/operator_test/weight_scale_test.py -# caffe2/python/pybind_state_int8.cc -# caffe2/python/transformations.py -# caffe2/python/transformations_test.py -# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc -# caffe2/quantization/server/batch_matmul_dnnlowp_op.h -# caffe2/quantization/server/compute_equalization_scale_test.py -# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc -# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h -# 
caffe2/quantization/server/elementwise_sum_relu_op.cc -# caffe2/quantization/server/fb_fc_packed_op.cc -# caffe2/quantization/server/fb_fc_packed_op.h -# caffe2/quantization/server/fbgemm_fp16_pack_op.cc -# caffe2/quantization/server/fbgemm_fp16_pack_op.h -# caffe2/quantization/server/fully_connected_fake_lowp_op.cc -# caffe2/quantization/server/fully_connected_fake_lowp_op.h -# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py -# caffe2/quantization/server/int8_gen_quant_params_test.py -# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py -# caffe2/quantization/server/spatial_batch_norm_relu_op.cc -# caffe2/sgd/weight_scale_op.cc -# caffe2/sgd/weight_scale_op.h -# caffe2/utils/bench_utils.h -# functorch/examples/maml_omniglot/maml-omniglot-higher.py -# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py -# functorch/examples/maml_omniglot/maml-omniglot-transforms.py -# functorch/examples/maml_omniglot/support/omniglot_loaders.py -# modules/detectron/group_spatial_softmax_op.cc -# modules/detectron/group_spatial_softmax_op.cu -# modules/detectron/group_spatial_softmax_op.h -# modules/detectron/ps_roi_pool_op.cc -# modules/detectron/ps_roi_pool_op.h -# modules/detectron/roi_pool_f_op.cc -# modules/detectron/roi_pool_f_op.cu -# modules/detectron/roi_pool_f_op.h -# modules/detectron/sample_as_op.cc -# modules/detectron/sample_as_op.cu -# modules/detectron/sample_as_op.h -# modules/detectron/select_smooth_l1_loss_op.cc -# modules/detectron/select_smooth_l1_loss_op.cu -# modules/detectron/select_smooth_l1_loss_op.h -# modules/detectron/sigmoid_cross_entropy_loss_op.cc -# modules/detectron/sigmoid_cross_entropy_loss_op.cu -# modules/detectron/sigmoid_cross_entropy_loss_op.h -# modules/detectron/sigmoid_focal_loss_op.cc -# modules/detectron/sigmoid_focal_loss_op.cu -# modules/detectron/sigmoid_focal_loss_op.h -# modules/detectron/smooth_l1_loss_op.cc -# modules/detectron/smooth_l1_loss_op.cu -# modules/detectron/smooth_l1_loss_op.h -# 
modules/detectron/softmax_focal_loss_op.cc -# modules/detectron/softmax_focal_loss_op.cu -# modules/detectron/softmax_focal_loss_op.h -# modules/detectron/spatial_narrow_as_op.cc -# modules/detectron/spatial_narrow_as_op.cu -# modules/detectron/spatial_narrow_as_op.h -# modules/detectron/upsample_nearest_op.cc -# modules/detectron/upsample_nearest_op.h -# modules/module_test/module_test_dynamic.cc -# modules/rocksdb/rocksdb.cc -# scripts/apache_header.txt -# scripts/apache_python.txt -# torch/distributions/lkj_cholesky.py -# -# Apache 2.0 AND BSD 2-Clause -# caffe2/operators/deform_conv_op.cu -# -# Apache 2.0 AND BSD 2-Clause AND MIT -# modules/detectron/ps_roi_pool_op.cu -# -# Apache 2.0 AND BSD 2-Clause -# modules/detectron/upsample_nearest_op.cu -# -# BSD 0-Clause -# torch/csrc/utils/pythoncapi_compat.h -# -# BSD 2-Clause -# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE -# caffe2/image/transform_gpu.cu -# caffe2/image/transform_gpu.h -# -# BSL-1.0 -# c10/util/flat_hash_map.h -# c10/util/hash.h -# c10/util/Optional.h -# c10/util/order_preserving_flat_hash_map.h -# c10/util/strong_type.h -# c10/util/variant.h -# -# GPL-3.0-or-later AND MIT -# c10/util/reverse_iterator.h -# -# Khronos -# These files are for OpenCL, an unused option -# Replace them later, as-needed with the opencl-headers.rpm -# -# caffe2/contrib/opencl/OpenCL/cl.hpp -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h -# -# MIT -# android/libs/fbjni/googletest-CMakeLists.txt.in -# c10/util/BFloat16-math.h -# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h -# 
caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c -# caffe2/onnx/torch_ops/defs.cc -# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake -# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake -# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake -# functorch/einops/_parsing.py -# test/functorch/test_parsing.py -# test/functorch/test_rearrange.py -# third_party/miniz-2.1.0/LICENSE -# third_party/miniz-2.1.0/miniz.c -# tools/coverage_plugins_package/setup.py -# torch/_appdirs.py -# torch/utils/hipify/hipify_python.py -# -# Public Domain -# caffe2/mobile/contrib/libopencl-stub/LICENSE -# caffe2/utils/murmur_hash3.cc -# caffe2/utils/murmur_hash3.h -# -# Zlib -# aten/src/ATen/native/cpu/avx_mathfun.h - %changelog -* Fri Jan 26 2024 Fedora Release Engineering - 2.1.2-3 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Mon Jan 22 2024 Fedora Release Engineering - 2.1.2-2 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Wed Dec 27 2023 Tom Rix - 2.1.2-1 -- Update to 2.1.2 -- Stop versioning *.so's - 2.1.2's version is wrong -- Stub in caffe2 to test in flight package - -* Wed Dec 27 2023 Tom Rix - 2.1.0-13 -- Stub in openmp to test in openmp - -* Wed Dec 20 2023 Tom Rix - 2.1.0-12 -- Stub in rocm to test in flight packages - -* Wed Dec 13 2023 Tom Rix - 2.1.0-11 -- Move unversioned *.so's to main package - -* Fri Dec 1 2023 Tom Rix - 2.1.0-10 -- Disable gold linker -- Remove python requires -- Change to openblas, remove -lgfortran fixes -- Manually add -pie to linking options - -* Fri Nov 24 2023 Tom Rix - 2.1.0-9 -- Enable debug build -- Remove Khronos licensed files from source -- Use 0BSD license identifier -- Generate lists directories, python and header files -- Add a -test subpackage - -* Wed Nov 15 2023 Tom Rix - 2.1.0-8 -- Address review comments - -* Thu Nov 2 2023 Tom Rix - 2.1.0-7 -- Address review comments -- remove pyproject option - -* Thu Oct 19 2023 Tom Rix - 2.1.0-6 -- Address 
review comments - -* Wed Oct 18 2023 Tom Rix - 2.1.0-5 -- Address review comments - -* Sat Oct 14 2023 Tom Rix - 2.1.0-4 -- Use gloo, xnnpack -- Find missing build_bundled.py -- Add pyproject option - -* Thu Oct 12 2023 Tom Rix - 2.1.0-3 -- Address review comments -- Force so versioning on - -* Mon Oct 9 2023 Tom Rix - 2.1.0-2 -- Use the 2.1 release -- Reduce USE_SYSTEM_LIBS to parts -- Remove almost all of third_party/ -- Remove py2rpm generated noise - -* Sat Sep 30 2023 Tom Rix - 2.1.0-1 -- Initial package. +%autochangelog diff --git a/sources b/sources index 90b1128..60cce58 100644 --- a/sources +++ b/sources @@ -1,2 +1,14 @@ SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 +SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4 +SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 +SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95 +SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f +SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683 +SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef +SHA512 (tensorpipe-52791a2.tar.gz) = 
1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e +SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 +SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 +SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580 +SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda +SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4