diff --git a/.gitignore b/.gitignore index 3f2501f..315fe1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,14 @@ /pytorch-v2.1.0.tar.gz /pytorch-v2.1.2.tar.gz +/pytorch-975d428.tar.gz +/v23.3.3.tar.gz +/v2.11.1.tar.gz +/pytorch-6a89a75.tar.gz +/pytorch-74832f1.tar.gz +/pytorch-4bb5cb5.tar.gz +/tensorpipe-52791a2.tar.gz +/v1.41.0.tar.gz +/libnop-910b558.tar.gz +/pytorch-97ff6cf.tar.gz +/pytorch-v2.3.0.tar.gz +/pytorch-v2.3.1.tar.gz diff --git a/0001-Optionally-use-hipblaslt.patch b/0001-Optionally-use-hipblaslt.patch new file mode 100644 index 0000000..56434a7 --- /dev/null +++ b/0001-Optionally-use-hipblaslt.patch @@ -0,0 +1,262 @@ +From d77e05d90df006322cda021f1a8affdcc2c7eaef Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Fri, 23 Feb 2024 08:27:30 -0500 +Subject: [PATCH] Optionally use hipblaslt + +The hipblaslt package is not available on Fedora. +Instead of requiring the package, make it optional. +If it is found, define the preprocessor variable HIPBLASLT +Convert the checks for ROCM_VERSION >= 507000 to HIPBLASLT checks + +Signed-off-by: Tom Rix +--- + aten/src/ATen/cuda/CUDABlas.cpp | 7 ++++--- + aten/src/ATen/cuda/CUDABlas.h | 2 +- + aten/src/ATen/cuda/CUDAContextLight.h | 4 ++-- + aten/src/ATen/cuda/CublasHandlePool.cpp | 4 ++-- + aten/src/ATen/cuda/tunable/TunableGemm.h | 6 +++--- + aten/src/ATen/native/cuda/Blas.cpp | 14 ++++++++------ + cmake/Dependencies.cmake | 3 +++ + cmake/public/LoadHIP.cmake | 4 ++-- + 8 files changed, 25 insertions(+), 19 deletions(-) + +diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp +index d534ec5a178..e815463f630 100644 +--- a/aten/src/ATen/cuda/CUDABlas.cpp ++++ b/aten/src/ATen/cuda/CUDABlas.cpp +@@ -14,7 +14,7 @@ + #include + + #ifdef USE_ROCM +-#if ROCM_VERSION >= 60000 ++#ifdef HIPBLASLT + #include + #endif + // until hipblas has an API to accept flags, we must use rocblas here +@@ -781,7 +781,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) { + } + } + +-#if (!defined(USE_ROCM) && 
!defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + + #if defined(USE_ROCM) && ROCM_VERSION >= 50700 && ROCM_VERSION < 60000 + // only for rocm 5.7 where we first supported hipblaslt, it was difficult +@@ -912,6 +912,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor< + }; + } // namespace + ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + template + void gemm_and_bias( + bool transpose_mat1, +@@ -1124,7 +1125,7 @@ template void gemm_and_bias( + at::BFloat16* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation); +- ++#endif + void scaled_gemm( + char transa, + char transb, +diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h +index eb12bb350c5..068607467dd 100644 +--- a/aten/src/ATen/cuda/CUDABlas.h ++++ b/aten/src/ATen/cuda/CUDABlas.h +@@ -82,7 +82,7 @@ void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); + template <> + void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + enum GEMMAndBiasActivationEpilogue { + None, + RELU, +diff --git a/aten/src/ATen/cuda/CUDAContextLight.h b/aten/src/ATen/cuda/CUDAContextLight.h +index 4ec35f59a21..e28dc42034f 100644 +--- a/aten/src/ATen/cuda/CUDAContextLight.h ++++ b/aten/src/ATen/cuda/CUDAContextLight.h +@@ -9,7 +9,7 @@ + + // cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also + // added bf16 support +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + #include + #endif + +@@ -82,7 +82,7 @@ TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + 
/* Handles */ + TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); + TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle(); + #endif + +diff --git a/aten/src/ATen/cuda/CublasHandlePool.cpp b/aten/src/ATen/cuda/CublasHandlePool.cpp +index 6913d2cd95e..3d4276be372 100644 +--- a/aten/src/ATen/cuda/CublasHandlePool.cpp ++++ b/aten/src/ATen/cuda/CublasHandlePool.cpp +@@ -29,7 +29,7 @@ namespace at::cuda { + + namespace { + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + void createCublasLtHandle(cublasLtHandle_t *handle) { + TORCH_CUDABLAS_CHECK(cublasLtCreate(handle)); + } +@@ -190,7 +190,7 @@ cublasHandle_t getCurrentCUDABlasHandle() { + return handle; + } + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasLtHandle_t getCurrentCUDABlasLtHandle() { + #ifdef USE_ROCM + c10::DeviceIndex device = 0; +diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h +index 3ba0d761277..dde1870cfbf 100644 +--- a/aten/src/ATen/cuda/tunable/TunableGemm.h ++++ b/aten/src/ATen/cuda/tunable/TunableGemm.h +@@ -11,7 +11,7 @@ + + #include + #ifdef USE_ROCM +-#if ROCM_VERSION >= 50700 ++#ifdef HIPBLASLT + #include + #endif + #include +@@ -166,7 +166,7 @@ class GemmTunableOp : public TunableOp, StreamTimer> { + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with 
c10::complex +@@ -240,7 +240,7 @@ class GemmStridedBatchedTunableOp : public TunableOp + } + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + static const char *env = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED"); + if (env == nullptr || strcmp(env, "1") == 0) { + // disallow tuning of hipblaslt with c10::complex +diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp +index 29e5c5e3cf1..df56f3d7f1d 100644 +--- a/aten/src/ATen/native/cuda/Blas.cpp ++++ b/aten/src/ATen/native/cuda/Blas.cpp +@@ -155,7 +155,7 @@ enum class Activation { + GELU, + }; + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activation a) { + switch (a) { + case Activation::None: +@@ -193,6 +193,7 @@ static bool getDisableAddmmCudaLt() { + + #ifdef USE_ROCM + static bool isSupportedHipLtROCmArch(int index) { ++#if defined(HIPBLASLT) + hipDeviceProp_t* prop = at::cuda::getDeviceProperties(index); + std::string device_arch = prop->gcnArchName; + static const std::vector archs = {"gfx90a", "gfx940", "gfx941", "gfx942"}; +@@ -203,6 +204,7 @@ static bool isSupportedHipLtROCmArch(int index) { + } + } + TORCH_CHECK(false, "Attempting to use hipBLASLt on a unsupported architecture!"); ++#endif + return false; + } + #endif +@@ -228,7 +230,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + at::ScalarType scalar_type = self.scalar_type(); + c10::MaybeOwned self_; + if (&result != &self) { +-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040 && !defined(_MSC_VER)) || defined(USE_ROCM) && defined(HIPBLASLT) + // Strangely, if mat2 has only 1 row 
or column, we get + // CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic. + // self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] +@@ -271,7 +273,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + } + self__sizes = self_->sizes(); + } else { +-#if defined(USE_ROCM) && ROCM_VERSION >= 50700 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + useLtInterface = !disable_addmm_cuda_lt && + result.dim() == 2 && result.is_contiguous() && + isSupportedHipLtROCmArch(self.device().index()) && +@@ -322,7 +324,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!args.result->is_conj()); + +-#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) ++#if (!defined(USE_ROCM) && !defined(_MSC_VER)) || (defined(USE_ROCM) && defined(HIPBLASLT)) + if (useLtInterface) { + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, +@@ -876,7 +878,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]}); + at::native::resize_output(amax, {}); + +-#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && ROCM_VERSION >= 60000) ++#if !defined(USE_ROCM) && !defined(_MSC_VER) || (defined(USE_ROCM) && defined(HIPBLASLT)) + cublasCommonArgs args(mat1, mat2, out); + const auto out_dtype_ = args.result->scalar_type(); + TORCH_CHECK(args.transa == 't' && args.transb == 'n', "Only multiplication of row-major and column-major matrices is supported by cuBLASLt"); +@@ -906,7 +908,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2, + TORCH_CHECK(false, "_scaled_mm_out_cuda is not compiled for this platform."); + #endif + +-#if defined(USE_ROCM) && ROCM_VERSION >= 60000 ++#if defined(USE_ROCM) && defined(HIPBLASLT) + // rocm's hipblaslt does not yet support amax, so calculate separately + auto out_float32 = out.to(kFloat); + 
out_float32.abs_(); +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index b7ffbeb07dc..2b6c3678984 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1273,6 +1273,9 @@ if(USE_ROCM) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "6.0.0") + list(APPEND HIP_CXX_FLAGS -DHIPBLAS_V2) + endif() ++ if(hipblaslt_FOUND) ++ list(APPEND HIP_CXX_FLAGS -DHIPBLASLT) ++ endif() + if(HIPBLASLT_CUSTOM_DATA_TYPE) + list(APPEND HIP_CXX_FLAGS -DHIPBLASLT_CUSTOM_DATA_TYPE) + endif() +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index f6ca263c5e5..53eb0b63c1a 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -156,7 +156,7 @@ if(HIP_FOUND) + find_package_and_print_version(rocblas REQUIRED) + find_package_and_print_version(hipblas REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") +- find_package_and_print_version(hipblaslt REQUIRED) ++ find_package_and_print_version(hipblaslt) + endif() + find_package_and_print_version(miopen REQUIRED) + if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") +@@ -191,7 +191,7 @@ if(HIP_FOUND) + # roctx is part of roctracer + find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib) + +- if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0") ++ if(hipblaslt_FOUND) + # check whether hipblaslt is using its own datatype + set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_data_type.cc") + file(WRITE ${file} "" +-- +2.43.2 + diff --git a/0001-Prepare-pytorch-cmake-for-fedora.patch b/0001-Prepare-pytorch-cmake-for-fedora.patch deleted file mode 100644 index cf1e843..0000000 --- a/0001-Prepare-pytorch-cmake-for-fedora.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 24cf0294a67d89ad70367940eea872162b44482c Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 23 Sep 2023 10:18:52 -0700 -Subject: [PATCH] Prepare pytorch cmake for fedora - -Use the system fmt -Remove foxi use -Remove warnings/errors for clang 17 -fxdiv is not a library -build type is RelWithDebInfo -use 
system pthreadpool - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 6 +++--- - .../native/quantized/cpu/qnnpack/CMakeLists.txt | 3 --- - c10/CMakeLists.txt | 2 +- - caffe2/CMakeLists.txt | 6 +----- - cmake/Dependencies.cmake | 16 +--------------- - test/cpp/tensorexpr/CMakeLists.txt | 2 +- - torch/CMakeLists.txt | 2 +- - 7 files changed, 8 insertions(+), 29 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 3a48eaf4e2..902ee70fd1 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -682,7 +682,7 @@ set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should - # Set default build type - if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - defaulting to Release") -- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) -+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) - endif() - - # The below means we are cross compiling for arm64 or x86_64 on MacOSX -@@ -917,8 +917,8 @@ if(NOT MSVC) - string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0") - append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS) -- append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS) -- append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS) -+# append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS) -+# append_cxx_flag_if_supported("-Werror=cast-function-type" CMAKE_CXX_FLAGS) - else() - # skip unwanted includes from windows.h - add_compile_definitions(WIN32_LEAN_AND_MEAN) -diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -index fd6b7ff551..218c8e9b2a 100644 ---- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -+++ 
b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt -@@ -393,10 +393,7 @@ elseif(NOT TARGET fxdiv AND USE_SYSTEM_FXDIV) - if(NOT FXDIV_HDR) - message(FATAL_ERROR "Cannot find fxdiv") - endif() -- add_library(fxdiv STATIC "${FXDIV_HDR}") -- set_property(TARGET fxdiv PROPERTY LINKER_LANGUAGE C) - endif() --target_link_libraries(pytorch_qnnpack PRIVATE fxdiv) - - # ---[ Configure psimd - if(NOT TARGET psimd AND NOT USE_SYSTEM_PSIMD) -diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index feebad7cbb..7c029cd88d 100644 ---- a/c10/CMakeLists.txt -+++ b/c10/CMakeLists.txt -@@ -87,7 +87,7 @@ endif() - if(${USE_GLOG}) - target_link_libraries(c10 PUBLIC glog::glog) - endif() --target_link_libraries(c10 PRIVATE fmt::fmt-header-only) -+target_link_libraries(c10 PRIVATE fmt) - - find_package(Backtrace) - if(Backtrace_FOUND) -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 74d0d55719..b975d388a7 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -107,7 +107,7 @@ endif() - # Note: the folders that are being commented out have not been properly - # addressed yet. 
- --if(NOT MSVC AND USE_XNNPACK) -+if(NOT MSVC AND USE_XNNPACK AND NOT USE_SYSTEM_FXDIV) - if(NOT TARGET fxdiv) - set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") - set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -@@ -1022,10 +1022,6 @@ elseif(USE_CUDA) - endif() - endif() - --if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) --endif() -- - # ========================================================== - # formerly-libtorch flags - # ========================================================== -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index c3abce52e4..21b40f3a88 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -1555,7 +1555,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) - endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1588,8 +1587,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - endif() -- include_directories(${FOXI_INCLUDE_DIRS}) -- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) - endif() -@@ -1834,18 +1831,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -- --# Disable compiler feature checks for `fmt`. --# --# CMake compiles a little program to check compiler features. Some of our build --# configurations (notably the mobile build analyzer) will populate --# CMAKE_CXX_FLAGS in ways that break feature checks. 
Since we already know --# `fmt` is compatible with a superset of the compilers that PyTorch is, it --# shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") -- --list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) -+list(APPEND Caffe2_DEPENDENCY_LIBS fmt) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) - - # ---[ Kineto -diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 7dff70630d..90b1003591 100644 ---- a/test/cpp/tensorexpr/CMakeLists.txt -+++ b/test/cpp/tensorexpr/CMakeLists.txt -@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) - # pthreadpool header. For some build environment we need add the dependency - # explicitly. - if(USE_PTHREADPOOL) -- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface) -+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool) - endif() - if(USE_CUDA) - target_link_libraries(test_tensorexpr PRIVATE -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 62ee4c12a9..8d5375f320 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -84,7 +84,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES - python::python - pybind::pybind11 - shm -- fmt::fmt-header-only -+ fmt - ATEN_CPU_FILES_GEN_LIB) - - if(USE_ASAN AND TARGET Sanitizer::address) --- -2.42.1 - diff --git a/0001-Reenable-dim-for-python-3.12.patch b/0001-Reenable-dim-for-python-3.12.patch new file mode 100644 index 0000000..138b5d4 --- /dev/null +++ b/0001-Reenable-dim-for-python-3.12.patch @@ -0,0 +1,115 @@ +From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 4 Apr 2024 14:21:38 -0600 +Subject: [PATCH] Reenable dim for python 3.12 + +In 3.12: + +_PyArg_Parser added an element to the start of the structure. +So existing positional initialization is off. Switch to element +initialization. 
+ +_Py_CODEUNIT changed to from an int to a union, but relevant_op +is passed an int for the return of decoder.opcode, so the parameter +type is wrong, switch it to int. + +The opcode PRECALL was removed, so reduce its handling to 3.11 + +Signed-off-by: Tom Rix +--- + functorch/csrc/dim/dim.cpp | 24 +++++------------------- + functorch/csrc/dim/minpybind.h | 4 ++-- + 2 files changed, 7 insertions(+), 21 deletions(-) + +diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp +index 4cc027504c77..e48b0d58081f 100644 +--- a/functorch/csrc/dim/dim.cpp ++++ b/functorch/csrc/dim/dim.cpp +@@ -6,20 +6,6 @@ + + #include + +- +-// Many APIs have changed/don't exist anymore +-#if IS_PYTHON_3_12_PLUS +- +-#include "dim.h" +- +-// Re-enable this some day +-PyObject* Dim_init() { +- PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); +- return nullptr; +-} +- +-#else +- + #include "minpybind.h" + #include + #include +@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self, + PY_BEGIN + mpy::handle sizes; + static const char * const _keywords[] = {"sizes", nullptr}; +- static _PyArg_Parser parser = {"O", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "O", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) { + return nullptr; + } +@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self, + PY_BEGIN + int size; + static const char * const _keywords[] = {"N", nullptr}; +- static _PyArg_Parser parser = {"i", _keywords, 0}; ++ static _PyArg_Parser parser = { .format = "i", .keywords = _keywords}; + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) { + return nullptr; + } +@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = { + + // dim() -------------------- + +-static bool relevant_op(_Py_CODEUNIT c) { ++static bool relevant_op(int c) { + switch(c) { + case STORE_NAME: + case STORE_GLOBAL: +@@ -1587,7 +1573,7 @@ static PyObject* 
_dims(PyObject *self, + auto c = mpy::obj::steal(PyFrame_GetCode(f.ptr())); + auto lasti = PyFrame_GetLasti(f.ptr()); + auto decoder = PyInstDecoder(c.ptr(), lasti); +- #if IS_PYTHON_3_11_PLUS ++ #if IS_PYTHON_3_11 + // When py3.11 adapts bytecode lasti points to the precall + // rather than the call instruction after it + if (decoder.opcode() == PRECALL) { +@@ -3268,4 +3254,4 @@ PyObject* Dim_init() { + } + } + +-#endif ++ +diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h +index de82b5af95a4..d76d4828bf80 100644 +--- a/functorch/csrc/dim/minpybind.h ++++ b/functorch/csrc/dim/minpybind.h +@@ -621,7 +621,7 @@ struct vector_args { + PyObject *dummy = NULL; + _PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy); + #else +- _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0}; ++ _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr}; + std::unique_ptr buf(new PyObject*[names.size()]); + _PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]); + #endif +@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) { + #define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \ + static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \ + FORALL_ARGS(MPY_ARGS_DECLARE) \ +- static _PyArg_Parser parser = {fmt, kwlist, 0}; \ ++ static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \ + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \ + throw mpy::exception_set(); \ + } +-- +2.44.0 + diff --git a/next/0001-Regenerate-flatbuffer-header.patch b/0001-Regenerate-flatbuffer-header.patch similarity index 100% rename from next/0001-Regenerate-flatbuffer-header.patch rename to 0001-Regenerate-flatbuffer-header.patch diff --git 
a/next/0001-Stub-in-kineto-ActivityType.patch b/0001-Stub-in-kineto-ActivityType.patch similarity index 100% rename from next/0001-Stub-in-kineto-ActivityType.patch rename to 0001-Stub-in-kineto-ActivityType.patch diff --git a/0001-add-rocm_version-fallback.patch b/0001-add-rocm_version-fallback.patch deleted file mode 100644 index 25a0c67..0000000 --- a/0001-add-rocm_version-fallback.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 1d35a0b1f5cb39fd0c44a486157dc739a02c71b6 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Wed, 20 Dec 2023 11:23:18 -0500 -Subject: [PATCH] add rocm_version fallback - -Signed-off-by: Tom Rix ---- - torch/utils/hipify/cuda_to_hip_mappings.py | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py -index 73586440e7..9354057a39 100644 ---- a/torch/utils/hipify/cuda_to_hip_mappings.py -+++ b/torch/utils/hipify/cuda_to_hip_mappings.py -@@ -57,6 +57,18 @@ if os.path.isfile(rocm_version_h): - if match: - patch = int(match.group(1)) - rocm_version = (major, minor, patch) -+else: -+ try: -+ hip_version = subprocess.check_output(["hipconfig", "--version"]).decode("utf-8") -+ hip_split = hip_version.split('.') -+ rocm_version = (int(hip_split[0]), int(hip_split[1]), 0) -+ except subprocess.CalledProcessError: -+ print(f"Warning: hipconfig --version failed") -+ except (FileNotFoundError, PermissionError, NotADirectoryError): -+ # Do not print warning. This is okay. This file can also be imported for non-ROCm builds. -+ pass -+ -+ - - # List of math functions that should be replaced inside device code only. 
- MATH_TRANSPILATIONS = collections.OrderedDict( --- -2.43.0 - diff --git a/next/0001-can-not-use-with-c-files.patch b/0001-can-not-use-with-c-files.patch similarity index 100% rename from next/0001-can-not-use-with-c-files.patch rename to 0001-can-not-use-with-c-files.patch diff --git a/next/0001-cuda-hip-signatures.patch b/0001-cuda-hip-signatures.patch similarity index 100% rename from next/0001-cuda-hip-signatures.patch rename to 0001-cuda-hip-signatures.patch diff --git a/0001-disable-as-needed-for-libtorch.patch b/0001-disable-as-needed-for-libtorch.patch deleted file mode 100644 index 392140a..0000000 --- a/0001-disable-as-needed-for-libtorch.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 06499575b177a218846f0e43ff4bc77d245f207f Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 1 Dec 2023 09:38:05 -0500 -Subject: [PATCH] disable as-needed for libtorch - -Signed-off-by: Tom Rix ---- - caffe2/CMakeLists.txt | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index b975d388a7..5e9fd3b3f3 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -914,6 +914,10 @@ if(HAVE_SOVERSION) - VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) - endif() - -+# Disable global as-needed -+set_target_properties(torch PROPERTIES LINK_FLAGS -Wl,--no-as-needed) -+ -+ - if(USE_ROCM) - filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$") - set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) --- -2.42.1 - diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch new file mode 100644 index 0000000..34a1704 --- /dev/null +++ b/0001-disable-use-of-aotriton.patch @@ -0,0 +1,46 @@ +From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Tue, 19 Mar 2024 11:32:37 -0400 +Subject: [PATCH] disable use of aotriton + +--- + aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ + 1 file changed, 6 
insertions(+) + +diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +index 96b839820efd..2d3dd0cb4b0f 100644 +--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp ++++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +@@ -21,9 +21,11 @@ + #include + #include + ++#ifdef USE_FLASH_ATTENTION + #if USE_ROCM + #include + #endif ++#endif + + /** + * Note [SDPA Runtime Dispatch] +@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { + } + + bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { ++#ifdef USE_FLASH_ATTENTION + // Check that the gpu is capable of running flash attention + using sm80 = SMVersion<8, 0>; + using sm90 = SMVersion<9, 0>; +@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug + } + #endif + return true; ++#else ++ return false; ++#endif + } + + bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { +-- +2.44.0 + diff --git a/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch new file mode 100644 index 0000000..0ce5b1f --- /dev/null +++ b/0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch @@ -0,0 +1,226 @@ +From b9d45eb1cc90696a4de76676221219e24423c709 Mon Sep 17 00:00:00 2001 +From: William Wen +Date: Wed, 3 Apr 2024 17:58:46 -0700 +Subject: [PATCH] [dynamo, 3.12] enable dynamo on 3.12, enable most dynamo + unittests on 3.12 (#123216) + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/123216 +Approved by: https://github.com/jansel, https://github.com/malfet +--- + test/dynamo/test_autograd_function.py | 3 ++ + test/dynamo/test_misc.py | 63 +++++++++++++++++++++++++ + test/functorch/test_eager_transforms.py | 7 ++- + test/run_test.py | 3 -- + torch/__init__.py | 5 +- + torch/_dynamo/eval_frame.py | 4 +- + torch/_dynamo/test_case.py | 8 +--- + 7 
files changed, 74 insertions(+), 19 deletions(-) + +diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py +index d23fec607afa..bc5ebc767038 100644 +--- a/test/dynamo/test_autograd_function.py ++++ b/test/dynamo/test_autograd_function.py +@@ -2,6 +2,8 @@ + + import copy + import math ++import sys ++import unittest + + import torch + +@@ -528,6 +530,7 @@ class AutogradFunctionTests(torch._dynamo.test_case.TestCase): + # I pulled all of these test cases from test_autograd.py + # In the future, we should make the Dynamo test suite actually + # run on test_autograd.py (it's disabled right now) and delete these. ++ @unittest.skipIf(sys.version_info >= (3, 12), "invalid free in 3.12+") + def test_smoke_from_test_autograd(self): + class Func(torch.autograd.Function): + @staticmethod +diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py +index a73de8b1c7e9..8f54e0564e6b 100644 +--- a/test/dynamo/test_misc.py ++++ b/test/dynamo/test_misc.py +@@ -9760,6 +9760,69 @@ fn + lambda mod: mod, + ) + ++ @xfailIfPy311 ++ def test_outside_linear_module_free(self): ++ # Compared to test_linear_module_free, the linear ++ # layer is not the code object that is directly compiled. 
++ def model_inp_ctr(): ++ fc = torch.nn.Linear(100, 100) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.fc_ref = fc ++ ++ def forward(self, x): ++ return fc(x[0]) ++ ++ # return fc to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), fc) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.fc_ref) ++ ++ @unittest.skipIf(sys.version_info >= (3, 12), "leaks in 3.12+") ++ def test_parameter_free(self): ++ def model_inp_ctr(): ++ param = torch.nn.Parameter(torch.randn(100, 100)) ++ ++ class Mod(torch.nn.Module): ++ def __init__(self): ++ super().__init__() ++ self.param = param ++ ++ def forward(self, x): ++ return self.param * x[0] ++ ++ # return param to keep it alive in _test_compile_model_free ++ return Mod(), (torch.randn(100, 100), param) ++ ++ self._test_compile_model_free(model_inp_ctr, lambda mod: mod.param) ++ ++ def test_raises_importerror1(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ try: ++ import some_module_that_surely_does_not_exist ++ ++ return ++ except ImportError: ++ pass ++ return x.sin() ++ ++ x = torch.randn(8) ++ self.assertEqual(fn(x), x.sin()) ++ ++ def test_raises_importerror2(self): ++ @torch.compile(backend="eager") ++ def fn(x): ++ import some_module_that_surely_does_not_exist ++ ++ return x + 1 ++ ++ x = torch.randn(8) ++ with self.assertRaises(ImportError): ++ fn(x) ++ + def test_dynamo_cache_move_to_front(self): + class Mod(torch.nn.Module): + def __init__(self): +diff --git a/test/functorch/test_eager_transforms.py b/test/functorch/test_eager_transforms.py +index 09415cf8f48e..60790ec06059 100644 +--- a/test/functorch/test_eager_transforms.py ++++ b/test/functorch/test_eager_transforms.py +@@ -4762,8 +4762,7 @@ class TestCompileTransforms(TestCase): + # Triton only supports GPU with SM70 or later. 
+ @expectedFailureIf((IS_ARM64 and not IS_MACOS) or + IS_WINDOWS or +- (TEST_CUDA and not SM70OrLater) or +- (sys.version_info >= (3, 12))) ++ (TEST_CUDA and not SM70OrLater)) + def test_compile_vmap_hessian(self, device): + # The model and inputs are a smaller version + # of code at benchmark repo: +@@ -4792,8 +4791,8 @@ class TestCompileTransforms(TestCase): + actual = opt_fn(params_and_buffers, x) + self.assertEqual(actual, expected) + +- # torch.compile is not supported on Windows or on Python 3.12+ +- @expectedFailureIf(IS_WINDOWS or (sys.version_info >= (3, 12))) ++ # torch.compile is not supported on Windows ++ @expectedFailureIf(IS_WINDOWS) + @torch._dynamo.config.patch(suppress_errors=False) + @torch._dynamo.config.patch(capture_func_transforms=True) + @skipIfTorchDynamo("Do not test torch.compile on top of torch.compile") +diff --git a/test/run_test.py b/test/run_test.py +index e86af9623042..ebb14df4167d 100755 +--- a/test/run_test.py ++++ b/test/run_test.py +@@ -74,7 +74,6 @@ sys.path.remove(str(REPO_ROOT)) + RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1" + DISTRIBUTED_TEST_PREFIX = "distributed" + INDUCTOR_TEST_PREFIX = "inductor" +-DYNAMO_TEST_PREFIX = "dynamo" + + + # Note [ROCm parallel CI testing] +@@ -324,7 +323,6 @@ JIT_EXECUTOR_TESTS = [ + ] + + INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)] +-DYNAMO_TESTS = [test for test in TESTS if test.startswith(DYNAMO_TEST_PREFIX)] + DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)] + TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")] + FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")] +@@ -1361,7 +1359,6 @@ def get_selected_tests(options) -> List[str]: + # these tests failing in Python 3.12 temporarily disabling + if sys.version_info >= (3, 12): + options.exclude.extend(INDUCTOR_TESTS) +- options.exclude.extend(DYNAMO_TESTS) + options.exclude.extend( 
+ [ + "functorch/test_dims", +diff --git a/torch/__init__.py b/torch/__init__.py +index d381712b4a35..26cdffe81d29 100644 +--- a/torch/__init__.py ++++ b/torch/__init__.py +@@ -1861,9 +1861,8 @@ def compile(model: Optional[Callable] = None, *, + + """ + _C._log_api_usage_once("torch.compile") +- # Temporary until we get proper support for python 3.12 +- if sys.version_info >= (3, 12): +- raise RuntimeError("Dynamo is not supported on Python 3.12+") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Dynamo is not supported on Python 3.13+") + + # Decorator mode + if model is None: +diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py +index 53ab0df3a947..0a80eeea99ed 100644 +--- a/torch/_dynamo/eval_frame.py ++++ b/torch/_dynamo/eval_frame.py +@@ -589,8 +589,8 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg] + + + def check_if_dynamo_supported(): +- if sys.version_info >= (3, 12): +- raise RuntimeError("Python 3.12+ not yet supported for torch.compile") ++ if sys.version_info >= (3, 13): ++ raise RuntimeError("Python 3.13+ not yet supported for torch.compile") + + + def is_dynamo_supported(): +diff --git a/torch/_dynamo/test_case.py b/torch/_dynamo/test_case.py +index e3cbef09eaae..297ea6e2bc2a 100644 +--- a/torch/_dynamo/test_case.py ++++ b/torch/_dynamo/test_case.py +@@ -1,7 +1,6 @@ + import contextlib + import importlib + import logging +-import sys + + import torch + import torch.testing +@@ -20,12 +19,7 @@ log = logging.getLogger(__name__) + def run_tests(needs=()): + from torch.testing._internal.common_utils import run_tests + +- if ( +- TEST_WITH_TORCHDYNAMO +- or IS_WINDOWS +- or TEST_WITH_CROSSREF +- or sys.version_info >= (3, 12) +- ): ++ if TEST_WITH_TORCHDYNAMO or IS_WINDOWS or TEST_WITH_CROSSREF: + return # skip testing + + if isinstance(needs, str): +-- +2.44.0 + diff --git a/next/0001-no-third_party-FXdiv.patch b/0001-no-third_party-FXdiv.patch similarity index 100% rename from 
next/0001-no-third_party-FXdiv.patch rename to 0001-no-third_party-FXdiv.patch diff --git a/next/0001-no-third_party-fmt.patch b/0001-no-third_party-fmt.patch similarity index 83% rename from next/0001-no-third_party-fmt.patch rename to 0001-no-third_party-fmt.patch index f2a82f8..6e82af2 100644 --- a/next/0001-no-third_party-fmt.patch +++ b/0001-no-third_party-fmt.patch @@ -1,4 +1,4 @@ -From c46146dc31ed3dc0ebb6ca28c01330db8ba5d4f2 Mon Sep 17 00:00:00 2001 +From 2ce255b75760a0a513fb1706629b416f76a5c822 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 3 Feb 2024 08:16:04 -0500 Subject: [PATCH] no third_party fmt @@ -10,23 +10,23 @@ Subject: [PATCH] no third_party fmt 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt -index cb81556ff2..7529b2aec9 100644 +index 1f742f4c176..4fa08913bdd 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -87,7 +87,7 @@ endif() if(C10_USE_GLOG) - target_link_libraries(c10 PUBLIC glog::glog) + target_link_libraries(c10 PUBLIC glog::glog) endif() -target_link_libraries(c10 PRIVATE fmt::fmt-header-only) +target_link_libraries(c10 PRIVATE fmt) if(C10_USE_NUMA) - target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR}) + message(STATUS "NUMA paths:") diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 8310f29e01..c99d0d762a 100644 +index 6f5a2d5feff..42fbf80f6e8 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -1834,7 +1834,7 @@ endif() +@@ -1837,7 +1837,7 @@ endif() # set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) @@ -35,7 +35,7 @@ index 8310f29e01..c99d0d762a 100644 # Disable compiler feature checks for `fmt`. # -@@ -1843,9 +1843,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) +@@ -1846,9 +1846,9 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) # CMAKE_CXX_FLAGS in ways that break feature checks. 
Since we already know # `fmt` is compatible with a superset of the compilers that PyTorch is, it # shouldn't be too bad to just disable the checks. @@ -48,7 +48,7 @@ index 8310f29e01..c99d0d762a 100644 # ---[ Kineto diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 24903a207e..3a7751dc00 100644 +index 97a72eed55b..9e5014d1980 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -80,7 +80,7 @@ set(TORCH_PYTHON_LINK_LIBRARIES @@ -61,5 +61,5 @@ index 24903a207e..3a7751dc00 100644 if(USE_ASAN AND TARGET Sanitizer::address) -- -2.43.0 +2.43.2 diff --git a/next/0001-no-third_party-foxi.patch b/0001-no-third_party-foxi.patch similarity index 100% rename from next/0001-no-third_party-foxi.patch rename to 0001-no-third_party-foxi.patch diff --git a/0001-python-torch-link-with-python.patch b/0001-python-torch-link-with-python.patch deleted file mode 100644 index d52f034..0000000 --- a/0001-python-torch-link-with-python.patch +++ /dev/null @@ -1,27 +0,0 @@ -From cef92207b79ad53e3fcc1b0e22ba91cb9422968c Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 18 Nov 2023 09:38:52 -0500 -Subject: [PATCH] python-torch link with python - -Signed-off-by: Tom Rix ---- - torch/CMakeLists.txt | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 8d5375f320..6f8c7b65c4 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -312,6 +312,9 @@ add_dependencies(torch_python torch_python_stubs) - add_dependencies(torch_python flatbuffers) - - -+# Unresolved syms in -lpython -+target_link_libraries(torch_python PUBLIC ${PYTHON_LIBRARIES}) -+ - if(USE_PRECOMPILED_HEADERS) - target_precompile_headers(torch_python PRIVATE - "$<$:ATen/ATen.h>") --- -2.42.1 - diff --git a/0001-python-torch-remove-ubuntu-specific-linking.patch b/0001-python-torch-remove-ubuntu-specific-linking.patch deleted file mode 100644 index 6d9bd24..0000000 --- a/0001-python-torch-remove-ubuntu-specific-linking.patch +++ /dev/null @@ -1,33 
+0,0 @@ -From f70ef37d0b3c780fd17be199e66a81ffa679f93e Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 18 Nov 2023 12:05:43 -0500 -Subject: [PATCH] python-torch remove ubuntu specific linking - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 86c34984b2..f7c4a7b05f 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -479,9 +479,12 @@ option(BUILD_EXECUTORCH "Master flag to build Executorch" ON) - # This is a fix for a rare build issue on Ubuntu: - # symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk - # https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu --if(LINUX) -- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") --endif() -+ -+# This is not ubuntu! -+# if(LINUX) -+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") -+# endif() -+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--as-needed") - - if(MSVC) - set(CMAKE_NINJA_CMCLDEPS_RC OFF) --- -2.42.1 - diff --git a/0001-pytorch-use-SO-version-by-default.patch b/0001-pytorch-use-SO-version-by-default.patch deleted file mode 100644 index b70d6ec..0000000 --- a/0001-pytorch-use-SO-version-by-default.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 527d1ce24a06a14788ca5fc2411985d7c1cb2923 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 13 Oct 2023 05:35:19 -0700 -Subject: [PATCH] pytorch use SO version by default - -Signed-off-by: Tom Rix ---- - CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 902ee70fd1..86c34984b2 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -340,7 +340,7 @@ option(USE_TBB "Use TBB (Deprecated)" OFF) - cmake_dependent_option( - USE_SYSTEM_TBB "Use system-provided Intel TBB." 
OFF "USE_TBB" OFF) - option(ONNX_ML "Enable traditional ONNX ML API." ON) --option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF) -+option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" ON) - option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF) - cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) - option(WERROR "Build with -Werror supported by the compiler" OFF) --- -2.42.1 - diff --git a/0001-reenable-foxi-linking.patch b/0001-reenable-foxi-linking.patch new file mode 100644 index 0000000..8e39795 --- /dev/null +++ b/0001-reenable-foxi-linking.patch @@ -0,0 +1,25 @@ +From 58ccda271e8f51c3fa5b7518cf6ee52ce204fd37 Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Thu, 22 Feb 2024 09:28:11 -0500 +Subject: [PATCH] reenable foxi linking + +--- + cmake/Dependencies.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 42fbf80f6e8..bc3a2dc6fee 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1604,7 +1604,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + endif() + # include_directories(${FOXI_INCLUDE_DIRS}) +-# list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) ++ list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) + # Recover the build shared libs option. 
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) + endif() +-- +2.43.2 + diff --git a/next/0001-silence-an-assert.patch b/0001-silence-an-assert.patch similarity index 100% rename from next/0001-silence-an-assert.patch rename to 0001-silence-an-assert.patch diff --git a/0001-torch-sane-version.patch b/0001-torch-sane-version.patch deleted file mode 100644 index cb1211a..0000000 --- a/0001-torch-sane-version.patch +++ /dev/null @@ -1,32 +0,0 @@ -From c47c6e202d60ccac15aa36698bd4788415a9416b Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Sat, 25 Nov 2023 16:46:17 -0500 -Subject: [PATCH] torch sane version - ---- - tools/generate_torch_version.py | 1 + - version.txt | 2 +- - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tools/generate_torch_version.py b/tools/generate_torch_version.py -index d90d3646ab..11d5bbeba5 100644 ---- a/tools/generate_torch_version.py -+++ b/tools/generate_torch_version.py -@@ -42,6 +42,7 @@ def get_tag(pytorch_root: Union[str, Path]) -> str: - def get_torch_version(sha: Optional[str] = None) -> str: - pytorch_root = Path(__file__).parent.parent - version = open(pytorch_root / "version.txt").read().strip() -+ return version - - if os.getenv("PYTORCH_BUILD_VERSION"): - assert os.getenv("PYTORCH_BUILD_NUMBER") is not None -diff --git a/version.txt b/version.txt -index ecaf4eea7c..7ec1d6db40 100644 ---- a/version.txt -+++ b/version.txt -@@ -1,1 +1,1 @@ --2.1.0a0 -+2.1.2 --- -2.42.1 - diff --git a/next/0001-use-any-hip.patch b/0001-use-any-hip.patch similarity index 100% rename from next/0001-use-any-hip.patch rename to 0001-use-any-hip.patch diff --git a/0002-Regenerate-flatbuffer-header.patch b/0002-Regenerate-flatbuffer-header.patch deleted file mode 100644 index 662a7c5..0000000 --- a/0002-Regenerate-flatbuffer-header.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 587a8b10bd3f7a68275356ee6eb6bb43ed711ba2 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:19:29 -0700 -Subject: [PATCH 2/6] Regenerate flatbuffer 
header - -For this error -torch/csrc/jit/serialization/mobile_bytecode_generated.h:12:41: -error: static assertion failed: Non-compatible flatbuffers version included - 12 | FLATBUFFERS_VERSION_MINOR == 3 && - -PyTorch is expecting 23.3.3, what f38 has -Rawhide is at 23.5.26 - -Regenerate with -flatc --cpp --gen-mutable --no-prefix --scoped-enums mobile_bytecode.fbs - -Signed-off-by: Tom Rix ---- - torch/csrc/jit/serialization/mobile_bytecode_generated.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -index cffe8bc7a6..83575e4c19 100644 ---- a/torch/csrc/jit/serialization/mobile_bytecode_generated.h -+++ b/torch/csrc/jit/serialization/mobile_bytecode_generated.h -@@ -9,8 +9,8 @@ - // Ensure the included flatbuffers.h is the same version as when this file was - // generated, otherwise it may not be compatible. - static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && -- FLATBUFFERS_VERSION_MINOR == 3 && -- FLATBUFFERS_VERSION_REVISION == 3, -+ FLATBUFFERS_VERSION_MINOR == 5 && -+ FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - - namespace torch { --- -2.41.0 - diff --git a/0003-Stub-in-kineto-ActivityType.patch b/0003-Stub-in-kineto-ActivityType.patch deleted file mode 100644 index 0823acb..0000000 --- a/0003-Stub-in-kineto-ActivityType.patch +++ /dev/null @@ -1,73 +0,0 @@ -From bb52aeacc6dfab2355249b7b5beb72c2761ec319 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 06:25:23 -0700 -Subject: [PATCH 3/6] Stub in kineto ActivityType - -There is an error with kineto is not used, the shim still -requires the ActivityTYpe.h header to get the enum Activity type. -So cut-n-paste just enough of the header in to do this. 
- -Signed-off-by: Tom Rix ---- - torch/csrc/profiler/kineto_shim.h | 44 +++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index 2a410719a1..7d6525befd 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,51 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. 
-+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include - #include --- -2.41.0 - diff --git a/0004-torch-python-3.12-changes.patch b/0004-torch-python-3.12-changes.patch deleted file mode 100644 index bdcec7f..0000000 --- a/0004-torch-python-3.12-changes.patch +++ /dev/null @@ -1,45 +0,0 @@ -From fcf3cd70229cdc729d05ddab081ac886c9db6bd7 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 13:58:28 -0700 -Subject: [PATCH] torch python 3.12 changes - -Signed-off-by: Tom Rix ---- - functorch/csrc/dim/dim.cpp | 6 ++++++ - torch/csrc/dynamo/cpython_defs.h | 2 +- - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp -index b611dc3e8c2..c7009478aee 100644 ---- a/functorch/csrc/dim/dim.cpp -+++ b/functorch/csrc/dim/dim.cpp -@@ -10,7 +10,13 @@ - // Many APIs have changed/don't exist anymore - #if IS_PYTHON_3_12_PLUS - -+#include "dim.h" -+ - // Re-enable this some day -+PyObject* Dim_init() { -+ PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12"); -+ return nullptr; -+} - - #else - -diff --git a/torch/csrc/dynamo/cpython_defs.h b/torch/csrc/dynamo/cpython_defs.h -index f0a0e1a88e2..f58becd246e 100644 ---- a/torch/csrc/dynamo/cpython_defs.h -+++ b/torch/csrc/dynamo/cpython_defs.h -@@ -6,7 +6,7 @@ - // should go in cpython_defs.c. Copying is required when, e.g., - // we need to call internal CPython functions that are not exposed. 
- --#if IS_PYTHON_3_11_PLUS && !(IS_PYTHON_3_12_PLUS) -+#if IS_PYTHON_3_11_PLUS - - #include - --- -2.43.0 - diff --git a/0005-disable-submodule-search.patch b/0005-disable-submodule-search.patch deleted file mode 100644 index 1b5509f..0000000 --- a/0005-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From fc0d4ce06fecbd2bcd10fb13c515dc6625260870 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH 5/6] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 17bf16b89a..b8c8ae5506 100644 ---- a/setup.py -+++ b/setup.py -@@ -452,7 +452,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.41.0 - diff --git a/README.NVIDIA b/README.NVIDIA new file mode 100644 index 0000000..b927f47 --- /dev/null +++ b/README.NVIDIA @@ -0,0 +1,15 @@ +Some help for building this package for NVIDIA/CUDA + +Review NVIDIA's documenation +https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html + +Review PyTorch documentation +https://github.com/pytorch/pytorch#from-source + +Some convience strings to cut-n-paste + +F39 +dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo + +Building is local. +Build machine has a supported GPU, the drivers are loaded and CUDA SDK is installed. 
diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..0503793 --- /dev/null +++ b/license.txt @@ -0,0 +1,350 @@ +# +# License Details +# Main license BSD 3-Clause +# +# Apache-2.0 +# android/libs/fbjni/LICENSE +# android/libs/fbjni/CMakeLists.txt +# android/libs/fbjni/build.gradle +# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp +# android/libs/fbjni/cxx/fbjni/ByteBuffer.h +# android/libs/fbjni/cxx/fbjni/Context.h +# android/libs/fbjni/cxx/fbjni/File.h +# android/libs/fbjni/cxx/fbjni/JThread.h +# android/libs/fbjni/cxx/fbjni/NativeRunnable.h +# android/libs/fbjni/cxx/fbjni/OnLoad.cpp +# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp +# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h +# android/libs/fbjni/cxx/fbjni/detail/Boxed.h +# android/libs/fbjni/cxx/fbjni/detail/Common.h +# android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h +# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h +# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp +# android/libs/fbjni/cxx/fbjni/detail/Environment.h +# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp +# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h +# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h +# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp +# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h +# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Iterator.h +# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h +# android/libs/fbjni/cxx/fbjni/detail/Log.h +# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h +# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp +# android/libs/fbjni/cxx/fbjni/detail/Meta.h +# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h +# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h +# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h +# android/libs/fbjni/cxx/fbjni/detail/References-forward.h +# android/libs/fbjni/cxx/fbjni/detail/References-inl.h +# 
android/libs/fbjni/cxx/fbjni/detail/References.cpp +# android/libs/fbjni/cxx/fbjni/detail/References.h +# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h +# android/libs/fbjni/cxx/fbjni/detail/Registration.h +# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h +# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h +# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp +# android/libs/fbjni/cxx/fbjni/detail/utf8.h +# android/libs/fbjni/cxx/fbjni/fbjni.cpp +# android/libs/fbjni/cxx/fbjni/fbjni.h +# android/libs/fbjni/cxx/lyra/cxa_throw.cpp +# android/libs/fbjni/cxx/lyra/lyra.cpp +# android/libs/fbjni/cxx/lyra/lyra.h +# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp +# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp +# android/libs/fbjni/cxx/lyra/lyra_exceptions.h +# android/libs/fbjni/gradle.properties +# android/libs/fbjni/gradle/android-tasks.gradle +# android/libs/fbjni/gradle/release.gradle +# android/libs/fbjni/gradlew +# android/libs/fbjni/gradlew.bat +# android/libs/fbjni/host.gradle +# android/libs/fbjni/java/com/facebook/jni/CppException.java +# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java +# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java +# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java +# android/libs/fbjni/java/com/facebook/jni/HybridData.java +# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java +# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java +# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java +# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java +# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java +# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java +# android/libs/fbjni/scripts/android-setup.sh +# android/libs/fbjni/scripts/run-host-tests.sh +# android/libs/fbjni/settings.gradle +# android/libs/fbjni/test/BaseFBJniTests.java +# android/libs/fbjni/test/ByteBufferTests.java +# 
android/libs/fbjni/test/DocTests.java +# android/libs/fbjni/test/FBJniTests.java +# android/libs/fbjni/test/HybridTests.java +# android/libs/fbjni/test/IteratorTests.java +# android/libs/fbjni/test/PrimitiveArrayTests.java +# android/libs/fbjni/test/ReadableByteChannelTests.java +# android/libs/fbjni/test/jni/CMakeLists.txt +# android/libs/fbjni/test/jni/byte_buffer_tests.cpp +# android/libs/fbjni/test/jni/doc_tests.cpp +# android/libs/fbjni/test/jni/expect.h +# android/libs/fbjni/test/jni/fbjni_onload.cpp +# android/libs/fbjni/test/jni/fbjni_tests.cpp +# android/libs/fbjni/test/jni/hybrid_tests.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h +# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp +# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h +# android/libs/fbjni/test/jni/iterator_tests.cpp +# android/libs/fbjni/test/jni/modified_utf8_test.cpp +# android/libs/fbjni/test/jni/no_rtti.cpp +# android/libs/fbjni/test/jni/no_rtti.h +# android/libs/fbjni/test/jni/primitive_array_tests.cpp +# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp +# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp +# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp +# android/pytorch_android/host/build.gradle +# aten/src/ATen/cuda/llvm_basic.cpp +# aten/src/ATen/cuda/llvm_complex.cpp +# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h +# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h +# aten/src/ATen/nnapi/codegen.py +# aten/src/ATen/nnapi/NeuralNetworks.h +# aten/src/ATen/nnapi/nnapi_wrapper.cpp +# aten/src/ATen/nnapi/nnapi_wrapper.h +# binaries/benchmark_args.h +# binaries/benchmark_helper.cc +# binaries/benchmark_helper.h +# binaries/compare_models_torch.cc +# 
binaries/convert_and_benchmark.cc +# binaries/convert_caffe_image_db.cc +# binaries/convert_db.cc +# binaries/convert_encoded_to_raw_leveldb.cc +# binaries/convert_image_to_tensor.cc +# binaries/core_overhead_benchmark.cc +# binaries/core_overhead_benchmark_gpu.cc +# binaries/db_throughput.cc +# binaries/dump_operator_names.cc +# binaries/inspect_gpu.cc +# binaries/load_benchmark_torch.cc +# binaries/make_cifar_db.cc +# binaries/make_image_db.cc +# binaries/make_mnist_db.cc +# binaries/optimize_for_mobile.cc +# binaries/parallel_info.cc +# binaries/predictor_verifier.cc +# binaries/print_core_object_sizes_gpu.cc +# binaries/print_registered_core_operators.cc +# binaries/run_plan.cc +# binaries/run_plan_mpi.cc +# binaries/speed_benchmark.cc +# binaries/speed_benchmark_torch.cc +# binaries/split_db.cc +# binaries/tsv_2_proto.cc +# binaries/tutorial_blob.cc +# binaries/zmq_feeder.cc +# c10/test/util/small_vector_test.cpp +# c10/util/FunctionRef.h +# c10/util/SmallVector.cpp +# c10/util/SmallVector.h +# c10/util/llvmMathExtras.h +# c10/util/sparse_bitset.h +# caffe2/contrib/aten/gen_op.py +# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc +# caffe2/contrib/fakelowp/fp16_fc_acc_op.h +# caffe2/contrib/gloo/allgather_ops.cc +# caffe2/contrib/gloo/allgather_ops.h +# caffe2/contrib/gloo/reduce_scatter_ops.cc +# caffe2/contrib/gloo/reduce_scatter_ops.h +# caffe2/core/hip/common_miopen.h +# caffe2/core/hip/common_miopen.hip +# caffe2/core/net_async_tracing.cc +# caffe2/core/net_async_tracing.h +# caffe2/core/net_async_tracing_test.cc +# caffe2/experiments/operators/fully_connected_op_decomposition.cc +# caffe2/experiments/operators/fully_connected_op_decomposition.h +# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc +# caffe2/experiments/operators/fully_connected_op_prune.cc +# caffe2/experiments/operators/fully_connected_op_prune.h +# caffe2/experiments/operators/fully_connected_op_sparse.cc +# caffe2/experiments/operators/fully_connected_op_sparse.h +# 
caffe2/experiments/operators/funhash_op.cc +# caffe2/experiments/operators/funhash_op.h +# caffe2/experiments/operators/sparse_funhash_op.cc +# caffe2/experiments/operators/sparse_funhash_op.h +# caffe2/experiments/operators/sparse_matrix_reshape_op.cc +# caffe2/experiments/operators/sparse_matrix_reshape_op.h +# caffe2/experiments/operators/tt_contraction_op.cc +# caffe2/experiments/operators/tt_contraction_op.h +# caffe2/experiments/operators/tt_contraction_op_gpu.cc +# caffe2/experiments/operators/tt_pad_op.cc +# caffe2/experiments/operators/tt_pad_op.h +# caffe2/experiments/python/SparseTransformer.py +# caffe2/experiments/python/convnet_benchmarks.py +# caffe2/experiments/python/device_reduce_sum_bench.py +# caffe2/experiments/python/funhash_op_test.py +# caffe2/experiments/python/net_construct_bench.py +# caffe2/experiments/python/sparse_funhash_op_test.py +# caffe2/experiments/python/sparse_reshape_op_test.py +# caffe2/experiments/python/tt_contraction_op_test.py +# caffe2/experiments/python/tt_pad_op_test.py +# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h +# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h +# caffe2/mobile/contrib/nnapi/NeuralNetworks.h +# caffe2/mobile/contrib/nnapi/dlnnapi.c +# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc +# caffe2/observers/profile_observer.cc +# caffe2/observers/profile_observer.h +# caffe2/operators/hip/conv_op_miopen.hip +# caffe2/operators/hip/local_response_normalization_op_miopen.hip +# caffe2/operators/hip/pool_op_miopen.hip +# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip +# caffe2/operators/quantized/int8_utils.h +# caffe2/operators/stump_func_op.cc +# caffe2/operators/stump_func_op.cu +# caffe2/operators/stump_func_op.h +# caffe2/operators/unique_ops.cc +# caffe2/operators/unique_ops.cu +# caffe2/operators/unique_ops.h +# caffe2/operators/upsample_op.cc +# caffe2/operators/upsample_op.h +# caffe2/opt/fusion.h +# caffe2/python/layers/label_smooth.py +# 
caffe2/python/mint/static/css/simple-sidebar.css +# caffe2/python/modeling/get_entry_from_blobs.py +# caffe2/python/modeling/get_entry_from_blobs_test.py +# caffe2/python/modeling/gradient_clipping_test.py +# caffe2/python/operator_test/unique_ops_test.py +# caffe2/python/operator_test/upsample_op_test.py +# caffe2/python/operator_test/weight_scale_test.py +# caffe2/python/pybind_state_int8.cc +# caffe2/python/transformations.py +# caffe2/python/transformations_test.py +# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc +# caffe2/quantization/server/batch_matmul_dnnlowp_op.h +# caffe2/quantization/server/compute_equalization_scale_test.py +# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc +# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h +# caffe2/quantization/server/elementwise_sum_relu_op.cc +# caffe2/quantization/server/fb_fc_packed_op.cc +# caffe2/quantization/server/fb_fc_packed_op.h +# caffe2/quantization/server/fbgemm_fp16_pack_op.cc +# caffe2/quantization/server/fbgemm_fp16_pack_op.h +# caffe2/quantization/server/fully_connected_fake_lowp_op.cc +# caffe2/quantization/server/fully_connected_fake_lowp_op.h +# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py +# caffe2/quantization/server/int8_gen_quant_params_test.py +# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py +# caffe2/quantization/server/spatial_batch_norm_relu_op.cc +# caffe2/sgd/weight_scale_op.cc +# caffe2/sgd/weight_scale_op.h +# caffe2/utils/bench_utils.h +# functorch/examples/maml_omniglot/maml-omniglot-higher.py +# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py +# functorch/examples/maml_omniglot/maml-omniglot-transforms.py +# functorch/examples/maml_omniglot/support/omniglot_loaders.py +# modules/detectron/group_spatial_softmax_op.cc +# modules/detectron/group_spatial_softmax_op.cu +# modules/detectron/group_spatial_softmax_op.h +# modules/detectron/ps_roi_pool_op.cc +# modules/detectron/ps_roi_pool_op.h +# 
modules/detectron/roi_pool_f_op.cc +# modules/detectron/roi_pool_f_op.cu +# modules/detectron/roi_pool_f_op.h +# modules/detectron/sample_as_op.cc +# modules/detectron/sample_as_op.cu +# modules/detectron/sample_as_op.h +# modules/detectron/select_smooth_l1_loss_op.cc +# modules/detectron/select_smooth_l1_loss_op.cu +# modules/detectron/select_smooth_l1_loss_op.h +# modules/detectron/sigmoid_cross_entropy_loss_op.cc +# modules/detectron/sigmoid_cross_entropy_loss_op.cu +# modules/detectron/sigmoid_cross_entropy_loss_op.h +# modules/detectron/sigmoid_focal_loss_op.cc +# modules/detectron/sigmoid_focal_loss_op.cu +# modules/detectron/sigmoid_focal_loss_op.h +# modules/detectron/smooth_l1_loss_op.cc +# modules/detectron/smooth_l1_loss_op.cu +# modules/detectron/smooth_l1_loss_op.h +# modules/detectron/softmax_focal_loss_op.cc +# modules/detectron/softmax_focal_loss_op.cu +# modules/detectron/softmax_focal_loss_op.h +# modules/detectron/spatial_narrow_as_op.cc +# modules/detectron/spatial_narrow_as_op.cu +# modules/detectron/spatial_narrow_as_op.h +# modules/detectron/upsample_nearest_op.cc +# modules/detectron/upsample_nearest_op.h +# modules/module_test/module_test_dynamic.cc +# modules/rocksdb/rocksdb.cc +# scripts/apache_header.txt +# scripts/apache_python.txt +# torch/distributions/lkj_cholesky.py +# +# Apache 2.0 AND BSD 2-Clause +# caffe2/operators/deform_conv_op.cu +# +# Apache 2.0 AND BSD 2-Clause AND MIT +# modules/detectron/ps_roi_pool_op.cu +# +# Apache 2.0 AND BSD 2-Clause +# modules/detectron/upsample_nearest_op.cu +# +# BSD 0-Clause +# torch/csrc/utils/pythoncapi_compat.h +# +# BSD 2-Clause +# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE +# caffe2/image/transform_gpu.cu +# caffe2/image/transform_gpu.h +# +# BSL-1.0 +# c10/util/flat_hash_map.h +# c10/util/hash.h +# c10/util/Optional.h +# c10/util/order_preserving_flat_hash_map.h +# c10/util/strong_type.h +# c10/util/variant.h +# +# GPL-3.0-or-later AND MIT +# c10/util/reverse_iterator.h +# 
+# Khronos +# These files are for OpenCL, an unused option +# Replace them later, as-needed with the opencl-headers.rpm +# +# caffe2/contrib/opencl/OpenCL/cl.hpp +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h +# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h +# +# MIT +# android/libs/fbjni/googletest-CMakeLists.txt.in +# c10/util/BFloat16-math.h +# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h +# caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c +# caffe2/onnx/torch_ops/defs.cc +# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake +# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake +# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake +# functorch/einops/_parsing.py +# test/functorch/test_parsing.py +# test/functorch/test_rearrange.py +# third_party/miniz-2.1.0/LICENSE +# third_party/miniz-2.1.0/miniz.c +# tools/coverage_plugins_package/setup.py +# torch/_appdirs.py +# torch/utils/hipify/hipify_python.py +# +# Public Domain +# caffe2/mobile/contrib/libopencl-stub/LICENSE +# caffe2/utils/murmur_hash3.cc +# caffe2/utils/murmur_hash3.h +# +# Zlib +# aten/src/ATen/native/cpu/avx_mathfun.h + diff --git a/next/0001-disable-submodule-search.patch b/next/0001-disable-submodule-search.patch deleted file mode 100644 index b830fa6..0000000 --- a/next/0001-disable-submodule-search.patch +++ /dev/null @@ -1,25 +0,0 @@ -From e0b0ea90ecc0dbefc6aef2650e88ba88260935b9 Mon Sep 17 00:00:00 2001 -From: Tom Rix -Date: Fri, 29 Sep 2023 17:21:13 -0700 -Subject: [PATCH] disable submodule search - ---- - setup.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/setup.py b/setup.py -index 
0fd886d945..e397df8fb6 100644 ---- a/setup.py -+++ b/setup.py -@@ -458,7 +458,7 @@ def mirror_files_into_torchgen(): - def build_deps(): - report("-- Building version " + version) - -- check_submodules() -+ # check_submodules() - check_pydep("yaml", "pyyaml") - - build_caffe2( --- -2.43.0 - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9508ad0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,154 @@ +[build-system] +requires = [ + "setuptools", + "wheel", + "astunparse", + "numpy", + "ninja", + "pyyaml", + "cmake", + "typing-extensions", + "requests", +] +# Use legacy backend to import local packages in setup.py +build-backend = "setuptools.build_meta:__legacy__" + + +[tool.black] +# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 +# line-length = 120 +target-version = ["py38", "py39", "py310", "py311"] + + +[tool.ruff] +target-version = "py38" + +# NOTE: Synchoronize the ignores with .flake8 +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B019", + "B023", + "B028", # No explicit `stacklevel` keyword argument found + "B904", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F841", + # these ignores are from flake8-logging-format; please fix! + "G101", + # these ignores are from ruff NPY; please fix! + "NPY002", + # these ignores are from ruff PERF; please fix! + "PERF203", + "PERF401", + "PERF403", + # these ignores are from PYI; please fix! + "PYI019", + "PYI024", + "PYI036", + "PYI041", + "PYI056", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", + "UP006", # keep-runtime-typing + "UP007", # keep-runtime-typing +] +line-length = 120 +select = [ + "B", + "C4", + "G", + "E", + "EXE", + "F", + "SIM1", + "W", + # Not included in flake8 + "NPY", + "PERF", + "PGH004", + "PIE794", + "PIE800", + "PIE804", + "PIE807", + "PIE810", + "PLC0131", # type bivariance + "PLC0132", # type param mismatch + "PLC0205", # string as __slots__ + "PLE", + "PLR0133", # constant comparison + "PLR0206", # property with params + "PLR1722", # use sys exit + "PLW0129", # assert on string literal + "PLW0406", # import self + "PLW0711", # binary op exception + "PLW1509", # preexec_fn not safe with threads + "PLW3301", # nested min max + "PT006", # TODO: enable more PT rules + "PT022", + "PT023", + "PT024", + "PT025", + "PT026", + "PYI", + "RUF008", # mutable dataclass default + "RUF015", # access first ele in constant time + "RUF016", # type error non-integer index + "RUF017", + "TRY200", + "TRY302", + "UP", +] + +[tool.ruff.per-file-ignores] +"__init__.py" = [ + "F401", +] +"test/typing/reveal/**" = [ + "F821", +] +"test/torch_np/numpy_tests/**" = [ + "F821", +] +"test/jit/**" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] +"test/test_jit.py" = [ + "PLR0133", # tests require this for JIT + "PYI", + "RUF015", + "UP", # We don't want to modify the jit test as they test specify syntax +] + +"torch/onnx/**" = [ + "UP037", # ONNX does runtime type checking +] + +"torchgen/api/types/__init__.py" = [ + "F401", + "F403", +] +"torchgen/executorch/api/types/__init__.py" = [ + "F401", + "F403", +] +"torch/utils/collect_env.py" = [ + "UP", # collect_env.py needs to work with older versions of Python +] diff --git 
a/python-torch.spec b/python-torch.spec index 5f4e2e0..6f102ca 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -6,14 +6,13 @@ # So pre releases can be tried %bcond_with gitcommit %if %{with gitcommit} -# The top of tree ~2/6/24 -%global commit0 064610d8ac53f3f5916a1dc8b43acbeeb2469c11 +# ToT +%global commit0 75b0720a97ac5d82e8a7a1a6ae7c5f7a87d7183d %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) - -%global pypi_version 2.3.0 +%global date0 20240609 +%global pypi_version 2.4.0 %else -%global pypi_version 2.1.2 - +%global pypi_version 2.3.1 %endif # For -test subpackage @@ -23,161 +22,373 @@ # /usr/lib64/python3.12/site-packages/torch/bin/test_api, test_lazy %bcond_with test -# For testing rocm -# Not viable on 2.1.2, use --with gitcommit +%ifarch x86_64 +# ROCm support came in F40 +%if 0%{?fedora} > 39 +%bcond_without rocm +%else %bcond_with rocm +%endif +%endif +# hipblaslt is in development +%bcond_with hipblaslt +# Which families gpu build for +%global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 gfx90a gfx942 gfx1100 +%global rocm_default_gpu default +%bcond_without rocm_loop -# For testing openmp -%bcond_with openmp - -# For testing caffe2 +# Caffe2 support came in F41 +%if 0%{?fedora} > 40 +%bcond_without caffe2 +%else %bcond_with caffe2 +%endif -# For testing distributed +# Distributed support came in F41 +%if 0%{?fedora} > 40 +%bcond_without distributed +# For testing distributed+rccl etc. 
+%bcond_without rccl +%bcond_with gloo +%bcond_without mpi +%bcond_without tensorpipe +%else %bcond_with distributed +%endif + +# OpenCV support came in F41 +%if 0%{?fedora} > 40 +%bcond_without opencv +%else +%bcond_with opencv +%endif + +# Do no confuse xnnpack versions +%if 0%{?fedora} > 40 +%bcond_without xnnpack +%else +%bcond_with xnnpack +%endif + +%if 0%{?fedora} > 39 +%bcond_without pthreadpool +%else +%bcond_with pthreadpool +%endif + +%if 0%{?fedora} > 39 +%bcond_without pocketfft +%else +%bcond_with pocketfft +%endif + +# For testing cuda +%ifarch x86_64 +%bcond_with cuda +%endif + +# For testing compat-gcc +%global compat_gcc_major 13 +%bcond_with compat_gcc + +# Disable dwz with rocm because memory can be exhausted +%if %{with rocm} +%define _find_debuginfo_dwz_opts %{nil} +%endif + +%if %{with cuda} +# workaround problems with -pie +%global build_cxxflags %{nil} +%global build_ldflags %{nil} +%endif + +# These came in 2.4 and not yet in Fedora +%if %{with gitcommit} +%bcond_with opentelemetry +%bcond_with httplib +%bcond_with kineto +%else +%bcond_without opentelemetry +%bcond_without httplib +%bcond_without kineto +%endif Name: python-%{pypi_name} +%if %{with gitcommit} +Version: %{pypi_version}^git%{date0}.%{shortcommit0} +%else Version: %{pypi_version} +%endif Release: %autorelease Summary: PyTorch AI/ML framework -# See below for details +# See license.txt for license details License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AND BSL-1.0 AND GPL-3.0-or-later AND Zlib URL: https://pytorch.org/ %if %{with gitcommit} Source0: %{forgeurl}/archive/%{commit0}/pytorch-%{shortcommit0}.tar.gz -Source1: pyproject.toml +Source1000: pyproject.toml %else Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.tar.gz %endif +Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz +Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz + +%if %{with cuda} +%global cuf_ver 1.1.2 
+Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cuf_ver}.tar.gz +%global cul_ver 3.4.1 +Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz +%endif + +%if %{with tensorpipe} +# Developement on tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit +%global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e +%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7}) +Source20: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz +# The old libuv tensorpipe uses +Source21: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz +# Developement afaik on libnop has stopped, this is the last commit +%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281 +%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7}) +Source22: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz +%endif + +%if %{without xnnpack} +%global xnn_commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2 +%global xnn_scommit %(c=%{xnn_commit}; echo ${c:0:7}) +Source30: https://github.com/google/xnnpack/archive/%{xnn_commit}/xnnpack-%{xnn_scommit}.tar.gz +%global fx_commit 63058eff77e11aa15bf531df5dd34395ec3017c8 +%global fx_scommit %(c=%{fx_commit}; echo ${c:0:7}) +Source31: https://github.com/Maratyszcza/fxdiv/archive/%{fx_commit}/FXdiv-%{fx_scommit}.tar.gz +%global fp_commit 0a92994d729ff76a58f692d3028ca1b64b145d91 +%global fp_scommit %(c=%{fp_commit}; echo ${c:0:7}) +Source32: https://github.com/Maratyszcza/FP16/archive/%{fp_commit}/FP16-%{fp_scommit}.tar.gz +%global ps_commit 072586a71b55b7f8c584153d223e95687148a900 +%global ps_scommit %(c=%{ps_commit}; echo ${c:0:7}) +Source33: https://github.com/Maratyszcza/psimd/archive/%{ps_commit}/psimd-%{ps_scommit}.tar.gz +%endif + +%if %{without pthreadpool} +%global pt_commit 4fe0e1e183925bf8cfa6aae24237e724a96479b8 +%global pt_scommit %(c=%{pt_commit}; echo ${c:0:7}) +Source40: 
https://github.com/Maratyszcza/pthreadpool/archive/%{pt_commit}/pthreadpool-%{pt_scommit}.tar.gz +%endif + +%if %{without pocketfft} +%global pf_commit 076cb3d2536b7c5d0629093ad886e10ac05f3623 +%global pf_scommit %(c=%{pf_commit}; echo ${c:0:7}) +Source50: https://github.com/mreineck/pocketfft/archive/%{pf_commit}/pocketfft-%{pf_scommit}.tar.gz +%endif %if %{with gitcommit} +%if %{without opentelemetry} +%global ot_ver 1.14.2 +Source60: https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/v%{ot_ver}.tar.gz +%endif + +%if %{without httplib} +%global hl_commit 3b6597bba913d51161383657829b7e644e59c006 +%global hl_scommit %(c=%{hl_commit}; echo ${c:0:7}) +Source70: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz +%endif + +%if %{without kineto} +%global ki_commit be1317644c68b4bfc4646024a6b221066e430031 +%global ki_scommit %(c=%{ki_commit}; echo ${c:0:7}) +Source80: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz +%endif +%endif Patch0: 0001-no-third_party-foxi.patch -Patch1: 0001-no-third_party-fmt.patch -Patch2: 0001-no-third_party-FXdiv.patch + +%if %{without gitcommit} Patch3: 0001-Stub-in-kineto-ActivityType.patch -Patch4: 0001-Regenerate-flatbuffer-header.patch -Patch5: 0001-disable-submodule-search.patch - -%if %{with rocm} -Patch100: 0001-cuda-hip-signatures.patch -Patch101: 0001-silence-an-assert.patch -Patch102: 0001-can-not-use-with-c-files.patch -Patch103: 0001-use-any-hip.patch %endif -%else -# Misc cmake changes that would be difficult to upstream -# * Use the system fmt -# * Remove foxi use -# * Remove warnings/errors for clang 17 -# * fxdiv is not a library on Fedora -Patch0: 0001-Prepare-pytorch-cmake-for-fedora.patch -# Use Fedora's fmt -Patch1: 0002-Regenerate-flatbuffer-header.patch -# https://github.com/pytorch/pytorch/pull/111048 -Patch2: 0003-Stub-in-kineto-ActivityType.patch -# PyTorch has not fully baked 3.12 support because 3.12 is so new -Patch3: 
0004-torch-python-3.12-changes.patch -# Short circuit looking for things that can not be downloade by mock -Patch4: 0005-disable-submodule-search.patch -# libtorch_python.so: undefined symbols: Py* -Patch6: 0001-python-torch-link-with-python.patch -# E: unused-direct-shlib-dependency libshm.so.2.1.0 libtorch.so.2.1 -# turn on as-needed globally -Patch7: 0001-python-torch-remove-ubuntu-specific-linking.patch -# Tries to use git and is confused by tarball -Patch8: 0001-torch-sane-version.patch -# libtorch is a wrapper so turn off as-needed locally -# resolves this rpmlint -# E: shared-library-without-dependency-information libtorch.so.2.1.0 -# causes these -# E: unused-direct-shlib-dependency libtorch.so.2.1.0 libtorch_cpu.so.2.1 -# etc. -# As a wrapper library, this should be the expected behavior. -Patch9: 0001-disable-as-needed-for-libtorch.patch +%if %{with caffe2} +Patch6: 0001-reenable-foxi-linking.patch +%endif + +# Bring some patches forward +%if %{without gitcommit} +# https://github.com/pytorch/pytorch/pull/123384 +Patch7: 0001-Reenable-dim-for-python-3.12.patch + +# Dynamo/Inductor on 3.12 +# Fails to apply on 2.3.1 +# Patch8: 0001-dynamo-3.12-enable-dynamo-on-3.12-enable-most-dynamo.patch +%endif + +# ROCm patches +# Patches need to be refactored for ToT +%if %{without gitcommit} +# These are ROCm packages +%if %{without cuda} +# https://github.com/pytorch/pytorch/pull/120551 +Patch100: 0001-Optionally-use-hipblaslt.patch +Patch101: 0001-cuda-hip-signatures.patch +Patch102: 0001-silence-an-assert.patch +Patch103: 0001-can-not-use-with-c-files.patch +Patch104: 0001-use-any-hip.patch +Patch105: 0001-disable-use-of-aotriton.patch +%endif %endif -# Limit to these because they are well behaved with clang ExclusiveArch: x86_64 aarch64 -%global toolchain clang +%global toolchain gcc +%global _lto_cflags %nil -BuildRequires: clang-devel BuildRequires: cmake BuildRequires: cpuinfo-devel BuildRequires: eigen3-devel BuildRequires: fmt-devel -BuildRequires: 
flatbuffers-devel -BuildRequires: FP16-devel -BuildRequires: fxdiv-devel +%if %{with caffe2} +BuildRequires: foxi-devel +%endif + +%if %{with compat_gcc} +BuildRequires: compat-gcc-%{compat_gcc_major}-c++ +BuildRequires: compat-gcc-%{compat_gcc_major}-gfortran +%else BuildRequires: gcc-c++ BuildRequires: gcc-gfortran +%endif + %if %{with distributed} +%if %{with gloo} BuildRequires: gloo-devel %endif +%endif BuildRequires: ninja-build BuildRequires: onnx-devel -BuildRequires: openblas-devel -BuildRequires: pocketfft-devel -%if %{with caffe2} -BuildRequires: protobuf-lite-devel -%else -BuildRequires: protobuf-devel +BuildRequires: libomp-devel +%if %{with distributed} +%if %{with mpi} +BuildRequires: openmpi-devel %endif -BuildRequires: pthreadpool-devel -BuildRequires: psimd-devel -BuildRequires: python3-numpy -BuildRequires: python3-pybind11 -BuildRequires: python3-pyyaml -BuildRequires: python3-typing-extensions +%endif +BuildRequires: openblas-devel +BuildRequires: protobuf-devel BuildRequires: sleef-devel BuildRequires: valgrind-devel -BuildRequires: xnnpack-devel = 0.0^git20221221.51a9875 + +%if %{with pocketfft} +BuildRequires: pocketfft-devel +%endif + +%if %{with pthreadpool} +BuildRequires: pthreadpool-devel +%endif + +%if %{with xnnpack} +BuildRequires: FP16-devel +BuildRequires: fxdiv-devel +BuildRequires: psimd-devel +BuildRequires: xnnpack-devel = 0.0^git20240229.fcbf55a +%endif BuildRequires: python3-devel BuildRequires: python3dist(filelock) -BuildRequires: python3dist(fsspec) BuildRequires: python3dist(jinja2) BuildRequires: python3dist(networkx) +BuildRequires: python3dist(numpy) +BuildRequires: python3dist(pyyaml) BuildRequires: python3dist(setuptools) -BuildRequires: python3dist(sympy) -BuildRequires: python3dist(typing-extensions) BuildRequires: python3dist(sphinx) +BuildRequires: python3dist(typing-extensions) + +%if 0%{?fedora} +BuildRequires: python3-pybind11 +BuildRequires: python3dist(fsspec) +BuildRequires: python3dist(sympy) +%endif %if 
%{with rocm} BuildRequires: hipblas-devel +%if %{with hipblaslt} BuildRequires: hipblaslt-devel +%endif BuildRequires: hipcub-devel BuildRequires: hipfft-devel +BuildRequires: hiprand-devel BuildRequires: hipsparse-devel BuildRequires: hipsolver-devel BuildRequires: miopen-devel BuildRequires: rocblas-devel +BuildRequires: rocrand-devel +BuildRequires: rocfft-devel %if %{with distributed} +%if %{with rccl} BuildRequires: rccl-devel %endif +%endif BuildRequires: rocprim-devel BuildRequires: rocm-cmake BuildRequires: rocm-comgr-devel +BuildRequires: rocm-core-devel BuildRequires: rocm-hip-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros BuildRequires: rocm-rpm-macros-modules BuildRequires: rocthrust-devel +BuildRequires: roctracer-devel Requires: rocm-rpm-macros-modules %endif -%if %{with caffe2} -BuildRequires: foxi-devel +%if %{with opencv} +BuildRequires: opencv-devel %endif %if %{with test} BuildRequires: google-benchmark-devel %endif +Requires: python3dist(dill) + +# For convience +Provides: pytorch + +# Apache-2.0 +Provides: bundled(flatbuffers) = 22.3.3 +# MIT Provides: bundled(miniz) = 2.1.0 +Provides: bundled(pybind11) = 2.11.1 + +%if %{with tensorpipe} +# BSD-3-Clause +Provides: bundled(tensorpipe) +# Apache-2.0 +Provides: bundled(libnop) +# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause +Provides: bundled(libuv) = 1.41.0 +%endif + +# These are already in Fedora +%if %{without xnnpack} +# BSD-3-Clause +Provides: bundled(xnnpack) +# MIT +Provides: bundled(FP16) +# MIT +Provides: bundled(fxdiv) +# MIT +Provides: bundled(psimd) +%endif + +%if %{without pthreadpool} +# BSD-2-Clause +Provides: bundled(pthreadpool) +%endif + +%if %{without pocketfft} +# BSD-3-Clause +Provides: bundled(pocketfft) +%endif + +# For convience +Provides: pytorch %description PyTorch is a Python package that provides two high-level features: @@ -200,13 +411,48 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python 
packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. -%package -n python3-%{pypi_name}-devel -Summary: Libraries and headers for %{name} -Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} +%if %{with rocm} +%package -n python3-%{pypi_name}-rocm-gfx8 +Summary: %{name} for ROCm gfx8 -%description -n python3-%{pypi_name}-devel +%description -n python3-%{pypi_name}-rocm-gfx8 %{summary} +%package -n python3-%{pypi_name}-rocm-gfx9 +Summary: %{name} for ROCm gfx9 + +%description -n python3-%{pypi_name}-rocm-gfx9 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx10 +Summary: %{name} for ROCm gfx10 + +%description -n python3-%{pypi_name}-rocm-gfx10 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx11 +Summary: %{name} for ROCm gfx11 + +%description -n python3-%{pypi_name}-rocm-gfx11 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx90a +Summary: %{name} for ROCm MI200 +%description -n python3-%{pypi_name}-rocm-gfx90a +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx942 +Summary: %{name} for ROCm MI300 +%description -n python3-%{pypi_name}-rocm-gfx942 +%{summary} + +%package -n python3-%{pypi_name}-rocm-gfx1100 +Summary: %{name} for W7900 +%description -n python3-%{pypi_name}-rocm-gfx1100 +%{summary} + +%endif + %if %{with test} %package -n python3-%{pypi_name}-test Summary: Tests for %{name} @@ -218,29 +464,132 @@ Requires: python3-%{pypi_name}%{?_isa} = %{version}-%{release} %prep + %if %{with gitcommit} %autosetup -p1 -n pytorch-%{commit0} +# Overwrite with a git checkout of the pyproject.toml +cp %{SOURCE1000} . +%else +%autosetup -p1 -n pytorch-v%{version} +%endif # Remove bundled egg-info rm -rf %{pypi_name}.egg-info -# Overwrite with a git checkout of the pyproject.toml -cp %{SOURCE1} . 
-%if %{with rocm} -# hipify -./tools/amd_build/build_amd.py +tar xf %{SOURCE1} +rm -rf third_party/flatbuffers/* +cp -r flatbuffers-23.3.3/* third_party/flatbuffers/ + +tar xf %{SOURCE2} +rm -rf third_party/pybind11/* +cp -r pybind11-2.11.1/* third_party/pybind11/ + +%if %{with cuda} +tar xf %{SOURCE10} +rm -rf third_party/cudnn_frontend/* +cp -r cudnn-frontend-%{cuf_ver}/* third_party/cudnn_frontend/ +tar xf %{SOURCE11} +rm -rf third_party/cutlass/* +cp -r cutlass-%{cul_ver}/* third_party/cutlass/ %endif -%else -%autosetup -p1 -n pytorch-v%{version} +%if %{with tensorpipe} +tar xf %{SOURCE20} +rm -rf third_party/tensorpipe/* +cp -r tensorpipe-*/* third_party/tensorpipe/ +tar xf %{SOURCE21} +rm -rf third_party/tensorpipe/third_party/libuv/* +cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/ +tar xf %{SOURCE22} +rm -rf third_party/tensorpipe/third_party/libnop/* +cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/ +%endif + +%if %{without xnnpack} +tar xf %{SOURCE30} +rm -rf third_party/XNNPACK/* +cp -r XNNPACK-*/* third_party/XNNPACK/ +tar xf %{SOURCE31} +rm -rf third_party/FXdiv/* +cp -r FXdiv-*/* third_party/FXdiv/ +tar xf %{SOURCE32} +rm -rf third_party/FP16/* +cp -r FP16-*/* third_party/FP16/ +tar xf %{SOURCE33} +rm -rf third_party/psimd/* +cp -r psimd-*/* third_party/psimd/ +%endif + +%if %{without pthreadpool} +tar xf %{SOURCE40} +rm -rf third_party/pthreadpool/* +cp -r pthreadpool-*/* third_party/pthreadpool/ +%endif + +%if %{without pocketfft} +tar xf %{SOURCE50} +rm -rf third_party/pocketfft/* +cp -r pocketfft-*/* third_party/pocketfft/ +%endif + +%if %{with gitcommit} +%if %{without opentelemtry} +tar xf %{SOURCE60} +rm -rf third_party/opentelemetry-cpp/* +cp -r opentelemetry-cpp-*/* third_party/opentelemetry-cpp/ +%endif + +%if %{without httplib} +tar xf %{SOURCE70} +rm -rf third_party/cpp-httplib/* +cp -r cpp-httplib-*/* third_party/cpp-httplib/ +%endif + +%if %{without kineto} +tar xf %{SOURCE80} +rm -rf third_party/kineto/* +cp -r 
kineto-*/* third_party/kineto/ +%endif +%endif %if %{with opencv} +%if %{without gitcommit} # Reduce requirements, *FOUND is not set sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt -cat caffe2/image/CMakeLists.txt %endif +%endif + +%if 0%{?rhel} +# In RHEL but too old +sed -i -e '/typing-extensions/d' setup.py +# Need to pip these +sed -i -e '/sympy/d' setup.py +sed -i -e '/fsspec/d' setup.py +%endif + +# A new dependency +# Connected to USE_FLASH_ATTENTION, since this is off, do not need it +sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake + +# No third_party fmt, use system +sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt +sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake +sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake +sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake +sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake + +# No third_party FXdiv +%if %{with xnnpack} +sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt +sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt +%endif + +# Disable the use of check_submodule's in the setup.py, we are a tarball, not a git repo +sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py # 
Release comes fully loaded with third party src # Remove what we can @@ -255,6 +604,47 @@ mv third_party/miniz-2.1.0 . # setup.py depends on this script mv third_party/build_bundled.py . +# Need the just untarred flatbuffers/flatbuffers.h +mv third_party/flatbuffers . + +mv third_party/pybind11 . + +%if %{with cuda} +mv third_party/cudnn_frontend . +mv third_party/cutlass . +%endif + +%if %{with tensorpipe} +mv third_party/tensorpipe . +%endif + +%if %{without xnnpack} +mv third_party/XNNPACK . +mv third_party/FXdiv . +mv third_party/FP16 . +mv third_party/psimd . +%endif + +%if %{without pthreadpool} +mv third_party/pthreadpool . +%endif + +%if %{without pocketfft} +mv third_party/pocketfft . +%endif + +%if %{without opentelemetry} +mv third_party/opentelemetry-cpp . +%endif + +%if %{without httplib} +mv third_party/cpp-httplib . +%endif + +%if %{without kineto} +mv third_party/kineto . +%endif + %if %{with test} mv third_party/googletest . %endif @@ -264,26 +654,115 @@ rm -rf third_party/* # Put stuff back mv build_bundled.py third_party mv miniz-2.1.0 third_party +mv flatbuffers third_party +mv pybind11 third_party + +%if %{with cuda} +mv cudnn_frontend third_party +mv cutlass third_party +%endif + +%if %{with tensorpipe} +mv tensorpipe third_party +%endif + +%if %{without xnnpack} +mv XNNPACK third_party +mv FXdiv third_party +mv FP16 third_party +mv psimd third_party +%endif + +%if %{without pthreadpool} +mv pthreadpool third_party +%endif + +%if %{without pocketfft} +mv pocketfft third_party +%endif + +%if %{without opentelemetry} +mv opentelemetry-cpp third_party +%endif + +%if %{without httplib} +mv cpp-httplib third_party +%endif + +%if %{without kineto} +mv kineto third_party +%endif + %if %{with test} mv googletest third_party %endif + +%if %{with pocketfft} # # Fake out pocketfft, and system header will be used mkdir third_party/pocketfft +%endif + # # Use the system valgrind headers mkdir third_party/valgrind-headers cp %{_includedir}/valgrind/* 
third_party/valgrind-headers +%if %{without gitcommit} # Remove unneeded OpenCL files that confuse the lincense scanner rm caffe2/contrib/opencl/OpenCL/cl.hpp rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.h rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp +%endif + +%if %{with rocm} +# hipify +./tools/amd_build/build_amd.py +# Fedora installs to /usr/include, not /usr/include/rocm-core +sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' aten/src/ATen/hip/tunable/TunableGemm.h +%endif + +%if %{with cuda} + +# TBD %endif %build +# +# Control the number of jobs +# +# The build can fail if too many threads exceed the physical memory +# So count core and and memory and increase the build memory util the build succeeds +# +# Real cores, No hyperthreading +COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` +if [ ${COMPILE_JOBS}x = x ]; then + COMPILE_JOBS=1 +fi +# Take into account memmory usage per core, do not thrash real memory +%if %{with cuda} +BUILD_MEM=4 +%else +BUILD_MEM=2 +%endif +MEM_KB=0 +MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` +MEM_MB=`eval "expr ${MEM_KB} / 1024"` +MEM_GB=`eval "expr ${MEM_MB} / 1024"` +COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"` +if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then + COMPILE_JOBS=$COMPILE_JOBS_MEM +fi +export MAX_JOBS=$COMPILE_JOBS + +%if %{with compat_gcc} +export CC=%{_bindir}/gcc%{compat_gcc_major} +export CXX=%{_bindir}/g++%{compat_gcc_major} +export FC=%{_bindir}/gfortran%{compat_gcc_major} +%endif + # For debugging setup.py # export SETUPTOOLS_SCM_DEBUG=1 @@ -293,63 +772,97 @@ rm caffe2/mobile/contrib/libopencl-stub/include/CL/*.hpp # export CMAKE_SHARED_LINKER_FLAGS=-Wl,--verbose # Manually set this hardening flag +# CUDA is unhappy with pie, so do not use it +%if %{without cuda} export CMAKE_EXE_LINKER_FLAGS=-pie +%endif export BUILD_CUSTOM_PROTOBUF=OFF +export BUILD_NVFUSER=OFF export BUILD_SHARED_LIBS=ON +export BUILD_TEST=OFF 
export CMAKE_BUILD_TYPE=RelWithDebInfo export CMAKE_FIND_PACKAGE_PREFER_CONFIG=ON export CAFFE2_LINK_LOCAL_PROTOBUF=OFF +export INTERN_BUILD_MOBILE=OFF +export USE_DISTRIBUTED=OFF export USE_CUDA=OFF export USE_FBGEMM=OFF +export USE_FLASH_ATTENTION=OFF export USE_GOLD_LINKER=OFF +export USE_GLOO=OFF export USE_ITT=OFF export USE_KINETO=OFF export USE_LITE_INTERPRETER_PROFILER=OFF +export USE_LITE_PROTO=OFF +export USE_MAGMA=OFF export USE_MKLDNN=OFF +export USE_MPI=OFF +export USE_NCCL=OFF export USE_NNPACK=OFF export USE_NUMPY=ON - +export USE_OPENMP=ON export USE_PYTORCH_QNNPACK=OFF +%if %{without gitcommit} export USE_QNNPACK=OFF -export USE_SYSTEM_LIBS=ON +%endif +export USE_ROCM=OFF +export USE_SYSTEM_CPUINFO=ON +export USE_SYSTEM_SLEEF=ON +export USE_SYSTEM_EIGEN_INSTALL=ON +export USE_SYSTEM_ONNX=ON +export USE_SYSTEM_PYBIND11=OFF +export USE_SYSTEM_LIBS=OFF export USE_TENSORPIPE=OFF export USE_XNNPACK=ON +%if %{with pthreadpool} +export USE_SYSTEM_PTHREADPOOL=ON +%endif + +%if %{with xnnpack} +export USE_SYSTEM_FP16=ON +export USE_SYSTEM_FXDIV=ON +export USE_SYSTEM_PSIMD=ON +export USE_SYSTEM_XNNPACK=ON +%endif + %if %{with caffe2} export BUILD_CAFFE2=ON -export INTERN_BUILD_MOBILE=OFF -export USE_LITE_PROTO=ON +%endif + +%if %{with cuda} +%if %{without rocm} +export CUDACXX=/usr/local/cuda/bin/nvcc +export CPLUS_INCLUDE_PATH=/usr/local/cuda/include +export USE_CUDA=ON +# The arches to build for +export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" +%endif %endif %if %{with distributed} export USE_DISTRIBUTED=ON -%else -export USE_DISTRIBUTED=OFF +%if %{with tensorpipe} +export USE_TENSORPIPE=ON +export TP_BUILD_LIBUV=OFF %endif - -%if %{with openmp} -export USE_OPENMP=ON -%else -export USE_OPENMP=OFF +%if %{with gloo} +export USE_GLOO=ON +export USE_SYSTEM_GLOO=ON +%endif +%if %{with mpi} +export USE_MPI=ON +%endif %endif -%if %{with rocm} -export USE_ROCM=ON -export USE_NCCL=OFF -export BUILD_NVFUSER=OFF -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} 
-export DEVICE_LIB_PATH=/usr/lib/clang/17/amdgcn/bitcode -%else -export USE_ROCM=OFF +%if %{with opencv} +export USE_OPENCV=ON %endif %if %{with test} export BUILD_TEST=ON -%else -export BUILD_TEST=OFF %endif # Why we are using py3_ vs pyproject_ @@ -360,549 +873,132 @@ export BUILD_TEST=OFF # Adding pip to build requires does not fix # # See BZ 2244862 -%py3_build -%install + %if %{with rocm} + export USE_ROCM=ON -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} -export DEVICE_LIB_PATH=/usr/lib/clang/17/amdgcn/bitcode +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode + +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +%py3_build +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + %py3_build + mv build build-${gpu} + module purge +done %endif +%else + +%py3_build + +%endif + +%install + +%if %{with compat_gcc} +export CC=%{_bindir}/gcc%{compat_gcc_major} +export CXX=%{_bindir}/g++%{compat_gcc_major} +export FC=%{_bindir}/gfortran%{compat_gcc_major} +%endif + +%if %{with rocm} +export USE_ROCM=ON +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode + +gpu=%{rocm_default_gpu} +module load rocm/$gpu +export PYTORCH_ROCM_ARCH=$ROCM_GPUS +mv build-${gpu} build +%py3_install +mv build build-${gpu} +module purge + +%if %{with rocm_loop} +for gpu in %{rocm_gpu_list} +do + module load rocm/$gpu + export PYTORCH_ROCM_ARCH=$ROCM_GPUS + mv build-${gpu} build + # need to customize the install location, so replace py3_install + %{__python3} %{py_setup} %{?py_setup_args} install -O1 --skip-build --root 
%{buildroot} --prefix /usr/lib64/rocm/${gpu} %{?*} + rm -rfv %{buildroot}/usr/lib/rocm/${gpu}/bin/__pycache__ + mv build build-${gpu} + module purge +done +%endif + +%else %py3_install -# empty files -rm %{buildroot}%{python3_sitearch}/torch/py.typed -rm %{buildroot}%{python3_sitearch}/torch/ao/quantization/backend_config/observation_type.py -rm %{buildroot}%{python3_sitearch}/torch/ao/quantization/backend_config/__pycache__/observation_type.*.pyc -rm %{buildroot}%{python3_sitearch}/torch/cuda/error.py -rm %{buildroot}%{python3_sitearch}/torch/cuda/__pycache__/error.*.pyc -rm %{buildroot}%{python3_sitearch}/torch/include/ATen/cudnn/Exceptions.h +%endif -# exec permission -for f in `find %{buildroot}%{python3_sitearch} -name '*.py'`; do - if [ ! -x $f ]; then - sed -i '1{\@^#!/usr/bin@d}' $f - fi -done +# Do not remote the empty files -# shebangs -%py3_shebang_fix %{buildroot}%{python3_sitearch} - -# Programatically create the list of dirs -echo "s|%{buildroot}%{python3_sitearch}|%%dir %%{python3_sitearch}|g" > br.sed -find %{buildroot}%{python3_sitearch} -mindepth 1 -type d > dirs.files -sed -i -f br.sed dirs.files -cat dirs.files > main.files - -# Similar for the python files -find %{buildroot}%{python3_sitearch} -type f -name "*.py" -o -name "*.pyc" -o -name "*.pyi" > py.files -echo "s|%{buildroot}%{python3_sitearch}|%%{python3_sitearch}|g" > br.sed -sed -i -f br.sed py.files -cat py.files >> main.files - -# devel files, headers and such -find %{buildroot}%{python3_sitearch} -type f -name "*.h" -o -name "*.hpp" -o -name "*.cuh" -o -name "*.cpp" -o -name "*.cu" > devel.files -sed -i -f br.sed devel.files - -# -# Main package -##% dir % {python3_sitearch}/torch*.egg-info - -%files -n python3-%{pypi_name} -f main.files +%files -n python3-%{pypi_name} %license LICENSE -%doc README.md - -# bins +%doc README.md %{_bindir}/convert-caffe2-to-onnx %{_bindir}/convert-onnx-to-caffe2 %{_bindir}/torchrun -%{python3_sitearch}/torch/bin/torch_shm_manager 
+%{python3_sitearch}/%{pypi_name} +%{python3_sitearch}/%{pypi_name}-*.egg-info +%{python3_sitearch}/functorch +%{python3_sitearch}/torchgen +%if %{with caffe2} +%{python3_sitearch}/caffe2 +%endif -# libs -%{python3_sitearch}/functorch/_C.cpython*.so -%{python3_sitearch}/torch/_C.cpython*.so -%{python3_sitearch}/torch/lib/libc10.so -%{python3_sitearch}/torch/lib/libshm.so -%{python3_sitearch}/torch/lib/libtorch.so -%{python3_sitearch}/torch/lib/libtorch_cpu.so -%{python3_sitearch}/torch/lib/libtorch_global_deps.so -%{python3_sitearch}/torch/lib/libtorch_python.so %if %{with rocm} -%{python3_sitearch}/torch/lib/libc10_hip.so -%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so -%{python3_sitearch}/torch/lib/libtorch_hip.so -%endif +%files -n python3-%{pypi_name}-rocm-gfx8 +%{_libdir}/rocm/gfx8/bin/* +%{_libdir}/rocm/gfx8/lib64/* -# misc -%{python3_sitearch}/torch/utils/model_dump/{*.js,*.mjs,*.html} -%{python3_sitearch}/torchgen/packaged/ATen/native/*.yaml -%{python3_sitearch}/torchgen/packaged/autograd/{*.md,*.yaml} -%if %{with gitcommit} -%{python3_sitearch}/torch/_export/serde/schema.yaml -%{python3_sitearch}/torch/distributed/pipeline/sync/_balance/py.typed -%{python3_sitearch}/torch/distributed/pipeline/sync/py.typed -%endif +%files -n python3-%{pypi_name}-rocm-gfx9 +%{_libdir}/rocm/gfx9/bin/* +%{_libdir}/rocm/gfx9/lib64/* -# egg -%{python3_sitearch}/torch*.egg-info/* +%files -n python3-%{pypi_name}-rocm-gfx10 +%{_libdir}/rocm/gfx10/bin/* +%{_libdir}/rocm/gfx10/lib64/* -# excludes -# bazel build cruft -%exclude %{python3_sitearch}/torchgen/packaged/autograd/{BUILD.bazel,build.bzl} +%files -n python3-%{pypi_name}-rocm-gfx11 +%{_libdir}/rocm/gfx11/bin/* +%{_libdir}/rocm/gfx11/lib64/* -# -# devel package -# -%files -n python3-%{pypi_name}-devel -f devel.files +%files -n python3-%{pypi_name}-rocm-gfx90a +%{_libdir}/rocm/gfx90a/bin/* +%{_libdir}/rocm/gfx90a/lib64/* -# devel cmake -%{python3_sitearch}/torch/share/cmake/{ATen,Caffe2,Torch}/*.cmake 
-%{python3_sitearch}/torch/share/cmake/Caffe2/public/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/*.cmake -%{python3_sitearch}/torch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/*.cmake +%files -n python3-%{pypi_name}-rocm-gfx942 +%{_libdir}/rocm/gfx942/bin/* +%{_libdir}/rocm/gfx942/lib64/* -# devel misc -%{python3_sitearch}/torchgen/packaged/ATen/templates/RegisterDispatchDefinitions.ini -%{python3_sitearch}/torchgen/packaged/autograd/templates/annotated_fn_args.py.in - -%if %{with test} -%files -n python3-%{pypi_name}-test - -# test bins -%{python3_sitearch}/torch/bin/test_api -%{python3_sitearch}/torch/bin/test_edge_op_registration -%{python3_sitearch}/torch/bin/test_jit -%{python3_sitearch}/torch/bin/test_lazy -%{python3_sitearch}/torch/bin/test_tensorexpr -%{python3_sitearch}/torch/bin/tutorial_tensorexpr - -# test libs -# Unversioned - not ment for release -%{python3_sitearch}/torch/lib/libbackend_with_compiler.so -%{python3_sitearch}/torch/lib/libjitbackend_test.so -%{python3_sitearch}/torch/lib/libtorchbind_test.so - -# tests -%{python3_sitearch}/torch/test/* +%files -n python3-%{pypi_name}-rocm-gfx1100 +%{_libdir}/rocm/gfx1100/bin/* +%{_libdir}/rocm/gfx1100/lib64/* %endif -# -# License Details -# Main license BSD 3-Clause -# -# Apache-2.0 -# android/libs/fbjni/LICENSE -# android/libs/fbjni/CMakeLists.txt -# android/libs/fbjni/build.gradle -# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp -# android/libs/fbjni/cxx/fbjni/ByteBuffer.h -# android/libs/fbjni/cxx/fbjni/Context.h -# android/libs/fbjni/cxx/fbjni/File.h -# android/libs/fbjni/cxx/fbjni/JThread.h -# android/libs/fbjni/cxx/fbjni/NativeRunnable.h -# android/libs/fbjni/cxx/fbjni/OnLoad.cpp -# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp -# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h -# android/libs/fbjni/cxx/fbjni/detail/Boxed.h -# android/libs/fbjni/cxx/fbjni/detail/Common.h -# 
android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h -# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h -# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp -# android/libs/fbjni/cxx/fbjni/detail/Environment.h -# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp -# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h -# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h -# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp -# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h -# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Iterator.h -# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h -# android/libs/fbjni/cxx/fbjni/detail/Log.h -# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h -# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp -# android/libs/fbjni/cxx/fbjni/detail/Meta.h -# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h -# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h -# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h -# android/libs/fbjni/cxx/fbjni/detail/References-forward.h -# android/libs/fbjni/cxx/fbjni/detail/References-inl.h -# android/libs/fbjni/cxx/fbjni/detail/References.cpp -# android/libs/fbjni/cxx/fbjni/detail/References.h -# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h -# android/libs/fbjni/cxx/fbjni/detail/Registration.h -# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h -# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h -# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp -# android/libs/fbjni/cxx/fbjni/detail/utf8.h -# android/libs/fbjni/cxx/fbjni/fbjni.cpp -# android/libs/fbjni/cxx/fbjni/fbjni.h -# android/libs/fbjni/cxx/lyra/cxa_throw.cpp -# android/libs/fbjni/cxx/lyra/lyra.cpp -# android/libs/fbjni/cxx/lyra/lyra.h -# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp -# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp -# android/libs/fbjni/cxx/lyra/lyra_exceptions.h -# android/libs/fbjni/gradle.properties -# 
android/libs/fbjni/gradle/android-tasks.gradle -# android/libs/fbjni/gradle/release.gradle -# android/libs/fbjni/gradlew -# android/libs/fbjni/gradlew.bat -# android/libs/fbjni/host.gradle -# android/libs/fbjni/java/com/facebook/jni/CppException.java -# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java -# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java -# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java -# android/libs/fbjni/java/com/facebook/jni/HybridData.java -# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java -# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java -# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java -# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java -# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java -# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java -# android/libs/fbjni/scripts/android-setup.sh -# android/libs/fbjni/scripts/run-host-tests.sh -# android/libs/fbjni/settings.gradle -# android/libs/fbjni/test/BaseFBJniTests.java -# android/libs/fbjni/test/ByteBufferTests.java -# android/libs/fbjni/test/DocTests.java -# android/libs/fbjni/test/FBJniTests.java -# android/libs/fbjni/test/HybridTests.java -# android/libs/fbjni/test/IteratorTests.java -# android/libs/fbjni/test/PrimitiveArrayTests.java -# android/libs/fbjni/test/ReadableByteChannelTests.java -# android/libs/fbjni/test/jni/CMakeLists.txt -# android/libs/fbjni/test/jni/byte_buffer_tests.cpp -# android/libs/fbjni/test/jni/doc_tests.cpp -# android/libs/fbjni/test/jni/expect.h -# android/libs/fbjni/test/jni/fbjni_onload.cpp -# android/libs/fbjni/test/jni/fbjni_tests.cpp -# android/libs/fbjni/test/jni/hybrid_tests.cpp -# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp -# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h -# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp -# 
android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h -# android/libs/fbjni/test/jni/iterator_tests.cpp -# android/libs/fbjni/test/jni/modified_utf8_test.cpp -# android/libs/fbjni/test/jni/no_rtti.cpp -# android/libs/fbjni/test/jni/no_rtti.h -# android/libs/fbjni/test/jni/primitive_array_tests.cpp -# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp -# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp -# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp -# android/pytorch_android/host/build.gradle -# aten/src/ATen/cuda/llvm_basic.cpp -# aten/src/ATen/cuda/llvm_complex.cpp -# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h -# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h -# aten/src/ATen/nnapi/codegen.py -# aten/src/ATen/nnapi/NeuralNetworks.h -# aten/src/ATen/nnapi/nnapi_wrapper.cpp -# aten/src/ATen/nnapi/nnapi_wrapper.h -# binaries/benchmark_args.h -# binaries/benchmark_helper.cc -# binaries/benchmark_helper.h -# binaries/compare_models_torch.cc -# binaries/convert_and_benchmark.cc -# binaries/convert_caffe_image_db.cc -# binaries/convert_db.cc -# binaries/convert_encoded_to_raw_leveldb.cc -# binaries/convert_image_to_tensor.cc -# binaries/core_overhead_benchmark.cc -# binaries/core_overhead_benchmark_gpu.cc -# binaries/db_throughput.cc -# binaries/dump_operator_names.cc -# binaries/inspect_gpu.cc -# binaries/load_benchmark_torch.cc -# binaries/make_cifar_db.cc -# binaries/make_image_db.cc -# binaries/make_mnist_db.cc -# binaries/optimize_for_mobile.cc -# binaries/parallel_info.cc -# binaries/predictor_verifier.cc -# binaries/print_core_object_sizes_gpu.cc -# binaries/print_registered_core_operators.cc -# binaries/run_plan.cc -# binaries/run_plan_mpi.cc -# binaries/speed_benchmark.cc -# binaries/speed_benchmark_torch.cc -# 
binaries/split_db.cc -# binaries/tsv_2_proto.cc -# binaries/tutorial_blob.cc -# binaries/zmq_feeder.cc -# c10/test/util/small_vector_test.cpp -# c10/util/FunctionRef.h -# c10/util/SmallVector.cpp -# c10/util/SmallVector.h -# c10/util/llvmMathExtras.h -# c10/util/sparse_bitset.h -# caffe2/contrib/aten/gen_op.py -# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc -# caffe2/contrib/fakelowp/fp16_fc_acc_op.h -# caffe2/contrib/gloo/allgather_ops.cc -# caffe2/contrib/gloo/allgather_ops.h -# caffe2/contrib/gloo/reduce_scatter_ops.cc -# caffe2/contrib/gloo/reduce_scatter_ops.h -# caffe2/core/hip/common_miopen.h -# caffe2/core/hip/common_miopen.hip -# caffe2/core/net_async_tracing.cc -# caffe2/core/net_async_tracing.h -# caffe2/core/net_async_tracing_test.cc -# caffe2/experiments/operators/fully_connected_op_decomposition.cc -# caffe2/experiments/operators/fully_connected_op_decomposition.h -# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc -# caffe2/experiments/operators/fully_connected_op_prune.cc -# caffe2/experiments/operators/fully_connected_op_prune.h -# caffe2/experiments/operators/fully_connected_op_sparse.cc -# caffe2/experiments/operators/fully_connected_op_sparse.h -# caffe2/experiments/operators/funhash_op.cc -# caffe2/experiments/operators/funhash_op.h -# caffe2/experiments/operators/sparse_funhash_op.cc -# caffe2/experiments/operators/sparse_funhash_op.h -# caffe2/experiments/operators/sparse_matrix_reshape_op.cc -# caffe2/experiments/operators/sparse_matrix_reshape_op.h -# caffe2/experiments/operators/tt_contraction_op.cc -# caffe2/experiments/operators/tt_contraction_op.h -# caffe2/experiments/operators/tt_contraction_op_gpu.cc -# caffe2/experiments/operators/tt_pad_op.cc -# caffe2/experiments/operators/tt_pad_op.h -# caffe2/experiments/python/SparseTransformer.py -# caffe2/experiments/python/convnet_benchmarks.py -# caffe2/experiments/python/device_reduce_sum_bench.py -# caffe2/experiments/python/funhash_op_test.py -# 
caffe2/experiments/python/net_construct_bench.py -# caffe2/experiments/python/sparse_funhash_op_test.py -# caffe2/experiments/python/sparse_reshape_op_test.py -# caffe2/experiments/python/tt_contraction_op_test.py -# caffe2/experiments/python/tt_pad_op_test.py -# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h -# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h -# caffe2/mobile/contrib/nnapi/NeuralNetworks.h -# caffe2/mobile/contrib/nnapi/dlnnapi.c -# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc -# caffe2/observers/profile_observer.cc -# caffe2/observers/profile_observer.h -# caffe2/operators/hip/conv_op_miopen.hip -# caffe2/operators/hip/local_response_normalization_op_miopen.hip -# caffe2/operators/hip/pool_op_miopen.hip -# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip -# caffe2/operators/quantized/int8_utils.h -# caffe2/operators/stump_func_op.cc -# caffe2/operators/stump_func_op.cu -# caffe2/operators/stump_func_op.h -# caffe2/operators/unique_ops.cc -# caffe2/operators/unique_ops.cu -# caffe2/operators/unique_ops.h -# caffe2/operators/upsample_op.cc -# caffe2/operators/upsample_op.h -# caffe2/opt/fusion.h -# caffe2/python/layers/label_smooth.py -# caffe2/python/mint/static/css/simple-sidebar.css -# caffe2/python/modeling/get_entry_from_blobs.py -# caffe2/python/modeling/get_entry_from_blobs_test.py -# caffe2/python/modeling/gradient_clipping_test.py -# caffe2/python/operator_test/unique_ops_test.py -# caffe2/python/operator_test/upsample_op_test.py -# caffe2/python/operator_test/weight_scale_test.py -# caffe2/python/pybind_state_int8.cc -# caffe2/python/transformations.py -# caffe2/python/transformations_test.py -# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc -# caffe2/quantization/server/batch_matmul_dnnlowp_op.h -# caffe2/quantization/server/compute_equalization_scale_test.py -# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc -# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h -# 
caffe2/quantization/server/elementwise_sum_relu_op.cc -# caffe2/quantization/server/fb_fc_packed_op.cc -# caffe2/quantization/server/fb_fc_packed_op.h -# caffe2/quantization/server/fbgemm_fp16_pack_op.cc -# caffe2/quantization/server/fbgemm_fp16_pack_op.h -# caffe2/quantization/server/fully_connected_fake_lowp_op.cc -# caffe2/quantization/server/fully_connected_fake_lowp_op.h -# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py -# caffe2/quantization/server/int8_gen_quant_params_test.py -# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py -# caffe2/quantization/server/spatial_batch_norm_relu_op.cc -# caffe2/sgd/weight_scale_op.cc -# caffe2/sgd/weight_scale_op.h -# caffe2/utils/bench_utils.h -# functorch/examples/maml_omniglot/maml-omniglot-higher.py -# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py -# functorch/examples/maml_omniglot/maml-omniglot-transforms.py -# functorch/examples/maml_omniglot/support/omniglot_loaders.py -# modules/detectron/group_spatial_softmax_op.cc -# modules/detectron/group_spatial_softmax_op.cu -# modules/detectron/group_spatial_softmax_op.h -# modules/detectron/ps_roi_pool_op.cc -# modules/detectron/ps_roi_pool_op.h -# modules/detectron/roi_pool_f_op.cc -# modules/detectron/roi_pool_f_op.cu -# modules/detectron/roi_pool_f_op.h -# modules/detectron/sample_as_op.cc -# modules/detectron/sample_as_op.cu -# modules/detectron/sample_as_op.h -# modules/detectron/select_smooth_l1_loss_op.cc -# modules/detectron/select_smooth_l1_loss_op.cu -# modules/detectron/select_smooth_l1_loss_op.h -# modules/detectron/sigmoid_cross_entropy_loss_op.cc -# modules/detectron/sigmoid_cross_entropy_loss_op.cu -# modules/detectron/sigmoid_cross_entropy_loss_op.h -# modules/detectron/sigmoid_focal_loss_op.cc -# modules/detectron/sigmoid_focal_loss_op.cu -# modules/detectron/sigmoid_focal_loss_op.h -# modules/detectron/smooth_l1_loss_op.cc -# modules/detectron/smooth_l1_loss_op.cu -# modules/detectron/smooth_l1_loss_op.h -# 
modules/detectron/softmax_focal_loss_op.cc -# modules/detectron/softmax_focal_loss_op.cu -# modules/detectron/softmax_focal_loss_op.h -# modules/detectron/spatial_narrow_as_op.cc -# modules/detectron/spatial_narrow_as_op.cu -# modules/detectron/spatial_narrow_as_op.h -# modules/detectron/upsample_nearest_op.cc -# modules/detectron/upsample_nearest_op.h -# modules/module_test/module_test_dynamic.cc -# modules/rocksdb/rocksdb.cc -# scripts/apache_header.txt -# scripts/apache_python.txt -# torch/distributions/lkj_cholesky.py -# -# Apache 2.0 AND BSD 2-Clause -# caffe2/operators/deform_conv_op.cu -# -# Apache 2.0 AND BSD 2-Clause AND MIT -# modules/detectron/ps_roi_pool_op.cu -# -# Apache 2.0 AND BSD 2-Clause -# modules/detectron/upsample_nearest_op.cu -# -# BSD 0-Clause -# torch/csrc/utils/pythoncapi_compat.h -# -# BSD 2-Clause -# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE -# caffe2/image/transform_gpu.cu -# caffe2/image/transform_gpu.h -# -# BSL-1.0 -# c10/util/flat_hash_map.h -# c10/util/hash.h -# c10/util/Optional.h -# c10/util/order_preserving_flat_hash_map.h -# c10/util/strong_type.h -# c10/util/variant.h -# -# GPL-3.0-or-later AND MIT -# c10/util/reverse_iterator.h -# -# Khronos -# These files are for OpenCL, an unused option -# Replace them later, as-needed with the opencl-headers.rpm -# -# caffe2/contrib/opencl/OpenCL/cl.hpp -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h -# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h -# -# MIT -# android/libs/fbjni/googletest-CMakeLists.txt.in -# c10/util/BFloat16-math.h -# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h -# 
caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c -# caffe2/onnx/torch_ops/defs.cc -# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake -# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake -# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake -# functorch/einops/_parsing.py -# test/functorch/test_parsing.py -# test/functorch/test_rearrange.py -# third_party/miniz-2.1.0/LICENSE -# third_party/miniz-2.1.0/miniz.c -# tools/coverage_plugins_package/setup.py -# torch/_appdirs.py -# torch/utils/hipify/hipify_python.py -# -# Public Domain -# caffe2/mobile/contrib/libopencl-stub/LICENSE -# caffe2/utils/murmur_hash3.cc -# caffe2/utils/murmur_hash3.h -# -# Zlib -# aten/src/ATen/native/cpu/avx_mathfun.h - %changelog -* Fri Jan 26 2024 Fedora Release Engineering - 2.1.2-3 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Mon Jan 22 2024 Fedora Release Engineering - 2.1.2-2 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Wed Dec 27 2023 Tom Rix - 2.1.2-1 -- Update to 2.1.2 -- Stop versioning *.so's - 2.1.2's version is wrong -- Stub in caffe2 to test in flight package - -* Wed Dec 27 2023 Tom Rix - 2.1.0-13 -- Stub in openmp to test in openmp - -* Wed Dec 20 2023 Tom Rix - 2.1.0-12 -- Stub in rocm to test in flight packages - -* Wed Dec 13 2023 Tom Rix - 2.1.0-11 -- Move unversioned *.so's to main package - -* Fri Dec 1 2023 Tom Rix - 2.1.0-10 -- Disable gold linker -- Remove python requires -- Change to openblas, remove -lgfortran fixes -- Manually add -pie to linking options - -* Fri Nov 24 2023 Tom Rix - 2.1.0-9 -- Enable debug build -- Remove Khronos licensed files from source -- Use 0BSD license identifier -- Generate lists directories, python and header files -- Add a -test subpackage - -* Wed Nov 15 2023 Tom Rix - 2.1.0-8 -- Address review comments - -* Thu Nov 2 2023 Tom Rix - 2.1.0-7 -- Address review comments -- remove pyproject option - -* Thu Oct 19 2023 Tom Rix - 2.1.0-6 -- Address 
review comments - -* Wed Oct 18 2023 Tom Rix - 2.1.0-5 -- Address review comments - -* Sat Oct 14 2023 Tom Rix - 2.1.0-4 -- Use gloo, xnnpack -- Find missing build_bundled.py -- Add pyproject option - -* Thu Oct 12 2023 Tom Rix - 2.1.0-3 -- Address review comments -- Force so versioning on - -* Mon Oct 9 2023 Tom Rix - 2.1.0-2 -- Use the 2.1 release -- Reduce USE_SYSTEM_LIBS to parts -- Remove almost all of third_party/ -- Remove py2rpm generated noise - -* Sat Sep 30 2023 Tom Rix - 2.1.0-1 -- Initial package. +%autochangelog diff --git a/sources b/sources index 90b1128..60cce58 100644 --- a/sources +++ b/sources @@ -1,2 +1,14 @@ SHA512 (pytorch-v2.1.0.tar.gz) = 59421bf6cea6661d61ed66ab16526e3a07162e70e53381cbd5987042917610ec993d2f151fb086f0f98e5a396fe69e82bbc76f840bebffe4ebe7f50458c3aa44 SHA512 (pytorch-v2.1.2.tar.gz) = b7305407ad9dda877d277a0e7009f65f6d69f39370f2231b8bb8c6a9b711022d2129febdb00f5c83751b6664e01000fe2d30c5e5c13757de89fb8b2b99197a28 +SHA512 (pytorch-975d428.tar.gz) = a02195b18d832db9a739c3eeecd0cd0c8868d8b92e4a2fca42e4bdd20735f0745d84573df28d9ae1db014cf79ffd005a8409b3e8bb92f9db2a446f784ef46ff4 +SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb999aef3442ba00abfd2aa92266fa9c067e399dc88e6f0ccac40dc151378857e665638e78bbf0 +SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95 +SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f +SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683 +SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef +SHA512 (tensorpipe-52791a2.tar.gz) = 
1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e +SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65 +SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36 +SHA512 (pytorch-97ff6cf.tar.gz) = 105ebcba298558fe833f90e7e40b003d35a74609e777f9dc4c47f5668c884f603455113ac0ff252a62b83c81137ae66ceb1a862d351203925dcfc3dcf9f73580 +SHA512 (pytorch-v2.3.0.tar.gz) = 0c2ffc7bf2fd86070e9958c34eca1f03a0248a011ac6ffaeb69f65306ff856edd5359986f02af25888433187e6d7f29b60edded092e2ac30c8cec49023166eda +SHA512 (pytorch-v2.3.1.tar.gz) = fe132251b2bae87b70ba3d95dc32f6a4545970d11893118b0ebe6ca129732e516ef4d6cc4f380b3db9bb2277d1db8ce78a401c40149bb1dfbab76eab9e3992c4