From 3d98ae877ea611631645e8fb89df726da704c10a Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 6 Apr 2024 07:08:52 -0600
Subject: [PATCH] Update to 2.3-rc7

Patch the dim issue needed for torchrl
Stage tensorpipe thirdparty needed for distributed.

Signed-off-by: Tom Rix <trix@redhat.com>
---
 .gitignore                              |   4 +
 0001-Reenable-dim-for-python-3.12.patch | 115 ++++++++++++++++++++++++
 python-torch.spec                       |  89 ++++++++++++++++--
 sources                                 |   4 +
 4 files changed, 203 insertions(+), 9 deletions(-)
 create mode 100644 0001-Reenable-dim-for-python-3.12.patch
diff --git a/.gitignore b/.gitignore
index 96d43c4..cad15bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,7 @@
 /v2.11.1.tar.gz
 /pytorch-6a89a75.tar.gz
 /pytorch-74832f1.tar.gz
+/pytorch-4bb5cb5.tar.gz
+/tensorpipe-52791a2.tar.gz
+/v1.41.0.tar.gz
+/libnop-910b558.tar.gz
diff --git a/0001-Reenable-dim-for-python-3.12.patch b/0001-Reenable-dim-for-python-3.12.patch
new file mode 100644
index 0000000..138b5d4
--- /dev/null
+++ b/0001-Reenable-dim-for-python-3.12.patch
@@ -0,0 +1,115 @@
+From ee3fb343a376cdba6f4ce188cac90023f13e2aea Mon Sep 17 00:00:00 2001
+From: Tom Rix <trix@redhat.com>
+Date: Thu, 4 Apr 2024 14:21:38 -0600
+Subject: [PATCH] Reenable dim for python 3.12
+
+In 3.12:
+
+_PyArg_Parser added an element to the start of the structure.
+So existing positional initialization is off.  Switch to element
+initialization.
+
+_Py_CODEUNIT changed from an int to a union, but relevant_op
+is passed an int for the return of decoder.opcode, so the parameter
+type is wrong, switch it to int.
+
+The opcode PRECALL was removed, so reduce its handling to 3.11
+
+Signed-off-by: Tom Rix <trix@redhat.com>
+---
+ functorch/csrc/dim/dim.cpp     | 24 +++++-------------------
+ functorch/csrc/dim/minpybind.h |  4 ++--
+ 2 files changed, 7 insertions(+), 21 deletions(-)
+
+diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
+index 4cc027504c77..e48b0d58081f 100644
+--- a/functorch/csrc/dim/dim.cpp
++++ b/functorch/csrc/dim/dim.cpp
+@@ -6,20 +6,6 @@
+ 
+ #include <torch/csrc/utils/python_compat.h>
+ 
+-
+-// Many APIs have changed/don't exist anymore
+-#if IS_PYTHON_3_12_PLUS
+-
+-#include "dim.h"
+-
+-// Re-enable this some day
+-PyObject* Dim_init() {
+-    PyErr_SetString(PyExc_RuntimeError, "First class dim doesn't work with python 3.12");
+-    return nullptr;
+-}
+-
+-#else
+-
+ #include "minpybind.h"
+ #include <frameobject.h>
+ #include <opcode.h>
+@@ -441,7 +427,7 @@ static PyObject* DimList_bind(DimList *self,
+     PY_BEGIN
+     mpy::handle sizes;
+     static const char * const _keywords[] = {"sizes", nullptr};
+-    static _PyArg_Parser parser = {"O", _keywords, 0};
++    static _PyArg_Parser parser = { .format = "O", .keywords = _keywords};
+     if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &sizes)) {
+         return nullptr;
+     }
+@@ -465,7 +451,7 @@ static PyObject* DimList_bind_len(DimList *self,
+     PY_BEGIN
+     int size;
+     static const char * const _keywords[] = {"N", nullptr};
+-    static _PyArg_Parser parser = {"i", _keywords, 0};
++    static _PyArg_Parser parser = { .format = "i", .keywords = _keywords};
+     if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, &size)) {
+         return nullptr;
+     }
+@@ -1468,7 +1454,7 @@ PyTypeObject Tensor::Type = {
+ 
+ // dim() --------------------
+ 
+-static bool relevant_op(_Py_CODEUNIT c) {
++static bool relevant_op(int c) {
+     switch(c) {
+         case STORE_NAME:
+         case STORE_GLOBAL:
+@@ -1587,7 +1573,7 @@ static PyObject* _dims(PyObject *self,
+     auto c = mpy::obj<PyCodeObject>::steal(PyFrame_GetCode(f.ptr()));
+     auto lasti = PyFrame_GetLasti(f.ptr());
+     auto decoder = PyInstDecoder(c.ptr(), lasti);
+-    #if IS_PYTHON_3_11_PLUS
++    #if IS_PYTHON_3_11
+     // When py3.11 adapts bytecode lasti points to the precall
+     // rather than the call instruction after it
+     if (decoder.opcode() == PRECALL) {
+@@ -3268,4 +3254,4 @@ PyObject* Dim_init() {
+     }
+ }
+ 
+-#endif
++
+diff --git a/functorch/csrc/dim/minpybind.h b/functorch/csrc/dim/minpybind.h
+index de82b5af95a4..d76d4828bf80 100644
+--- a/functorch/csrc/dim/minpybind.h
++++ b/functorch/csrc/dim/minpybind.h
+@@ -621,7 +621,7 @@ struct vector_args {
+             PyObject *dummy = NULL;
+             _PyArg_ParseStackAndKeywords((PyObject*const*)args, nargs, kwnames.ptr(), _parser, &dummy, &dummy, &dummy, &dummy, &dummy);
+ #else
+-            _PyArg_Parser* _parser = new _PyArg_Parser{NULL, &names_buf[0], fname_cstr, 0};
++            _PyArg_Parser* _parser = new _PyArg_Parser{ .keywords = &names_buf[0], .fname = fname_cstr};
+             std::unique_ptr<PyObject*[]> buf(new PyObject*[names.size()]);
+             _PyArg_UnpackKeywords((PyObject*const*)args, nargs, NULL, kwnames.ptr(), _parser, required, (Py_ssize_t)values.size() - kwonly, 0, &buf[0]);
+ #endif
+@@ -706,7 +706,7 @@ inline object handle::call_vector(vector_args args) {
+ #define MPY_PARSE_ARGS_KWNAMES(fmt, FORALL_ARGS) \
+     static const char * const kwlist[] = { FORALL_ARGS(MPY_ARGS_NAME) nullptr}; \
+     FORALL_ARGS(MPY_ARGS_DECLARE) \
+-    static _PyArg_Parser parser = {fmt, kwlist, 0}; \
++    static _PyArg_Parser parser = { .format = fmt, .keywords = kwlist}; \
+     if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &parser, FORALL_ARGS(MPY_ARGS_POINTER) nullptr)) { \
+         throw mpy::exception_set(); \
+     }
+-- 
+2.44.0
+
diff --git a/python-torch.spec b/python-torch.spec
index f26f395..1b83697 100644
--- a/python-torch.spec
+++ b/python-torch.spec
@@ -6,10 +6,10 @@
 # So pre releases can be tried
 %bcond_without gitcommit
 %if %{with gitcommit}
-# git tag v2.3.0-rc6
-%global commit0 74832f12fae2e1bc51bf1f9971dcd12c90a971f5
+# git tag v2.3.0-rc7
+%global commit0 4bb5cb51e6ceeb4b0b70b439c7b92168855f146f
 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
-%global date0 20242213
+%global date0 20240402
 %else
 %global commit0 975d4284250170602db60adfda5eb1664a3b8acc
 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
@@ -47,6 +47,11 @@
 
 # For testing distributed
 %bcond_with distributed
+# For testing distributed+rccl etc.
+%bcond_with rccl
+%bcond_with gloo
+%bcond_with mpi
+%bcond_with tensorpipe
 
 # For testing openvs
 %bcond_with opencv
@@ -81,6 +86,20 @@ Source10:       https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v%{cu
 Source11:       https://github.com/NVIDIA/cutlass/archive/refs/tags/v%{cul_ver}.tar.gz
 %endif
 
+%if %{with tensorpipe}
+# Development on tensorpipe has stopped, repo made read only July 1, 2023, this is the last commit
+%global tp_commit 52791a2fd214b2a9dc5759d36725909c1daa7f2e
+%global tp_scommit %(c=%{tp_commit}; echo ${c:0:7})
+Source20:       https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz
+# The old libuv tensorpipe uses
+Source21:       https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz
+# Development on libnop has, as far as I know, stopped; this is the last commit
+%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281
+%global nop_scommit %(c=%{nop_commit}; echo ${c:0:7})
+Source22:       https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz
+
+%endif
+
 Patch0:        0001-no-third_party-foxi.patch
 Patch1:        0001-no-third_party-fmt.patch
 Patch2:        0001-no-third_party-FXdiv.patch
@@ -91,6 +110,9 @@ Patch5:        0001-disable-submodule-search.patch
 Patch6:        0001-reenable-foxi-linking.patch
 %endif
 
+# https://github.com/pytorch/pytorch/pull/123384
+Patch7:        0001-Reenable-dim-for-python-3.12.patch
+
 # ROCm patches
 # https://github.com/pytorch/pytorch/pull/120551
 Patch100:      0001-Optionally-use-hipblaslt.patch
@@ -114,19 +136,23 @@ BuildRequires:  fxdiv-devel
 BuildRequires:  gcc-c++
 BuildRequires:  gcc-gfortran
 %if %{with distributed}
+%if %{with gloo}
 BuildRequires:  gloo-devel
 %endif
+%endif
 BuildRequires:  ninja-build
 BuildRequires:  onnx-devel
 BuildRequires:  libomp-devel
+%if %{with distributed}
+%if %{with mpi}
+BuildRequires:  openmpi-devel
+%endif
+%endif
 BuildRequires:  openblas-devel
 BuildRequires:  pocketfft-devel
 BuildRequires:  protobuf-devel
 BuildRequires:  pthreadpool-devel
 BuildRequires:  psimd-devel
-BuildRequires:  python3-numpy
-BuildRequires:  python3-pyyaml
-BuildRequires:  python3-typing-extensions
 BuildRequires:  sleef-devel
 BuildRequires:  valgrind-devel
 BuildRequires:  xnnpack-devel = 0.0^git20240229.fcbf55a
@@ -135,9 +161,11 @@ BuildRequires:  python3-devel
 BuildRequires:  python3dist(filelock)
 BuildRequires:  python3dist(jinja2)
 BuildRequires:  python3dist(networkx)
+BuildRequires:  python3dist(numpy)
+BuildRequires:  python3dist(pyyaml)
 BuildRequires:  python3dist(setuptools)
-BuildRequires:  python3dist(typing-extensions)
 BuildRequires:  python3dist(sphinx)
+BuildRequires:  python3dist(typing-extensions)
 
 %if 0%{?fedora}
 BuildRequires:  python3-pybind11
@@ -160,8 +188,10 @@ BuildRequires:  rocblas-devel
 BuildRequires:  rocrand-devel
 BuildRequires:  rocfft-devel
 %if %{with distributed}
+%if %{with rccl}
 BuildRequires:  rccl-devel
 %endif
+%endif
 BuildRequires:  rocprim-devel
 BuildRequires:  rocm-cmake
 BuildRequires:  rocm-comgr-devel
@@ -186,13 +216,23 @@ BuildRequires:  google-benchmark-devel
 
 Requires:       python3dist(dill)
 
+# For convenience
+Provides:       pytorch
+
 # Apache-2.0
 Provides:       bundled(flatbuffers) = 22.3.3
 # MIT
 Provides:       bundled(miniz) = 2.1.0
 Provides:       bundled(pybind11) = 2.11.1
-# For convience
-Provides:       pytorch
+
+%if %{with tensorpipe}
+# BSD-3-Clause
+Provides:       bundled(tensorpipe)
+# Apache-2.0
+Provides:       bundled(libnop)
+# MIT AND CC-BY-4.0 AND ISC AND BSD-2-Clause
+Provides:       bundled(libuv) = 1.41.0
+%endif
 
 
 
@@ -276,6 +316,15 @@ tar xf %{SOURCE11}
 cp -r cutlass-%{cul_ver}/* third_party/cutlass/
 %endif
 
+%if %{with tensorpipe}
+tar xf %{SOURCE20}
+cp -r tensorpipe-*/* third_party/tensorpipe/
+tar xf %{SOURCE21}
+cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/
+tar xf %{SOURCE22}
+cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/
+%endif
+
 %if %{with opencv}
 # Reduce requirements, *FOUND is not set 
 sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt
@@ -318,6 +367,10 @@ mv third_party/cudnn_frontend .
 mv third_party/cutlass .
 %endif
 
+%if %{with tensorpipe}
+mv third_party/tensorpipe .
+%endif
+
 %if %{with test}
 mv third_party/googletest .
 %endif
@@ -335,6 +388,10 @@ mv cudnn_frontend third_party
 mv cutlass third_party
 %endif
 
+%if %{with tensorpipe}
+mv tensorpipe third_party
+%endif
+
 %if %{with test}
 mv googletest third_party
 %endif
@@ -417,11 +474,14 @@ export USE_CUDA=OFF
 export USE_FBGEMM=OFF
 export USE_FLASH_ATTENTION=OFF
 export USE_GOLD_LINKER=OFF
+export USE_GLOO=OFF
 export USE_ITT=OFF
 export USE_KINETO=OFF
 export USE_LITE_INTERPRETER_PROFILER=OFF
 export USE_LITE_PROTO=OFF
+export USE_MAGMA=OFF
 export USE_MKLDNN=OFF
+export USE_MPI=OFF
 export USE_NCCL=OFF
 export USE_NNPACK=OFF
 export USE_NUMPY=ON
@@ -457,6 +517,17 @@ export USE_CUDA=ON
 
 %if %{with distributed}
 export USE_DISTRIBUTED=ON
+%if %{with tensorpipe}
+export USE_TENSORPIPE=ON
+export TP_BUILD_LIBUV=OFF
+%endif
+
+%if %{with gloo}
+export USE_GLOO=ON
+%endif
+%if %{with mpi}
+export USE_MPI=ON
+%endif
 %endif
 
 %if %{with opencv}
diff --git a/sources b/sources
index 0c21ff2..72835ff 100644
--- a/sources
+++ b/sources
@@ -5,3 +5,7 @@ SHA512 (v23.3.3.tar.gz) = 4066c94f2473c7ea16917d29a613e16f840a329089c88e0bdbdb99
 SHA512 (v2.11.1.tar.gz) = ed1512ff0bca3bc0a45edc2eb8c77f8286ab9389f6ff1d5cb309be24bc608abbe0df6a7f5cb18c8f80a3bfa509058547c13551c3cd6a759af708fd0cdcdd9e95
 SHA512 (pytorch-6a89a75.tar.gz) = 6978acc6f37d7c5adc71517a6f379c7133b2bbd040189deddba7753acde41f6ddba2e9f2e397928e89c776d6a5458b8a74f8e04beb312d71fd30b072687ba98f
 SHA512 (pytorch-74832f1.tar.gz) = bd553bfbbb422d353bbbf616c201251b2517b905e2621fa05bfe3d97726b078caad377583adccdc0cca234235a11fcb4730a93e834907b2ca4c06d552b2a2683
+SHA512 (pytorch-4bb5cb5.tar.gz) = 430ae996ddee560537787646ae9f7aa01498f37c99c2e3fe4c5f66ee732ee3fe4ecf337fdf857bc0c7fe27634af75cee3ce576bbe2576463b81e27dbbfacf6ef
+SHA512 (tensorpipe-52791a2.tar.gz) = 1e5faf17a7236c5506c08cb28be16069b11bb929bbca64ed9745ce4277d46739186ab7d6597da7437d90ed2d166d4c37ef2f3bceabe8083ef3adbb0e8e5f227e
+SHA512 (v1.41.0.tar.gz) = bb08a1970a10e8d9571ffea3d021643de30ec212cd51317b98d6cf0cfe55d6877992921fb01d1188a6d466687335b77885685d924f8cb7200a0bec30eee05c65
+SHA512 (libnop-910b558.tar.gz) = 74c5324eaa1b6b2ac8dfef94c835b5c5b044625f8e5efe3522470b1ecc4798ff43d344a013cee2f6901e83267c6167072947b754e63f1552ae7044cffe234c36