From 6989951b26a6532106a941846464a8d6fa28e073 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 16 Mar 2024 15:39:58 -0400 Subject: [PATCH 1/4] Update to 2.3.0-rc2 Signed-off-by: Tom Rix --- python-torch.spec | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python-torch.spec b/python-torch.spec index 3fbab79..313f013 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -4,11 +4,12 @@ %global forgeurl https://github.com/pytorch/pytorch # So pre releases can be tried -%bcond_with gitcommit +%bcond_without gitcommit %if %{with gitcommit} -%global commit0 975d4284250170602db60adfda5eb1664a3b8acc +# git tag v2.3.0-rc2 +%global commit0 6a89a753b1556fe8558582c452fdba083f6ec01a %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%global date0 20240307 +%global date0 20240313 %else %global commit0 975d4284250170602db60adfda5eb1664a3b8acc %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) From 5a5c58f298c41ccb079d81d7fb25407586445723 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 19 Mar 2024 08:14:22 -0400 Subject: [PATCH 2/4] Fix ROCm build Signed-off-by: Tom Rix --- python-torch.spec | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/python-torch.spec b/python-torch.spec index 313f013..7f9c596 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -26,7 +26,7 @@ %ifarch x86_64 %if 0%{?fedora} -%bcond_with rocm +%bcond_without rocm %else %bcond_with rocm %endif @@ -35,8 +35,8 @@ %bcond_with hipblaslt # Which families gpu build for %global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 -%global rocm_default_gpu default -%bcond_without rocm_loop +%global rocm_default_gpu gfx9 +%bcond_with rocm_loop # For testing caffe2 %if 0%{?fedora} @@ -49,7 +49,7 @@ %bcond_with distributed # For testing openvs -%bcond_without opencv +%bcond_with opencv # For testing cuda %ifarch x86_64 @@ -265,7 +265,6 @@ cp -r cutlass-%{cul_ver}/* third_party/cutlass/ sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt sed -i -e 's/USE_OPENCV AND OpenCV_FOUND/USE_OPENCV/' caffe2/image/CMakeLists.txt sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt -cat caffe2/image/CMakeLists.txt %endif %if 0%{?rhel} @@ -276,6 +275,12 @@ sed -i -e '/sympy/d' setup.py sed -i -e '/fsspec/d' setup.py %endif +# A new dependency +# Connected to USE_FLASH_ATTENTION, since this is off, do not need it +sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake +# sed -i -e '/aotriton/d' aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +rm aten/src/ATen/native/transformers/cuda/sdp_utils.cpp + # Release comes fully loaded with third party src # Remove what we can # @@ -396,6 +401,7 @@ export INTERN_BUILD_MOBILE=OFF export USE_DISTRIBUTED=OFF export USE_CUDA=OFF export USE_FBGEMM=OFF +export USE_FLASH_ATTENTION=OFF export USE_GOLD_LINKER=OFF export USE_ITT=OFF export USE_KINETO=OFF @@ -460,9 +466,11 @@ export BUILD_TEST=ON %if %{with rocm} export USE_ROCM=ON -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} -export DEVICE_LIB_PATH=%{clang_resource_dir}/amdgcn/bitcode +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode gpu=%{rocm_default_gpu} module load rocm/$gpu @@ -493,9 +501,11 @@ done %if %{with rocm} export USE_ROCM=ON -export HIP_PATH=%{_prefix} -export ROCM_PATH=%{_prefix} -export DEVICE_LIB_PATH=%{clang_resource_dir}/amdgcn/bitcode +export HIP_PATH=`hipconfig -p` +export ROCM_PATH=`hipconfig -R` +export HIP_CLANG_PATH=`hipconfig -l` +RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` +export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode gpu=%{rocm_default_gpu} module load rocm/$gpu @@ -541,9 +551,11 @@ done %{python3_sitearch}/caffe2 %endif %if %{with rocm} +%if %{with rocm_loop} %{_libdir}/rocm/gfx*/bin/* %{_libdir}/rocm/gfx*/lib64/* %endif +%endif %changelog %autochangelog From 74a2fd1d898282cee4274fffef1c5a738c253af6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 19 Mar 2024 14:53:39 -0400 Subject: [PATCH 3/4] Better disabling of aotriton Signed-off-by: Tom Rix --- 0001-disable-use-of-aotriton.patch | 46 ++++++++++++++++++++++++++++++ python-torch.spec | 3 +- 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 0001-disable-use-of-aotriton.patch diff --git a/0001-disable-use-of-aotriton.patch b/0001-disable-use-of-aotriton.patch new file mode 100644 index 0000000..34a1704 --- /dev/null +++ b/0001-disable-use-of-aotriton.patch @@ -0,0 +1,46 @@ +From 33d48f71db7530f00dbd8cff281b65aa8b355b2a Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Tue, 19 Mar 2024 11:32:37 -0400 +Subject: [PATCH] disable use of aotriton + +--- + aten/src/ATen/native/transformers/cuda/sdp_utils.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +index 96b839820efd..2d3dd0cb4b0f 100644 +--- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp ++++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +@@ -21,9 +21,11 @@ + #include + #include + ++#ifdef USE_FLASH_ATTENTION + #if USE_ROCM + #include + #endif ++#endif + + /** + * Note [SDPA Runtime Dispatch] +@@ -183,6 +185,7 @@ bool check_sm_version(cudaDeviceProp * dprops) { + } + + bool check_flash_attention_hardware_support(sdp_params const& params, bool debug) { ++#ifdef USE_FLASH_ATTENTION + // Check that the gpu is capable of running flash attention + using sm80 = SMVersion<8, 0>; + using sm90 = SMVersion<9, 0>; +@@ -211,6 +214,9 @@ bool check_flash_attention_hardware_support(sdp_params const& params, bool debug + } + #endif + return true; ++#else ++ return false; ++#endif + } + + bool check_mem_efficient_hardware_support(sdp_params const& params, bool debug) { +-- +2.44.0 + diff --git a/python-torch.spec b/python-torch.spec index 7f9c596..181382d 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -98,6 +98,7 @@ Patch101: 0001-cuda-hip-signatures.patch Patch102: 0001-silence-an-assert.patch Patch103: 0001-can-not-use-with-c-files.patch Patch104: 0001-use-any-hip.patch +Patch105: 0001-disable-use-of-aotriton.patch %endif ExclusiveArch: x86_64 aarch64 @@ -278,8 +279,6 @@ sed -i -e '/fsspec/d' setup.py # A new dependency # Connected to USE_FLASH_ATTENTION, since this is off, do not need it sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake -# sed -i -e '/aotriton/d' aten/src/ATen/native/transformers/cuda/sdp_utils.cpp -rm aten/src/ATen/native/transformers/cuda/sdp_utils.cpp # Release comes fully loaded with third party src # Remove what we can From cc2688494a77a3e3a05f66d5728a34f736a5c1d8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 19 Mar 2024 20:00:51 -0600 Subject: [PATCH 4/4] Add roctracer Signed-off-by: Tom Rix --- python-torch.spec | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-torch.spec b/python-torch.spec index 181382d..07a47eb 100644 --- a/python-torch.spec +++ b/python-torch.spec @@ -35,8 +35,8 @@ %bcond_with hipblaslt # Which families gpu build for %global rocm_gpu_list gfx8 gfx9 gfx10 gfx11 -%global rocm_default_gpu gfx9 -%bcond_with rocm_loop +%global rocm_default_gpu default +%bcond_without rocm_loop # For testing caffe2 %if 0%{?fedora} @@ -172,6 +172,7 @@ BuildRequires: rocm-runtime-devel BuildRequires: rocm-rpm-macros BuildRequires: rocm-rpm-macros-modules BuildRequires: rocthrust-devel +BuildRequires: roctracer-devel Requires: rocm-rpm-macros-modules %endif