add a --with cuda option

This commit is contained in:
Tom Rix 2024-02-20 11:07:12 -08:00
commit aca80a52c1
2 changed files with 218 additions and 4 deletions

154
next/pyproject.toml Normal file
View file

@ -0,0 +1,154 @@
# Build-time configuration: what must be installed to build the package
# and which backend performs the build.
[build-system]
# Packages that have to be present before the build backend is invoked.
requires = [
"setuptools",
"wheel",
"astunparse",
"numpy",
"ninja",
"pyyaml",
"cmake",
"typing-extensions",
"requests",
]
# Use legacy backend to import local packages in setup.py
build-backend = "setuptools.build_meta:__legacy__"
# Settings for the black code formatter.
[tool.black]
# line-length is intentionally left disabled for now. Uncomment it once
# pyproject.toml is confirmed to work, to stay consistent with flake8.
# line-length = 120
# Python versions whose syntax black is allowed to emit.
target-version = ["py38", "py39", "py310", "py311"]
# Settings for the ruff linter.
[tool.ruff]
target-version = "py38"
# Rule codes that are suppressed even though their families are enabled
# in `select` below.
# NOTE: Synchronize the ignores with .flake8
ignore = [
# these ignores are from flake8-bugbear; please fix!
"B007", "B008", "B017",
"B018", # Useless expression
"B019",
"B023",
"B028", # No explicit `stacklevel` keyword argument found
"B904",
"E402",
"C408", # C408 ignored because we like the dict keyword argument syntax
"E501", # E501 is not flexible enough, we're using B950 instead
"E721",
"E731", # Assign lambda expression
"E741",
"EXE001",
"F405",
"F841",
# these ignores are from flake8-logging-format; please fix!
"G101",
# these ignores are from ruff NPY; please fix!
"NPY002",
# these ignores are from ruff PERF; please fix!
"PERF203",
"PERF401",
"PERF403",
# these ignores are from PYI; please fix!
"PYI019",
"PYI024",
"PYI036",
"PYI041",
"PYI056",
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
"SIM108",
"SIM110",
"SIM114", # Combine `if` branches using logical `or` operator
"SIM115",
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
"SIM117",
"SIM118",
"UP006", # keep-runtime-typing
"UP007", # keep-runtime-typing
]
line-length = 120
# Rule families (prefixes) and individual rule codes that are enabled.
select = [
"B",
"C4",
"G",
"E",
"EXE",
"F",
"SIM1",
"W",
# Not included in flake8
"NPY",
"PERF",
"PGH004",
"PIE794",
"PIE800",
"PIE804",
"PIE807",
"PIE810",
"PLC0131", # type bivariance
"PLC0132", # type param mismatch
"PLC0205", # string as __slots__
"PLE",
"PLR0133", # constant comparison
"PLR0206", # property with params
"PLR1722", # use sys exit
"PLW0129", # assert on string literal
"PLW0406", # import self
"PLW0711", # binary op exception
"PLW1509", # preexec_fn not safe with threads
"PLW3301", # nested min max
"PT006", # TODO: enable more PT rules
"PT022",
"PT023",
"PT024",
"PT025",
"PT026",
"PYI",
"RUF008", # mutable dataclass default
"RUF015", # access first ele in constant time
"RUF016", # type error non-integer index
"RUF017",
"TRY200",
"TRY302",
"UP",
]
# Additional ruff rules to ignore for specific files or path globs.
# Each key is a path pattern; each value lists the rule codes or prefixes
# that are suppressed for files matching it.
[tool.ruff.per-file-ignores]
"__init__.py" = [
"F401",
]
"test/typing/reveal/**" = [
"F821",
]
"test/torch_np/numpy_tests/**" = [
"F821",
]
"test/jit/**" = [
"PLR0133", # tests require this for JIT
"PYI",
"RUF015",
"UP", # We don't want to modify the jit tests as they test specific syntax
]
"test/test_jit.py" = [
"PLR0133", # tests require this for JIT
"PYI",
"RUF015",
"UP", # We don't want to modify the jit tests as they test specific syntax
]
"torch/onnx/**" = [
"UP037", # ONNX does runtime type checking
]
"torchgen/api/types/__init__.py" = [
"F401",
"F403",
]
"torchgen/executorch/api/types/__init__.py" = [
"F401",
"F403",
]
"torch/utils/collect_env.py" = [
"UP", # collect_env.py needs to work with older versions of Python
]

View file

@ -4,7 +4,7 @@
%global forgeurl https://github.com/pytorch/pytorch
# So pre releases can be tried
%bcond_with gitcommit
%bcond_without gitcommit
%if %{with gitcommit}
# The top of tree ~2/18/24
%global commit0 372d078f361e726bb4ac0884ac334b04c58179ef
@ -28,7 +28,7 @@
%bcond_with rocm
# For testing openmp
%bcond_with openmp
%bcond_without openmp
# For testing caffe2
%bcond_with caffe2
@ -62,6 +62,11 @@ Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.ta
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz
Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz
%if %{with cuda}
Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.0.3.tar.gz
Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.3.0.tar.gz
%endif
%if %{with gitcommit}
Patch0: 0001-no-third_party-foxi.patch
@ -144,7 +149,11 @@ BuildRequires: python3-pyyaml
BuildRequires: python3-typing-extensions
BuildRequires: sleef-devel
BuildRequires: valgrind-devel
%if %{with gitcommit}
BuildRequires: xnnpack-devel = 0.0^git20231127.d9cce34
%else
BuildRequires: xnnpack-devel = 0.0^git20221221.51a9875
%endif
BuildRequires: python3-devel
BuildRequires: python3dist(filelock)
@ -264,6 +273,13 @@ cp -r flatbuffers-23.3.3/* third_party/flatbuffers/
tar xf %{SOURCE2}
cp -r pybind11-2.11.1/* third_party/pybind11/
%if %{with cuda}
tar xf %{SOURCE10}
cp -r cudnn-frontend-1.0.3/* third_party/cudnn_frontend/
tar xf %{SOURCE11}
cp -r cutlass-3.3.0/* third_party/cutlass/
%endif
%if %{with opencv}
# Reduce requirements, *FOUND is not set
sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt
@ -272,6 +288,14 @@ sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt
cat caffe2/image/CMakeLists.txt
%endif
%if 0%{?rhel}
# In RHEL but too old
sed -i -e '/typing-extensions/d' setup.py
# Need to pip these
sed -i -e '/sympy/d' setup.py
sed -i -e '/fsspec/d' setup.py
%endif
# Release comes fully loaded with third party src
# Remove what we can
#
@ -291,8 +315,8 @@ mv third_party/flatbuffers .
mv third_party/pybind11 .
%if %{with cuda}
mv third_party/nvfuser .
mv third_party/cudnn_frontend .
mv third_party/cutlass .
%endif
%if %{with test}
@ -308,8 +332,8 @@ mv flatbuffers third_party
mv pybind11 third_party
%if %{with cuda}
mv nvfuser third_party
mv cudnn_frontend third_party
mv cutlass third_party
%endif
%if %{with test}
@ -336,6 +360,33 @@ sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX
%build
#
# Control the number of jobs
#
# The build can fail if too many parallel jobs exhaust physical memory.
# Count both cores and memory and use whichever allows fewer jobs.
# If the build still runs out of memory, increase BUILD_MEM until it succeeds.
#
# Real cores, no hyperthreading. Falls back to a single job when
# /proc/cpuinfo has no 'cpu cores' line.
COMPILE_JOBS=`grep -m 1 'cpu cores' /proc/cpuinfo | awk '{ print $4 }'`
if [ "${COMPILE_JOBS}x" = x ]; then
    COMPILE_JOBS=1
fi
# Memory budget per compile job, in GB; do not thrash real memory
%if %{with cuda}
BUILD_MEM=4
%else
BUILD_MEM=2
%endif
# Total memory in KB. Default to 0 (which yields a single job) when
# MemTotal cannot be read, instead of passing an empty value on.
MEM_KB=`grep MemTotal /proc/meminfo | awk '{ print $2 }'`
if [ "${MEM_KB}x" = x ]; then
    MEM_KB=0
fi
# Use shell arithmetic rather than expr: expr exits with status 1 when
# its result is 0, which aborts a script that is run with sh -e.
MEM_MB=$(( MEM_KB / 1024 ))
MEM_GB=$(( MEM_MB / 1024 ))
# Always allow at least one job, plus one per BUILD_MEM GB of memory
COMPILE_JOBS_MEM=$(( 1 + MEM_GB / BUILD_MEM ))
if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then
    COMPILE_JOBS=$COMPILE_JOBS_MEM
fi
export MAX_JOBS=$COMPILE_JOBS
# For debugging setup.py
# export SETUPTOOLS_SCM_DEBUG=1
@ -408,6 +459,7 @@ export USE_ROCM=OFF
%if %{with cuda}
export CUDACXX=/usr/local/cuda/bin/nvcc
export CPLUS_INCLUDE_PATH=/usr/local/cuda/include
export USE_CUDA=ON
export USE_NCCL=OFF
%else
@ -503,6 +555,12 @@ sed -i -f br.sed devel.files
%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so
%{python3_sitearch}/torch/lib/libtorch_hip.so
%endif
%if %{with cuda}
%{python3_sitearch}/torch/lib/libc10_cuda.so
%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so
%{python3_sitearch}/torch/lib/libtorch_cuda.so
%{python3_sitearch}/torch/lib/libtorch_cuda_linalg.so
%endif
# misc
%{python3_sitearch}/torch/utils/model_dump/{*.js,*.mjs,*.html}
@ -510,9 +568,11 @@ sed -i -f br.sed devel.files
%{python3_sitearch}/torchgen/packaged/autograd/{*.md,*.yaml}
%if %{with gitcommit}
%{python3_sitearch}/torch/_export/serde/schema.yaml
%if 0%{?fedora}
%{python3_sitearch}/torch/distributed/pipeline/sync/_balance/py.typed
%{python3_sitearch}/torch/distributed/pipeline/sync/py.typed
%endif
%endif
# egg
%{python3_sitearch}/torch*.egg-info/*