add a --with cuda option
This commit is contained in:
parent
b8381c9f2d
commit
aca80a52c1
2 changed files with 218 additions and 4 deletions
154
next/pyproject.toml
Normal file
154
next/pyproject.toml
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
[build-system]
|
||||
requires = [
|
||||
"setuptools",
|
||||
"wheel",
|
||||
"astunparse",
|
||||
"numpy",
|
||||
"ninja",
|
||||
"pyyaml",
|
||||
"cmake",
|
||||
"typing-extensions",
|
||||
"requests",
|
||||
]
|
||||
# Use legacy backend to import local packages in setup.py
|
||||
build-backend = "setuptools.build_meta:__legacy__"
|
||||
|
||||
|
||||
[tool.black]
|
||||
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
|
||||
# line-length = 120
|
||||
target-version = ["py38", "py39", "py310", "py311"]
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py38"
|
||||
|
||||
# NOTE: Synchronize the ignores with .flake8
|
||||
ignore = [
|
||||
# these ignores are from flake8-bugbear; please fix!
|
||||
"B007", "B008", "B017",
|
||||
"B018", # Useless expression
|
||||
"B019",
|
||||
"B023",
|
||||
"B028", # No explicit `stacklevel` keyword argument found
|
||||
"B904",
|
||||
"E402",
|
||||
"C408", # C408 ignored because we like the dict keyword argument syntax
|
||||
"E501", # E501 is not flexible enough, we're using B950 instead
|
||||
"E721",
|
||||
"E731", # Assign lambda expression
|
||||
"E741",
|
||||
"EXE001",
|
||||
"F405",
|
||||
"F841",
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
"G101",
|
||||
# these ignores are from ruff NPY; please fix!
|
||||
"NPY002",
|
||||
# these ignores are from ruff PERF; please fix!
|
||||
"PERF203",
|
||||
"PERF401",
|
||||
"PERF403",
|
||||
# these ignores are from PYI; please fix!
|
||||
"PYI019",
|
||||
"PYI024",
|
||||
"PYI036",
|
||||
"PYI041",
|
||||
"PYI056",
|
||||
"SIM102", "SIM103", "SIM112", # flake8-simplify code styles
|
||||
"SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||
"SIM108",
|
||||
"SIM110",
|
||||
"SIM114", # Combine `if` branches using logical `or` operator
|
||||
"SIM115",
|
||||
"SIM116", # Disable Use a dictionary instead of consecutive `if` statements
|
||||
"SIM117",
|
||||
"SIM118",
|
||||
"UP006", # keep-runtime-typing
|
||||
"UP007", # keep-runtime-typing
|
||||
]
|
||||
line-length = 120
|
||||
select = [
|
||||
"B",
|
||||
"C4",
|
||||
"G",
|
||||
"E",
|
||||
"EXE",
|
||||
"F",
|
||||
"SIM1",
|
||||
"W",
|
||||
# Not included in flake8
|
||||
"NPY",
|
||||
"PERF",
|
||||
"PGH004",
|
||||
"PIE794",
|
||||
"PIE800",
|
||||
"PIE804",
|
||||
"PIE807",
|
||||
"PIE810",
|
||||
"PLC0131", # type bivariance
|
||||
"PLC0132", # type param mismatch
|
||||
"PLC0205", # string as __slots__
|
||||
"PLE",
|
||||
"PLR0133", # constant comparison
|
||||
"PLR0206", # property with params
|
||||
"PLR1722", # use sys exit
|
||||
"PLW0129", # assert on string literal
|
||||
"PLW0406", # import self
|
||||
"PLW0711", # binary op exception
|
||||
"PLW1509", # preexec_fn not safe with threads
|
||||
"PLW3301", # nested min max
|
||||
"PT006", # TODO: enable more PT rules
|
||||
"PT022",
|
||||
"PT023",
|
||||
"PT024",
|
||||
"PT025",
|
||||
"PT026",
|
||||
"PYI",
|
||||
"RUF008", # mutable dataclass default
|
||||
"RUF015", # access first ele in constant time
|
||||
"RUF016", # type error non-integer index
|
||||
"RUF017",
|
||||
"TRY200",
|
||||
"TRY302",
|
||||
"UP",
|
||||
]
|
||||
|
||||
[tool.ruff.per-file-ignores]
|
||||
"__init__.py" = [
|
||||
"F401",
|
||||
]
|
||||
"test/typing/reveal/**" = [
|
||||
"F821",
|
||||
]
|
||||
"test/torch_np/numpy_tests/**" = [
|
||||
"F821",
|
||||
]
|
||||
"test/jit/**" = [
|
||||
"PLR0133", # tests require this for JIT
|
||||
"PYI",
|
||||
"RUF015",
|
||||
"UP", # We don't want to modify the jit tests as they test specific syntax
|
||||
]
|
||||
"test/test_jit.py" = [
|
||||
"PLR0133", # tests require this for JIT
|
||||
"PYI",
|
||||
"RUF015",
|
||||
"UP", # We don't want to modify the jit tests as they test specific syntax
|
||||
]
|
||||
|
||||
"torch/onnx/**" = [
|
||||
"UP037", # ONNX does runtime type checking
|
||||
]
|
||||
|
||||
"torchgen/api/types/__init__.py" = [
|
||||
"F401",
|
||||
"F403",
|
||||
]
|
||||
"torchgen/executorch/api/types/__init__.py" = [
|
||||
"F401",
|
||||
"F403",
|
||||
]
|
||||
"torch/utils/collect_env.py" = [
|
||||
"UP", # collect_env.py needs to work with older versions of Python
|
||||
]
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
%global forgeurl https://github.com/pytorch/pytorch
|
||||
|
||||
# So pre releases can be tried
|
||||
%bcond_with gitcommit
|
||||
%bcond_without gitcommit
|
||||
%if %{with gitcommit}
|
||||
# The top of tree ~2/18/24
|
||||
%global commit0 372d078f361e726bb4ac0884ac334b04c58179ef
|
||||
|
|
@ -28,7 +28,7 @@
|
|||
%bcond_with rocm
|
||||
|
||||
# For testing openmp
|
||||
%bcond_with openmp
|
||||
%bcond_without openmp
|
||||
|
||||
# For testing caffe2
|
||||
%bcond_with caffe2
|
||||
|
|
@ -62,6 +62,11 @@ Source0: %{forgeurl}/releases/download/v%{version}/pytorch-v%{version}.ta
|
|||
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v23.3.3.tar.gz
|
||||
Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz
|
||||
|
||||
%if %{with cuda}
|
||||
Source10: https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.0.3.tar.gz
|
||||
Source11: https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.3.0.tar.gz
|
||||
%endif
|
||||
|
||||
%if %{with gitcommit}
|
||||
|
||||
Patch0: 0001-no-third_party-foxi.patch
|
||||
|
|
@ -144,7 +149,11 @@ BuildRequires: python3-pyyaml
|
|||
BuildRequires: python3-typing-extensions
|
||||
BuildRequires: sleef-devel
|
||||
BuildRequires: valgrind-devel
|
||||
%if %{with gitcommit}
|
||||
BuildRequires: xnnpack-devel = 0.0^git20231127.d9cce34
|
||||
%else
|
||||
BuildRequires: xnnpack-devel = 0.0^git20221221.51a9875
|
||||
%endif
|
||||
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: python3dist(filelock)
|
||||
|
|
@ -264,6 +273,13 @@ cp -r flatbuffers-23.3.3/* third_party/flatbuffers/
|
|||
tar xf %{SOURCE2}
|
||||
cp -r pybind11-2.11.1/* third_party/pybind11/
|
||||
|
||||
%if %{with cuda}
|
||||
tar xf %{SOURCE10}
|
||||
cp -r cudnn-frontend-1.0.3/* third_party/cudnn_frontend/
|
||||
tar xf %{SOURCE11}
|
||||
cp -r cutlass-3.3.0/* third_party/cutlass/
|
||||
%endif
|
||||
|
||||
%if %{with opencv}
|
||||
# Reduce requirements, *FOUND is not set
|
||||
sed -i -e 's/USE_OPENCV AND OpenCV_FOUND AND USE_FFMPEG AND FFMPEG_FOUND/USE_OPENCV AND USE_FFMPEG/' caffe2/video/CMakeLists.txt
|
||||
|
|
@ -272,6 +288,14 @@ sed -i -e 's/STATUS/FATAL/' caffe2/image/CMakeLists.txt
|
|||
cat caffe2/image/CMakeLists.txt
|
||||
%endif
|
||||
|
||||
%if 0%{?rhel}
|
||||
# In RHEL but too old
|
||||
sed -i -e '/typing-extensions/d' setup.py
|
||||
# Need to pip these
|
||||
sed -i -e '/sympy/d' setup.py
|
||||
sed -i -e '/fsspec/d' setup.py
|
||||
%endif
|
||||
|
||||
# Release comes fully loaded with third party src
|
||||
# Remove what we can
|
||||
#
|
||||
|
|
@ -291,8 +315,8 @@ mv third_party/flatbuffers .
|
|||
mv third_party/pybind11 .
|
||||
|
||||
%if %{with cuda}
|
||||
mv third_party/nvfuser .
|
||||
mv third_party/cudnn_frontend .
|
||||
mv third_party/cutlass .
|
||||
%endif
|
||||
|
||||
%if %{with test}
|
||||
|
|
@ -308,8 +332,8 @@ mv flatbuffers third_party
|
|||
mv pybind11 third_party
|
||||
|
||||
%if %{with cuda}
|
||||
mv nvfuser third_party
|
||||
mv cudnn_frontend third_party
|
||||
mv cutlass third_party
|
||||
%endif
|
||||
|
||||
%if %{with test}
|
||||
|
|
@ -336,6 +360,33 @@ sed -i -e 's@string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX
|
|||
|
||||
%build
|
||||
|
||||
#
|
||||
# Control the number of jobs
|
||||
#
|
||||
# The build can fail if too many threads exceed the physical memory
|
||||
# So count cores and memory, and increase the build memory until the build succeeds
|
||||
#
|
||||
# Real cores, No hyperthreading
|
||||
COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'`
|
||||
if [ ${COMPILE_JOBS}x = x ]; then
|
||||
COMPILE_JOBS=1
|
||||
fi
|
||||
# Take into account memory usage per core, do not thrash real memory
|
||||
%if %{with cuda}
|
||||
BUILD_MEM=4
|
||||
%else
|
||||
BUILD_MEM=2
|
||||
%endif
|
||||
MEM_KB=0
|
||||
MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'`
|
||||
MEM_MB=`eval "expr ${MEM_KB} / 1024"`
|
||||
MEM_GB=`eval "expr ${MEM_MB} / 1024"`
|
||||
COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"`
|
||||
if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then
|
||||
COMPILE_JOBS=$COMPILE_JOBS_MEM
|
||||
fi
|
||||
export MAX_JOBS=$COMPILE_JOBS
|
||||
|
||||
# For debugging setup.py
|
||||
# export SETUPTOOLS_SCM_DEBUG=1
|
||||
|
||||
|
|
@ -408,6 +459,7 @@ export USE_ROCM=OFF
|
|||
|
||||
%if %{with cuda}
|
||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/cuda/include
|
||||
export USE_CUDA=ON
|
||||
export USE_NCCL=OFF
|
||||
%else
|
||||
|
|
@ -503,6 +555,12 @@ sed -i -f br.sed devel.files
|
|||
%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so
|
||||
%{python3_sitearch}/torch/lib/libtorch_hip.so
|
||||
%endif
|
||||
%if %{with cuda}
|
||||
%{python3_sitearch}/torch/lib/libc10_cuda.so
|
||||
%{python3_sitearch}/torch/lib/libcaffe2_nvrtc.so
|
||||
%{python3_sitearch}/torch/lib/libtorch_cuda.so
|
||||
%{python3_sitearch}/torch/lib/libtorch_cuda_linalg.so
|
||||
%endif
|
||||
|
||||
# misc
|
||||
%{python3_sitearch}/torch/utils/model_dump/{*.js,*.mjs,*.html}
|
||||
|
|
@ -510,9 +568,11 @@ sed -i -f br.sed devel.files
|
|||
%{python3_sitearch}/torchgen/packaged/autograd/{*.md,*.yaml}
|
||||
%if %{with gitcommit}
|
||||
%{python3_sitearch}/torch/_export/serde/schema.yaml
|
||||
%if 0%{?fedora}
|
||||
%{python3_sitearch}/torch/distributed/pipeline/sync/_balance/py.typed
|
||||
%{python3_sitearch}/torch/distributed/pipeline/sync/py.typed
|
||||
%endif
|
||||
%endif
|
||||
|
||||
# egg
|
||||
%{python3_sitearch}/torch*.egg-info/*
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue