Linux v5.6.14

This commit is contained in:
Justin M. Forbes 2020-05-20 12:17:15 -05:00
commit 4e5efc03c3
5 changed files with 866 additions and 422 deletions

View file

@ -1,320 +0,0 @@
From patchwork Wed Mar 25 20:16:03 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Thierry Reding <thierry.reding@gmail.com>
X-Patchwork-Id: 1261638
Return-Path: <linux-tegra-owner@vger.kernel.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
helo=vger.kernel.org;
envelope-from=linux-tegra-owner@vger.kernel.org;
receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
dmarc=pass (p=none dis=none) header.from=gmail.com
Authentication-Results: ozlabs.org; dkim=pass (2048-bit key;
unprotected) header.d=gmail.com header.i=@gmail.com
header.a=rsa-sha256 header.s=20161025 header.b=sj7XVrax;
dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
by ozlabs.org (Postfix) with ESMTP id 48nfWs1X7mz9sRf
for <incoming@patchwork.ozlabs.org>;
Thu, 26 Mar 2020 07:16:09 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1727374AbgCYUQI (ORCPT <rfc822;incoming@patchwork.ozlabs.org>);
Wed, 25 Mar 2020 16:16:08 -0400
Received: from mail-wm1-f68.google.com ([209.85.128.68]:50585 "EHLO
mail-wm1-f68.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1727328AbgCYUQI (ORCPT
<rfc822;linux-tegra@vger.kernel.org>);
Wed, 25 Mar 2020 16:16:08 -0400
Received: by mail-wm1-f68.google.com with SMTP id d198so4073496wmd.0
for <linux-tegra@vger.kernel.org>;
Wed, 25 Mar 2020 13:16:07 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:mime-version
:content-transfer-encoding;
bh=yPLnagV0XBnTWT+nGjtRaD+LnSq2BqmeAJnp8U+CWPw=;
b=sj7XVraxdwiyRAeepEQ0wy1nLUUH6vcloNotxoFwaAZmvU2GILePtp+OM8VZxzmSg1
qVjos+BzgdtxI0QGYvlsRwZJmw1PdwfTDzM8kMKmP2AfXDgnFG7LZsGZnzTmdPqErqG6
RfQwpZiPunHplEvI/epnPHACQlV9HoX+teAIWP9gyJkMYwBCVOirkfv4yGqGZWyEciZ2
yM5mGeUZ/OprHtVVEEuF5yb50CJm8cBEHBMr2ooS+0jm+avVEG8DKe9QM2nWgJB7+TXH
7+iryK1A4PDr9L6syw0p6sAbkFd2+P/p44d/rqsKPWTQG0lkd0cgRHx9fVPls/P4Snyr
JwCA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version
:content-transfer-encoding;
bh=yPLnagV0XBnTWT+nGjtRaD+LnSq2BqmeAJnp8U+CWPw=;
b=HWu2t1YnW/GoMLlkfp6ZQha8CvUnfMi/OK1zsN3hDtTtMLwVQL9YBFPvXYfAASIGzA
qXmgdbIdQmwOXRxlDmgcXk8KcOJmvnJTSoE+GPeLrKGVq9h2c6XLINshs7RDWqY7//GM
/NMVkESX/sVh5qVQYVzsQOBWAsLkwpVAmt3lJ81XrCGdA/L5aN2FWOftTWJWoStgtHuB
9N27ffBkV8/72gDCcGxM/lJlfxMBcfPIEMDGWlErsl2U/EPtF+e5AH1kF9/a+lImxa1h
vBlXvgfPKazfOLm1jA809U0QJrCy5bmTOJsaLqnkLPNJRyvlY6JZqk8a1Wc4u6l44uoI
4l3g==
X-Gm-Message-State: ANhLgQ0GzmzHn/uC4G4GzXRW/D8i6fcQ7Y04Wxx+yBOvoeixp0lD9PYD
9Q7E3Ezt7uCnfh5D41Ym8jY=
X-Google-Smtp-Source: ADFU+vvV+Qjqcd+wksczhsC9MSisSEM36LfhftNulFkmYxqwCfpDcq22YDEoWHYpgjaXwwZC4lgCyg==
X-Received: by 2002:a7b:c842:: with SMTP id c2mr5416219wml.154.1585167366416;
Wed, 25 Mar 2020 13:16:06 -0700 (PDT)
Received: from localhost
(p200300E41F4A9B0076D02BFFFE273F51.dip0.t-ipconnect.de.
[2003:e4:1f4a:9b00:76d0:2bff:fe27:3f51])
by smtp.gmail.com with ESMTPSA id
i4sm132568wrm.32.2020.03.25.13.16.05
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Wed, 25 Mar 2020 13:16:05 -0700 (PDT)
From: Thierry Reding <thierry.reding@gmail.com>
To: Thierry Reding <thierry.reding@gmail.com>
Cc: dri-devel@lists.freedesktop.org, linux-tegra@vger.kernel.org
Subject: [PATCH 1/2] drm/tegra: Fix SMMU support on Tegra124 and Tegra210
Date: Wed, 25 Mar 2020 21:16:03 +0100
Message-Id: <20200325201604.833898-1-thierry.reding@gmail.com>
X-Mailer: git-send-email 2.24.1
MIME-Version: 1.0
Sender: linux-tegra-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-tegra.vger.kernel.org>
X-Mailing-List: linux-tegra@vger.kernel.org
From: Thierry Reding <treding@nvidia.com>
When testing whether or not to enable the use of the SMMU, consult the
supported DMA mask rather than the actually configured DMA mask, since
the latter might already have been restricted.
Fixes: 2d9384ff9177 ("drm/tegra: Relax IOMMU usage criteria on old Tegra")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
---
drivers/gpu/drm/tegra/drm.c | 3 ++-
drivers/gpu/host1x/dev.c | 13 +++++++++++++
include/linux/host1x.h | 3 +++
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..583cd6e0ae27 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
static bool host1x_drm_wants_iommu(struct host1x_device *dev)
{
+ struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
struct iommu_domain *domain;
/*
@@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
* sufficient and whether or not the host1x is attached to an IOMMU
* doesn't matter.
*/
- if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
+ if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
return true;
return domain != NULL;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 388bcc2889aa..40a4b9f8b861 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -502,6 +502,19 @@ static void __exit tegra_host1x_exit(void)
}
module_exit(tegra_host1x_exit);
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+ return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 62d216ff1097..c230b4e70d75 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -17,9 +17,12 @@ enum host1x_class {
HOST1X_CLASS_GR3D = 0x60,
};
+struct host1x;
struct host1x_client;
struct iommu_group;
+u64 host1x_get_dma_mask(struct host1x *host1x);
+
/**
* struct host1x_client_ops - host1x client operations
* @init: host1x client initialization code
From patchwork Wed Mar 25 20:16:04 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Thierry Reding <thierry.reding@gmail.com>
X-Patchwork-Id: 1261639
Return-Path: <linux-tegra-owner@vger.kernel.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
helo=vger.kernel.org;
envelope-from=linux-tegra-owner@vger.kernel.org;
receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
dmarc=pass (p=none dis=none) header.from=gmail.com
Authentication-Results: ozlabs.org; dkim=pass (2048-bit key;
unprotected) header.d=gmail.com header.i=@gmail.com
header.a=rsa-sha256 header.s=20161025 header.b=XXUz449u;
dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
by ozlabs.org (Postfix) with ESMTP id 48nfWw6NvSz9sPk
for <incoming@patchwork.ozlabs.org>;
Thu, 26 Mar 2020 07:16:12 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1727316AbgCYUQM (ORCPT <rfc822;incoming@patchwork.ozlabs.org>);
Wed, 25 Mar 2020 16:16:12 -0400
Received: from mail-wr1-f65.google.com ([209.85.221.65]:33914 "EHLO
mail-wr1-f65.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1727328AbgCYUQM (ORCPT
<rfc822;linux-tegra@vger.kernel.org>);
Wed, 25 Mar 2020 16:16:12 -0400
Received: by mail-wr1-f65.google.com with SMTP id 65so4990084wrl.1
for <linux-tegra@vger.kernel.org>;
Wed, 25 Mar 2020 13:16:09 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:in-reply-to:references
:mime-version:content-transfer-encoding;
bh=aW1zxIHiei+l8kDSE2lVXf/aMBDE/GtIkGFrQXvKkrY=;
b=XXUz449uJivXz+1lH6pKa9IvT3vUx61/skXaEyQxpkslFR268FwckKE0ryQDUx701N
hFN9ocSGCuE6bKpdgya8YmthXDASOYWZzKV0R5jms1rqgazVMF6jARv+kE4Jaj9Ek4tl
4eTpmnHinx0xIrgGWCQbfltjb+zAE5XOGX8UCX1526r3yQQpu+OQlKZ70Tvq3pdw0zfT
URkTU8sfdTa9DCxUSsUukPcK9vKOk6XHkFleL6FisODDvXphdzzLa1TCv9UTGLrUsHSd
XDrukLto5efrUE03q5jP6ZN4xbnLDbhY6IkB7PAW1qwSPG/Eg0p0ivpJ58+QwwmBH6zF
ByDQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
:references:mime-version:content-transfer-encoding;
bh=aW1zxIHiei+l8kDSE2lVXf/aMBDE/GtIkGFrQXvKkrY=;
b=DIWKPWCoYx1rnX34DSkRPm2K6lR1SurVvq+IIY5Nrc9uq+E3pmXQcActG0DDAHHK8a
SgnziEvuWTeROgrlwONYq+FUZRQ6s1TRR1+qDXqAlRtdebU/cEep+LRvdzJe/qJBpPqd
SnSTR3Xntgo7EcyLRj9YqSodasylPt3OzrhuDudfTSQtKZghElLfyJV/tzgwG+OC3TD4
RJAykZ0tgWHy7Bc1UB+z6LovuT/sgcPUSLfNqDehQWqwQeqHqXgFAomUN0CCEr2YdjkT
sCpBZPqKtb22FdDWlDiNnEkEmMPA+K4MIWbZL9VuvArjFaaBn6fBxvnX4tAKEcOiKeUy
EZXw==
X-Gm-Message-State: ANhLgQ1Vj1gSFYKgV/7jV1T3UIwTE5jasGmLOhuuGuWvjBs2xXUgieyz
VhNVgYIYU/8R/0Vx9Hv44rw=
X-Google-Smtp-Source: ADFU+vtTfrVHW69I+ZhOz8qw8xUje/j42rKoNxAP2wTt+E5WQ5s6QhBcgeHzC4Bw5Q5NdWxjLUtZ/g==
X-Received: by 2002:adf:800e:: with SMTP id 14mr5104354wrk.369.1585167368929;
Wed, 25 Mar 2020 13:16:08 -0700 (PDT)
Received: from localhost
(p200300E41F4A9B0076D02BFFFE273F51.dip0.t-ipconnect.de.
[2003:e4:1f4a:9b00:76d0:2bff:fe27:3f51])
by smtp.gmail.com with ESMTPSA id
e9sm151985wrw.30.2020.03.25.13.16.07
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Wed, 25 Mar 2020 13:16:07 -0700 (PDT)
From: Thierry Reding <thierry.reding@gmail.com>
To: Thierry Reding <thierry.reding@gmail.com>
Cc: dri-devel@lists.freedesktop.org, linux-tegra@vger.kernel.org
Subject: [PATCH 2/2] gpu: host1x: Use SMMU on Tegra124 and Tegra210
Date: Wed, 25 Mar 2020 21:16:04 +0100
Message-Id: <20200325201604.833898-2-thierry.reding@gmail.com>
X-Mailer: git-send-email 2.24.1
In-Reply-To: <20200325201604.833898-1-thierry.reding@gmail.com>
References: <20200325201604.833898-1-thierry.reding@gmail.com>
MIME-Version: 1.0
Sender: linux-tegra-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-tegra.vger.kernel.org>
X-Mailing-List: linux-tegra@vger.kernel.org
From: Thierry Reding <treding@nvidia.com>
Tegra124 and Tegra210 support addressing more than 32 bits of physical
memory. However, since their host1x does not support the wide GATHER
opcode, they should use the SMMU if at all possible to ensure that all
the system memory can be used for command buffers, irrespective of
whether or not the host1x firewall is enabled.
Signed-off-by: Thierry Reding <treding@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
---
drivers/gpu/host1x/dev.c | 46 ++++++++++++++++++++++++++++++++++++----
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 40a4b9f8b861..d24344e91922 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
}
}
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+ /*
+ * If we support addressing a maximum of 32 bits of physical memory
+ * and if the host1x firewall is enabled, there's no need to enable
+ * IOMMU support. This can happen for example on Tegra20, Tegra30
+ * and Tegra114.
+ *
+ * Tegra124 and later can address up to 34 bits of physical memory and
+ * many platforms come equipped with more than 2 GiB of system memory,
+ * which requires crossing the 4 GiB boundary. But there's a catch: on
+ * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+ * only address up to 32 bits of memory in GATHER opcodes, which means
+ * that command buffers need to either be in the first 2 GiB of system
+ * memory (which could quickly lead to memory exhaustion), or command
+ * buffers need to be treated differently from other buffers (which is
+ * not possible with the current ABI).
+ *
+ * A third option is to use the IOMMU in these cases to make sure all
+ * buffers will be mapped into a 32-bit IOVA space that host1x can
+ * address. This allows all of the system memory to be used and works
+ * within the limitations of the host1x on these SoCs.
+ *
+ * In summary, default to enable IOMMU on Tegra124 and later. For any
+ * of the earlier SoCs, only use the IOMMU for additional safety when
+ * the host1x firewall is disabled.
+ */
+ if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+ if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ return false;
+ }
+
+ return true;
+}
+
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
/*
- * If the host1x firewall is enabled, there's no need to enable IOMMU
- * support. Similarly, if host1x is already attached to an IOMMU (via
- * the DMA API), don't try to attach again.
+ * We may not always want to enable IOMMU support (for example if the
+ * host1x firewall is already enabled and we don't support addressing
+ * more than 32 bits of physical memory), so check for that first.
+ *
+ * Similarly, if host1x is already attached to an IOMMU (via the DMA
+ * API), don't try to attach again.
*/
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+ if (!host1x_wants_iommu(host) || domain)
return domain;
host->group = iommu_group_get(host->dev);

View file

@ -89,7 +89,7 @@ Summary: The Linux kernel
%if 0%{?released_kernel}
# Do we have a -stable update to apply?
%define stable_update 13
%define stable_update 14
# Set rpm version accordingly
%if 0%{?stable_update}
%define stablerev %{stable_update}
@ -837,8 +837,6 @@ Patch321: arm64-serial-8250_tegra-Create-Tegra-specific-8250-driver.patch
Patch324: regulator-pwm-Don-t-warn-on-probe-deferral.patch
# http://patchwork.ozlabs.org/patch/1243112/
Patch325: backlight-lp855x-Ensure-regulators-are-disabled-on-probe-failure.patch
# https://patchwork.ozlabs.org/patch/1261638/
Patch326: arm64-drm-tegra-Fix-SMMU-support-on-Tegra124-and-Tegra210.patch
# http://patchwork.ozlabs.org/patch/1221384/
Patch327: PCI-Add-MCFG-quirks-for-Tegra194-host-controllers.patch
@ -898,9 +896,6 @@ Patch511: e1000e-bump-up-timeout-to-wait-when-ME-un-configure-ULP-mode.patch
Patch512: drm-dp_mst-Fix-drm_dp_send_dpcd_write-return-code.patch
# CVE-2020-10711 rhbz 1825116 1834778
Patch513: net-netlabel-cope-with-NULL-catmap.patch
#rhbz 1779611
Patch514: tpm-check-event-log-version-before-reading-final-eve.patch
@ -913,6 +908,9 @@ Patch516: 0001-pwm-lpss-Fix-get_state-runtime-pm-reference-handling.patch
# kernel.org bz 206217
Patch517: RFC-PCI-tegra-Revert-raw_violation_fixup-for-tegra124.patch
# CVE-2020-12888 rhbz 1836245 1836244
Patch518: vfio-pci-block-user-access-to-disabled-device-MMIO.patch
# END OF PATCH DEFINITIONS
%endif
@ -2943,6 +2941,10 @@ fi
#
#
%changelog
* Wed May 20 2020 Justin M. Forbes <jforbes@fedoraproject.org> - 5.6.14-200
- Linux v5.6.14
- Fix CVE-2020-12888 (rhbz 1836245 1836244)
* Mon May 18 2020 Justin M. Forbes <jforbes@fedoraproject.org>
- Fix stability issue with the jetson-tk1 NIC

View file

@ -1,95 +0,0 @@
From MAILER-DAEMON Tue May 12 19:31:23 2020
From: Paolo Abeni <pabeni@redhat.com>
To: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>, linux-security-module@vger.kernel.org, Paul Moore <paul@paul-moore.com>, ppandit@redhat.com, Matthew Sheets <matthew.sheets@gd-ms.com>
Subject: [PATCH net] netlabel: cope with NULL catmap
Date: Tue, 12 May 2020 14:43:14 +0200
Message-Id: <07d99ae197bfdb2964931201db67b6cd0b38db5b.1589276729.git.pabeni@redhat.com>
Sender: owner-linux-security-module@vger.kernel.org
List-ID: <linux-security-module.vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
The cipso and calipso code can set the MLS_CAT attribute on
successful parsing, even if the corresponding catmap has
not been allocated, as per current configuration and external
input.
Later, selinux code tries to access the catmap if the MLS_CAT flag
is present via netlbl_catmap_getlong(). That may cause null ptr
dereference while processing incoming network traffic.
Address the issue setting the MLS_CAT flag only if the catmap is
really allocated. Additionally let netlbl_catmap_getlong() cope
with NULL catmap.
Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.")
Fixes: 4b8feff251da ("netlabel: fix the horribly broken catmap functions")
Reported-by: Matthew Sheets <matthew.sheets@gd-ms.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/ipv4/cipso_ipv4.c | 6 ++++--
net/ipv6/calipso.c | 3 ++-
net/netlabel/netlabel_kapi.c | 6 ++++++
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 0bd10a1f477f..a23094b050f8 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1258,7 +1258,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1439,7 +1440,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
return ret_val;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 221c81f85cbf..8d3f66c310db 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1047,7 +1047,8 @@ static int calipso_opt_getattr(const unsigned char *calipso,
goto getattr_return;
}
- secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ if (secattr->attr.mls.cat)
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
secattr->type = NETLBL_NLTYPE_CALIPSO;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 409a3ae47ce2..5e1239cef000 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -734,6 +734,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,
if ((off & (BITS_PER_LONG - 1)) != 0)
return -EINVAL;
+ /* a null catmap is equivalent to an empty one */
+ if (!catmap) {
+ *offset = (u32)-1;
+ return 0;
+ }
+
if (off < catmap->startbit) {
off = catmap->startbit;
*offset = off;
--
2.21.3

View file

@ -1,2 +1,2 @@
SHA512 (linux-5.6.tar.xz) = 80846fe2b4e4a7ff471d2dde28a8216ae807a3209f959e93d39ea4fc9a189ea28ec3db9d303b3fe15a28c2cb90e7446876678e93e23353c2d6f262e364a06bc9
SHA512 (patch-5.6.13.xz) = 10eabe59db21b0d82932b8122d3f07f12aec435900350a6d7f3e281676a1036860e24284252425c5b08fea02215166e3f65c49e5b4af8dbb7e03bcfbc6a86148
SHA512 (patch-5.6.14.xz) = 8019bf12993f43d5508845a48b250d30220a8db6db262c8b97cd3f7c73abf0d054fcdfacb671644de529ceb495b95982d693a889051fb60fafc3180bd5b4f29e

View file

@ -0,0 +1,857 @@
From MAILER-DAEMON Wed May 20 15:47:40 2020
Subject: [PATCH v2 1/3] vfio/type1: Support faulting PFNMAP vmas
From: Alex Williamson <alex.williamson@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, cohuck@redhat.com, jgg@ziepe.ca
Date: Tue, 05 May 2020 15:54:44 -0600
Message-ID: <158871568480.15589.17339878308143043906.stgit@gimli.home>
In-Reply-To: <158871401328.15589.17598154478222071285.stgit@gimli.home>
References: <158871401328.15589.17598154478222071285.stgit@gimli.home>
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
With conversion to follow_pfn(), DMA mapping a PFNMAP range depends on
the range being faulted into the vma. Add support to manually provide
that, in the same way as done on KVM with hva_to_pfn_remapped().
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
drivers/vfio/vfio_iommu_type1.c | 36 +++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index cc1d64765ce7..4a4cb7cd86b2 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -317,6 +317,32 @@ static int put_pfn(unsigned long pfn, int prot)
return 0;
}
+static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
+ unsigned long vaddr, unsigned long *pfn,
+ bool write_fault)
+{
+ int ret;
+
+ ret = follow_pfn(vma, vaddr, pfn);
+ if (ret) {
+ bool unlocked = false;
+
+ ret = fixup_user_fault(NULL, mm, vaddr,
+ FAULT_FLAG_REMOTE |
+ (write_fault ? FAULT_FLAG_WRITE : 0),
+ &unlocked);
+ if (unlocked)
+ return -EAGAIN;
+
+ if (ret)
+ return ret;
+
+ ret = follow_pfn(vma, vaddr, pfn);
+ }
+
+ return ret;
+}
+
static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
int prot, unsigned long *pfn)
{
@@ -339,12 +365,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
vaddr = untagged_addr(vaddr);
+retry:
vma = find_vma_intersection(mm, vaddr, vaddr + 1);
if (vma && vma->vm_flags & VM_PFNMAP) {
- if (!follow_pfn(vma, vaddr, pfn) &&
- is_invalid_reserved_pfn(*pfn))
- ret = 0;
+ ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+ if (ret == -EAGAIN)
+ goto retry;
+
+ if (!ret && !is_invalid_reserved_pfn(*pfn))
+ ret = -EFAULT;
}
done:
up_read(&mm->mmap_sem);
From MAILER-DAEMON Wed May 20 15:47:40 2020
Subject: [PATCH v2 2/3] vfio-pci: Fault mmaps to enable vma tracking
From: Alex Williamson <alex.williamson@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, cohuck@redhat.com, jgg@ziepe.ca
Date: Tue, 05 May 2020 15:54:53 -0600
Message-ID: <158871569380.15589.16950418949340311053.stgit@gimli.home>
In-Reply-To: <158871401328.15589.17598154478222071285.stgit@gimli.home>
References: <158871401328.15589.17598154478222071285.stgit@gimli.home>
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Rather than calling remap_pfn_range() when a region is mmap'd, setup
a vm_ops handler to support dynamic faulting of the range on access.
This allows us to manage a list of vmas actively mapping the area that
we can later use to invalidate those mappings. The open callback
invalidates the vma range so that all tracking is inserted in the
fault handler and removed in the close handler.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
drivers/vfio/pci/vfio_pci.c | 76 ++++++++++++++++++++++++++++++++++-
drivers/vfio/pci/vfio_pci_private.h | 7 +++
2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6c6b37b5c04e..66a545a01f8f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1299,6 +1299,70 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
}
+static int vfio_pci_add_vma(struct vfio_pci_device *vdev,
+ struct vm_area_struct *vma)
+{
+ struct vfio_pci_mmap_vma *mmap_vma;
+
+ mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+ if (!mmap_vma)
+ return -ENOMEM;
+
+ mmap_vma->vma = vma;
+
+ mutex_lock(&vdev->vma_lock);
+ list_add(&mmap_vma->vma_next, &vdev->vma_list);
+ mutex_unlock(&vdev->vma_lock);
+
+ return 0;
+}
+
+/*
+ * Zap mmaps on open so that we can fault them in on access and therefore
+ * our vma_list only tracks mappings accessed since last zap.
+ */
+static void vfio_pci_mmap_open(struct vm_area_struct *vma)
+{
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+{
+ struct vfio_pci_device *vdev = vma->vm_private_data;
+ struct vfio_pci_mmap_vma *mmap_vma;
+
+ mutex_lock(&vdev->vma_lock);
+ list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+ if (mmap_vma->vma == vma) {
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
+ break;
+ }
+ }
+ mutex_unlock(&vdev->vma_lock);
+}
+
+static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vfio_pci_device *vdev = vma->vm_private_data;
+
+ if (vfio_pci_add_vma(vdev, vma))
+ return VM_FAULT_OOM;
+
+ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return VM_FAULT_SIGBUS;
+
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct vfio_pci_mmap_ops = {
+ .open = vfio_pci_mmap_open,
+ .close = vfio_pci_mmap_close,
+ .fault = vfio_pci_mmap_fault,
+};
+
static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
{
struct vfio_pci_device *vdev = device_data;
@@ -1357,8 +1421,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
- return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- req_len, vma->vm_page_prot);
+ /*
+ * See remap_pfn_range(), called from vfio_pci_fault() but we can't
+ * change vm_flags within the fault handler. Set them now.
+ */
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &vfio_pci_mmap_ops;
+
+ return 0;
}
static void vfio_pci_request(void *device_data, unsigned int count)
@@ -1608,6 +1678,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
spin_lock_init(&vdev->irqlock);
mutex_init(&vdev->ioeventfds_lock);
INIT_LIST_HEAD(&vdev->ioeventfds_list);
+ mutex_init(&vdev->vma_lock);
+ INIT_LIST_HEAD(&vdev->vma_list);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 36ec69081ecd..9b25f9f6ce1d 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -92,6 +92,11 @@ struct vfio_pci_vf_token {
struct mutex lock;
};
+struct vfio_pci_mmap_vma {
+ struct vm_area_struct *vma;
+ struct list_head vma_next;
+};
+
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
@@ -132,6 +137,8 @@ struct vfio_pci_device {
struct list_head dummy_resources_list;
struct mutex ioeventfds_lock;
struct list_head ioeventfds_list;
+ struct mutex vma_lock;
+ struct list_head vma_list;
};
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
From MAILER-DAEMON Wed May 20 15:47:40 2020
Subject: [PATCH v2 3/3] vfio-pci: Invalidate mmaps and block MMIO access on disabled memory
From: Alex Williamson <alex.williamson@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, cohuck@redhat.com, jgg@ziepe.ca
Date: Tue, 05 May 2020 15:55:02 -0600
Message-ID: <158871570274.15589.10563806532874116326.stgit@gimli.home>
In-Reply-To: <158871401328.15589.17598154478222071285.stgit@gimli.home>
References: <158871401328.15589.17598154478222071285.stgit@gimli.home>
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Accessing the disabled memory space of a PCI device would typically
result in a master abort response on conventional PCI, or an
unsupported request on PCI express. The user would generally see
these as a -1 response for the read return data and the write would be
silently discarded, possibly with an uncorrected, non-fatal AER error
triggered on the host. Some systems however take it upon themselves
to bring down the entire system when they see something that might
indicate a loss of data, such as this discarded write to a disabled
memory space.
To avoid this, we want to try to block the user from accessing memory
spaces while they're disabled. We start with a semaphore around the
memory enable bit, where writers modify the memory enable state and
must be serialized, while readers make use of the memory region and
can access in parallel. Writers include both direct manipulation via
the command register, as well as any reset path where the internal
mechanics of the reset may both explicitly and implicitly disable
memory access, and manipulation of the MSI-X configuration, where the
MSI-X vector table resides in MMIO space of the device. Readers
include the read and write file ops to access the vfio device fd
offsets as well as memory mapped access. In the latter case, we make
use of our new vma list support to zap, or invalidate, those memory
mappings in order to force them to be faulted back in on access.
Our semaphore usage will stall user access to MMIO spaces across
internal operations like reset, but the user might experience new
behavior when trying to access the MMIO space while disabled via the
PCI command register. Access via read or write while disabled will
return -EIO and access via memory maps will result in a SIGBUS. This
is expected to be compatible with known use cases and potentially
provides better error handling capabilities than present in the
hardware, while avoiding the more readily accessible and severe
platform error responses that might otherwise occur.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
drivers/vfio/pci/vfio_pci.c | 263 +++++++++++++++++++++++++++++++----
drivers/vfio/pci/vfio_pci_config.c | 36 ++++-
drivers/vfio/pci/vfio_pci_intrs.c | 18 ++
drivers/vfio/pci/vfio_pci_private.h | 5 +
drivers/vfio/pci/vfio_pci_rdwr.c | 12 ++
5 files changed, 300 insertions(+), 34 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 66a545a01f8f..49ae9faa6099 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -26,6 +26,7 @@
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/nospec.h>
+#include <linux/sched/mm.h>
#include "vfio_pci_private.h"
@@ -184,6 +185,7 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static void vfio_pci_disable(struct vfio_pci_device *vdev);
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
/*
* INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -736,6 +738,12 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
return 0;
}
+struct vfio_devices {
+ struct vfio_device **devices;
+ int cur_index;
+ int max_index;
+};
+
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -984,8 +992,16 @@ static long vfio_pci_ioctl(void *device_data,
return ret;
} else if (cmd == VFIO_DEVICE_RESET) {
- return vdev->reset_works ?
- pci_try_reset_function(vdev->pdev) : -EINVAL;
+ int ret;
+
+ if (!vdev->reset_works)
+ return -EINVAL;
+
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
+ ret = pci_try_reset_function(vdev->pdev);
+ up_write(&vdev->memory_lock);
+
+ return ret;
} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
struct vfio_pci_hot_reset_info hdr;
@@ -1065,8 +1081,9 @@ static long vfio_pci_ioctl(void *device_data,
int32_t *group_fds;
struct vfio_pci_group_entry *groups;
struct vfio_pci_group_info info;
+ struct vfio_devices devs = { .cur_index = 0 };
bool slot = false;
- int i, count = 0, ret = 0;
+ int i, group_idx, mem_idx = 0, count = 0, ret = 0;
minsz = offsetofend(struct vfio_pci_hot_reset, count);
@@ -1118,9 +1135,9 @@ static long vfio_pci_ioctl(void *device_data,
* user interface and store the group and iommu ID. This
* ensures the group is held across the reset.
*/
- for (i = 0; i < hdr.count; i++) {
+ for (group_idx = 0; group_idx < hdr.count; group_idx++) {
struct vfio_group *group;
- struct fd f = fdget(group_fds[i]);
+ struct fd f = fdget(group_fds[group_idx]);
if (!f.file) {
ret = -EBADF;
break;
@@ -1133,8 +1150,9 @@ static long vfio_pci_ioctl(void *device_data,
break;
}
- groups[i].group = group;
- groups[i].id = vfio_external_user_iommu_id(group);
+ groups[group_idx].group = group;
+ groups[group_idx].id =
+ vfio_external_user_iommu_id(group);
}
kfree(group_fds);
@@ -1153,13 +1171,63 @@ static long vfio_pci_ioctl(void *device_data,
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_validate_devs,
&info, slot);
- if (!ret)
- /* User has access, do the reset */
- ret = pci_reset_bus(vdev->pdev);
+ if (ret)
+ goto hot_reset_release;
+
+ devs.max_index = count;
+ devs.devices = kcalloc(count, sizeof(struct vfio_device *),
+ GFP_KERNEL);
+ if (!devs.devices) {
+ ret = -ENOMEM;
+ goto hot_reset_release;
+ }
+
+ /*
+ * We need to get memory_lock for each device, but devices
+ * can share mmap_sem, therefore we need to zap and hold
+ * the vma_lock for each device, and only then get each
+ * memory_lock.
+ */
+ ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_try_zap_and_vma_lock_cb,
+ &devs, slot);
+ if (ret)
+ goto hot_reset_release;
+
+ for (; mem_idx < devs.cur_index; mem_idx++) {
+ struct vfio_pci_device *tmp;
+
+ tmp = vfio_device_data(devs.devices[mem_idx]);
+
+ ret = down_write_trylock(&tmp->memory_lock);
+ if (!ret) {
+ ret = -EBUSY;
+ goto hot_reset_release;
+ }
+ mutex_unlock(&tmp->vma_lock);
+ }
+
+ /* User has access, do the reset */
+ ret = pci_reset_bus(vdev->pdev);
hot_reset_release:
- for (i--; i >= 0; i--)
- vfio_group_put_external_user(groups[i].group);
+ for (i = 0; i < devs.cur_index; i++) {
+ struct vfio_device *device;
+ struct vfio_pci_device *tmp;
+
+ device = devs.devices[i];
+ tmp = vfio_device_data(device);
+
+ if (i < mem_idx)
+ up_write(&tmp->memory_lock);
+ else
+ mutex_unlock(&tmp->vma_lock);
+ vfio_device_put(device);
+ }
+ kfree(devs.devices);
+
+ for (group_idx--; group_idx >= 0; group_idx--)
+ vfio_group_put_external_user(groups[group_idx].group);
kfree(groups);
return ret;
@@ -1299,8 +1367,107 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
}
-static int vfio_pci_add_vma(struct vfio_pci_device *vdev,
- struct vm_area_struct *vma)
+/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
+static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
+{
+ struct vfio_pci_mmap_vma *mmap_vma, *tmp;
+
+ /*
+ * Lock ordering:
+ * vma_lock is nested under mmap_sem for vm_ops callback paths.
+ * The memory_lock semaphore is used by both code paths calling
+ * into this function to zap vmas and the vm_ops.fault callback
+ * to protect the memory enable state of the device.
+ *
+ * When zapping vmas we need to maintain the mmap_sem => vma_lock
+ * ordering, which requires using vma_lock to walk vma_list to
+ * acquire an mm, then dropping vma_lock to get the mmap_sem and
+ * reacquiring vma_lock. This logic is derived from similar
+ * requirements in uverbs_user_mmap_disassociate().
+ *
+ * mmap_sem must always be the top-level lock when it is taken.
+ * Therefore we can only hold the memory_lock write lock when
+ * vma_list is empty, as we'd need to take mmap_sem to clear
+ * entries. vma_list can only be guaranteed empty when holding
+ * vma_lock, thus memory_lock is nested under vma_lock.
+ *
+ * This enables the vm_ops.fault callback to acquire vma_lock,
+ * followed by memory_lock read lock, while already holding
+ * mmap_sem without risk of deadlock.
+ */
+ while (1) {
+ struct mm_struct *mm = NULL;
+
+ if (try) {
+ if (!mutex_trylock(&vdev->vma_lock))
+ return 0;
+ } else {
+ mutex_lock(&vdev->vma_lock);
+ }
+ while (!list_empty(&vdev->vma_list)) {
+ mmap_vma = list_first_entry(&vdev->vma_list,
+ struct vfio_pci_mmap_vma,
+ vma_next);
+ mm = mmap_vma->vma->vm_mm;
+ if (mmget_not_zero(mm))
+ break;
+
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
+ mm = NULL;
+ }
+ if (!mm)
+ return 1;
+ mutex_unlock(&vdev->vma_lock);
+
+ if (try) {
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ mmput(mm);
+ return 0;
+ }
+ } else {
+ down_read(&mm->mmap_sem);
+ }
+ if (mmget_still_valid(mm)) {
+ if (try) {
+ if (!mutex_trylock(&vdev->vma_lock)) {
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return 0;
+ }
+ } else {
+ mutex_lock(&vdev->vma_lock);
+ }
+ list_for_each_entry_safe(mmap_vma, tmp,
+ &vdev->vma_list, vma_next) {
+ struct vm_area_struct *vma = mmap_vma->vma;
+
+ if (vma->vm_mm != mm)
+ continue;
+
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
+
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+ }
+ mutex_unlock(&vdev->vma_lock);
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+}
+
+void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
+{
+ vfio_pci_zap_and_vma_lock(vdev, false);
+ down_write(&vdev->memory_lock);
+ mutex_unlock(&vdev->vma_lock);
+}
+
+/* Caller holds vma_lock */
+static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
+ struct vm_area_struct *vma)
{
struct vfio_pci_mmap_vma *mmap_vma;
@@ -1309,10 +1476,7 @@ static int vfio_pci_add_vma(struct vfio_pci_device *vdev,
return -ENOMEM;
mmap_vma->vma = vma;
-
- mutex_lock(&vdev->vma_lock);
list_add(&mmap_vma->vma_next, &vdev->vma_list);
- mutex_unlock(&vdev->vma_lock);
return 0;
}
@@ -1346,15 +1510,32 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct vfio_pci_device *vdev = vma->vm_private_data;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
- if (vfio_pci_add_vma(vdev, vma))
- return VM_FAULT_OOM;
+ mutex_lock(&vdev->vma_lock);
+ down_read(&vdev->memory_lock);
+
+ if (!__vfio_pci_memory_enabled(vdev)) {
+ ret = VM_FAULT_SIGBUS;
+ mutex_unlock(&vdev->vma_lock);
+ goto up_out;
+ }
+
+ if (__vfio_pci_add_vma(vdev, vma)) {
+ ret = VM_FAULT_OOM;
+ mutex_unlock(&vdev->vma_lock);
+ goto up_out;
+ }
+
+ mutex_unlock(&vdev->vma_lock);
if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start, vma->vm_page_prot))
- return VM_FAULT_SIGBUS;
+ ret = VM_FAULT_SIGBUS;
- return VM_FAULT_NOPAGE;
+up_out:
+ up_read(&vdev->memory_lock);
+ return ret;
}
static const struct vm_operations_struct vfio_pci_mmap_ops = {
@@ -1680,6 +1861,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&vdev->ioeventfds_list);
mutex_init(&vdev->vma_lock);
INIT_LIST_HEAD(&vdev->vma_list);
+ init_rwsem(&vdev->memory_lock);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
@@ -1933,12 +2115,6 @@ static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
}
-struct vfio_devices {
- struct vfio_device **devices;
- int cur_index;
- int max_index;
-};
-
static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
{
struct vfio_devices *devs = data;
@@ -1969,6 +2145,39 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return 0;
}
+static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
+{
+ struct vfio_devices *devs = data;
+ struct vfio_device *device;
+ struct vfio_pci_device *vdev;
+
+ if (devs->cur_index == devs->max_index)
+ return -ENOSPC;
+
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return -EINVAL;
+
+ if (pci_dev_driver(pdev) != &vfio_pci_driver) {
+ vfio_device_put(device);
+ return -EBUSY;
+ }
+
+ vdev = vfio_device_data(device);
+
+ /*
+ * Locking multiple devices is prone to deadlock, runaway and
+ * unwind if we hit contention.
+ */
+ if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
+ vfio_device_put(device);
+ return -EBUSY;
+ }
+
+ devs->devices[devs->cur_index++] = device;
+ return 0;
+}
+
/*
* If a bus or slot reset is available for the provided device and:
* - All of the devices affected by that bus or slot reset are unused
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 90c0b80f8acf..3dcddbd572e6 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write)
*(__le32 *)(&p->write[off]) = cpu_to_le32(write);
}
+/* Caller should hold memory_lock semaphore */
+bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
+{
+ u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
+
+ return cmd & PCI_COMMAND_MEMORY;
+}
+
/*
* Restore the *real* BARs after we detect a FLR or backdoor reset.
* (backdoor = some device specific technique that we didn't catch)
@@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
new_cmd = le32_to_cpu(val);
+ phys_io = !!(phys_cmd & PCI_COMMAND_IO);
+ virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
+ new_io = !!(new_cmd & PCI_COMMAND_IO);
+
phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY);
virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
- phys_io = !!(phys_cmd & PCI_COMMAND_IO);
- virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
- new_io = !!(new_cmd & PCI_COMMAND_IO);
+ if (!new_mem)
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
+ else
+ down_write(&vdev->memory_lock);
/*
* If the user is writing mem/io enable (new_mem/io) and we
@@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
}
count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
- if (count < 0)
+ if (count < 0) {
+ if (offset == PCI_COMMAND)
+ up_write(&vdev->memory_lock);
return count;
+ }
/*
* Save current memory/io enable bits in vconfig to allow for
@@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
*virt_cmd &= cpu_to_le16(~mask);
*virt_cmd |= cpu_to_le16(new_cmd & mask);
+
+ up_write(&vdev->memory_lock);
}
/* Emulate INTx disable */
@@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_EXP_DEVCAP,
&cap);
- if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
+ if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev);
+ up_write(&vdev->memory_lock);
+ }
}
/*
@@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_AF_CAP,
&cap);
- if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
+ if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev);
+ up_write(&vdev->memory_lock);
+ }
}
return count;
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 2056f3f85f59..54102a7eb9d3 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -626,6 +626,8 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
int (*func)(struct vfio_pci_device *vdev, unsigned index,
unsigned start, unsigned count, uint32_t flags,
void *data) = NULL;
+ int ret;
+ u16 cmd;
switch (index) {
case VFIO_PCI_INTX_IRQ_INDEX:
@@ -673,5 +675,19 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
if (!func)
return -ENOTTY;
- return func(vdev, index, start, count, flags, data);
+ if (index == VFIO_PCI_MSIX_IRQ_INDEX) {
+ down_write(&vdev->memory_lock);
+ pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
+ pci_write_config_word(vdev->pdev, PCI_COMMAND,
+ cmd | PCI_COMMAND_MEMORY);
+ }
+
+ ret = func(vdev, index, start, count, flags, data);
+
+ if (index == VFIO_PCI_MSIX_IRQ_INDEX) {
+ pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
+ up_write(&vdev->memory_lock);
+ }
+
+ return ret;
}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9b25f9f6ce1d..c4f25f1e80d7 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -139,6 +139,7 @@ struct vfio_pci_device {
struct list_head ioeventfds_list;
struct mutex vma_lock;
struct list_head vma_list;
+ struct rw_semaphore memory_lock;
};
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
@@ -181,6 +182,10 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
pci_power_t state);
+extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
+extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
+ *vdev);
+
#ifdef CONFIG_VFIO_PCI_IGD
extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index a87992892a9f..f58c45308682 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t x_start = 0, x_end = 0;
resource_size_t end;
void __iomem *io;
+ struct resource *res = &vdev->pdev->resource[bar];
ssize_t done;
if (pci_resource_start(pdev, bar))
@@ -200,8 +201,19 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
x_end = vdev->msix_offset + vdev->msix_size;
}
+ if (res->flags & IORESOURCE_MEM) {
+ down_read(&vdev->memory_lock);
+ if (!__vfio_pci_memory_enabled(vdev)) {
+ up_read(&vdev->memory_lock);
+ return -EIO;
+ }
+ }
+
done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
+ if (res->flags & IORESOURCE_MEM)
+ up_read(&vdev->memory_lock);
+
if (done >= 0)
*ppos += done;