mirror of
https://github.com/torvalds/linux.git
synced 2026-05-13 00:28:54 +02:00
Add struct tso_dma_map to tso.h for tracking DMA addresses of mapped GSO payload data and tso_dma_map_completion_state. The tso_dma_map combines DMA mapping storage with iterator state, allowing drivers to walk pre-mapped DMA regions linearly. Includes fields for the DMA IOVA path (iova_state, iova_offset, total_len) and a fallback per-region path (linear_dma, frags[], frag_idx, offset). The tso_dma_map_completion_state makes the IOVA completion state opaque for drivers. Drivers are expected to allocate this and use the added helpers to update the completion state. Adds skb_frag_phys() to skbuff.h, returning the physical address of a paged fragment's data, which is used by the tso_dma_map helpers introduced in this commit described below. The added TSO DMA map helpers are: tso_dma_map_init(): DMA-maps the linear payload region and all frags upfront. Prefers the DMA IOVA API for a single contiguous mapping with one IOTLB sync; falls back to per-region dma_map_phys() otherwise. Returns 0 on success, cleans up partial mappings on failure. tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths. tso_dma_map_count(): counts how many descriptors the next N bytes of payload will need. Returns 1 if IOVA is used since the mapping is contiguous. tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair. On the IOVA path, each segment is a single contiguous chunk. On the fallback path, indicates when a chunk starts a new DMA mapping so the driver can set dma_unmap_len on that descriptor for completion-time unmapping. tso_dma_map_completion_save(): updates the completion state. Drivers will call this at xmit time. tso_dma_map_complete(): tears down the mapping at completion time and returns true if the IOVA path was used. If it was not used, this is a no-op and returns false. Suggested-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Joe Damato <joe@dama.to> Link: https://patch.msgid.link/20260408230607.2019402-2-joe@dama.to Signed-off-by: Jakub Kicinski <kuba@kernel.org>
359 lines
9.3 KiB
C
359 lines
9.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/export.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <net/ip.h>
|
|
#include <net/tso.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/unaligned.h>
|
|
|
|
void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
|
|
int size, bool is_last)
|
|
{
|
|
int hdr_len = skb_transport_offset(skb) + tso->tlen;
|
|
int mac_hdr_len = skb_network_offset(skb);
|
|
|
|
memcpy(hdr, skb->data, hdr_len);
|
|
if (!tso->ipv6) {
|
|
struct iphdr *iph = (void *)(hdr + mac_hdr_len);
|
|
|
|
iph->id = htons(tso->ip_id);
|
|
iph->tot_len = htons(size + hdr_len - mac_hdr_len);
|
|
tso->ip_id++;
|
|
} else {
|
|
struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
|
|
|
|
iph->payload_len = htons(size + tso->tlen);
|
|
}
|
|
hdr += skb_transport_offset(skb);
|
|
if (tso->tlen != sizeof(struct udphdr)) {
|
|
struct tcphdr *tcph = (struct tcphdr *)hdr;
|
|
|
|
put_unaligned_be32(tso->tcp_seq, &tcph->seq);
|
|
|
|
if (!is_last) {
|
|
/* Clear all special flags for not last packet */
|
|
tcph->psh = 0;
|
|
tcph->fin = 0;
|
|
tcph->rst = 0;
|
|
}
|
|
} else {
|
|
struct udphdr *uh = (struct udphdr *)hdr;
|
|
|
|
uh->len = htons(sizeof(*uh) + size);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(tso_build_hdr);
|
|
|
|
void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size)
|
|
{
|
|
tso->tcp_seq += size; /* not worth avoiding this operation for UDP */
|
|
tso->size -= size;
|
|
tso->data += size;
|
|
|
|
if ((tso->size == 0) &&
|
|
(tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
|
|
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
|
|
|
|
/* Move to next segment */
|
|
tso->size = skb_frag_size(frag);
|
|
tso->data = skb_frag_address(frag);
|
|
tso->next_frag_idx++;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(tso_build_data);
|
|
|
|
int tso_start(struct sk_buff *skb, struct tso_t *tso)
|
|
{
|
|
int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
|
|
int hdr_len = skb_transport_offset(skb) + tlen;
|
|
|
|
tso->tlen = tlen;
|
|
tso->ip_id = ntohs(ip_hdr(skb)->id);
|
|
tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0;
|
|
tso->next_frag_idx = 0;
|
|
tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
|
|
|
|
/* Build first data */
|
|
tso->size = skb_headlen(skb) - hdr_len;
|
|
tso->data = skb->data + hdr_len;
|
|
if ((tso->size == 0) &&
|
|
(tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
|
|
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
|
|
|
|
/* Move to next segment */
|
|
tso->size = skb_frag_size(frag);
|
|
tso->data = skb_frag_address(frag);
|
|
tso->next_frag_idx++;
|
|
}
|
|
return hdr_len;
|
|
}
|
|
EXPORT_SYMBOL(tso_start);
|
|
|
|
static int tso_dma_iova_try(struct device *dev, struct tso_dma_map *map,
|
|
phys_addr_t phys, size_t linear_len,
|
|
size_t total_len, size_t *offset)
|
|
{
|
|
const struct sk_buff *skb;
|
|
unsigned int nr_frags;
|
|
int i;
|
|
|
|
if (!dma_iova_try_alloc(dev, &map->iova_state, phys, total_len))
|
|
return 1;
|
|
|
|
skb = map->skb;
|
|
nr_frags = skb_shinfo(skb)->nr_frags;
|
|
|
|
if (linear_len) {
|
|
if (dma_iova_link(dev, &map->iova_state,
|
|
phys, *offset, linear_len,
|
|
DMA_TO_DEVICE, 0))
|
|
goto iova_fail;
|
|
map->linear_len = linear_len;
|
|
*offset += linear_len;
|
|
}
|
|
|
|
for (i = 0; i < nr_frags; i++) {
|
|
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
|
|
unsigned int frag_len = skb_frag_size(frag);
|
|
|
|
if (dma_iova_link(dev, &map->iova_state,
|
|
skb_frag_phys(frag), *offset,
|
|
frag_len, DMA_TO_DEVICE, 0)) {
|
|
map->nr_frags = i;
|
|
goto iova_fail;
|
|
}
|
|
map->frags[i].len = frag_len;
|
|
*offset += frag_len;
|
|
map->nr_frags = i + 1;
|
|
}
|
|
|
|
if (dma_iova_sync(dev, &map->iova_state, 0, total_len))
|
|
goto iova_fail;
|
|
|
|
return 0;
|
|
|
|
iova_fail:
|
|
dma_iova_destroy(dev, &map->iova_state, *offset,
|
|
DMA_TO_DEVICE, 0);
|
|
memset(&map->iova_state, 0, sizeof(map->iova_state));
|
|
|
|
/* reset map state */
|
|
map->frag_idx = -1;
|
|
map->offset = 0;
|
|
map->linear_len = 0;
|
|
map->nr_frags = 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
* tso_dma_map_init - DMA-map GSO payload regions
|
|
* @map: map struct to initialize
|
|
* @dev: device for DMA mapping
|
|
* @skb: the GSO skb
|
|
* @hdr_len: per-segment header length in bytes
|
|
*
|
|
* DMA-maps the linear payload (after headers) and all frags.
|
|
* Prefers the DMA IOVA API (one contiguous mapping, one IOTLB sync);
|
|
* falls back to per-region dma_map_phys() when IOVA is not available.
|
|
* Positions the iterator at byte 0 of the payload.
|
|
*
|
|
* Return: 0 on success, -ENOMEM on DMA mapping failure (partial mappings
|
|
* are cleaned up internally).
|
|
*/
|
|
int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
|
|
const struct sk_buff *skb, unsigned int hdr_len)
|
|
{
|
|
unsigned int linear_len = skb_headlen(skb) - hdr_len;
|
|
unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
|
|
size_t total_len = skb->len - hdr_len;
|
|
size_t offset = 0;
|
|
phys_addr_t phys;
|
|
int i;
|
|
|
|
map->dev = dev;
|
|
map->skb = skb;
|
|
map->hdr_len = hdr_len;
|
|
map->frag_idx = -1;
|
|
map->offset = 0;
|
|
map->iova_offset = 0;
|
|
map->total_len = total_len;
|
|
map->linear_len = 0;
|
|
map->nr_frags = 0;
|
|
memset(&map->iova_state, 0, sizeof(map->iova_state));
|
|
|
|
if (!total_len)
|
|
return 0;
|
|
|
|
if (linear_len)
|
|
phys = virt_to_phys(skb->data + hdr_len);
|
|
else
|
|
phys = skb_frag_phys(&skb_shinfo(skb)->frags[0]);
|
|
|
|
if (tso_dma_iova_try(dev, map, phys, linear_len, total_len, &offset)) {
|
|
/* IOVA path failed, map state was reset. Fallback to
|
|
* per-region dma_map_phys()
|
|
*/
|
|
if (linear_len) {
|
|
map->linear_dma = dma_map_phys(dev, phys, linear_len,
|
|
DMA_TO_DEVICE, 0);
|
|
if (dma_mapping_error(dev, map->linear_dma))
|
|
return -ENOMEM;
|
|
map->linear_len = linear_len;
|
|
}
|
|
|
|
for (i = 0; i < nr_frags; i++) {
|
|
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
|
|
unsigned int frag_len = skb_frag_size(frag);
|
|
|
|
map->frags[i].len = frag_len;
|
|
map->frags[i].dma = dma_map_phys(dev, skb_frag_phys(frag),
|
|
frag_len, DMA_TO_DEVICE, 0);
|
|
if (dma_mapping_error(dev, map->frags[i].dma)) {
|
|
tso_dma_map_cleanup(map);
|
|
return -ENOMEM;
|
|
}
|
|
map->nr_frags = i + 1;
|
|
}
|
|
}
|
|
|
|
if (linear_len == 0 && nr_frags > 0)
|
|
map->frag_idx = 0;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(tso_dma_map_init);
|
|
|
|
/**
|
|
* tso_dma_map_cleanup - unmap all DMA regions in a tso_dma_map
|
|
* @map: the map to clean up
|
|
*
|
|
* Handles both IOVA and fallback paths. For IOVA, calls
|
|
* dma_iova_destroy(). For fallback, unmaps each region individually.
|
|
*/
|
|
void tso_dma_map_cleanup(struct tso_dma_map *map)
|
|
{
|
|
int i;
|
|
|
|
if (dma_use_iova(&map->iova_state)) {
|
|
dma_iova_destroy(map->dev, &map->iova_state, map->total_len,
|
|
DMA_TO_DEVICE, 0);
|
|
memset(&map->iova_state, 0, sizeof(map->iova_state));
|
|
} else {
|
|
if (map->linear_len)
|
|
dma_unmap_phys(map->dev, map->linear_dma,
|
|
map->linear_len, DMA_TO_DEVICE, 0);
|
|
|
|
for (i = 0; i < map->nr_frags; i++)
|
|
dma_unmap_phys(map->dev, map->frags[i].dma,
|
|
map->frags[i].len, DMA_TO_DEVICE, 0);
|
|
}
|
|
|
|
map->linear_len = 0;
|
|
map->nr_frags = 0;
|
|
}
|
|
EXPORT_SYMBOL(tso_dma_map_cleanup);
|
|
|
|
/**
|
|
* tso_dma_map_count - count descriptors for a payload range
|
|
* @map: the payload map
|
|
* @len: number of payload bytes in this segment
|
|
*
|
|
* Counts how many contiguous DMA region chunks the next @len bytes
|
|
* will span, without advancing the iterator. On the IOVA path this
|
|
* is always 1 (contiguous). On the fallback path, uses region sizes
|
|
* from the current position.
|
|
*
|
|
* Return: the number of descriptors needed for @len bytes of payload.
|
|
*/
|
|
unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len)
|
|
{
|
|
unsigned int offset = map->offset;
|
|
int idx = map->frag_idx;
|
|
unsigned int count = 0;
|
|
|
|
if (!len)
|
|
return 0;
|
|
|
|
if (dma_use_iova(&map->iova_state))
|
|
return 1;
|
|
|
|
while (len > 0) {
|
|
unsigned int region_len, chunk;
|
|
|
|
if (idx == -1)
|
|
region_len = map->linear_len;
|
|
else
|
|
region_len = map->frags[idx].len;
|
|
|
|
chunk = min(len, region_len - offset);
|
|
len -= chunk;
|
|
count++;
|
|
offset = 0;
|
|
idx++;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
EXPORT_SYMBOL(tso_dma_map_count);
|
|
|
|
/**
|
|
* tso_dma_map_next - yield the next DMA address range
|
|
* @map: the payload map
|
|
* @addr: output DMA address
|
|
* @chunk_len: output chunk length
|
|
* @mapping_len: full DMA mapping length when this chunk starts a new
|
|
* mapping region, or 0 when continuing a previous one.
|
|
* On the IOVA path this is always 0 (driver must not
|
|
* do per-region unmaps; use tso_dma_map_cleanup instead).
|
|
* @seg_remaining: bytes left in current segment
|
|
*
|
|
* Yields the next (dma_addr, chunk_len) pair and advances the iterator.
|
|
* On the IOVA path, the entire payload is contiguous so each segment
|
|
* is always a single chunk.
|
|
*
|
|
* Return: true if a chunk was yielded, false when @seg_remaining is 0.
|
|
*/
|
|
bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
|
|
unsigned int *chunk_len, unsigned int *mapping_len,
|
|
unsigned int seg_remaining)
|
|
{
|
|
unsigned int region_len, chunk;
|
|
|
|
if (!seg_remaining)
|
|
return false;
|
|
|
|
/* IOVA path: contiguous DMA range, no region boundaries */
|
|
if (dma_use_iova(&map->iova_state)) {
|
|
*addr = map->iova_state.addr + map->iova_offset;
|
|
*chunk_len = seg_remaining;
|
|
*mapping_len = 0;
|
|
map->iova_offset += seg_remaining;
|
|
return true;
|
|
}
|
|
|
|
/* Fallback path: per-region iteration */
|
|
|
|
if (map->frag_idx == -1) {
|
|
region_len = map->linear_len;
|
|
chunk = min(seg_remaining, region_len - map->offset);
|
|
*addr = map->linear_dma + map->offset;
|
|
} else {
|
|
region_len = map->frags[map->frag_idx].len;
|
|
chunk = min(seg_remaining, region_len - map->offset);
|
|
*addr = map->frags[map->frag_idx].dma + map->offset;
|
|
}
|
|
|
|
*mapping_len = (map->offset == 0) ? region_len : 0;
|
|
*chunk_len = chunk;
|
|
map->offset += chunk;
|
|
|
|
if (map->offset >= region_len) {
|
|
map->frag_idx++;
|
|
map->offset = 0;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL(tso_dma_map_next);
|