DPDK-22.11.2 [四] 官方basicfwd编译运行讲解
步骤概览
- 编译安装dpdk
- 绑定网卡驱动vfio,可用网口数必须是偶数且不少于两个(程序按 0<->1、2<->3 的方式成对转发)
- 配置LD_LIBRARY_PATH和PKG_CONFIG_PATH
- 编译运行
源码
basicfwd.c
这个程序是从一个网口获取数据,然后发送到另一个网口,所以收发消息的代码都有了,可以根据这个程序编写自己的dpdk应用。
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2015 Intel Corporation
*/
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32
/* basicfwd.c: Basic DPDK skeleton forwarding example. */
/*
 * Bring up one Ethernet port with a single RX queue and a single TX queue.
 *
 * port      - ethdev port id to configure and start.
 * mbuf_pool - mempool handed to the RX queue for its receive buffers.
 *
 * Returns 0 on success. Returns -1 when the port id is not valid, otherwise
 * the error code of the first ethdev call that failed.
 */
/* Main functional part of port initialization. 8< */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
    const uint16_t rx_rings = 1, tx_rings = 1;
    uint16_t nb_rxd = RX_RING_SIZE;
    uint16_t nb_txd = TX_RING_SIZE;
    struct rte_eth_conf port_conf;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_txconf txconf;
    struct rte_ether_addr addr;
    uint16_t rxq, txq;
    int retval;

    /* Reject port ids the ethdev layer does not know about. */
    if (!rte_eth_dev_is_valid_port(port))
        return -1;

    /* Start from an all-zero port configuration. */
    memset(&port_conf, 0, sizeof(port_conf));

    /* Query the device; dev_info supplies capabilities and default TX config. */
    retval = rte_eth_dev_info_get(port, &dev_info);
    if (retval != 0) {
        printf("Error during getting device (port %u) info: %s\n",
                port, strerror(-retval));
        return retval;
    }

    /*
     * Request the MBUF_FAST_FREE TX offload only when the device advertises
     * it: the flag is a single bit, so masking the capabilities with it
     * yields either the flag itself or zero.
     * NOTE(review): per the DPDK ethdev docs this offload assumes every mbuf
     * on a TX queue comes from one mempool with refcnt 1 - confirm before
     * reusing this code with several pools.
     */
    port_conf.txmode.offloads |=
        dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

    /* Configure the Ethernet device with rx_rings RX and tx_rings TX queues. */
    retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
    if (retval != 0)
        return retval;

    /*
     * Let the driver adjust the requested descriptor counts; nb_rxd and
     * nb_txd are passed by pointer and the (possibly updated) values are
     * what the queue setup calls below use.
     */
    retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
    if (retval != 0)
        return retval;

    /* Allocate and set up 1 RX queue per Ethernet port. */
    for (rxq = 0; rxq < rx_rings; rxq++) {
        retval = rte_eth_rx_queue_setup(port, rxq, nb_rxd,
                rte_eth_dev_socket_id(port), NULL, mbuf_pool);
        if (retval < 0)
            return retval;
    }

    /* TX queues use the device defaults plus the offloads chosen above. */
    txconf = dev_info.default_txconf;
    txconf.offloads = port_conf.txmode.offloads;

    /* Allocate and set up 1 TX queue per Ethernet port. */
    for (txq = 0; txq < tx_rings; txq++) {
        retval = rte_eth_tx_queue_setup(port, txq, nb_txd,
                rte_eth_dev_socket_id(port), &txconf);
        if (retval < 0)
            return retval;
    }

    /* Starting Ethernet port. 8< */
    retval = rte_eth_dev_start(port);
    /* >8 End of starting of ethernet port. */
    if (retval < 0)
        return retval;

    /* Display the port MAC address. */
    retval = rte_eth_macaddr_get(port, &addr);
    if (retval != 0)
        return retval;

    printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
            " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
            port, RTE_ETHER_ADDR_BYTES(&addr));

    /*
     * Enable RX in promiscuous mode for the Ethernet device. The result of
     * this last step (0 on success) is the function's return value.
     */
    return rte_eth_promiscuous_enable(port);
}
/* >8 End of main functional part of port initialization. */
/*
 * Forwarding loop run on the calling lcore. It never returns: after a
 * one-time NUMA placement check it polls every port forever, sending what
 * it receives on queue 0 of a port out of queue 0 of the paired port.
 */
/* Basic forwarding application lcore. 8< */
static __rte_noreturn void
lcore_main(void)
{
    uint16_t port;

    /*
     * Warn about every port whose NUMA node is known (socket id >= 0) and
     * differs from the node of the polling thread.
     */
    RTE_ETH_FOREACH_DEV(port) {
        if (rte_eth_dev_socket_id(port) < 0)
            continue;
        if (rte_eth_dev_socket_id(port) == (int)rte_socket_id())
            continue;
        printf("WARNING, port %u is on remote NUMA node to polling thread.\n"
                "\tPerformance will not be optimal.\n", port);
    }

    printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
            rte_lcore_id());

    /* Main work of application loop. 8< */
    while (1) {
        /*
         * Receive packets on a port and forward them on the paired
         * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
         */
        RTE_ETH_FOREACH_DEV(port) {
            struct rte_mbuf *pkts[BURST_SIZE];
            uint16_t received;
            uint16_t sent;

            /* Get burst of RX packets, from first port of pair. */
            received = rte_eth_rx_burst(port, 0, pkts, BURST_SIZE);
            if (unlikely(received == 0))
                continue;

            /* Send burst of TX packets, to second port of pair. */
            sent = rte_eth_tx_burst(port ^ 1, 0, pkts, received);

            /*
             * Free any unsent packets: mbufs from index `sent` onward were
             * not accepted by the TX burst, so they are released here.
             */
            if (unlikely(sent < received)) {
                do {
                    rte_pktmbuf_free(pkts[sent]);
                } while (++sent < received);
            }
        }
    }
    /* >8 End of loop. */
}
/* >8 End Basic forwarding application lcore. */
/*
* The main function, which does initialization and calls the per-lcore
* functions.
*/
int
main(int argc, char *argv[])
{
struct rte_mempool *mbuf_pool;
unsigned nb_ports;
uint16_t portid;
/* Initializion the Environment Abstraction Layer (EAL). 8< */
int ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
/* >8 End of initialization the Environment Abstraction Layer (EAL). */
argc -= ret;
argv += ret;
/* Check that there is an even number of ports to send/receive on. */
// 获取当前可用的网口数
nb_ports = rte_eth_dev_count_avail();
if (nb_ports < 2 || (nb_ports & 1))
rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
/* Creates a new mempool in memory to hold the mbufs. */
/* Allocates mempool to hold the mbufs. 8< */
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
/* >8 End of allocating mempool to hold mbuf. */
if (mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
/* Initializing all ports. 8< */
RTE_ETH_FOREACH_DEV(portid)
if (port_init(portid, mbuf_pool) != 0)
rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
portid);
/* >8 End of initializing all ports. */
if (rte_lcore_count() > 1)
printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
/* Call lcore_main on the main core only. Called on single lcore. 8< */
lcore_main();
/* >8 End of called on single lcore. */
/* clean up the EAL */
rte_eal_cleanup();
return 0;
}
重要api解析
int ret = rte_eal_init(argc, argv);
rte_eal_init()
int rte_eal_init ( int argc,
char ** argv
)
Initialize the Environment Abstraction Layer (EAL).
This function is to be executed on the MAIN lcore only, as soon as possible in the application's main() function. It puts the WORKER lcores in the WAIT state.
初始化EAL(环境抽象层)。该函数只能在主lcore(主线程)上调用,并且应当在main()中尽早调用;调用后各个worker lcore会进入WAIT状态。
argc 和 argv 与C语言 main 函数的参数一致,argc 表示参数个数,argv 是参数列表。
支持的参数
官方文档给出了 rte_eal_init 支持的参数说明,比如常见的:
-l <core list>
List of cores to run on
The argument format is <c1>[-c2][,c3[-c4],...] where c1, c2, etc are core indexes between 0 and 128.
具体可以参考官方资料 http://doc.dpdk.org/guides-22.11/linux_gsg/linux_eal_parameters.html
返回值表示 rte_eal_init 解析(消耗)了多少个参数,后续通过
argc -= ret;
argv += ret;
跳过使用过的参数,然后解析自己程序需要的参数。
所以dpdk中的示例代码大部分都有这套逻辑,并且官方有说明:rte_eal_init 需要的参数要放在前面,后面跟 -- ,然后才是程序自己的参数。rte_eal_init 解析到 -- 就会结束,并返回解析了多少个参数;程序据此跳过这些参数,再解析自己的参数。
l2fwd
http://doc.dpdk.org/guides-22.11/sample_app_ug/l2_forward_real_virtual.html
我们可以找一个示例确认一下,比如l2fwd,代码中有相同逻辑
/* Init EAL. 8< */
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
argc -= ret;
argv += ret;
force_quit = false;
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
/* parse application arguments (after the EAL ones) */
ret = l2fwd_parse_args(argc, argv);
官方文档也介绍了用法:
./<build_dir>/examples/dpdk-l2fwd [EAL options] -- -p PORTMASK
[-P]
[-q NQ]
--[no-]mac-updating
[--portmap="(port, port)[,(port, port)]"]
前面是EAL options,中间使用 -- 分隔开,后面是l2fwd自己的参数,由 l2fwd_parse_args 进行解析。
rte_pktmbuf_pool_create
创建内存池
/* Creates a new mempool in memory to hold the mbufs. */
/* Allocates mempool to hold the mbufs. 8< */
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
/* >8 End of allocating mempool to hold mbuf. */
rte_pktmbuf_pool_create()
struct rte_mempool* rte_pktmbuf_pool_create ( const char * name,
unsigned n,
unsigned cache_size,
uint16_t priv_size,
uint16_t data_room_size,
int socket_id
)
Create a mbuf pool.
This function creates and initializes a packet mbuf pool. It is a wrapper to rte_mempool functions.
Parameters
name The name of the mbuf pool. 内存池的名称
n The number of elements in the mbuf pool. The optimum size (in terms of memory usage) for a mempool is when n is a power of two minus one: n = (2^q - 1). 内存池元素的个数,最好是2的q次方减1。代码中NUM_MBUFS定义的就是2^13-1(8191),然后乘以可用的网口数。注意这里只创建了一个内存池,由所有网口共用;总容量按每个网口NUM_MBUFS个元素计算,而不是为每个网口各建一个内存池。
cache_size Size of the per-core object cache. See rte_mempool_create() for details. cache大小,具体如何设置,可以参考rte_mempool_create的介绍。代码中设定的是MBUF_CACHE_SIZE,大小是250。
priv_size Size of application private area between the rte_mbuf structure and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN. 应用私有数据区的大小,该区域位于rte_mbuf结构体和数据缓冲区之间,取值必须按RTE_MBUF_PRIV_ALIGN对齐。代码中设置的是0。
data_room_size Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM. 每一个mbuf中数据缓冲区的大小(包含RTE_PKTMBUF_HEADROOM预留的头部空间),不是整个mbuf的大小。代码中设置的是RTE_MBUF_DEFAULT_BUF_SIZE。
socket_id The socket identifier where the memory should be allocated. The value can be SOCKET_ID_ANY if there is no NUMA constraint for the reserved zone. 内存应该在哪个socket上申请,这里的socket就是前面知识介绍的CPU socket,也可以说是在哪个NUMA node上申请。代码中设置的是rte_socket_id(),即获取当前线程所在的socket id。建议运行程序的CPU核、网卡和内存都位于同一个socket上,前面的文章也有介绍过。
rte_lcore_count
获取当前程序启用的lcore(逻辑核)个数。basicfwd只会用到1个,启用了多个时只是打印一条警告;有的程序则需要多个。
if (rte_lcore_count() > 1)
printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
test_order_common
比如这个测试case,至少需要3个lcore:1个producer用于生产数据,N个(至少1个)worker用于处理数据,再加上运行主线程的main lcore。
/* 1 producer + N workers + main */
if (rte_lcore_count() < 3) {
evt_err("test need minimum 3 lcores");
return -1;
}
http://doc.dpdk.org/api-22.11/index.html
http://doc.dpdk.org/guides-22.11/index.html