当前位置: 代码迷 >> 综合 >> dpdk Ipv4组包逻辑解析
  详细解决方案

dpdk Ipv4组包逻辑解析

热度:43   发布时间:2023-11-15 00:36:09.0

零、参考

30. IP Fragmentation and Reassembly Library — Data Plane Development Kit 20.02.1 documentation (dpdk.org):http://doc.dpdk.org/guides-20.02/prog_guide/ip_fragment_reassembly_lib.html#ip-fragment-table

源码:DPDK-21.11/lib/ip_frag

一、重组步骤

1、用 “<src ip>, <dst ip>, <pkt id>” 生成的 key 在 fragment table 中查找。

2、如果查找到 entry,检查 entry 是否超时。如果超时,那么释放所有之前接收的分片,并移除它们在 entry 中的相关信息。

3、如果使用那个key在表中没有找到对应的 entry,那么会尝试使用两种方法来创建一个新的:

  • 使用一个空的 entry
  • 删除一个已经超时的 entry,释放相关的 Mbuf,重新存储一个新的 key 在里面。

4、根据新到的分片更新 entry 信息,检查数据包是否能被重组(entry 中已经包含了所有的分片)

  • 如果检查分片都已经到齐了,则进行重组,把 entry 标记为空,同时,返回重组后的 mbuf 指针给调用者。
  • 如果没有到齐,则返回 NULL。

二、源码分析

 1、相关数据结构描述

/*
 * Fragmented mbuf: one received fragment plus its offset and length
 * within the packet being reassembled.
 */
struct ip_frag {
	uint16_t ofs;        /* offset into the packet */
	uint16_t len;        /* length of fragment */
	struct rte_mbuf *mb; /* fragment mbuf */
};

/*
 * key: <src addr, dst_addr, id> to uniquely identify fragmented datagram.
 */
struct ip_frag_key {uint64_t src_dst[4];/* src and dst address, only first 8 bytes used for IPv4 */RTE_STD_C11union {uint64_t id_key_len; /* combined for easy fetch */__extension__struct {uint32_t id;      /* packet id */uint32_t key_len; /* src/dst key length */};};
};/** Fragmented packet to reassemble.* First two entries in the frags[] array are for the last and first fragments.*/
struct ip_frag_pkt {
	RTE_TAILQ_ENTRY(ip_frag_pkt) lru;      /* LRU list */
	struct ip_frag_key key;                /* fragmentation key */
	/* creation timestamp; compared with the table's max_cycles to detect expiry */
	uint64_t start;
	/* expected reassembled size; set to ofs + len when the last fragment is processed */
	uint32_t total_size;
	/* size of fragments received; grows by len for every processed fragment */
	uint32_t frag_size;
	/* index of next entry to fill; advanced for each intermediate fragment */
	uint32_t last_idx;
	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
} __rte_cache_aligned;
/*
 * fragmentation table statistics
 * Counters are updated through IP_FRAG_TBL_STAT_UPDATE().
 */
struct ip_frag_tbl_stat {
	uint64_t find_num;     /* total # of find/insert attempts. */
	uint64_t add_num;      /* # of add ops. */
	uint64_t del_num;      /* # of del ops. */
	uint64_t reuse_num;    /* # of reuse (del/add) ops. */
	/* total # of add failures; bumped when ip_frag_find() ends with no entry. */
	uint64_t fail_total;
	/* # of 'no space' add failures; table full and the LRU head not yet expired. */
	uint64_t fail_nospace;
} __rte_cache_aligned;

/* fragmentation table */
struct rte_ip_frag_tbl {
	/* ttl for table entries; an entry is stale once max_cycles + start < now. */
	uint64_t max_cycles;
	uint32_t entry_mask;     /* hash value mask. */
	uint32_t max_entries;    /* max entries allowed. */
	uint32_t use_entries;    /* entries in use. */
	uint32_t bucket_entries; /* hash associativity. */
	uint32_t nb_entries;     /* total size of the table. */
	uint32_t nb_buckets;     /* num of associativity lines. */
	/* last used entry; ip_frag_find() stores its result here (may be NULL). */
	struct ip_frag_pkt *last;
	struct ip_pkt_list lru;       /* LRU list for table entries. */
	struct ip_frag_tbl_stat stat; /* statistics counters. */
	/* hash table; trailing zero-length array declared at the end of the struct. */
	__extension__ struct ip_frag_pkt pkt[0];
};

 2、组包过程中的函数调用

        接收到 ip fragment 后进行 key 的查找,将fragment保存到找到的pkt中

/*
 * Process new mbuf with fragment of IPV4 packet.
 * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
 * @param tbl
 *   Table where to lookup/add the fragmented packet.
 * @param dr
 *   Death row; the incoming mbuf is handed to it (IP_FRAG_MBUF2DR) when no
 *   table entry can be found or created for the fragment.
 * @param mb
 *   Incoming mbuf with IPV4 fragment.
 * @param tms
 *   Fragment arrival timestamp.
 * @param ip_hdr
 *   Pointer to the IPV4 header inside the fragment.
 * @return
 *   Pointer to mbuf for reassembled packet, or NULL if:
 *   - an error occurred.
 *   - not all fragments of the packet are collected yet.
 */
struct rte_mbuf *
rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
	struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
	struct rte_ipv4_hdr *ip_hdr)
{
	/* (code elided in this excerpt; presumably declares fp/key and derives
	 * ip_ofs, ip_len, ip_flag from ip_hdr - confirm against the full source) */
	...

	/* try to find/add entry into the fragment's table. */
	if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
		/* no entry available: give the mbuf to the death row and stop. */
		IP_FRAG_MBUF2DR(dr, mb);
		return NULL;
	}

	/* (code elided in this excerpt) */
	...

	/* process the fragmented packet. */
	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
	ip_frag_inuse(tbl, fp);

	/* (code elided in this excerpt) */
	...
}

         根据 ip 三元组在 hash 表中寻找 pkt

/** Find an entry in the table for the corresponding fragment.* If such entry is not present, then allocate a new one.* If the entry is stale, then free and reuse it.*/
struct ip_frag_pkt *
ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,const struct ip_frag_key *key, uint64_t tms)
{struct ip_frag_pkt *pkt, *free, *stale, *lru;uint64_t max_cycles;/** Actually the two line below are totally redundant.* they are here, just to make gcc 4.6 happy.*/free = NULL;stale = NULL;max_cycles = tbl->max_cycles;IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {/*timed-out entry, free and invalidate it*/if (stale != NULL) {ip_frag_tbl_del(tbl, dr, stale);free = stale;/** we found a free entry, check if we can use it.* If we run out of free entries in the table, then* check if we have a timed out entry to delete.*/} else if (free != NULL &&tbl->max_entries <= tbl->use_entries) {lru = TAILQ_FIRST(&tbl->lru);if (max_cycles + lru->start < tms) {ip_frag_tbl_del(tbl, dr, lru);} else {free = NULL;IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,fail_nospace, 1);}}/* found a free entry to reuse. */if (free != NULL) {ip_frag_tbl_add(tbl,  free, key, tms);pkt = free;}/** we found the flow, but it is already timed out,* so free associated resources, reposition it in the LRU list,* and reuse it.*/} else if (max_cycles + pkt->start < tms) {ip_frag_tbl_reuse(tbl, dr, pkt, tms);}IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));tbl->last = pkt;return pkt;
}

               找到对应的pkt之后,将fragment保存到该pkt中

struct rte_mbuf *
ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
{uint32_t idx;fp->frag_size += len;/* this is the first fragment. */if (ofs == 0) {idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?IP_FIRST_FRAG_IDX : UINT32_MAX;/* this is the last fragment. */} else if (more_frags == 0) {fp->total_size = ofs + len;idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?IP_LAST_FRAG_IDX : UINT32_MAX;/* this is the intermediate fragment. */} else if ((idx = fp->last_idx) < RTE_DIM(fp->frags)) {fp->last_idx++;}...fp->frags[idx].ofs = ofs;fp->frags[idx].len = len;fp->frags[idx].mb = mb;mb = NULL;/* not all fragments are collected yet. */if (likely (fp->frag_size < fp->total_size)) {return mb;/* if we collected all fragments, then try to reassemble. */} else if (fp->frag_size == fp->total_size &&fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {if (fp->key.key_len == IPV4_KEYLEN)mb = ipv4_frag_reassemble(fp);elsemb = ipv6_frag_reassemble(fp);}.../* we are done with that entry, invalidate it. */ip_frag_key_invalidate(&fp->key);return mb;
}

        fragment全部接收后进行组装

/*
 * Reassemble fragments into one packet.
 *
 * Works backwards from the last fragment: at each step it searches the
 * intermediate slots for the fragment that ends exactly where the current
 * one starts, chains the current mbuf behind it, and continues from there
 * until the offset reached equals the length of the first fragment.
 * Finally the chain is appended to the first fragment and the IPv4 header
 * of that first fragment is rewritten for the whole packet.
 *
 * Returns the head mbuf of the reassembled packet, or NULL when no
 * fragment ends at the current offset (hole in the packet).
 */
struct rte_mbuf *
ipv4_frag_reassemble(struct ip_frag_pkt *fp)
{
	struct rte_ipv4_hdr *ip_hdr;
	struct rte_mbuf *m, *prev;
	uint32_t i, n, ofs, first_len;
	uint32_t curr_idx = 0;

	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
	/* highest slot index filled so far (last_idx is the next one to fill). */
	n = fp->last_idx - 1;

	/*start from the last fragment. */
	m = fp->frags[IP_LAST_FRAG_IDX].mb;
	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
	curr_idx = IP_LAST_FRAG_IDX;

	/* done once the chain reaches the byte right after the first fragment. */
	while (ofs != first_len) {

		/* remembered to detect a pass that made no progress. */
		prev = m;

		for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {

			/* previous fragment found. */
			if(fp->frags[i].ofs + fp->frags[i].len == ofs) {

				RTE_ASSERT(curr_idx != i);

				/* adjust start of the last fragment data. */
				rte_pktmbuf_adj(m,
					(uint16_t)(m->l2_len + m->l3_len));
				/* NOTE(review): return value of rte_pktmbuf_chain is not checked. */
				rte_pktmbuf_chain(fp->frags[i].mb, m);

				/* this mbuf should not be accessed directly */
				fp->frags[curr_idx].mb = NULL;
				curr_idx = i;

				/* update our last fragment and offset. */
				m = fp->frags[i].mb;
				ofs = fp->frags[i].ofs;
			}
		}

		/* error - hole in the packet. */
		if (m == prev) {
			return NULL;
		}
	}

	/* chain with the first fragment. */
	rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
	rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
	fp->frags[curr_idx].mb = NULL;
	m = fp->frags[IP_FIRST_FRAG_IDX].mb;
	/* the entry no longer references the head mbuf; it is returned below. */
	fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;

	/* update ipv4 header for the reassembled packet */
	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, m->l2_len);

	/* total length = collected payload size + IPv4 header length. */
	ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
		m->l3_len));
	/* keep only the DF flag; every other fragment_offset bit is cleared. */
	ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
		rte_cpu_to_be_16(RTE_IPV4_HDR_DF_FLAG));
	/* checksum is left as 0 here; presumably recomputed later - confirm with caller. */
	ip_hdr->hdr_checksum = 0;

	return m;
}