Linux内核中的IPSEC实现(3)

2012-08-16
Linux内核中的IPSEC实现(3): 本文档的Copyleft归yfydz所有，使用GPL发布，可以自由拷贝，转载，转载时请保持文档的完整性，严禁用于任何商业用途。
Linux内核中的IPSEC实现(3)
本文档的Copyleft归yfydz所有，使用GPL发布，可以自由拷贝，转载，转载时请保持文档的完整性，严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源：http://yfydz.cublog.cn
5. 安全策略(xfrm_policy)处理本节所介绍的函数都在net/xfrm/xfrm_policy.c中定义。5.1 策略分配策略分配函数为xfrm_policy_alloc(), 该函数被pfkey_spdadd()函数调用struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp){ struct xfrm_policy *policy;// 分配struct xfrm_policy结构空间并清零 policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) {// 初始化链接节点  INIT_HLIST_NODE(&policy->bydst);  INIT_HLIST_NODE(&policy->byidx);// 初始化锁  rwlock_init(&policy->lock);// 策略引用计数初始化为1  atomic_set(&policy->refcnt, 1);// 初始化定时器  init_timer(&policy->timer);  policy->timer.data = (unsigned long)policy;  policy->timer.function = xfrm_policy_timer; } return policy;}EXPORT_SYMBOL(xfrm_policy_alloc);定时器函数:static void xfrm_policy_timer(unsigned long data){ struct xfrm_policy *xp = (struct xfrm_policy*)data; unsigned long now = (unsigned long)xtime.tv_sec; long next = LONG_MAX; int warn = 0; int dir;// 加锁 read_lock(&xp->lock);// 如果策略已经是死的, 退出 if (xp->dead)  goto out;// 根据策略索引号确定策略处理的数据的方向, 看索引号的后3位 dir = xfrm_policy_id2dir(xp->index);// 如果到期了还要强制要增加一些时间 if (xp->lft.hard_add_expires_seconds) {// 计算强制增加的超时时间  long tmo = xp->lft.hard_add_expires_seconds +   xp->curlft.add_time - now;// 没法增加超时了, 到期  if (tmo <= 0)   goto expired;  if (tmo < next)   next = tmo; }// 如果到期了还要强制要增加的使用时间 if (xp->lft.hard_use_expires_seconds) {// 计算强制增加的使用时间  long tmo = xp->lft.hard_use_expires_seconds +   (xp->curlft.use_time ? : xp->curlft.add_time) - now;// 没法增加超时了, 到期  if (tmo <= 0)   goto expired;  if (tmo < next)   next = tmo; }// 如果到期了还要软性要增加一些时间 if (xp->lft.soft_add_expires_seconds) {// 计算软性增加的时间  long tmo = xp->lft.soft_add_expires_seconds +   xp->curlft.add_time - now;// 软性增加超时小于0, 设置报警标志, 并将超时设置为XFRM_KM_TIMEOUT, 这点和其他不同  if (tmo <= 0) {   warn = 1;   tmo = XFRM_KM_TIMEOUT;  }  if (tmo < next)   next = tmo; }// 如果到期了还要软性要增加的使用时间 if (xp->lft.soft_use_expires_seconds) {// 计算软性增加的使用时间  long tmo = xp->lft.soft_use_expires_seconds +   (xp->curlft.use_time ? 
: xp->curlft.add_time) - now;// 软性增加超时小于0, 设置报警标志, 并将超时设置为XFRM_KM_TIMEOUT, 这点和其他不同  if (tmo <= 0) {   warn = 1;   tmo = XFRM_KM_TIMEOUT;  }  if (tmo < next)   next = tmo; }// 需要报警, 调用到期回调 if (warn)  km_policy_expired(xp, dir, 0, 0);// 如果更新的超时值有效, 修改定时器超时, 增加策略使用计数 if (next != LONG_MAX &&     !mod_timer(&xp->timer, jiffies + make_jiffies(next)))  xfrm_pol_hold(xp);out: read_unlock(&xp->lock); xfrm_pol_put(xp); return;expired: read_unlock(&xp->lock);// 如果确实到期, 删除策略 if (!xfrm_policy_delete(xp, dir))// 1表示是硬性到期了  km_policy_expired(xp, dir, 1, 0); xfrm_pol_put(xp);} 5.2 策略插入策略插入函数为xfrm_policy_insert(), 该函数被pfkey_spdadd()函数调用, 注意策略链表是按优先权大小进行排序的有序链表, 因此插入策略时要进行优先权比较后插入到合适的位置.int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl){ struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; struct hlist_node *entry, *newpos, *last; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock);// 找到具体的hash链表 chain = policy_hash_bysel(&policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; last = NULL;// 遍历链表, 该链表是以策略的优先级值进行排序的链表, 因此需要根据新策略的优先级大小// 将新策略插到合适的位置 hlist_for_each_entry(pol, entry, chain, bydst) {// delpol要为空  if (!delpol &&// 策略类型比较      pol->type == policy->type &&// 选择子比较      !selector_cmp(&pol->selector, &policy->selector) &&// 安全上下文比较      xfrm_sec_ctx_match(pol->security, policy->security)) {// 新策略和已有的某策略匹配   if (excl) {// 如果是排他性添加操作, 要插入的策略在数据库中已经存在, 发生错误    write_unlock_bh(&xfrm_policy_lock);    return -EEXIST;   }// 保存好要删除的策略位置   delpol = pol;// 要更新的策略优先级值大于原有的优先级值, 重新循环找到合适的插入位置// 因为这个链表是以优先级值进行排序的, 不能乱// 现在delpol已经非空了,  前面的策略查找条件已经不可能满足了   if (policy->priority > pol->priority)    continue;  } else if (policy->priority >= pol->priority) {// 如果新的优先级不低于当前的优先级, 保存当前节点, 继续查找合适插入位置   last = &pol->bydst;   continue;  }// 这里是根据新策略的优先级确定的插入位置  if (!newpos)   newpos = &pol->bydst;// 如果已经找到要删除的策略, 中断  if (delpol)   break;  last = &pol->bydst; } if (!newpos)  newpos = last;// 插入策略到按目的地址HASH的链表的指定位置 if (newpos)  
hlist_add_after(newpos, &policy->bydst); else  hlist_add_head(&policy->bydst, chain);// 增加策略引用计数 xfrm_pol_hold(policy);// 该方向的策略数增1 xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid);// 如果有相同的老策略, 要从目的地址HASH和索引号HASH这两个表中删除 if (delpol) {  hlist_del(&delpol->bydst);  hlist_del(&delpol->byidx);  xfrm_policy_count[dir]--; }// 获取策略索引号, 插入索引HASH链表 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));// 策略插入实际时间 policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ))  xfrm_pol_hold(policy); write_unlock_bh(&xfrm_policy_lock);// 释放老策略 if (delpol)  xfrm_policy_kill(delpol); else if (xfrm_bydst_should_resize(dir, NULL))  schedule_work(&xfrm_hash_work);// 下面释放所有策略当前的路由cache read_lock_bh(&xfrm_policy_lock); gc_list = NULL; entry = &policy->bydst;// 遍历链表, 搜集垃圾路由cache建立链表 hlist_for_each_entry_continue(policy, entry, bydst) {  struct dst_entry *dst;  write_lock(&policy->lock);// 策略的路由链表头  dst = policy->bundles;  if (dst) {// 直接将整个策略路由链表加到垃圾链表前面   struct dst_entry *tail = dst;   while (tail->next)    tail = tail->next;   tail->next = gc_list;   gc_list = dst;// 当前策略的路由为空   policy->bundles = NULL;  }  write_unlock(&policy->lock); } read_unlock_bh(&xfrm_policy_lock);// 释放垃圾路由cahce while (gc_list) {  struct dst_entry *dst = gc_list;  gc_list = dst->next;  dst_free(dst); } return 0;}EXPORT_SYMBOL(xfrm_policy_insert); 5.3  删除某类型的全部安全策略该函数被pfkey_spdflush()等函数调用void xfrm_policy_flush(u8 type){ int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {  struct xfrm_policy *pol;  struct hlist_node *entry;  int i, killed;  killed = 0; again1:// 遍历inexact HASH链表  hlist_for_each_entry(pol, entry,         &xfrm_policy_inexact[dir], bydst) {// 判断类型   if (pol->type != type)    continue;// 将策略从bydst链表中断开   hlist_del(&pol->bydst);// 将策略从byidt链表中断开   hlist_del(&pol->byidx);   
write_unlock_bh(&xfrm_policy_lock);// 将策略状态置为dead, 并添加到系统的策略垃圾链表进行调度处理准备删除   xfrm_policy_kill(pol);   killed++;   write_lock_bh(&xfrm_policy_lock);   goto again1;  }// 遍历所有目的HASH链表  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { again2:// 遍历按目的地址HASH的链表   hlist_for_each_entry(pol, entry,          xfrm_policy_bydst[dir].table + i,          bydst) {    if (pol->type != type)     continue;// 将节点从链表中断开    hlist_del(&pol->bydst);    hlist_del(&pol->byidx);    write_unlock_bh(&xfrm_policy_lock);// 释放节点    xfrm_policy_kill(pol);    killed++;    write_lock_bh(&xfrm_policy_lock);    goto again2;   }  }  xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock);}EXPORT_SYMBOL(xfrm_policy_flush); /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */// 策略释放到垃圾链表static void xfrm_policy_kill(struct xfrm_policy *policy){ int dead; write_lock_bh(&policy->lock);// 保留老的DEAD标志 dead = policy->dead;// 设置策略DEAD标志 policy->dead = 1; write_unlock_bh(&policy->lock);// 为什么不在前面判断DEAD呢? 
if (unlikely(dead)) {  WARN_ON(1);  return; } spin_lock(&xfrm_policy_gc_lock);// 将该策略节点从当前链表断开, 插入策略垃圾链表 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock);// 调度策略垃圾策略工作结构 schedule_work(&xfrm_policy_gc_work);}5.4 策略查找5.4.1 策略查找并删除根据选择子和安全上下文查找策略, 可查找策略并删除, 被pfkey_spddelete()函数调用struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,       struct xfrm_selector *sel,       struct xfrm_sec_ctx *ctx, int delete){ struct xfrm_policy *pol, *ret; struct hlist_head *chain; struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock);// 定位HASH表 chain = policy_hash_bysel(sel, sel->family, dir); ret = NULL;// 遍历链表 hlist_for_each_entry(pol, entry, chain, bydst) {// 根据类型, 选择子和上下文进行匹配  if (pol->type == type &&      !selector_cmp(sel, &pol->selector) &&      xfrm_sec_ctx_match(ctx, pol->security)) {   xfrm_pol_hold(pol);   if (delete) {// 要删除的话, 将策略节点从目的地址HASH链表和索引HASH链表中断开    hlist_del(&pol->bydst);    hlist_del(&pol->byidx);    xfrm_policy_count[dir]--;   }   ret = pol;   break;  } } write_unlock_bh(&xfrm_policy_lock); if (ret && delete) {// 增加genid  atomic_inc(&flow_cache_genid);// 将策略状态置为dead, 并添加到系统的策略垃圾链表进行调度处理准备删除  xfrm_policy_kill(ret); } return ret;}EXPORT_SYMBOL(xfrm_policy_bysel_ctx);5.4.2 按索引号查找并删除struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete){ struct xfrm_policy *pol, *ret; struct hlist_head *chain; struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock);// 根据索引号定位链表 chain = xfrm_policy_byidx + idx_hash(id); ret = NULL;// 遍历链表 hlist_for_each_entry(pol, entry, chain, byidx) {// 策略的类型和索引号相同  if (pol->type == type && pol->index == id) {   xfrm_pol_hold(pol);// 如果要删除, 将策略节点从链表中删除   if (delete) {    hlist_del(&pol->bydst);    hlist_del(&pol->byidx);    xfrm_policy_count[dir]--;   }   ret = pol;   break;  } } write_unlock_bh(&xfrm_policy_lock); if (ret && delete) {// 增加genid  atomic_inc(&flow_cache_genid);// 将策略状态置为dead, 并添加到系统的策略垃圾链表进行调度处理准备删除  xfrm_policy_kill(ret); } return 
ret;}EXPORT_SYMBOL(xfrm_policy_byid);5.4.3 根据路由查找策略// 参数fl是路由相关的结构, 常用于路由查找中// 注意返回值是整数, 0成功, 非0失败, 找到的策略通过参数objp进行传递static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,          void **objp, atomic_t **obj_refp){ struct xfrm_policy *pol; int err = 0;#ifdef CONFIG_XFRM_SUB_POLICY// 子策略查找, 属于Linux自己的扩展功能, 非标准功能 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) {  err = PTR_ERR(pol);  pol = NULL; } if (pol || err)  goto end;#endif// 查找MAIN类型的策略 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) {  err = PTR_ERR(pol);  pol = NULL; }#ifdef CONFIG_XFRM_SUB_POLICYend:#endif// 将找到的策略赋值给objp返回 if ((*objp = (void *) pol) != NULL)  *obj_refp = &pol->refcnt; return err;}// 按类型查找策略static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,           u16 family, u8 dir){ int err; struct xfrm_policy *pol, *ret; xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U;// 由流结构的目的和源地址 daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); if (unlikely(!daddr || !saddr))  return NULL; read_lock_bh(&xfrm_policy_lock);// 根据地址信息查找HASH链表 chain = policy_hash_direct(daddr, saddr, family, dir); ret = NULL;// 循环HASH链表 hlist_for_each_entry(pol, entry, chain, bydst) {// 检查流结构,类型和协议族是否匹配策略, 返回0表示匹配  err = xfrm_policy_match(pol, fl, type, family, dir);  if (err) {   if (err == -ESRCH)    continue;   else {    ret = ERR_PTR(err);    goto fail;   }  } else {// 备份找到的策略和优先级   ret = pol;   priority = ret->priority;   break;  } }// 再在inexact链表中查找策略, 如果也找到策略, 而且优先级更小,// 将新找到的策略替代前面找到的策略 chain = &xfrm_policy_inexact[dir];// 循环HASH链表 hlist_for_each_entry(pol, entry, chain, bydst) {// 检查流结构,类型和协议族是否匹配策略, 返回0表示匹配  err = xfrm_policy_match(pol, fl, type, family, dir);  if (err) {   if (err == -ESRCH)    continue;   else {    ret = ERR_PTR(err);    goto fail;   }  } else if (pol->priority < priority) {// 如果新找到的策略优先级更小, 将其取代原来找到的策略   
ret = pol;   break;  } } if (ret)  xfrm_pol_hold(ret);fail: read_unlock_bh(&xfrm_policy_lock); return ret;}// 检查xfrm策略是否和流参数匹配// 返回0表示匹配成功static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,        u8 type, u16 family, int dir){// 选择子 struct xfrm_selector *sel = &pol->selector; int match, ret = -ESRCH;// 检查策略协议族和类型是否匹配 if (pol->family != family ||     pol->type != type)  return ret;// 检查选择子是否匹配, 返回非0值表示匹配成功 match = xfrm_selector_match(sel, fl, family); if (match)// 这种security函数可以不用考虑, 当作返回0的函数即可  ret = security_xfrm_policy_lookup(pol, fl->secid, dir); return ret;}// 选择子匹配,分别对IPV4和IPV6协议族比较static inline intxfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,      unsigned short family){ switch (family) { case AF_INET:  return __xfrm4_selector_match(sel, fl); case AF_INET6:  return __xfrm6_selector_match(sel, fl); } return 0;}//IPV4协议族选择子比较static inline int__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl){// 比较V4目的地址, V4源地址, 目的端口, 源端口, 协议, 网卡索引号 return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&  addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&  !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&  !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&  (fl->proto == sel->proto || !sel->proto) &&  (fl->oif == sel->ifindex || !sel->ifindex);}//IPV6协议族选择子比较static inline int__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl){// 比较V6目的地址, V6源地址, 目的端口, 源端口, 协议, 网卡索引号 return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&  addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&  !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&  !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&  (fl->proto == sel->proto || !sel->proto) &&  (fl->oif == sel->ifindex || !sel->ifindex);}5.4.4 查找和sock对应的策略static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl){ struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock);// 
sock结构中有sk_policy用来指向双向数据的安全策略 if ((pol = sk->sk_policy[dir]) != NULL) {// 检查该策略的选择子是否和流结构匹配   int match = xfrm_selector_match(&pol->selector, fl,      sk->sk_family);   int err = 0;// 如果匹配的话将策略作为结果返回  if (match) {// 这个security函数可视为返回0的空函数   err = security_xfrm_policy_lookup(pol, fl->secid,     policy_to_flow_dir(dir));   if (!err)    xfrm_pol_hold(pol);   else if (err == -ESRCH)    pol = NULL;   else    pol = ERR_PTR(err);  } else   pol = NULL; } read_unlock_bh(&xfrm_policy_lock); return pol;}5.5 遍历安全策略该函数被pfkey_spddump()等函数中调用// func函数用来指定对遍历的策略进行的查找// 实际遍历了两次所有策略int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),       void *data){ struct xfrm_policy *pol; struct hlist_node *entry; int dir, count, error; read_lock_bh(&xfrm_policy_lock); count = 0;// 先统计符合类型的策略的总数量, 方向是双向的 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {  struct hlist_head *table = xfrm_policy_bydst[dir].table;  int i;// inexact HASH表  hlist_for_each_entry(pol, entry,         &xfrm_policy_inexact[dir], bydst) {   if (pol->type == type)    count++;  }// 遍历按地址HASH的链表  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {// 遍历链表   hlist_for_each_entry(pol, entry, table + i, bydst) {    if (pol->type == type)     count++;   }  } } if (count == 0) {  error = -ENOENT;  goto out; }// 重新遍历HASH表, 当前的count值作为SA的序号, 因此用户空间收到的序号是递减的 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {  struct hlist_head *table = xfrm_policy_bydst[dir].table;  int i;// 遍历inexact链表  hlist_for_each_entry(pol, entry,         &xfrm_policy_inexact[dir], bydst) {   if (pol->type != type)    continue;// 对符合类型的策略调用func函数   error = func(pol, dir % XFRM_POLICY_MAX, --count, data);   if (error)    goto out;  }// 遍历按地址HASH的链表  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {   hlist_for_each_entry(pol, entry, table + i, bydst) {    if (pol->type != type)     continue;// 对符合类型的策略调用func函数, 当count递减到0时表示是最后一个策略了    error = func(pol, dir % XFRM_POLICY_MAX, --count, data);    if (error)     goto out;   }  } } 
error = 0;out: read_unlock_bh(&xfrm_policy_lock); return error;}EXPORT_SYMBOL(xfrm_policy_walk); 5.5 策略检查__xfrm_policy_check函数也是一个比较重要的函数, 被xfrm_policy_check()调用, 又被xfrm4_policy_check()和xfrm6_policy_check()调用, 而这两个函数在网络层的输入和转发处调用.对普通包就返回合法, 对IPSEC包检查策略是否合法, 是否和路由方向匹配// 返回1表示合法, 0表示不合法, 对于该函数返回0的数据包通常是被丢弃int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,   unsigned short family){ struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; int xfrm_nr; int pi; struct flowi fl;// 将策略方向转换为流方向, 其实值是一样的 u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1;// 调用协议族的decode_session()函数, 对IPV4来说就是_decode_session4// 将skb中的地址端口等信息填入流结构fl中 if (xfrm_decode_session(skb, &fl, family) < 0)  return 0;// 如果内核支持NETFILTER, 将调用ip_nat_decode_session函数填写NAT信息// 否则的话就是个空函数 nf_nat_decode_session(skb, &fl, family); /* First, check used SA against their selectors. */ if (skb->sp) {// 该包是进行了解密后的IPSEC包  int i;  for (i=skb->sp->len-1; i>=0; i--) {// 获取该包相关的SA信息   struct xfrm_state *x = skb->sp->xvec[i];// 检查SA选择子和流参数(路由)是否匹配, 结果为0表示不匹配, 不匹配的话返回   if (!xfrm_selector_match(&x->sel, &fl, family))    return 0;  } } pol = NULL;// 如果sock结构中有策略 if (sk && sk->sk_policy[dir]) {// 检查策略是否和流结构匹配, 匹配的话返回策略  pol = xfrm_sk_policy_lookup(sk, dir, &fl);  if (IS_ERR(pol))   return 0; }// 查找路由信息, 如果没有就创建路由, xfrm_policy_lookup()函数作为参数传递给// flow_cache_lookup()函数, 查找和该路由对应的安全策略 if (!pol)  pol = flow_cache_lookup(&fl, family, fl_dir,     xfrm_policy_lookup);// 查找过程中出错,返回0 if (IS_ERR(pol))  return 0;// 策略不存在 if (!pol) {// 如果该包是IPSEC包而且安全路径中的SA不是传输模式,// 转发时, 对于已经封装的包没必要再次封装;// 输入时, 是自身的IPSEC通信包封装基本也无意义  if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {// 拒绝该安全路径, 返回0失败   xfrm_secpath_reject(xerr_idx, skb, &fl);   return 0;  }// 普通包处理, 安全策略不存在, 返回1  return 1; }// 找到安全策略, 对该包要根据策略进行IPSEC处理// 更新策略当前使用时间 pol->curlft.use_time = (unsigned long)xtime.tv_sec; pols[0] = pol; npols ++;#ifdef CONFIG_XFRM_SUB_POLICY// 如果定义了子策略的话极限查找子策略, 这是标准IPSEC中没定义的, 可以不考虑 if 
(pols[0]->type != XFRM_POLICY_TYPE_MAIN) {  pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,          &fl, family,          XFRM_POLICY_IN);  if (pols[1]) {   if (IS_ERR(pols[1]))    return 0;   pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;   npols ++;  } }#endif// 策略动作是允许通过 if (pol->action == XFRM_POLICY_ALLOW) {  struct sec_path *sp;// 先伪造个安全路径  static struct sec_path dummy;  struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];  struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];  struct xfrm_tmpl **tpp = tp;  int ti = 0;  int i, k;// 如果数据包没有安全路径, 路径指针初始化为伪造的安全路径  if ((sp = skb->sp) == NULL)   sp = &dummy;// 遍历策略数组, 包括主策略和子策略(内核支持子策略的话),一般情况下就一个策略  for (pi = 0; pi < npols; pi++) {// 如果有非允许通过的其他安全策略, 放弃   if (pols[pi] != pol &&       pols[pi]->action != XFRM_POLICY_ALLOW)    goto reject;// 如果策略层次太多, 放弃   if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)    goto reject_error;// 备份策略中的xfrm向量模板, ti是数量   for (i = 0; i < pols[pi]->xfrm_nr; i++)    tpp[ti++] = &pols[pi]->xfrm_vec[i];  }// 策略数量  xfrm_nr = ti;  if (npols > 1) {// 如果超过一个策略,进行排序, 只是在内核支持子系统时才用, 否则只是返回错误// 但该错误可以忽略   xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);   tpp = stp;  }  /* For each tunnel xfrm, find the first matching tmpl.   * For each tmpl before that, find corresponding xfrm.   * Order is _important_. Later we will implement   * some barriers, but at the moment barriers   * are implied between each two transformations.   
*/// 遍历检查策略模板是否OK  for (i = xfrm_nr-1, k = 0; i >= 0; i--) {// 注意k既是输入, 也是输出值, k初始化为0// 返回值大于等于0表示策略合法可用   k = xfrm_policy_ok(tpp[i], sp, k, family);   if (k < 0) {    if (k < -1)     /* "-2 - errored_index" returned */     xerr_idx = -(2+k);    goto reject;   }  }// 存在非传输模式的策略, 放弃  if (secpath_has_nontransport(sp, k, &xerr_idx))   goto reject;  xfrm_pols_put(pols, npols);  return 1; }// 放弃, 返回0表示检查不通过reject: xfrm_secpath_reject(xerr_idx, skb, &fl);reject_error: xfrm_pols_put(pols, npols); return 0;}EXPORT_SYMBOL(__xfrm_policy_check); /* * 0 or more than 0 is returned when validation is succeeded (either bypass * because of optional transport mode, or next index of the mathced secpath * state with the template. * -1 is returned when no matching template is found. * Otherwise "-2 - errored_index" is returned. */static inline intxfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,        unsigned short family){ int idx = start; if (tmpl->optional) {// 如果是传输模式, 直接返回  if (tmpl->mode == XFRM_MODE_TRANSPORT)   return start; } else  start = -1; for (; idx < sp->len; idx++) {// sp->xvec是xfrm状态// 如果安全路径和模板匹配,返回索引位置  if (xfrm_state_ok(tmpl, sp->xvec[idx], family))   return ++idx;// 如果安全路径中的SA不是传输模式,返回错误  if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {   if (start == -1)    start = -2-idx;   break;  } } return start;}5.6 安全策略路由查找 xfrm_lookup函数是个非常重要的函数, 用来根据安全策略构造数据包的路由项链表, 该路由项链表反映了对数据包进行IPSEC封装的多层次的处理, 每封装一次, 就增加一个路由项.该函数被路由查找函数ip_route_output_flow()调用, 针对的是转发或发出的数据包./* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. 
*/// 返回0表示成功, 负数表示失败int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,  struct sock *sk, int flags){ struct xfrm_policy *policy; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols; int pol_dead; int xfrm_nr; int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; int err; u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);restart:// 初始化清零操作 genid = atomic_read(&flow_cache_genid); policy = NULL; for (pi = 0; pi < ARRAY_SIZE(pols); pi++)  pols[pi] = NULL; npols = 0; pol_dead = 0; xfrm_nr = 0; if (sk && sk->sk_policy[1]) {// 如果在sock中定义了安全策略, 查找该sock相关的策略// 一个socket的安全策略可通过setsockopt()设置, socket选项为// IP_IPSEC_POLICY或IP_XFRM_POLICY(net/ipv4/ip_sockglue.c)  policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);  if (IS_ERR(policy))   return PTR_ERR(policy); } if (!policy) {// 没找到sock自身定义的安全策略  /* To accelerate a bit...  */// 如果初始路由中设置了非IPSEC标志或没有发出方向的安全策略, 直接返回  if ((dst_orig->flags & DST_NOXFRM) ||      !xfrm_policy_count[XFRM_POLICY_OUT])   return 0;// 查找路由信息, 如果没有就创建路由, xfrm_policy_lookup()函数作为参数传递给// flow_cache_lookup()函数, 查找和该路由对应的安全策略  policy = flow_cache_lookup(fl, dst_orig->ops->family,        dir, xfrm_policy_lookup);  if (IS_ERR(policy))   return PTR_ERR(policy); }// 找不到策略的话返回, 就是普通包普通路由项 if (!policy)  return 0;// 以下是存在安全策略的情况, 要对该包建立安全路由链表// 初始路由的协议族 family = dst_orig->ops->family;// 安全策略最近使用时间 policy->curlft.use_time = (unsigned long)xtime.tv_sec;// 将找到的策略作为策略数组的第一项 pols[0] = policy; npols ++; xfrm_nr += pols[0]->xfrm_nr;// 根据策略操作结果进行相关处理, 只有两种情况: 阻塞或通过 switch (policy->action) { case XFRM_POLICY_BLOCK:// 阻塞该数据包, 返回错误  /* Prohibit the flow */  err = -EPERM;  goto error; case XFRM_POLICY_ALLOW:// 允许该包通过, 这样就要对该包进行IPSEC处理#ifndef CONFIG_XFRM_SUB_POLICY// 对子策略操作忽略  if (policy->xfrm_nr == 0) {   /* Flow passes not transformed. */   xfrm_pol_put(policy);   return 0;  }#endif  /* Try to find matching bundle.   *   * LATER: help from flow cache. 
It is optional, this   * is required only for output policy.   */// 查找是否已经存在安全路由, bundle可以理解为描述安全处理的安全路由, 数据包走该路由// 就是进行某种安全封装, 和普通路由项一样, 用过的安全路由也被缓存起来  dst = xfrm_find_bundle(fl, policy, family);  if (IS_ERR(dst)) {   err = PTR_ERR(dst);   goto error;  }// 如果找到安全路由, 退出switch  if (dst)   break;#ifdef CONFIG_XFRM_SUB_POLICY// 对子策略操作, 由于是非标准IPSEC,忽略  if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {   pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,           fl, family,           XFRM_POLICY_OUT);   if (pols[1]) {    if (IS_ERR(pols[1])) {     err = PTR_ERR(pols[1]);     goto error;    }    if (pols[1]->action == XFRM_POLICY_BLOCK) {     err = -EPERM;     goto error;    }    npols ++;    xfrm_nr += pols[1]->xfrm_nr;   }  }  /*   * Because neither flowi nor bundle information knows about   * transformation template size. On more than one policy usage   * we can realize whether all of them is bypass or not after   * they are searched. See above not-transformed bypass   * is surrounded by non-sub policy configuration, too.   */  if (xfrm_nr == 0) {   /* Flow passes not transformed. 
*/   xfrm_pols_put(pols, npols);   return 0;  }#endif// 没找到安全路由, 准备构造新的路由项// 利用策略, 流等参数构造相关SA(xfrm_state)保存在xfrm中, nx为SA数量  nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);  if (unlikely(nx<0)) {// nx<0表示失败, 没找到SA// 但如果是-EAGAIN表示已经通知用户空间的IKE进行协商新的SA了,// 目前只生成了ACQUIRE类型的xfrm_state   err = nx;   if (err == -EAGAIN && flags) {// 进程进入阻塞状态    DECLARE_WAITQUEUE(wait, current);    add_wait_queue(&km_waitq, &wait);    set_current_state(TASK_INTERRUPTIBLE);    schedule();    set_current_state(TASK_RUNNING);    remove_wait_queue(&km_waitq, &wait);// 阻塞解除, 重新解析SA    nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);    if (nx == -EAGAIN && signal_pending(current)) {     err = -ERESTART;     goto error;    }    if (nx == -EAGAIN ||        genid != atomic_read(&flow_cache_genid)) {     xfrm_pols_put(pols, npols);     goto restart;    }    err = nx;   }   if (err < 0)    goto error;  }  if (nx == 0) {// nx==0表示数据是不需要进行IPSEC处理的, 返回   /* Flow passes not transformed. */   xfrm_pols_put(pols, npols);   return 0;  }// 保存初始路由  dst = dst_orig;// 创建新的安全路由, 返回0 表示成功, 失败返回负数// dst在成功返回时保存安全路由项, 每个SA处理对应一个安全路由, 这些安全路由通过// 路由项中的child链接为一个链表, 这样就可以对数据包进行连续变换, 如先压缩,// 再ESP封装, 再AH封装等.// 路由项链表的构造和协议族相关, 后续文章中介绍具体协议族中的实现时再详细描述// 所构造出的路由项的具体结构情况  err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);  if (unlikely(err)) {// 失败的话释放刚获取的SA   int i;   for (i=0; i<nx; i++)    xfrm_state_put(xfrm[i]);   goto error;  }// 检查所有策略的dead状态  for (pi = 0; pi < npols; pi++) {   read_lock_bh(&pols[pi]->lock);   pol_dead |= pols[pi]->dead;   read_unlock_bh(&pols[pi]->lock);  }  write_lock_bh(&policy->lock);// 如果有策略是dead或获取的安全路由项有问题, 释放安全路由  if (unlikely(pol_dead || stale_bundle(dst))) {   /* Wow! While we worked on resolving, this    * policy has gone. Retry. It is not paranoia,    * we just cannot enlist new bundle to dead object.    * We can't enlist stable bundles either.    
*/   write_unlock_bh(&policy->lock);   if (dst)    dst_free(dst);   err = -EHOSTUNREACH;   goto error;  }// 将安全路由加入到策略的路由项链表头, 该链表是以NULL结尾的单向链表// 不过一般情况下应该只有一个元素  dst->next = policy->bundles;  policy->bundles = dst;  dst_hold(dst);  write_unlock_bh(&policy->lock); }// 将安全链表作为 *dst_p = dst; dst_release(dst_orig);  xfrm_pols_put(pols, npols); return 0;error: dst_release(dst_orig); xfrm_pols_put(pols, npols); *dst_p = NULL; return err;}EXPORT_SYMBOL(xfrm_lookup);以下是在xfrm_lookup中用到的两个bundle的操作函数: 查找和创建, 由于使用了地址参数, 是和协议族相关的, 因此具体实现是在各协议族中实现的, 在后续文章中介绍协议族中的xfrm实现时再详细介绍.static struct dst_entry *xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family){ struct dst_entry *x; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL))  return ERR_PTR(-EINVAL); x = afinfo->find_bundle(fl, policy); xfrm_policy_put_afinfo(afinfo); return x;}/* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */static intxfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,     struct flowi *fl, struct dst_entry **dst_p,     unsigned short family){ int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL))  return -EINVAL; err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); xfrm_policy_put_afinfo(afinfo); return err;}// 策略解析, 生成SAstatic intxfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,    struct xfrm_state **xfrm,    unsigned short family){ struct xfrm_state *tp[XFRM_MAX_DEPTH];// npols > 1是定义了子策略的情况, 这时用tp数组保存找到的SA, 但没法返回原函数中了// 不明白为什么这么作 struct xfrm_state **tpp = (npols > 1) ? 
tp : xfrm; int cnx = 0; int error; int ret; int i;// 遍历策略, 一般情况下npols其实只是1 for (i = 0; i < npols; i++) {// 检查保存SA的缓冲区是否还够大  if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {   error = -ENOBUFS;   goto fail;  }// 解析一个策略模板  ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);  if (ret < 0) {   error = ret;   goto fail;  } else   cnx += ret; } /* found states are sorted for outbound processing */// 多个策略的话对找到的SA排序, 在没定义子策略的情况下是个空函数 if (npols > 1)  xfrm_state_sort(xfrm, tpp, cnx, family); return cnx; fail: for (cnx--; cnx>=0; cnx--)  xfrm_state_put(tpp[cnx]); return error;}/* Resolve list of templates for the flow, given policy. */static intxfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,        struct xfrm_state **xfrm,        unsigned short family){ int nx; int i, error;// 从流结构中获取地址信息 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp;// 遍历策略中的所有SA for (nx=0, i = 0; i < policy->xfrm_nr; i++) {  struct xfrm_state *x;  xfrm_address_t *remote = daddr;  xfrm_address_t *local  = saddr;  struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];  if (tmpl->mode == XFRM_MODE_TUNNEL) {// 如果是通道模式, 会添加外部IP头, 内部IP头都封装在内部, 因此地址信息使用外部地址// 即策略的SA模板中的地址信息   remote = &tmpl->id.daddr;   local = &tmpl->saddr;// 如果local地址没定义, 选取个源地址作为本地地址, 选取过程是协议族相关的   if (xfrm_addr_any(local, family)) {    error = xfrm_get_saddr(&tmp, remote, family);    if (error)     goto fail;    local = &tmp;   }  }// 根据地址,流,策略等新查找SA(xfrm_state),如果找不到现成的会通知IKE程序进行协商// 生成新的SA, 但生成可用SA前先返回ACQUIRE类型的SA, 见前一篇文章  x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);  if (x && x->km.state == XFRM_STATE_VALID) {// 如果SA是合法, 保存   xfrm[nx++] = x;   daddr = remote;   saddr = local;   continue;  }  if (x) {// x存在但不是VALID的, 只要不出错, 应该是ACQUIRE类型的, 等IKE进程协商结果, 返回-EAGAIN   error = (x->km.state == XFRM_STATE_ERROR ?     
-EINVAL : -EAGAIN);   xfrm_state_put(x);  }  if (!tmpl->optional)   goto fail; } return nx;fail: for (nx--; nx>=0; nx--)  xfrm_state_put(xfrm[nx]); return error;}关于路由处理过程在后面介绍IPSEC包的发出过程时会介绍路由处理过程, 从而了解安全路由的作用. 5.6 变更HASH表大小改变策略状态表的是通过工作队列来实现的, 和xfrm_state类似工作定义:static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);// 更改HASH表大小static void xfrm_hash_resize(void *__unused){ int dir, total; mutex_lock(&hash_resize_mutex); total = 0;// 注意策略都是双向的 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {// 按目的地址进行HASH的链表: 如果需要更改HASH表大小, 修改之  if (xfrm_bydst_should_resize(dir, &total))   xfrm_bydst_resize(dir); }// 按索引号进行HASH的链表更新 if (xfrm_byidx_should_resize(total))  xfrm_byidx_resize(total); mutex_unlock(&hash_resize_mutex);}// 检查按目的地址HASH的HASH链表static inline int xfrm_bydst_should_resize(int dir, int *total){// 该方向是策略的数量 unsigned int cnt = xfrm_policy_count[dir];// 该方向是策略的掩码 unsigned int hmask = xfrm_policy_bydst[dir].hmask;// 累加策略数量 if (total)  *total += cnt;// 如果策略数量大于策略掩码量, 该增加了 if ((hmask + 1) < xfrm_policy_hashmax &&     cnt > hmask)  return 1;// 否则不用 return 0;}// 检查按索引号HASH的HASH链表static inline int xfrm_byidx_should_resize(int total){ unsigned int hmask = xfrm_idx_hmask;// 策略总量超过当前的索引号掩码, 该扩大了 if ((hmask + 1) < xfrm_policy_hashmax &&     total > hmask)  return 1; return 0;}// 更改按目的地址HASH的HASH链表大小static void xfrm_bydst_resize(int dir){// 该方向的HASH表掩码(最大值, 一般是2^N-1) unsigned int hmask = xfrm_policy_bydst[dir].hmask;// 新HASH表掩码(2^(N+1)-1) unsigned int nhashmask = xfrm_new_hash_mask(hmask);// 新HASH表大小 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);// 老HAHS表 struct hlist_head *odst = xfrm_policy_bydst[dir].table;// 新HASH表 struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i;// 新HASH表空间分配不出来, 返回 if (!ndst)  return; write_lock_bh(&xfrm_policy_lock);// 将所有策略节点转到新HASH表 for (i = hmask; i >= 0; i--)  xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);// 将全局变量值更新为新HASH表参数 xfrm_policy_bydst[dir].table = ndst; xfrm_policy_bydst[dir].hmask = nhashmask; 
write_unlock_bh(&xfrm_policy_lock);// 释放老HASH表参数 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));}// 更改按索引号HASH的HASH链表大小, 操作和上面类似static void xfrm_byidx_resize(int total){ unsigned int hmask = xfrm_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = xfrm_policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx)  return; write_lock_bh(&xfrm_policy_lock); for (i = hmask; i >= 0; i--)  xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); xfrm_policy_byidx = nidx; xfrm_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));} 5.7 垃圾搜集 垃圾搜集的是不用的安全路由项, 是和协议族相关的afinfo->garbage_collect = __xfrm_garbage_collect;// 就是xfrm_prune_bundles()函数的包装函数,条件是unused_bundle()函数定义static void __xfrm_garbage_collect(void){ xfrm_prune_bundles(unused_bundle);}// 删减安全路由static void xfrm_prune_bundles(int (*func)(struct dst_entry *)){// 垃圾链表 struct dst_entry *gc_list = NULL; int dir; read_lock_bh(&xfrm_policy_lock);// 循环所有方向 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {  struct xfrm_policy *pol;  struct hlist_node *entry;  struct hlist_head *table;  int i;// 遍历inexact链表  hlist_for_each_entry(pol, entry,         &xfrm_policy_inexact[dir], bydst)// 如果节点满足条件就删除挂接到垃圾链表   prune_one_bundle(pol, func, &gc_list);// 遍历目的地址HASH的链表  table = xfrm_policy_bydst[dir].table;  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {// 如果节点满足条件就删除挂接到垃圾链表   hlist_for_each_entry(pol, entry, table + i, bydst)    prune_one_bundle(pol, func, &gc_list);  } } read_unlock_bh(&xfrm_policy_lock);// 如果搜集到的垃圾, 释放安全路由 while (gc_list) {  struct dst_entry *dst = gc_list;  gc_list = dst->next;  dst_free(dst); }}// 没用的路由, 使用数为0static int unused_bundle(struct dst_entry *dst){ return !atomic_read(&dst->__refcnt);}// 删除单个路由static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry 
**gc_list_p){ struct dst_entry *dst, **dstp;// 策略写锁 write_lock(&pol->lock);// 策略的路由项链表起点 dstp = &pol->bundles;// 遍历链表 while ((dst=*dstp) != NULL) {  if (func(dst)) {// 如果满足条件, 将节点从链表中删除, 添加到垃圾链表   *dstp = dst->next;   dst->next = *gc_list_p;   *gc_list_p = dst;  } else {   dstp = &dst->next;  } } write_unlock(&pol->lock);}5.8 杂项这些杂项并不是策略的直接处理函数, 而是xfrm的一些相关处理, 只是也放在xfrm_policy.c中了.5.8.1 协议处理类型处理xfrm_type用来定义各种协议处理类型, 如AH,ESP, IPCOMP, IPIP等// 登记协议处理类型, 返回0成功, 非0失败int xfrm_register_type(struct xfrm_type *type, unsigned short family){// 找到协议族相关的策略信息结构 struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL))  return -EAFNOSUPPORT;// 策略信息结构中的类型数组 typemap = afinfo->type_map;// 如果数组中相应协议对应元素为空, 则赋值, 否则发生错误 if (likely(typemap[type->proto] == NULL))  typemap[type->proto] = type; else  err = -EEXIST; xfrm_policy_unlock_afinfo(afinfo); return err;}EXPORT_SYMBOL(xfrm_register_type);// 拆除协议处理类型, 返回0成功, 非0失败int xfrm_unregister_type(struct xfrm_type *type, unsigned short family){// 找到协议族相关的策略信息结构 struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL))  return -EAFNOSUPPORT;// 策略信息结构中的类型数组 typemap = afinfo->type_map;// 如果数组中相应协议对应元素等于要删除的结构, 元素清空, 否则发生错误 if (unlikely(typemap[type->proto] != type))  err = -ENOENT; else  typemap[type->proto] = NULL; xfrm_policy_unlock_afinfo(afinfo); return err;}EXPORT_SYMBOL(xfrm_unregister_type);// 根据协议号和协议族查找类型struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family){ struct xfrm_policy_afinfo *afinfo; struct xfrm_type **typemap; struct xfrm_type *type; int modload_attempted = 0;retry:// 找到协议族相关的策略信息结构 afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL))  return NULL;// 策略信息结构中的类型数组 typemap = afinfo->type_map;// 数组中对应指定协议的元素 type = typemap[proto];// 增加type模块的使用计数 if (unlikely(type && !try_module_get(type->owner)))  type = NULL;// 如果当前type为空, 则加载type的内核模块, 
重新查找 if (!type && !modload_attempted) {  xfrm_policy_put_afinfo(afinfo);  request_module("xfrm-type-%d-%d",          (int) family, (int) proto);  modload_attempted = 1;  goto retry; } xfrm_policy_put_afinfo(afinfo); return type;}// 释放类型模块使用计数void xfrm_put_type(struct xfrm_type *type){ module_put(type->owner);} 5.8.2 协议模式处理模式目前包括通道和传输两种.// 登记模式, 返回0成功, 非0失败int xfrm_register_mode(struct xfrm_mode *mode, int family){ struct xfrm_policy_afinfo *afinfo; struct xfrm_mode **modemap; int err; if (unlikely(mode->encap >= XFRM_MODE_MAX))  return -EINVAL;// 找到协议族相关的策略信息结构 afinfo = xfrm_policy_lock_afinfo(family); if (unlikely(afinfo == NULL))  return -EAFNOSUPPORT; err = -EEXIST;// 策略信息结构中的模式数组 modemap = afinfo->mode_map;// 数组元素为空的话赋值, 返回成功 if (likely(modemap[mode->encap] == NULL)) {  modemap[mode->encap] = mode;  err = 0; } xfrm_policy_unlock_afinfo(afinfo); return err;}EXPORT_SYMBOL(xfrm_register_mode);// 拆除模式, 返回0成功, 非0失败int xfrm_unregister_mode(struct xfrm_mode *mode, int family){ struct xfrm_policy_afinfo *afinfo; struct xfrm_mode **modemap; int err; if (unlikely(mode->encap >= XFRM_MODE_MAX))  return -EINVAL;// 找到协议族相关的策略信息结构 afinfo = xfrm_policy_lock_afinfo(family); if (unlikely(afinfo == NULL))  return -EAFNOSUPPORT; err = -ENOENT;// 策略信息结构中的模式数组 modemap = afinfo->mode_map;// 数组元素等于要拆除的模式, 清空, 返回成功 if (likely(modemap[mode->encap] == mode)) {  modemap[mode->encap] = NULL;  err = 0; } xfrm_policy_unlock_afinfo(afinfo); return err;}EXPORT_SYMBOL(xfrm_unregister_mode);// 查找模式struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family){ struct xfrm_policy_afinfo *afinfo; struct xfrm_mode *mode; int modload_attempted = 0; if (unlikely(encap >= XFRM_MODE_MAX))  return NULL;retry:// 找到协议族相关的策略信息结构 afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL))  return NULL;// 策略信息结构中的模式数组 mode = afinfo->mode_map[encap];// 增加模式模块的使用计数 if (unlikely(mode && !try_module_get(mode->owner)))  mode = NULL;// 如果当前模式为空, 则加载模式对应的内核模块, 重新查找 if (!mode && !modload_attempted) {  
xfrm_policy_put_afinfo(afinfo);  request_module("xfrm-mode-%d-%d", family, encap);  modload_attempted = 1;  goto retry; } xfrm_policy_put_afinfo(afinfo); return mode;}// 释放模式模块使用计数void xfrm_put_mode(struct xfrm_mode *mode){ module_put(mode->owner);}5.8.3 协议信息处理// 登记协议信息结构int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo){ int err = 0; if (unlikely(afinfo == NULL))  return -EINVAL; if (unlikely(afinfo->family >= NPROTO))  return -EAFNOSUPPORT; write_lock_bh(&xfrm_policy_afinfo_lock);// 数组中的对应协议的协议信息结构元素应该为空 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))  err = -ENOBUFS; else {// 安全路由操作结构  struct dst_ops *dst_ops = afinfo->dst_ops;// 安全路由操作结构的参数和操作函数赋值  if (likely(dst_ops->kmem_cachep == NULL))   dst_ops->kmem_cachep = xfrm_dst_cache;  if (likely(dst_ops->check == NULL))   dst_ops->check = xfrm_dst_check;  if (likely(dst_ops->negative_advice == NULL))   dst_ops->negative_advice = xfrm_negative_advice;  if (likely(dst_ops->link_failure == NULL))   dst_ops->link_failure = xfrm_link_failure;  if (likely(afinfo->garbage_collect == NULL))   afinfo->garbage_collect = __xfrm_garbage_collect;// 数组中的对应协议的协议信息结构元素填为协议信息结构  xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); return err;}EXPORT_SYMBOL(xfrm_policy_register_afinfo);// 拆除协议信息结构int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo){ int err = 0; if (unlikely(afinfo == NULL))  return -EINVAL; if (unlikely(afinfo->family >= NPROTO))  return -EAFNOSUPPORT; write_lock_bh(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {// 数组中的协议信息结构等于指定的信息结构  if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))   err = -EINVAL;  else {// 清空协议信息数组元素和路由操作结构参数   struct dst_ops *dst_ops = afinfo->dst_ops;   xfrm_policy_afinfo[afinfo->family] = NULL;   dst_ops->kmem_cachep = NULL;   dst_ops->check = NULL;   dst_ops->negative_advice = NULL;   dst_ops->link_failure = NULL;   afinfo->garbage_collect = NULL;  } } 
write_unlock_bh(&xfrm_policy_afinfo_lock); return err;}EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);// 查找协议信息结构, 加读锁static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family){ struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= NPROTO))  return NULL; read_lock(&xfrm_policy_afinfo_lock);// 获取指定协议位置处的协议信息结构 afinfo = xfrm_policy_afinfo[family];// 如果该协议信息结构不存在, 解锁 if (unlikely(!afinfo))  read_unlock(&xfrm_policy_afinfo_lock); return afinfo;}// 释放协议信息结构, 解读锁static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo){ read_unlock(&xfrm_policy_afinfo_lock);}// 协议信息结构加写锁, 返回指定的协议信息结构, 错误时返回NULLstatic struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family){ struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= NPROTO))  return NULL; write_lock_bh(&xfrm_policy_afinfo_lock);// 获取指定协议位置处的协议信息结构 afinfo = xfrm_policy_afinfo[family];// 如果该协议信息结构不存在, 解锁 if (unlikely(!afinfo))  write_unlock_bh(&xfrm_policy_afinfo_lock); return afinfo;}// 协议信息结构解写锁static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo){ write_unlock_bh(&xfrm_policy_afinfo_lock);} 5.8.4 网卡回调// 网卡通知结构static struct notifier_block xfrm_dev_notifier = { xfrm_dev_event, NULL, 0};// 回调函数static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr){ switch (event) {// 就只响应网卡停事件, 删除和网卡相关的所有安全路由项 case NETDEV_DOWN:  xfrm_flush_bundles(); } return NOTIFY_DONE;}static int xfrm_flush_bundles(void){// 也是使用xfrm_prune_bundles()函数进行删除操作// 条件函数是stale_bundle xfrm_prune_bundles(stale_bundle); return 0;}// 判断安全路由项是否可用// 返回1表示不可用, 0表示可用static int stale_bundle(struct dst_entry *dst){ return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);}// 返回0表示不可用, 1表示可用int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  struct flowi *fl, int family, int strict){ struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; u32 mtu;// 检查路由项 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||// 检查网卡是否在运行    
 (dst->dev && !netif_running(dst->dev)))  return 0; last = NULL; do {// 安全路由  struct xfrm_dst *xdst = (struct xfrm_dst *)dst;// 检查SA选择子是否匹配流结构  if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))   return 0;  if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol))   return 0;// 检查SA状态是否合法  if (dst->xfrm->km.state != XFRM_STATE_VALID)   return 0;  if (xdst->genid != dst->xfrm->genid)   return 0;// 严格检查时, 检查非通道模式下的SA地址和流结构参数是否匹配  if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&      !xfrm_state_addr_flow_check(dst->xfrm, fl, family))   return 0;// 子路由项的MTU  mtu = dst_mtu(dst->child);  if (xdst->child_mtu_cached != mtu) {   last = xdst;   xdst->child_mtu_cached = mtu;  }// 通用路由检查  if (!dst_check(xdst->route, xdst->route_cookie))   return 0;// 安全路由相关的普通路由的MTU  mtu = dst_mtu(xdst->route);  if (xdst->route_mtu_cached != mtu) {   last = xdst;   xdst->route_mtu_cached = mtu;  }// 遍历安全路由链表  dst = dst->child; } while (dst->xfrm);// last是最后一个和子路由和普通路由的MTU不同的安全路由, 一般都是相同的 if (likely(!last))  return 1;// 调整各路由项中的MTU mtu = last->child_mtu_cached; for (;;) {  dst = &last->u.dst;  mtu = xfrm_state_mtu(dst->xfrm, mtu);  if (mtu > last->route_mtu_cached)   mtu = last->route_mtu_cached;  dst->metrics[RTAX_MTU-1] = mtu;  if (last == first)   break;  last = last->u.next;  last->child_mtu_cached = mtu; } return 1;}5.9 小结xfrm_policy相关函数的调用被调用关系可如下简单表示:ip_route_output_flow  ->xfrm_lookup: find xfrm_dst for the skb, create dst_list    -> xfrm_sk_policy_lookup    -> flow_cache_lookup    -> xfrm_find_bundle    -> xfrm_policy_lookup_bytype    -> xfrm_tmpl_resolve      -> xfrm_tmpl_resolve_one        -> xfrm_get_saddr          -> afinfo->get_saddr == xfrm4_get_saddr            -> xfrm4_dst_lookup        -> xfrm_state_find          -> __xfrm_state_lookup          -> xfrm_state_alloc          -> km_query            -> km->acquire (pfkey_acquire, xfrm_send_acquire)      -> xfrm_state_sort        -> afinfo->state_sort == NULL    -> km_wait_queue    -> 
xfrm_bundle_createdo_ip_setsockopt  -> xfrm_user_policy    -> km->compile_policy -> xfrm_sk_policy_insert pfkey_compile_policy  -> xfrm_policy_alloc    timer.func=xfrm_policy_timerpfkey_spdadd  -> xfrm_policy_alloc  -> xfrm_policy_insert    -> policy_hash_bysel    -> selector_cmp    -> xfrm_sel_ctx_match   pfkey_spddelete  -> xfrm_policy_bysel_ctx    -> policy_hash_bysel    -> xfrm_sel_ctx_match   pfkey_spdget  -> xfrm_policy_byidxfrm_flush_policypfkey_policy_flush  -> xfrm_policy_flush    -> xfrm_policy_killxfrm_dump_policy  -> xfrm_policy_walk    -> dump_one_policypfkey_spddump  -> xfrm_policy_walk    -> dump_spgen_reqid  -> xfrm_policy_walk    -> check_reqidxfrm_add_pol_expirexfrm_policy_timer  -> xfrm_policy_delete    -> __xfrm_policy_unlink    -> xfrm_policy_kill xfrm_sk_policy_insert  -> xfrm_get_index  -> __xfrm_policy_link  -> __xfrm_policy_unlink  -> xfrm_policy_kill  xfrm_sk_clone_policy  -> __xfrm_sk_clone_policy    -> clone_policy      -> xfrm_policy_alloc      -> __xfrm_policy_linkxfrm_decode_session  -> xfrm4_decode_sessionxfrm4_route_forward  -> xfrm_route_forward    -> __xfrm_route_forward      -> xfrm4_decode_session      -> xfrm_lookupxfrm4_policy_check  -> xfrm_policy_check    -> __xfrm_policy_check      -> xfrm4_decode_session      -> __xfrm_sk_policy_lookup        -> xfrm_selector_match      -> __flow_cache_lookup        -> xfrm_policy_lookup        -> xfrm_policy_lookup_bytype          -> policy_hash_direct          -> xfrm_policy_match            -> xfrm_selector_match      -> xfrm_policy_lookup_bytype      -> xfrm_tmpl_sort      -> xfrm_policy_ok        -> xfrm_state_ok       xfrm_flush_bundles  -> xfrm_prune_bundles    -> prune_one_bundles      -> stale_bundle     发表于： 2007-06-09，修改于： 2007-06-09 08:37，已浏览3527次，有评论4条 推荐 投诉网友： Zetalog 时间：2007-11-20 17:54:12 
IP地址：218.81.225.★关于xfrm_policy_lookup_bytype我来注释两句。可以说xfrm_policy_bydst中存放的是目标地址和源地址是单一地址的策略（换言之，就是掩码prefixlen_x都是32），而xfrm_policy_inexact里面存放的是掩码非32的策略。看函数xfrm_policy_lookup_bytype实现能发现一个问题，就是单一地址策略即使优先级低于掩码型策略，单一地址策略表也要被先遍历一遍。不知道为什么要将bydst分成两个哈希表。暂时还没看到。网友： Zetalog 时间：2007-11-20 17:54:12 IP地址：218.81.225.★关于xfrm_policy_lookup_bytype我来注释两句。可以说xfrm_policy_bydst中存放的是目标地址和源地址是单一地址的策略（换言之，就是掩码prefixlen_x都是32），而xfrm_policy_inexact里面存放的是掩码非32的策略。看函数xfrm_policy_lookup_bytype实现能发现一个问题，就是单一地址策略即使优先级低于掩码型策略，单一地址策略表也要被先遍历一遍。不知道为什么要将bydst分成两个哈希表。暂时还没看到。网友： Evan 时间：2008-03-26 15:03:24 IP地址：58.34.236.★请问： xfrm_policy_insert的最后要清理新插入的policy后面的dst_entry， 这样做的目的是什么呢， 如果不清理有什么影响呢？谢谢。网友： yfydz 时间：2008-04-01 09:07:00 IP地址：218.247.216.★没啥关系,gc工作队列应该也会处理,只是这个是新的策略,顺手把老的删除了也没啥
热点排行
UNIXLINUX

Linux内核中的IPSEC实现(3)