感谢支持
我们一直在努力

Linux 2.6.18.8内核中netfilter分析

1 简单介绍

从2.6.16内核开始,netfilter的一个重大改进思想就是将netfilter作为一个协议无关的框架,表现在内核源码树中单独建立了net/netfilter目录;而在此之前,netfilter是附着在各个协议目录之下的,如net/ipv4、net/ipv6等目录。现在各协议目录下虽然仍有netfilter代码,但主要处理和各协议相关的内容,而协议间共同的部分都放在net/netfilter目录下,文件名也有所改变。虽然现在分离得还不彻底,比如net/netfilter/nf_conntrack_core.c和net/ipv4/netfilter/ip_conntrack_core.c就仍然很相似,让人觉得没必要这样拆分,但不少和协议无关的匹配和目标模块已经和协议分离,只存在于此目录下,而不再放在协议目录下了。

在net/netfilter下的匹配和目标模块文件名称都以“xt_”打头,如 xt_comment.c,xt_policy.c等

目标模块有:

xt_CLASSIFY.c
xt_NFQUEUE.c
xt_NOTRACK.c

为了和iptables兼容(因为iptables找模块文件前缀是按“ipt_”或“ip6t_”找的),这些文件中增加了一个新的宏定义:MODULE_ALIAS,来表示模块的别名。

如在xt_limit.c中就如下定义:
MODULE_ALIAS(“ipt_limit”);
MODULE_ALIAS(“ip6t_limit”);

在include/linux/netfilter_ipv4/ip_tables.h中进行了以下定义:
/* Compatibility aliases: the legacy ipt_* names expand to the generic xt_* names. */
#define ipt_match xt_match
#define ipt_target xt_target
#define ipt_table xt_table

2 代码分析

以下是新匹配和目标模块的结构定义:
/*
 * Descriptor of one match extension: its name, its callbacks and the
 * constraints (table, hooks, protocol, family) under which it may be used.
 */
struct xt_match
{
   /* Linked into xt[family].match by xt_register_match(). */
   struct list_head list;

   /* Match name; printed in the xt_check_match() diagnostics. */
   const char name[XT_FUNCTION_MAXNAMELEN-1];

   /* Return true or false: return FALSE and set *hotdrop = 1 to
      force immediate packet drop. */
   /* Arguments changed since 2.6.9, as this must now handle
      non-linear skb, using skb_header_pointer and
      skb_ip_make_writable. */
   int (*match)(const struct sk_buff *skb,
          const struct net_device *in,
          const struct net_device *out,
          const struct xt_match *match,
          const void *matchinfo,
          int offset,
          unsigned int protoff,
          int *hotdrop);

   /* Called when user tries to insert an entry of this type. */
   /* Should return true or false. */
   int (*checkentry)(const char *tablename,
            const void *ip,
            const struct xt_match *match,
            void *matchinfo,
            unsigned int matchinfosize,
            unsigned int hook_mask);

   /* Called when entry of this type deleted. */
   void (*destroy)(const struct xt_match *match, void *matchinfo,
           unsigned int matchinfosize);

   /* Called when userspace align differs from kernel space one */
   int (*compat)(void *match, void **dstptr, int *size, int convert);

   /* Set this to THIS_MODULE if you are a module, otherwise NULL */
   struct module *me;

   /* If non-NULL, xt_check_match() accepts the match only in this table. */
   char *table;
   /* Size of the match data; xt_check_match() compares XT_ALIGN(matchsize)
      with the size found in the rule. */
   unsigned int matchsize;
   /* If non-zero, bitmask of the hooks the match may be used from. */
   unsigned int hooks;
   /* If non-zero, the only protocol the match is valid for. */
   unsigned short proto;

   /* Protocol family; used as the index into xt[] when registering. */
   unsigned short family;
   u_int8_t revision;
};

/*
 * Descriptor of one target extension: its name, its callbacks and the
 * constraints (table, hooks, protocol, family) under which it may be used.
 */
struct xt_target
{
   /* Linked into xt[family].target by xt_register_target(). */
   struct list_head list;

   /* Target name; printed in the xt_check_target() diagnostics. */
   const char name[XT_FUNCTION_MAXNAMELEN-1];

   /* Returns verdict. Argument order changed since 2.6.9, as this
      must now handle non-linear skbs, using skb_copy_bits and
      skb_ip_make_writable. */
   unsigned int (*target)(struct sk_buff **pskb,
                const struct net_device *in,
                const struct net_device *out,
                unsigned int hooknum,
                const struct xt_target *target,
                const void *targinfo,
                void *userdata);

   /* Called when user tries to insert an entry of this type:
      hook_mask is a bitmask of hooks from which it can be
      called. */
   /* Should return true or false. */
   int (*checkentry)(const char *tablename,
            const void *entry,
            const struct xt_target *target,
            void *targinfo,
            unsigned int targinfosize,
            unsigned int hook_mask);

   /* Called when entry of this type deleted. */
   void (*destroy)(const struct xt_target *target, void *targinfo,
           unsigned int targinfosize);

   /* Called when userspace align differs from kernel space one */
   int (*compat)(void *target, void **dstptr, int *size, int convert);

   /* Set this to THIS_MODULE if you are a module, otherwise NULL */
   struct module *me;

   /* If non-NULL, xt_check_target() accepts the target only in this table. */
   char *table;
   /* Size of the target data; xt_check_target() compares
      XT_ALIGN(targetsize) with the size found in the rule. */
   unsigned int targetsize;
   /* If non-zero, bitmask of the hooks the target may be used from. */
   unsigned int hooks;
   /* If non-zero, the only protocol the target is valid for. */
   unsigned short proto;

   /* Protocol family; used as the index into xt[] when registering. */
   unsigned short family;
   u_int8_t revision;
};

/* Furniture shopping... */
/*
 * One rule table of a given address family, as registered through
 * xt_register_table().
 */
struct xt_table
{
   /* Linked into xt[af].tables by xt_register_table(). */
   struct list_head list;

   /* A unique name... (xt_register_table() rejects duplicates with -EEXIST) */
   char name[XT_TABLE_MAXNAMELEN];

   /* What hooks you will enter on */
   unsigned int valid_hooks;

   /* Lock for the curtain; initialised by xt_register_table(). */
   rwlock_t lock;

   /* Man behind the curtain...
      Declared as void *; xt_register_table() reads it back as a
      struct xt_table_info *. */
   void *private;

   /* Set this to THIS_MODULE if you are a module, otherwise NULL */
   struct module *me;

   int af;        /* address/protocol family; index into xt[] */
};

/* The table itself: size bookkeeping plus one copy of the rule blob per CPU. */
struct xt_table_info
{
   /* Size per table (bytes of each per-CPU copy; set by xt_alloc_table_info()) */
   unsigned int size;
   /* Number of entries: FIXME. --RR */
   unsigned int number;
   /* Initial number of entries. Needed for module usage count.
      xt_register_table() sets it to 'number' at registration time. */
   unsigned int initial_entries;

   /* Entry points and underflows */
   unsigned int hook_entry[NF_IP_NUMHOOKS];
   unsigned int underflow[NF_IP_NUMHOOKS];

   /* ipt_entry tables: one per CPU */
   char *entries[NR_CPUS];
};

/*
 * Main structure: the registry of one protocol family, holding the lists of
 * registered matches, targets and tables.
 */
struct xt_af {
   /* Taken by the register functions around updates of the lists below. */
   struct mutex mutex;
   struct list_head match;
   struct list_head target;
   struct list_head tables;
   /* NOTE(review): xt_init() initialises this only under CONFIG_COMPAT,
      while the member is declared unconditionally here -- confirm. */
   struct mutex compat_mutex;
};

/* Management array for the structure above: one slot per protocol family,
   indexed by family number and allocated in xt_init(). */
static struct xt_af *xt;

/*
 * Module initialisation of the x_tables core.
 *
 * Allocates an array of NPROTO struct xt_af slots (one per protocol family)
 * and initialises the mutex(es) and the three list heads of every slot.
 *
 * Returns 0 on success, -ENOMEM if the array cannot be allocated.
 */
static int __init xt_init(void)
{
    int i;

    /* One registry slot per protocol family. */
    xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
    if (!xt)
        return -ENOMEM;

    for (i = 0; i < NPROTO; i++) {
        /* xt points to an array, so every slot must be addressed as xt[i]. */
        mutex_init(&xt[i].mutex);
#ifdef CONFIG_COMPAT
        mutex_init(&xt[i].compat_mutex);
#endif

        /* Start with empty target, match and table lists. */
        INIT_LIST_HEAD(&xt[i].target);
        INIT_LIST_HEAD(&xt[i].match);
        INIT_LIST_HEAD(&xt[i].tables);
    }
    return 0;
}
目前2.6.16内核中支持了三类协议族,IPv4/IPv6/ARP,在各协议族中查找相应模块用的前缀为:
/*
 * Name prefix of each supported protocol family, indexed by family number.
 * The check functions print it as "<prefix>_tables" in their diagnostics.
 */
static const char *xt_prefix[NPROTO] = {
    [AF_INET]  = "ip",
    [AF_INET6] = "ip6",
    [NF_ARP]   = "arp",
};
对应的具体前缀分别为“ipt”、“ip6t”、“arpt”。

而和老的2.4内核的struct ipt_match和struct ipt_target结构相比,主要区别是增加了compat函数,以及struct module *me成员后面的一系列成员(table、matchsize/targetsize、hooks、proto、family、revision),它们是和协议相关的。比如limit匹配,分别为ipv4和ipv6定义了匹配结构后,只有family成员不同,一个是AF_INET,另一个是AF_INET6,其他都相同;而挂接时并不会有问题,因为这些模块分别挂接到不同协议族的链表:

/* Registration hooks for targets. */
int
xt_register_target(struct xt_target *target)
{
   int ret, af = target->family;

   ret = mutex_lock_interruptible(&xt[af].mutex);
   if (ret != 0)
       return ret;
       
   /* 添加 target*/
   list_add(&target->list, &xt[af].target);
   mutex_unlock(&xt[af].mutex);
   return ret;
}

int
xt_register_match(struct xt_match *match)
{
   int ret, af = match->family;

   ret = mutex_lock_interruptible(&xt[af].mutex);
   if (ret != 0)
       return ret;
   /* 添加match */
   list_add(&match->list, &xt[af].match);
   mutex_unlock(&xt[af].mutex);

   return ret;
}

table注册发生在各协议的netfilter中:

/*
 * Register an IPv4 table built from an initial rule blob.
 *
 * @table: table descriptor to register
 * @repl:  initial ruleset (size, entries, hook entry points, underflows)
 *
 * Allocates a new xt_table_info, copies the initial entries into the
 * current CPU's copy, translates them and hands the result to
 * xt_register_table().
 *
 * Returns 0 on success, -ENOMEM if the table info cannot be allocated, or
 * the error from translate_table() / xt_register_table().  On every
 * failure after the allocation the new table info is freed.
 */
int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
    int ret;
    struct xt_table_info *newinfo;
    static struct xt_table_info bootstrap
        = { 0, 0, 0, { 0 }, { 0 }, { } };
    void *loc_cpu_entry;

    newinfo = xt_alloc_table_info(repl->size);
    if (!newinfo)
        return -ENOMEM;

    /* choose the copy on our node/cpu
     * but dont care of preemption
     */
    loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
    memcpy(loc_cpu_entry, repl->entries, repl->size);

    ret = translate_table(table->name, table->valid_hooks,
                          newinfo, loc_cpu_entry, repl->size,
                          repl->num_entries,
                          repl->hook_entry,
                          repl->underflow);
    if (ret != 0)
        goto free_newinfo;

    /*
     * Keep the registration result: returning the stale (zero) value left
     * by translate_table() would report success after the table info had
     * already been freed.
     */
    ret = xt_register_table(table, &bootstrap, newinfo);
    if (ret != 0)
        goto free_newinfo;

    return 0;

free_newinfo:
    xt_free_table_info(newinfo);
    return ret;
}

/*
 * Allocate a table info with one entry blob of 'size' bytes per possible
 * CPU.
 *
 * Returns the new table info, or NULL if the request is too large for the
 * machine or any allocation fails (anything allocated so far is released
 * through xt_free_table_info()).
 */
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
    struct xt_table_info *info;
    int cpu;

    /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
    /* Reject requests that exceed the number of physical pages. */
    if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
        return NULL;

    info = kzalloc(sizeof(*info), GFP_KERNEL);
    if (info == NULL)
        return NULL;

    info->size = size;

    /* Walk every possible CPU and give it its own copy. */
    for_each_possible_cpu(cpu) {
        void *blob;

        /* Blobs up to one page come from kmalloc_node(), larger ones
           from vmalloc_node(); both on the CPU's own node. */
        if (size > PAGE_SIZE)
            blob = vmalloc_node(size, cpu_to_node(cpu));
        else
            blob = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));

        info->entries[cpu] = blob;
        if (blob == NULL) {
            xt_free_table_info(info);
            return NULL;
        }
    }

    return info;
}

int xt_register_table(struct xt_table *table,
            struct xt_table_info *bootstrap,
            struct xt_table_info *newinfo)
{
   int ret;
   struct xt_table_info *private;

   ret = mutex_lock_interruptible(&xt[table->af].mutex);
   if (ret != 0)
       return ret;

   /* Don’t autoload: we’d eat our tail… */
   if (list_named_find(&xt[table->af].tables, table->name)) {
       ret = -EEXIST;
       goto unlock;
   }

   /* Simplifies replace_table code. */
   table->private = bootstrap;
   rwlock_init(&table->lock);
   if (!xt_replace_table(table, 0, newinfo, &ret))
       goto unlock;

   private = table->private;
   duprintf(“table->private->number = %u\n”, private->number);

   /* save number of initial entries */
   private->initial_entries = private->number;

   list_prepend(&xt[table->af].tables, table);

   ret = 0;
unlock:
   mutex_unlock(&xt[table->af].mutex);
   return ret;
}

但在实际查找匹配和目标时,会进行名字、协议族、表名、挂接点、协议等的比较,如匹配的检查:
int xt_check_match(const struct xt_match *match, unsigned short family,
            unsigned int size, const char *table, unsigned int hook_mask,
          unsigned short proto, int inv_proto)
{
   if (XT_ALIGN(match->matchsize) != size) {
       printk(“%s_tables: %s match: invalid size %Zu != %u\n”,
            xt_prefix[family], match->name,
            XT_ALIGN(match->matchsize), size);
       return -EINVAL;
   }
   if (match->table && strcmp(match->table, table)) {
       printk(“%s_tables: %s match: only valid in %s table, not %s\n”,
            xt_prefix[family], match->name, match->table, table);
       return -EINVAL;
   }
   if (match->hooks && (hook_mask & ~match->hooks) != 0) {
       printk(“%s_tables: %s match: bad hook_mask %u\n”,
            xt_prefix[family], match->name, hook_mask);
       return -EINVAL;
   }
   if (match->proto && (match->proto != proto || inv_proto)) {
       printk(“%s_tables: %s match: only valid for protocol %u\n”,
            xt_prefix[family], match->name, match->proto);
       return -EINVAL;
   }
   return 0;
}

int xt_check_target(const struct xt_target *target, unsigned short family,
          unsigned int size, const char *table, unsigned int hook_mask,
          unsigned short proto, int inv_proto)
{
   if (XT_ALIGN(target->targetsize) != size) {
       printk(“%s_tables: %s target: invalid size %Zu != %u\n”,
            xt_prefix[family], target->name,
            XT_ALIGN(target->targetsize), size);
       return -EINVAL;
   }
   if (target->table && strcmp(target->table, table)) {
       printk(“%s_tables: %s target: only valid in %s table, not %s\n”,
            xt_prefix[family], target->name, target->table, table);
       return -EINVAL;
   }
   if (target->hooks && (hook_mask & ~target->hooks) != 0) {
       printk(“%s_tables: %s target: bad hook_mask %u\n”,
            xt_prefix[family], target->name, hook_mask);
       return -EINVAL;
   }
   if (target->proto && (target->proto != proto || inv_proto)) {
       printk(“%s_tables: %s target: only valid for protocol %u\n”,
            xt_prefix[family], target->name, target->proto);
       return -EINVAL;
   }
   return 0;
}

/* 下面是ipsec的policy检验过程处理 */

/*
 * Descriptor of the "policy" match for the AF_INET family.
 * The match and checkentry handlers are defined elsewhere in the module.
 */
static struct xt_match policy_match = {
    .name       = "policy",
    .family     = AF_INET,      /* listed once; the duplicate initializer was redundant */
    .match      = match,
    .matchsize  = sizeof(struct xt_policy_info),
    .checkentry = checkentry,
    .me         = THIS_MODULE,
};

/*
 * Module initialisation: register policy_match and policy6_match.
 *
 * If the second registration fails, the first one is undone so that
 * nothing stays registered.  Returns 0 on success or the error of the
 * failing registration.
 */
static int __init init(void)
{
    int err = xt_register_match(&policy_match);

    if (err != 0)
        return err;

    err = xt_register_match(&policy6_match);
    if (err == 0)
        return 0;

    /* Second registration failed: roll back the first. */
    xt_unregister_match(&policy_match);
    return err;
}

/*
 * Match the security path of an incoming packet against the policy
 * elements in 'info'.
 *
 * @skb:    packet whose security path (skb->sp) is examined
 * @info:   match configuration: flags, number of elements and elements
 * @family: protocol family, passed on to match_xfrm_state()
 *
 * Returns -1 if the packet has no security path, 1 on a match and 0
 * otherwise.  In strict mode (XT_POLICY_MATCH_STRICT) the lengths must be
 * equal and every state must match; in non-strict mode a single matching
 * state is enough.
 */
static int
match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
                unsigned short family)
{
    const struct xt_policy_elem *e;
    struct sec_path *sp = skb->sp;
    int strict = info->flags & XT_POLICY_MATCH_STRICT;
    int i, pos;

    if (sp == NULL)
        return -1;
    if (strict && info->len != sp->len)
        return 0;

    /* Walk the security path from its last state down to the first. */
    for (i = sp->len - 1; i >= 0; i--) {
        pos = strict ? i - sp->len + 1 : 0;
        if (pos >= info->len)
            return 0;
        e = &info->pol[pos];

        /*
         * Check the policy element against the state of this iteration.
         * NOTE(review): the [i] subscript is restored here on the
         * assumption that it was lost in extraction (as in xt_init);
         * confirm against the kernel tree this snippet was taken from.
         */
        if (match_xfrm_state(sp->xvec[i], e, family)) {
            if (!strict)
                return 1;
        } else if (strict)
            return 0;
    }

    return strict ? 1 : 0;
}

赞(0) 打赏
转载请注明出处:服务器评测 » Linux 2.6.18.8内核中netfilter分析
分享到: 更多 (0)

听说打赏我的人,都进福布斯排行榜啦!

支付宝扫一扫打赏

微信扫一扫打赏