前面总结了高端内存区的固定内核映射区、临时内核映射与永久内核映射。但是对于高端内存中各个区间的布置我们仍然不是很清楚,首先我们从整体上看看内核对高端内存的划分情况。
如果内存足够大(比如用户:内核线性空间=3:1,内核就只能访问线性空间的第4GB内容,如果物理内存超过1GB则视为足够大),内核线性空间无法同时映射所有内存。这就需要将内核线性空间分出一段不直接映射物理内存,而是作为窗口分时映射使用到的未映射的内存。
相关阅读:
http://www.linuxidc.com/Linux/2012-02/53457.htm
http://www.linuxidc.com/Linux/2012-02/53458.htm
http://www.linuxidc.com/Linux/2012-02/53459.htm
一、非连续内存区布局
Linux内核中非连续内存区起始地址的定义:
[cpp]
- #define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
[cpp]
- #define VMALLOC_OFFSET (8 * 1024 * 1024)
其中,变量high_memory的初始化如下:
[cpp]
- void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
- {
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > max_low_pfn)
- highstart_pfn = max_low_pfn;
- ……
- num_physpages = highend_pfn;
- /*高端内存起始物理地址对应的线性(虚拟)地址*/
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
- ……
- }
其中,变量max_low_pfn在highmem_pfn_init()函数中初始化为MAXMEM对应的页框号(MAXMEM_PFN),MAXMEM的定义如下:
[cpp]
- #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
[cpp]
- unsigned int __VMALLOC_RESERVE = 128 << 20;
对于非连续区间的结束定义:
[cpp]
- # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
由上面的内核代码,画出内存布局细节图如下
由上面的布局可知,这部分线性空间的总大小为128M+4M+4M+8K。然而直接映射区和非连续内存区之间空出了8M的空间不能用(即VMALLOC_OFFSET),非连续内存区和永久内核映射区之间也有8K的空间不可用(即2 * PAGE_SIZE),另外,内存顶端还空出了4K不可用。这样,高端内存能用的空间为128M+4M+4M+8K-4K-8M-8K=128M-4K大小的内存。
二、数据结构描述
虚拟内存区描述(对于vmlist链表)
[cpp]
- struct vm_struct {
- struct vm_struct *next;
- void *addr;/*内存区的第一个内存单元的线性地址*/
- unsigned long size;
- unsigned long flags;/*类型*/
- struct page **pages;/*指向nr_pages数组的指针,该数组由指向页描述符的指针组成*/
- unsigned int nr_pages;/*内存区填充的页的个数*/
- unsigned long phys_addr;/*该字段设为0,除非内存已被创建来映射一个硬件设备的IO共享内存*/
- void *caller;
- };
虚拟内存区描述(对于红黑树)
[cpp]
- struct vmap_area {
- unsigned long va_start;
- unsigned long va_end;
- unsigned long flags;
- struct rb_node rb_node; /* address sorted rbtree */
- struct list_head list; /* address sorted list */
- struct list_head purge_list; /* "lazy purge" list */
- void *private;
- struct rcu_head rcu_head;
- };
各个vm_struct内存区由next字段链接到一起,并且为了查找简单,它们以地址为次序排列。为了防止溢出,每个区域之间至少由一个页面隔离开。
三、非连续内存区初始化
非连续内存区的初始化工作在start_kernel()->mm_init()->vmalloc_init()完成
[cpp]
- void __init vmalloc_init(void)
- {
- struct vmap_area *va;
- struct vm_struct *tmp;
- int i;
- for_each_possible_cpu(i) {
- struct vmap_block_queue *vbq;
- vbq = &per_cpu(vmap_block_queue, i);
- spin_lock_init(&vbq->lock);
- INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
- }
- /* Import existing vmlist entries. */
- for (tmp = vmlist; tmp; tmp = tmp->next) {/*导入vmlist中已经有的数据到红黑树中*/
- va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
- va->flags = tmp->flags | VM_VM_AREA;
- va->va_start = (unsigned long)tmp->addr;
- va->va_end = va->va_start + tmp->size;
- __insert_vmap_area(va);
- }
- vmap_area_pcpu_hole = VMALLOC_END;
- vmap_initialized = true;/*已经初始化*/
- }
四、创建非连续内存的线性区
vm_struct结构链接在一个链表中,链表的第一个元素的地址存放在vmlist变量中。当内核需要分配一块新的内存时,函数get_vm_area()分配结构体所需要的空间,然后将其插入到链表中。另外,该版本的内核中增加了红黑树的管理。函数get_vm_area()不仅要将其插入到vmlist链表中,还要将结构体vmap_area插入到以vmap_area_root为根的红黑树中。
get_vm_area()函数会调用__get_vm_area_node()函数
[cpp]
- static struct vm_struct *__get_vm_area_node(unsigned long size,
- unsigned long align, unsigned long flags, unsigned long start,
- unsigned long end, int node, gfp_t gfp_mask, void *caller)
- {
- static struct vmap_area *va;
- struct vm_struct *area;
- BUG_ON(in_interrupt());
- if (flags & VM_IOREMAP) {
- int bit = fls(size);
- if (bit > IOREMAP_MAX_ORDER)
- bit = IOREMAP_MAX_ORDER;
- else if (bit < PAGE_SHIFT)
- bit = PAGE_SHIFT;
- align = 1ul << bit;
- }
- size = PAGE_ALIGN(size);
- if (unlikely(!size))
- return NULL;
- /*分配vm_struct结构体内存空间*/
- area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
- if (unlikely(!area))
- return NULL;
- /*
- * We always allocate a guard page.
- */
- size += PAGE_SIZE;/*为安全考虑,多一个页面*/
- /*分配vmap_area结构体,并且将其插入到红黑树中*/
- va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
- if (IS_ERR(va)) {
- kfree(area);
- return NULL;
- }
- /*插入vmlist链表*/
- insert_vmalloc_vm(area, va, flags, caller);
- return area;
- }
[cpp]
- /*
- * Allocate a region of KVA of the specified size and alignment, within the
- * vstart and vend.
- */
- static struct vmap_area *alloc_vmap_area(unsigned long size,
- unsigned long align,
- unsigned long vstart, unsigned long vend,
- int node, gfp_t gfp_mask)
- {
- struct vmap_area *va;
- struct rb_node *n;
- unsigned long addr;
- int purged = 0;
- BUG_ON(!size);
- BUG_ON(size & ~PAGE_MASK);
- /*分配vmap_area结构*/
- va = kmalloc_node(sizeof(struct vmap_area),
- gfp_mask & GFP_RECLAIM_MASK, node);
- if (unlikely(!va))
- return ERR_PTR(-ENOMEM);
- retry:
- addr = ALIGN(vstart, align);
- spin_lock(&vmap_area_lock);
- if (addr + size - 1 < addr)
- goto overflow;
- /* XXX: could have a last_hole cache */
- n = vmap_area_root.rb_node;
- if (n) {
- struct vmap_area *first = NULL;
- do {
- struct vmap_area *tmp;
- tmp = rb_entry(n, struct vmap_area, rb_node);
- if (tmp->va_end >= addr) {
- if (!first && tmp->va_start < addr + size)
- first = tmp;
- n = n->rb_left;
- } else {
- first = tmp;
- n = n->rb_right;
- }
- } while (n);
- if (!first)/*为最左的孩子,也就是比现有的都小*/
- goto found;
- if (first->va_end < addr) {
- n = rb_next(&first->rb_node);
- if (n)
- first = rb_entry(n, struct vmap_area, rb_node);
- else/*next为空*/
- goto found;/*为找到的节点的下一个,也就是比找到的大*/
- }
- /*当上面没有满足要求时,重新配置addr,也就是起始
- 地址*/
- while (addr + size > first->va_start && addr + size <= vend) {
- addr = ALIGN(first->va_end + PAGE_SIZE, align);/*重新配置起始地址*/
- if (addr + size - 1 < addr)
- goto overflow;
- n = rb_next(&first->rb_node);
- if (n)
- first = rb_entry(n, struct vmap_area, rb_node);
- else
- goto found;/*此时应该插入到找到的节点的右边*/
- }
- }
- found:
- if (addr + size > vend) {
- overflow:
- spin_unlock(&vmap_area_lock);
- if (!purged) {
- purge_vmap_area_lazy();
- purged = 1;
- goto retry;
- }
- if (printk_ratelimit())
- printk(KERN_WARNING
- "vmap allocation for size %lu failed: "
- "use vmalloc=<size> to increase size.\n", size);
- kfree(va);
- return ERR_PTR(-EBUSY);
- }
- BUG_ON(addr & (align-1));
- /*初始化va*/
- va->va_start = addr;
- va->va_end = addr + size;
- va->flags = 0;
- /*插入到红黑树*/
- __insert_vmap_area(va);
- spin_unlock(&vmap_area_lock);
- return va;
- }
[cpp]
- static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
- unsigned long flags, void *caller)
- {
- struct vm_struct *tmp, **p;
- /*初始化vm*/
- vm->flags = flags;
- vm->addr = (void *)va->va_start;
- vm->size = va->va_end - va->va_start;
- vm->caller = caller;
- va->private = vm;
- va->flags |= VM_VM_AREA;
- write_lock(&vmlist_lock);
- /*寻找插入位置*/
- for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
- if (tmp->addr >= vm->addr)
- break;
- }
- /*插入工作*/
- vm->next = *p;
- *p = vm;
- write_unlock(&vmlist_lock);
- }
以上初步总结了高端内存非连续区的管理框架,后面将总结它的分配和释放工作。