Linux虚拟文件系统(VFS)的初始化,主要由内核初始化函数start_kernel()中调用的两个函数来完成。
[cpp]
- asmlinkage void __init start_kernel(void)
- {
- ……
- vfs_caches_init_early(); /* first VFS init call made from start_kernel() */
- ……
- vfs_caches_init(totalram_pages); /* second VFS init call; receives totalram_pages */
- ……
- }
一、早期初始化
虚拟文件系统的早期初始化由函数vfs_caches_init_early()实现,主要负责dentry和inode的hashtable的初始化工作。
[cpp]
- /* Early VFS initialization, called from start_kernel().
- * Runs the early setup routines for the dentry and inode hash tables.
- */
- void __init vfs_caches_init_early(void)
- {
- /* Set up the two hash tables (dentry first, then inode). */
- dcache_init_early();
- inode_init_early();
- }
1.1 dcache
[cpp]
- static void __init dcache_init_early(void)
- {
- int loop;
- /* Early setup of the dentry hash table.
- * If hashes are distributed across NUMA nodes, defer
- * hash allocation until vmalloc space is available.
- * (In that case dcache_init() performs the allocation instead.)
- */
- if (hashdist)
- return;
- /* Allocate the dentry hash table. HASH_EARLY is passed on this path;
- * d_hash_shift and d_hash_mask are passed by address, and the loop
- * below relies on d_hash_shift for the number of buckets. */
- dentry_hashtable =
- alloc_large_system_hash(“Dentry cache”,
- sizeof(struct hlist_head),
- dhash_entries,
- 13,
- HASH_EARLY,
- &d_hash_shift,
- &d_hash_mask,
- 0);
- /* Initialize every bucket (hlist head) of the hash table. */
- for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
- }
1.2 inode
[cpp]
- /*
- * Initialize the waitqueues and inode hash table.
- * Early variant, called from vfs_caches_init_early(); only the hash
- * table setup is visible in this listing.
- */
- void __init inode_init_early(void)
- {
- int loop;
- /* If hashes are distributed across NUMA nodes, defer
- * hash allocation until vmalloc space is available.
- * (In that case inode_init() performs the allocation instead.)
- */
- if (hashdist)
- return;
- /* Allocate memory for the inode hash table. HASH_EARLY is passed on
- * this path; i_hash_shift and i_hash_mask are passed by address, and
- * the loop below relies on i_hash_shift for the number of buckets. */
- inode_hashtable =
- alloc_large_system_hash(“Inode-cache”,
- sizeof(struct hlist_head),
- ihash_entries,
- 14,
- HASH_EARLY,
- &i_hash_shift,
- &i_hash_mask,
- 0);
- /* Initialize every bucket (hlist head) of the hash table. */
- for (loop = 0; loop < (1 << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
- }
二、后期初始化
这阶段对dentry、inode、files、mount、块设备驱动模型以及字符设备驱动模型做了相应的初始化。
[cpp]
- /* Main VFS initialization, called from start_kernel().
- * mempages is the page count supplied by the caller; it is reduced by a
- * reserve before being handed to files_init().
- */
- void __init vfs_caches_init(unsigned long mempages)
- {
- unsigned long reserve;
- /* Base hash sizes on available memory, with a reserve equal to
- 150% of current kernel size */
- reserve = min((mempages – nr_free_pages()) * 3/2, mempages – 1);
- mempages -= reserve;
- /* Create the slab cache used for path names (objects of PATH_MAX bytes). */
- names_cachep = kmem_cache_create(“names_cache”, PATH_MAX, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- /* Initialize the dentry cache and related state. */
- dcache_init();
- inode_init();/* inode initialization */
- files_init(mempages);/* file-related initialization (the original note says this includes the file descriptor table) */
- mnt_init();/* mount initialization */
- bdev_cache_init();/* block device cache and bdev pseudo-fs setup */
- /* Initialize the character device driver model. */
- chrdev_init();
- }
2.1 dentry初始化
[cpp]
- static void __init dcache_init(void)
- {
- int loop;
- /*
- * A constructor could be added for stable state like the lists,
- * but it is probably not worth it because of the cache nature
- * of the dcache.
- *
- * Create the slab cache for dentry objects.
- */
- dentry_cache = KMEM_CACHE(dentry,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
- /* Register the dcache shrinker. */
- register_shrinker(&dcache_shrinker);
- /* Hash may have been set up in dcache_init_early */
- if (!hashdist)
- return;
- /* Reached only when hashdist is set, i.e. exactly the case in which
- * dcache_init_early() returned without allocating anything. Allocate
- * and initialize the dentry hash table here; unlike the early path,
- * the flags argument is 0 rather than HASH_EARLY. */
- dentry_hashtable =
- alloc_large_system_hash(“Dentry cache”,
- sizeof(struct hlist_head),
- dhash_entries,
- 13,
- 0,
- &d_hash_shift,
- &d_hash_mask,
- 0);
- /* Initialize every bucket (hlist head) of the hash table. */
- for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
- }
2.2 inode初始化
[cpp]
- void __init inode_init(void)
- {
- int loop;
- /* inode slab cache */
- /* Create the slab cache for struct inode objects; init_once is passed
- * as the final argument of kmem_cache_create(). */
- inode_cachep = kmem_cache_create(“inode_cache”,
- sizeof(struct inode),
- 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
- SLAB_MEM_SPREAD),
- init_once);
- /* Register the icache shrinker. */
- register_shrinker(&icache_shrinker);
- /* Hash may have been set up in inode_init_early */
- if (!hashdist)
- return;
- /* Reached only when hashdist is set, i.e. the case in which
- * inode_init_early() returned without allocating. Allocate the inode
- * hash table here; the flags argument is 0 rather than HASH_EARLY. */
- inode_hashtable =
- alloc_large_system_hash(“Inode-cache”,
- sizeof(struct hlist_head),
- ihash_entries,
- 14,
- 0,
- &i_hash_shift,
- &i_hash_mask,
- 0);
- /* Initialize every bucket (hlist head) of the hash table. */
- for (loop = 0; loop < (1 << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
- }
2.3 files初始化
[cpp]
- void __init files_init(unsigned long mempages)
- {
- int n;
- /* Create the slab cache for struct file objects. */
- filp_cachep = kmem_cache_create(“filp”, sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
- /*
- * One file with associated inode and dcache is very roughly 1K.
- * Per default do not use more than 10% of our memory for files.
- *
- * NOTE(review): n is an int while the expression below is computed in
- * unsigned long; on a machine with a very large mempages the value
- * could be truncated -- confirm whether that matters here.
- */
- n = (mempages * (PAGE_SIZE / 1024)) / 10;
- files_stat.max_files = n; /* record the computed limit in the file statistics */
- if (files_stat.max_files < NR_FILE) /* never go below NR_FILE */
- files_stat.max_files = NR_FILE;
- files_defer_init();/* NOTE(review): definition not shown; the name suggests it sets up deferred fd-table handling rather than freeing anything -- confirm */
- percpu_counter_init(&nr_files, 0);/* start the nr_files counter at 0 */
- }
2.4 mount初始化
[cpp]
- void __init mnt_init(void)
- {
- unsigned u;
- int err;
- init_rwsem(&namespace_sem);
- /* Create the slab cache for struct vfsmount objects. */
- mnt_cache = kmem_cache_create(“mnt_cache”, sizeof(struct vfsmount),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
- /* Allocate one free page to hold the mount hash table; failure is fatal. */
- mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
- if (!mount_hashtable)
- panic(“Failed to allocate mount hash table\n”);
- printk(“Mount-cache hash table entries: %lu\n”, HASH_SIZE);
- for (u = 0; u < HASH_SIZE; u++)
- INIT_LIST_HEAD(&mount_hashtable[u]);/* initialize each bucket list head */
- err = sysfs_init();/* initialize the sysfs filesystem */
- if (err) /* a sysfs failure is only warned about, not treated as fatal */
- printk(KERN_WARNING “%s: sysfs_init error: %d\n”,
- __func__, err);
- fs_kobj = kobject_create_and_add(“fs”, NULL);
- if (!fs_kobj) /* likewise only a warning */
- printk(KERN_WARNING “%s: kobj create error\n”, __func__);
- init_rootfs();/* per the original note: initializes ramfs and rootfs (definition not shown) */
- init_mount_tree();/* build the initial mount tree */
- }
[cpp]
- static void __init init_mount_tree(void)
- {
- struct vfsmount *mnt;
- struct mnt_namespace *ns;
- struct path root;
- mnt = do_kern_mount(“rootfs”, 0, “rootfs”, NULL); /* mount the rootfs filesystem type */
- if (IS_ERR(mnt))
- panic(“Can’t create rootfs”);
- ns = create_mnt_ns(mnt);/* create a mount namespace whose root is mnt */
- if (IS_ERR(ns))
- panic(“Can’t allocate initial namespace”);
- /* Install it as the mount namespace of init_task. */
- init_task.nsproxy->mnt_ns = ns;
- get_mnt_ns(ns);/* presumably takes an extra reference on the namespace (definition not shown) */
- /* Fill in root from the root mount of the namespace. */
- root.mnt = ns->root;
- root.dentry = ns->root->mnt_root;
- /* Point both the working directory and the root of current->fs at that path. */
- set_fs_pwd(current->fs, &root);
- set_fs_root(current->fs, &root);
- }
2.4.1 创建命名空间
[cpp]
- /**
- * create_mnt_ns - creates a private namespace and adds a root filesystem
- * @mnt: pointer to the new root filesystem mountpoint
- *
- * Returns the new namespace, or the error pointer produced by
- * alloc_mnt_ns() unchanged when allocation fails.
- */
- struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
- {
- struct mnt_namespace *new_ns;
- new_ns = alloc_mnt_ns();/* allocate and initialize an empty namespace */
- if (!IS_ERR(new_ns)) {
- /* Tie the namespace and the mount together in both directions. */
- mnt->mnt_ns = new_ns;
- new_ns->root = mnt;
- list_add(&new_ns->list, &new_ns->root->mnt_list); /* link the namespace list with the mnt_list of the root mount */
- }
- return new_ns;
- }
[cpp]
- static struct mnt_namespace *alloc_mnt_ns(void)
- {
- struct mnt_namespace *new_ns;
- /* Allocate the namespace structure with kmalloc(); on failure an
- * error pointer carrying -ENOMEM is returned instead of NULL. */
- new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
- if (!new_ns)
- return ERR_PTR(-ENOMEM);
- /* Initialize the fields: count starts at 1, no root mount yet,
- * empty list, fresh poll wait queue, event counter at 0. */
- atomic_set(&new_ns->count, 1);
- new_ns->root = NULL;
- INIT_LIST_HEAD(&new_ns->list);
- init_waitqueue_head(&new_ns->poll);
- new_ns->event = 0;
- return new_ns;
- }
2.4.2 创建mount
[cpp]
- struct vfsmount *
- do_kern_mount(const char *fstype, int flags, const char *name, void *data)
- { /* Look up the filesystem type named by fstype and mount it through
- * vfs_kern_mount(). Returns the mount or an error pointer. */
- struct file_system_type *type = get_fs_type(fstype);
- struct vfsmount *mnt;
- if (!type)
- return ERR_PTR(-ENODEV); /* no filesystem type with that name */
- mnt = vfs_kern_mount(type, flags, name, data);
- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
- !mnt->mnt_sb->s_subtype) /* type has FS_HAS_SUBTYPE but the superblock has no subtype yet */
- mnt = fs_set_subtype(mnt, fstype);
- put_filesystem(type); /* presumably drops the reference taken by get_fs_type() (definitions not shown) */
- return mnt; /* may still be an error pointer */
- }
[cpp]
- struct vfsmount *
- vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
- { /* Allocate a vfsmount for the given filesystem type, have the type fill
- * in its superblock via get_sb(), and return the mount. On any failure
- * the partially built state is unwound and an error pointer is returned. */
- struct vfsmount *mnt;
- char *secdata = NULL;
- int error;
- if (!type)
- return ERR_PTR(-ENODEV);
- error = -ENOMEM; /* default error for the allocation failures below */
- /* Allocate a vfsmount (alloc_vfsmnt takes it from mnt_cache). */
- mnt = alloc_vfsmnt(name);
- if (!mnt)
- goto out;
- if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { /* mount data present and not flagged as binary */
- secdata = alloc_secdata();
- if (!secdata)
- goto out_mnt;
- error = security_sb_copy_data(data, secdata);
- if (error)
- goto out_free_secdata;
- }
- /* Call the get_sb() method of the filesystem type. */
- error = type->get_sb(type, flags, name, data, mnt);
- if (error < 0)
- goto out_free_secdata;
- BUG_ON(!mnt->mnt_sb); /* a successful get_sb() must have set the superblock */
- error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- if (error)
- goto out_sb;
- /*
- * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
- * but s_maxbytes was an unsigned long long for many releases. Throw
- * this warning for a little while to try and catch filesystems that
- * violate this rule. This warning should be either removed or
- * converted to a BUG() in 2.6.34.
- */
- WARN((mnt->mnt_sb->s_maxbytes < 0), “%s set sb->s_maxbytes to “
- “negative value (%lld)\n”, type->name, mnt->mnt_sb->s_maxbytes);
- /* Finish the mount fields: the mount is its own parent and its
- * mountpoint is its own root. */
- mnt->mnt_mountpoint = mnt->mnt_root;
- mnt->mnt_parent = mnt;
- up_write(&mnt->mnt_sb->s_umount); /* presumably s_umount was left write-held by get_sb() -- confirm */
- free_secdata(secdata);
- return mnt;
- /* Error unwinding: each label undoes one step and falls through to the next. */
- out_sb:
- dput(mnt->mnt_root);
- deactivate_locked_super(mnt->mnt_sb);
- out_free_secdata:
- free_secdata(secdata);
- out_mnt:
- free_vfsmnt(mnt);
- out:
- return ERR_PTR(error);
- }
[cpp]
- struct vfsmount *alloc_vfsmnt(const char *name)
- {
- /* Take a zeroed vfsmount from the mnt_cache slab cache.
- * Returns NULL if this or any later step fails. */
- struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
- /* Initialize the new mount; skipped entirely when the allocation failed. */
- if (mnt) {
- int err;
- err = mnt_alloc_id(mnt); /* assign a mount id */
- if (err)
- goto out_free_cache;
- if (name) { /* keep a private copy of the device name, if one was given */
- mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
- if (!mnt->mnt_devname)
- goto out_free_id;
- }
- atomic_set(&mnt->mnt_count, 1); /* count starts at 1 */
- INIT_LIST_HEAD(&mnt->mnt_hash);
- INIT_LIST_HEAD(&mnt->mnt_child);
- INIT_LIST_HEAD(&mnt->mnt_mounts);
- INIT_LIST_HEAD(&mnt->mnt_list);
- INIT_LIST_HEAD(&mnt->mnt_expire);
- INIT_LIST_HEAD(&mnt->mnt_share);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
- INIT_LIST_HEAD(&mnt->mnt_slave);
- #ifdef CONFIG_SMP
- mnt->mnt_writers = alloc_percpu(int); /* per-CPU writer counter on SMP builds */
- if (!mnt->mnt_writers)
- goto out_free_devname;
- #else
- mnt->mnt_writers = 0; /* plain counter otherwise */
- #endif
- }
- return mnt; /* NULL here when the initial slab allocation failed */
- /* Error unwinding: labels fall through, undoing steps in reverse order. */
- #ifdef CONFIG_SMP
- out_free_devname:
- kfree(mnt->mnt_devname);
- #endif
- out_free_id:
- mnt_free_id(mnt);
- out_free_cache:
- kmem_cache_free(mnt_cache, mnt);
- return NULL;
- }
2.5 块设备驱动模型初始化
[cpp]
- void __init bdev_cache_init(void)
- {
- int err;
- struct vfsmount *bd_mnt;
- /* Create the slab cache for struct bdev_inode objects; init_once is
- * passed as the final argument of kmem_cache_create(). */
- bdev_cachep = kmem_cache_create(“bdev_cache”, sizeof(struct bdev_inode),
- 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_PANIC),
- init_once);
- /* Register the bdev filesystem type (bd_type); failure is fatal. */
- err = register_filesystem(&bd_type);
- if (err)
- panic(“Cannot register bdev pseudo-fs”);
- bd_mnt = kern_mount(&bd_type); /* mount it internally; failure is fatal as well */
- if (IS_ERR(bd_mnt))
- panic(“Cannot create bdev pseudo-fs”);
- /*
- * This vfsmount structure is only used to obtain the
- * blockdev_superblock, so tell kmemleak not to report it.
- */
- kmemleak_not_leak(bd_mnt);
- blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
- }
2.6 字符设备驱动模型初始化
[cpp]
- void __init chrdev_init(void)
- {
- cdev_map = kobj_map_init(base_probe, &chrdevs_lock); /* create cdev_map from base_probe and chrdevs_lock */
- /* Initialize directly_mappable_cdev_bdi via bdi_init(); the original
- * note describes this step as character device driver initialization. */
- bdi_init(&directly_mappable_cdev_bdi);
- }
这里对linux虚拟文件系统的初始化工作做了整体的梳理,后面将对涉及到的细节做补充,包括inode和dentry cache shrinker的注册、sysfs的初始化等。