在linux内核中,有一种通用的双向循环链表,构成了各种队列的基础。链表的结构定义和相关函数均在include/linux/list.h中,下面就来全面的介绍这一链表的各种API。
- struct list_head {
- struct list_head *next, *prev;
- };
这是链表的元素结构。因为是循环链表,表头和表中节点都是这一结构。有prev和next两个指针,分别指向链表中前一节点和后一节点。
- #define LIST_HEAD_INIT(name) { &(name), &(name) }
-
- #define LIST_HEAD(name) \
- struct list_head name = LIST_HEAD_INIT(name)
-
- static inline void INIT_LIST_HEAD(struct list_head *list)
- {
- list->next = list;
- list->prev = list;
- }
在初始化的时候,链表头的prev和next都是指向自身的。
- static inline void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
- {
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
- }
-
- static inline void list_add(struct list_head *new, struct list_head *head)
- {
- __list_add(new, head, head->next);
- }
-
- static inline void list_add_tail(struct list_head *new, struct list_head *head)
- {
- __list_add(new, head->prev, head);
- }
双向循环链表的实现,很少有例外情况,基本都可以用公共的方式来处理。这里无论是加第一个节点,还是其它的节点,使用的方法都一样。
另外,链表API实现时大致都是分为两层:一层外部的,如list_add、list_add_tail,用来消除一些例外情况,调用内部实现;一层是内部的,函数名前会加双下划线,如__list_add,往往是几个操作公共的部分,或者排除例外后的实现。
- static inline void __list_del(struct list_head * prev, struct list_head * next)
- {
- next->prev = prev;
- prev->next = next;
- }
-
- static inline void list_del(struct list_head *entry)
- {
- __list_del(entry->prev, entry->next);
- entry->next = LIST_POISON1;
- entry->prev = LIST_POISON2;
- }
-
- static inline void list_del_init(struct list_head *entry)
- {
- __list_del(entry->prev, entry->next);
- INIT_LIST_HEAD(entry);
- }
list_del是链表中节点的删除。之所以在调用__list_del后又把被删除元素的next、prev指向特殊的LIST_POISON1和LIST_POISON2,是为了在有代码误用已删除节点的指针时能尽早暴露问题,便于调试。
list_del_init则是删除节点后,随即把节点中指针再次初始化,这种删除方式更为实用。
- static inline void list_replace(struct list_head *old,
- struct list_head *new)
- {
- new->next = old->next;
- new->next->prev = new;
- new->prev = old->prev;
- new->prev->next = new;
- }
-
- static inline void list_replace_init(struct list_head *old,
- struct list_head *new)
- {
- list_replace(old, new);
- INIT_LIST_HEAD(old);
- }
list_replace是将链表中一个节点old,替换为另一个节点new。从实现来看,即使old所在的链表只有old一个节点,new也可以成功替换,这就是双向循环链表可怕的通用之处。
list_replace_init将被替换的old随即又初始化。
- static inline void list_move(struct list_head *list, struct list_head *head)
- {
- __list_del(list->prev, list->next);
- list_add(list, head);
- }
-
- static inline void list_move_tail(struct list_head *list,
- struct list_head *head)
- {
- __list_del(list->prev, list->next);
- list_add_tail(list, head);
- }
list_move的作用是把list节点从原链表中去除,并加入新的链表head中。
list_move_tail只在加入新链表时与list_move有所不同,list_move是加到head之后的链表头部,而list_move_tail是加到head之前的链表尾部。
- static inline int list_is_last(const struct list_head *list,
- const struct list_head *head)
- {
- return list->next == head;
- }
list_is_last 判断list是否处于head链表的尾部。
- static inline int list_empty(const struct list_head *head)
- {
- return head->next == head;
- }
-
- static inline int list_empty_careful(const struct list_head *head)
- {
- struct list_head *next = head->next;
- return (next == head) && (next == head->prev);
- }
list_empty 判断head链表是否为空,为空的意思就是只有一个链表头head。
list_empty_careful 同样是判断head链表是否为空,只是检查更为严格。
- static inline int list_is_singular(const struct list_head *head)
- {
- return !list_empty(head) && (head->next == head->prev);
- }
list_is_singular 判断head中是否只有一个节点,即除链表头head外只有一个节点。
- static inline void __list_cut_position(struct list_head *list,
- struct list_head *head, struct list_head *entry)
- {
- struct list_head *new_first = entry->next;
- list->next = head->next;
- list->next->prev = list;
- list->prev = entry;
- entry->next = list;
- head->next = new_first;
- new_first->prev = head;
- }
-
- /*
-  * list_cut_position - move the leading part of @head, up to and including
-  * @entry, onto @list.
-  * @list:  receives the removed entries; its previous next/prev are overwritten
-  * @head:  the list being cut
-  * @entry: last entry to move; if it is @head itself, @list is simply
-  *         reinitialised as an empty list
-  */
- static inline void list_cut_position(struct list_head *list,
- struct list_head *head, struct list_head *entry)
- {
- /* nothing to cut from an empty list */
- if (list_empty(head))
- return;
- /* single-entry list: @entry must be that entry or @head, otherwise do nothing */
- if (list_is_singular(head) &&
- (head->next != entry && head != entry))
- return;
- if (entry == head)
- INIT_LIST_HEAD(list);
- else
- __list_cut_position(list, head, entry);
- }
list_cut_position 用于把head链表分为两个部分。从head->next一直到entry被从head链表中删除,加入新的链表list。新链表list应该是空的,或者原来的节点都可以被忽略掉。可以看到,list_cut_position中排除了一些意外情况,保证调用__list_cut_position时至少有一个元素会被加入新链表。
- static inline void __list_splice(const struct list_head *list,
- struct list_head *prev,
- struct list_head *next)
- {
- struct list_head *first = list->next;
- struct list_head *last = list->prev;
-
- first->prev = prev;
- prev->next = first;
-
- last->next = next;
- next->prev = last;
- }
-
- static inline void list_splice(const struct list_head *list,
- struct list_head *head)
- {
- if (!list_empty(list))
- __list_splice(list, head, head->next);
- }
-
- static inline void list_splice_tail(struct list_head *list,
- struct list_head *head)
- {
- if (!list_empty(list))
- __list_splice(list, head->prev, head);
- }
list_splice的功能和list_cut_position正相反,它合并两个链表。list_splice把list链表中的节点加入head链表中。在实际操作之前,要先判断list链表是否为空。它保证调用__list_splice时list链表中至少有一个节点可以被合并到head链表中。
list_splice_tail只是在合并链表时插入的位置不同。list_splice是把原来list链表中的节点全加到head链表的头部,而list_splice_tail则是把原来list链表中的节点全加到head链表的尾部。
- static inline void list_splice_init(struct list_head *list,
- struct list_head *head)
- {
- if (!list_empty(list)) {
- __list_splice(list, head, head->next);
- INIT_LIST_HEAD(list);
- }
- }
-
- static inline void list_splice_tail_init(struct list_head *list,
- struct list_head *head)
- {
- if (!list_empty(list)) {
- __list_splice(list, head->prev, head);
- INIT_LIST_HEAD(list);
- }
- }
list_splice_init 除了完成list_splice的功能,还把变空了的list链表头重新初始化。
list_splice_tail_init 除了完成list_splice_tail的功能,还把变空了的list链表头重新初始化。
list操作的API大致如以上所列,包括链表节点添加与删除、节点从一个链表转移到另一个链表、链表中一个节点被替换为另一个节点、链表的合并与拆分、查看链表当前是否为空或者只有一个节点。接下来,是操作链表遍历时的一些宏,我们也简单介绍一下。
- #define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
list_entry主要用于从list节点查找其内嵌在的结构。比如定义一个结构struct A{ struct list_head list; }; 如果知道结构中链表的地址ptrList,就可以从ptrList进而获取整个结构的地址(即整个结构的指针) struct A *ptrA = list_entry(ptrList, struct A, list);
这种地址翻译的技巧是linux的拿手好戏,container_of随处可见,只是链表节点多被封装在更复杂的结构中,使用专门的list_entry定义也是为了使用方便。
- #define list_first_entry(ptr, type, member) \
- list_entry((ptr)->next, type, member)
list_first_entry是将ptr看作一个链表的链表头,取出其中第一个节点对应的结构地址。使用list_first_entry时应保证链表中至少有一个节点。
- #define list_for_each(pos, head) \
- for (pos = (head)->next; prefetch(pos->next), pos != (head); \
- pos = pos->next)
list_for_each循环遍历链表中的每个节点,从链表头部的第一个节点,一直到链表尾部。中间的prefetch是为了利用平台特性加速链表遍历,在某些平台下定义为空,可以忽略。
- #define __list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
__list_for_each与list_for_each没什么不同,只是少了prefetch的内容,实现上更为简单易懂。
- #define list_for_each_prev(pos, head) \
- for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
- pos = pos->prev)
list_for_each_prev与list_for_each的遍历顺序相反,从链表尾逆向遍历到链表头。
- #define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
list_for_each_safe 也是链表顺序遍历,只是更加安全。即使在遍历过程中,当前节点从链表中删除,也不会影响链表的遍历。参数上需要加一个暂存的链表节点指针n。
- #define list_for_each_prev_safe(pos, n, head) \
- for (pos = (head)->prev, n = pos->prev; \
- prefetch(pos->prev), pos != (head); \
- pos = n, n = pos->prev)
list_for_each_prev_safe 与list_for_each_prev同样是链表逆序遍历,只是加了链表节点删除保护。
- #define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- prefetch(pos->member.next), &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
list_for_each_entry不是遍历链表节点,而是遍历链表节点所嵌套进的结构。这个实现上较为复杂,但可以等价于list_for_each加上list_entry的组合。
- #define list_for_each_entry_reverse(pos, head, member) \
- for (pos = list_entry((head)->prev, typeof(*pos), member); \
- prefetch(pos->member.prev), &pos->member != (head); \
- pos = list_entry(pos->member.prev, typeof(*pos), member))
list_for_each_entry_reverse 是逆序遍历链表节点所嵌套进的结构,等价于list_for_each_prev加上list_entry的组合。
- #define list_for_each_entry_continue(pos, head, member) \
- for (pos = list_entry(pos->member.next, typeof(*pos), member); \
- prefetch(pos->member.next), &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
list_for_each_entry_continue也是遍历链表上的节点嵌套的结构。只是并非从链表头开始,而是从结构指针的下一个结构开始,一直到链表尾部。
- #define list_for_each_entry_continue_reverse(pos, head, member) \
- for (pos = list_entry(pos->member.prev, typeof(*pos), member); \
- prefetch(pos->member.prev), &pos->member != (head); \
- pos = list_entry(pos->member.prev, typeof(*pos), member))
list_for_each_entry_continue_reverse 是逆序遍历链表上的节点嵌套的结构。只是并非从链表尾开始,而是从结构指针的前一个结构开始,一直到链表头部。
- #define list_for_each_entry_from(pos, head, member) \
- for (; prefetch(pos->member.next), &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
list_for_each_entry_from 是从当前结构指针pos开始,顺序遍历链表上的结构指针。
- #define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
list_for_each_entry_safe 也是顺序遍历链表上节点嵌套的结构。只是加了删除节点的保护。
- #define list_for_each_entry_safe_continue(pos, n, head, member) \
- for (pos = list_entry(pos->member.next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
list_for_each_entry_safe_continue 是从pos的下一个结构指针开始,顺序遍历链表上的结构指针,同时加了节点删除保护。
- #define list_for_each_entry_safe_from(pos, n, head, member) \
- for (n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
list_for_each_entry_safe_from 是从pos开始,顺序遍历链表上的结构指针,同时加了节点删除保护。
- #define list_for_each_entry_safe_reverse(pos, n, head, member) \
- for (pos = list_entry((head)->prev, typeof(*pos), member), \
- n = list_entry(pos->member.prev, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.prev, typeof(*n), member))
list_for_each_entry_safe_reverse 是从链表尾部开始,逆序遍历链表上节点嵌套的结构,同时加了节点删除保护。
至此为止,我们介绍了linux中双向循环链表的结构、所有的操作函数和遍历宏定义。相信以后在linux代码中遇到链表的使用,不会再陌生。
在任何处理器平台下,都会有一些原子性操作,供操作系统使用,我们这里只讲x86下面的。在单处理器情况下,每条指令的执行都是原子性的,但在多处理器情况下,只有那些单独的读操作或写操作才是原子性的。为了弥补这一缺点,x86提供了附加的lock前缀,使带lock前缀的读修改写指令也能原子性执行。带lock前缀的指令在操作时会锁住总线,使自身的执行即使在多处理器间也是原子性执行的。xchg指令不带lock前缀也是原子性执行,也就是说xchg执行时默认会锁内存总线。原子性操作是线程间同步的基础,linux专门定义了一种只进行原子操作的类型atomic_t,并提供相关的原子读写调用API。本节就来分析这些原子操作在x86下的实现。
- typedef struct {
- volatile int counter;
- } atomic_t;
原子类型其实是int类型,只是禁止寄存器对其暂存。
- #define ATOMIC_INIT(i) { (i) }
原子类型的初始化。32位x86平台下atomic API在arch/x86/include/asm/atomic_32.h中实现。
- static inline int atomic_read(const atomic_t *v)
- {
- return v->counter;
- }
- static inline void atomic_set(atomic_t *v, int i)
- {
- v->counter = i;
- }
单独的读操作或者写操作,在x86下都是原子性的。
- /*
-  * atomic_add - atomically add @i to @v->counter.
-  * Issues a single LOCK_PREFIX'ed addl on the counter; returns nothing.
-  */
- static inline void atomic_add(int i, atomic_t *v)
- {
- asm volatile(LOCK_PREFIX "addl %1,%0"
- : "+m" (v->counter)
- : "ir" (i));
- }
- /*
-  * atomic_sub - atomically subtract @i from @v->counter.
-  * Issues a single LOCK_PREFIX'ed subl on the counter; returns nothing.
-  */
- static inline void atomic_sub(int i, atomic_t *v)
- {
- asm volatile(LOCK_PREFIX "subl %1,%0"
- : "+m" (v->counter)
- : "ir" (i));
- }
atomic_add和atomic_sub属于读修改写操作,实现时需要加lock前缀。
- /*
-  * atomic_sub_and_test - atomically subtract @i from @v->counter and test.
-  * Returns the byte stored by sete right after the subtraction: non-zero
-  * when the result is zero, 0 otherwise.
-  */
- static inline int atomic_sub_and_test(int i, atomic_t *v)
- {
- unsigned char c;
- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
- }
atomic_sub_and_test执行完减操作后检查结果是否为0。
- /* atomic_inc - atomically increment @v->counter with a LOCK_PREFIX'ed incl. */
- static inline void atomic_inc(atomic_t *v)
- {
- asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
- }
- /* atomic_dec - atomically decrement @v->counter with a LOCK_PREFIX'ed decl. */
- static inline void atomic_dec(atomic_t *v)
- {
- asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
- }
atomic_inc和atomic_dec是递增递减操作。
- /*
-  * atomic_dec_and_test - atomically decrement @v->counter and test.
-  * Returns 1 when the decremented value is zero (sete stored non-zero),
-  * 0 otherwise.
-  */
- static inline int atomic_dec_and_test(atomic_t *v)
- {
- unsigned char c;
- asm volatile(LOCK_PREFIX "decl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
- }
atomic_dec_and_test在递减后检查结果是否为0。
- /*
-  * atomic_inc_and_test - atomically increment @v->counter and test.
-  * Returns 1 when the incremented value is zero (sete stored non-zero),
-  * 0 otherwise.
-  */
- static inline int atomic_inc_and_test(atomic_t *v)
- {
- unsigned char c;
- asm volatile(LOCK_PREFIX "incl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
- }
atomic_inc_and_test在递增后检查结果是否为0。
- /*
-  * atomic_add_negative - atomically add @i to @v->counter and test the sign.
-  * Returns the byte stored by sets right after the addition: non-zero when
-  * the result is negative, 0 otherwise.
-  */
- static inline int atomic_add_negative(int i, atomic_t *v)
- {
- unsigned char c;
- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
- }
atomic_add_negative在加操作后检查结果是否为负数。
- /*
-  * atomic_add_return - atomically add @i to @v->counter and return the sum.
-  *
-  * The normal path uses a LOCK_PREFIX'ed xaddl, which leaves the previous
-  * counter value in the register that held @i; adding the saved addend
-  * (__i) gives the new value. When CONFIG_M386 is set and
-  * boot_cpu_data.x86 <= 3, the function falls back to a read/set pair
-  * performed with local interrupts disabled.
-  */
- static inline int atomic_add_return(int i, atomic_t *v)
- {
- int __i;
- #ifdef CONFIG_M386
- unsigned long flags;
- if (unlikely(boot_cpu_data.x86 <= 3))
- goto no_xadd;
- #endif
- /* Modern 486+ processor */
- __i = i;
- asm volatile(LOCK_PREFIX "xaddl %0, %1"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
- #ifdef CONFIG_M386
- no_xadd: /* Legacy 386 processor */
- local_irq_save(flags);
- __i = atomic_read(v);
- atomic_set(v, i + __i);
- local_irq_restore(flags);
- return i + __i;
- #endif
- }
atomic_add_return 不仅执行加操作,而且把相加的结果返回。它是通过xadd这一指令实现的。
- static inline int atomic_sub_return(int i, atomic_t *v)
- {
- return atomic_add_return(-i, v);
- }
atomic_sub_return 不仅执行减操作,而且把相减的结果返回。它是通过atomic_add_return实现的。
- static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
- {
- return cmpxchg(&v->counter, old, new);
- }
- #define cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
- /*
-  * __cmpxchg - compare-and-exchange on *@ptr for 1, 2, 4 or 8 byte operands.
-  * @ptr:  location to operate on
-  * @old:  value expected in *@ptr (loaded into the accumulator, constraint "0")
-  * @new:  value to store if the comparison succeeds
-  * @size: operand size in bytes; selects the cmpxchg variant
-  *
-  * Returns what the instruction leaves in the accumulator ("=a"), i.e. the
-  * value that was found in *@ptr. For an unsupported @size no instruction
-  * is executed and @old is returned.
-  */
- static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
- {
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 8:
- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
- }
atomic_cmpxchg是由cmpxchg指令完成的。它把旧值同atomic_t类型的值相比较,如果相同,就把新值存入atomic_t类型的值中,返回atomic_t类型变量中原有的值。
- static inline int atomic_xchg(atomic_t *v, int new)
- {
- return xchg(&v->counter, new);
- }
- #define xchg(ptr, v) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr))))
- /*
-  * __xchg - exchange @x with *@ptr for 1, 2, 4 or 8 byte operands.
-  * @x:    value to store
-  * @ptr:  location to operate on
-  * @size: operand size in bytes; selects the xchg variant
-  *
-  * No LOCK_PREFIX is emitted here. Returns @x after the exchange, which
-  * then holds what was in *@ptr; for an unsupported @size nothing is
-  * executed and @x is returned unchanged.
-  */
- static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
- int size)
- {
- switch (size) {
- case 1:
- asm volatile("xchgb %b0,%1"
- : "=q" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 2:
- asm volatile("xchgw %w0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 4:
- asm volatile("xchgl %k0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 8:
- asm volatile("xchgq %0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- }
- return x;
- }
atomic_xchg则是将新值存入atomic_t类型的变量,并将变量的旧值返回。它使用xchg指令实现。
- /**
-  * atomic_add_unless - add unless the number is already a given value
-  * @v: pointer of type atomic_t
-  * @a: the amount to add to v...
-  * @u: ...unless v is equal to u.
-  *
-  * Atomically adds @a to @v, so long as @v was not already @u.
-  * Returns non-zero if @v was not @u, and zero otherwise.
-  */
- static inline int atomic_add_unless(atomic_t *v, int a, int u)
- {
- int c, old;
- c = atomic_read(v);
- for (;;) {
- /* the counter holds the forbidden value: give up without writing */
- if (unlikely(c == (u)))
- break;
- /* try to install c + a; succeeds only if the counter still equals c */
- old = atomic_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- /* the counter changed in between: retry with the value actually seen */
- c = old;
- }
- return c != (u);
- }
atomic_add_unless的功能比较特殊。它检查v是否等于u,如果不是则把v的值加上a。返回值非零表示相加前v不等于u(加法已执行),返回零表示v等于u(未做任何修改)。因为在atomic_read和atomic_cmpxchg中间可能有其它的写操作,所以要循环检查自己的值是否被写进去。
- #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
- #define atomic_inc_return(v) (atomic_add_return(1, v))
- #define atomic_dec_return(v) (atomic_sub_return(1, v))
atomic_inc_not_zero在v值不是0时加1。
atomic_inc_return对v值加1,并返回相加结果。
atomic_dec_return对v值减1,并返回相减结果。
- /* atomic_clear_mask - atomically AND ~(mask) into *addr, clearing the bits set in mask. */
- #define atomic_clear_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "andl %0,%1" \
- : : "r" (~(mask)), "m" (*(addr)) : "memory")
atomic_clear_mask清除变量某些位。
- /* atomic_set_mask - atomically OR mask into *addr, setting the bits set in mask. */
- #define atomic_set_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "orl %0,%1" \
- : : "r" (mask), "m" (*(addr)) : "memory")
atomic_set_mask将变量的某些位置位。
- /* Atomic operations are already serializing on x86 */
- #define smp_mb__before_atomic_dec() barrier()
- #define smp_mb__after_atomic_dec() barrier()
- #define smp_mb__before_atomic_inc() barrier()
- #define smp_mb__after_atomic_inc() barrier()
因为x86的atomic操作大多使用原子指令或者带lock前缀的指令。带lock前缀的指令执行前会完成之前的读写操作,对于原子操作来说不会受之前对同一位置的读写操作的影响,所以这里只是用编译器屏障barrier()代替。barrier()的作用相当于告诉编译器这里有一个内存屏障,放弃在寄存器中的暂存值,重新从内存中读入。
本节的atomic_t类型操作是最基础的,为了介绍下面的内容,必须先介绍它。如果可以使用atomic_t类型代替临界区操作,也可以加快不少速度。
kref是一个引用计数器,它被嵌套进其它的结构中,记录所嵌套结构的引用计数,并在计数清零时调用相应的清理函数。kref的原理和实现都非常简单,但要想用好却不容易,或者说kref被创建就是为了跟踪复杂情况下的结构引用销毁情况。所以这里先介绍kref的实现,再介绍其使用规则。kref的头文件在include/linux/kref.h,实现在lib/kref.c。闲话少说,上代码。
- struct kref {
- atomic_t refcount;
- };
- void kref_set(struct kref *kref, int num)
- {
- atomic_set(&kref->refcount, num);
- smp_mb();
- }
- void kref_init(struct kref *kref)
- {
- kref_set(kref, 1);
- }
kref_init 初始化kref的计数值为1。
- void kref_get(struct kref *kref)
- {
- WARN_ON(!atomic_read(&kref->refcount));
- atomic_inc(&kref->refcount);
- smp_mb__after_atomic_inc();
- }
kref_get递增kref的计数值。
- int kref_put(struct kref *kref, void (*release)(struct kref *kref))
- {
- WARN_ON(release == NULL);
- WARN_ON(release == (void (*)(struct kref *))kfree);
- if (atomic_dec_and_test(&kref->refcount)) {
- release(kref);
- return 1;
- }
- return 0;
- }
kref_put递减kref的计数值,如果计数值减为0,说明kref所指向的结构生命周期结束,会执行release释放函数。
所以说kref的API很简单,kref_init和kref_set基本都是初始时才会用到,平时常用的就是kref_get和kref_put。一旦在kref_put时计数值清零,立即调用结束函数。
kref设计得如此简单,是为了能灵活地用在各种结构的生命周期管理中。要用好它可不简单,好在Documentation/kref.txt中为我们总结了一些使用规则,下面简单翻译一下。
对于那些用在多种场合,被到处传递的结构,如果没有引用计数,bug几乎总是肯定的事。所以我们需要kref。kref允许我们在已有的结构中方便地添加引用计数。
你可以以如下方式添加kref到你的数据结构中:
- struct my_data {
- …
- struct kref refcount;
- …
- };
kref可以出现在你结构中的任意位置。
在分配kref后你必须初始化它,可以调用kref_init,把kref计数值初始为1。
- struct my_data *data;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
- if(!data)
- return -ENOMEM;
- kref_init(&data->refcount);
初始化之后,kref的使用应该遵循以下三条规则:
1) 如果你制造了一个结构指针的非暂时性副本,特别是当这个副本指针会被传递到其它执行线程时,你必须在传递副本指针之前执行kref_get:
- /* take a reference for the new copy before the pointer is handed off */
- kref_get(&data->refcount);
2)当你使用完,不再需要结构的指针,必须执行kref_put。如果这是结构指针的最后一个引用,release函数会被调用。如果代码绝不会在没有拥有引用计数的情况下去调用kref_get,在kref_put时就不需要加锁。
- kref_put(&data->refcount, data_release);
3)如果代码试图在还没拥有引用计数的情况下就调用kref_get,就必须串行化kref_put和kref_get的执行。因为很可能在kref_get执行之前或者执行中,kref_put就被调用并把整个结构释放掉了。
例如,你分配了一些数据并把它传递到其它线程去处理:
- /*
-  * data_release - kref release callback for struct my_data.
-  * Recovers the enclosing my_data from its embedded kref and frees it.
-  */
- void data_release(struct kref *kref)
- {
- struct my_data *data = container_of(kref, struct my_data, refcount);
- kfree(data);
- }
- void more_data_handling(void *cb_data)
- {
- struct my_data *data = cb_data;
- .
- . do stuff with data here
- .
- kref_put(&data->refcount, data_release);
- }
- /*
-  * my_data_handler - allocate a my_data and share it with a new kthread.
-  *
-  * Two references exist after setup: the initial one owned by this function
-  * and an extra one taken on behalf of the thread before the pointer is
-  * handed over. Returns 0 on success or a negative errno.
-  */
- int my_data_handler(void)
- {
- int rv = 0;
- struct my_data *data;
- struct task_struct *task;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
- kref_init(&data->refcount);
- /* reference for the thread, taken before the handoff */
- kref_get(&data->refcount);
- task = kthread_run(more_data_handling, data, "more_data_handling");
- if (IS_ERR(task)) {
- rv = PTR_ERR(task);
- /* the thread never ran: drop the reference taken on its behalf */
- kref_put(&data->refcount, data_release);
- goto out;
- }
- .
- . do stuff with data here
- .
- out:
- /* drop this function's own reference */
- kref_put(&data->refcount, data_release);
- return rv;
- }
这样做,无论两个线程的执行顺序是怎样的都无所谓,kref_put知道何时数据不再有引用计数,可以被销毁。kref_get()调用不需要加锁,因为在my_data_handler中调用kref_get时已经拥有一个引用。同样的原因,kref_put也不需要加锁。
要注意规则一中的要求,必须在传递指针之前调用kref_get。决不能写下面的代码:
- /* Counter-example: the reference is taken only after the pointer was handed off. */
- task = kthread_run(more_data_handling, data, "more_data_handling");
- if(task == ERR_PTR(-ENOMEM)) {
- rv = -ENOMEM;
- goto out;
- }
- else {
- /* BAD BAD BAD - get is after the handoff */
- kref_get(&data->refcount);
- }
不要认为自己在使用上面的代码时知道自己在做什么。首先,你可能并不知道你在做什么。其次,你可能知道你在做什么(在部分加锁情况下上面的代码也是正确的),但一些修改或者复制你代码的人并不知道你在做什么。这是一种坏的使用方式。
当然在部分情况下也可以优化对get和put的使用。例如,你已经完成了对这个数据的处理,并要把它传递给其它线程,就不需要再做多余的get和put了。
- /* Silly extra get and put */
- kref_get(&obj->ref);
- enqueue(obj);
- kref_put(&obj->ref, obj_cleanup);
只需要做enqueue操作即可,可以在其后加一条注释。
- enqueue(obj);
- /* We are done with obj , so we pass our refcount off to the queue. DON’T TOUCH obj AFTER HERE! */
第三条规则是处理起来最麻烦的。例如,你有一列数据,每条数据都有kref计数,你希望获取第一条数据。但你不能简单地把第一条数据从链表中取出并调用kref_get。这违背了第三条,在调用kref_get前你并没有一个引用。你需要增加一个mutex(或者其它锁)。
- static DEFINE_MUTEX(mutex);
- static LIST_HEAD(q);
- struct my_data
- {
- struct kref refcount;
- struct list_head link;
- };
- /*
-  * get_entry - return the first entry on q with a reference held, or NULL
-  * if q is empty.
-  *
-  * The caller holds no reference yet, so the lookup and kref_get() are done
-  * under the mutex, which put_entry() also takes around kref_put().
-  */
- static struct my_data *get_entry(void)
- {
- struct my_data *entry = NULL;
- mutex_lock(&mutex);
- if(!list_empty(&q)){
- entry = container_of(q.next, struct my_data, link);
- kref_get(&entry->refcount);
- }
- mutex_unlock(&mutex);
- return entry;
- }
- static void release_entry(struct kref *ref)
- {
- struct my_data *entry = container_of(ref, struct my_data, refcount);
- list_del(&entry->link);
- kfree(entry);
- }
- static void put_entry(struct my_data *entry)
- {
- mutex_lock(&mutex);
- kref_put(&entry->refcount, release_entry);
- mutex_unlock(&mutex);
- }
如果你不想在整个释放过程中都加锁,kref_put的返回值就有用了。例如你不想在加锁情况下调用kfree,你可以如下使用kref_put。
- static void release_entry(struct kref *ref)
- {
- }
- static void put_entry(struct my_data *entry)
- {
- mutex_lock(&mutex);
- if(kref_put(&entry->refcount, release_entry)){
- list_del(&entry->link);
- mutex_unlock(&mutex);
- kfree(entry);
- }
- else
- mutex_unlock(&mutex);
- }
如果你在撤销结构的过程中需要调用其它的需要较长时间的函数,或者函数也可能要获取同样的互斥锁,这样做就很有用了。但要注意在release函数中做完撤销工作会使代码看起来更整洁。
前面我们说到过list_head,这是linux中通用的链表形式,双向循环链表,功能强大,实现简单优雅。可如果您认为list_head就是链表的极致,应该在linux链表界一统天下,那可就错了。据我所知,linux内核代码中至少还有两种链表能占有一席之地。一种就是hlist,一种就是本节要介绍的klist。虽然三者不同,但hlist和klist都可以看成是从list_head中发展出来的,用于特殊的链表使用情景。hlist是用于哈希表中。众所周知,哈希表主要就是一个哈希数组,为了解决映射冲突的问题,常常把哈希数组的每一项做成一个链表,这样有多少重复的都可以链进去。但哈希数组的项很多,list_head的话每个链表头都需要两个指针的空间,在稀疏的哈希表中实在是一种浪费,于是就发明了hlist。hlist有两大特点,一是它的链表头只需要一个指针,二是它的每一项都可以找到自己的前一节点,也就是说它不再循环,但仍是双向。令人不解的是,hlist的实现太绕了,比如它明明可以直接指向前一节点,却偏偏指向指针地址,还是前一节点中指向后一节点的指针地址。即使这种设计在实现时占便宜,但它理解上带来的不便已经远远超过实现上带来的小小便利。同hlist一样,klist也是为了适应某类特殊情形的要求。考虑一个被简化的情形,假设一些设备被链接在设备链表中,一个线程命令卸载某设备,即将其从设备链表中删除,但这时该设备正在使用中,这时就出现了冲突。当前可以设置临界区并加锁,但因为使用一个设备而锁住整个设备链表显然是不对的;又或者可以从设备本身做文章,让线程阻塞,这当然也可以。但我们上节了解了kref,就该知道linux对待这种情况的风格,给它一个引用计数kref,等计数为零就删除。klist就是这么干的,它把kref直接保存在了链表节点上。之前说到有线程要求删除设备,之前的使用仍存在,所以不能实际删除,但不应该有新的应用访问到该设备。klist就提供了一种让节点在链表上隐身的方法。下面还是来看实际代码吧。
klist的头文件是include/linux/klist.h,实现在lib/klist.c。
- struct klist_node;
- struct klist {
- spinlock_t k_lock;
- struct list_head k_list;
- void (*get)(struct klist_node *);
- void (*put)(struct klist_node *);
- } __attribute__ ((aligned (4)));
-
- #define KLIST_INIT(_name, _get, _put) \
- { .k_lock = __SPIN_LOCK_UNLOCKED(_name.k_lock), \
- .k_list = LIST_HEAD_INIT(_name.k_list), \
- .get = _get, \
- .put = _put, }
-
- #define DEFINE_KLIST(_name, _get, _put) \
- struct klist _name = KLIST_INIT(_name, _get, _put)
-
- extern void klist_init(struct klist *k, void (*get)(struct klist_node *),
- void (*put)(struct klist_node *));
-
- struct klist_node {
- void *n_klist; /* never access directly */
- struct list_head n_node;
- struct kref n_ref;
- };
可以看到,klist的链表头是struct klist结构,链表节点是struct klist_node结构。先看struct klist,除了包含链表需要的k_list,还有用于加锁的k_lock。剩余的get()和put()函数是用于struct klist_node嵌入在更大的结构中,这样在节点初始时调用get(),在节点删除时调用put(),以表示链表中存在对结构的引用。再看struct klist_node,除了链表需要的n_node,还有一个引用计数n_ref。还有一个比较特殊的指针n_klist,n_klist是指向链表头struct klist的,但它的第0位用来表示是否该节点已被请求删除,如果已被请求删除则在链表循环时是看不到这一节点的,循环函数将其略过。现在你明白为什么非要在struct klist的定义后加上__attribute__((aligned(4)))。不过说实话这样在x86下仍然不太保险,但linux选择了相信gcc,毕竟是多年的战友和兄弟了,相互知根知底。
看过这两个结构,想必大家已经较为清楚了,下面就来看看它们的实现。
- /*
- * Use the lowest bit of n_klist to mark deleted nodes and exclude
- * dead ones from iteration.
- */
- #define KNODE_DEAD 1LU
- #define KNODE_KLIST_MASK ~KNODE_DEAD
-
- static struct klist *knode_klist(struct klist_node *knode)
- {
- return (struct klist *)
- ((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
- }
-
- static bool knode_dead(struct klist_node *knode)
- {
- return (unsigned long)knode->n_klist & KNODE_DEAD;
- }
-
- static void knode_set_klist(struct klist_node *knode, struct klist *klist)
- {
- knode->n_klist = klist;
- /* no knode deserves to start its life dead */
- WARN_ON(knode_dead(knode));
- }
-
- static void knode_kill(struct klist_node *knode)
- {
- /* and no knode should die twice ever either, see we’re very humane */
- WARN_ON(knode_dead(knode));
- *(unsigned long *)&knode->n_klist |= KNODE_DEAD;
- }
前面的四个函数都是内部静态函数,帮助API实现的。knode_klist()是从节点找到链表头。knode_dead()是检查该节点是否已被请求删除。
knode_set_klist设置节点的链表头。knode_kill将该节点请求删除。细心的话大家会发现这四个函数是对称的,而且都是操作节点的内部函数。
- void klist_init(struct klist *k, void (*get)(struct klist_node *),
- void (*put)(struct klist_node *))
- {
- INIT_LIST_HEAD(&k->k_list);
- spin_lock_init(&k->k_lock);
- k->get = get;
- k->put = put;
- }
klist_init,初始化klist。
- static void add_head(struct klist *k, struct klist_node *n)
- {
- spin_lock(&k->k_lock);
- list_add(&n->n_node, &k->k_list);
- spin_unlock(&k->k_lock);
- }
-
- static void add_tail(struct klist *k, struct klist_node *n)
- {
- spin_lock(&k->k_lock);
- list_add_tail(&n->n_node, &k->k_list);
- spin_unlock(&k->k_lock);
- }
-
- static void klist_node_init(struct klist *k, struct klist_node *n)
- {
- INIT_LIST_HEAD(&n->n_node);
- kref_init(&n->n_ref);
- knode_set_klist(n, k);
- if (k->get)
- k->get(n);
- }
又是三个内部函数,add_head()将节点加入链表头,add_tail()将节点加入链表尾,klist_node_init()是初始化节点。注意在节点的引用计数初始化时,因为引用计数变为1,所以也要调用相应的get()函数。
- void klist_add_head(struct klist_node *n, struct klist *k)
- {
- klist_node_init(k, n);
- add_head(k, n);
- }
-
- void klist_add_tail(struct klist_node *n, struct klist *k)
- {
- klist_node_init(k, n);
- add_tail(k, n);
- }
klist_add_head()将节点初始化,并加入链表头。
klist_add_tail()将节点初始化,并加入链表尾。
它们正是用上面的三个内部函数实现的,可见linux内核中对函数复用有很强的执念,其实这里add_tail和add_head是不用的,纵观整个文件,也只有klist_add_head()和klist_add_tail()对它们进行了调用。
- void klist_add_after(struct klist_node *n, struct klist_node *pos)
- {
- struct klist *k = knode_klist(pos);
-
- klist_node_init(k, n);
- spin_lock(&k->k_lock);
- list_add(&n->n_node, &pos->n_node);
- spin_unlock(&k->k_lock);
- }
-
- void klist_add_before(struct klist_node *n, struct klist_node *pos)
- {
- struct klist *k = knode_klist(pos);
-
- klist_node_init(k, n);
- spin_lock(&k->k_lock);
- list_add_tail(&n->n_node, &pos->n_node);
- spin_unlock(&k->k_lock);
- }
klist_add_after()将节点加到指定节点后面。
klist_add_before()将节点加到指定节点前面。
这两个函数都是对外提供的API。在list_head中都没有看到有这种API,所以说需求决定了接口。虽说只有一步之遥,klist也不愿让外界介入它的内部实现。
之前出现的API都太常见了,既没有使用引用计数,又没有跳过请求删除的节点。所以klist的亮点在下面,klist链表的遍历。
- struct klist_iter {
- struct klist *i_klist;
- struct klist_node *i_cur;
- };
-
-
- extern void klist_iter_init(struct klist *k, struct klist_iter *i);
- extern void klist_iter_init_node(struct klist *k, struct klist_iter *i,
- struct klist_node *n);
- extern void klist_iter_exit(struct klist_iter *i);
- extern struct klist_node *klist_next(struct klist_iter *i);
以上就是链表遍历需要的辅助结构struct klist_iter,和遍历用到的四个函数。
- struct klist_waiter {
- struct list_head list;
- struct klist_node *node;
- struct task_struct *process;
- int woken;
- };
-
- static DEFINE_SPINLOCK(klist_remove_lock);
- static LIST_HEAD(klist_remove_waiters);
-
- static void klist_release(struct kref *kref)
- {
- struct klist_waiter *waiter, *tmp;
- struct klist_node *n = container_of(kref, struct klist_node, n_ref);
-
- WARN_ON(!knode_dead(n));
- list_del(&n->n_node);
- spin_lock(&klist_remove_lock);
- list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) {
- if (waiter->node != n)
- continue;
-
- waiter->woken = 1;
- mb();
- wake_up_process(waiter->process);
- list_del(&waiter->list);
- }
- spin_unlock(&klist_remove_lock);
- knode_set_klist(n, NULL);
- }
-
- static int klist_dec_and_del(struct klist_node *n)
- {
- return kref_put(&n->n_ref, klist_release);
- }
-
- /*
-  * klist_put - drop one reference on @n, optionally marking it dead first.
-  * @n:    node whose reference is dropped
-  * @kill: when true, flag the node as dead via knode_kill() before the drop
-  *
-  * The klist's put() callback is called only if this drop released the node
-  * (klist_dec_and_del() returned non-zero), and only after k_lock is released.
-  */
- static void klist_put(struct klist_node *n, bool kill)
- {
- struct klist *k = knode_klist(n);
- /* fetched up front: klist_release() resets n's klist pointer to NULL */
- void (*put)(struct klist_node *) = k->put;
-
- spin_lock(&k->k_lock);
- if (kill)
- knode_kill(n);
- /* node still referenced: suppress the put() callback */
- if (!klist_dec_and_del(n))
- put = NULL;
- spin_unlock(&k->k_lock);
- if (put)
- put(n);
- }
-
- /**
- * klist_del – Decrement the reference count of node and try to remove.
- * @n: node we’re deleting.
- */
- void klist_del(struct klist_node *n)
- {
- klist_put(n, true);
- }
以上的内容乍一看很难理解,其实都是klist实现必须的。因为使用kref动态删除,自然需要一个计数降为零时调用的函数klist_release。
klist_dec_and_del()就是对kref_put()的包装,起到减少节点引用计数的功能。
至于为什么会出现一个新的结构struct klist_waiter,也很简单。之前说有线程申请删除某节点,但节点的引用计数仍在,所以只能把请求删除的线程阻塞,就是用struct klist_waiter阻塞在klist_remove_waiters上。所以在klist_release()调用时还要将阻塞的线程唤醒。knode_kill()将节点设为已请求删除。而且还会调用put()函数。
释放引用计数是调用klist_del(),它通过内部函数klist_put()完成所需操作:用knode_kill()设置节点为已请求删除,用klist_dec_and_del()释放引用,调用可能的put()函数。
- /**
- * klist_remove – Decrement the refcount of node and wait for it to go away.
- * @n: node we’re removing.
- */
- void klist_remove(struct klist_node *n)
- {
- struct klist_waiter waiter;
-
- waiter.node = n;
- waiter.process = current;
- waiter.woken = 0;
- spin_lock(&klist_remove_lock);
- list_add(&waiter.list, &klist_remove_waiters);
- spin_unlock(&klist_remove_lock);
-
- klist_del(n);
-
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (waiter.woken)
- break;
- schedule();
- }
- __set_current_state(TASK_RUNNING);
- }
klist_remove()不但会调用klist_del()减少引用计数,还会一直阻塞到节点被删除。这个函数才是请求删除节点的线程应该调用的。
- int klist_node_attached(struct klist_node *n)
- {
- return (n->n_klist != NULL);
- }
klist_node_attached()检查节点是否被包含在某链表中。
以上是klist的链表初始化,节点加入,节点删除函数。下面是klist链表遍历函数。
- struct klist_iter {
- struct klist *i_klist;
- struct klist_node *i_cur;
- };
-
-
- extern void klist_iter_init(struct klist *k, struct klist_iter *i);
- extern void klist_iter_init_node(struct klist *k, struct klist_iter *i,
- struct klist_node *n);
- extern void klist_iter_exit(struct klist_iter *i);
- extern struct klist_node *klist_next(struct klist_iter *i);
klist的遍历有些复杂,因为它考虑到了在遍历过程中节点删除的情况,而且还要忽略那些已被删除的节点。宏实现已经无法满足要求,迫不得已,只能用函数实现,并用struct klist_iter记录中间状态。
- void klist_iter_init_node(struct klist *k, struct klist_iter *i,
- struct klist_node *n)
- {
- i->i_klist = k;
- i->i_cur = n;
- if (n)
- kref_get(&n->n_ref);
- }
-
- void klist_iter_init(struct klist *k, struct klist_iter *i)
- {
- klist_iter_init_node(k, i, NULL);
- }
klist_iter_init_node()是从klist中的某个节点开始遍历,而klist_iter_init()是从链表头开始遍历的。
但你又要注意,klist_iter_init()和klist_iter_init_node()的用法又不同。klist_iter_init_node()可以在其后直接对当前节点进行访问,也可以调用klist_next()访问下一节点。而klist_iter_init()只能调用klist_next()访问下一节点。或许klist_iter_init_node()的本意不是从当前节点开始,而是从当前节点的下一节点开始。
- static struct klist_node *to_klist_node(struct list_head *n)
- {
- return container_of(n, struct klist_node, n_node);
- }
- /*
-  * klist_next - advance iterator @i to the next live node.
-  *
-  * Under the klist's k_lock: drops the reference held on the current node
-  * (if any), skips nodes flagged dead, and takes a reference on the node it
-  * stops at. Returns that node, or NULL once the list head is reached.
-  * The klist's put() callback for the previous node runs after unlocking,
-  * and only if dropping the reference released that node.
-  */
- struct klist_node *klist_next(struct klist_iter *i)
- {
- void (*put)(struct klist_node *) = i->i_klist->put;
- struct klist_node *last = i->i_cur;
- struct klist_node *next;
-
- spin_lock(&i->i_klist->k_lock);
-
- if (last) {
- /* read the successor first: releasing last unlinks it via list_del() */
- next = to_klist_node(last->n_node.next);
- if (!klist_dec_and_del(last))
- put = NULL;
- } else
- /* no current node: start from the first entry of the list */
- next = to_klist_node(i->i_klist->k_list.next);
-
- i->i_cur = NULL;
- /* walk forward until a live node is found or the walk returns to the head */
- while (next != to_klist_node(&i->i_klist->k_list)) {
- if (likely(!knode_dead(next))) {
- kref_get(&next->n_ref);
- i->i_cur = next;
- break;
- }
- next = to_klist_node(next->n_node.next);
- }
-
- spin_unlock(&i->i_klist->k_lock);
-
- if (put && last)
- put(last);
- return i->i_cur;
- }
klist_next()是将循环进行到下一节点。实现中需要注意两点问题:1、加锁,根据经验,单纯对某个节点操作不需要加锁,但对影响整个链表的操作需要加自旋锁。比如之前klist_iter_init_node()中对节点增加引用计数,就不需要加锁,因为只有已经拥有节点引用计数的线程才会特别地从那个节点开始。而之后klist_next()中则需要加锁,因为当前线程很可能没有引用计数,所以需要加锁,让情况固定下来。这既是保护链表,也是保护节点有效。符合kref引用计数的使用原则。2、要注意,虽然在节点切换的过程中是加锁的,但切换完访问当前节点时是解锁的,中间可能有节点被删除(这个通过spin_lock就可以搞定),也可能有节点被请求删除,这就需要注意。首先要忽略链表中已被请求删除的节点,然后在减少前一个节点引用计数时,可能就把前一个节点删除了。这里之所以不调用klist_put(),是因为本身已处于加锁状态,但仍要有它的实现。这里的实现和klist_put()中类似,代码不介意在加锁状态下唤醒另一个线程,但却不希望在加锁状态下调用put()函数,那可能会涉及释放另一个更大的结构。
- void klist_iter_exit(struct klist_iter *i)
- {
- if (i->i_cur) {
- klist_put(i->i_cur, false);
- i->i_cur = NULL;
- }
- }
klist_iter_exit(),遍历结束函数。在遍历完成时调不调无所谓,但如果想中途结束,就一定要调用klist_iter_exit()。
klist主要用于设备驱动模型中,为了适应那些动态变化的设备和驱动,而专门设计的链表。klist并不通用,但它真的很新奇。 我看到它时,震惊于链表竟然可以专门异化成这种样子。如果你是松耦合的结构,如果你手下净是些桀骜不驯的家伙,那么不要只考虑kref,你可能还需要klist。
kobject的头文件在include/linux/kobject.h,实现在lib/kobject.c。闲话少说,上代码。
- struct kobject {
- const char *name;
- struct list_head entry;
- struct kobject *parent;
- struct kset *kset;
- struct kobj_type *ktype;
- struct sysfs_dirent *sd;
- struct kref kref;
- unsigned int state_initialized:1;
- unsigned int state_in_sysfs:1;
- unsigned int state_add_uevent_sent:1;
- unsigned int state_remove_uevent_sent:1;
- unsigned int uevent_suppress:1;
- };
在struct kobject中,name是名字,entry是用于kobject所属kset下的子kobject链表,parent指向kobject的父节点,kset指向kobject所属的kset,ktype定义了kobject所属的类型,sd指向kobject对应的sysfs目录,kref记录kobject的引用计数,之后是一系列标志。
- struct kobj_type {
- void (*release)(struct kobject *kobj);
- struct sysfs_ops *sysfs_ops;
- struct attribute **default_attrs;
- };
struct kobj_type就是定义了kobject的公共类型,其中既有操作的函数,也有公共的属性。其中release()是在kobject释放时调用的,sysfs_ops中定义了读写属性文件时调用的函数。default_attrs中定义了这类kobject公共的属性。
- struct kset {
- struct list_head list;
- spinlock_t list_lock;
- struct kobject kobj;
- struct kset_uevent_ops *uevent_ops;
- };
struct kset可以看成在kobject上的扩展,它包含一个kobject的链表,可以方便地表示sysfs中目录与子目录的关系。其中,list是所属kobject的链表头,list_lock用于在访问链表时加锁,kobj是kset的内部kobject,要表现为sysfs中的目录就必须拥有kobject的功能,最后的kset_uevent_ops定义了对发往用户空间的uevent的处理。我对uevent不了解,会尽量忽略。
- struct kobj_attribute {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
- char *buf);
- ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t count);
- };
struct kobj_attribute是kobject在attribute上做出的扩展,添加了两个专门读写kobject属性的函数。无论是kobject,还是kset(说到底是kset内部的kobject),都提供了使用kobj_attribute的快速创建方法。
结构差不多介绍完了,下面看看实现。我所知道的代码分析风格,喜欢自顶向下的方式,从一个函数开始,介绍出一个函数调用树。在代码量很大,涉及调用层次很深的时候,确实要采用这种打洞的方式来寻找突破口。但这种自顶向下的方式有两个问题:一是很容易迷失,二是代码分析的难度会逐渐增大而不是减小。在茫茫的代码中,你一头下去,周围都是你不认识的函数,一个函数里调用了三个陌生的函数,其中一个陌生的函数又调用了五个更陌生的函数…不久你就会产生很强的挫败感。这就像走在沙漠上,你不知道终点在哪,也许翻过一个沙丘就到了,也许还有无数个沙丘。而且在这种分析时,人是逐渐走向细节,容易被细节所困扰,忽略了整体的印象与代码的层次感。所以,我觉得在分析代码时,也可以采用自底向上的方式,从细小的、内部使用的函数,到比较宏观的、供外部调用的函数。而且按照这种顺序来看代码,基本就是文件从头读到尾的顺序,也比较符合写代码的流程。linux代码喜欢在文件开始处攒内部静态函数,攒到一定程度爆发,突然实现几个外部API,然后再攒,再实现。而且之前的内部静态函数会反复调用到。linux代码写得很有层次感,除了内外有别,还把意思相近的,或者功能刚好相反的,或者使用时顺序调用的函数放在一起,很便于阅读。闲话少说,等你看完kobject的实现自然就清楚了。
- static int populate_dir(struct kobject *kobj)
- {
- struct kobj_type *t = get_ktype(kobj);
- struct attribute *attr;
- int error = 0;
- int i;
- if (t && t->default_attrs) {
- for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) {
- error = sysfs_create_file(kobj, attr);
- if (error)
- break;
- }
- }
- return error;
- }
- static int create_dir(struct kobject *kobj)
- {
- int error = 0;
- if (kobject_name(kobj)) {
- error = sysfs_create_dir(kobj);
- if (!error) {
- error = populate_dir(kobj);
- if (error)
- sysfs_remove_dir(kobj);
- }
- }
- return error;
- }
create_dir()在sysfs中创建kobj对应的目录,populate_dir()创建kobj中默认属性对应的文件。create_dir()先调用sysfs_create_dir()创建目录,再调用populate_dir()填充默认属性文件;如果populate_dir()失败,就用sysfs_remove_dir()把刚创建的目录删除。
- static int get_kobj_path_length(struct kobject *kobj)
- {
- int length = 1;
- struct kobject *parent = kobj;
- /* walk up the ancestors until we hit the one pointing to the
- * root.
- * Add 1 to strlen for leading ‘/’ of each level.
- */
- do {
- if (kobject_name(parent) == NULL)
- return 0;
- length += strlen(kobject_name(parent)) + 1;
- parent = parent->parent;
- } while (parent);
- return length;
- }
- static void fill_kobj_path(struct kobject *kobj, char *path, int length)
- {
- struct kobject *parent;
- --length;
- for (parent = kobj; parent; parent = parent->parent) {
- int cur = strlen(kobject_name(parent));
- /* back up enough to print this name with ‘/’ */
- length -= cur;
- strncpy(path + length, kobject_name(parent), cur);
- *(path + --length) = '/';
- }
- pr_debug(“kobject: ‘%s’ (%p): %s: path = ‘%s’\n”, kobject_name(kobj),
- kobj, __func__, path);
- }
- /**
- * kobject_get_path – generate and return the path associated with a given kobj and kset pair.
- *
- * @kobj: kobject in question, with which to build the path
- * @gfp_mask: the allocation type used to allocate the path
- *
- * The result must be freed by the caller with kfree().
- */
- char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
- {
- char *path;
- int len;
- len = get_kobj_path_length(kobj);
- if (len == 0)
- return NULL;
- path = kzalloc(len, gfp_mask);
- if (!path)
- return NULL;
- fill_kobj_path(kobj, path, len);
- return path;
- }
前面两个是内部函数,get_kobj_path_length()获得kobj路径名的长度,fill_kobj_path()把kobj路径名填充到path缓冲区中。
kobject_get_path()靠两个函数获得kobj的路径名,从攒函数到爆发一气呵成。
- static void kobj_kset_join(struct kobject *kobj)
- {
- if (!kobj->kset)
- return;
- kset_get(kobj->kset);
- spin_lock(&kobj->kset->list_lock);
- list_add_tail(&kobj->entry, &kobj->kset->list);
- spin_unlock(&kobj->kset->list_lock);
- }
- /* remove the kobject from its kset’s list */
- static void kobj_kset_leave(struct kobject *kobj)
- {
- if (!kobj->kset)
- return;
- spin_lock(&kobj->kset->list_lock);
- list_del_init(&kobj->entry);
- spin_unlock(&kobj->kset->list_lock);
- kset_put(kobj->kset);
- }
kobj_kset_join()把kobj加入kobj->kset的链表中,kobj_kset_leave()把kobj从kobj->kset的链表中去除,两者功能相对。
- static void kobject_init_internal(struct kobject *kobj)
- {
- if (!kobj)
- return;
- kref_init(&kobj->kref);
- INIT_LIST_HEAD(&kobj->entry);
- kobj->state_in_sysfs = 0;
- kobj->state_add_uevent_sent = 0;
- kobj->state_remove_uevent_sent = 0;
- kobj->state_initialized = 1;
- }
- static int kobject_add_internal(struct kobject *kobj)
- {
- int error = 0;
- struct kobject *parent;
- if (!kobj)
- return -ENOENT;
- if (!kobj->name || !kobj->name[0]) {
- WARN(1, “kobject: (%p): attempted to be registered with empty “
- “name!\n”, kobj);
- return -EINVAL;
- }
- parent = kobject_get(kobj->parent);
- /* join kset if set, use it as parent if we do not already have one */
- if (kobj->kset) {
- if (!parent)
- parent = kobject_get(&kobj->kset->kobj);
- kobj_kset_join(kobj);
- kobj->parent = parent;
- }
- pr_debug(“kobject: ‘%s’ (%p): %s: parent: ‘%s’, set: ‘%s’\n”,
- kobject_name(kobj), kobj, __func__,
- parent ? kobject_name(parent) : “<NULL>”,
- kobj->kset ? kobject_name(&kobj->kset->kobj) : “<NULL>”);
- error = create_dir(kobj);
- if (error) {
- kobj_kset_leave(kobj);
- kobject_put(parent);
- kobj->parent = NULL;
- /* be noisy on error issues */
- if (error == -EEXIST)
- printk(KERN_ERR “%s failed for %s with “
- “-EEXIST, don’t try to register things with “
- “the same name in the same directory.\n”,
- __func__, kobject_name(kobj));
- else
- printk(KERN_ERR “%s failed for %s (%d)\n”,
- __func__, kobject_name(kobj), error);
- dump_stack();
- } else
- kobj->state_in_sysfs = 1;
- return error;
- }
kobject_init_internal()初始化kobj。
kobject_add_internal()把kobj加入已有的结构。
这两个函数看似无关,实际很有关系。在kobject中有好几个结构变量,但重要的只有两个,一个是kset,一个是parent。这两个都是表示当前kobject在整个体系中的位置,决不能自行决定,需要外部参与设置。那把kobject创建的过程分为init和add两个阶段也就很好理解了。kobject_init_internal()把一些能自动初始化的结构变量初始化掉,等外界设置了parent和kset,再调用kobject_add_internal()把kobject安在适当的位置,并创建相应的sysfs目录及文件。
- int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
- va_list vargs)
- {
- const char *old_name = kobj->name;
- char *s;
- if (kobj->name && !fmt)
- return 0;
- kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
- if (!kobj->name)
- return -ENOMEM;
- /* ewww… some of these buggers have ‘/’ in the name … */
- while ((s = strchr(kobj->name, ‘/’)))
- s[0] = ‘!’;
- kfree(old_name);
- return 0;
- }
- /**
- * kobject_set_name – Set the name of a kobject
- * @kobj: struct kobject to set the name of
- * @fmt: format string used to build the name
- *
- * This sets the name of the kobject. If you have already added the
- * kobject to the system, you must call kobject_rename() in order to
- * change the name of the kobject.
- */
- int kobject_set_name(struct kobject *kobj, const char *fmt, …)
- {
- va_list vargs;
- int retval;
- va_start(vargs, fmt);
- retval = kobject_set_name_vargs(kobj, fmt, vargs);
- va_end(vargs);
- return retval;
- }
kobject_set_name()是设置kobj名称的,它又调用kobject_set_name_vargs()实现。但要注意,这个kobject_set_name()仅限于kobject添加到体系之前,因为它只是修改了名字,并未通知用户空间。
- void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
- {
- char *err_str;
- if (!kobj) {
- err_str = “invalid kobject pointer!”;
- goto error;
- }
- if (!ktype) {
- err_str = “must have a ktype to be initialized properly!\n”;
- goto error;
- }
- if (kobj->state_initialized) {
- /* do not error out as sometimes we can recover */
- printk(KERN_ERR “kobject (%p): tried to init an initialized “
- “object, something is seriously wrong.\n”, kobj);
- dump_stack();
- }
- kobject_init_internal(kobj);
- kobj->ktype = ktype;
- return;
- error:
- printk(KERN_ERR “kobject (%p): %s\n”, kobj, err_str);
- dump_stack();
- }
kobject_init()就是调用kobject_init_internal()自动初始化了一些结构变量,然后又设置了ktype。其实这个ktype主要是管理一些默认属性什么的,只要在kobject_add_internal()调用create_dir()之前设置就行,之所以会出现在kobject_init()中,完全是为了与后面的kobject_create()相对比。
- static int kobject_add_varg(struct kobject *kobj, struct kobject *parent,
- const char *fmt, va_list vargs)
- {
- int retval;
- retval = kobject_set_name_vargs(kobj, fmt, vargs);
- if (retval) {
- printk(KERN_ERR “kobject: can not set name properly!\n”);
- return retval;
- }
- kobj->parent = parent;
- return kobject_add_internal(kobj);
- }
- /**
- * kobject_add – the main kobject add function
- * @kobj: the kobject to add
- * @parent: pointer to the parent of the kobject.
- * @fmt: format to name the kobject with.
- *
- * The kobject name is set and added to the kobject hierarchy in this
- * function.
- *
- * If @parent is set, then the parent of the @kobj will be set to it.
- * If @parent is NULL, then the parent of the @kobj will be set to the
- * kobject associted with the kset assigned to this kobject. If no kset
- * is assigned to the kobject, then the kobject will be located in the
- * root of the sysfs tree.
- *
- * If this function returns an error, kobject_put() must be called to
- * properly clean up the memory associated with the object.
- * Under no instance should the kobject that is passed to this function
- * be directly freed with a call to kfree(), that can leak memory.
- *
- * Note, no “add” uevent will be created with this call, the caller should set
- * up all of the necessary sysfs files for the object and then call
- * kobject_uevent() with the UEVENT_ADD parameter to ensure that
- * userspace is properly notified of this kobject’s creation.
- */
- int kobject_add(struct kobject *kobj, struct kobject *parent,
- const char *fmt, …)
- {
- va_list args;
- int retval;
- if (!kobj)
- return -EINVAL;
- if (!kobj->state_initialized) {
- printk(KERN_ERR “kobject ‘%s’ (%p): tried to add an “
- “uninitialized object, something is seriously wrong.\n”,
- kobject_name(kobj), kobj);
- dump_stack();
- return -EINVAL;
- }
- va_start(args, fmt);
- retval = kobject_add_varg(kobj, parent, fmt, args);
- va_end(args);
- return retval;
- }
kobject_add()把kobj添加到体系中。但它还有一个附加功能,设置kobj的名字。parent也是作为参数传进来的,至于为什么kset没有同样传进来,或许是历史遗留原因吧。
- int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
- struct kobject *parent, const char *fmt, …)
- {
- va_list args;
- int retval;
- kobject_init(kobj, ktype);
- va_start(args, fmt);
- retval = kobject_add_varg(kobj, parent, fmt, args);
- va_end(args);
- return retval;
- }
kobject_init_and_add()虽然是kobject_init()和kobject_add()的合并,但并不常用,因为其中根本没留下设置kset的空当,这无疑不太合适。(不过从前面kobject_init_internal()的实现看,它并不会改动kobj->kset,所以如果需要,仍可以在调用之前先设置好kset。)
- int kobject_rename(struct kobject *kobj, const char *new_name)
- {
- int error = 0;
- const char *devpath = NULL;
- const char *dup_name = NULL, *name;
- char *devpath_string = NULL;
- char *envp[2];
- kobj = kobject_get(kobj);
- if (!kobj)
- return -EINVAL;
- if (!kobj->parent)
- return -EINVAL;
- devpath = kobject_get_path(kobj, GFP_KERNEL);
- if (!devpath) {
- error = -ENOMEM;
- goto out;
- }
- devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL);
- if (!devpath_string) {
- error = -ENOMEM;
- goto out;
- }
- sprintf(devpath_string, “DEVPATH_OLD=%s”, devpath);
- envp[0] = devpath_string;
- envp[1] = NULL;
- name = dup_name = kstrdup(new_name, GFP_KERNEL);
- if (!name) {
- error = -ENOMEM;
- goto out;
- }
- error = sysfs_rename_dir(kobj, new_name);
- if (error)
- goto out;
- /* Install the new kobject name */
- dup_name = kobj->name;
- kobj->name = name;
- /* This function is mostly/only used for network interface.
- * Some hotplug package track interfaces by their name and
- * therefore want to know when the name is changed by the user. */
- kobject_uevent_env(kobj, KOBJ_MOVE, envp);
- out:
- kfree(dup_name);
- kfree(devpath_string);
- kfree(devpath);
- kobject_put(kobj);
- return error;
- }
kobject_rename()就是在kobj已经添加到系统之后,要改名字时调用的函数。它除了完成kobject_set_name()的功能,还向用户空间通知这一消息。
- int kobject_move(struct kobject *kobj, struct kobject *new_parent)
- {
- int error;
- struct kobject *old_parent;
- const char *devpath = NULL;
- char *devpath_string = NULL;
- char *envp[2];
- kobj = kobject_get(kobj);
- if (!kobj)
- return -EINVAL;
- new_parent = kobject_get(new_parent);
- if (!new_parent) {
- if (kobj->kset)
- new_parent = kobject_get(&kobj->kset->kobj);
- }
- /* old object path */
- devpath = kobject_get_path(kobj, GFP_KERNEL);
- if (!devpath) {
- error = -ENOMEM;
- goto out;
- }
- devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL);
- if (!devpath_string) {
- error = -ENOMEM;
- goto out;
- }
- sprintf(devpath_string, “DEVPATH_OLD=%s”, devpath);
- envp[0] = devpath_string;
- envp[1] = NULL;
- error = sysfs_move_dir(kobj, new_parent);
- if (error)
- goto out;
- old_parent = kobj->parent;
- kobj->parent = new_parent;
- new_parent = NULL;
- kobject_put(old_parent);
- kobject_uevent_env(kobj, KOBJ_MOVE, envp);
- out:
- kobject_put(new_parent);
- kobject_put(kobj);
- kfree(devpath_string);
- kfree(devpath);
- return error;
- }
kobject_move()则是在kobj添加到系统后,想移动到新的parent kobject下所调用的函数。在通知用户空间上,与kobject_rename()调用的是同一操作。
- void kobject_del(struct kobject *kobj)
- {
- if (!kobj)
- return;
- sysfs_remove_dir(kobj);
- kobj->state_in_sysfs = 0;
- kobj_kset_leave(kobj);
- kobject_put(kobj->parent);
- kobj->parent = NULL;
- }
kobject_del()仅仅是把kobj从系统中退出,相对于kobject_add()操作。
- /**
- * kobject_get – increment refcount for object.
- * @kobj: object.
- */
- struct kobject *kobject_get(struct kobject *kobj)
- {
- if (kobj)
- kref_get(&kobj->kref);
- return kobj;
- }
- /*
- * kobject_cleanup – free kobject resources.
- * @kobj: object to cleanup
- */
- static void kobject_cleanup(struct kobject *kobj)
- {
- struct kobj_type *t = get_ktype(kobj);
- const char *name = kobj->name;
- pr_debug(“kobject: ‘%s’ (%p): %s\n”,
- kobject_name(kobj), kobj, __func__);
- if (t && !t->release)
- pr_debug(“kobject: ‘%s’ (%p): does not have a release() “
- “function, it is broken and must be fixed.\n”,
- kobject_name(kobj), kobj);
- /* send “remove” if the caller did not do it but sent “add” */
- if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
- pr_debug(“kobject: ‘%s’ (%p): auto cleanup ‘remove’ event\n”,
- kobject_name(kobj), kobj);
- kobject_uevent(kobj, KOBJ_REMOVE);
- }
- /* remove from sysfs if the caller did not do it */
- if (kobj->state_in_sysfs) {
- pr_debug(“kobject: ‘%s’ (%p): auto cleanup kobject_del\n”,
- kobject_name(kobj), kobj);
- kobject_del(kobj);
- }
- if (t && t->release) {
- pr_debug(“kobject: ‘%s’ (%p): calling ktype release\n”,
- kobject_name(kobj), kobj);
- t->release(kobj);
- }
- /* free name if we allocated it */
- if (name) {
- pr_debug(“kobject: ‘%s’: free name\n”, name);
- kfree(name);
- }
- }
- static void kobject_release(struct kref *kref)
- {
- kobject_cleanup(container_of(kref, struct kobject, kref));
- }
- /**
- * kobject_put – decrement refcount for object.
- * @kobj: object.
- *
- * Decrement the refcount, and if 0, call kobject_cleanup().
- */
- void kobject_put(struct kobject *kobj)
- {
- if (kobj) {
- if (!kobj->state_initialized)
- WARN(1, KERN_WARNING “kobject: ‘%s’ (%p): is not “
- “initialized, yet kobject_put() is being “
- “called.\n”, kobject_name(kobj), kobj);
- kref_put(&kobj->kref, kobject_release);
- }
- }
kobject_get()和kobject_put()走的完全是引用计数的路线。kobject_put()会在引用计数降为零时撤销整个kobject的存在:向用户空间发送REMOVE消息,从sysfs中删除相应目录,调用kobj_type中定义的release函数,释放name所占的空间。
看看前面介绍的API。
- int kobject_set_name(struct kobject *kobj, const char *name, …)
- __attribute__((format(printf, 2, 3)));
- int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
- va_list vargs);
- void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
- int __must_check kobject_add(struct kobject *kobj,
- struct kobject *parent,
- const char *fmt, …);
- int __must_check kobject_init_and_add(struct kobject *kobj,
- struct kobj_type *ktype,
- struct kobject *parent,
- const char *fmt, …);
- void kobject_del(struct kobject *kobj);
- int __must_check kobject_rename(struct kobject *, const char *new_name);
- int __must_check kobject_move(struct kobject *, struct kobject *);
- struct kobject *kobject_get(struct kobject *kobj);
- void kobject_put(struct kobject *kobj);
- char *kobject_get_path(struct kobject *kobj, gfp_t flag);
基本上概括了kobject从创建到删除,包括中间改名字,改位置,以及引用计数的变动。
当然,kobject创建仍比较麻烦,因为ktype需要自己写。下面就是kobject提供的一种快速创建方法。
- static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
- {
- struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
- kattr = container_of(attr, struct kobj_attribute, attr);
- if (kattr->show)
- ret = kattr->show(kobj, kattr, buf);
- return ret;
- }
- static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
- {
- struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
- kattr = container_of(attr, struct kobj_attribute, attr);
- if (kattr->store)
- ret = kattr->store(kobj, kattr, buf, count);
- return ret;
- }
- struct sysfs_ops kobj_sysfs_ops = {
- .show = kobj_attr_show,
- .store = kobj_attr_store,
- };
- static void dynamic_kobj_release(struct kobject *kobj)
- {
- pr_debug(“kobject: (%p): %s\n”, kobj, __func__);
- kfree(kobj);
- }
- static struct kobj_type dynamic_kobj_ktype = {
- .release = dynamic_kobj_release,
- .sysfs_ops = &kobj_sysfs_ops,
- };
这个就是kobject自身提供的一种kobj_type,叫做dynamic_kobj_ktype。它没有提供默认的属性,但提供了release函数及访问属性的方法。
- struct kobject *kobject_create(void)
- {
- struct kobject *kobj;
- kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
- if (!kobj)
- return NULL;
- kobject_init(kobj, &dynamic_kobj_ktype);
- return kobj;
- }
- struct kobject *kobject_create_and_add(const char *name, struct kobject *parent)
- {
- struct kobject *kobj;
- int retval;
- kobj = kobject_create();
- if (!kobj)
- return NULL;
- retval = kobject_add(kobj, parent, “%s”, name);
- if (retval) {
- printk(KERN_WARNING “%s: kobject_add error: %d\n”,
- __func__, retval);
- kobject_put(kobj);
- kobj = NULL;
- }
- return kobj;
- }
在kobject_create()及kobject_create_and_add()中,使用了这种dynamic_kobj_ktype。这是一种很好的偷懒方法。因为release()函数会释放kobj,所以这里的kobj必须是kobject_create()动态创建的。这里的kobject_create()和kobject_init()相对,kobject_create_and_add()和kobject_init_and_add()相对。值得一提的是,这里用kobject_create()和kobject_create_and_add()创建的kobject无法嵌入其它结构,是独立的存在,所以用到的地方很少。
- void kset_init(struct kset *k)
- {
- kobject_init_internal(&k->kobj);
- INIT_LIST_HEAD(&k->list);
- spin_lock_init(&k->list_lock);
- }
kset_init()对kset进行初始化。不过它的界限同kobject差不多。
- int kset_register(struct kset *k)
- {
- int err;
- if (!k)
- return -EINVAL;
- kset_init(k);
- err = kobject_add_internal(&k->kobj);
- if (err)
- return err;
- kobject_uevent(&k->kobj, KOBJ_ADD);
- return 0;
- }
kset_register()最大的特点是简单,它只负责把kset中的kobject连入系统,并发布KOBJ_ADD消息。所以在调用它之前,你要先设置好k->kobj.name、k->kobj.parent、k->kobj.kset。
- void kset_unregister(struct kset *k)
- {
- if (!k)
- return;
- kobject_put(&k->kobj);
- }
kset_unregister()只是简单地释放创建时获得的引用计数。使用引用计数就是这么简单。
- struct kobject *kset_find_obj(struct kset *kset, const char *name)
- {
- struct kobject *k;
- struct kobject *ret = NULL;
- spin_lock(&kset->list_lock);
- list_for_each_entry(k, &kset->list, entry) {
- if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
- ret = kobject_get(k);
- break;
- }
- }
- spin_unlock(&kset->list_lock);
- return ret;
- }
kset_find_obj()从kset的链表中找到名为name的kobject。这纯粹是一个对外的API。
- static void kset_release(struct kobject *kobj)
- {
- struct kset *kset = container_of(kobj, struct kset, kobj);
- pr_debug(“kobject: ‘%s’ (%p): %s\n”,
- kobject_name(kobj), kobj, __func__);
- kfree(kset);
- }
- static struct kobj_type kset_ktype = {
- .sysfs_ops = &kobj_sysfs_ops,
- .release = kset_release,
- };
与kobject相对的,kset也提供了一种kobj_type,叫做kset_ktype。
- static struct kset *kset_create(const char *name,
- struct kset_uevent_ops *uevent_ops,
- struct kobject *parent_kobj)
- {
- struct kset *kset;
- int retval;
- kset = kzalloc(sizeof(*kset), GFP_KERNEL);
- if (!kset)
- return NULL;
- retval = kobject_set_name(&kset->kobj, name);
- if (retval) {
- kfree(kset);
- return NULL;
- }
- kset->uevent_ops = uevent_ops;
- kset->kobj.parent = parent_kobj;
- /*
- * The kobject of this kset will have a type of kset_ktype and belong to
- * no kset itself. That way we can properly free it when it is
- * finished being used.
- */
- kset->kobj.ktype = &kset_ktype;
- kset->kobj.kset = NULL;
- return kset;
- }
- /**
- * kset_create_and_add – create a struct kset dynamically and add it to sysfs
- *
- * @name: the name for the kset
- * @uevent_ops: a struct kset_uevent_ops for the kset
- * @parent_kobj: the parent kobject of this kset, if any.
- *
- * This function creates a kset structure dynamically and registers it
- * with sysfs. When you are finished with this structure, call
- * kset_unregister() and the structure will be dynamically freed when it
- * is no longer being used.
- *
- * If the kset was not able to be created, NULL will be returned.
- */
- struct kset *kset_create_and_add(const char *name,
- struct kset_uevent_ops *uevent_ops,
- struct kobject *parent_kobj)
- {
- struct kset *kset;
- int error;
- kset = kset_create(name, uevent_ops, parent_kobj);
- if (!kset)
- return NULL;
- error = kset_register(kset);
- if (error) {
- kfree(kset);
- return NULL;
- }
- return kset;
- }
kset_create()和kset_create_and_add()就是使用kset_ktype的快速创建函数。
说实话,使用kobject_create_and_add()的比较少见,但使用 kset_create_and_add()的情形还是见过一些的。比如sysfs中那些顶层的目录,就是单纯的目录,不需要嵌入什么很复杂的结构,用简单的kset_create_and_add()创建就好了。
- static inline const char *kobject_name(const struct kobject *kobj)
- {
- return kobj->name;
- }
- static inline struct kset *to_kset(struct kobject *kobj)
- {
- return kobj ? container_of(kobj, struct kset, kobj) : NULL;
- }
- static inline struct kset *kset_get(struct kset *k)
- {
- return k ? to_kset(kobject_get(&k->kobj)) : NULL;
- }
- static inline void kset_put(struct kset *k)
- {
- kobject_put(&k->kobj);
- }
- static inline struct kobj_type *get_ktype(struct kobject *kobj)
- {
- return kobj->ktype;
- }
这些是在kobject.h中的内联函数。这里内联函数更多的意思是方便,易于屏蔽内部实现。
以上就是kobject共800余行的代码实现,当然我们忽略了uevent的那部分。
事实证明,自底向上或者顺序的代码分析方法,还是很适合千行左右的代码分析。而且这样分析很全面,容易我们洞察整个模块的意图,从而在理解代码时从较高的抽象角度去看。
linux的设备驱动模型,是建立在sysfs和kobject之上的,由总线、设备、驱动、类所组成的关系结构。从本节开始,我们将对linux这一设备驱动模型进行深入分析。头文件是include/linux/device.h,实现在drivers/base目录中。本节要分析的,是其中的设备,主要在core.c中。
- struct device {
- struct device *parent;
- struct device_private *p;
- struct kobject kobj;
- const char *init_name; /* initial name of the device */
- struct device_type *type;
- struct semaphore sem; /* semaphore to synchronize calls to
- * its driver.
- */
- struct bus_type *bus; /* type of bus device is on */
- struct device_driver *driver; /* which driver has allocated this
- device */
- void *platform_data; /* Platform specific data, device
- core doesn’t touch it */
- struct dev_pm_info power;
- #ifdef CONFIG_NUMA
- int numa_node; /* NUMA node this device is close to */
- #endif
- u64 *dma_mask; /* dma mask (if dma’able device) */
- u64 coherent_dma_mask;/* Like dma_mask, but for
- alloc_coherent mappings as
- not all hardware supports
- 64 bit addresses for consistent
- allocations such descriptors. */
- struct device_dma_parameters *dma_parms;
- struct list_head dma_pools; /* dma pools (if dma’ble) */
- struct dma_coherent_mem *dma_mem; /* internal for coherent mem
- override */
- /* arch specific additions */
- struct dev_archdata archdata;
- dev_t devt; /* dev_t, creates the sysfs “dev” */
- spinlock_t devres_lock;
- struct list_head devres_head;
- struct klist_node knode_class;
- struct class *class;
- const struct attribute_group **groups; /* optional groups */
- void (*release)(struct device *dev);
- };
先来分析下struct device的结构变量。首先是指向父节点的指针parent,kobj是内嵌在device中的kobject,用于把它联系到sysfs中。bus是对设备所在总线的指针,driver是对设备所用驱动的指针。还有DMA需要的数据,表示设备号的devt,表示设备资源的devres_head和保护它的devres_lock。指向类的指针class,knode_class是被连入class链表时所用的klist节点。groups是设备的属性组集合。release应该是设备释放时调用的函数。
- struct device_private {
- struct klist klist_children;
- struct klist_node knode_parent;
- struct klist_node knode_driver;
- struct klist_node knode_bus;
- void *driver_data;
- struct device *device;
- };
- #define to_device_private_parent(obj) \
- container_of(obj, struct device_private, knode_parent)
- #define to_device_private_driver(obj) \
- container_of(obj, struct device_private, knode_driver)
- #define to_device_private_bus(obj) \
- container_of(obj, struct device_private, knode_bus)
struct device中有一部分不愿意让外界看到,所以做出struct device_private结构,包括了设备驱动模型内部的链接。klist_children是子设备的链表,knode_parent是连入父设备的klist_children时所用的节点,knode_driver是连入驱动的设备链表所用的节点,knode_bus是连入总线的设备链表时所用的节点。driver_data用于在设备结构中存放相关的驱动信息,也许是驱动专门为设备建立的结构实例。device则是指向struct device_private所属的device。
下面还有一些宏,to_device_private_parent()是从父设备的klist_children上节点,获得相应的device_private。to_device_private_driver()是从驱动的设备链表上节点,获得对应的device_private。to_device_private_bus()是从总线的设备链表上节点,获得对应的device_private。
或许会奇怪,为什么knode_class没有被移入struct device_private,或许有外部模块需要用到它。
- /*
- * The type of device, “struct device” is embedded in. A class
- * or bus can contain devices of different types
- * like “partitions” and “disks”, “mouse” and “event”.
- * This identifies the device type and carries type-specific
- * information, equivalent to the kobj_type of a kobject.
- * If “name” is specified, the uevent will contain it in
- * the DEVTYPE variable.
- */
- struct device_type {
- const char *name;
- const struct attribute_group **groups;
- int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
- char *(*devnode)(struct device *dev, mode_t *mode);
- void (*release)(struct device *dev);
- const struct dev_pm_ops *pm;
- };
device竟然有device_type,类似于与kobject相对的kobj_type,之后我们再看它怎么用。
- /* interface for exporting device attributes */
- struct device_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
- char *buf);
- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
- };
- #define DEVICE_ATTR(_name, _mode, _show, _store) \
- struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
这个device_attribute显然就是device对struct attribute的封装,新加的show()、store()函数都是以与设备相关的结构调用的。
至于device中其它的archdata、dma、devres,都是作为设备特有的,我们现在主要关心设备驱动模型的建立,这些会尽量忽略。
下面就来看看device的实现,这主要在core.c中。
- int __init devices_init(void)
- {
- devices_kset = kset_create_and_add(“devices”, &device_uevent_ops, NULL);
- if (!devices_kset)
- return -ENOMEM;
- dev_kobj = kobject_create_and_add(“dev”, NULL);
- if (!dev_kobj)
- goto dev_kobj_err;
- sysfs_dev_block_kobj = kobject_create_and_add(“block”, dev_kobj);
- if (!sysfs_dev_block_kobj)
- goto block_kobj_err;
- sysfs_dev_char_kobj = kobject_create_and_add(“char”, dev_kobj);
- if (!sysfs_dev_char_kobj)
- goto char_kobj_err;
- return 0;
- char_kobj_err:
- kobject_put(sysfs_dev_block_kobj);
- block_kobj_err:
- kobject_put(dev_kobj);
- dev_kobj_err:
- kset_unregister(devices_kset);
- return -ENOMEM;
- }
这是在设备驱动模型初始化时调用的device部分初始的函数devices_init()。它干的事情我们都很熟悉,就是建立sysfs中的devices目录,和dev目录。还在dev目录下又建立了block和char两个子目录。因为dev目录只打算存放辅助的设备号,所以没必要使用kset。
- static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
- {
- struct device_attribute *dev_attr = to_dev_attr(attr);
- struct device *dev = to_dev(kobj);
- ssize_t ret = -EIO;
- if (dev_attr->show)
- ret = dev_attr->show(dev, dev_attr, buf);
- if (ret >= (ssize_t)PAGE_SIZE) {
- print_symbol(“dev_attr_show: %s returned bad count\n”,
- (unsigned long)dev_attr->show);
- }
- return ret;
- }
- static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
- {
- struct device_attribute *dev_attr = to_dev_attr(attr);
- struct device *dev = to_dev(kobj);
- ssize_t ret = -EIO;
- if (dev_attr->store)
- ret = dev_attr->store(dev, dev_attr, buf, count);
- return ret;
- }
- static struct sysfs_ops dev_sysfs_ops = {
- .show = dev_attr_show,
- .store = dev_attr_store,
- };
看到这里是不是很熟悉,dev_sysfs_ops就是device准备注册到sysfs中的操作函数。dev_attr_show()和dev_attr_store()都会再调用与属性相关的函数。
- static void device_release(struct kobject *kobj)
- {
- struct device *dev = to_dev(kobj);
- struct device_private *p = dev->p;
- if (dev->release)
- dev->release(dev);
- else if (dev->type && dev->type->release)
- dev->type->release(dev);
- else if (dev->class && dev->class->dev_release)
- dev->class->dev_release(dev);
- else
- WARN(1, KERN_ERR “Device ‘%s’ does not have a release() “
- “function, it is broken and must be fixed.\n”,
- dev_name(dev));
- kfree(p);
- }
- static struct kobj_type device_ktype = {
- .release = device_release,
- .sysfs_ops = &dev_sysfs_ops,
- };
使用的release函数是device_release。在释放device时,会按优先级依次检查device结构中定义的release函数、device_type中定义的release函数、device所属的class中所定义的dev_release函数,只调用其中第一个存在的(三者都没有时会给出警告),最后会把device_private结构释放掉。
- static int dev_uevent_filter(struct kset *kset, struct kobject *kobj)
- {
- struct kobj_type *ktype = get_ktype(kobj);
- if (ktype == &device_ktype) {
- struct device *dev = to_dev(kobj);
- if (dev->bus)
- return 1;
- if (dev->class)
- return 1;
- }
- return 0;
- }
- static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj)
- {
- struct device *dev = to_dev(kobj);
- if (dev->bus)
- return dev->bus->name;
- if (dev->class)
- return dev->class->name;
- return NULL;
- }
- static int dev_uevent(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env)
- {
- struct device *dev = to_dev(kobj);
- int retval = 0;
- /* add device node properties if present */
- if (MAJOR(dev->devt)) {
- const char *tmp;
- const char *name;
- mode_t mode = 0;
- add_uevent_var(env, “MAJOR=%u”, MAJOR(dev->devt));
- add_uevent_var(env, “MINOR=%u”, MINOR(dev->devt));
- name = device_get_devnode(dev, &mode, &tmp);
- if (name) {
- add_uevent_var(env, “DEVNAME=%s”, name);
- kfree(tmp);
- if (mode)
- add_uevent_var(env, “DEVMODE=%#o”, mode & 0777);
- }
- }
- if (dev->type && dev->type->name)
- add_uevent_var(env, “DEVTYPE=%s”, dev->type->name);
- if (dev->driver)
- add_uevent_var(env, “DRIVER=%s”, dev->driver->name);
- #ifdef CONFIG_SYSFS_DEPRECATED
- if (dev->class) {
- struct device *parent = dev->parent;
- /* find first bus device in parent chain */
- while (parent && !parent->bus)
- parent = parent->parent;
- if (parent && parent->bus) {
- const char *path;
- path = kobject_get_path(&parent->kobj, GFP_KERNEL);
- if (path) {
- add_uevent_var(env, “PHYSDEVPATH=%s”, path);
- kfree(path);
- }
- add_uevent_var(env, “PHYSDEVBUS=%s”, parent->bus->name);
- if (parent->driver)
- add_uevent_var(env, “PHYSDEVDRIVER=%s”,
- parent->driver->name);
- }
- } else if (dev->bus) {
- add_uevent_var(env, “PHYSDEVBUS=%s”, dev->bus->name);
- if (dev->driver)
- add_uevent_var(env, “PHYSDEVDRIVER=%s”,
- dev->driver->name);
- }
- #endif
- /* have the bus specific function add its stuff */
- if (dev->bus && dev->bus->uevent) {
- retval = dev->bus->uevent(dev, env);
- if (retval)
- pr_debug(“device: ‘%s’: %s: bus uevent() returned %d\n”,
- dev_name(dev), __func__, retval);
- }
- /* have the class specific function add its stuff */
- if (dev->class && dev->class->dev_uevent) {
- retval = dev->class->dev_uevent(dev, env);
- if (retval)
- pr_debug(“device: ‘%s’: %s: class uevent() “
- “returned %d\n”, dev_name(dev),
- __func__, retval);
- }
- /* have the device type specific fuction add its stuff */
- if (dev->type && dev->type->uevent) {
- retval = dev->type->uevent(dev, env);
- if (retval)
- pr_debug(“device: ‘%s’: %s: dev_type uevent() “
- “returned %d\n”, dev_name(dev),
- __func__, retval);
- }
- return retval;
- }
- static struct kset_uevent_ops device_uevent_ops = {
- .filter = dev_uevent_filter,
- .name = dev_uevent_name,
- .uevent = dev_uevent,
- };
前面在讲到kset时,我们并未关注其中的kset_uevent_ops结构变量。但这里device既然用到了,我们就对其中的三个函数做简单介绍。kset_uevent_ops中的函数是用于管理kset内部kobject的uevent操作。其中filter函数用于阻止一个kobject向用户空间发送uevent,返回值为0表示阻止。这里dev_uevent_filter()检查device所属的bus或者class是否存在,如果都不存在,也就没有发送uevent的必要了。name函数是用于覆盖kset发送给用户空间的名称。这里dev_uevent_name()选择使用bus或者class的名称。uevent()函数是在uevent将被发送到用户空间之前调用的,用于向uevent中增加新的环境变量。dev_uevent()的实现很热闹,向uevent中添加了各种环境变量。
- static ssize_t show_uevent(struct device *dev, struct device_attribute *attr,
- char *buf)
- {
- struct kobject *top_kobj;
- struct kset *kset;
- struct kobj_uevent_env *env = NULL;
- int i;
- size_t count = 0;
- int retval;
- /* search the kset, the device belongs to */
- top_kobj = &dev->kobj;
- while (!top_kobj->kset && top_kobj->parent)
- top_kobj = top_kobj->parent;
- if (!top_kobj->kset)
- goto out;
- kset = top_kobj->kset;
- if (!kset->uevent_ops || !kset->uevent_ops->uevent)
- goto out;
- /* respect filter */
- if (kset->uevent_ops && kset->uevent_ops->filter)
- if (!kset->uevent_ops->filter(kset, &dev->kobj))
- goto out;
- env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
- if (!env)
- return -ENOMEM;
- /* let the kset specific function add its keys */
- retval = kset->uevent_ops->uevent(kset, &dev->kobj, env);
- if (retval)
- goto out;
- /* copy keys to file */
- for (i = 0; i < env->envp_idx; i++)
- count += sprintf(&buf[count], “%s\n”, env->envp[i]);
- out:
- kfree(env);
- return count;
- }
- static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
- {
- enum kobject_action action;
- if (kobject_action_type(buf, count, &action) == 0) {
- kobject_uevent(&dev->kobj, action);
- goto out;
- }
- dev_err(dev, “uevent: unsupported action-string; this will “
- “be ignored in a future kernel version\n”);
- kobject_uevent(&dev->kobj, KOBJ_ADD);
- out:
- return count;
- }
- static struct device_attribute uevent_attr =
- __ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent);
device不仅在kset中添加了对uevent的管理,而且还把uevent信息做成设备的一个属性uevent。其中show_uevent()是显示uevent中环境变量的,store_uevent()是发送uevent的。
- /*
- * device_add_attributes - create a sysfs file for every entry of @attrs.
- * @dev: device that receives the attribute files.
- * @attrs: array walked until attr_name() of an entry is NULL; may be NULL.
- *
- * Returns 0 on success or the error from device_create_file(). On failure
- * the files created so far are removed again, last one first.
- */
- static int device_add_attributes(struct device *dev,
- struct device_attribute *attrs)
- {
- int error = 0;
- int i;
- if (attrs) {
- for (i = 0; attr_name(attrs[i]); i++) {
- error = device_create_file(dev, &attrs[i]);
- if (error)
- break;
- }
- /* roll back: attrs[i] is the entry that failed, so start below it */
- if (error)
- while (--i >= 0)
- device_remove_file(dev, &attrs[i]);
- }
- return error;
- }
- static void device_remove_attributes(struct device *dev,
- struct device_attribute *attrs)
- {
- int i;
- if (attrs)
- for (i = 0; attr_name(attrs[i]); i++)
- device_remove_file(dev, &attrs[i]);
- }
- /*
- * device_add_groups - create every attribute group in @groups on @dev.
- * @dev: device whose kobject receives the groups.
- * @groups: NULL-terminated array of attribute groups; may be NULL.
- *
- * Returns 0 on success or the error from sysfs_create_group(). On failure
- * the groups created so far are removed again, last one first.
- */
- static int device_add_groups(struct device *dev,
- const struct attribute_group **groups)
- {
- int error = 0;
- int i;
- if (groups) {
- for (i = 0; groups[i]; i++) {
- error = sysfs_create_group(&dev->kobj, groups[i]);
- if (error) {
- /* roll back: groups[i] failed, so start below it */
- while (--i >= 0)
- sysfs_remove_group(&dev->kobj,
- groups[i]);
- break;
- }
- }
- }
- return error;
- }
- static void device_remove_groups(struct device *dev,
- const struct attribute_group **groups)
- {
- int i;
- if (groups)
- for (i = 0; groups[i]; i++)
- sysfs_remove_group(&dev->kobj, groups[i]);
- }
以上四个内部函数是用来向device中添加或删除属性与属性集合的。
device_add_attributes、device_remove_attributes、device_add_groups、device_remove_groups,都是直接通过sysfs提供的API实现。
- static int device_add_attrs(struct device *dev)
- {
- struct class *class = dev->class;
- struct device_type *type = dev->type;
- int error;
- if (class) {
- error = device_add_attributes(dev, class->dev_attrs);
- if (error)
- return error;
- }
- if (type) {
- error = device_add_groups(dev, type->groups);
- if (error)
- goto err_remove_class_attrs;
- }
- error = device_add_groups(dev, dev->groups);
- if (error)
- goto err_remove_type_groups;
- return 0;
- err_remove_type_groups:
- if (type)
- device_remove_groups(dev, type->groups);
- err_remove_class_attrs:
- if (class)
- device_remove_attributes(dev, class->dev_attrs);
- return error;
- }
- static void device_remove_attrs(struct device *dev)
- {
- struct class *class = dev->class;
- struct device_type *type = dev->type;
- device_remove_groups(dev, dev->groups);
- if (type)
- device_remove_groups(dev, type->groups);
- if (class)
- device_remove_attributes(dev, class->dev_attrs);
- }
device_add_attrs()实际负责device中的属性添加。也是几个部分的集合,包括class中的dev_attrs,device_type中的groups,还有device本身的groups。
device_remove_attrs()则负责对应的device属性删除工作。
- /* Write dev as "major:minor\n" into buffer; evaluates to sprintf()'s result. */
- #define print_dev_t(buffer, dev) \
- sprintf((buffer), "%u:%u\n", MAJOR(dev), MINOR(dev))
- static ssize_t show_dev(struct device *dev, struct device_attribute *attr,
- char *buf)
- {
- return print_dev_t(buf, dev->devt);
- }
- static struct device_attribute devt_attr =
- __ATTR(dev, S_IRUGO, show_dev, NULL);
这里又定义了一个名为dev的属性,就是显示设备的设备号。
- /**
- * device_create_file – create sysfs attribute file for device.
- * @dev: device.
- * @attr: device attribute descriptor.
- */
- int device_create_file(struct device *dev, struct device_attribute *attr)
- {
- int error = 0;
- if (dev)
- error = sysfs_create_file(&dev->kobj, &attr->attr);
- return error;
- }
- /**
- * device_remove_file – remove sysfs attribute file.
- * @dev: device.
- * @attr: device attribute descriptor.
- */
- void device_remove_file(struct device *dev, struct device_attribute *attr)
- {
- if (dev)
- sysfs_remove_file(&dev->kobj, &attr->attr);
- }
- /**
- * device_create_bin_file – create sysfs binary attribute file for device.
- * @dev: device.
- * @attr: device binary attribute descriptor.
- */
- int device_create_bin_file(struct device *dev, struct bin_attribute *attr)
- {
- int error = -EINVAL;
- if (dev)
- error = sysfs_create_bin_file(&dev->kobj, attr);
- return error;
- }
- /**
- * device_remove_bin_file – remove sysfs binary attribute file
- * @dev: device.
- * @attr: device binary attribute descriptor.
- */
- void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
- {
- if (dev)
- sysfs_remove_bin_file(&dev->kobj, attr);
- }
- int device_schedule_callback_owner(struct device *dev,
- void (*func)(struct device *), struct module *owner)
- {
- return sysfs_schedule_callback(&dev->kobj,
- (void (*)(void *)) func, dev, owner);
- }
这里的五个函数,也是对sysfs提供的API的简单封装。
device_create_file()和device_remove_file()提供直接的属性文件管理方法。
device_create_bin_file()和device_remove_bin_file()则是提供设备管理二进制文件的方法。
device_schedule_callback_owner()也是简单地将func加入工作队列。
- static void klist_children_get(struct klist_node *n)
- {
- struct device_private *p = to_device_private_parent(n);
- struct device *dev = p->device;
- get_device(dev);
- }
- static void klist_children_put(struct klist_node *n)
- {
- struct device_private *p = to_device_private_parent(n);
- struct device *dev = p->device;
- put_device(dev);
- }
如果之前认真看过klist的实现,应该知道,klist_children_get()和klist_children_put()就是在设备挂入和删除父设备的klist_children链表时调用的函数。在父设备klist_children链表上的指针,相当于对device的一个引用计数。
- struct device *get_device(struct device *dev)
- {
- return dev ? to_dev(kobject_get(&dev->kobj)) : NULL;
- }
- /**
- * put_device – decrement reference count.
- * @dev: device in question.
- */
- void put_device(struct device *dev)
- {
- /* might_sleep(); */
- if (dev)
- kobject_put(&dev->kobj);
- }
device中的引用计数,完全交给内嵌的kobject来做。如果引用计数降为零,自然是调用之前说到的包含甚广的device_release函数。
- void device_initialize(struct device *dev)
- {
- dev->kobj.kset = devices_kset;
- kobject_init(&dev->kobj, &device_ktype);
- INIT_LIST_HEAD(&dev->dma_pools);
- init_MUTEX(&dev->sem);
- spin_lock_init(&dev->devres_lock);
- INIT_LIST_HEAD(&dev->devres_head);
- device_init_wakeup(dev, 0);
- device_pm_init(dev);
- set_dev_node(dev, -1);
- }
device_initialize()就是device结构的初始化函数,它把device中能初始化的部分全初始化。它的界限在其中kobj的位置与device在设备驱动模型中的位置,这些必须由外部设置。可以看到,调用kobject_init()时,kobject的kobj_type选择了device_ktype,其中主要是sysfs_ops的两个函数,还有device_release函数。
- /*
- * Return the "virtual" kobject under devices_kset, creating it on the first
- * call and caching it in a function-local static. @dev is not used.
- * The result is whatever kobject_create_and_add() returned.
- */
- static struct kobject *virtual_device_parent(struct device *dev)
- {
- static struct kobject *virtual_dir = NULL;
- if (!virtual_dir)
- virtual_dir = kobject_create_and_add("virtual",
- &devices_kset->kobj);
- return virtual_dir;
- }
- static struct kobject *get_device_parent(struct device *dev,
- struct device *parent)
- {
- int retval;
- if (dev->class) {
- struct kobject *kobj = NULL;
- struct kobject *parent_kobj;
- struct kobject *k;
- /*
- * If we have no parent, we live in “virtual”.
- * Class-devices with a non class-device as parent, live
- * in a “glue” directory to prevent namespace collisions.
- */
- if (parent == NULL)
- parent_kobj = virtual_device_parent(dev);
- else if (parent->class)
- return &parent->kobj;
- else
- parent_kobj = &parent->kobj;
- /* find our class-directory at the parent and reference it */
- spin_lock(&dev->class->p->class_dirs.list_lock);
- list_for_each_entry(k, &dev->class->p->class_dirs.list, entry)
- if (k->parent == parent_kobj) {
- kobj = kobject_get(k);
- break;
- }
- spin_unlock(&dev->class->p->class_dirs.list_lock);
- if (kobj)
- return kobj;
- /* or create a new class-directory at the parent device */
- k = kobject_create();
- if (!k)
- return NULL;
- k->kset = &dev->class->p->class_dirs;
- retval = kobject_add(k, parent_kobj, “%s”, dev->class->name);
- if (retval < 0) {
- kobject_put(k);
- return NULL;
- }
- /* do not emit an uevent for this simple “glue” directory */
- return k;
- }
- if (parent)
- return &parent->kobj;
- return NULL;
- }
这里的get_device_parent()就是获取父节点的kobject,但也并非就如此简单。get_device_parent()的返回值直接决定了device将被挂在哪个目录下。到底该挂在哪,是由dev->class、dev->parent、dev->parent->class等因素综合决定的。我们看get_device_parent()中是如何判断的。如果dev->class为空,表示一切随父设备,有parent则返回parent->kobj,没有则返回NULL。如果有dev->class呢,情况就比较复杂了,也许device有着与parent不同的class,也许device还没有一个parent,等等。我们看具体的情况。如果parent不为空,而且存在parent->class,则还放在parent目录下。不然,要么parent不存在,要么parent没有class,很难直接将有class的device放在parent下面。目前的解决方法很简单,在parent与device之间,再加一层表示class的目录。如果parent都没有,那就把/sys/devices/virtual当做parent。class->p->class_dirs就是专门存放这种中间kobject的kset。思路理清后,再结合实际的sysfs,代码就很容易看懂了。
- static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
- {
- /* see if we live in a “glue” directory */
- if (!glue_dir || !dev->class ||
- glue_dir->kset != &dev->class->p->class_dirs)
- return;
- kobject_put(glue_dir);
- }
- static void cleanup_device_parent(struct device *dev)
- {
- cleanup_glue_dir(dev, dev->kobj.parent);
- }
cleanup_device_parent()是取消对parent引用时调用的函数,看起来只针对这种glue形式的目录起作用。
- static void setup_parent(struct device *dev, struct device *parent)
- {
- struct kobject *kobj;
- kobj = get_device_parent(dev, parent);
- if (kobj)
- dev->kobj.parent = kobj;
- }
setup_parent()就是调用get_device_parent()获得应该存放的父目录kobj,并把dev->kobj.parent设为它。
- static int device_add_class_symlinks(struct device *dev)
- {
- int error;
- if (!dev->class)
- return 0;
- error = sysfs_create_link(&dev->kobj,
- &dev->class->p->class_subsys.kobj,
- “subsystem”);
- if (error)
- goto out;
- /* link in the class directory pointing to the device */
- error = sysfs_create_link(&dev->class->p->class_subsys.kobj,
- &dev->kobj, dev_name(dev));
- if (error)
- goto out_subsys;
- if (dev->parent && device_is_not_partition(dev)) {
- error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
- “device”);
- if (error)
- goto out_busid;
- }
- return 0;
- out_busid:
- sysfs_remove_link(&dev->class->p->class_subsys.kobj, dev_name(dev));
- out_subsys:
- sysfs_remove_link(&dev->kobj, “subsystem”);
- out:
- return error;
- }
device_add_class_symlinks()在device和class之间添加一些软链接。在device目录下创建指向class的subsystem文件,在class目录下创建指向device的同名文件。如果device有父设备,而且device不是块设备分区,则在device目录下建立一个指向父设备的device链接文件。这一点在usb设备和usb接口间很常见。
- static void device_remove_class_symlinks(struct device *dev)
- {
- if (!dev->class)
- return;
- #ifdef CONFIG_SYSFS_DEPRECATED
- if (dev->parent && device_is_not_partition(dev)) {
- char *class_name;
- class_name = make_class_name(dev->class->name, &dev->kobj);
- if (class_name) {
- sysfs_remove_link(&dev->parent->kobj, class_name);
- kfree(class_name);
- }
- sysfs_remove_link(&dev->kobj, “device”);
- }
- if (dev->kobj.parent != &dev->class->p->class_subsys.kobj &&
- device_is_not_partition(dev))
- sysfs_remove_link(&dev->class->p->class_subsys.kobj,
- dev_name(dev));
- #else
- if (dev->parent && device_is_not_partition(dev))
- sysfs_remove_link(&dev->kobj, “device”);
- sysfs_remove_link(&dev->class->p->class_subsys.kobj, dev_name(dev));
- #endif
- sysfs_remove_link(&dev->kobj, “subsystem”);
- }
device_remove_class_symlinks()删除device和class之间的软链接。
- static inline const char *dev_name(const struct device *dev)
- {
- return kobject_name(&dev->kobj);
- }
- /*
- * dev_set_name - set the device name from a printf-style format.
- * @dev: device whose embedded kobject is named.
- * @fmt: format string, followed by its arguments.
- *
- * Returns the result of kobject_set_name_vargs().
- */
- int dev_set_name(struct device *dev, const char *fmt, ...)
- {
- va_list vargs;
- int err;
- va_start(vargs, fmt);
- err = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
- va_end(vargs);
- return err;
- }
dev_name()获得设备名称,dev_set_name()设置设备名称。但这里的dev_set_name()只能在设备未注册前使用。device的名称其实是完全靠dev->kobj管理的。
- static struct kobject *device_to_dev_kobj(struct device *dev)
- {
- struct kobject *kobj;
- if (dev->class)
- kobj = dev->class->dev_kobj;
- else
- kobj = sysfs_dev_char_kobj;
- return kobj;
- }
device_to_dev_kobj()为dev选择合适的/sys/dev下的kobject,或者是块设备,或者是字符设备,或者没有。
- /*
- * Write dev as "major:minor" (no newline) into buffer and evaluate to buffer.
- * Uses a GCC statement expression.
- */
- #define format_dev_t(buffer, dev) \
- ({ \
- sprintf(buffer, "%u:%u", MAJOR(dev), MINOR(dev)); \
- buffer; \
- })
- static int device_create_sys_dev_entry(struct device *dev)
- {
- struct kobject *kobj = device_to_dev_kobj(dev);
- int error = 0;
- char devt_str[15];
- if (kobj) {
- format_dev_t(devt_str, dev->devt);
- error = sysfs_create_link(kobj, &dev->kobj, devt_str);
- }
- return error;
- }
- static void device_remove_sys_dev_entry(struct device *dev)
- {
- struct kobject *kobj = device_to_dev_kobj(dev);
- char devt_str[15];
- if (kobj) {
- format_dev_t(devt_str, dev->devt);
- sysfs_remove_link(kobj, devt_str);
- }
- }
device_create_sys_dev_entry()是在/sys/dev相应的目录下建立对设备的软链接。先是通过device_to_dev_kobj()获得父节点的kobj,然后调用sysfs_create_link()建立软链接。
device_remove_sys_dev_entry()与其操作正相反,删除在/sys/dev下建立的软链接。
- int device_private_init(struct device *dev)
- {
- dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL);
- if (!dev->p)
- return -ENOMEM;
- dev->p->device = dev;
- klist_init(&dev->p->klist_children, klist_children_get,
- klist_children_put);
- return 0;
- }
device_private_init()分配并初始化dev->p。至于空间的释放,则要等到释放设备时,在device_release()中完成。
之前的函数比较散乱,或许找不出一个整体的印象。但下面马上就要看到重要的部分了,因为代码终于攒到了爆发的程度!
- /**
- * device_register – register a device with the system.
- * @dev: pointer to the device structure
- *
- * This happens in two clean steps – initialize the device
- * and add it to the system. The two steps can be called
- * separately, but this is the easiest and most common.
- * I.e. you should only call the two helpers separately if
- * have a clearly defined need to use and refcount the device
- * before it is added to the hierarchy.
- *
- * NOTE: _Never_ directly free @dev after calling this function, even
- * if it returned an error! Always use put_device() to give up the
- * reference initialized in this function instead.
- */
- int device_register(struct device *dev)
- {
- device_initialize(dev);
- return device_add(dev);
- }
device_register()是提供给外界注册设备的接口。它先是调用device_initialize()初始化dev结构,然后调用device_add()将其加入系统中。但要注意,在调用device_register()注册dev之前,有一些dev结构变量是需要自行设置的。这其中有指明设备位置的struct device *parent,struct bus_type *bus, struct class *class,有指明设备属性的 const char *init_name, struct device_type *type, const struct attribute_group **groups, void (*release)(struct device *dev), dev_t devt,等等。不同设备的使用方法不同,我们留待之后再具体分析。device_initialize()我们已经看过,下面重点看看device_add()是如何实现的。
- int device_add(struct device *dev)
- {
- struct device *parent = NULL;
- struct class_interface *class_intf;
- int error = -EINVAL;
- dev = get_device(dev);
- if (!dev)
- goto done;
- if (!dev->p) {
- error = device_private_init(dev);
- if (error)
- goto done;
- }
- /*
- * for statically allocated devices, which should all be converted
- * some day, we need to initialize the name. We prevent reading back
- * the name, and force the use of dev_name()
- */
- if (dev->init_name) {
- dev_set_name(dev, “%s”, dev->init_name);
- dev->init_name = NULL;
- }
- if (!dev_name(dev))
- goto name_error;
- pr_debug(“device: ‘%s’: %s\n”, dev_name(dev), __func__);
- parent = get_device(dev->parent);
- setup_parent(dev, parent);
- /* use parent numa_node */
- if (parent)
- set_dev_node(dev, dev_to_node(parent));
- /* first, register with generic layer. */
- /* we require the name to be set before, and pass NULL */
- error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
- if (error)
- goto Error;
- /* notify platform of device entry */
- if (platform_notify)
- platform_notify(dev);
- error = device_create_file(dev, &uevent_attr);
- if (error)
- goto attrError;
- if (MAJOR(dev->devt)) {
- error = device_create_file(dev, &devt_attr);
- if (error)
- goto ueventattrError;
- error = device_create_sys_dev_entry(dev);
- if (error)
- goto devtattrError;
- devtmpfs_create_node(dev);
- }
- error = device_add_class_symlinks(dev);
- if (error)
- goto SymlinkError;
- error = device_add_attrs(dev);
- if (error)
- goto AttrsError;
- error = bus_add_device(dev);
- if (error)
- goto BusError;
- error = dpm_sysfs_add(dev);
- if (error)
- goto DPMError;
- device_pm_add(dev);
- /* Notify clients of device addition. This call must come
- * after dpm_sysf_add() and before kobject_uevent().
- */
- if (dev->bus)
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
- BUS_NOTIFY_ADD_DEVICE, dev);
- kobject_uevent(&dev->kobj, KOBJ_ADD);
- bus_probe_device(dev);
- if (parent)
- klist_add_tail(&dev->p->knode_parent,
- &parent->p->klist_children);
- if (dev->class) {
- mutex_lock(&dev->class->p->class_mutex);
- /* tie the class to the device */
- klist_add_tail(&dev->knode_class,
- &dev->class->p->class_devices);
- /* notify any interfaces that the device is here */
- list_for_each_entry(class_intf,
- &dev->class->p->class_interfaces, node)
- if (class_intf->add_dev)
- class_intf->add_dev(dev, class_intf);
- mutex_unlock(&dev->class->p->class_mutex);
- }
- done:
- put_device(dev);
- return error;
- DPMError:
- bus_remove_device(dev);
- BusError:
- device_remove_attrs(dev);
- AttrsError:
- device_remove_class_symlinks(dev);
- SymlinkError:
- if (MAJOR(dev->devt))
- device_remove_sys_dev_entry(dev);
- devtattrError:
- if (MAJOR(dev->devt))
- device_remove_file(dev, &devt_attr);
- ueventattrError:
- device_remove_file(dev, &uevent_attr);
- attrError:
- kobject_uevent(&dev->kobj, KOBJ_REMOVE);
- kobject_del(&dev->kobj);
- Error:
- cleanup_device_parent(dev);
- if (parent)
- put_device(parent);
- name_error:
- kfree(dev->p);
- dev->p = NULL;
- goto done;
- }
device_add()将dev加入设备驱动模型。它先是调用get_device(dev)增加dev的引用计数,然后调用device_private_init()分配和初始化dev->p,调用dev_set_name()设置dev名字。然后是准备将dev加入sysfs,先是用get_device(parent)增加对parent的引用计数(无论是直接挂在parent下还是通过一个类层挂在parent下都要增加parent的引用计数),然后调用setup_parent()找到实际要加入的父kobject,通过kobject_add()加入其下。然后是添加属性和属性集合的操作,调用device_create_file()添加uevent属性,调用device_add_attrs()添加device/type/class预定义的属性与属性集合。如果dev有被分配设备号,再用device_create_file()添加dev属性,并用device_create_sys_dev_entry()在/sys/dev下添加相应的软链接,最后调用devtmpfs_create_node()在/dev下创建相应的设备文件。然后调用device_add_class_symlinks()添加dev与class间的软链接,调用bus_add_device()添加dev与bus间的软链接,并将dev挂入bus的设备链表。调用dpm_sysfs_add()增加dev下的power属性集合,调用device_pm_add()将dev加入dpm_list链表。
调用kobject_uevent()发布KOBJ_ADD消息,调用bus_probe_device()为dev寻找合适的驱动。如果有parent节点,把dev->p->knode_parent挂入parent->p->klist_children链表。如果dev有所属的class,将dev->knode_class挂在class->p->class_devices上,并调用可能的类设备接口的add_dev()方法。可能对于直接在bus上的设备来说,自然可以调用bus_probe_device()查找驱动,而不与总线直接接触的设备,则要靠class来发现驱动,这里的class_interface中的add_dev()方法,就是一个绝好的机会。最后会调用put_device(dev)释放在函数开头增加的引用计数。
device_add()要做的事很多,但想想每件事都在情理之中。device是设备驱动模型的基本元素,在class、bus、dev、devices中都有它的身影。device_add()要适应各种类型的设备注册,自然会越来越复杂。可以说文件开头定义的内部函数,差不多都是为了这里服务的。
- void device_unregister(struct device *dev)
- {
- pr_debug(“device: ‘%s’: %s\n”, dev_name(dev), __func__);
- device_del(dev);
- put_device(dev);
- }
有注册自然有注销。device_unregister()就是用于将dev从系统中注销,并释放创建时产生的引用计数。
- void device_del(struct device *dev)
- {
- struct device *parent = dev->parent;
- struct class_interface *class_intf;
- /* Notify clients of device removal. This call must come
- * before dpm_sysfs_remove().
- */
- if (dev->bus)
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
- BUS_NOTIFY_DEL_DEVICE, dev);
- device_pm_remove(dev);
- dpm_sysfs_remove(dev);
- if (parent)
- klist_del(&dev->p->knode_parent);
- if (MAJOR(dev->devt)) {
- devtmpfs_delete_node(dev);
- device_remove_sys_dev_entry(dev);
- device_remove_file(dev, &devt_attr);
- }
- if (dev->class) {
- device_remove_class_symlinks(dev);
- mutex_lock(&dev->class->p->class_mutex);
- /* notify any interfaces that the device is now gone */
- list_for_each_entry(class_intf,
- &dev->class->p->class_interfaces, node)
- if (class_intf->remove_dev)
- class_intf->remove_dev(dev, class_intf);
- /* remove the device from the class list */
- klist_del(&dev->knode_class);
- mutex_unlock(&dev->class->p->class_mutex);
- }
- device_remove_file(dev, &uevent_attr);
- device_remove_attrs(dev);
- bus_remove_device(dev);
- /*
- * Some platform devices are driven without driver attached
- * and managed resources may have been acquired. Make sure
- * all resources are released.
- */
- devres_release_all(dev);
- /* Notify the platform of the removal, in case they
- * need to do anything…
- */
- if (platform_notify_remove)
- platform_notify_remove(dev);
- kobject_uevent(&dev->kobj, KOBJ_REMOVE);
- cleanup_device_parent(dev);
- kobject_del(&dev->kobj);
- put_device(parent);
- }
device_del()是与device_add()相对的函数,进行实际的将dev从系统中脱离的工作。这其中既有将dev从设备驱动模型各种链表中脱离的工作,又有将dev从sysfs的各个角落删除的工作。大致流程与device_add()相对,就不一一介绍。
爆发结束,下面来看一些比较轻松的函数。
- /**
- * device_get_devnode - path of device node file
- * @dev: device
- * @mode: returned file access mode
- * @tmp: possibly allocated string
- *
- * Return the relative path of a possible device node.
- * Non-default names may need to allocate a memory to compose
- * a name. This memory is returned in tmp and needs to be
- * freed by the caller.
- *
- * Lookup order: the device type's devnode() callback, then the class's
- * devnode() callback, then the device name itself. Returns NULL only when
- * duplicating the name for '!' replacement fails.
- */
- const char *device_get_devnode(struct device *dev,
- mode_t *mode, const char **tmp)
- {
- char *s;
- *tmp = NULL;
- /* the device type may provide a specific name */
- if (dev->type && dev->type->devnode)
- *tmp = dev->type->devnode(dev, mode);
- if (*tmp)
- return *tmp;
- /* the class may provide a specific name */
- if (dev->class && dev->class->devnode)
- *tmp = dev->class->devnode(dev, mode);
- if (*tmp)
- return *tmp;
- /* return name without allocation, tmp == NULL */
- if (strchr(dev_name(dev), '!') == NULL)
- return dev_name(dev);
- /* replace '!' in the name with '/' */
- *tmp = kstrdup(dev_name(dev), GFP_KERNEL);
- if (!*tmp)
- return NULL;
- while ((s = strchr(*tmp, '!')))
- s[0] = '/';
- return *tmp;
- }
device_get_devnode()返回设备的路径名。不过似乎可以由device_type或者class定义一些独特的返回名称。
- static struct device *next_device(struct klist_iter *i)
- {
- struct klist_node *n = klist_next(i);
- struct device *dev = NULL;
- struct device_private *p;
- if (n) {
- p = to_device_private_parent(n);
- dev = p->device;
- }
- return dev;
- }
- int device_for_each_child(struct device *parent, void *data,
- int (*fn)(struct device *dev, void *data))
- {
- struct klist_iter i;
- struct device *child;
- int error = 0;
- if (!parent->p)
- return 0;
- klist_iter_init(&parent->p->klist_children, &i);
- while ((child = next_device(&i)) && !error)
- error = fn(child, data);
- klist_iter_exit(&i);
- return error;
- }
- struct device *device_find_child(struct device *parent, void *data,
- int (*match)(struct device *dev, void *data))
- {
- struct klist_iter i;
- struct device *child;
- if (!parent)
- return NULL;
- klist_iter_init(&parent->p->klist_children, &i);
- while ((child = next_device(&i)))
- if (match(child, data) && get_device(child))
- break;
- klist_iter_exit(&i);
- return child;
- }
device_for_each_child()对dev下的每个子device,都调用一遍特定的处理函数。
device_find_child()则是查找dev下特定的子device,查找使用特定的match函数。
这两个遍历过程都使用了klist特有的遍历函数,支持遍历过程中的节点删除等功能。next_device()则是为了遍历方便封装的一个内部函数。
下面本该是root_device注册相关的代码。但经过检查,linux内核中使用到的root_device很少见,而且在sysfs中也未能找到一个实际的例子。所以root_device即使还未被弃用,也并非主流,我们将其跳过。
与kobject和kset类似,device也为我们提供了快速device创建方法,下面就看看吧。
- static void device_create_release(struct device *dev)
- {
- pr_debug(“device: ‘%s’: %s\n”, dev_name(dev), __func__);
- kfree(dev);
- }
- struct device *device_create_vargs(struct class *class, struct device *parent,
- dev_t devt, void *drvdata, const char *fmt,
- va_list args)
- {
- struct device *dev = NULL;
- int retval = -ENODEV;
- if (class == NULL || IS_ERR(class))
- goto error;
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev) {
- retval = -ENOMEM;
- goto error;
- }
- dev->devt = devt;
- dev->class = class;
- dev->parent = parent;
- dev->release = device_create_release;
- dev_set_drvdata(dev, drvdata);
- retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
- if (retval)
- goto error;
- retval = device_register(dev);
- if (retval)
- goto error;
- return dev;
- error:
- put_device(dev);
- return ERR_PTR(retval);
- }
- /*
- * device_create - variadic front end to device_create_vargs().
- * @class, @parent, @devt, @drvdata: passed through unchanged.
- * @fmt: printf-style format for the device name, followed by its arguments.
- *
- * Returns whatever device_create_vargs() returns.
- */
- struct device *device_create(struct class *class, struct device *parent,
- dev_t devt, void *drvdata, const char *fmt, ...)
- {
- va_list vargs;
- struct device *dev;
- va_start(vargs, fmt);
- dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
- va_end(vargs);
- return dev;
- }
这里的device_create()提供了一个快速的dev创建注册方法。只是中间没有提供设置device_type的方法,或许是这样的device已经够特立独行了,不需要搞出一类来。
- static int __match_devt(struct device *dev, void *data)
- {
- dev_t *devt = data;
- return dev->devt == *devt;
- }
- void device_destroy(struct class *class, dev_t devt)
- {
- struct device *dev;
- dev = class_find_device(class, NULL, &devt, __match_devt);
- if (dev) {
- put_device(dev);
- device_unregister(dev);
- }
- }
device_destroy()就是与device_create()相对的注销函数。至于这里为什么会多一个put_device(dev),也很简单,因为在class_find_device()找到dev时,调用了get_device()。
- struct device *class_find_device(struct class *class, struct device *start,
- void *data,
- int (*match)(struct device *, void *))
- {
- struct class_dev_iter iter;
- struct device *dev;
- if (!class)
- return NULL;
- if (!class->p) {
- WARN(1, “%s called for class ‘%s’ before it was initialized”,
- __func__, class->name);
- return NULL;
- }
- class_dev_iter_init(&iter, class, start, NULL);
- while ((dev = class_dev_iter_next(&iter))) {
- if (match(dev, data)) {
- get_device(dev);
- break;
- }
- }
- class_dev_iter_exit(&iter);
- return dev;
- }
class_find_device()本来是class.c中的内容,其实现也与之前讲的遍历dev->p->klist_children类似,无非是在klist提供的遍历方法上加以封装。但我们这里列出class_find_device()的实现与使用它的device_destroy(),却是为了更好地分析这个调用流程中dev是如何被保护的。它实际上是经历了三个保护手段:首先在class_dev_iter_next()->klist_next()中,是受到struct klist中 spinlock_t k_lock保护的。在找到下一节点并解锁之前,就增加了struct klist_node中的struct kref n_ref引用计数。在当前的next()调用完,到下一个next()调用之前,都是受这个增加的引用计数保护的。再看class_find_device()中,使用get_device(dev)增加了dev本身的引用计数保护(当然也要追溯到kobj->kref中),这是第三种保护。直到device_destroy()中主动调用put_device(dev)才去除了这种保护。
本来对dev的保护,应该完全是由dev中的引用计数完成的。但实际上这种保护很多时候是间接完成的。例如这里的klist中的自旋锁,klist_node中的引用计数,都不过是为了保持class的设备链表中对dev的引用计数不消失,这是一种间接保护的手段,保证了这中间即使外界主动释放class设备链表对dev的引用计数,dev仍然不会被实际注销。这种曲折的联系,才真正发挥了引用计数的作用,构成设备驱动模型独特的魅力。
- int device_rename(struct device *dev, char *new_name)
- {
- char *old_device_name = NULL;
- int error;
- dev = get_device(dev);
- if (!dev)
- return -EINVAL;
- pr_debug(“device: ‘%s’: %s: renaming to ‘%s’\n”, dev_name(dev),
- __func__, new_name);
- old_device_name = kstrdup(dev_name(dev), GFP_KERNEL);
- if (!old_device_name) {
- error = -ENOMEM;
- goto out;
- }
- error = kobject_rename(&dev->kobj, new_name);
- if (error)
- goto out;
- if (dev->class) {
- error = sysfs_create_link_nowarn(&dev->class->p->class_subsys.kobj,
- &dev->kobj, dev_name(dev));
- if (error)
- goto out;
- sysfs_remove_link(&dev->class->p->class_subsys.kobj,
- old_device_name);
- }
- out:
- put_device(dev);
- kfree(old_device_name);
- return error;
- }
device_rename()是供设备注册后改变名称用的,除了改变/sys/devices下的名称,还改变了/sys/class下的软链接名称。前者很自然,但后者却很难想到。即使简单的地方,经过重重调试,我们也会惊讶于linux的心细如发。
- /*
- * device_move_class_links - update the "device" symlink when @dev is moved.
- * @dev: device whose kobject holds the link.
- * @old_parent: previous parent; if set, the existing "device" link is removed.
- * @new_parent: new parent; if set, a "device" link to its kobject is created.
- *
- * Returns 0, or the error from sysfs_create_link().
- */
- static int device_move_class_links(struct device *dev,
- struct device *old_parent,
- struct device *new_parent)
- {
- int error = 0;
- if (old_parent)
- sysfs_remove_link(&dev->kobj, "device");
- if (new_parent)
- error = sysfs_create_link(&dev->kobj, &new_parent->kobj,
- "device");
- return error;
- }
device_move_class_links()只是一个内部函数,后面还有操纵它的那只手。这里的device_move_class_links显得很名不副实,并没有操作class中软链接的举动。这很正常,因为在sysfs中软链接是针对kobject来说的,所以即使位置变掉了,软链接还是能很准确地定位。
- /**
- * device_move – moves a device to a new parent
- * @dev: the pointer to the struct device to be moved
- * @new_parent: the new parent of the device (can by NULL)
- * @dpm_order: how to reorder the dpm_list
- */
- int device_move(struct device *dev, struct device *new_parent,
- enum dpm_order dpm_order)
- {
- int error;
- struct device *old_parent;
- struct kobject *new_parent_kobj;
- dev = get_device(dev);
- if (!dev)
- return -EINVAL;
- device_pm_lock();
- new_parent = get_device(new_parent);
- new_parent_kobj = get_device_parent(dev, new_parent);
- pr_debug(“device: ‘%s’: %s: moving to ‘%s’\n”, dev_name(dev),
- __func__, new_parent ? dev_name(new_parent) : “<NULL>”);
- error = kobject_move(&dev->kobj, new_parent_kobj);
- if (error) {
- cleanup_glue_dir(dev, new_parent_kobj);
- put_device(new_parent);
- goto out;
- }
- old_parent = dev->parent;
- dev->parent = new_parent;
- if (old_parent)
- klist_remove(&dev->p->knode_parent);
- if (new_parent) {
- klist_add_tail(&dev->p->knode_parent,
- &new_parent->p->klist_children);
- set_dev_node(dev, dev_to_node(new_parent));
- }
- if (!dev->class)
- goto out_put;
- error = device_move_class_links(dev, old_parent, new_parent);
- if (error) {
- /* We ignore errors on cleanup since we’re hosed anyway… */
- device_move_class_links(dev, new_parent, old_parent);
- if (!kobject_move(&dev->kobj, &old_parent->kobj)) {
- if (new_parent)
- klist_remove(&dev->p->knode_parent);
- dev->parent = old_parent;
- if (old_parent) {
- klist_add_tail(&dev->p->knode_parent,
- &old_parent->p->klist_children);
- set_dev_node(dev, dev_to_node(old_parent));
- }
- }
- cleanup_glue_dir(dev, new_parent_kobj);
- put_device(new_parent);
- goto out;
- }
- switch (dpm_order) {
- case DPM_ORDER_NONE:
- break;
- case DPM_ORDER_DEV_AFTER_PARENT:
- device_pm_move_after(dev, new_parent);
- break;
- case DPM_ORDER_PARENT_BEFORE_DEV:
- device_pm_move_before(new_parent, dev);
- break;
- case DPM_ORDER_DEV_LAST:
- device_pm_move_last(dev);
- break;
- }
- out_put:
- put_device(old_parent);
- out:
- device_pm_unlock();
- put_device(dev);
- return error;
- }
device_move()就是将dev移到一个新的parent下。但也有可能这个parent是空的。大部分操作围绕在引用计数上,get_device(),put_device()。而且换了新的parent,到底要加到sysfs中哪个目录下,还要再调用get_device_parent()研究一下。主要的操作就是kobject_move()和device_move_class_links()。因为在sysfs中软链接是针对kobject来说的,所以即使位置变掉了,软链接还是能很准确地定位,所以在/sys/dev、/sys/bus、/sys/class中的软链接都不用变,这实在是sysfs的一大优势。除此之外,device_move()还涉及到电源管理的问题,device移动影响到dev在dpm_list上的位置,我们对此不了解,先忽略之。
- void device_shutdown(void)
- {
- struct device *dev, *devn;
- list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
- kobj.entry) {
- if (dev->bus && dev->bus->shutdown) {
- dev_dbg(dev, “shutdown\n”);
- dev->bus->shutdown(dev);
- } else if (dev->driver && dev->driver->shutdown) {
- dev_dbg(dev, “shutdown\n”);
- dev->driver->shutdown(dev);
- }
- }
- kobject_put(sysfs_dev_char_kobj);
- kobject_put(sysfs_dev_block_kobj);
- kobject_put(dev_kobj);
- async_synchronize_full();
- }
这个device_shutdown()是在系统关闭时才调用的。它动用了很少使用的devices_kset,从而可以遍历到每个注册到sysfs上的设备,调用相应的总线或驱动定义的shutdown()函数。提起这个,还是在device_initialize()中将dev->kobj.kset统一设为devices_kset的。原来设备虽然有不同的parent,但kset还是一样的。这样我们就能理解/sys/devices下的顶层设备目录是怎么来的,因为没有parent,就在调用kobject_add()时将kset->kobj当成了parent,所以会直接挂在顶层目录下。这样的目录大致有pci0000:00、virtual等等。
看完了core.c,我有种明白机器人也是由零件组成的感觉。linux设备驱动模型的大门已经打开了四分之一。随着分析的深入,我们大概也会越来越明白linux的良苦用心。
上节我们分析设备驱动模型中的device,主要是drivers/base/core.c,可以说是代码量最大的一个文件。本节要分析的驱动driver,就要相对简单很多。原因也很简单,对于driver,我们能定义的公共部分实在不多,能在sysfs中表达的也很少。本节的分析将围绕drivers/base/driver.c,但头文件仍然是include/linux/device.h和drivers/base/base.h。先让我们来看看driver的结构。
- struct device_driver {
- const char *name;
- struct bus_type *bus;
- struct module *owner;
- const char *mod_name; /* used for built-in modules */
- bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
- int (*probe) (struct device *dev);
- int (*remove) (struct device *dev);
- void (*shutdown) (struct device *dev);
- int (*suspend) (struct device *dev, pm_message_t state);
- int (*resume) (struct device *dev);
- const struct attribute_group **groups;
- const struct dev_pm_ops *pm;
- struct driver_private *p;
- };
struct device_driver就是模型定义的通用驱动结构。name是驱动名称,但这个name也只是静态定义的初始名称,实际使用的名称还是由kobject中保管的。bus指向驱动所在的总线,owner是驱动所在的模块,还有一个所在模块名称mod_name,suppress_bind_attrs定义是否允许驱动通过sysfs决定挂载还是卸载设备。下面是一系列函数指针,probe是在驱动刚与设备挂接时调用的,remove是在设备卸载时调用的,shutdown是在设备关闭时调用的(说实话我现在还不知道remove和shutdown的区别),suspend是设备休眠时调用的,resume是设备恢复时调用的。groups是属性集合,pm是电源管理的函数集合,p是指向driver_private的指针。
- struct driver_private {
- struct kobject kobj;
- struct klist klist_devices;
- struct klist_node knode_bus;
- struct module_kobject *mkobj;
- struct device_driver *driver;
- };
- #define to_driver(obj) container_of(obj, struct driver_private, kobj)
与device类似,device_driver把与其它组件联系的大部分结构变量移到struct driver_private中来。首先是kobj,在sysfs中代表driver目录本身。klist_devices是驱动下的设备链表,knode_bus是要挂载在总线的驱动链表上的节点。mkobj是driver与相关module的联系,之前在device_driver结构中已经有指向module的指针,但这还不够,在/sys下你能发现一个module目录,所以驱动所属的模块在sysfs中也有显示,具体留到代码中再看。driver指针自然是从driver_private指回struct device_driver的。
- struct driver_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device_driver *driver, char *buf);
- ssize_t (*store)(struct device_driver *driver, const char *buf,
- size_t count);
- };
- #define DRIVER_ATTR(_name, _mode, _show, _store) \
- struct driver_attribute driver_attr_##_name = \
- __ATTR(_name, _mode, _show, _store)
除了以上两个结构,还有struct driver_attribute。driver_attribute是driver对struct attribute的封装,添加了两个专用于device_driver的读写函数。这种封装看似简单重复,工作量很小,但在使用时却会带来巨大的便利。
好,结构介绍完毕,下面看driver.c中的实现。
- static struct device *next_device(struct klist_iter *i)
- {
- struct klist_node *n = klist_next(i);
- struct device *dev = NULL;
- struct device_private *dev_prv;
- if (n) {
- dev_prv = to_device_private_driver(n);
- dev = dev_prv->device;
- }
- return dev;
- }
- int driver_for_each_device(struct device_driver *drv, struct device *start,
- void *data, int (*fn)(struct device *, void *))
- {
- struct klist_iter i;
- struct device *dev;
- int error = 0;
- if (!drv)
- return -EINVAL;
- klist_iter_init_node(&drv->p->klist_devices, &i,
- start ? &start->p->knode_driver : NULL);
- while ((dev = next_device(&i)) && !error)
- error = fn(dev, data);
- klist_iter_exit(&i);
- return error;
- }
- struct device *driver_find_device(struct device_driver *drv,
- struct device *start, void *data,
- int (*match)(struct device *dev, void *data))
- {
- struct klist_iter i;
- struct device *dev;
- if (!drv)
- return NULL;
- klist_iter_init_node(&drv->p->klist_devices, &i,
- (start ? &start->p->knode_driver : NULL));
- while ((dev = next_device(&i)))
- if (match(dev, data) && get_device(dev))
- break;
- klist_iter_exit(&i);
- return dev;
- }
driver_for_each_device()是对drv的设备链表中的每个设备调用一次指定函数。
driver_find_device()是在drv的设备链表中寻找一个设备,寻找使用指定的匹配函数。
这两个函数都不陌生,在之前分析device的core.c中已经见到与它们很类似的函数,只不过那里是遍历设备的子设备链表,这里是遍历驱动的设备链表。next_device()同样是辅助用的内部函数。
- int driver_create_file(struct device_driver *drv,
- struct driver_attribute *attr)
- {
- int error;
- if (drv)
- error = sysfs_create_file(&drv->p->kobj, &attr->attr);
- else
- error = -EINVAL;
- return error;
- }
- void driver_remove_file(struct device_driver *drv,
- struct driver_attribute *attr)
- {
- if (drv)
- sysfs_remove_file(&drv->p->kobj, &attr->attr);
- }
driver_create_file()创建drv下的属性文件,调用sysfs_create_file()实现。
driver_remove_file()删除drv下的属性文件,调用sysfs_remove_file()实现。
- static int driver_add_groups(struct device_driver *drv,
- const struct attribute_group **groups)
- {
- int error = 0;
- int i;
- if (groups) {
- for (i = 0; groups[i]; i++) {
- error = sysfs_create_group(&drv->p->kobj, groups[i]);
- if (error) {
- while (--i >= 0)
- sysfs_remove_group(&drv->p->kobj,
- groups[i]);
- break;
- }
- }
- }
- return error;
- }
- static void driver_remove_groups(struct device_driver *drv,
- const struct attribute_group **groups)
- {
- int i;
- if (groups)
- for (i = 0; groups[i]; i++)
- sysfs_remove_group(&drv->p->kobj, groups[i]);
- }
driver_add_groups()在drv目录下添加属性集合,调用sysfs_create_group()实现。
driver_remove_groups()在drv目录下删除属性集合,调用sysfs_remove_group()实现。
发现两点问题:第一,是不是觉得driver_add_groups()不太合适,最好改为driver_create_groups()才搭调。但不只是driver用driver_add_groups(),device也使用device_add_groups(),不止一处这样做。第二,有没有发现driver_create_file()是外部函数,driver_add_groups()就是内部函数,也就是说driver只对外提供添加属性的接口,却不提供添加属性集合的接口。理由吗?在struct device_driver中已经专门定义了一个groups变量来添加属性集合,后面就不宜再重复提供接口,而且创建属性集合需要的操作远比创建属性费时。在device中也是这样做的。
另外,driver中只提供管理属性文件的方法,却不提供管理二进制属性文件的方法,这是因为驱动本身没有这种需求,只有部分设备才要求二进制文件表示。
- struct device_driver *get_driver(struct device_driver *drv)
- {
- if (drv) {
- struct driver_private *priv;
- struct kobject *kobj;
- kobj = kobject_get(&drv->p->kobj);
- priv = to_driver(kobj);
- return priv->driver;
- }
- return NULL;
- }
- void put_driver(struct device_driver *drv)
- {
- kobject_put(&drv->p->kobj);
- }
get_driver()增加drv的引用计数,put_driver()减少drv的引用计数。这都是通过drv->p->kobj来做的。
- struct device_driver *driver_find(const char *name, struct bus_type *bus)
- {
- struct kobject *k = kset_find_obj(bus->p->drivers_kset, name);
- struct driver_private *priv;
- if (k) {
- priv = to_driver(k);
- return priv->driver;
- }
- return NULL;
- }
driver_find()从bus的驱动链表中寻找特定名称的driver。
- /**
- * driver_register – register driver with bus
- * @drv: driver to register
- *
- * We pass off most of the work to the bus_add_driver() call,
- * since most of the things we have to do deal with the bus
- * structures.
- */
- int driver_register(struct device_driver *drv)
- {
- int ret;
- struct device_driver *other;
- BUG_ON(!drv->bus->p);
- if ((drv->bus->probe && drv->probe) ||
- (drv->bus->remove && drv->remove) ||
- (drv->bus->shutdown && drv->shutdown))
- printk(KERN_WARNING “Driver ‘%s’ needs updating – please use “
- “bus_type methods\n”, drv->name);
- other = driver_find(drv->name, drv->bus);
- if (other) {
- put_driver(other);
- printk(KERN_ERR “Error: Driver ‘%s’ is already registered, “
- “aborting…\n”, drv->name);
- return -EBUSY;
- }
- ret = bus_add_driver(drv);
- if (ret)
- return ret;
- ret = driver_add_groups(drv, drv->groups);
- if (ret)
- bus_remove_driver(drv);
- return ret;
- }
driver_register()将drv注册到系统中。它真是做得难以预料地简单,所有的工作几乎完全是由bus_add_driver()代为完成的。但你要注意,在调用driver_register()前,drv->bus一定要预先设置。device可以不绑定bus,但driver一定要绑定到bus上。
- void driver_unregister(struct device_driver *drv)
- {
- if (!drv || !drv->p) {
- WARN(1, “Unexpected driver unregister!\n”);
- return;
- }
- driver_remove_groups(drv, drv->groups);
- bus_remove_driver(drv);
- }
driver_unregister()将drv从系统中撤销。大部分工作是调用bus_remove_driver()完成的。可以看出bus_add_driver()与bus_remove_driver()相对。driver和bus的联系如此紧密,以至于driver的注册和撤销工作都可以由bus代劳了。我们需要更进一步的分析。
经过调查,我们发现很大一部分driver的代码被移动到了bus.c中。我们本节是以driver为主,所以接下来会尽量在不惊动bus的情况下,分析存在于bus.c中的driver代码。
- static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
- {
- struct driver_attribute *drv_attr = to_drv_attr(attr);
- struct driver_private *drv_priv = to_driver(kobj);
- ssize_t ret = -EIO;
- if (drv_attr->show)
- ret = drv_attr->show(drv_priv->driver, buf);
- return ret;
- }
- static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
- {
- struct driver_attribute *drv_attr = to_drv_attr(attr);
- struct driver_private *drv_priv = to_driver(kobj);
- ssize_t ret = -EIO;
- if (drv_attr->store)
- ret = drv_attr->store(drv_priv->driver, buf, count);
- return ret;
- }
- static struct sysfs_ops driver_sysfs_ops = {
- .show = drv_attr_show,
- .store = drv_attr_store,
- };
看到这里,你终于觉得driver开始正常了,它还要定义sysfs读写时操作的函数。
- static void driver_release(struct kobject *kobj)
- {
- struct driver_private *drv_priv = to_driver(kobj);
- pr_debug(“driver: ‘%s’: %s\n”, kobject_name(kobj), __func__);
- kfree(drv_priv);
- }
- static struct kobj_type driver_ktype = {
- .sysfs_ops = &driver_sysfs_ops,
- .release = driver_release,
- };
与device的释放函数device_release不同,driver_release没有提供外界代码运行的机会,只是简单地释放drv_priv结构。
- /* Manually detach a device from its associated driver. */
- static ssize_t driver_unbind(struct device_driver *drv,
- const char *buf, size_t count)
- {
- struct bus_type *bus = bus_get(drv->bus);
- struct device *dev;
- int err = -ENODEV;
- dev = bus_find_device_by_name(bus, NULL, buf);
- if (dev && dev->driver == drv) {
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- device_release_driver(dev);
- if (dev->parent)
- up(&dev->parent->sem);
- err = count;
- }
- put_device(dev);
- bus_put(bus);
- return err;
- }
- static DRIVER_ATTR(unbind, S_IWUSR, NULL, driver_unbind);
- /*
- * Manually attach a device to a driver.
- * Note: the driver must want to bind to the device,
- * it is not possible to override the driver’s id table.
- */
- static ssize_t driver_bind(struct device_driver *drv,
- const char *buf, size_t count)
- {
- struct bus_type *bus = bus_get(drv->bus);
- struct device *dev;
- int err = -ENODEV;
- dev = bus_find_device_by_name(bus, NULL, buf);
- if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- down(&dev->sem);
- err = driver_probe_device(drv, dev);
- up(&dev->sem);
- if (dev->parent)
- up(&dev->parent->sem);
- if (err > 0) {
- /* success */
- err = count;
- } else if (err == 0) {
- /* driver didn’t accept device */
- err = -ENODEV;
- }
- }
- put_device(dev);
- bus_put(bus);
- return err;
- }
- static DRIVER_ATTR(bind, S_IWUSR, NULL, driver_bind);
上面描述了driver下两个只写的属性文件,unbind和bind。它们应该是提供给用户空间的接口,用来手动决定是否将设备与驱动挂接。
- static int driver_add_attrs(struct bus_type *bus, struct device_driver *drv)
- {
- int error = 0;
- int i;
- if (bus->drv_attrs) {
- for (i = 0; attr_name(bus->drv_attrs[i]); i++) {
- error = driver_create_file(drv, &bus->drv_attrs[i]);
- if (error)
- goto err;
- }
- }
- done:
- return error;
- err:
- while (--i >= 0)
- driver_remove_file(drv, &bus->drv_attrs[i]);
- goto done;
- }
- static void driver_remove_attrs(struct bus_type *bus,
- struct device_driver *drv)
- {
- int i;
- if (bus->drv_attrs) {
- for (i = 0; attr_name(bus->drv_attrs[i]); i++)
- driver_remove_file(drv, &bus->drv_attrs[i]);
- }
- }
driver_add_attrs()向drv目录下添加属性,只是这些属性都是在bus中定义的drv_attrs[]。
driver_remove_attrs()从drv目录中删除相应的bus->drv_attrs[]。
- static int __must_check add_bind_files(struct device_driver *drv)
- {
- int ret;
- ret = driver_create_file(drv, &driver_attr_unbind);
- if (ret == 0) {
- ret = driver_create_file(drv, &driver_attr_bind);
- if (ret)
- driver_remove_file(drv, &driver_attr_unbind);
- }
- return ret;
- }
- static void remove_bind_files(struct device_driver *drv)
- {
- driver_remove_file(drv, &driver_attr_bind);
- driver_remove_file(drv, &driver_attr_unbind);
- }
add_bind_files()在drv目录下增加bind和unbind属性。
remove_bind_files()从drv目录下删除bind和unbind属性。
- static ssize_t driver_uevent_store(struct device_driver *drv,
- const char *buf, size_t count)
- {
- enum kobject_action action;
- if (kobject_action_type(buf, count, &action) == 0)
- kobject_uevent(&drv->p->kobj, action);
- return count;
- }
- static DRIVER_ATTR(uevent, S_IWUSR, NULL, driver_uevent_store);
这是drv目录下的uevent属性文件,提供了从drv发送uevent的方法。
- /**
- * bus_add_driver – Add a driver to the bus.
- * @drv: driver.
- */
- int bus_add_driver(struct device_driver *drv)
- {
- struct bus_type *bus;
- struct driver_private *priv;
- int error = 0;
- bus = bus_get(drv->bus);
- if (!bus)
- return -EINVAL;
- pr_debug(“bus: ‘%s’: add driver %s\n”, bus->name, drv->name);
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv) {
- error = -ENOMEM;
- goto out_put_bus;
- }
- klist_init(&priv->klist_devices, NULL, NULL);
- priv->driver = drv;
- drv->p = priv;
- priv->kobj.kset = bus->p->drivers_kset;
- error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL,
- “%s”, drv->name);
- if (error)
- goto out_unregister;
- if (drv->bus->p->drivers_autoprobe) {
- error = driver_attach(drv);
- if (error)
- goto out_unregister;
- }
- klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);
- module_add_driver(drv->owner, drv);
- error = driver_create_file(drv, &driver_attr_uevent);
- if (error) {
- printk(KERN_ERR “%s: uevent attr (%s) failed\n”,
- __func__, drv->name);
- }
- error = driver_add_attrs(bus, drv);
- if (error) {
- /* How the hell do we get out of this pickle? Give up */
- printk(KERN_ERR “%s: driver_add_attrs(%s) failed\n”,
- __func__, drv->name);
- }
- if (!drv->suppress_bind_attrs) {
- error = add_bind_files(drv);
- if (error) {
- /* Ditto */
- printk(KERN_ERR “%s: add_bind_files(%s) failed\n”,
- __func__, drv->name);
- }
- }
- kobject_uevent(&priv->kobj, KOBJ_ADD);
- return 0;
- out_unregister:
- kfree(drv->p);
- drv->p = NULL;
- kobject_put(&priv->kobj);
- out_put_bus:
- bus_put(bus);
- return error;
- }
bus_add_driver()看似是把drv与bus联系起来,其实是完成driver加入系统的大部分操作。
首先调用bus_get(drv->bus)增加对bus的引用。
分配并初始化drv->p,即driver_private结构。
调用kobject_init_and_add()将drv加入sysfs,之前只是设置了priv->kobj.kset为bus->p->drivers_kset,所以drv目录会出现在bus目录的drivers子目录中。如果总线允许自动probe,就会调用driver_attach()将驱动和总线上的设备进行匹配,这个过程先略过。
然后调用klist_add_tail()将drv挂入总线的驱动链表。
调用module_add_driver()创建driver相关的模块在sysfs中的表示。后面专门描述。
调用driver_create_file()在drv目录下创建uevent属性文件。
调用driver_add_attrs()在drv目录下添加bus->drv_attrs[]中定义的属性。
如果drv->suppress_bind_attrs为零,即允许用户空间决定驱动何时链接和卸载设备,则调用add_bind_files()添加bind和unbind属性文件。
调用kobject_uevent()向用户空间发布KOBJ_ADD消息。
从bus_add_driver()的处理过程来看,driver只在bus的drivers目录下出现,没什么软链接,需要的属性也不多。
- /**
- * bus_remove_driver – delete driver from bus’s knowledge.
- * @drv: driver.
- *
- * Detach the driver from the devices it controls, and remove
- * it from its bus’s list of drivers. Finally, we drop the reference
- * to the bus we took in bus_add_driver().
- */
- void bus_remove_driver(struct device_driver *drv)
- {
- if (!drv->bus)
- return;
- if (!drv->suppress_bind_attrs)
- remove_bind_files(drv);
- driver_remove_attrs(drv->bus, drv);
- driver_remove_file(drv, &driver_attr_uevent);
- klist_remove(&drv->p->knode_bus);
- pr_debug(“bus: ‘%s’: remove driver %s\n”, drv->bus->name, drv->name);
- driver_detach(drv);
- module_remove_driver(drv);
- kobject_put(&drv->p->kobj);
- bus_put(drv->bus);
- }
bus_remove_driver()将drv从系统中撤销,与bus_add_driver()相对应。
driver真正精彩的地方在于probe函数,对设备的操作,对用户空间提供的接口,可惜这些都是特定的。这里只能将driver与bus联系起来,并在以后与device联系起来。
不过不必失望,下面我们分析下drivers/base/module.c,它显示了与驱动有关的module,在sysfs中的表现情况。
首先介绍使用到的结构。应该说module.c的代码实现很简单,但使用到的结构不简单。
- struct module_attribute {
- struct attribute attr;
- ssize_t (*show)(struct module_attribute *, struct module *, char *);
- ssize_t (*store)(struct module_attribute *, struct module *,
- const char *, size_t count);
- void (*setup)(struct module *, const char *);
- int (*test)(struct module *);
- void (*free)(struct module *);
- };
- struct param_attribute
- {
- struct module_attribute mattr;
- struct kernel_param *param;
- };
- struct module_param_attrs
- {
- unsigned int num;
- struct attribute_group grp;
- struct param_attribute attrs[0];
- };
- struct module_kobject
- {
- struct kobject kobj;
- struct module *mod;
- struct kobject *drivers_dir;
- struct module_param_attrs *mp;
- };
可以看到module_attribute结构除了包含struct attribute,还多增加了好几条函数指针。而这只是最简单的,struct param_attribute除了包含module_attribute,还有一个指向kernel_param的指针param。这个kernel_param就太复杂了,是外界向module提供参数用的窗口,这里忽略。后面还有struct module_param_attrs和struct module_kobject。
- static char *make_driver_name(struct device_driver *drv)
- {
- char *driver_name;
- driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2,
- GFP_KERNEL);
- if (!driver_name)
- return NULL;
- sprintf(driver_name, “%s:%s”, drv->bus->name, drv->name);
- return driver_name;
- }
make_driver_name()将drv的名字和drv->bus的名字合起来,不过这是一个内部函数,具体使用还要看后面。
- static void module_create_drivers_dir(struct module_kobject *mk)
- {
- if (!mk || mk->drivers_dir)
- return;
- mk->drivers_dir = kobject_create_and_add(“drivers”, &mk->kobj);
- }
module_create_drivers_dir()在mk所在的目录下创建一个drivers的目录。不过因为是使用kobject_create_and_add(),所以这个kobject使用默认的dynamic_kobj_ktype。
- void module_add_driver(struct module *mod, struct device_driver *drv)
- {
- char *driver_name;
- int no_warn;
- struct module_kobject *mk = NULL;
- if (!drv)
- return;
- if (mod)
- mk = &mod->mkobj;
- else if (drv->mod_name) {
- struct kobject *mkobj;
- /* Lookup built-in module entry in /sys/modules */
- mkobj = kset_find_obj(module_kset, drv->mod_name);
- if (mkobj) {
- mk = container_of(mkobj, struct module_kobject, kobj);
- /* remember our module structure */
- drv->p->mkobj = mk;
- /* kset_find_obj took a reference */
- kobject_put(mkobj);
- }
- }
- if (!mk)
- return;
- /* Don’t check return codes; these calls are idempotent */
- no_warn = sysfs_create_link(&drv->p->kobj, &mk->kobj, “module”);
- driver_name = make_driver_name(drv);
- if (driver_name) {
- module_create_drivers_dir(mk);
- no_warn = sysfs_create_link(mk->drivers_dir, &drv->p->kobj,
- driver_name);
- kfree(driver_name);
- }
- }
module_add_driver()在module下添加与driver的联系。
开始调用kset_find_obj()从module_kset下寻找drv所属的module对应的kobj。说明每个module在加载时都会在/sys/module中创建一个kobject目录。这里找到后只是将其赋给drv->p->mkobj,并调用kobject_put()释放找到时加上的引用计数。至于为什么driver不保留对module的引用计数,或许是不需要,或许是已经存在了。
接下来调用sysfs_create_link()在驱动目录中添加指向module目录的软链接,名称就是module。
调用module_create_drivers_dir()在module目录下建立drivers子目录。
调用sysfs_create_link()在drivers子目录下建立指向驱动目录的软链接,名称使用make_driver_name()的返回结果。
- void module_remove_driver(struct device_driver *drv)
- {
- struct module_kobject *mk = NULL;
- char *driver_name;
- if (!drv)
- return;
- sysfs_remove_link(&drv->p->kobj, “module”);
- if (drv->owner)
- mk = &drv->owner->mkobj;
- else if (drv->p->mkobj)
- mk = drv->p->mkobj;
- if (mk && mk->drivers_dir) {
- driver_name = make_driver_name(drv);
- if (driver_name) {
- sysfs_remove_link(mk->drivers_dir, driver_name);
- kfree(driver_name);
- }
- }
- }
module_remove_driver()消除driver与相应module之间的软链接关系。
对于module,应该是另一个议题了,这里只是简单涉及,下节我们将涉及到总线bus,并深入分析device和driver的关系。
前面我们分析了设备驱动模型中的device和driver,device和driver本来是不相关的东西,只因为bus的存在,才被联系到了一起。本节就来看看设备驱动模型中起枢纽作用的bus。本节的头文件在include/linux/device.h和drivers/base/base.h,实现代码主要在bus.c中。因为在bus中有很多代码是为了device找到driver或者driver找到device而定义的,本节先尽量忽略这部分,专注于bus的注册和注销,属性定义等内容。剩下的留到讨论device和driver关系时再分析。先来看看bus的数据结构。
- struct bus_type {
- const char *name;
- struct bus_attribute *bus_attrs;
- struct device_attribute *dev_attrs;
- struct driver_attribute *drv_attrs;
- int (*match)(struct device *dev, struct device_driver *drv);
- int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
- int (*probe)(struct device *dev);
- int (*remove)(struct device *dev);
- void (*shutdown)(struct device *dev);
- int (*suspend)(struct device *dev, pm_message_t state);
- int (*resume)(struct device *dev);
- const struct dev_pm_ops *pm;
- struct bus_type_private *p;
- };
struct bus_type是bus的通用数据结构。
name是bus的名称,注意到这里也是const char *类型的,在sysfs中使用的还是kobj中动态创建的名称,这里的name只是初始名。
bus_attrs是bus为自己定义的一系列属性,dev_attrs是bus为旗下的device定义的一系列属性,drv_attrs是bus为旗下的driver定义的一系列属性。其中dev_attrs在bus_add_device()->device_add_attrs()中被加入dev目录下,drv_attrs在bus_add_driver()->driver_add_attrs()中被加入driver目录下。
match函数匹配总线中的dev和driver,返回值为1代表匹配成功,为0则失败。
uevent函数用于总线对uevent的环境变量添加,但在总线下设备的dev_uevent处理函数也有对它的调用。
probe函数是总线在匹配成功时调用的函数,bus->probe和drv->probe中只会有一个起效,同时存在时使用bus->probe。
remove函数在总线上设备或者驱动要删除时调用,bus->remove和drv->remove中同样只会有一个起效。
shutdown函数在所有设备都关闭时调用,即在core.c中的device_shutdown()函数中调用,bus->shutdown和drv->shutdown同样只会有一个起效。
suspend函数是在总线上设备休眠时调用。
resume函数是在总线上设备恢复时调用。
pm是struct dev_pm_ops类型,其中定义了一系列电源管理的函数。
p是指向bus_type_private的指针,其中定义了将bus同其它组件联系起来的变量。
- struct bus_type_private {
- struct kset subsys;
- struct kset *drivers_kset;
- struct kset *devices_kset;
- struct klist klist_devices;
- struct klist klist_drivers;
- struct blocking_notifier_head bus_notifier;
- unsigned int drivers_autoprobe:1;
- struct bus_type *bus;
- };
- #define to_bus(obj) container_of(obj, struct bus_type_private, subsys.kobj)
struct bus_type_private是将bus同device、driver、sysfs联系起来的结构。
subsys是kset类型,代表bus在sysfs中的类型。
drivers_kset代表bus目录下的drivers子目录。
devices_kset代表bus目录下的devices子目录。
klist_devices是bus的设备链表,klist_drivers是bus的驱动链表。
bus_notifier用于在总线上内容发生变化时调用特定的函数,这里略过。
drivers_autoprobe标志定义是否允许device和driver自动匹配,如果允许会在device或者driver注册时就进行匹配工作。
bus指针指向struct bus_type类型。
使用struct bus_type_private可以将struct bus_type中的部分细节屏蔽掉,利于外界使用bus_type。struct driver_private和struct device_private都有类似的功能。
- struct bus_attribute {
- struct attribute attr;
- ssize_t (*show)(struct bus_type *bus, char *buf);
- ssize_t (*store)(struct bus_type *bus, const char *buf, size_t count);
- };
- #define BUS_ATTR(_name, _mode, _show, _store) \
- struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store)
- #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr)
struct bus_attribute是bus对struct attribute类型的封装,更方便总线属性的定义。
- static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
- {
- struct bus_attribute *bus_attr = to_bus_attr(attr);
- struct bus_type_private *bus_priv = to_bus(kobj);
- ssize_t ret = 0;
- if (bus_attr->show)
- ret = bus_attr->show(bus_priv->bus, buf);
- return ret;
- }
- static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
- {
- struct bus_attribute *bus_attr = to_bus_attr(attr);
- struct bus_type_private *bus_priv = to_bus(kobj);
- ssize_t ret = 0;
- if (bus_attr->store)
- ret = bus_attr->store(bus_priv->bus, buf, count);
- return ret;
- }
- static struct sysfs_ops bus_sysfs_ops = {
- .show = bus_attr_show,
- .store = bus_attr_store,
- };
- static struct kobj_type bus_ktype = {
- .sysfs_ops = &bus_sysfs_ops,
- };
以上应该是我们最熟悉的部分,bus_ktype中定义了bus对应的kset应该使用的kobj_type实例。与此类似,driver使用的是自定义的driver_ktype,device使用的是自定义的device_ktype。只是这里仅仅定义了sysfs_ops,并未定义release函数,不知bus_type_private打算何时释放。
- int bus_create_file(struct bus_type *bus, struct bus_attribute *attr)
- {
- int error;
- if (bus_get(bus)) {
- error = sysfs_create_file(&bus->p->subsys.kobj, &attr->attr);
- bus_put(bus);
- } else
- error = -EINVAL;
- return error;
- }
- void bus_remove_file(struct bus_type *bus, struct bus_attribute *attr)
- {
- if (bus_get(bus)) {
- sysfs_remove_file(&bus->p->subsys.kobj, &attr->attr);
- bus_put(bus);
- }
- }
bus_create_file()在bus目录下创建属性文件,bus_remove_file()在bus目录下删除属性文件。类似的函数在driver和device中都有见到。
- static int bus_uevent_filter(struct kset *kset, struct kobject *kobj)
- {
- struct kobj_type *ktype = get_ktype(kobj);
- if (ktype == &bus_ktype)
- return 1;
- return 0;
- }
- static struct kset_uevent_ops bus_uevent_ops = {
- .filter = bus_uevent_filter,
- };
- static struct kset *bus_kset;
可以看到这里定义了一个bus_uevent_ops变量,这是kset对uevent事件处理所用的结构,它会用在bus_kset中。
- int __init buses_init(void)
- {
- bus_kset = kset_create_and_add(“bus”, &bus_uevent_ops, NULL);
- if (!bus_kset)
- return -ENOMEM;
- return 0;
- }
在buses_init()中创建了/sys/bus目录,这是一个kset类型,使用了bus_uevent_ops的uevent操作类型。
其实这里的操作不难想象,在devices中我们有一个类似的devices_kset,可以回顾一下。
- static struct kset_uevent_ops device_uevent_ops = {
- .filter = dev_uevent_filter,
- .name = dev_uevent_name,
- .uevent = dev_uevent,
- };
- /* kset to create /sys/devices/ */
- struct kset *devices_kset;
- int __init devices_init(void)
- {
- devices_kset = kset_create_and_add(“devices”, &device_uevent_ops, NULL);
- …
- }
- void device_initialize(struct device *dev)
- {
- dev->kobj.kset = devices_kset;
- …
- }
devices_kset在devices_init()中被创建,使用相应的device_uevent_ops进行uevent处理。而devices_kset又被设为每个device初始化时使用的kset。这就不难想象每个device都是以devices_kset为所属kset的,并使用device_uevent_ops中的处理函数。
只是这里还不知bus_kset会在哪里用到,或许是每个bus所属的kset吧,下面会有答案。
- static ssize_t show_drivers_autoprobe(struct bus_type *bus, char *buf)
- {
- return sprintf(buf, “%d\n”, bus->p->drivers_autoprobe);
- }
- static ssize_t store_drivers_autoprobe(struct bus_type *bus,
- const char *buf, size_t count)
- {
- if (buf[0] == ‘0’)
- bus->p->drivers_autoprobe = 0;
- else
- bus->p->drivers_autoprobe = 1;
- return count;
- }
- static ssize_t store_drivers_probe(struct bus_type *bus,
- const char *buf, size_t count)
- {
- struct device *dev;
- dev = bus_find_device_by_name(bus, NULL, buf);
- if (!dev)
- return -ENODEV;
- if (bus_rescan_devices_helper(dev, NULL) != 0)
- return -EINVAL;
- return count;
- }
- static BUS_ATTR(drivers_probe, S_IWUSR, NULL, store_drivers_probe);
- static BUS_ATTR(drivers_autoprobe, S_IWUSR | S_IRUGO,
- show_drivers_autoprobe, store_drivers_autoprobe);
这里定义了总线下的两个属性,只写的drivers_probe,和可读写的drivers_autoprobe。至于其怎么实现的,我们现在还不关心。
- static int add_probe_files(struct bus_type *bus)
- {
- int retval;
- retval = bus_create_file(bus, &bus_attr_drivers_probe);
- if (retval)
- goto out;
- retval = bus_create_file(bus, &bus_attr_drivers_autoprobe);
- if (retval)
- bus_remove_file(bus, &bus_attr_drivers_probe);
- out:
- return retval;
- }
- static void remove_probe_files(struct bus_type *bus)
- {
- bus_remove_file(bus, &bus_attr_drivers_autoprobe);
- bus_remove_file(bus, &bus_attr_drivers_probe);
- }
add_probe_files()在bus目录下添加drivers_probe和drivers_autoprobe文件。
remove_probe_files()在bus目录下删除drivers_probe和drivers_autoprobe文件。
这两个函数对bus的probe类型属性进行管理,就像add_bind_files/remove_bind_files对driver的bind类型属性进行管理一样。
- static ssize_t bus_uevent_store(struct bus_type *bus,
- const char *buf, size_t count)
- {
- enum kobject_action action;
- if (kobject_action_type(buf, count, &action) == 0)
- kobject_uevent(&bus->p->subsys.kobj, action);
- return count;
- }
- static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store);
上面定义了bus的一个属性uevent,用于bus所在的kset节点主动发起uevent消息。
同样的uevent文件在driver目录中也有见到。device目录中也有,不过除了store_uevent之外,还增加了show_uevent的功能。
- static struct device *next_device(struct klist_iter *i)
- {
- struct klist_node *n = klist_next(i);
- struct device *dev = NULL;
- struct device_private *dev_prv;
- if (n) {
- dev_prv = to_device_private_bus(n);
- dev = dev_prv->device;
- }
- return dev;
- }
- int bus_for_each_dev(struct bus_type *bus, struct device *start,
- void *data, int (*fn)(struct device *, void *))
- {
- struct klist_iter i;
- struct device *dev;
- int error = 0;
- if (!bus)
- return -EINVAL;
- klist_iter_init_node(&bus->p->klist_devices, &i,
- (start ? &start->p->knode_bus : NULL));
- while ((dev = next_device(&i)) && !error)
- error = fn(dev, data);
- klist_iter_exit(&i);
- return error;
- }
- struct device *bus_find_device(struct bus_type *bus,
- struct device *start, void *data,
- int (*match)(struct device *dev, void *data))
- {
- struct klist_iter i;
- struct device *dev;
- if (!bus)
- return NULL;
- klist_iter_init_node(&bus->p->klist_devices, &i,
- (start ? &start->p->knode_bus : NULL));
- while ((dev = next_device(&i)))
- if (match(dev, data) && get_device(dev))
- break;
- klist_iter_exit(&i);
- return dev;
- }
bus_for_each_dev()是以bus的设备链表中每个设备为参数,调用指定的处理函数。
bus_find_device()是寻找bus设备链表中的某个设备,使用指定的匹配函数。
这两个函数提供遍历bus的设备链表的方法,类似于driver_for_each_device/driver_find_device对driver的设备链表的遍历,device_for_each_child/device_find_child对device的子设备链表的遍历。
- static int match_name(struct device *dev, void *data)
- {
- const char *name = data;
- return sysfs_streq(name, dev_name(dev));
- }
- struct device *bus_find_device_by_name(struct bus_type *bus,
- struct device *start, const char *name)
- {
- return bus_find_device(bus, start, (void *)name, match_name);
- }
bus_find_device_by_name()给出了如何使用遍历函数的例子,寻找bus设备链表中指定名称的设备。
- static struct device_driver *next_driver(struct klist_iter *i)
- {
- struct klist_node *n = klist_next(i);
- struct driver_private *drv_priv;
- if (n) {
- drv_priv = container_of(n, struct driver_private, knode_bus);
- return drv_priv->driver;
- }
- return NULL;
- }
- int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
- void *data, int (*fn)(struct device_driver *, void *))
- {
- struct klist_iter i;
- struct device_driver *drv;
- int error = 0;
- if (!bus)
- return -EINVAL;
- klist_iter_init_node(&bus->p->klist_drivers, &i,
- start ? &start->p->knode_bus : NULL);
- while ((drv = next_driver(&i)) && !error)
- error = fn(drv, data);
- klist_iter_exit(&i);
- return error;
- }
bus_for_each_drv()对bus的驱动链表中的每个驱动调用指定的函数。
这和前面的bus_for_each_dev/bus_find_device都是类似的,只是你可能怀疑为什么会没有bus_find_drv。是没有它的用武之地吗?
请看driver.c中的driver_find()函数。
- struct device_driver *driver_find(const char *name, struct bus_type *bus)
- {
- struct kobject *k = kset_find_obj(bus->p->drivers_kset, name);
- struct driver_private *priv;
- if (k) {
- priv = to_driver(k);
- return priv->driver;
- }
- return NULL;
- }
driver_find()函数是在bus的驱动链表中寻找指定名称的驱动,它的存在证明bus_find_drv()完全是用得上的。可linux却偏偏没有实现bus_find_drv。driver_find()的实现也因此一直走内层路线,它直接用kset_find_obj()进行kobject的名称匹配,调用to_driver()等内容将kobj转化为drv。首先这完全不同于bus_for_each_drv()等一系列遍历函数,它们走的都是在klist中寻找的路线,这里却是走的sysfs中kset内部链表。其次,这里其实也是获得了drv的一个引用计数,在kset_find_obj()中会增加匹配的kobj的引用计数,driver_find()并没有释放,就相当于获取了drv的一个引用计数。这样虽然也可以,但代码写得很不优雅。可见人无完人,linux代码还有许多可改进之处。当然,也可能在最新的linux版本中已经改正了。
- static int bus_add_attrs(struct bus_type *bus)
- {
- int error = 0;
- int i;
- if (bus->bus_attrs) {
- for (i = 0; attr_name(bus->bus_attrs[i]); i++) {
- error = bus_create_file(bus, &bus->bus_attrs[i]);
- if (error)
- goto err;
- }
- }
- done:
- return error;
- err:
- while (--i >= 0)
- bus_remove_file(bus, &bus->bus_attrs[i]);
- goto done;
- }
- static void bus_remove_attrs(struct bus_type *bus)
- {
- int i;
- if (bus->bus_attrs) {
- for (i = 0; attr_name(bus->bus_attrs[i]); i++)
- bus_remove_file(bus, &bus->bus_attrs[i]);
- }
- }
bus_add_attrs()将bus->bus_attrs中定义的属性加入bus目录。
bus_remove_attrs()将bus->bus_attrs中定义的属性删除。
开始看struct bus_type时我们说到结构中的bus_attrs、dev_attrs、drv_attrs三种属性,后两者分别在device_add_attrs()和driver_add_attrs()中添加,最后的bus_attrs也终于在bus_add_attrs()中得到添加。只是它们虽然都定义在bus_type中,却是添加在完全不同的三个地方。
- static void klist_devices_get(struct klist_node *n)
- {
- struct device_private *dev_prv = to_device_private_bus(n);
- struct device *dev = dev_prv->device;
- get_device(dev);
- }
- static void klist_devices_put(struct klist_node *n)
- {
- struct device_private *dev_prv = to_device_private_bus(n);
- struct device *dev = dev_prv->device;
- put_device(dev);
- }
klist_devices_get()用于bus设备链表上添加节点时增加对相应设备的引用。
klist_devices_put()用于bus设备链表上删除节点时减少对相应设备的引用。
相似的函数是device中的klist_children_get/klist_children_put,这是device的子设备链表。除此之外,bus的驱动链表和driver的设备链表,都没有这种引用计数的保护。原因还未知,也许是linux觉得驱动不太靠谱,万一突然当掉,也不至于影响device的正常管理。
- /**
- * bus_register – register a bus with the system.
- * @bus: bus.
- *
- * Once we have that, we registered the bus with the kobject
- * infrastructure, then register the children subsystems it has:
- * the devices and drivers that belong to the bus.
- */
- int bus_register(struct bus_type *bus)
- {
- int retval;
- struct bus_type_private *priv;
- priv = kzalloc(sizeof(struct bus_type_private), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
- priv->bus = bus;
- bus->p = priv;
- BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier);
- retval = kobject_set_name(&priv->subsys.kobj, “%s”, bus->name);
- if (retval)
- goto out;
- priv->subsys.kobj.kset = bus_kset;
- priv->subsys.kobj.ktype = &bus_ktype;
- priv->drivers_autoprobe = 1;
- retval = kset_register(&priv->subsys);
- if (retval)
- goto out;
- retval = bus_create_file(bus, &bus_attr_uevent);
- if (retval)
- goto bus_uevent_fail;
- priv->devices_kset = kset_create_and_add(“devices”, NULL,
- &priv->subsys.kobj);
- if (!priv->devices_kset) {
- retval = -ENOMEM;
- goto bus_devices_fail;
- }
- priv->drivers_kset = kset_create_and_add(“drivers”, NULL,
- &priv->subsys.kobj);
- if (!priv->drivers_kset) {
- retval = -ENOMEM;
- goto bus_drivers_fail;
- }
- klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
- klist_init(&priv->klist_drivers, NULL, NULL);
- retval = add_probe_files(bus);
- if (retval)
- goto bus_probe_files_fail;
- retval = bus_add_attrs(bus);
- if (retval)
- goto bus_attrs_fail;
- pr_debug(“bus: ‘%s’: registered\n”, bus->name);
- return 0;
- bus_attrs_fail:
- remove_probe_files(bus);
- bus_probe_files_fail:
- kset_unregister(bus->p->drivers_kset);
- bus_drivers_fail:
- kset_unregister(bus->p->devices_kset);
- bus_devices_fail:
- bus_remove_file(bus, &bus_attr_uevent);
- bus_uevent_fail:
- kset_unregister(&bus->p->subsys);
- kfree(bus->p);
- out:
- bus->p = NULL;
- return retval;
- }
bus_register()将bus注册到系统中。
先分配并初始化bus->p,名称使用bus->name,所属的kset使用bus_kset(果然不出所料),类型使用bus_ktype。bus_ktype的使用同driver中的driver_ktype,和device中的device_ktype一样,都是自定义的kobj_type,要知道kobj_type的使用关系到release函数,和自定义属性类型能否正常发挥。
调用kset_register()将bus加入sysfs,因为只是设置了kset,所以会被加入/sys/bus目录下。与driver直接加入相关总线的drivers目录类似,却是与device复杂的寻找父节点过程相去甚远。
在bus目录下添加uevent属性。
在bus目录下创建devices子目录。它是一个kset类型的,目的是展示bus下的设备链表。
在bus目录下创建drivers子目录。它也是一个kset类型的,目的是展示bus下的驱动链表。
或许在最开始有设备驱动模型时,还需要kset来表达这种链表关系,但随着klist等结构的加入,kset的作用也越来越少,现在更多的作用是用来处理uevent消息。
之后初始化bus的设备链表和驱动链表,其中设备链表会占用设备的引用计数。
调用add_probe_files()在bus目录下添加probe相关的两个属性文件。
调用bus_add_attrs()添加bus结构中定义的属性。
bus_register()中的操作出乎意料的简单。bus既不需要在哪里添加软链接,也不需要主动向谁报道,从来都是device和driver到bus这里报道的。所以bus_register()中只需要初始化一下结构,添加到sysfs中,添加相关的子目录和属性文件,就行了。
- void bus_unregister(struct bus_type *bus)
- {
- pr_debug("bus: '%s': unregistering\n", bus->name);
- bus_remove_attrs(bus);
- remove_probe_files(bus);
- kset_unregister(bus->p->drivers_kset);
- kset_unregister(bus->p->devices_kset);
- bus_remove_file(bus, &bus_attr_uevent);
- kset_unregister(&bus->p->subsys);
- kfree(bus->p);
- bus->p = NULL;
- }
bus_unregister()与bus_register()相对,将bus从系统中注销。不过要把bus注销也不是那么简单的,bus中的driver和device都对bus保有一份引用计数。或许正是如此,bus把释放bus->p的动作放在了bus_unregister()中,这至少能保证较早地释放不需要的内存空间。而且在bus引用计数用完时,也不会有任何操作,bus的容错性还是很高的。
- static struct bus_type *bus_get(struct bus_type *bus)
- {
- if (bus) {
- kset_get(&bus->p->subsys);
- return bus;
- }
- return NULL;
- }
- static void bus_put(struct bus_type *bus)
- {
- if (bus)
- kset_put(&bus->p->subsys);
- }
bus_get()增加对bus的引用计数,bus_put()减少对bus的引用计数。实际上这里bus的引用计数降为零时,只是将sysfs中bus对应的目录删除。
无论是bus,还是device,还是driver,都是将主要的注销工作放在相关的unregister中。至于在引用计数降为零时的操作,大概只在device_release()中可见。这主要是因为引用计数,虽然是广泛用在设备驱动模型中,但实际支持的,绝大部分是设备的热插拔,而不是总线或者驱动的热插拔。当然,桥设备的热插拔也可能附带总线的热插拔。
- /*
- * Yes, this forcably breaks the klist abstraction temporarily. It
- * just wants to sort the klist, not change reference counts and
- * take/drop locks rapidly in the process. It does all this while
- * holding the lock for the list, so objects can’t otherwise be
- * added/removed while we’re swizzling.
- */
- static void device_insertion_sort_klist(struct device *a, struct list_head *list,
- int (*compare)(const struct device *a,
- const struct device *b))
- {
- struct list_head *pos;
- struct klist_node *n;
- struct device_private *dev_prv;
- struct device *b;
- list_for_each(pos, list) {
- n = container_of(pos, struct klist_node, n_node);
- dev_prv = to_device_private_bus(n);
- b = dev_prv->device;
- if (compare(a, b) <= 0) {
- list_move_tail(&a->p->knode_bus.n_node,
- &b->p->knode_bus.n_node);
- return;
- }
- }
- list_move_tail(&a->p->knode_bus.n_node, list);
- }
- void bus_sort_breadthfirst(struct bus_type *bus,
- int (*compare)(const struct device *a,
- const struct device *b))
- {
- LIST_HEAD(sorted_devices);
- struct list_head *pos, *tmp;
- struct klist_node *n;
- struct device_private *dev_prv;
- struct device *dev;
- struct klist *device_klist;
- device_klist = bus_get_device_klist(bus);
- spin_lock(&device_klist->k_lock);
- list_for_each_safe(pos, tmp, &device_klist->k_list) {
- n = container_of(pos, struct klist_node, n_node);
- dev_prv = to_device_private_bus(n);
- dev = dev_prv->device;
- device_insertion_sort_klist(dev, &sorted_devices, compare);
- }
- list_splice(&sorted_devices, &device_klist->k_list);
- spin_unlock(&device_klist->k_lock);
- }
bus_sort_breadthfirst()是将bus的设备链表进行排序,使用指定的比较函数,排成升序(每个设备都会被插到已排序链表中第一个不小于它的设备之前)。
本节主要分析了bus的注册注销过程,下节我们将深入分析device和driver的绑定过程,了解bus在这其中到底起了什么作用。随着我们了解的逐渐深入,未知的东西也在逐渐增多。但饭要一口一口吃,我们的分析也要一点一点来,急不得。
前面我们分析了device、driver、bus三种类型,主要是三者的注册与注销,在sysfs中的目录与属性文件创建等内容。本节就来详细分析下,在设备注册到总线上时,总线是如何为其寻找对应的驱动的;在驱动注册到总线上时,总线又是如何为其寻找对应的设备的。本节的实现代码集中在drivers/base/bus.c和drivers/base/dd.c中。
先来回忆下,在device_register()->device_add()中,先是调用bus_add_device()添加device与bus间的联系,并添加bus为device定义的属性,然后会调用bus_probe_device()。bus_probe_device()会试图为已挂在总线上的该设备寻找对应的驱动。我们的故事就从这里开始。
- /**
- * bus_probe_device – probe drivers for a new device
- * @dev: device to probe
- *
- * – Automatically probe for a driver if the bus allows it.
- */
- void bus_probe_device(struct device *dev)
- {
- struct bus_type *bus = dev->bus;
- int ret;
- if (bus && bus->p->drivers_autoprobe) {
- ret = device_attach(dev);
- WARN_ON(ret < 0);
- }
- }
说到bus->p->drivers_autoprobe这个变量,它是在bus_type_private中的,在调用bus_register()前都初始化不了,在bus_register()中自动定为1。所以,除非是用户空间通过drivers_autoprobe属性文件主动禁止,bus总是允许自动探测的,所有的bus都是如此。
- /**
- * device_attach – try to attach device to a driver.
- * @dev: device.
- *
- * Walk the list of drivers that the bus has and call
- * driver_probe_device() for each pair. If a compatible
- * pair is found, break out and return.
- *
- * Returns 1 if the device was bound to a driver;
- * 0 if no matching driver was found;
- * -ENODEV if the device is not registered.
- *
- * When called for a USB interface, @dev->parent->sem must be held.
- */
- int device_attach(struct device *dev)
- {
- int ret = 0;
- down(&dev->sem);
- if (dev->driver) {
- ret = device_bind_driver(dev);
- if (ret == 0)
- ret = 1;
- else {
- dev->driver = NULL;
- ret = 0;
- }
- } else {
- pm_runtime_get_noresume(dev);
- ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
- pm_runtime_put_sync(dev);
- }
- up(&dev->sem);
- return ret;
- }
device_attach()在实际绑定之前,会用dev->sem进行加锁。不错,dev->sem几乎就是为了在设备与驱动绑定或者解除绑定时加锁用的。还没有看到它在其它地方被使用。
如果在调用device_attach()前就已经有了dev->driver,就调用device_bind_driver()进行绑定,不然还要调用bus_for_each_drv()进行依次匹配。至于pm_runtime_get_noresume之类的函数,属于电源管理部分,我们现在先忽略。
- static void driver_bound(struct device *dev)
- {
- if (klist_node_attached(&dev->p->knode_driver)) {
- printk(KERN_WARNING "%s: device %s already bound\n",
- __func__, kobject_name(&dev->kobj));
- return;
- }
- pr_debug("driver: '%s': %s: bound to device '%s'\n", dev_name(dev),
- __func__, dev->driver->name);
- if (dev->bus)
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
- BUS_NOTIFY_BOUND_DRIVER, dev);
- klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices);
- }
- static int driver_sysfs_add(struct device *dev)
- {
- int ret;
- ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj,
- kobject_name(&dev->kobj));
- if (ret == 0) {
- ret = sysfs_create_link(&dev->kobj, &dev->driver->p->kobj,
- "driver");
- if (ret)
- sysfs_remove_link(&dev->driver->p->kobj,
- kobject_name(&dev->kobj));
- }
- return ret;
- }
- static void driver_sysfs_remove(struct device *dev)
- {
- struct device_driver *drv = dev->driver;
- if (drv) {
- sysfs_remove_link(&drv->p->kobj, kobject_name(&dev->kobj));
- sysfs_remove_link(&dev->kobj, "driver");
- }
- }
- /**
- * device_bind_driver – bind a driver to one device.
- * @dev: device.
- *
- * Allow manual attachment of a driver to a device.
- * Caller must have already set @dev->driver.
- *
- * Note that this does not modify the bus reference count
- * nor take the bus’s rwsem. Please verify those are accounted
- * for before calling this. (It is ok to call with no other effort
- * from a driver’s probe() method.)
- *
- * This function must be called with @dev->sem held.
- */
- int device_bind_driver(struct device *dev)
- {
- int ret;
- ret = driver_sysfs_add(dev);
- if (!ret)
- driver_bound(dev);
- return ret;
- }
其中driver_sysfs_add()负责创建sysfs中driver和device指向对方的软链接。还有一个与它相对的函数driver_sysfs_remove()。
driver_bound()则实际将device加入驱动的设备链表。
因为在调用device_bind_driver()之前就已经设置过dev->driver了,所以这样就将device和driver绑定了。
只是这样好像还缺少了什么,不错,之前看到driver时曾定义了drv->probe函数,bus->probe也有类似的功能,这里只是绑定,却没有调用probe函数。
让我们回过头来,继续看如果device_attach()中没有定义dev->driver会怎么样,是用bus_for_each_drv()对bus的驱动链表进行遍历,遍历函数使用__device_attach。
- static int __device_attach(struct device_driver *drv, void *data)
- {
- struct device *dev = data;
- if (!driver_match_device(drv, dev))
- return 0;
- return driver_probe_device(drv, dev);
- }
先来看匹配工作,这是在driver_match_device()中完成的。
- static inline int driver_match_device(struct device_driver *drv,
- struct device *dev)
- {
- return drv->bus->match ? drv->bus->match(dev, drv) : 1;
- }
- int driver_probe_device(struct device_driver *drv, struct device *dev)
- {
- int ret = 0;
- if (!device_is_registered(dev))
- return -ENODEV;
- pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
- drv->bus->name, __func__, dev_name(dev), drv->name);
- pm_runtime_get_noresume(dev);
- pm_runtime_barrier(dev);
- ret = really_probe(dev, drv);
- pm_runtime_put_sync(dev);
- return ret;
- }
- static atomic_t probe_count = ATOMIC_INIT(0);
- static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue);
- static int really_probe(struct device *dev, struct device_driver *drv)
- {
- int ret = 0;
- atomic_inc(&probe_count);
- pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
- drv->bus->name, __func__, drv->name, dev_name(dev));
- WARN_ON(!list_empty(&dev->devres_head));
- dev->driver = drv;
- if (driver_sysfs_add(dev)) {
- printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
- __func__, dev_name(dev));
- goto probe_failed;
- }
- if (dev->bus->probe) {
- ret = dev->bus->probe(dev);
- if (ret)
- goto probe_failed;
- } else if (drv->probe) {
- ret = drv->probe(dev);
- if (ret)
- goto probe_failed;
- }
- driver_bound(dev);
- ret = 1;
- pr_debug("bus: '%s': %s: bound device %s to driver %s\n",
- drv->bus->name, __func__, dev_name(dev), drv->name);
- goto done;
- probe_failed:
- devres_release_all(dev);
- driver_sysfs_remove(dev);
- dev->driver = NULL;
- if (ret != -ENODEV && ret != -ENXIO) {
- /* driver matched but the probe failed */
- printk(KERN_WARNING
- "%s: probe of %s failed with error %d\n",
- drv->name, dev_name(dev), ret);
- }
- /*
- * Ignore errors returned by ->probe so that the next driver can try
- * its luck.
- */
- ret = 0;
- done:
- atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
- return ret;
- }
至于在really_probe()中使用probe_count保护,最后调用wake_up(&probe_waitqueue),都是为了进行同步。
- /**
- * driver_probe_done
- * Determine if the probe sequence is finished or not.
- *
- * Should somehow figure out how to use a semaphore, not an atomic variable…
- */
- int driver_probe_done(void)
- {
- pr_debug("%s: probe_count = %d\n", __func__,
- atomic_read(&probe_count));
- if (atomic_read(&probe_count))
- return -EBUSY;
- return 0;
- }
- /**
- * wait_for_device_probe
- * Wait for device probing to be completed.
- */
- void wait_for_device_probe(void)
- {
- /* wait for the known devices to complete their probing */
- wait_event(probe_waitqueue, atomic_read(&probe_count) == 0);
- async_synchronize_full();
- }
wait_for_device_probe()会阻塞到所有的设备绑定完驱动。
关于bus_probe_device()的过程就分析到这里,下面来看下bus_add_driver()又是怎样做的。
之前我们已经知道driver_register()把绝大部分操作都移到了bus_add_driver()中来。其中只有一点和设备与驱动的绑定相关,就是对driver_attach()的调用。
- int driver_attach(struct device_driver *drv)
- {
- return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
- }
- static int __driver_attach(struct device *dev, void *data)
- {
- struct device_driver *drv = data;
- /*
- * Lock device and try to bind to it. We drop the error
- * here and always return 0, because we need to keep trying
- * to bind to devices and some drivers will return an error
- * simply if it didn’t support the device.
- *
- * driver_probe_device() will spit a warning if there
- * is an error.
- */
- if (!driver_match_device(drv, dev))
- return 0;
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- down(&dev->sem);
- if (!dev->driver)
- driver_probe_device(drv, dev);
- up(&dev->sem);
- if (dev->parent)
- up(&dev->parent->sem);
- return 0;
- }
然后依然是加锁,调用driver_probe_device()函数。这就与__device_attach()的路径一致了。
不要以为就这样结束了,现在我们只是看到了把device和driver绑定到一起的方法,却没有看到解除绑定的方法。
既然绑定的方法是在设备和驱动注册的时候调用的,那解除绑定自然是在设备或驱动注销的时候。
还是先来看设备的,device_unregister()->device_del()会调用bus_remove_device()将设备从总线上删除。
bus_remove_device()是与bus_add_device()相对的,但也不仅如此,它还调用了device_release_driver()来解除与driver的绑定。
- /**
- * device_release_driver – manually detach device from driver.
- * @dev: device.
- *
- * Manually detach device from driver.
- * When called for a USB interface, @dev->parent->sem must be held.
- */
- void device_release_driver(struct device *dev)
- {
- /*
- * If anyone calls device_release_driver() recursively from
- * within their ->remove callback for the same device, they
- * will deadlock right here.
- */
- down(&dev->sem);
- __device_release_driver(dev);
- up(&dev->sem);
- }
- /*
- * __device_release_driver() must be called with @dev->sem held.
- * When called for a USB interface, @dev->parent->sem must be held as well.
- */
- static void __device_release_driver(struct device *dev)
- {
- struct device_driver *drv;
- drv = dev->driver;
- if (drv) {
- pm_runtime_get_noresume(dev);
- pm_runtime_barrier(dev);
- driver_sysfs_remove(dev);
- if (dev->bus)
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
- BUS_NOTIFY_UNBIND_DRIVER,
- dev);
- if (dev->bus && dev->bus->remove)
- dev->bus->remove(dev);
- else if (drv->remove)
- drv->remove(dev);
- devres_release_all(dev);
- dev->driver = NULL;
- klist_remove(&dev->p->knode_driver);
- if (dev->bus)
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
- BUS_NOTIFY_UNBOUND_DRIVER,
- dev);
- pm_runtime_put_sync(dev);
- }
- }
除了sysfs和结构中解除绑定的操作,还调用了bus->remove或者driver->remove。
虽然device注销时与driver解除绑定很简单,但driver注销要与device解除绑定就要复杂一些,因为它要与设备链表上所有的设备解除绑定。
在driver_unregister()->bus_remove_driver()中,调用了driver_detach()函数。
- /**
- * driver_detach – detach driver from all devices it controls.
- * @drv: driver.
- */
- void driver_detach(struct device_driver *drv)
- {
- struct device_private *dev_prv;
- struct device *dev;
- for (;;) {
- spin_lock(&drv->p->klist_devices.k_lock);
- if (list_empty(&drv->p->klist_devices.k_list)) {
- spin_unlock(&drv->p->klist_devices.k_lock);
- break;
- }
- dev_prv = list_entry(drv->p->klist_devices.k_list.prev,
- struct device_private,
- knode_driver.n_node);
- dev = dev_prv->device;
- get_device(dev);
- spin_unlock(&drv->p->klist_devices.k_lock);
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- down(&dev->sem);
- if (dev->driver == drv)
- __device_release_driver(dev);
- up(&dev->sem);
- if (dev->parent)
- up(&dev->parent->sem);
- put_device(dev);
- }
- }
或许会奇怪这里为什么会有get_device()和put_device()的操作。这是为了防止设备一取下链表,就会释放最后一个引用计数,导致直接注销。那时候的情况,一定是在占用了dev->sem的同时去等待dev->sem,通俗来说就是死锁。
通过driver_attach()和driver_detach()的训练,我们已经习惯在为设备加锁时,顺便为其父设备加锁。虽然在device_attach()和device_release_driver()中只是对设备本身加锁。或许是害怕在驱动与设备解除绑定的过程中,父设备突然也要解除绑定,导致不一致状态。至于为什么设备方主动要求时不需要对父设备加锁,或许是设备的主动申请更靠谱,不会在子设备绑定或释放的同时,父设备也申请释放。总之,在linux看来,设备恐怕比驱动还要靠谱一些,从driver和bus的引用计数,从这里的加锁情况,都可以看出一二。
- void *dev_get_drvdata(const struct device *dev)
- {
- if (dev && dev->p)
- return dev->p->driver_data;
- return NULL;
- }
- void dev_set_drvdata(struct device *dev, void *data)
- {
- int error;
- if (!dev)
- return;
- if (!dev->p) {
- error = device_private_init(dev);
- if (error)
- return;
- }
- dev->p->driver_data = data;
- }
不要小看这个device_private结构中小小的driver_data,在驱动编写中总能派上大用场。当然也不是说没有driver_data就过不下去,毕竟驱动可以定义一个自己的device结构,并把通用的struct device内嵌其中,然后想放多少数据都行。可那样太麻烦,许多驱动都要专门设置这样一个变量,索性加到通用的数据结构中。而且是直接加到device_private中,眼不见为净,方便省事。
- /**
- * device_reprobe – remove driver for a device and probe for a new driver
- * @dev: the device to reprobe
- *
- * This function detaches the attached driver (if any) for the given
- * device and restarts the driver probing process. It is intended
- * to use if probing criteria changed during a devices lifetime and
- * driver attachment should change accordingly.
- */
- int device_reprobe(struct device *dev)
- {
- if (dev->driver) {
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- device_release_driver(dev);
- if (dev->parent)
- up(&dev->parent->sem);
- }
- return bus_rescan_devices_helper(dev, NULL);
- }
- static int __must_check bus_rescan_devices_helper(struct device *dev,
- void *data)
- {
- int ret = 0;
- if (!dev->driver) {
- if (dev->parent) /* Needed for USB */
- down(&dev->parent->sem);
- ret = device_attach(dev);
- if (dev->parent)
- up(&dev->parent->sem);
- }
- return ret < 0 ? ret : 0;
- }
我们终于成功完成了对dd.c的分析,并将bus.c剩余的部分结了尾。想必大家已经充分领略了device、driver和bus的铁三角结构,下节我们将进入设备驱动模型的另一方天地。