一、内核文件系统数据结构
1. 各个结构体关系
1.1. 常规文件
2. file
- 每个被打开的文件都对应一个 file 结构体(struct file),并与一个 fd(文件描述符)相关联。
1// include/linux/fs.h -- struct file: per the surrounding notes, one instance exists per opened file and it is associated with an fd
2struct file {
3 union {
4 struct llist_node fu_llist;
5 struct rcu_head fu_rcuhead;
6 } f_u; /* union: fu_llist and fu_rcuhead share the same storage */
7 struct path f_path; /* struct path holds a vfsmount pointer and a dentry pointer */
8 struct inode *f_inode; /* cached value */
9 const struct file_operations *f_op; /* callback table of type struct file_operations */
10
11 /*
12 * Protects f_ep, f_flags.
13 * Must not be taken from IRQ context.
14 */
15 spinlock_t f_lock;
16 atomic_long_t f_count; /* atomic counter; presumably the reference count -- TODO confirm */
17 unsigned int f_flags; /* protected by f_lock (see the comment above f_lock) */
18 fmode_t f_mode;
19 struct mutex f_pos_lock; /* NOTE(review): by its name, presumably guards f_pos -- confirm */
20 loff_t f_pos;
21 struct fown_struct f_owner;
22 const struct cred *f_cred;
23 struct file_ra_state f_ra;
24
25 u64 f_version;
26#ifdef CONFIG_SECURITY
27 void *f_security;
28#endif
29 /* needed for tty driver, and maybe others */
30 void *private_data; // special files such as epoll store a pointer to their own structure here
31
32#ifdef CONFIG_EPOLL
33 /* Used by fs/eventpoll.c to link all the hooks to this file */
34 struct hlist_head *f_ep;
35#endif /* #ifdef CONFIG_EPOLL */
36 struct address_space *f_mapping;
37 errseq_t f_wb_err;
38 errseq_t f_sb_err; /* for syncfs */
39} __randomize_layout
40 __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
2.1. inode
1// include/linux/fs.h -- struct inode
2/*
3 * Keep mostly read-only and often accessed (especially for
4 * the RCU path lookup and 'stat' data) fields at the beginning
5 * of the 'struct inode'
6 */
7struct inode {
8 umode_t i_mode;
9 unsigned short i_opflags;
10 kuid_t i_uid;
11 kgid_t i_gid;
12 unsigned int i_flags;
13
14#ifdef CONFIG_FS_POSIX_ACL
15 struct posix_acl *i_acl;
16 struct posix_acl *i_default_acl;
17#endif
18
19 const struct inode_operations *i_op; /* callback table of type struct inode_operations */
20 struct super_block *i_sb;
21 struct address_space *i_mapping; /* NOTE(review): an embedded address_space (i_data) is declared further down; presumably this normally points at it -- confirm */
22
23#ifdef CONFIG_SECURITY
24 void *i_security;
25#endif
26
27 /* Stat data, not accessed from path walking */
28 unsigned long i_ino;
29 /*
30 * Filesystems may only read i_nlink directly. They shall use the
31 * following functions for modification:
32 *
33 * (set|clear|inc|drop)_nlink
34 * inode_(inc|dec)_link_count
35 */
36 union {
37 const unsigned int i_nlink; /* const-qualified view: read-only through this name */
38 unsigned int __i_nlink; /* same storage without const; the writable alias */
39 };
40 dev_t i_rdev;
41 loff_t i_size;
42 struct timespec64 i_atime;
43 struct timespec64 i_mtime;
44 struct timespec64 i_ctime;
45 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
46 unsigned short i_bytes;
47 u8 i_blkbits;
48 u8 i_write_hint;
49 blkcnt_t i_blocks;
50
51#ifdef __NEED_I_SIZE_ORDERED
52 seqcount_t i_size_seqcount;
53#endif
54
55 /* Misc */
56 unsigned long i_state;
57 struct rw_semaphore i_rwsem;
58
59 unsigned long dirtied_when; /* jiffies of first dirtying */
60 unsigned long dirtied_time_when;
61
62 struct hlist_node i_hash;
63 struct list_head i_io_list; /* backing dev IO list */
64#ifdef CONFIG_CGROUP_WRITEBACK
65 struct bdi_writeback *i_wb; /* the associated cgroup wb */
66
67 /* foreign inode detection, see wbc_detach_inode() */
68 int i_wb_frn_winner;
69 u16 i_wb_frn_avg_time;
70 u16 i_wb_frn_history;
71#endif
72 struct list_head i_lru; /* inode LRU list */
73 struct list_head i_sb_list;
74 struct list_head i_wb_list; /* backing dev writeback list */
75 union {
76 struct hlist_head i_dentry;
77 struct rcu_head i_rcu; /* shares storage with i_dentry (same union) */
78 };
79 atomic64_t i_version;
80 atomic64_t i_sequence; /* see futex */
81 atomic_t i_count; /* atomic counter; presumably the reference count -- TODO confirm */
82 atomic_t i_dio_count;
83 atomic_t i_writecount;
84#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
85 atomic_t i_readcount; /* struct files open RO */
86#endif
87 union {
88 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
89 void (*free_inode)(struct inode *); /* shares storage with i_fop (same union) */
90 };
91 struct file_lock_context *i_flctx;
92 struct address_space i_data; /* embedded by value, unlike the i_mapping pointer above */
93 struct list_head i_devices;
94 union { /* the four members below share storage; NOTE(review): presumably selected by inode type -- confirm */
95 struct pipe_inode_info *i_pipe;
96 struct cdev *i_cdev;
97 char *i_link;
98 unsigned i_dir_seq;
99 };
100
101 __u32 i_generation;
102
103#ifdef CONFIG_FSNOTIFY
104 __u32 i_fsnotify_mask; /* all events this inode cares about */
105 struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
106#endif
107
108#ifdef CONFIG_FS_ENCRYPTION
109 struct fscrypt_info *i_crypt_info;
110#endif
111
112#ifdef CONFIG_FS_VERITY
113 struct fsverity_info *i_verity_info;
114#endif
115
116 void *i_private; /* fs or device private pointer */
117} __randomize_layout;
1) inode_operations
1// include/linux/fs.h -- struct inode_operations: table of function pointers; struct inode refers to one through its i_op member
2struct inode_operations {
3 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
4 const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
5 int (*permission) (struct user_namespace *, struct inode *, int); /* first of several callbacks here that take a struct user_namespace * as first argument */
6 struct posix_acl * (*get_acl)(struct inode *, int, bool);
7
8 int (*readlink) (struct dentry *, char __user *,int); /* writes into a __user-annotated buffer */
9
10 int (*create) (struct user_namespace *, struct inode *,struct dentry *,
11 umode_t, bool);
12 int (*link) (struct dentry *,struct inode *,struct dentry *);
13 int (*unlink) (struct inode *,struct dentry *);
14 int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
15 const char *);
16 int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
17 umode_t);
18 int (*rmdir) (struct inode *,struct dentry *);
19 int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
20 umode_t,dev_t);
21 int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
22 struct inode *, struct dentry *, unsigned int); /* two inode/dentry pairs; NOTE(review): presumably old and new location -- confirm */
23 int (*setattr) (struct user_namespace *, struct dentry *,
24 struct iattr *);
25 int (*getattr) (struct user_namespace *, const struct path *,
26 struct kstat *, u32, unsigned int);
27 ssize_t (*listxattr) (struct dentry *, char *, size_t);
28 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
29 u64 len);
30 int (*update_time)(struct inode *, struct timespec64 *, int);
31 int (*atomic_open)(struct inode *, struct dentry *,
32 struct file *, unsigned open_flag,
33 umode_t create_mode);
34 int (*tmpfile) (struct user_namespace *, struct inode *,
35 struct dentry *, umode_t);
36 int (*set_acl)(struct user_namespace *, struct inode *,
37 struct posix_acl *, int);
38 int (*fileattr_set)(struct user_namespace *mnt_userns,
39 struct dentry *dentry, struct fileattr *fa);
40 int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
41} ____cacheline_aligned;
2.2. path
1// include/linux/path.h -- struct path: a pair of pointers (vfsmount, dentry); struct file embeds one as f_path
2struct path {
3 struct vfsmount *mnt; /* NOTE(review): presumably the mount the dentry was reached through -- confirm */
4 struct dentry *dentry;
5} __randomize_layout;
2.3. file_operations
1// include/linux/fs.h -- struct file_operations: table of function pointers; referenced by struct file (f_op) and struct inode (i_fop)
2struct file_operations {
3 struct module *owner;
4 loff_t (*llseek) (struct file *, loff_t, int);
5 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); /* buffer is __user-annotated; last argument is a pointer to an loff_t */
6 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); /* same shape as read, with a const source buffer */
7 ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
8 ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
9 int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
10 unsigned int flags);
11 int (*iterate) (struct file *, struct dir_context *);
12 int (*iterate_shared) (struct file *, struct dir_context *);
13 __poll_t (*poll) (struct file *, struct poll_table_struct *);
14 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
15 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
16 int (*mmap) (struct file *, struct vm_area_struct *);
17 unsigned long mmap_supported_flags; /* plain data member, not a callback */
18 int (*open) (struct inode *, struct file *);
19 int (*flush) (struct file *, fl_owner_t id);
20 int (*release) (struct inode *, struct file *);
21 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
22 int (*fasync) (int, struct file *, int);
23 int (*lock) (struct file *, int, struct file_lock *);
24 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
25 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
26 int (*check_flags)(int);
27 int (*flock) (struct file *, int, struct file_lock *);
28 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
29 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
30 int (*setlease)(struct file *, long, struct file_lock **, void **);
31 long (*fallocate)(struct file *file, int mode, loff_t offset,
32 loff_t len);
33 void (*show_fdinfo)(struct seq_file *m, struct file *f);
34#ifndef CONFIG_MMU
35 unsigned (*mmap_capabilities)(struct file *); /* member exists only when CONFIG_MMU is not defined */
36#endif
37 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
38 loff_t, size_t, unsigned int);
39 loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
40 struct file *file_out, loff_t pos_out,
41 loff_t len, unsigned int remap_flags);
42 int (*fadvise)(struct file *, loff_t, loff_t, int);
43 int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
44} __randomize_layout;
3. files_struct
- 进程结构体(task_struct)中持有的、用于管理该进程已打开文件的结构体。
1// include/linux/fdtable.h -- struct files_struct: per the surrounding notes, the file-management structure held by the process structure
2/*
3 * Open file table structure
4 */
5struct files_struct {
6 /*
7 * read mostly part
8 */
9 atomic_t count; /* atomic counter; presumably the number of users sharing this table -- TODO confirm */
10 bool resize_in_progress;
11 wait_queue_head_t resize_wait;
12
13 struct fdtable __rcu *fdt; /* RCU-annotated pointer to a struct fdtable */
14 struct fdtable fdtab; /* fdtable embedded by value; NOTE(review): presumably what fdt points at initially -- confirm */
15 /*
16 * written part on a separate cache line in SMP
17 */
18 spinlock_t file_lock ____cacheline_aligned_in_smp;
19 unsigned int next_fd;
20 unsigned long close_on_exec_init[1]; /* the three one-word arrays here mirror the names of fdtable's close_on_exec/open_fds/full_fds_bits pointers; presumably their initial backing storage -- confirm */
21 unsigned long open_fds_init[1];
22 unsigned long full_fds_bits_init[1];
23 struct file __rcu * fd_array[NR_OPEN_DEFAULT]; /* inline array of NR_OPEN_DEFAULT struct file pointers */
24};
3.1. fdtable
1// include/linux/fdtable.h -- struct fdtable: the table type that struct files_struct both embeds (fdtab) and points to (fdt)
2struct fdtable {
3 unsigned int max_fds; /* NOTE(review): presumably the capacity of the fd array below -- confirm */
4 struct file __rcu **fd; /* current fd array */
5 unsigned long *close_on_exec; /* this and the next two are unsigned long pointers; presumably bitmaps indexed by fd -- TODO confirm */
6 unsigned long *open_fds;
7 unsigned long *full_fds_bits;
8 struct rcu_head rcu;
9};