文件系统总述

一、内核文件系统数据结构 #

1. 各个结构体关系 #

1.1. 常规文件 #

2. file #

  • 每次打开文件都会在内核中创建一个 file 结构体,进程通过 fd(文件描述符)引用它
 1// include/linux/fs.h
 2struct file {	/* one instance per opened file, associated with an fd */
 3	union {
 4		struct llist_node	fu_llist;
 5		struct rcu_head 	fu_rcuhead;
 6	} f_u;	/* union: the two members share the same storage */
 7	struct path		f_path;	/* struct path pairs a vfsmount pointer with a dentry pointer */
 8	struct inode		*f_inode;	/* cached value */
 9	const struct file_operations	*f_op;	/* callback table of type struct file_operations */
10
11	/*
12	 * Protects f_ep, f_flags.
13	 * Must not be taken from IRQ context.
14	 */
15	spinlock_t		f_lock;
16	atomic_long_t		f_count;	/* presumably the reference count -- TODO confirm */
17	unsigned int 		f_flags;
18	fmode_t			f_mode;
19	struct mutex		f_pos_lock;	/* presumably serializes access to f_pos -- TODO confirm */
20	loff_t			f_pos;
21	struct fown_struct	f_owner;
22	const struct cred	*f_cred;
23	struct file_ra_state	f_ra;
24
25	u64			f_version;
26#ifdef CONFIG_SECURITY
27	void			*f_security;
28#endif
29	/* needed for tty driver, and maybe others */
30	void			*private_data;  // special files such as epoll store a pointer to their own structure here
31
32#ifdef CONFIG_EPOLL
33	/* Used by fs/eventpoll.c to link all the hooks to this file */
34	struct hlist_head	*f_ep;	/* member exists only when CONFIG_EPOLL is defined */
35#endif /* #ifdef CONFIG_EPOLL */
36	struct address_space	*f_mapping;
37	errseq_t		f_wb_err;
38	errseq_t		f_sb_err; /* for syncfs */
39} __randomize_layout
40  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */

2.1. inode #

  1// include/linux/fs.h
  2/*
  3 * Keep mostly read-only and often accessed (especially for
  4 * the RCU path lookup and 'stat' data) fields at the beginning
  5 * of the 'struct inode'
  6 */
  7struct inode {
  8	umode_t			i_mode;
  9	unsigned short		i_opflags;
 10	kuid_t			i_uid;
 11	kgid_t			i_gid;
 12	unsigned int		i_flags;
 13
 14#ifdef CONFIG_FS_POSIX_ACL
 15	struct posix_acl	*i_acl;
 16	struct posix_acl	*i_default_acl;
 17#endif
 18
 19	const struct inode_operations	*i_op;	/* callback table of type struct inode_operations */
 20	struct super_block	*i_sb;
 21	struct address_space	*i_mapping;	/* pointer to an address_space; an embedded one is i_data below */
 22
 23#ifdef CONFIG_SECURITY
 24	void			*i_security;
 25#endif
 26
 27	/* Stat data, not accessed from path walking */
 28	unsigned long		i_ino;
 29	/*
 30	 * Filesystems may only read i_nlink directly.  They shall use the
 31	 * following functions for modification:
 32	 *
 33	 *    (set|clear|inc|drop)_nlink
 34	 *    inode_(inc|dec)_link_count
 35	 */
 36	union {	/* const and non-const names for the same storage, matching the rule stated above */
 37		const unsigned int i_nlink;
 38		unsigned int __i_nlink;
 39	};
 40	dev_t			i_rdev;
 41	loff_t			i_size;
 42	struct timespec64	i_atime;
 43	struct timespec64	i_mtime;
 44	struct timespec64	i_ctime;
 45	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 46	unsigned short          i_bytes;
 47	u8			i_blkbits;
 48	u8			i_write_hint;
 49	blkcnt_t		i_blocks;
 50
 51#ifdef __NEED_I_SIZE_ORDERED
 52	seqcount_t		i_size_seqcount;
 53#endif
 54
 55	/* Misc */
 56	unsigned long		i_state;
 57	struct rw_semaphore	i_rwsem;
 58
 59	unsigned long		dirtied_when;	/* jiffies of first dirtying */
 60	unsigned long		dirtied_time_when;
 61
 62	struct hlist_node	i_hash;
 63	struct list_head	i_io_list;	/* backing dev IO list */
 64#ifdef CONFIG_CGROUP_WRITEBACK
 65	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
 66
 67	/* foreign inode detection, see wbc_detach_inode() */
 68	int			i_wb_frn_winner;
 69	u16			i_wb_frn_avg_time;
 70	u16			i_wb_frn_history;
 71#endif
 72	struct list_head	i_lru;		/* inode LRU list */
 73	struct list_head	i_sb_list;
 74	struct list_head	i_wb_list;	/* backing dev writeback list */
 75	union {	/* i_dentry and i_rcu share the same storage */
 76		struct hlist_head	i_dentry;
 77		struct rcu_head		i_rcu;
 78	};
 79	atomic64_t		i_version;
 80	atomic64_t		i_sequence; /* see futex */
 81	atomic_t		i_count;
 82	atomic_t		i_dio_count;
 83	atomic_t		i_writecount;
 84#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
 85	atomic_t		i_readcount; /* struct files open RO */
 86#endif
 87	union {	/* i_fop and free_inode overlay each other; only one is valid at a time */
 88		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 89		void (*free_inode)(struct inode *);
 90	};
 91	struct file_lock_context	*i_flctx;
 92	struct address_space	i_data;	/* address_space embedded directly in the inode */
 93	struct list_head	i_devices;
 94	union {	/* members share storage; presumably selected by the kind of inode -- TODO confirm */
 95		struct pipe_inode_info	*i_pipe;
 96		struct cdev		*i_cdev;
 97		char			*i_link;
 98		unsigned		i_dir_seq;
 99	};
100
101	__u32			i_generation;
102
103#ifdef CONFIG_FSNOTIFY
104	__u32			i_fsnotify_mask; /* all events this inode cares about */
105	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
106#endif
107
108#ifdef CONFIG_FS_ENCRYPTION
109	struct fscrypt_info	*i_crypt_info;
110#endif
111
112#ifdef CONFIG_FS_VERITY
113	struct fsverity_info	*i_verity_info;
114#endif
115
116	void			*i_private; /* fs or device private pointer */
117} __randomize_layout;

1) inode_operations #

 1// include/linux/fs.h
 2struct inode_operations {	/* callback table; struct inode refers to one through its i_op member */
 3	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 4	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 5	int (*permission) (struct user_namespace *, struct inode *, int);	/* first of several callbacks whose leading argument is a struct user_namespace * */
 6	struct posix_acl * (*get_acl)(struct inode *, int, bool);
 7
 8	int (*readlink) (struct dentry *, char __user *,int);
 9
10	int (*create) (struct user_namespace *, struct inode *,struct dentry *,
11		       umode_t, bool);
12	int (*link) (struct dentry *,struct inode *,struct dentry *);
13	int (*unlink) (struct inode *,struct dentry *);
14	int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
15			const char *);
16	int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
17		      umode_t);
18	int (*rmdir) (struct inode *,struct dentry *);
19	int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
20		      umode_t,dev_t);
21	int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
22			struct inode *, struct dentry *, unsigned int);	/* takes two inode/dentry pairs; presumably source then destination -- TODO confirm */
23	int (*setattr) (struct user_namespace *, struct dentry *,
24			struct iattr *);
25	int (*getattr) (struct user_namespace *, const struct path *,
26			struct kstat *, u32, unsigned int);
27	ssize_t (*listxattr) (struct dentry *, char *, size_t);
28	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
29		      u64 len);
30	int (*update_time)(struct inode *, struct timespec64 *, int);
31	int (*atomic_open)(struct inode *, struct dentry *,
32			   struct file *, unsigned open_flag,
33			   umode_t create_mode);
34	int (*tmpfile) (struct user_namespace *, struct inode *,
35			struct dentry *, umode_t);
36	int (*set_acl)(struct user_namespace *, struct inode *,
37		       struct posix_acl *, int);
38	int (*fileattr_set)(struct user_namespace *mnt_userns,
39			    struct dentry *dentry, struct fileattr *fa);	/* the only prototype here that names the namespace argument (mnt_userns) */
40	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
41} ____cacheline_aligned;

2.2. path #

1// include/linux/path.h
2struct path {	/* a pair of pointers (mount, dentry); struct file embeds one as f_path */
3	struct vfsmount *mnt;
4	struct dentry *dentry;
5} __randomize_layout;

2.3. file_operations #

 1// include/linux/fs.h
 2struct file_operations {	/* callback table; referenced by struct file (f_op) and struct inode (i_fop) */
 3	struct module *owner;
 4	loff_t (*llseek) (struct file *, loff_t, int);
 5	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);	/* read/write take a __user buffer, a length and a loff_t pointer */
 6	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 7	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);	/* the *_iter variants take a kiocb and an iov_iter instead */
 8	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 9	int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
10			unsigned int flags);
11	int (*iterate) (struct file *, struct dir_context *);
12	int (*iterate_shared) (struct file *, struct dir_context *);
13	__poll_t (*poll) (struct file *, struct poll_table_struct *);
14	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
15	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
16	int (*mmap) (struct file *, struct vm_area_struct *);
17	unsigned long mmap_supported_flags;	/* plain data member, unlike the function pointers around it */
18	int (*open) (struct inode *, struct file *);
19	int (*flush) (struct file *, fl_owner_t id);
20	int (*release) (struct inode *, struct file *);
21	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
22	int (*fasync) (int, struct file *, int);
23	int (*lock) (struct file *, int, struct file_lock *);
24	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
25	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
26	int (*check_flags)(int);
27	int (*flock) (struct file *, int, struct file_lock *);
28	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
29	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
30	int (*setlease)(struct file *, long, struct file_lock **, void **);
31	long (*fallocate)(struct file *file, int mode, loff_t offset,
32			  loff_t len);
33	void (*show_fdinfo)(struct seq_file *m, struct file *f);
34#ifndef CONFIG_MMU
35	unsigned (*mmap_capabilities)(struct file *);	/* member exists only when CONFIG_MMU is not defined */
36#endif
37	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
38			loff_t, size_t, unsigned int);
39	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
40				   struct file *file_out, loff_t pos_out,
41				   loff_t len, unsigned int remap_flags);
42	int (*fadvise)(struct file *, loff_t, loff_t, int);
43	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
44} __randomize_layout;

3. files_struct #

  • 进程结构体(task_struct)持有该结构体,用它管理进程已打开的文件
 1// include/linux/fdtable.h
 2/*
 3 * Open file table structure
 4 */
 5struct files_struct {	/* file-management structure held by the process structure */
 6  /*
 7   * read mostly part
 8   */
 9	atomic_t count;
10	bool resize_in_progress;
11	wait_queue_head_t resize_wait;
12
13	struct fdtable __rcu *fdt;	/* pointer to a struct fdtable; presumably points at fdtab below until the table grows -- TODO confirm */
14	struct fdtable fdtab;	/* fdtable instance embedded in this structure */
15  /*
16   * written part on a separate cache line in SMP
17   */
18	spinlock_t file_lock ____cacheline_aligned_in_smp;
19	unsigned int next_fd;
20	unsigned long close_on_exec_init[1];	/* the three *_init members are one-word inline arrays; presumably initial storage for the same-named fdtable pointers -- TODO confirm */
21	unsigned long open_fds_init[1];
22	unsigned long full_fds_bits_init[1];
23	struct file __rcu * fd_array[NR_OPEN_DEFAULT];	/* inline array of NR_OPEN_DEFAULT struct file pointers */
24};

3.1. fdtable #

1// include/linux/fdtable.h
2struct fdtable {	/* struct files_struct embeds one (fdtab) and points to one (fdt) */
3	unsigned int max_fds;	/* presumably the capacity of the fd array below -- TODO confirm */
4	struct file __rcu **fd;      /* current fd array */
5	unsigned long *close_on_exec;	/* this and the next two members are unsigned long pointers; presumably bitmaps indexed by fd -- TODO confirm */
6	unsigned long *open_fds;
7	unsigned long *full_fds_bits;
8	struct rcu_head rcu;
9};
9};