C语言 Linux内核块设备驱动程序:在module_init中调用add_disk时发生空指针解引用

disbfnqx  于 2023-10-16  发布在  Linux
关注(0)|答案(1)|浏览(225)

我按照这里的步骤写了一个非常基本的块设备驱动程序:https://linux-kernel-labs.github.io/refs/heads/master/labs/block_device_drivers.html 。module_init 函数 my_block_init 以及它调用的 create_block_device 如下所示:

  1. static int create_block_device(struct my_block_dev *dev) { /* Sets up dev step by step: data buffer, tag set, request queue, gendisk, then add_disk(). Returns 0 on success, otherwise err after unwinding whatever was already set up. */
  2. int err;
  3. dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE; /* byte count passed to vmalloc() on the next line */
  4. dev->data = vmalloc(dev->size);
  5. if (dev->data == NULL) {
  6. printk(KERN_ERR "vmalloc: out of memory\n");
  7. err = -ENOMEM;
  8. goto out_vmalloc;
  9. }
  10. /* Initialize tag set. */
  11. dev->tag_set.ops = &my_queue_ops;
  12. dev->tag_set.nr_hw_queues = 1;
  13. dev->tag_set.queue_depth = 128;
  14. dev->tag_set.numa_node = NUMA_NO_NODE;
  15. dev->tag_set.cmd_size = 0;
  16. dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
  17. err = blk_mq_alloc_tag_set(&dev->tag_set); /* a non-zero result is returned to the caller unchanged */
  18. if (err) {
  19. printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
  20. goto out_alloc_tag_set;
  21. }
  22. /* Allocate queue. */
  23. dev->queue = blk_mq_init_queue(&dev->tag_set);
  24. if (IS_ERR(dev->queue)) { /* the code carried by the error pointer is dropped; -ENOMEM is reported instead */
  25. printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
  26. err = -ENOMEM;
  27. goto out_blk_init;
  28. }
  29. blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
  30. dev->queue->queuedata = dev; /* back-pointer to dev stored on the queue */
  31. /* initialize the gendisk structure */
  32. dev->gd = blk_alloc_disk(NUMA_NO_NODE); /* NOTE(review): the answer quoted further down this page reports that blk_alloc_disk() already allocates a request_queue for the gendisk - confirm against block/genhd.c for the kernel in use */
  33. if (!dev->gd) {
  34. printk(KERN_ERR "alloc_disk: failure\n");
  35. err = -ENOMEM;
  36. goto out_alloc_disk;
  37. }
  38. dev->gd->major = MY_BLOCK_MAJOR;
  39. dev->gd->minors = 1;
  40. dev->gd->first_minor = 0;
  41. dev->gd->fops = &my_block_ops;
  42. dev->gd->queue = dev->queue; /* replaces whatever queue pointer gd held after blk_alloc_disk() */
  43. dev->gd->private_data = dev;
  44. snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
  45. set_capacity(dev->gd, NR_SECTORS);
  46. if (add_disk(dev->gd)) { /* any failure is reported as -ENOMEM; the call trace in the log below shows the oops under device_add_disk, presumably reached through this call */
  47. err = -ENOMEM;
  48. goto out_alloc_disk;
  49. }
  50. return 0;
  51. out_alloc_disk: /* NOTE(review): dev->gd itself is not released on this path - confirm whether a put_disk() is needed */
  52. blk_put_queue(dev->queue);
  53. out_blk_init:
  54. blk_mq_free_tag_set(&dev->tag_set);
  55. out_alloc_tag_set:
  56. vfree(dev->data);
  57. out_vmalloc:
  58. return err;
  59. }
  60. static int my_block_init(void) { /* Registers the block major, then builds the device via create_block_device(); returns 0 on success or -EBUSY on either failure. */
  61. int status;
  62. status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
  63. if (status < 0) {
  64. printk(KERN_ERR "unable to register mybdev block device\n");
  65. return -EBUSY; /* the value in status is discarded */
  66. }
  67. printk(KERN_INFO
  68. "Block device with major(%d) and name(%s) successfully created\n",
  69. MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
  70. status = create_block_device(&dev); /* dev is defined outside this excerpt */
  71. if (status < 0) {
  72. printk(KERN_ERR "unable to create block device\n");
  73. return -EBUSY; /* NOTE(review): the major registered above is not unregistered on this path, and status is discarded - confirm */
  74. }
  75. return 0;
  76. }

在我编译出ko模块文件并执行insmod之后,insmod进程被杀死并退出。
然后我使用dmesg | tail -100检查日志,看起来在调用add_disk函数时有一个NULL指针解引用:

  1. [ 108.621987] Block device with major(240) and name(mybdev) successfully created
  2. [ 108.624629] BUG: kernel NULL pointer dereference, address: 0000000000000264
  3. [ 108.624637] #PF: supervisor read access in kernel mode
  4. [ 108.624639] #PF: error_code(0x0000) - not-present page
  5. [ 108.624641] PGD 0 P4D 0
  6. [ 108.624643] Oops: 0000 [#1] PREEMPT SMP PTI
  7. [ 108.624646] CPU: 0 PID: 2767 Comm: insmod Tainted: G OE 6.2.0-33-generic #33~22.04.1-Ubuntu
  8. [ 108.624648] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
  9. [ 108.624651] RIP: 0010:kobject_get+0xe/0x90
  10. [ 108.624658] Code: c2 a8 68 05 b9 eb d2 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 54 49 89 fc 48 85 ff 74 22 <f6> 47 3c 01 74 2f 49 8d 7c 24 38 b8 01 00 00 00 f0 41 0f c1 44 24
  11. [ 108.624659] RSP: 0018:ffffbef08649fa68 EFLAGS: 00010206
  12. [ 108.624661] RAX: ffffffffb8f920c4 RBX: 0000000000000228 RCX: 0000000000000000
  13. [ 108.624662] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000228
  14. [ 108.624663] RBP: ffffbef08649fa70 R08: 0000000000000000 R09: 0000000000000000
  15. [ 108.624663] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000228
  16. [ 108.624664] R13: 0000000000000000 R14: ffff967146f53400 R15: ffff967146f53410
  17. [ 108.624665] FS: 00007f80b1aaa000(0000) GS:ffff967179e00000(0000) knlGS:0000000000000000
  18. [ 108.624666] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  19. [ 108.624667] CR2: 0000000000000264 CR3: 0000000056e9c003 CR4: 0000000000370ef0
  20. [ 108.624670] Call Trace:
  21. [ 108.624672] <TASK>
  22. [ 108.624676] ? show_regs+0x72/0x90
  23. [ 108.624681] ? __die+0x25/0x80
  24. [ 108.624682] ? page_fault_oops+0x79/0x190
  25. [ 108.624685] ? mod_memcg_lruvec_state+0x2b/0x60
  26. [ 108.624689] ? mod_objcg_state+0x1ad/0x2e0
  27. [ 108.624692] ? do_user_addr_fault+0x30c/0x640
  28. [ 108.624693] ? exc_page_fault+0x81/0x1b0
  29. [ 108.624698] ? asm_exc_page_fault+0x27/0x30
  30. [ 108.624702] ? kobject_get+0xe/0x90
  31. [ 108.624704] kobject_add_internal+0x35/0x310
  32. [ 108.624706] kobject_add+0x7a/0xf0
  33. [ 108.624709] elv_register_queue+0x3a/0xa0
  34. [ 108.624712] blk_register_queue+0xf2/0x220
  35. [ 108.624715] device_add_disk+0x249/0x400
  36. [ 108.624722] ? __pfx_init_module+0x10/0x10 [mybdev]
  37. [ 108.624726] my_block_init+0x193/0xec0 [mybdev]
  38. [ 108.624729] do_one_initcall+0x46/0x240
  39. [ 108.624733] ? kmalloc_trace+0x2a/0xb0
  40. [ 108.624736] do_init_module+0x52/0x240
  41. [ 108.624739] load_module+0xb96/0xd60
  42. [ 108.624741] ? kernel_read_file+0x25c/0x2b0
  43. [ 108.624746] __do_sys_finit_module+0xcc/0x150
  44. [ 108.624748] ? __do_sys_finit_module+0xcc/0x150
  45. [ 108.624750] __x64_sys_finit_module+0x18/0x30
  46. [ 108.624752] do_syscall_64+0x59/0x90
  47. [ 108.624755] ? ksys_mmap_pgoff+0x123/0x270
  48. [ 108.624759] ? exit_to_user_mode_prepare+0x3b/0xd0
  49. [ 108.624761] ? syscall_exit_to_user_mode+0x38/0x60
  50. [ 108.624762] ? do_syscall_64+0x69/0x90
  51. [ 108.624764] ? syscall_exit_to_user_mode+0x38/0x60
  52. [ 108.624766] ? do_syscall_64+0x69/0x90
  53. [ 108.624767] ? do_syscall_64+0x69/0x90
  54. [ 108.624769] ? do_syscall_64+0x69/0x90
  55. [ 108.624771] entry_SYSCALL_64_after_hwframe+0x72/0xdc
  56. [ 108.624772] RIP: 0033:0x7f80b131ea3d

异常发生在elv_register_queue内部,其源代码如下:

  1. int elv_register_queue(struct request_queue *q, bool uevent) /* Adds the kobject of q's elevator as "iosched" under the disk's queue kobject and creates its attribute files; returns the kobject_add() result. */
  2. {
  3. struct elevator_queue *e = q->elevator;
  4. int error;
  5. lockdep_assert_held(&q->sysfs_lock);
  6. error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched"); /* both e and q->disk are dereferenced here without a NULL check */
  7. if (!error) {
  8. struct elv_fs_entry *attr = e->type->elevator_attrs;
  9. if (attr) {
  10. while (attr->attr.name) { /* walk the attribute array until an entry with a NULL name */
  11. if (sysfs_create_file(&e->kobj, &attr->attr))
  12. break; /* stops creating files on the first failure; error is left unchanged */
  13. attr++;
  14. }
  15. }
  16. if (uevent)
  17. kobject_uevent(&e->kobj, KOBJ_ADD);
  18. set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
  19. }
  20. return error;
  21. }

翻看了若干源代码之后,我仍然找不到是哪个未初始化的内容导致了这个异常。有没有人熟悉内核的这一部分,或者有没有更好的方法来排查这个问题?
内核版本:v6.2.0

l2osamch

l2osamch1#

我通过检查block/genhd.c的源代码解决了这个问题:blk_alloc_disk在分配gendisk的同时会为它分配一个request_queue,所以我删除了自己对request_queue的初始化,代码终于可以正常工作了。

  1. dev->gd = blk_alloc_disk(NUMA_NO_NODE);
  2. dev->queue = dev->gd->queue; // use the queue allocated during blk_alloc_disk

相关问题