Linux 内存管理(1)--物理内存初始化
内容导读
互联网集市收集整理的这篇技术教程文章主要介绍了Linux 内存管理(1)--物理内存初始化,小编现在分享给大家,供广大互联网技能从业者学习和参考。文章包含11565字,纯文字阅读大概需要17分钟。
内容图文
![Linux 内存管理(1)--物理内存初始化](/upload/InfoBanner/zyjiaocheng/953/9c9ae853af2b474898c684b846b8b22b.jpg)
1 内存初始化总体流程
内核版本:Linux 4.14
硬件平台:IMX6DL-SABRESD
start_kernel()
|----page_address_init()
|----setup_arch()
|----setup_machine_fdt()
| |----early_init_dt_scan_nodes()
| |----of_scan_flat_dt(early_init_dt_scan_memory, NULL)
| |----early_init_dt_scan_memory()
| |----early_init_dt_add_memory_arch()
| |----memblock_add()
|----early_mm_init()
|----setup_dma_zone()
|----paging_init()
|----mm_init_cpumask()
|----build_all_zonelists()
|----page_alloc_init()
|----vfs_caches_init_early()
|----mm_init()
|----kmem_cache_init_late()
|----kmemleak_init()
|----debug_objects_mem_init()
|----setup_per_cpu_pageset()
|----numa_policy_init()
|----anon_vma_init()
内核初始化中涉及内存的函数非常多,下面就一步一步来从总体上分析。
2 用户空间与内核空间划分
在32位的Linux系统中,虚拟地址空间一共是4GB。整个虚拟地址空间划分为用户空间+内核空间,有以下4种划分方式:
choice
prompt "Memory split"
depends on MMU
default VMSPLIT_3G
help
Select the desired split between kernel and user memory.
If you are not absolutely sure what you are doing, leave this
option alone!
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
depends on !ARM_LPAE
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
config VMSPLIT_1G
bool "1G/3G user/kernel split"
endchoice
config PAGE_OFFSET
hex
default PHYS_OFFSET if !MMU
default 0x40000000 if VMSPLIT_1G
default 0x80000000 if VMSPLIT_2G
default 0xB0000000 if VMSPLIT_3G_OPT
default 0xC0000000
本文所用的平台采用 VMSPLIT_2G,所以用户空间与内核空间各 2G,分水岭(PAGE_OFFSET)为 0x80000000。
在 .config 文件中
CONFIG_VMSPLIT_2G=y
# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0x80000000
/* PAGE_OFFSET - the virtual address of the start of the kernel image */
#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET)
static inline phys_addr_t __virt_to_phys(unsigned long x)
{
return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET;
}
/*
 * Convert a physical address to a kernel virtual address by removing
 * PHYS_OFFSET and adding PAGE_OFFSET (the inverse of __virt_to_phys).
 */
static inline unsigned long __phys_to_virt(phys_addr_t x)
{
    unsigned long delta = x - PHYS_OFFSET;

    return delta + PAGE_OFFSET;
}
3 获取物理内存大小
所有的内存操作都是基于物理内存的,所以首先要获取物理内存的起始地址与大小。
通过DTS获取物理内存属性,然后解析并添加到 memblock 子系统中。
arch/arm/boot/dts/imx6qdl-sabresd.dtsi
memory: memory {
reg = <0x10000000 0x40000000>;---------------------------------PHYS_OFFSET size
};
根据第一节的总体流程,我们主要来看一下 early_init_dt_scan_memory() 这个函数:
/*
 * early_init_dt_scan_memory - flat device tree scan callback for memory nodes.
 *
 * @node:  offset of the node currently being scanned
 * @uname: name of that node
 * @depth: depth of the node in the tree
 * @data:  unused here
 *
 * Nodes that are not memory nodes are ignored. For a memory node, every
 * (base, size) pair found in "linux,usable-memory" (or, failing that, "reg")
 * is handed to early_init_dt_add_memory_arch(). Always returns 0.
 */
int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
                                     int depth, void *data)
{
    /* For the board discussed in this article: device_type = "memory" */
    const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
    const __be32 *reg, *endp;
    int l;
    bool hotpluggable;

    /* We are scanning "memory" nodes only */
    if (type == NULL) {
        /*
         * The longtrail doesn't have a device_type on the
         * /memory node, so look for the node called /memory@0.
         */
        if (!IS_ENABLED(CONFIG_PPC32) || depth != 1 || strcmp(uname, "memory@0") != 0)
            return 0;
    } else if (strcmp(type, "memory") != 0)
        return 0;

    /* Prefer "linux,usable-memory"; fall back to the plain "reg" property. */
    reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
    if (reg == NULL)
        reg = of_get_flat_dt_prop(node, "reg", &l); /* here: reg = <0x10000000 0x40000000> */
    if (reg == NULL)
        return 0;

    /* l is the property length in bytes; endp points one cell past the end. */
    endp = reg + (l / sizeof(__be32));
    hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);

    pr_debug("memory scan node %s, reg size %d,\n", uname, l);

    /* Consume one (address, size) tuple per iteration. */
    while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
        u64 base, size;

        /* dt_mem_next_cell() advances reg, hence the address-of. */
        base = dt_mem_next_cell(dt_root_addr_cells, &reg); /* here: 0x10000000 */
        size = dt_mem_next_cell(dt_root_size_cells, &reg); /* here: 0x40000000 */

        if (size == 0)
            continue;
        pr_debug(" - %llx , %llx\n", (unsigned long long)base,
                 (unsigned long long)size);

        /* Register the parsed range with the memblock subsystem. */
        early_init_dt_add_memory_arch(base, size);

        if (!hotpluggable)
            continue;

        if (early_init_dt_mark_hotplug_memory_arch(base, size))
            pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
                    base, base + size);
    }

    return 0;
}
根据解析出的 base/size,调用 early_init_dt_add_memory_arch -> memblock_add -> memblock_add_range,将解析出的物理内存加入到 memblock 子系统中。
所有的内存信息都保存在全局变量 memblock 中:
/*
 * One entry of a memblock region array: an address range given by
 * base and size, together with per-region flags.
 */
struct memblock_region {
phys_addr_t base; /* start address of the region */
phys_addr_t size; /* length of the region */
unsigned long flags; /* per-region flags */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid; /* node id; only present with CONFIG_HAVE_MEMBLOCK_NODE_MAP */
#endif
};
/*
 * A named collection of memblock regions (for example "memory" or
 * "reserved"), stored as an array plus bookkeeping counters.
 */
struct memblock_type {
unsigned long cnt; /* number of regions */
unsigned long max; /* size of the allocated array */
phys_addr_t total_size; /* size of all regions */
struct memblock_region *regions; /* the region array itself */
char *name; /* label of this collection */
};
/*
 * Top-level memblock state: one region collection per category plus
 * allocation settings.
 */
struct memblock {
bool bottom_up; /* is bottom up direction? */
phys_addr_t current_limit; /* NOTE(review): presumably the upper bound for allocations - confirm */
struct memblock_type memory; /* regions registered as memory */
struct memblock_type reserved; /* regions registered as reserved */
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem; /* only present with CONFIG_HAVE_MEMBLOCK_PHYS_MAP */
#endif
};
/*
 * The global memblock instance. Each collection starts out pointing at a
 * static region array and holding a single empty dummy entry (cnt = 1).
 */
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_REGIONS,
.reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
.physmem.regions = memblock_physmem_init_regions,
.physmem.cnt = 1, /* empty dummy entry */
.physmem.max = INIT_PHYSMEM_REGIONS,
.physmem.name = "physmem",
#endif
/* Initial direction flag is false, i.e. not bottom-up. */
.bottom_up = false,
.current_limit = MEMBLOCK_ALLOC_ANYWHERE,
};
memblock_add用于添加region到memblock.memory中;在内核初始化阶段很多地方(比如arm_memblock_init)使用memblock_reserve将region添加到memblock.reserved。
memblock_remove用于将一个region从memblock.memory中移除,memblock_free等用于将一个region从memblock.reserved中移除。
int __init_memblock memblock_add_range(struct memblock_type *type,
phys_addr_t base, phys_addr_t size,
int nid, unsigned long flags)
{
bool insert = false;
phys_addr_t obase = base;
phys_addr_t end = base + memblock_cap_size(base, &size);
int idx, nr_new;
struct memblock_region *rgn;
if (!size)
return 0;
/* special case for empty array */
if (type->regions[0].size == 0) {
WARN_ON(type->cnt != 1 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
return 0;
}
repeat:
/*
* The following is executed twice. Once with %false @insert and
* then with %true. The first counts the number of regions needed
* to accommodate the new area. The second actually inserts them.
*/
...
...
memblock
在内核启动阶段,也有内存管理的需求,但是此时伙伴系统并没有完成初始化。在早期内核中使用bootmem机制,作为内核初始化阶段的内存分配器。
后来使用memblock作为内核初始化阶段内存分配器,用于内存分配和释放。
CONFIG_NO_BOOTMEM用于决定是否使用bootmem;IMX6DL 平台使能了该选项,因此不使用bootmem,而是使用memblock作为初始化阶段的内存分配器。
因为bootmem和memblock两者API兼容,所以使用者无感。使用memblock的时候编译mm/nobootmem.c,调用memblock.c中的分配器接口。
4 物理内存映射
由于没有打开CONFIG_ARM_LPAE,Linux页表采用两层映射。所以PGD->PUD->PMD->PTE中间的PUD/PMD被省略了,pmd_off_k的返回值实际就是pgd_offset_k的返回值。
linux-4.14/arch/arm/mm/mm.h
/*
 * Return the pmd entry for kernel virtual address virt, obtained by chaining
 * pgd_offset_k() -> pud_offset() -> pmd_offset() for that address.
 */
static inline pmd_t *pmd_off_k(unsigned long virt)
{
return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
}
linux-4.14/mm/init-mm.c
/*
 * init_mm: statically initialised mm_struct. Its pgd points at
 * swapper_pg_dir, and pgd_offset_k() looks up kernel page-directory
 * entries through it.
 */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir, /* page directory consulted by pgd_offset_k() */
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
};
/* to find an entry in a page-table-directory */
/* pgd_index: index of addr in the directory, i.e. addr shifted right by PGDIR_SHIFT */
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
/* pgd_offset: pointer to the directory entry for addr inside mm's pgd */
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
/* to find an entry in a kernel page-table-directory */
/* pgd_offset_k: same lookup, always against init_mm */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
prepare_page_table用于清空页表项,其实清空了三段地址页表项。
static inline void prepare_page_table(void)
{
unsigned long addr;
phys_addr_t end;
/*
* Clear out all the mappings below the kernel image.
*/
for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) ----------------------清除 0 ~ MODULES_VADDR(PAGE_OFFSET(0X80000000)) 地址段一级页表
pmd_clear(pmd_off_k(addr));
#ifdef CONFIG_XIP_KERNEL
/* The XIP kernel is mapped in the module area -- skip over it */
addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK;
#endif
for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) -------------------------------------清除 MODULES_VADDR ~ PAGE_OFFSET 地址段一级页表
pmd_clear(pmd_off_k(addr));
/*
* Find the end of the first block of lowmem.
*/
end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
if (end >= arm_lowmem_limit) -------------------------------------------------------------end = 0x50000000,arm_lowmem_limit = 0x50000000(具体查看下面代码)
end = arm_lowmem_limit;
/*
* Clear out all the kernel space mappings, except for the first
* memory bank, up to the vmalloc region.
*/
for (addr = __phys_to_virt(end); ---------------------------------------------------------end = 0x50000000,虚拟地址 0xb0000000。清除 0xb0000000 ~ VMALLOC_START 地址段一级页表。
addr < VMALLOC_START; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
}
#define VMALLOC_END 0xffffffffUL
#define VMALLOC_OFFSET (8*1024*1024)
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
phys_addr_t block_end = reg->base + reg->size;
lowmem_limit = min_t(u64,
vmalloc_limit,
block_end);
arm_lowmem_limit = lowmem_limit;
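真正创建页表是在 map_lowmem 中,它创建了两块区间映射:区间一 0x60000000~0x60800000(0xc0000000~0xc0800000)和区间二 0x60800000~0x8f800000(0xc0800000~0xef800000)。(注:这组地址对应 PHYS_OFFSET=0x60000000、PAGE_OFFSET=0xc0000000 的平台;在本文的 IMX6DL 平台上(PHYS_OFFSET=0x10000000、PAGE_OFFSET=0x80000000),区间一为 0x10000000~0x10f00000(0x80000000~0x80f00000),区间二为 0x10f00000~0x50000000(0x80f00000~0xc0000000)。)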
区间一:具有读写执行权限,主要用于存放Kernel代码数据段,还包括swapper_pg_dir内容。
区间二:具有读写,不允许执行,是Normal Memory部分。
可以看出这两个区间的虚拟地址到物理地址映射是线性映射,但在末尾存在特殊的两页不是线性映射。
/*
 * map_lowmem - create the mappings for all lowmem memory banks.
 *
 * Each memblock memory region is clipped to arm_lowmem_limit and then mapped
 * via create_mapping(). The section-aligned range that holds the kernel
 * image (kernel_x_start .. kernel_x_end) is mapped MT_MEMORY_RWX. A region
 * starting at or above kernel_x_end, or the parts of an overlapping region
 * outside the kernel range, are mapped MT_MEMORY_RW. A region ending below
 * kernel_x_start is mapped MT_MEMORY_RWX. Regions flagged nomap are skipped.
 */
static void __init map_lowmem(void)
{
    struct memblock_region *reg;
    /* On this board: kernel_x_start = 0x10000000 */
    phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
    /* On this board: kernel_x_end = 0x10f00000 */
    phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

    /* Map all the lowmem memory banks. */
    for_each_memblock(memory, reg) {
        phys_addr_t start = reg->base;
        phys_addr_t end = start + reg->size;
        struct map_desc map;

        if (memblock_is_nomap(reg))
            continue;

        /* Never map beyond the lowmem limit. */
        if (end > arm_lowmem_limit)
            end = arm_lowmem_limit;
        if (start >= end)
            break;

        if (end < kernel_x_start) {
            /* Region lies entirely below the kernel range. */
            map.pfn = __phys_to_pfn(start);
            map.virtual = __phys_to_virt(start);
            map.length = end - start;
            map.type = MT_MEMORY_RWX;

            create_mapping(&map);
        } else if (start >= kernel_x_end) {
            /* Region lies entirely above the kernel range. */
            map.pfn = __phys_to_pfn(start);
            map.virtual = __phys_to_virt(start);
            map.length = end - start;
            map.type = MT_MEMORY_RW;

            create_mapping(&map);
        } else {
            /* This better cover the entire kernel */
            if (start < kernel_x_start) {
                map.pfn = __phys_to_pfn(start);
                map.virtual = __phys_to_virt(start);
                map.length = kernel_x_start - start;
                map.type = MT_MEMORY_RW;

                create_mapping(&map);
            }

            map.pfn = __phys_to_pfn(kernel_x_start);
            map.virtual = __phys_to_virt(kernel_x_start);
            map.length = kernel_x_end - kernel_x_start;
            map.type = MT_MEMORY_RWX;

            /*
             * On this board: virtual 0x80000000 .. 0x80f00000 ->
             * physical 0x10000000 .. 0x10f00000, MT_MEMORY_RWX.
             */
            create_mapping(&map);

            if (kernel_x_end < end) {
                map.pfn = __phys_to_pfn(kernel_x_end);
                map.virtual = __phys_to_virt(kernel_x_end);
                map.length = end - kernel_x_end;
                map.type = MT_MEMORY_RW;

                /*
                 * On this board: virtual 0x80f00000 .. 0xc0000000 ->
                 * physical 0x10f00000 .. 0x50000000, MT_MEMORY_RW.
                 */
                create_mapping(&map);
            }
        }
    }
}
内容总结
以上是互联网集市为您收集整理的Linux 内存管理(1)--物理内存初始化全部内容,希望文章能够帮你解决Linux 内存管理(1)--物理内存初始化所遇到的程序开发问题。 如果觉得互联网集市技术教程内容还不错,欢迎将互联网集市网站推荐给程序员好友。
内容备注
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 gblab@vip.qq.com 举报,一经查实,本站将立刻删除。
内容手机端
扫描二维码推送至手机访问。