kvm: x86: Add memcg accounting to KVM allocations

There are many KVM kernel memory allocations which are tied to the life of
the VM process and should be charged to the VM process's cgroup. If the
allocations aren't tied to the process, the OOM killer will not know
that killing the process will free the associated kernel memory.
Add __GFP_ACCOUNT flags to many of the allocations which are not yet being
charged to the VM process's cgroup.

Tested:
	Ran all kvm-unit-tests on a 64 bit Haswell machine, the patch
	introduced no new failures.
	Ran a kernel memory accounting test which creates a VM to touch
	memory and then checks that the kernel memory allocated for the
	process is within certain bounds.
	With this patch we account for much more of the vmalloc and slab memory
	allocated for the VM.

There remain a few allocations which should be charged to the VM's
cgroup but are not. In x86, they include:
	vcpu->arch.pio_data
There allocations are unaccounted in this patch because they are mapped
to userspace, and accounting them to a cgroup causes problems. This
should be addressed in a future patch.

Signed-off-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Ben Gardon 2019-02-11 11:02:50 -08:00 committed by Paolo Bonzini
parent b12ce36a43
commit 254272ce65
8 changed files with 21 additions and 18 deletions

View File

@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
mutex_lock(&hv->hv_lock);
ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
mutex_unlock(&hv->hv_lock);
if (ret >= 0)

View File

@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pid_t pid_nr;
int ret;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT);
if (!pit)
return NULL;

View File

@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm)
struct kvm_pic *s;
int ret;
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT);
if (!s)
return -ENOMEM;
spin_lock_init(&s->lock);

View File

@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm)
struct kvm_ioapic *ioapic;
int ret;
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT);
if (!ioapic)
return -ENOMEM;
spin_lock_init(&ioapic->lock);

View File

@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm)
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
new = kvzalloc(sizeof(struct kvm_apic_map) +
sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
GFP_KERNEL_ACCOUNT);
if (!new)
goto out;
@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
ASSERT(vcpu != NULL);
apic_debug("apic_init %d\n", vcpu->vcpu_id);
apic = kzalloc(sizeof(*apic), GFP_KERNEL);
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
goto nomem;
vcpu->arch.apic = apic;
apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!apic->regs) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id);

View File

@ -961,7 +961,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
if (!obj)
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@ -3702,7 +3702,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
u64 *lm_root;
lm_root = (void*)get_zeroed_page(GFP_KERNEL);
lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (lm_root == NULL)
return 1;
@ -5499,7 +5499,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
* Therefore we need to allocate shadow page tables in the first
* 4GB of memory, which happens to fit the DMA32 zone.
*/
page = alloc_page(GFP_KERNEL | __GFP_DMA32);
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
if (!page)
return -ENOMEM;

View File

@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
slot->arch.gfn_track[i] =
kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (!slot->arch.gfn_track[i])
goto track_free;
}

View File

@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.lapic)
@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xsave)
break;
@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XCRS: {
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xcrs)
break;
@ -9040,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
static_key_slow_inc(&kvm_no_apic_vcpu);
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.mce_banks) {
r = -ENOMEM;
goto fail_free_lapic;
}
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
GFP_KERNEL_ACCOUNT)) {
r = -ENOMEM;
goto fail_free_mce_banks;
}
@ -9306,13 +9308,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
slot->arch.rmap[i] =
kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (!slot->arch.rmap[i])
goto out_free;
if (i == 0)
continue;
linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
if (!linfo)
goto out_free;