From 762237bb714b0cd93ce2405ccc891fadb405c26e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Mar 2011 07:20:20 +0000 Subject: [PATCH 01/13] drm/i915: Remove surplus POSTING_READs before wait_for_vblank ... as wait_for_vblank (and friends) will do a flush of the MMIO writes anyway. References: https://bugs.freedesktop.org/show_bug.cgi?id=34601 Cc: Jesse Barnes Signed-off-by: Chris Wilson Reviewed-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_display.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3106c0dc8389..3bc6ab56cf8b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1518,7 +1518,6 @@ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, val = I915_READ(reg); val |= PIPECONF_ENABLE; I915_WRITE(reg, val); - POSTING_READ(reg); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1554,7 +1553,6 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv, val = I915_READ(reg); val &= ~PIPECONF_ENABLE; I915_WRITE(reg, val); - POSTING_READ(reg); intel_wait_for_pipe_off(dev_priv->dev, pipe); } @@ -1579,7 +1577,6 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv, val = I915_READ(reg); val |= DISPLAY_PLANE_ENABLE; I915_WRITE(reg, val); - POSTING_READ(reg); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1612,7 +1609,6 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv, val = I915_READ(reg); val &= ~DISPLAY_PLANE_ENABLE; I915_WRITE(reg, val); - POSTING_READ(reg); intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1769,7 +1765,6 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) return; I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - POSTING_READ(DPFC_CONTROL); intel_wait_for_vblank(dev, intel_crtc->pipe); } @@ -1861,7 +1856,6 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) return; I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - POSTING_READ(ILK_DPFC_CONTROL); intel_wait_for_vblank(dev, intel_crtc->pipe); } @@ -5777,7 +5771,6 @@ static void intel_increase_pllclock(struct drm_crtc *crtc) dpll &= ~DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); - POSTING_READ(dpll_reg); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); @@ -5821,7 +5814,6 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc) dpll |= DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); - dpll = I915_READ(dpll_reg); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); if (!(dpll & DISPLAY_RATE_SELECT_FPA1)) From 00d70b15125030391d17baab2c2f70f93b3339a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Mar 2011 07:18:29 +0000 Subject: [PATCH 02/13] drm/i915: skip redundant operations whilst enabling pipes and planes If the pipe or plane is already enabled, then we do not need to enable it again and can skip the delay. Similarly if it is already disabled when we want to disable it, we can also skip it. This fixes a regression from b24e717988, which caused the LVDS output on one PineView machine to become corrupt after changing orientation several times. References: https://bugs.freedesktop.org/show_bug.cgi?id=34601 Cc: Jesse Barnes Signed-off-by: Chris Wilson Reviewed-by: Keith Packard Tested-by: mengmeng.meng@intel.com --- drivers/gpu/drm/i915/intel_display.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3bc6ab56cf8b..841f0397288b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1516,8 +1516,10 @@ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, reg = PIPECONF(pipe); val = I915_READ(reg); - val |= PIPECONF_ENABLE; - I915_WRITE(reg, val); + if (val & PIPECONF_ENABLE) + return; + + I915_WRITE(reg, val | PIPECONF_ENABLE); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1551,8 +1553,10 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv, reg = PIPECONF(pipe); val = I915_READ(reg); - val &= ~PIPECONF_ENABLE; - I915_WRITE(reg, val); + if ((val & PIPECONF_ENABLE) == 0) + return; + + I915_WRITE(reg, val & ~PIPECONF_ENABLE); intel_wait_for_pipe_off(dev_priv->dev, pipe); } @@ -1575,8 +1579,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - val |= DISPLAY_PLANE_ENABLE; - I915_WRITE(reg, val); + if (val & DISPLAY_PLANE_ENABLE) + return; + + I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); intel_wait_for_vblank(dev_priv->dev, pipe); } @@ -1607,8 +1613,10 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - val &= ~DISPLAY_PLANE_ENABLE; - I915_WRITE(reg, val); + if ((val & DISPLAY_PLANE_ENABLE) == 0) + return; + + I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } From 09bfa51773c1e90f13000dc2fc0c4b84047009bc Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Thu, 17 Mar 2011 13:45:12 +0000 Subject: [PATCH 03/13] drm/i915: Prevent racy removal of request from client list When i915_gem_retire_requests_ring calls i915_gem_request_remove_from_client, the client_list for that request may already be removed in i915_gem_release. So we may call twice list_del(&request->client_list), resulting in an oops like this report: [126167.230394] BUG: unable to handle kernel paging request at 00100104 [126167.230699] IP: [] i915_gem_retire_requests_ring+0xd4/0x240 [i915] [126167.231042] *pdpt = 00000000314c1001 *pde = 0000000000000000 [126167.231314] Oops: 0002 [#1] SMP [126167.231471] last sysfs file: /sys/devices/LNXSYSTM:00/device:00/PNP0C0A:00/power_supply/BAT1/current_now [126167.231901] Modules linked in: snd_seq_dummy nls_utf8 isofs btrfs zlib_deflate libcrc32c ufs qnx4 hfsplus hfs minix ntfs vfat msdos fat jfs xfs exportfs reiserfs cryptd aes_i586 aes_generic binfmt_misc vboxnetadp vboxnetflt vboxdrv parport_pc ppdev snd_hda_codec_hdmi snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep arc4 snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq uvcvideo videodev snd_timer snd_seq_device joydev iwlagn iwlcore mac80211 snd cfg80211 soundcore i915 drm_kms_helper snd_page_alloc psmouse drm serio_raw i2c_algo_bit video lp parport usbhid hid sky2 sdhci_pci ahci sdhci libahci [126167.232018] [126167.232018] Pid: 1101, comm: Xorg Not tainted 2.6.38-6-generic-pae #34-Ubuntu Gateway MC7833U / [126167.232018] EIP: 0060:[] EFLAGS: 00213246 CPU: 0 [126167.232018] EIP is at i915_gem_retire_requests_ring+0xd4/0x240 [i915] [126167.232018] EAX: 00200200 EBX: f1ac25b0 ECX: 00000040 EDX: 00100100 [126167.232018] ESI: f1a2801c EDI: e87fc060 EBP: ef4d7dd8 ESP: ef4d7db0 [126167.232018] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [126167.232018] Process Xorg (pid: 1101, ti=ef4d6000 task=f1ba6500 task.ti=ef4d6000) [126167.232018] Stack: [126167.232018] f1a28000 f1a2809c f1a28094 0058bd97 f1aa2400 f1a2801c 0058bd7b 0058bd85 [126167.232018] f1a2801c f1a28000 ef4d7e38 f8c2e995 ef4d7e30 ef4d7e60 c14d1ebc f6b3a040 [126167.232018] f1522cc0 000000db 00000000 f1ba6500 ffffffa1 00000000 00000001 f1a29214 [126167.232018] Call Trace: Unfortunately the call trace reported was cut, but looking at debug symbols the crash is at __list_del, when probably list_del is called twice on the same request->client_list, as the dereferenced value is LIST_POISON1 + 4, and by looking more at the debug symbols before list_del call it should have being called by i915_gem_request_remove_from_client And as I can see in the code, it seems we indeed have the possibility to remove a request->client_list twice, which would cause the above, because we do list_del(&request->client_list) on both i915_gem_request_remove_from_client and i915_gem_release As Chris Wilson pointed out, it's indeed the case: "(...) I had thought that the actual insertion/deletion was serialised under the struct mutex and the intention of the spinlock was to protect the unlocked list traversal during throttling. However, I missed that i915_gem_release() is also called without struct mutex and so we do need the double check for i915_gem_request_remove_from_client()." This change does the required check to avoid the duplicate remove of request->client_list. Bugzilla: http://bugs.launchpad.net/bugs/733780 Cc: stable@kernel.org # 2.6.38 Signed-off-by: Herton Ronaldo Krzesinski Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c4c2855d002d..d4bf061dbaf1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1796,8 +1796,10 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_list); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } From 7ccb4a53eb03c9196646ca0c2a97558313e886f1 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 18 Mar 2011 07:37:35 +0000 Subject: [PATCH 04/13] drm/i915: Re-enable self-refresh A broken implementation of is_pot() prevented the detection of when a singular pipe was enabled. Eric Anholt pointed out the existence of is_power_of_2() so use that instead of our broken code! Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=35402 Signed-off-by: Yuanhan Liu Tested-by: xunx.fang@intel.com Reviewed-by: Keith Packard Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 841f0397288b..49c07231302c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3885,10 +3885,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, display, cursor); } -static inline bool single_plane_enabled(unsigned int mask) -{ - return mask && (mask & -mask) == 0; -} +#define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct drm_device *dev) { From 29c5a587284195278e233eec5c2234c24fb2c204 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Mar 2011 15:23:22 +0000 Subject: [PATCH 05/13] drm/i915: Fix tiling corruption from pipelined fencing ... even though it was disabled. A mistake in the handling of fence reuse caused us to skip the vital delay of waiting for the object to finish rendering before changing the register. This resulted in us changing the fence register whilst the bo was active and so causing the blits to complete using the wrong stride or even the wrong tiling. (Visually the effect is that small blocks of the screen look like they have been interlaced). The fix is to wait for the GPU to finish using the memory region pointed to by the fence before changing it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34584 Cc: Andy Whitcroft Cc: Daniel Vetter Reviewed-by: Daniel Vetter [Note for 2.6.38-stable, we need to reintroduce the interruptible passing] Signed-off-by: Chris Wilson Tested-by: Dave Airlie --- drivers/gpu/drm/i915/i915_gem.c | 44 +++++++++++++-------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d4bf061dbaf1..c5dfb59e13d3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2581,8 +2581,23 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, reg = &dev_priv->fence_regs[obj->fence_reg]; list_move_tail(®->lru_list, &dev_priv->mm.fence_list); - if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) - pipelined = NULL; + if (obj->tiling_changed) { + ret = i915_gem_object_flush_fence(obj, pipelined); + if (ret) + return ret; + + if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) + pipelined = NULL; + + if (pipelined) { + reg->setup_seqno = + i915_gem_next_request_seqno(pipelined); + obj->last_fenced_seqno = reg->setup_seqno; + obj->last_fenced_ring = pipelined; + } + + goto update; + } if (!pipelined) { if (reg->setup_seqno) { @@ -2601,31 +2616,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, ret = i915_gem_object_flush_fence(obj, pipelined); if (ret) return ret; - } else if (obj->tiling_changed) { - if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - if (ret) - return ret; - } - - obj->fenced_gpu_access = false; - } - } - - if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) - pipelined = NULL; - BUG_ON(!pipelined && reg->setup_seqno); - - if (obj->tiling_changed) { - if (pipelined) { - reg->setup_seqno = - i915_gem_next_request_seqno(pipelined); - obj->last_fenced_seqno = reg->setup_seqno; - obj->last_fenced_ring = pipelined; - } - goto update; } return 0; From 48898b038b69ef4801f0e059026c8f6920684677 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Mar 2011 09:06:49 +0000 Subject: [PATCH 06/13] drm/i915/dp: Correct the order of deletion for ghost eDP devices The order of the calls does matter indeed. Swapping the call order of intel_dp_destroy() and intel_dp_encoder_destroy() fixes the problem. This is because i2c_del_adapter unregisters the device which parent is intel_connector, and connectors are removed in intel_dp_destroy(). Thus intel_dp_encoder_destroy() must be called before intel_dp_destroy(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=24822 Signed-off-by: Takashi Iwai Signed-off-by: Chris Wilson Reviewed-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d29e33f815d7..0daefca5cbb8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1957,9 +1957,9 @@ intel_dp_init(struct drm_device *dev, int output_reg) DP_NO_AUX_HANDSHAKE_LINK_TRAINING; } else { /* if this fails, presume the device is a ghost */ - DRM_ERROR("failed to retrieve link info\n"); - intel_dp_destroy(&intel_connector->base); + DRM_INFO("failed to retrieve link info, disabling eDP\n"); intel_dp_encoder_destroy(&intel_dp->base.base); + intel_dp_destroy(&intel_connector->base); return; } } From e281fcaa287fb39ce26d9aa33a716c2a7bb8484e Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 18 Mar 2011 10:32:07 -0700 Subject: [PATCH 07/13] drm/i915: report correct render clock frequencies on SNB Fix up the debug file to report the right frequencies. On SNB, we program the PCU with a frequency ratio, which is multiplied by 100MHz on the CPU side. But GFX only runs at half that, so report it as such to avoid confusion. Signed-off-by: Jesse Barnes Signed-off-by: Chris Wilson Reviewed-by: Keith Packard --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++---- drivers/gpu/drm/i915/intel_display.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 09e0327fc6ce..87c8e29465e3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -892,7 +892,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused) seq_printf(m, "Render p-state limit: %d\n", rp_state_limits & 0xff); seq_printf(m, "CAGF: %dMHz\n", ((rpstat & GEN6_CAGF_MASK) >> - GEN6_CAGF_SHIFT) * 100); + GEN6_CAGF_SHIFT) * 50); seq_printf(m, "RP CUR UP EI: %dus\n", rpupei & GEN6_CURICONT_MASK); seq_printf(m, "RP CUR UP: %dus\n", rpcurup & @@ -908,15 +908,15 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused) max_freq = (rp_state_cap & 0xff0000) >> 16; seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); max_freq = (rp_state_cap & 0xff00) >> 8; seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); max_freq = rp_state_cap & 0xff; seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - max_freq * 100); + max_freq * 50); __gen6_gt_force_wake_put(dev_priv); } else { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 49c07231302c..432fc04c6bff 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6930,7 +6930,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); if (pcu_mbox & (1<<31)) { /* OC supported */ max_freq = pcu_mbox & 0xff; - DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100); + DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); } /* In units of 100MHz */ From ed0291fd16f6349ef43d3f25a4626c2f7baf568b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Mar 2011 08:21:45 +0000 Subject: [PATCH 08/13] drm/i915: Fix computation of pitch for dumb bo creator Cc: Dave Airlie Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c5dfb59e13d3..0b99c30bb19c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -224,7 +224,7 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_mode_create_dumb *args) { /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width & ((args->bpp + 1) / 8), 64); + args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, args->size, &args->handle); From d4aeee776017b6da6dcd12f453cd82a3c951a0dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Mar 2011 15:11:24 +0000 Subject: [PATCH 09/13] drm/i915: Disable pagefaults along execbuffer relocation fast path Along the fast path for relocation handling, we attempt to copy directly from the user data structures whilst holding our mutex. This causes lockdep to warn about circular lock dependencies if we need to pagefault the user pages. [Since when handling a page fault on a mmapped bo, we need to acquire the struct mutex whilst already holding the mm semaphore, it is then verboten to acquire the mm semaphore when already holding the struct mutex. The likelihood of the user passing in the relocations contained in a GTT mmaped bo is low, but conceivable for extreme pathology.] In order to force the mm to return EFAULT rather than handle the pagefault, we therefore need to disable pagefaults across the relocation fast path. Signed-off-by: Chris Wilson Cc: stable@kernel.org Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7ff7f933ddf1..20a4cc5b818f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -367,6 +367,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, uint32_t __iomem *reloc_entry; void __iomem *reloc_page; + /* We can't wait for rendering with pagefaults disabled */ + if (obj->active && in_atomic()) + return -EFAULT; + ret = i915_gem_object_set_to_gtt_domain(obj, 1); if (ret) return ret; @@ -440,15 +444,24 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, struct list_head *objects) { struct drm_i915_gem_object *obj; - int ret; + int ret = 0; + /* This is the fast path and we cannot handle a pagefault whilst + * holding the struct mutex lest the user pass in the relocations + * contained within a mmaped bo. For in such a case we, the page + * fault handler would call i915_gem_fault() and we would try to + * acquire the struct mutex again. Obviously this is bad and so + * lockdep complains vehemently. + */ + pagefault_disable(); list_for_each_entry(obj, objects, exec_list) { ret = i915_gem_execbuffer_relocate_object(obj, eb); if (ret) - return ret; + break; } + pagefault_enable(); - return 0; + return ret; } static int From 36d527deadf7d0c302e3452dde39465e74a65a08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Mar 2011 22:26:49 +0000 Subject: [PATCH 10/13] drm/i915: Restore missing command flush before interrupt on BLT ring We always skipped flushing the BLT ring if the request flush did not include the RENDER domain. However, this neglects that we try to flush the COMMAND domain after every batch and before the breadcrumb interrupt (to make sure the batch is indeed completed prior to the interrupt firing and so insuring CPU coherency). As a result of the missing flush, incoherency did indeed creep in, most notable when using lots of command buffers and so potentially rewritting an active command buffer (i.e. the GPU was still executing from it even though the following interrupt had already fired and the request/buffer retired). As all ring->flush routines now have the same preconditions, de-duplicate and move those checks up into i915_gem_flush_ring(). Fixes gem_linear_blit. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=35284 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Tested-by: mengmeng.meng@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 113 +++++++++++------------- 2 files changed, 57 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0b99c30bb19c..edd6098743b2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2219,13 +2219,18 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, { int ret; + if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) + return 0; + trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) return ret; - i915_gem_process_flushing_list(ring, flush_domains); + if (flush_domains & I915_GEM_GPU_DOMAINS) + i915_gem_process_flushing_list(ring, flush_domains); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 789c47801ba8..e9e6f71418a4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -65,62 +65,60 @@ render_ring_flush(struct intel_ring_buffer *ring, u32 cmd; int ret; - if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; + if ((invalidate_domains|flush_domains) & + I915_GEM_DOMAIN_RENDER) + cmd &= ~MI_NO_WRITE_FLUSH; + if (INTEL_INFO(dev)->gen < 4) { /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. + * On the 965, the sampler cache always gets flushed + * and this bit is reserved. */ - - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & - I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (INTEL_INFO(dev)->gen < 4) { - /* - * On the 965, the sampler cache always gets flushed - * and this bit is reserved. - */ - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) - cmd |= MI_READ_FLUSH; - } - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) - cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(dev) || IS_GEN5(dev))) - cmd |= MI_INVALIDATE_ISP; - - ret = intel_ring_begin(ring, 2); - if (ret) - return ret; - - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + cmd |= MI_READ_FLUSH; } + if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd |= MI_EXE_FLUSH; + + if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && + (IS_G4X(dev) || IS_GEN5(dev))) + cmd |= MI_INVALIDATE_ISP; + + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; + + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -568,9 +566,6 @@ bsd_ring_flush(struct intel_ring_buffer *ring, { int ret; - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = intel_ring_begin(ring, 2); if (ret) return ret; @@ -1056,9 +1051,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - ret = intel_ring_begin(ring, 4); if (ret) return ret; @@ -1230,9 +1222,6 @@ static int blt_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = blt_ring_begin(ring, 4); if (ret) return ret; From 26e12f894317bf0221fed40bef6f937538a78c0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Mar 2011 11:20:19 +0000 Subject: [PATCH 11/13] drm/i915: Fix use after free within tracepoint Detected by scripts/coccinelle/free/kfree.cocci. Signed-off-by: Chris Wilson Reviewed-by: Keith Packard --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index edd6098743b2..c9bdaf208a91 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3603,6 +3603,8 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) return; } + trace_i915_gem_object_destroy(obj); + if (obj->base.map_list.map) i915_gem_free_mmap_offset(obj); @@ -3612,8 +3614,6 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) kfree(obj->page_cpu_valid); kfree(obj->bit_17); kfree(obj); - - trace_i915_gem_object_destroy(obj); } void i915_gem_free_object(struct drm_gem_object *gem_obj) From f6e47884e7f588094bf7b824c839a9ee33f2aa55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Mar 2011 21:09:12 +0000 Subject: [PATCH 12/13] drm/i915: Avoid unmapping pages from a NULL address space Found by gem_stress. As we perform retirement from a workqueue, it is possible for us to free and unbind objects after the last close on the device, and so after the address space has been torn down and reset to NULL: BUG: unable to handle kernel NULL pointer dereference at 00000054 IP: [] mutex_lock+0xf/0x27 *pde = 00000000 Oops: 0002 [#1] SMP last sysfs file: /sys/module/vt/parameters/default_utf8 Pid: 5, comm: kworker/u:0 Not tainted 2.6.38+ #214 EIP: 0060:[] EFLAGS: 00010206 CPU: 1 EIP is at mutex_lock+0xf/0x27 EAX: 00000054 EBX: 00000054 ECX: 00000000 EDX: 00012fff ESI: 00000028 EDI: 00000000 EBP: f706fe20 ESP: f706fe18 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process kworker/u:0 (pid: 5, ti=f706e000 task=f7060d00 task.ti=f706e000) Stack: f5aa3c60 00000000 f706fe74 c107e7df 00000246 dea55380 00000054 f5aa3c60 f706fe44 00000061 f70b4000 c13fff84 00000008 f706fe54 00000000 00000000 00012f00 00012fff 00000028 c109e575 f6b36700 00100000 00000000 f706fe90 Call Trace: [] unmap_mapping_range+0x7d/0x1e6 [] ? mntput_no_expire+0x52/0xb6 [] i915_gem_release_mmap+0x49/0x58 [] i915_gem_object_unbind+0x4c/0x125 [] i915_gem_free_object_tail+0x1d/0xdb [] i915_gem_free_object+0x3d/0x41 [] ? drm_gem_object_free+0x0/0x27 [] drm_gem_object_free+0x25/0x27 [] kref_put+0x39/0x42 [] drm_gem_object_unreference+0x16/0x18 [] i915_gem_object_move_to_inactive+0xba/0xbe [] i915_gem_retire_requests_ring+0x16e/0x1a5 [] i915_gem_retire_requests+0x48/0x63 [] i915_gem_retire_work_handler+0x4c/0x117 [] process_one_work+0x140/0x21b [] ? __need_more_worker+0x13/0x2a [] ? need_to_create_worker+0x1c/0x35 [] ? i915_gem_retire_work_handler+0x0/0x117 [] worker_thread+0xd4/0x14b [] ? worker_thread+0x0/0x14b [] kthread+0x68/0x6d [] ? kthread+0x0/0x6d [] kernel_thread_helper+0x6/0x10 Code: 00 e8 98 fe ff ff 5d c3 55 89 e5 3e 8d 74 26 00 ba 01 00 00 00 e8 84 fe ff ff 5d c3 55 89 e5 53 8d 64 24 fc 3e 8d 74 26 00 89 c3 ff 08 79 05 e8 ab ff ff ff 89 e0 25 00 e0 ff ff 89 43 10 58 EIP: [] mutex_lock+0xf/0x27 SS:ESP 0068:f706fe18 CR2: 0000000000000054 Signed-off-by: Chris Wilson Reviewed-by: Keith Packard --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9bdaf208a91..7ce3f353af33 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1356,9 +1356,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) if (!obj->fault_mappable) return; - unmap_mapping_range(obj->base.dev->dev_mapping, - (loff_t)obj->base.map_list.hash.key<base.size, 1); + if (obj->base.dev->dev_mapping) + unmap_mapping_range(obj->base.dev->dev_mapping, + (loff_t)obj->base.map_list.hash.key<base.size, 1); obj->fault_mappable = false; } From f0c860246472248a534656d6cdbed5a36d1feb2e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Mar 2011 17:53:28 +0000 Subject: [PATCH 13/13] Revert "drm/i915: Don't save/restore hardware status page address register" This reverts commit a7a75c8f70d6f6a2f16c9f627f938bbee2d32718. There are two different variations on how Intel hardware addresses the "Hardware Status Page". One as a location in physical memory and the other as an offset into the virtual memory of the GPU, used in more recent chipsets. (The HWS itself is a cacheable region of memory which the GPU can write to without requiring CPU synchronisation, used for updating various details of hardware state, such as the position of the GPU head in the ringbuffer, the last breadcrumb seqno, etc). These two types of addresses were updated in different locations of code - one inline with the ringbuffer initialisation, and the other during device initialisation. (The HWS page is logically associated with the rings, and there is one HWS page per ring.) During resume, only the ringbuffers were being re-initialised along with the virtual HWS page, leaving the older physical address HWS untouched. This then caused a hang on the older gen3/4 (915GM, 945GM, 965GM) the first time we tried to synchronise the GPU as the breadcrumbs were never being updated. Reported-and-tested-by: Linus Torvalds Reported-by: Jan Niehusmann Reported-and-tested-by: Justin P. Mattock Reported-and-tested-by: Michael "brot" Groh Cc: Zhenyu Wang Signed-off-by: Chris Wilson Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_suspend.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 449650545bb4..5004724ea57e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -383,6 +383,7 @@ typedef struct drm_i915_private { u32 saveDSPACNTR; u32 saveDSPBCNTR; u32 saveDSPARB; + u32 saveHWS; u32 savePIPEACONF; u32 savePIPEBCONF; u32 savePIPEASRC; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 7e992a8e9098..da474153a0a2 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -796,6 +796,9 @@ int i915_save_state(struct drm_device *dev) pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); + /* Hardware status page */ + dev_priv->saveHWS = I915_READ(HWS_PGA); + i915_save_display(dev); /* Interrupt state */ @@ -842,6 +845,9 @@ int i915_restore_state(struct drm_device *dev) pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); + /* Hardware status page */ + I915_WRITE(HWS_PGA, dev_priv->saveHWS); + i915_restore_display(dev); /* Interrupt state */