From abc47ff61d3fae19d967d09876f1ced193dfcad2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 6 Oct 2019 17:49:53 +0100 Subject: [PATCH 001/159] drm/i915/gt: Restore dropped 'interruptible' flag Lost in the rebasing was Tvrtko's reminder that we need to keep an uninterruptible wait around for the Ironlake VT-d w/a Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191006165002.30312-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 8aed89fd2cdc..d69e78478eea 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -48,7 +48,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) fence = i915_active_fence_get(&tl->last_request); if (fence) { timeout = dma_fence_wait_timeout(fence, - true, + interruptible, timeout); dma_fence_put(fence); } From 1d0f2ebf392ef11465cefb29d545ed9e3222b29e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 9 Sep 2019 12:31:09 +0300 Subject: [PATCH 002/159] drm/i915/perf: move perf types to their own header Following a pattern used throughout the driver. Signed-off-by: Lionel Landwerlin Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20190909093116.7747-7-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 300 +---------------------- drivers/gpu/drm/i915/i915_perf.h | 2 + drivers/gpu/drm/i915/i915_perf_types.h | 327 +++++++++++++++++++++++++ 3 files changed, 330 insertions(+), 299 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1da67b242113..1b8fd3a38c66 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -93,6 +93,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" +#include "i915_perf_types.h" #include "i915_request.h" #include "i915_scheduler.h" #include "gt/intel_timeline.h" @@ -974,305 +975,6 @@ struct intel_wm_config { bool sprites_scaled; }; -struct i915_oa_format { - u32 format; - int size; -}; - -struct i915_oa_reg { - i915_reg_t addr; - u32 value; -}; - -struct i915_oa_config { - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct i915_oa_reg *mux_regs; - u32 mux_regs_len; - const struct i915_oa_reg *b_counter_regs; - u32 b_counter_regs_len; - const struct i915_oa_reg *flex_regs; - u32 flex_regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; - - atomic_t ref_count; -}; - -struct i915_perf_stream; - -/** - * struct i915_perf_stream_ops - the OPs to support a specific stream type - */ -struct i915_perf_stream_ops { - /** - * @enable: Enables the collection of HW samples, either in response to - * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened - * without `I915_PERF_FLAG_DISABLED`. - */ - void (*enable)(struct i915_perf_stream *stream); - - /** - * @disable: Disables the collection of HW samples, either in response - * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying - * the stream. - */ - void (*disable)(struct i915_perf_stream *stream); - - /** - * @poll_wait: Call poll_wait, passing a wait queue that will be woken - * once there is something ready to read() for the stream - */ - void (*poll_wait)(struct i915_perf_stream *stream, - struct file *file, - poll_table *wait); - - /** - * @wait_unlocked: For handling a blocking read, wait until there is - * something to ready to read() for the stream. E.g. wait on the same - * wait queue that would be passed to poll_wait(). - */ - int (*wait_unlocked)(struct i915_perf_stream *stream); - - /** - * @read: Copy buffered metrics as records to userspace - * **buf**: the userspace, destination buffer - * **count**: the number of bytes to copy, requested by userspace - * **offset**: zero at the start of the read, updated as the read - * proceeds, it represents how many bytes have been copied so far and - * the buffer offset for copying the next record. - * - * Copy as many buffered i915 perf samples and records for this stream - * to userspace as will fit in the given buffer. - * - * Only write complete records; returning -%ENOSPC if there isn't room - * for a complete record. - * - * Return any error condition that results in a short read such as - * -%ENOSPC or -%EFAULT, even though these may be squashed before - * returning to userspace. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @destroy: Cleanup any stream specific resources. - * - * The stream will always be disabled before this is called. - */ - void (*destroy)(struct i915_perf_stream *stream); -}; - -/** - * struct i915_perf_stream - state for a single open stream FD - */ -struct i915_perf_stream { - /** - * @dev_priv: i915 drm device - */ - struct drm_i915_private *dev_priv; - - /** - * @link: Links the stream into ``&drm_i915_private->streams`` - */ - struct list_head link; - - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - - /** - * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` - * properties given when opening a stream, representing the contents - * of a single sample as read() by userspace. - */ - u32 sample_flags; - - /** - * @sample_size: Considering the configured contents of a sample - * combined with the required header size, this is the total size - * of a single sample record. - */ - int sample_size; - - /** - * @ctx: %NULL if measuring system-wide across all contexts or a - * specific context that is being monitored. - */ - struct i915_gem_context *ctx; - - /** - * @enabled: Whether the stream is currently enabled, considering - * whether the stream was opened in a disabled state and based - * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. - */ - bool enabled; - - /** - * @ops: The callbacks providing the implementation of this specific - * type of configured stream. - */ - const struct i915_perf_stream_ops *ops; - - /** - * @oa_config: The OA configuration used by the stream. - */ - struct i915_oa_config *oa_config; - - /** - * @pinned_ctx: The OA context specific information. - */ - struct intel_context *pinned_ctx; - u32 specific_ctx_id; - u32 specific_ctx_id_mask; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - - bool periodic; - int period_exponent; - - /** - * @oa_buffer: State of the OA buffer. - */ - struct { - struct i915_vma *vma; - u8 *vaddr; - u32 last_ctx_id; - int format; - int format_size; - int size_exponent; - - /** - * @ptr_lock: Locks reads and writes to all head/tail state - * - * Consider: the head and tail pointer state needs to be read - * consistently from a hrtimer callback (atomic context) and - * read() fop (user context) with tail pointer updates happening - * in atomic context and head updates in user context and the - * (unlikely) possibility of read() errors needing to reset all - * head/tail state. - * - * Note: Contention/performance aren't currently a significant - * concern here considering the relatively low frequency of - * hrtimer callbacks (5ms period) and that reads typically only - * happen in response to a hrtimer event and likely complete - * before the next callback. - * - * Note: This lock is not held *while* reading and copying data - * to userspace so the value of head observed in htrimer - * callbacks won't represent any partial consumption of data. - */ - spinlock_t ptr_lock; - - /** - * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to - * used for reading. - * - * Initial values of 0xffffffff are invalid and imply that an - * update is required (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * @aged_tail_idx: Index for the aged tail ready to read() data up to. - */ - unsigned int aged_tail_idx; - - /** - * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer - * was read; used to determine when it is old enough to trust. - */ - u64 aging_timestamp; - - /** - * @head: Although we can always read back the head pointer register, - * we prefer to avoid trusting the HW state, just to avoid any - * risk that some hardware condition could * somehow bump the - * head pointer unpredictably and cause us to forward the wrong - * OA buffer data to userspace. - */ - u32 head; - } oa_buffer; -}; - -/** - * struct i915_oa_ops - Gen specific implementation of an OA unit stream - */ -struct i915_oa_ops { - /** - * @is_valid_b_counter_reg: Validates register's address for - * programming boolean counters for a particular platform. - */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); - - /** - * @is_valid_mux_reg: Validates register's address for programming mux - * for a particular platform. - */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @is_valid_flex_reg: Validates register's address for programming - * flex EU filtering for a particular platform. - */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @enable_metric_set: Selects and applies any MUX configuration to set - * up the Boolean and Custom (B/C) counters that are part of the - * counter reports being sampled. May apply system constraints such as - * disabling EU clock gating as required. - */ - int (*enable_metric_set)(struct i915_perf_stream *stream); - - /** - * @disable_metric_set: Remove system constraints associated with using - * the OA unit. - */ - void (*disable_metric_set)(struct i915_perf_stream *stream); - - /** - * @oa_enable: Enable periodic sampling - */ - void (*oa_enable)(struct i915_perf_stream *stream); - - /** - * @oa_disable: Disable periodic sampling - */ - void (*oa_disable)(struct i915_perf_stream *stream); - - /** - * @read: Copy data from the circular OA buffer into a given userspace - * buffer. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @oa_hw_tail_read: read the OA tail pointer register - * - * In particular this enables us to share all the fiddly code for - * handling the OA unit tail pointer race that affects multiple - * generations. - */ - u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); -}; - struct intel_cdclk_state { unsigned int cdclk, vco, ref, bypass; u8 voltage_level; diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index f4fb311184b1..ff412fb0dbbf 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -8,6 +8,8 @@ #include +#include "i915_perf_types.h" + struct drm_device; struct drm_file; struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h new file mode 100644 index 000000000000..edcab2df74fb --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_PERF_TYPES_H_ +#define _I915_PERF_TYPES_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i915_reg.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct file; +struct i915_gem_context; +struct i915_vma; +struct intel_context; +struct intel_engine_cs; + +struct i915_oa_format { + u32 format; + int size; +}; + +struct i915_oa_reg { + i915_reg_t addr; + u32 value; +}; + +struct i915_oa_config { + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct i915_oa_reg *mux_regs; + u32 mux_regs_len; + const struct i915_oa_reg *b_counter_regs; + u32 b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + u32 flex_regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct device_attribute sysfs_metric_id; + + atomic_t ref_count; +}; + +struct i915_perf_stream; + +/** + * struct i915_perf_stream_ops - the OPs to support a specific stream type + */ +struct i915_perf_stream_ops { + /** + * @enable: Enables the collection of HW samples, either in response to + * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened + * without `I915_PERF_FLAG_DISABLED`. + */ + void (*enable)(struct i915_perf_stream *stream); + + /** + * @disable: Disables the collection of HW samples, either in response + * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying + * the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /** + * @poll_wait: Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /** + * @wait_unlocked: For handling a blocking read, wait until there is + * something to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait(). + */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /** + * @read: Copy buffered metrics as records to userspace + * **buf**: the userspace, destination buffer + * **count**: the number of bytes to copy, requested by userspace + * **offset**: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been copied so far and + * the buffer offset for copying the next record. + * + * Copy as many buffered i915 perf samples and records for this stream + * to userspace as will fit in the given buffer. + * + * Only write complete records; returning -%ENOSPC if there isn't room + * for a complete record. + * + * Return any error condition that results in a short read such as + * -%ENOSPC or -%EFAULT, even though these may be squashed before + * returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @destroy: Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +/** + * struct i915_perf_stream - state for a single open stream FD + */ +struct i915_perf_stream { + /** + * @dev_priv: i915 drm device + */ + struct drm_i915_private *dev_priv; + + /** + * @link: Links the stream into ``&drm_i915_private->streams`` + */ + struct list_head link; + + /** + * @wakeref: As we keep the device awake while the perf stream is + * active, we track our runtime pm reference for later release. + */ + intel_wakeref_t wakeref; + + /** + * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` + * properties given when opening a stream, representing the contents + * of a single sample as read() by userspace. + */ + u32 sample_flags; + + /** + * @sample_size: Considering the configured contents of a sample + * combined with the required header size, this is the total size + * of a single sample record. + */ + int sample_size; + + /** + * @ctx: %NULL if measuring system-wide across all contexts or a + * specific context that is being monitored. + */ + struct i915_gem_context *ctx; + + /** + * @enabled: Whether the stream is currently enabled, considering + * whether the stream was opened in a disabled state and based + * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. + */ + bool enabled; + + /** + * @ops: The callbacks providing the implementation of this specific + * type of configured stream. + */ + const struct i915_perf_stream_ops *ops; + + /** + * @oa_config: The OA configuration used by the stream. + */ + struct i915_oa_config *oa_config; + + /** + * @pinned_ctx: The OA context specific information. + */ + struct intel_context *pinned_ctx; + u32 specific_ctx_id; + u32 specific_ctx_id_mask; + + struct hrtimer poll_check_timer; + wait_queue_head_t poll_wq; + bool pollin; + + bool periodic; + int period_exponent; + + /** + * @oa_buffer: State of the OA buffer. + */ + struct { + struct i915_vma *vma; + u8 *vaddr; + u32 last_ctx_id; + int format; + int format_size; + int size_exponent; + + /** + * @ptr_lock: Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state needs to be read + * consistently from a hrtimer callback (atomic context) and + * read() fop (user context) with tail pointer updates happening + * in atomic context and head updates in user context and the + * (unlikely) possibility of read() errors needing to reset all + * head/tail state. + * + * Note: Contention/performance aren't currently a significant + * concern here considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that reads typically only + * happen in response to a hrtimer event and likely complete + * before the next callback. + * + * Note: This lock is not held *while* reading and copying data + * to userspace so the value of head observed in htrimer + * callbacks won't represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to + * used for reading. + * + * Initial values of 0xffffffff are invalid and imply that an + * update is required (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * @aged_tail_idx: Index for the aged tail ready to read() data up to. + */ + unsigned int aged_tail_idx; + + /** + * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer + * was read; used to determine when it is old enough to trust. + */ + u64 aging_timestamp; + + /** + * @head: Although we can always read back the head pointer register, + * we prefer to avoid trusting the HW state, just to avoid any + * risk that some hardware condition could * somehow bump the + * head pointer unpredictably and cause us to forward the wrong + * OA buffer data to userspace. + */ + u32 head; + } oa_buffer; +}; + +/** + * struct i915_oa_ops - Gen specific implementation of an OA unit stream + */ +struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, + u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. + */ + bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as + * disabling EU clock gating as required. + */ + int (*enable_metric_set)(struct i915_perf_stream *stream); + + /** + * @disable_metric_set: Remove system constraints associated with using + * the OA unit. + */ + void (*disable_metric_set)(struct i915_perf_stream *stream); + + /** + * @oa_enable: Enable periodic sampling + */ + void (*oa_enable)(struct i915_perf_stream *stream); + + /** + * @oa_disable: Disable periodic sampling + */ + void (*oa_disable)(struct i915_perf_stream *stream); + + /** + * @read: Copy data from the circular OA buffer into a given userspace + * buffer. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @oa_hw_tail_read: read the OA tail pointer register + * + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. + */ + u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); +}; + +#endif /* _I915_PERF_TYPES_H_ */ From 3aec2c6a4886276bcbc9ace9416ffae41643702c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 2 Oct 2019 12:22:58 -0700 Subject: [PATCH 003/159] drm/i915/vbt: Child device size remains unchanged through VBT 229 The latest documented version of the VBT is 229, but no further data has been added to the child device definition in block 2. Update the child device version test to eliminate the "Expected child device config size for VBT version XXX not known; assuming 39" debug messages from the logs. Bspec: 20124 Bspec: 20157 Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191002192258.1013-1-matthew.d.roper@intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 1def550c68c8..9628b485b179 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1615,7 +1615,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv, expected_size = 37; } else if (bdb->version <= 215) { expected_size = 38; - } else if (bdb->version <= 216) { + } else if (bdb->version <= 229) { expected_size = 39; } else { expected_size = sizeof(*child); From b9dcb97b6c8076af3469c62feb1174ef525b81f7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 7 Oct 2019 16:41:51 +0100 Subject: [PATCH 004/159] drm/i915: make array hw_engine_mask static, makes object smaller Don't populate the array hw_engine_mask on the stack but instead make it static. Makes the object code smaller by 316 bytes. Before: text data bss dec hex filename 34004 4388 320 38712 9738 gpu/drm/i915/gt/intel_reset.o After: text data bss dec hex filename 33528 4548 320 38396 95fc gpu/drm/i915/gt/intel_reset.o (gcc version 9.2.1, amd64) Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191007154151.23245-1-colin.king@canonical.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 7b3d9d4517a0..ae8f9cef6b13 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -282,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, [BCS0] = GEN6_GRDOM_BLT, [VCS0] = GEN6_GRDOM_MEDIA, [VCS1] = GEN8_GRDOM_MEDIA2, [VECS0] = GEN6_GRDOM_VECS, }; + struct intel_engine_cs *engine; u32 hw_mask; if (engine_mask == ALL_ENGINES) { @@ -413,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, [VCS0] = GEN11_GRDOM_MEDIA, From cd6a851385be878f086204aa0e7cd3bec9ea909f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Oct 2019 16:45:31 +0100 Subject: [PATCH 005/159] drm/i915/gt: Prefer local path to runtime powermanagement Avoid going to the base i915 device when we already have a path from gt to the runtime powermanagement interface. The benefit is that it looks a bit more self-consistent to always be acquiring the gt->uncore->rpm for use with the gt->uncore. Signed-off-by: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191007154531.1750-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c | 7 +++---- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_hangcheck.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_reset.c | 6 +++--- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 ++++---- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 3 +-- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- 14 files changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index be34d97ac18f..59c3083c1ec1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -62,7 +62,7 @@ int __intel_context_do_pin(struct intel_context *ce) } err = 0; - with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) + with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) err = ce->ops->pin(ce); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 80fd072ac719..6220b7151bb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1458,10 +1458,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); - wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref); + intel_runtime_pm_put(engine->uncore->rpm, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 8e5e513eddc9..67eb6183648a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -185,7 +185,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - struct intel_runtime_pm *rpm = &engine->i915->runtime_pm; + struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8f44cf8c79b2..b3619a2a5d0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -273,7 +273,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) void intel_gt_flush_ggtt_writes(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; intel_wakeref_t wakeref; /* @@ -297,13 +297,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) wmb(); - if (INTEL_INFO(i915)->has_coherent_ggtt) + if (INTEL_INFO(gt->i915)->has_coherent_ggtt) return; intel_gt_chipset_flush(gt); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_uncore *uncore = gt->uncore; + with_intel_runtime_pm(uncore->rpm, wakeref) { unsigned long flags; spin_lock_irqsave(&uncore->lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index b52e2ba3d092..87e34e0b6427 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -94,7 +94,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); + intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } @@ -222,7 +222,7 @@ void intel_gt_suspend(struct intel_gt *gt) /* We expect to be idle already; but also want to be independent */ wait_for_idle(gt); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) intel_rc6_disable(>->rc6); } diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 9814b18b32ad..c14dbeb3ccc3 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -271,7 +271,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); if (!wakeref) return; @@ -322,7 +322,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); /* Reset timer in case GPU hangs without another request being added */ intel_gt_queue_hangcheck(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ae8f9cef6b13..34791fc79dea 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -811,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt) intel_wakeref_t wakeref; mutex_lock(>->reset.mutex); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) __intel_gt_set_wedged(gt); mutex_unlock(>->reset.mutex); } @@ -1186,7 +1186,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine_mask &= INTEL_INFO(gt->i915)->engine_mask; @@ -1246,7 +1246,7 @@ void intel_gt_handle_error(struct intel_gt *gt, wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index e8a40df79bd0..569a4105d49e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1695,14 +1695,14 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index d79482db7fe8..419b38fa7828 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -17,7 +17,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); reset_count = i915_reset_count(>->i915->gpu_error); @@ -28,7 +28,7 @@ static int igt_global_reset(void *arg) err = -EINVAL; } - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_gt_set_wedged(gt); GEM_BUG_ON(!intel_gt_is_wedged(gt)); intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); return intel_gt_is_wedged(gt) ? -EIO : 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 95627e80f246..74952bda9256 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -256,7 +256,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, GEM_BUG_ON(IS_ERR(ce)); rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); @@ -313,7 +313,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out_spin; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); igt_spinner_end(&spin); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 36332064de9c..2cf2d3314f62 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -607,7 +607,6 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; /* @@ -616,7 +615,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d4625c97b4f9..33608a114d4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -185,7 +185,7 @@ int intel_huc_check_status(struct intel_huc *huc) if (!intel_huc_is_supported(huc)) return -ENODEV; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) status = intel_uncore_read(gt->uncore, huc->status.reg); return (status & huc->status.mask) == huc->status.value; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 29a9eec60d2e..3fdbc935d155 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -587,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) + with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) intel_uc_runtime_suspend(uc); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f2371b6083c6..3af7f7914c40 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -412,7 +412,7 @@ void gen9_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); @@ -423,7 +423,7 @@ void gen9_enable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); if (!guc->interrupts.enabled) { @@ -440,7 +440,7 @@ void gen9_disable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); guc->interrupts.enabled = false; From 08ad9a3846fc72b047b110b36d162ffbcf298fa2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 20:47:58 +0100 Subject: [PATCH 006/159] drm/i915/execlists: Fix annotation for decoupling virtual request As we may signal a request and take the engine->active.lock within the signaler, the engine submission paths have to use a nested annotation on their requests -- but we guarantee that we can never submit on the same engine as the signaling fence. <4>[ 723.763281] WARNING: possible circular locking dependency detected <4>[ 723.763285] 5.3.0-g80fa0e042cdb-drmtip_379+ #1 Tainted: G U <4>[ 723.763288] ------------------------------------------------------ <4>[ 723.763291] gem_exec_await/1388 is trying to acquire lock: <4>[ 723.763294] ffff93a7b53221d8 (&engine->active.lock){..-.}, at: execlists_submit_request+0x2b/0x1e0 [i915] <4>[ 723.763378] but task is already holding lock: <4>[ 723.763381] ffff93a7c25f6d20 (&i915_request_get(rq)->submit/1){-.-.}, at: __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 723.763420] which lock already depends on the new lock. <4>[ 723.763423] the existing dependency chain (in reverse order) is: <4>[ 723.763427] -> #2 (&i915_request_get(rq)->submit/1){-.-.}: <4>[ 723.763434] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 723.763478] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 723.763513] intel_engine_breadcrumbs_irq+0x3aa/0x5e0 [i915] <4>[ 723.763600] cs_irq_handler+0x49/0x50 [i915] <4>[ 723.763659] gen11_gt_irq_handler+0x17b/0x280 [i915] <4>[ 723.763690] gen11_irq_handler+0x54/0xf0 [i915] <4>[ 723.763695] __handle_irq_event_percpu+0x41/0x2d0 <4>[ 723.763699] handle_irq_event_percpu+0x2b/0x70 <4>[ 723.763702] handle_irq_event+0x2f/0x50 <4>[ 723.763706] handle_edge_irq+0xee/0x1a0 <4>[ 723.763709] do_IRQ+0x7e/0x160 <4>[ 723.763712] ret_from_intr+0x0/0x1d <4>[ 723.763717] __slab_alloc.isra.28.constprop.33+0x4f/0x70 <4>[ 723.763720] kmem_cache_alloc+0x28d/0x2f0 <4>[ 723.763724] vm_area_dup+0x15/0x40 <4>[ 723.763727] dup_mm+0x2dd/0x550 <4>[ 723.763730] copy_process+0xf21/0x1ef0 <4>[ 723.763734] _do_fork+0x71/0x670 <4>[ 723.763737] __se_sys_clone+0x6e/0xa0 <4>[ 723.763741] do_syscall_64+0x4f/0x210 <4>[ 723.763744] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 723.763747] -> #1 (&(&rq->lock)->rlock#2){-.-.}: <4>[ 723.763752] _raw_spin_lock+0x2a/0x40 <4>[ 723.763789] __unwind_incomplete_requests+0x3eb/0x450 [i915] <4>[ 723.763825] __execlists_submission_tasklet+0x9ec/0x1d60 [i915] <4>[ 723.763864] execlists_submission_tasklet+0x34/0x50 [i915] <4>[ 723.763874] tasklet_action_common.isra.5+0x47/0xb0 <4>[ 723.763878] __do_softirq+0xd8/0x4ae <4>[ 723.763881] irq_exit+0xa9/0xc0 <4>[ 723.763883] smp_apic_timer_interrupt+0xb7/0x280 <4>[ 723.763887] apic_timer_interrupt+0xf/0x20 <4>[ 723.763892] cpuidle_enter_state+0xae/0x450 <4>[ 723.763895] cpuidle_enter+0x24/0x40 <4>[ 723.763899] do_idle+0x1e7/0x250 <4>[ 723.763902] cpu_startup_entry+0x14/0x20 <4>[ 723.763905] start_secondary+0x15f/0x1b0 <4>[ 723.763908] secondary_startup_64+0xa4/0xb0 <4>[ 723.763911] -> #0 (&engine->active.lock){..-.}: <4>[ 723.763916] __lock_acquire+0x15d8/0x1ea0 <4>[ 723.763919] lock_acquire+0xa6/0x1c0 <4>[ 723.763922] _raw_spin_lock_irqsave+0x33/0x50 <4>[ 723.763956] execlists_submit_request+0x2b/0x1e0 [i915] <4>[ 723.764002] submit_notify+0xa8/0x13c [i915] <4>[ 723.764035] __i915_sw_fence_complete+0x81/0x250 [i915] <4>[ 723.764054] i915_sw_fence_wake+0x51/0x64 [i915] <4>[ 723.764054] __i915_sw_fence_complete+0x1ee/0x250 [i915] <4>[ 723.764054] dma_i915_sw_fence_wake_timer+0x14/0x20 [i915] <4>[ 723.764054] dma_fence_signal_locked+0x9e/0x1c0 <4>[ 723.764054] dma_fence_signal+0x1f/0x40 <4>[ 723.764054] vgem_fence_signal_ioctl+0x67/0xc0 [vgem] <4>[ 723.764054] drm_ioctl_kernel+0x83/0xf0 <4>[ 723.764054] drm_ioctl+0x2f3/0x3b0 <4>[ 723.764054] do_vfs_ioctl+0xa0/0x6f0 <4>[ 723.764054] ksys_ioctl+0x35/0x60 <4>[ 723.764054] __x64_sys_ioctl+0x11/0x20 <4>[ 723.764054] do_syscall_64+0x4f/0x210 <4>[ 723.764054] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 723.764054] other info that might help us debug this: <4>[ 723.764054] Chain exists of: &engine->active.lock --> &(&rq->lock)->rlock#2 --> &i915_request_get(rq)->submit/1 <4>[ 723.764054] Possible unsafe locking scenario: <4>[ 723.764054] CPU0 CPU1 <4>[ 723.764054] ---- ---- <4>[ 723.764054] lock(&i915_request_get(rq)->submit/1); <4>[ 723.764054] lock(&(&rq->lock)->rlock#2); <4>[ 723.764054] lock(&i915_request_get(rq)->submit/1); <4>[ 723.764054] lock(&engine->active.lock); <4>[ 723.764054] *** DEADLOCK *** Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111862 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004194758.19679-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 468438fb47af..432b8b60c4c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -928,7 +928,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } From 1664f35aa718815527368e57b0d4ed5bc80e889c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 21:31:21 +0100 Subject: [PATCH 007/159] drm/i915/selftests: Appease lockdep Disable irqs around updating the context image to keep lockdep happy: <4>[ 673.483340] WARNING: possible irq lock inversion dependency detected <4>[ 673.483342] 5.4.0-rc1-CI-Trybot_5118+ #1 Tainted: G U <4>[ 673.483342] -------------------------------------------------------- <4>[ 673.483343] swapper/2/0 just changed the state of lock: <4>[ 673.483344] ffff88845db885a0 (&i915_request_get(rq)->submit/1){-...}, at: __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.483387] but this lock took another, HARDIRQ-unsafe lock in the past: <4>[ 673.483388] (&ce->pin_mutex/2){+...} <4>[ 673.483389] and interrupts could create inverse lock ordering between them. <4>[ 673.483390] other info that might help us debug this: <4>[ 673.483390] Chain exists of: &i915_request_get(rq)->submit/1 --> &engine->active.lock --> &ce->pin_mutex/2 <4>[ 673.483392] Possible interrupt unsafe locking scenario: <4>[ 673.483392] CPU0 CPU1 <4>[ 673.483393] ---- ---- <4>[ 673.483393] lock(&ce->pin_mutex/2); <4>[ 673.483394] local_irq_disable(); <4>[ 673.483395] lock(&i915_request_get(rq)->submit/1); <4>[ 673.483396] lock(&engine->active.lock); <4>[ 673.483396] <4>[ 673.483397] lock(&i915_request_get(rq)->submit/1); <4>[ 673.483398] *** DEADLOCK *** <4>[ 673.483398] 2 locks held by swapper/2/0: <4>[ 673.483399] #0: ffff8883f61ac9b0 (&(>->irq_lock)->rlock){-.-.}, at: gen11_gt_irq_handler+0x42/0x280 [i915] <4>[ 673.483433] #1: ffff88845db8c418 (&(&rq->lock)->rlock){-.-.}, at: intel_engine_breadcrumbs_irq+0x34a/0x5a0 [i915] <4>[ 673.483463] the shortest dependencies between 2nd lock and 1st lock: <4>[ 673.483466] -> (&ce->pin_mutex/2){+...} ops: 614520 { <4>[ 673.483468] HARDIRQ-ON-W at: <4>[ 673.483471] lock_acquire+0xa7/0x1c0 <4>[ 673.483501] live_unlite_restore+0x1d8/0x6c0 [i915] <4>[ 673.483543] __i915_subtests+0xb8/0x210 [i915] <4>[ 673.483581] __run_selftests+0x112/0x170 [i915] <4>[ 673.483615] i915_live_selftests+0x2c/0x60 [i915] <4>[ 673.483644] i915_pci_probe+0x93/0x1b0 [i915] <4>[ 673.483646] pci_device_probe+0x9e/0x120 <4>[ 673.483648] really_probe+0xea/0x420 <4>[ 673.483649] driver_probe_device+0x10b/0x120 <4>[ 673.483651] device_driver_attach+0x4a/0x50 <4>[ 673.483652] __driver_attach+0x97/0x130 <4>[ 673.483653] bus_for_each_dev+0x74/0xc0 <4>[ 673.483654] bus_add_driver+0x142/0x220 <4>[ 673.483655] driver_register+0x56/0xf0 <4>[ 673.483657] do_one_initcall+0x58/0x2ff <4>[ 673.483659] do_init_module+0x56/0x1f8 <4>[ 673.483660] load_module+0x243e/0x29f0 <4>[ 673.483661] __do_sys_finit_module+0xe9/0x110 <4>[ 673.483662] do_syscall_64+0x4f/0x210 <4>[ 673.483665] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 673.483665] INITIAL USE at: <4>[ 673.483667] lock_acquire+0xa7/0x1c0 <4>[ 673.483698] live_unlite_restore+0x1d8/0x6c0 [i915] <4>[ 673.483733] __i915_subtests+0xb8/0x210 [i915] <4>[ 673.483764] __run_selftests+0x112/0x170 [i915] <4>[ 673.483793] i915_live_selftests+0x2c/0x60 [i915] <4>[ 673.483821] i915_pci_probe+0x93/0x1b0 [i915] <4>[ 673.483822] pci_device_probe+0x9e/0x120 <4>[ 673.483824] really_probe+0xea/0x420 <4>[ 673.483825] driver_probe_device+0x10b/0x120 <4>[ 673.483826] device_driver_attach+0x4a/0x50 <4>[ 673.483827] __driver_attach+0x97/0x130 <4>[ 673.483828] bus_for_each_dev+0x74/0xc0 <4>[ 673.483829] bus_add_driver+0x142/0x220 <4>[ 673.483830] driver_register+0x56/0xf0 <4>[ 673.483831] do_one_initcall+0x58/0x2ff <4>[ 673.483833] do_init_module+0x56/0x1f8 <4>[ 673.483834] load_module+0x243e/0x29f0 <4>[ 673.483835] __do_sys_finit_module+0xe9/0x110 <4>[ 673.483836] do_syscall_64+0x4f/0x210 <4>[ 673.483837] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 673.483838] } <4>[ 673.483868] ... key at: [] __key.70113+0x2/0xffffffffffef2ed0 [i915] <4>[ 673.483869] ... acquired at: <4>[ 673.483935] __execlists_reset+0xfb/0xc20 [i915] <4>[ 673.483965] execlists_reset+0x3d/0x50 [i915] <4>[ 673.483995] intel_engine_reset+0xdf/0x230 [i915] <4>[ 673.484022] live_preempt_hang+0x1d7/0x2e0 [i915] <4>[ 673.484064] __i915_subtests+0xb8/0x210 [i915] <4>[ 673.484130] __run_selftests+0x112/0x170 [i915] <4>[ 673.484163] i915_live_selftests+0x2c/0x60 [i915] <4>[ 673.484193] i915_pci_probe+0x93/0x1b0 [i915] <4>[ 673.484194] pci_device_probe+0x9e/0x120 <4>[ 673.484195] really_probe+0xea/0x420 <4>[ 673.484196] driver_probe_device+0x10b/0x120 <4>[ 673.484197] device_driver_attach+0x4a/0x50 <4>[ 673.484198] __driver_attach+0x97/0x130 <4>[ 673.484199] bus_for_each_dev+0x74/0xc0 <4>[ 673.484200] bus_add_driver+0x142/0x220 <4>[ 673.484202] driver_register+0x56/0xf0 <4>[ 673.484203] do_one_initcall+0x58/0x2ff <4>[ 673.484204] do_init_module+0x56/0x1f8 <4>[ 673.484205] load_module+0x243e/0x29f0 <4>[ 673.484206] __do_sys_finit_module+0xe9/0x110 <4>[ 673.484207] do_syscall_64+0x4f/0x210 <4>[ 673.484208] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 673.484209] -> (&engine->active.lock){..-.} ops: 972791 { <4>[ 673.484211] IN-SOFTIRQ-W at: <4>[ 673.484213] lock_acquire+0xa7/0x1c0 <4>[ 673.484214] _raw_spin_lock_irqsave+0x33/0x50 <4>[ 673.484244] execlists_submission_tasklet+0xaf/0x100 [i915] <4>[ 673.484246] tasklet_action_common.isra.18+0x6c/0x1c0 <4>[ 673.484247] __do_softirq+0xdf/0x47f <4>[ 673.484248] irq_exit+0xba/0xc0 <4>[ 673.484249] do_IRQ+0x83/0x160 <4>[ 673.484250] ret_from_intr+0x0/0x1d <4>[ 673.484252] cpuidle_enter_state+0xb2/0x450 <4>[ 673.484253] cpuidle_enter+0x24/0x40 <4>[ 673.484254] do_idle+0x1e7/0x250 <4>[ 673.484256] cpu_startup_entry+0x14/0x20 <4>[ 673.484257] start_secondary+0x15f/0x1b0 <4>[ 673.484258] secondary_startup_64+0xa4/0xb0 <4>[ 673.484259] INITIAL USE at: <4>[ 673.484261] lock_acquire+0xa7/0x1c0 <4>[ 673.484290] intel_engine_init_active+0x7e/0xb0 [i915] <4>[ 673.484305] intel_engines_setup+0x1cd/0x3b0 [i915] <4>[ 673.484305] i915_gem_init+0x12d/0x900 [i915] <4>[ 673.484305] i915_driver_probe+0xb70/0x15d0 [i915] <4>[ 673.484305] i915_pci_probe+0x43/0x1b0 [i915] <4>[ 673.484305] pci_device_probe+0x9e/0x120 <4>[ 673.484305] really_probe+0xea/0x420 <4>[ 673.484305] driver_probe_device+0x10b/0x120 <4>[ 673.484305] device_driver_attach+0x4a/0x50 <4>[ 673.484305] __driver_attach+0x97/0x130 <4>[ 673.484305] bus_for_each_dev+0x74/0xc0 <4>[ 673.484305] bus_add_driver+0x142/0x220 <4>[ 673.484305] driver_register+0x56/0xf0 <4>[ 673.484305] do_one_initcall+0x58/0x2ff <4>[ 673.484305] do_init_module+0x56/0x1f8 <4>[ 673.484305] load_module+0x243e/0x29f0 <4>[ 673.484305] __do_sys_finit_module+0xe9/0x110 <4>[ 673.484305] do_syscall_64+0x4f/0x210 <4>[ 673.484305] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 673.484305] } <4>[ 673.484305] ... key at: [] __key.70307+0x0/0xffffffffffef2ea0 [i915] <4>[ 673.484305] ... acquired at: <4>[ 673.484305] _raw_spin_lock_irqsave+0x33/0x50 <4>[ 673.484305] execlists_submit_request+0x2b/0x1e0 [i915] <4>[ 673.484305] submit_notify+0xa8/0x13c [i915] <4>[ 673.484305] __i915_sw_fence_complete+0x81/0x250 [i915] <4>[ 673.484305] i915_sw_fence_wake+0x51/0x70 [i915] <4>[ 673.484305] __i915_sw_fence_complete+0x1ee/0x250 [i915] <4>[ 673.484305] dma_i915_sw_fence_wake+0x1b/0x30 [i915] <4>[ 673.484305] dma_fence_signal_locked+0x9e/0x1b0 <4>[ 673.484305] dma_fence_signal+0x1f/0x40 <4>[ 673.484305] fence_work+0x28/0x80 [i915] <4>[ 673.484305] process_one_work+0x26a/0x620 <4>[ 673.484305] worker_thread+0x37/0x380 <4>[ 673.484305] kthread+0x119/0x130 <4>[ 673.484305] ret_from_fork+0x24/0x50 <4>[ 673.484305] -> (&i915_request_get(rq)->submit/1){-...} ops: 857694 { <4>[ 673.484305] IN-HARDIRQ-W at: <4>[ 673.484305] lock_acquire+0xa7/0x1c0 <4>[ 673.484305] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 673.484305] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] intel_engine_breadcrumbs_irq+0x3d0/0x5a0 [i915] <4>[ 673.484305] cs_irq_handler+0x39/0x50 [i915] <4>[ 673.484305] gen11_gt_irq_handler+0x17b/0x280 [i915] <4>[ 673.484305] gen11_irq_handler+0x54/0xf0 [i915] <4>[ 673.484305] __handle_irq_event_percpu+0x41/0x2c0 <4>[ 673.484305] handle_irq_event_percpu+0x2b/0x70 <4>[ 673.484305] handle_irq_event+0x2f/0x50 <4>[ 673.484305] handle_edge_irq+0x99/0x1b0 <4>[ 673.484305] do_IRQ+0x7e/0x160 <4>[ 673.484305] ret_from_intr+0x0/0x1d <4>[ 673.484305] cpuidle_enter_state+0xb2/0x450 <4>[ 673.484305] cpuidle_enter+0x24/0x40 <4>[ 673.484305] do_idle+0x1e7/0x250 <4>[ 673.484305] cpu_startup_entry+0x14/0x20 <4>[ 673.484305] start_secondary+0x15f/0x1b0 <4>[ 673.484305] secondary_startup_64+0xa4/0xb0 <4>[ 673.484305] INITIAL USE at: <4>[ 673.484305] lock_acquire+0xa7/0x1c0 <4>[ 673.484305] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 673.484305] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] __engine_park+0x233/0x420 [i915] <4>[ 673.484305] ____intel_wakeref_put_last+0x1c/0x70 [i915] <4>[ 673.484305] intel_gt_resume+0x202/0x2c0 [i915] <4>[ 673.484305] i915_gem_init+0x36e/0x900 [i915] <4>[ 673.484305] i915_driver_probe+0xb70/0x15d0 [i915] <4>[ 673.484305] i915_pci_probe+0x43/0x1b0 [i915] <4>[ 673.484305] pci_device_probe+0x9e/0x120 <4>[ 673.484305] really_probe+0xea/0x420 <4>[ 673.484305] driver_probe_device+0x10b/0x120 <4>[ 673.484305] device_driver_attach+0x4a/0x50 <4>[ 673.484305] __driver_attach+0x97/0x130 <4>[ 673.484305] bus_for_each_dev+0x74/0xc0 <4>[ 673.484305] bus_add_driver+0x142/0x220 <4>[ 673.484305] driver_register+0x56/0xf0 <4>[ 673.484305] do_one_initcall+0x58/0x2ff <4>[ 673.484305] do_init_module+0x56/0x1f8 <4>[ 673.484305] load_module+0x243e/0x29f0 <4>[ 673.484305] __do_sys_finit_module+0xe9/0x110 <4>[ 673.484305] do_syscall_64+0x4f/0x210 <4>[ 673.484305] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 673.484305] } <4>[ 673.484305] ... key at: [] __key.80173+0x1/0xffffffffffef2960 [i915] <4>[ 673.484305] ... acquired at: <4>[ 673.484305] mark_lock+0x382/0x500 <4>[ 673.484305] __lock_acquire+0x7e1/0x15d0 <4>[ 673.484305] lock_acquire+0xa7/0x1c0 <4>[ 673.484305] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 673.484305] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] intel_engine_breadcrumbs_irq+0x3d0/0x5a0 [i915] <4>[ 673.484305] cs_irq_handler+0x39/0x50 [i915] <4>[ 673.484305] gen11_gt_irq_handler+0x17b/0x280 [i915] <4>[ 673.484305] gen11_irq_handler+0x54/0xf0 [i915] <4>[ 673.484305] __handle_irq_event_percpu+0x41/0x2c0 <4>[ 673.484305] handle_irq_event_percpu+0x2b/0x70 <4>[ 673.484305] handle_irq_event+0x2f/0x50 <4>[ 673.484305] handle_edge_irq+0x99/0x1b0 <4>[ 673.484305] do_IRQ+0x7e/0x160 <4>[ 673.484305] ret_from_intr+0x0/0x1d <4>[ 673.484305] cpuidle_enter_state+0xb2/0x450 <4>[ 673.484305] cpuidle_enter+0x24/0x40 <4>[ 673.484305] do_idle+0x1e7/0x250 <4>[ 673.484305] cpu_startup_entry+0x14/0x20 <4>[ 673.484305] start_secondary+0x15f/0x1b0 <4>[ 673.484305] secondary_startup_64+0xa4/0xb0 <4>[ 673.484305] stack backtrace: <4>[ 673.484305] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G U 5.4.0-rc1-CI-Trybot_5118+ #1 <4>[ 673.484305] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3183.A00.1905020411 05/02/2019 <4>[ 673.484305] Call Trace: <4>[ 673.484305] <4>[ 673.484305] dump_stack+0x67/0x9b <4>[ 673.484305] check_usage_forwards+0x13c/0x150 <4>[ 673.484305] ? mark_lock+0x382/0x500 <4>[ 673.484305] mark_lock+0x382/0x500 <4>[ 673.484305] ? check_usage_backwards+0x140/0x140 <4>[ 673.484305] __lock_acquire+0x7e1/0x15d0 <4>[ 673.484305] ? debug_object_deactivate+0x17e/0x190 <4>[ 673.484305] lock_acquire+0xa7/0x1c0 <4>[ 673.484305] ? __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 673.484305] ? __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 673.484305] intel_engine_breadcrumbs_irq+0x3d0/0x5a0 [i915] <4>[ 673.484305] cs_irq_handler+0x39/0x50 [i915] <4>[ 673.484305] gen11_gt_irq_handler+0x17b/0x280 [i915] <4>[ 673.484305] gen11_irq_handler+0x54/0xf0 [i915] <4>[ 673.484305] __handle_irq_event_percpu+0x41/0x2c0 <4>[ 673.484305] handle_irq_event_percpu+0x2b/0x70 <4>[ 673.484305] handle_irq_event+0x2f/0x50 <4>[ 673.484305] handle_edge_irq+0x99/0x1b0 <4>[ 673.484305] do_IRQ+0x7e/0x160 <4>[ 673.484305] common_interrupt+0xf/0xf <4>[ 673.484305] Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004203121.31138-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 393ae5321e1d..198cf2f754f4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -143,11 +143,11 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - local_bh_disable(); /* appease lockdep */ + local_irq_disable(); /* appease lockdep */ __context_pin_acquire(ce[1]); __execlists_update_reg_state(ce[1], engine); __context_pin_release(ce[1]); - local_bh_enable(); + local_irq_enable(); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { From a1b58ee3cb34c433167b7b7d3465ab3ca0e3daf1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 6 Oct 2019 17:49:54 +0100 Subject: [PATCH 008/159] drm/i915/gt: Treat a busy timeline as 'active' while waiting If we cannot claim the timeline->mutex while preparing for a wait on it, we have to skip the timeline. In doing so, treat it as active so that under a intel_gt_wait_for_idle() loop, we repeat the wait after scheduling away. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191006165002.30312-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index d69e78478eea..ca606b79fd5e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -34,8 +34,10 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { - if (!mutex_trylock(&tl->mutex)) + if (!mutex_trylock(&tl->mutex)) { + active_count++; /* report busy to caller, try again? */ continue; + } intel_timeline_get(tl); GEM_BUG_ON(!tl->active_count); From d30213e533fa73929b6af48a5b0e5761879d7db2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Oct 2019 19:33:46 +0200 Subject: [PATCH 009/159] drm/i915: Fix Kconfig indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ /\t/' -i */Kconfig Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191007173346.9379-1-krzk@kernel.org --- drivers/gpu/drm/i915/Kconfig | 12 +-- drivers/gpu/drm/i915/Kconfig.debug | 142 ++++++++++++++--------------- 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 0d21402945ab..3c6d57df262d 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI for triaging. If in doubt, say "Y". @@ -105,11 +105,11 @@ config DRM_I915_USERPTR If in doubt, say "Y". config DRM_I915_GVT - bool "Enable Intel GVT-g graphics virtualization host support" - depends on DRM_I915 - depends on 64BIT - default n - help + bool "Enable Intel GVT-g graphics virtualization host support" + depends on DRM_I915 + depends on 64BIT + default n + help Choose this option if you want to enable Intel GVT-g graphics virtualization technology host support with integrated graphics. With GVT-g, it's possible to have one integrated graphics diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 87a38c6aaa41..95d5fb8058fc 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -1,48 +1,48 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_I915_WERROR - bool "Force GCC to throw an error instead of a warning when compiling" - # As this may inadvertently break the build, only allow the user - # to shoot oneself in the foot iff they aim really hard - depends on EXPERT - # We use the dependency on !COMPILE_TEST to not be enabled in - # allmodconfig or allyesconfig configurations - depends on !COMPILE_TEST + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST select HEADER_TEST - default n - help - Add -Werror to the build flags for (and only for) i915.ko. - Do not enable this unless you are writing code for the i915.ko module. + default n + help + Add -Werror to the build flags for (and only for) i915.ko. + Do not enable this unless you are writing code for the i915.ko module. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG - bool "Enable additional driver debugging" - depends on DRM_I915 - select DEBUG_FS - select PREEMPT_COUNT - select REFCOUNT_FULL - select I2C_CHARDEV - select STACKDEPOT - select DRM_DP_AUX_CHARDEV - select X86_MSR # used by igt/pm_rpm - select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) - select DRM_DEBUG_MM if DRM=y + bool "Enable additional driver debugging" + depends on DRM_I915 + select DEBUG_FS + select PREEMPT_COUNT + select REFCOUNT_FULL + select I2C_CHARDEV + select STACKDEPOT + select DRM_DP_AUX_CHARDEV + select X86_MSR # used by igt/pm_rpm + select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) + select DRM_DEBUG_MM if DRM=y select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_MMIO bool "Always insert extra checks around mmio access by default" @@ -58,16 +58,16 @@ config DRM_I915_DEBUG_MMIO If in doubt, say "N". config DRM_I915_DEBUG_GEM - bool "Insert extra checks into the GEM internals" - default n - depends on DRM_I915_WERROR - help - Enable extra sanity checks (including BUGs) along the GEM driver - paths that may slow the system down and if hit hang the machine. + bool "Insert extra checks into the GEM internals" + default n + depends on DRM_I915_WERROR + help + Enable extra sanity checks (including BUGs) along the GEM driver + paths that may slow the system down and if hit hang the machine. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" @@ -110,41 +110,41 @@ config DRM_I915_TRACE_GTT If in doubt, say "N". config DRM_I915_SW_FENCE_DEBUG_OBJECTS - bool "Enable additional driver debugging for fence objects" - depends on DRM_I915 - select DEBUG_OBJECTS - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for fence objects" + depends on DRM_I915 + select DEBUG_OBJECTS + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SW_FENCE_CHECK_DAG - bool "Enable additional driver debugging for detecting dependency cycles" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_GUC - bool "Enable additional driver debugging for GuC" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will help resolve GuC related issues. + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SELFTEST bool "Enable selftests upon driver load" @@ -177,15 +177,15 @@ config DRM_I915_SELFTEST_BROKEN If in doubt, say "N". config DRM_I915_LOW_LEVEL_TRACEPOINTS - bool "Enable low level request tracing events" - depends on DRM_I915 - default n - help - Choose this option to turn on low level request tracing events. - This provides the ability to precisely monitor engine utilisation - and also analyze the request dependency resolving timeline. + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE bool "Enable extra debug warnings for vblank evasion" From 8f8b1171e1a514c2fdfd388b662a7e5b34e839a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Oct 2019 22:09:41 +0100 Subject: [PATCH 010/159] drm/i915/perf: Wean ourselves off dev_priv Use the local uncore accessors for the GT rather than using the [not-so] magic global dev_priv mmio routines. In the process, we also teach the perf stream to use backpointers to the i915_perf rather than digging it out of dev_priv. v2: Rebase onto i915_perf_types.h Signed-off-by: Chris Wilson Cc: Umesh Nerlige Ramappa Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20191007140812.10963-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20191007210942.18145-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 56 +- drivers/gpu/drm/i915/i915_perf.c | 713 ++++++++++++------------- drivers/gpu/drm/i915/i915_perf_types.h | 73 ++- 3 files changed, 418 insertions(+), 424 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1b8fd3a38c66..dd0eb8dc4491 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1332,61 +1332,7 @@ struct drm_i915_private { struct intel_runtime_pm runtime_pm; - struct { - bool initialized; - - struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; - - /* - * Lock associated with adding/modifying/removing OA configs - * in dev_priv->perf.metrics_idr. - */ - struct mutex metrics_lock; - - /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. - */ - struct idr metrics_idr; - - /* - * Lock associated with anything below within this structure - * except exclusive_stream. - */ - struct mutex lock; - struct list_head streams; - - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. - */ - struct i915_perf_stream *exclusive_stream; - - /** - * For rate limiting any notifications of spurious - * invalid OA reports - */ - struct ratelimit_state spurious_report_rs; - - struct i915_oa_config test_config; - - u32 gen7_latched_oastatus1; - u32 ctx_oactxctrl_offset; - u32 ctx_flexeu0_offset; - - /** - * The RPT_ID/reason field for Gen8+ includes a bit - * to determine if the CTX ID in the report is valid - * but the specific bit differs between Gen 8 and 9 - */ - u32 gen8_valid_ctx_bit; - - struct i915_oa_ops ops; - const struct i915_oa_format *oa_formats; - } perf; + struct i915_perf perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 231388d06c82..5a25bbeb6c7a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -367,8 +367,7 @@ struct perf_open_properties { static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static void free_oa_config(struct i915_oa_config *oa_config) { if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); @@ -379,53 +378,52 @@ static void free_oa_config(struct drm_i915_private *dev_priv, kfree(oa_config); } -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static void put_oa_config(struct i915_oa_config *oa_config) { if (!atomic_dec_and_test(&oa_config->ref_count)) return; - free_oa_config(dev_priv, oa_config); + free_oa_config(oa_config); } -static int get_oa_config(struct drm_i915_private *dev_priv, +static int get_oa_config(struct i915_perf *perf, int metrics_set, struct i915_oa_config **out_config) { int ret; if (metrics_set == 1) { - *out_config = &dev_priv->perf.test_config; - atomic_inc(&dev_priv->perf.test_config.ref_count); + *out_config = &perf->test_config; + atomic_inc(&perf->test_config.ref_count); return 0; } - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) return ret; - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); + *out_config = idr_find(&perf->metrics_idr, metrics_set); if (!*out_config) ret = -EINVAL; else atomic_inc(&(*out_config)->ref_count); - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); return ret; } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; - return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; + return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + struct intel_uncore *uncore = stream->gt->uncore; + u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } @@ -456,7 +454,6 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) */ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = stream->oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; @@ -479,7 +476,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aged_tail = stream->oa_buffer.tails[aged_idx].offset; aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); + hw_tail = stream->perf->ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -655,7 +652,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -740,7 +737,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK); if (reason == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -755,7 +752,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -789,7 +786,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.exclusive_stream->ctx || + if (!stream->perf->exclusive_stream->ctx || stream->specific_ctx_id == ctx_id || stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { @@ -798,7 +795,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.exclusive_stream->ctx && + if (stream->perf->exclusive_stream->ctx && stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -830,7 +827,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, GEN8_OAHEADPTR, + head & GEN8_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -864,14 +862,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; u32 oastatus; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -896,14 +894,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -911,8 +909,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - I915_WRITE(GEN8_OASTATUS, - oastatus & ~GEN8_OASTATUS_REPORT_LOST); + intel_uncore_write(uncore, GEN8_OASTATUS, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); } return gen8_append_oa_reports(stream, buf, count, offset); @@ -943,7 +941,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -1017,7 +1015,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -1043,9 +1041,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - GEN7_OASTATUS2_MEM_SELECT_GGTT)); + intel_uncore_write(uncore, GEN7_OASTATUS2, + (head & GEN7_OASTATUS2_HEAD_MASK) | + GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1075,21 +1073,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; u32 oastatus1; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits * that have already been reported to userspace. */ - oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1; + oastatus1 &= ~stream->perf->gen7_latched_oastatus1; /* We treat OABUFFER_OVERFLOW as a significant error: * @@ -1120,10 +1118,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -1131,7 +1129,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - dev_priv->perf.gen7_latched_oastatus1 |= + stream->perf->gen7_latched_oastatus1 |= GEN7_OASTATUS1_REPORT_LOST; } @@ -1196,9 +1194,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - return dev_priv->perf.ops.read(stream, buf, count, offset); + return stream->perf->ops.read(stream, buf, count, offset); } static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) @@ -1239,14 +1235,13 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; struct intel_context *ce; ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); - switch (INTEL_GEN(i915)) { + switch (INTEL_GEN(ce->engine->i915)) { case 7: { /* * On Haswell we don't do any post processing of the reports @@ -1260,7 +1255,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (USES_GUC_SUBMISSION(i915)) { + if (USES_GUC_SUBMISSION(ce->engine->i915)) { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1296,7 +1291,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } default: - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(INTEL_GEN(ce->engine->i915)); } ce->tag = stream->specific_ctx_id_mask; @@ -1340,38 +1335,38 @@ free_oa_buffer(struct i915_perf_stream *stream) static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - BUG_ON(stream != dev_priv->perf.exclusive_stream); + BUG_ON(stream != perf->exclusive_stream); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ - mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_lock(&perf->i915->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); + mutex_unlock(&perf->i915->drm.struct_mutex); free_oa_buffer(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref); if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(dev_priv, stream->oa_config); + put_oa_config(stream->oa_config); - if (dev_priv->perf.spurious_report_rs.missed) { + if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - dev_priv->perf.spurious_report_rs.missed); + perf->spurious_report_rs.missed); } } static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; @@ -1380,13 +1375,14 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, - gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ + intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN7_OABUFFER, gtt_offset); + intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */ + gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1398,7 +1394,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) * already seen since they can't be cleared while periodic * sampling is enabled. */ - dev_priv->perf.gen7_latched_oastatus1 = 0; + stream->perf->gen7_latched_oastatus1 = 0; /* NB: although the OA buffer will initially be allocated * zeroed via shmfs (and so this memset is redundant when @@ -1421,17 +1417,17 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - I915_WRITE(GEN8_OASTATUS, 0); - I915_WRITE(GEN8_OAHEADPTR, gtt_offset); + intel_uncore_write(uncore, GEN8_OASTATUS, 0); + intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN8_OABUFFER_UDW, 0); + intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); /* * PRM says: @@ -1441,9 +1437,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) * to enable proper functionality of the overflow * bit." */ - I915_WRITE(GEN8_OABUFFER, gtt_offset | + intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1482,7 +1478,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; - struct drm_i915_private *dev_priv = stream->dev_priv; struct i915_vma *vma; int ret; @@ -1492,7 +1487,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); + bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); return PTR_ERR(bo); @@ -1533,7 +1528,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, +static void config_oa_regs(struct intel_uncore *uncore, const struct i915_oa_reg *regs, u32 n_regs) { @@ -1542,7 +1537,7 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, for (i = 0; i < n_regs; i++) { const struct i915_oa_reg *reg = regs + i; - I915_WRITE(reg->addr, reg->value); + intel_uncore_write(uncore, reg->addr, reg->value); } } @@ -1575,7 +1570,7 @@ static void delay_after_mux(void) static int hsw_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; const struct i915_oa_config *oa_config = stream->oa_config; /* @@ -1588,15 +1583,15 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * count the events from non-render domain. Unit level clock * gating for RCS should also be disabled. */ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + GEN7_DOP_CLOCK_GATE_ENABLE, 0); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len); delay_after_mux(); - config_oa_regs(dev_priv, oa_config->b_counter_regs, + config_oa_regs(uncore, oa_config->b_counter_regs, oa_config->b_counter_regs_len); return 0; @@ -1604,15 +1599,14 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) static void hsw_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & - ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | - GEN7_DOP_CLOCK_GATE_ENABLE)); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, @@ -1647,9 +1641,8 @@ static void gen8_update_reg_state_unlocked(const struct intel_context *ce, const struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = ce->engine->i915; - u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; + u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ i915_reg_t flex_regs[] = { EU_PERF_CNTL0, @@ -1672,7 +1665,8 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); - reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu); + reg_state[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu); } struct flex { @@ -1827,9 +1821,9 @@ static int gen8_configure_context(struct i915_gem_context *ctx, static int gen8_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config) { - struct drm_i915_private *i915 = stream->dev_priv; + struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ - const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) struct flex regs[] = { { @@ -1838,7 +1832,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - i915->perf.ctx_oactxctrl_offset + 1, + stream->perf->ctx_oactxctrl_offset + 1, ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME) @@ -1922,7 +1916,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, static int gen8_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; const struct i915_oa_config *oa_config = stream->oa_config; int ret; @@ -1949,10 +1943,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN_RANGE(dev_priv, 9, 11)) { - I915_WRITE(GEN8_OA_DEBUG, - _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { + intel_uncore_write(uncore, GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); } /* @@ -1964,10 +1958,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len); delay_after_mux(); - config_oa_regs(dev_priv, oa_config->b_counter_regs, + config_oa_regs(uncore, oa_config->b_counter_regs, oa_config->b_counter_regs_len); return 0; @@ -1975,30 +1969,28 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) static void gen8_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static void gen10_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ - I915_WRITE(RPM_CONFIG1, - I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; struct i915_gem_context *ctx = stream->ctx; u32 ctx_id = stream->specific_ctx_id; bool periodic = stream->periodic; @@ -2016,19 +2008,19 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) */ gen7_init_oa_buffer(stream); - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); + intel_uncore_write(uncore, GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->gt->uncore; u32 report_format = stream->oa_buffer.format; /* @@ -2047,9 +2039,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) * filtering and instead filter on the cpu based on the context-id * field of reports */ - I915_WRITE(GEN8_OACONTROL, (report_format << - GEN8_OA_REPORT_FORMAT_SHIFT) | - GEN8_OA_COUNTER_ENABLE); + intel_uncore_write(uncore, GEN8_OACONTROL, + (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); } /** @@ -2063,9 +2055,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) */ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_enable(stream); if (stream->periodic) hrtimer_start(&stream->poll_check_timer, @@ -2075,7 +2065,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->gt->uncore; intel_uncore_write(uncore, GEN7_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2086,7 +2076,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream) static void gen8_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->gt->uncore; intel_uncore_write(uncore, GEN8_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2105,9 +2095,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) */ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_disable(stream); + stream->perf->ops.oa_disable(stream); if (stream->periodic) hrtimer_cancel(&stream->poll_check_timer); @@ -2144,7 +2132,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_perf_open_param *param, struct perf_open_properties *props) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; int format_size; int ret; @@ -2152,7 +2140,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * reason then don't let userspace try their luck with config * IDs */ - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -2162,7 +2150,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!dev_priv->perf.ops.enable_metric_set) { + if (!perf->ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } @@ -2171,7 +2159,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (dev_priv->perf.exclusive_stream) { + if (perf->exclusive_stream) { DRM_DEBUG("OA unit already in use\n"); return -EBUSY; } @@ -2183,7 +2171,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = dev_priv->perf.oa_formats[props->oa_format].size; + format_size = perf->oa_formats[props->oa_format].size; stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; @@ -2193,7 +2181,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; stream->oa_buffer.format = - dev_priv->perf.oa_formats[props->oa_format].format; + perf->oa_formats[props->oa_format].format; stream->periodic = props->oa_periodic; if (stream->periodic) @@ -2207,7 +2195,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + ret = get_oa_config(perf, props->metrics_set, &stream->oa_config); if (ret) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); goto err_config; @@ -2225,27 +2213,27 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + stream->wakeref = intel_runtime_pm_get(stream->gt->uncore->rpm); + intel_uncore_forcewake_get(stream->gt->uncore, FORCEWAKE_ALL); ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); + ret = i915_mutex_lock_interruptible(&stream->perf->i915->drm); if (ret) goto err_lock; stream->ops = &i915_oa_stream_ops; - dev_priv->perf.exclusive_stream = stream; + perf->exclusive_stream = stream; - ret = dev_priv->perf.ops.enable_metric_set(stream); + ret = perf->ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&stream->perf->i915->drm.struct_mutex); hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -2256,18 +2244,18 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return 0; err_enable: - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); + mutex_unlock(&stream->perf->i915->drm.struct_mutex); err_lock: free_oa_buffer(stream); err_oa_buf_alloc: - put_oa_config(dev_priv, stream->oa_config); + put_oa_config(stream->oa_config); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref); err_config: if (stream->ctx) @@ -2359,7 +2347,7 @@ static ssize_t i915_perf_read(struct file *file, loff_t *ppos) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; ssize_t ret; /* To ensure it's handled consistently we simply treat all reads of a @@ -2382,15 +2370,15 @@ static ssize_t i915_perf_read(struct file *file, if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } while (ret == -EAGAIN); } else { - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /* We allow the poll checking to sometimes report false positive EPOLLIN @@ -2428,7 +2416,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) /** * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream - * @dev_priv: i915 device instance * @stream: An i915 perf stream * @file: An i915 perf stream file * @wait: poll() state table @@ -2437,15 +2424,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that * will be woken for new stream data. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: any poll events that are ready without sleeping */ -static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, - struct i915_perf_stream *stream, - struct file *file, - poll_table *wait) +static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait) { __poll_t events = 0; @@ -2479,12 +2465,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; __poll_t ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_poll_locked(dev_priv, stream, file, wait); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_poll_locked(stream, file, wait); + mutex_unlock(&perf->lock); return ret; } @@ -2543,7 +2529,7 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) * @cmd: the ioctl request * @arg: the ioctl data * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: zero on success or a negative error code. Returns -EINVAL for @@ -2581,12 +2567,12 @@ static long i915_perf_ioctl(struct file *file, unsigned long arg) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; long ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_ioctl_locked(stream, cmd, arg); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); return ret; } @@ -2598,7 +2584,7 @@ static long i915_perf_ioctl(struct file *file, * Frees all resources associated with the given i915 perf @stream, disabling * any associated data capture in the process. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. */ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) @@ -2631,14 +2617,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); i915_perf_destroy_locked(stream); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); /* Release the reference the perf stream kept on the driver. */ - drm_dev_put(&dev_priv->drm); + drm_dev_put(&perf->i915->drm); return 0; } @@ -2660,7 +2646,7 @@ static const struct file_operations fops = { /** * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` * @props: individually validated u64 property value pairs * @file: drm file @@ -2668,7 +2654,7 @@ static const struct file_operations fops = { * See i915_perf_ioctl_open() for interface details. * * Implements further stream config validation and stream initialization on - * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex + * behalf of i915_perf_open_ioctl() with the &perf->lock mutex * taken to serialize with any non-file-operation driver hooks. * * Note: at this point the @props have only been validated in isolation and @@ -2683,7 +2669,7 @@ static const struct file_operations fops = { * Returns: zero on success or a negative error code. */ static int -i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, +i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_perf_open_param *param, struct perf_open_properties *props, struct drm_file *file) @@ -2722,7 +2708,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(dev_priv) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option @@ -2743,7 +2729,8 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_ctx; } - stream->dev_priv = dev_priv; + stream->perf = perf; + stream->gt = &perf->i915->gt; stream->ctx = specific_ctx; ret = i915_oa_stream_init(stream, param, props); @@ -2759,7 +2746,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_flags; } - list_add(&stream->link, &dev_priv->perf.streams); + list_add(&stream->link, &perf->streams); if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; @@ -2778,7 +2765,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, /* Take a reference on the driver that will be kept with stream_fd * until its release. */ - drm_dev_get(&dev_priv->drm); + drm_dev_get(&perf->i915->drm); return stream_fd; @@ -2796,15 +2783,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, return ret; } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); } /** * read_properties_unlocked - validate + copy userspace stream open properties - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @uprops: The array of u64 key value pairs given by userspace * @n_props: The number of key value pairs expected in @uprops * @props: The stream configuration built up while validating properties @@ -2817,7 +2804,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) * we shouldn't validate or assume anything about ordering here. This doesn't * rule out defining new properties with ordering requirements in the future. */ -static int read_properties_unlocked(struct drm_i915_private *dev_priv, +static int read_properties_unlocked(struct i915_perf *perf, u64 __user *uprops, u32 n_props, struct perf_open_properties *props) @@ -2883,7 +2870,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, value); return -EINVAL; } - if (!dev_priv->perf.oa_formats[value].size) { + if (!perf->oa_formats[value].size) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -2904,7 +2891,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, */ BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = oa_exponent_to_ns(dev_priv, value); + oa_period = oa_exponent_to_ns(perf, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -2958,7 +2945,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, * mutex to avoid an awkward lockdep with mmap_sem. * * Most of the implementation details are handled by - * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock + * i915_perf_open_ioctl_locked() after taking the &perf->lock * mutex for serializing with any non-file-operation driver hooks. * * Return: A newly opened i915 Perf stream file descriptor or negative @@ -2967,13 +2954,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_open_param *param = data; struct perf_open_properties props; u32 known_open_flags; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -2986,124 +2973,127 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = read_properties_unlocked(dev_priv, + ret = read_properties_unlocked(perf, u64_to_user_ptr(param->properties_ptr), param->num_properties, &props); if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_open_ioctl_locked(perf, param, &props, file); + mutex_unlock(&perf->lock); return ret; } /** * i915_perf_register - exposes i915-perf to userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * In particular OA metric sets are advertised under a sysfs metrics/ * directory allowing userspace to enumerate valid IDs that can be * used to open an i915-perf stream. */ -void i915_perf_register(struct drm_i915_private *dev_priv) +void i915_perf_register(struct drm_i915_private *i915) { + struct i915_perf *perf = &i915->perf; int ret; - if (!dev_priv->perf.initialized) + if (!perf->i915) return; /* To be sure we're synchronized with an attempted * i915_perf_open_ioctl(); considering that we register after * being exposed to userspace. */ - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); - dev_priv->perf.metrics_kobj = + perf->metrics_kobj = kobject_create_and_add("metrics", - &dev_priv->drm.primary->kdev->kobj); - if (!dev_priv->perf.metrics_kobj) + &i915->drm.primary->kdev->kobj); + if (!perf->metrics_kobj) goto exit; - sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); + sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); - if (INTEL_GEN(dev_priv) >= 11) { - i915_perf_load_test_config_icl(dev_priv); - } else if (IS_CANNONLAKE(dev_priv)) { - i915_perf_load_test_config_cnl(dev_priv); - } else if (IS_COFFEELAKE(dev_priv)) { - if (IS_CFL_GT2(dev_priv)) - i915_perf_load_test_config_cflgt2(dev_priv); - if (IS_CFL_GT3(dev_priv)) - i915_perf_load_test_config_cflgt3(dev_priv); - } else if (IS_GEMINILAKE(dev_priv)) { - i915_perf_load_test_config_glk(dev_priv); - } else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) - i915_perf_load_test_config_kblgt2(dev_priv); - else if (IS_KBL_GT3(dev_priv)) - i915_perf_load_test_config_kblgt3(dev_priv); - } else if (IS_BROXTON(dev_priv)) { - i915_perf_load_test_config_bxt(dev_priv); - } else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) - i915_perf_load_test_config_sklgt2(dev_priv); - else if (IS_SKL_GT3(dev_priv)) - i915_perf_load_test_config_sklgt3(dev_priv); - else if (IS_SKL_GT4(dev_priv)) - i915_perf_load_test_config_sklgt4(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - i915_perf_load_test_config_chv(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - i915_perf_load_test_config_bdw(dev_priv); - } else if (IS_HASWELL(dev_priv)) { - i915_perf_load_test_config_hsw(dev_priv); -} + if (INTEL_GEN(i915) >= 11) { + i915_perf_load_test_config_icl(i915); + } else if (IS_CANNONLAKE(i915)) { + i915_perf_load_test_config_cnl(i915); + } else if (IS_COFFEELAKE(i915)) { + if (IS_CFL_GT2(i915)) + i915_perf_load_test_config_cflgt2(i915); + if (IS_CFL_GT3(i915)) + i915_perf_load_test_config_cflgt3(i915); + } else if (IS_GEMINILAKE(i915)) { + i915_perf_load_test_config_glk(i915); + } else if (IS_KABYLAKE(i915)) { + if (IS_KBL_GT2(i915)) + i915_perf_load_test_config_kblgt2(i915); + else if (IS_KBL_GT3(i915)) + i915_perf_load_test_config_kblgt3(i915); + } else if (IS_BROXTON(i915)) { + i915_perf_load_test_config_bxt(i915); + } else if (IS_SKYLAKE(i915)) { + if (IS_SKL_GT2(i915)) + i915_perf_load_test_config_sklgt2(i915); + else if (IS_SKL_GT3(i915)) + i915_perf_load_test_config_sklgt3(i915); + else if (IS_SKL_GT4(i915)) + i915_perf_load_test_config_sklgt4(i915); + } else if (IS_CHERRYVIEW(i915)) { + i915_perf_load_test_config_chv(i915); + } else if (IS_BROADWELL(i915)) { + i915_perf_load_test_config_bdw(i915); + } else if (IS_HASWELL(i915)) { + i915_perf_load_test_config_hsw(i915); + } - if (dev_priv->perf.test_config.id == 0) + if (perf->test_config.id == 0) goto sysfs_error; - ret = sysfs_create_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + ret = sysfs_create_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); if (ret) goto sysfs_error; - atomic_set(&dev_priv->perf.test_config.ref_count, 1); + atomic_set(&perf->test_config.ref_count, 1); goto exit; sysfs_error: - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; exit: - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /** * i915_perf_unregister - hide i915-perf from userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * i915-perf state cleanup is split up into an 'unregister' and * 'deinit' phase where the interface is first hidden from * userspace by i915_perf_unregister() before cleaning up * remaining state in i915_perf_fini(). */ -void i915_perf_unregister(struct drm_i915_private *dev_priv) +void i915_perf_unregister(struct drm_i915_private *i915) { - if (!dev_priv->perf.metrics_kobj) + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj) return; - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; } -static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) { static const i915_reg_t flex_eu_regs[] = { EU_PERF_CNTL0, @@ -3123,7 +3113,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) return false; } -static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || @@ -3133,7 +3123,7 @@ static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr <= i915_mmio_reg_offset(OACEC7_1)); } -static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && @@ -3144,34 +3134,34 @@ static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } -static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); } -static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen8_is_valid_mux_addr(dev_priv, addr) || + return gen8_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } -static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } -static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x182300 && addr <= 0x1823A4); } @@ -3194,8 +3184,8 @@ static u32 mask_reg_value(u32 reg, u32 val) return val; } -static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, - bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), +static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, + bool (*is_valid)(struct i915_perf *perf, u32 addr), u32 __user *regs, u32 n_regs) { @@ -3225,7 +3215,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, if (err) goto addr_err; - if (!is_valid(dev_priv, addr)) { + if (!is_valid(perf, addr)) { DRM_DEBUG("Invalid oa_reg address: %X\n", addr); err = -EINVAL; goto addr_err; @@ -3258,7 +3248,7 @@ static ssize_t show_dynamic_id(struct device *dev, return sprintf(buf, "%d\n", oa_config->id); } -static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, +static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf, struct i915_oa_config *oa_config) { sysfs_attr_init(&oa_config->sysfs_metric_id.attr); @@ -3273,7 +3263,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, oa_config->sysfs_metric.name = oa_config->uuid; oa_config->sysfs_metric.attrs = oa_config->attrs; - return sysfs_create_group(dev_priv->perf.metrics_kobj, + return sysfs_create_group(perf->metrics_kobj, &oa_config->sysfs_metric); } @@ -3293,17 +3283,17 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_oa_config *args = data; struct i915_oa_config *oa_config, *tmp; int err, id; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -3341,8 +3331,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config->mux_regs_len = args->n_mux_regs; oa_config->mux_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_mux_reg, + alloc_oa_regs(perf, + perf->ops.is_valid_mux_reg, u64_to_user_ptr(args->mux_regs_ptr), args->n_mux_regs); @@ -3354,8 +3344,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config->b_counter_regs_len = args->n_boolean_regs; oa_config->b_counter_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_b_counter_reg, + alloc_oa_regs(perf, + perf->ops.is_valid_b_counter_reg, u64_to_user_ptr(args->boolean_regs_ptr), args->n_boolean_regs); @@ -3365,7 +3355,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, goto reg_err; } - if (INTEL_GEN(dev_priv) < 8) { + if (INTEL_GEN(perf->i915) < 8) { if (args->n_flex_regs != 0) { err = -EINVAL; goto reg_err; @@ -3373,8 +3363,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } else { oa_config->flex_regs_len = args->n_flex_regs; oa_config->flex_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_flex_reg, + alloc_oa_regs(perf, + perf->ops.is_valid_flex_reg, u64_to_user_ptr(args->flex_regs_ptr), args->n_flex_regs); @@ -3385,14 +3375,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + err = mutex_lock_interruptible(&perf->metrics_lock); if (err) goto reg_err; /* We shouldn't have too many configs, so this iteration shouldn't be * too costly. */ - idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + idr_for_each_entry(&perf->metrics_idr, tmp, id) { if (!strcmp(tmp->uuid, oa_config->uuid)) { DRM_DEBUG("OA config already exists with this uuid\n"); err = -EADDRINUSE; @@ -3400,14 +3390,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } - err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + err = create_dynamic_oa_sysfs_entry(perf, oa_config); if (err) { DRM_DEBUG("Failed to create sysfs entry for OA config\n"); goto sysfs_err; } /* Config id 0 is invalid, id 1 for kernel stored test config. */ - oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL); if (oa_config->id < 0) { @@ -3416,16 +3406,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, goto sysfs_err; } - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); return oa_config->id; sysfs_err: - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); reg_err: - put_oa_config(dev_priv, oa_config); + put_oa_config(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3444,12 +3434,12 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; u64 *arg = data; struct i915_oa_config *oa_config; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3459,11 +3449,11 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, return -EACCES; } - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) goto lock_err; - oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { DRM_DEBUG("Failed to remove unknown OA config\n"); ret = -ENOENT; @@ -3472,17 +3462,17 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(dev_priv->perf.metrics_kobj, + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); - idr_remove(&dev_priv->perf.metrics_idr, *arg); + idr_remove(&perf->metrics_idr, *arg); DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(dev_priv, oa_config); + put_oa_config(oa_config); config_err: - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); lock_err: return ret; } @@ -3531,103 +3521,104 @@ static struct ctl_table dev_root[] = { /** * i915_perf_init - initialize i915-perf state on module load - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. * * Note: i915-perf initialization is split into an 'init' and 'register' * phase with the i915_perf_register() exposing state to userspace. */ -void i915_perf_init(struct drm_i915_private *dev_priv) +void i915_perf_init(struct drm_i915_private *i915) { - if (IS_HASWELL(dev_priv)) { - dev_priv->perf.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = - hsw_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = NULL; - dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.ops.read = gen7_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = - gen7_oa_hw_tail_read; + struct i915_perf *perf = &i915->perf; - dev_priv->perf.oa_formats = hsw_oa_formats; - } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { + /* XXX const struct i915_perf_ops! */ + + if (IS_HASWELL(i915)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = NULL; + perf->ops.enable_metric_set = hsw_enable_metric_set; + perf->ops.disable_metric_set = hsw_disable_metric_set; + perf->ops.oa_enable = gen7_oa_enable; + perf->ops.oa_disable = gen7_oa_disable; + perf->ops.read = gen7_oa_read; + perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + + perf->oa_formats = hsw_oa_formats; + } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - dev_priv->perf.oa_formats = gen8_plus_oa_formats; + perf->oa_formats = gen8_plus_oa_formats; - dev_priv->perf.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.ops.read = gen8_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; + perf->ops.read = gen8_oa_read; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (IS_GEN_RANGE(dev_priv, 8, 9)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + if (IS_GEN_RANGE(i915, 8, 9)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen8_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.ops.is_valid_mux_reg = + if (IS_CHERRYVIEW(i915)) { + perf->ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen8_disable_metric_set; - if (IS_GEN(dev_priv, 8)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.ctx_flexeu0_offset = 0x2ce; + if (IS_GEN(i915, 8)) { + perf->ctx_oactxctrl_offset = 0x120; + perf->ctx_flexeu0_offset = 0x2ce; - dev_priv->perf.gen8_valid_ctx_bit = BIT(25); + perf->gen8_valid_ctx_bit = BIT(25); } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GEN_RANGE(dev_priv, 10, 11)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen10_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; - if (IS_GEN(dev_priv, 10)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + if (IS_GEN(i915, 10)) { + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x124; - dev_priv->perf.ctx_flexeu0_offset = 0x78e; + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; } - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } } - if (dev_priv->perf.ops.enable_metric_set) { - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); + if (perf->ops.enable_metric_set) { + INIT_LIST_HEAD(&perf->streams); + mutex_init(&perf->lock); oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); + perf->sysctl_header = register_sysctl_table(dev_root); - mutex_init(&dev_priv->perf.metrics_lock); - idr_init(&dev_priv->perf.metrics_idr); + mutex_init(&perf->metrics_lock); + idr_init(&perf->metrics_idr); /* We set up some ratelimit state to potentially throttle any * _NOTES about spurious, invalid OA reports which we don't @@ -3639,44 +3630,40 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * * Using the same limiting factors as printk_ratelimit() */ - ratelimit_state_init(&dev_priv->perf.spurious_report_rs, - 5 * HZ, 10); + ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); /* Since we use a DRM_NOTE for spurious reports it would be * inconsistent to let __ratelimit() automatically print a * warning for throttling. */ - ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, + ratelimit_set_flags(&perf->spurious_report_rs, RATELIMIT_MSG_ON_RELEASE); - dev_priv->perf.initialized = true; + perf->i915 = i915; } } static int destroy_config(int id, void *p, void *data) { - struct drm_i915_private *dev_priv = data; - struct i915_oa_config *oa_config = p; - - put_oa_config(dev_priv, oa_config); - + put_oa_config(p); return 0; } /** * i915_perf_fini - Counter part to i915_perf_init() - * @dev_priv: i915 device instance + * @i915: i915 device instance */ -void i915_perf_fini(struct drm_i915_private *dev_priv) +void i915_perf_fini(struct drm_i915_private *i915) { - if (!dev_priv->perf.initialized) + struct i915_perf *perf = &i915->perf; + + if (!perf->i915) return; - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); - idr_destroy(&dev_priv->perf.metrics_idr); + idr_for_each(&perf->metrics_idr, destroy_config, perf); + idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(dev_priv->perf.sysctl_header); + unregister_sysctl_table(perf->sysctl_header); - memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops)); - - dev_priv->perf.initialized = false; + memset(&perf->ops, 0, sizeof(perf->ops)); + perf->i915 = NULL; } diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index edcab2df74fb..3c6246064a0b 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -124,9 +125,14 @@ struct i915_perf_stream_ops { */ struct i915_perf_stream { /** - * @dev_priv: i915 drm device + * @perf: i915_perf backpointer */ - struct drm_i915_private *dev_priv; + struct i915_perf *perf; + + /** + * @gt: intel_gt container + */ + struct intel_gt *gt; /** * @link: Links the stream into ``&drm_i915_private->streams`` @@ -266,20 +272,19 @@ struct i915_oa_ops { * @is_valid_b_counter_reg: Validates register's address for * programming boolean counters for a particular platform. */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); + bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); /** * @is_valid_mux_reg: Validates register's address for programming mux * for a particular platform. */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); + bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); /** * @is_valid_flex_reg: Validates register's address for programming * flex EU filtering for a particular platform. */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); + bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); /** * @enable_metric_set: Selects and applies any MUX configuration to set @@ -324,4 +329,60 @@ struct i915_oa_ops { u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); }; +struct i915_perf { + struct drm_i915_private *i915; + + struct kobject *metrics_kobj; + struct ctl_table_header *sysctl_header; + + /* + * Lock associated with adding/modifying/removing OA configs + * in dev_priv->perf.metrics_idr. + */ + struct mutex metrics_lock; + + /* + * List of dynamic configurations, you need to hold + * dev_priv->perf.metrics_lock to access it. + */ + struct idr metrics_idr; + + /* + * Lock associated with anything below within this structure + * except exclusive_stream. + */ + struct mutex lock; + struct list_head streams; + + /* + * The stream currently using the OA unit. If accessed + * outside a syscall associated to its file + * descriptor, you need to hold + * dev_priv->drm.struct_mutex. + */ + struct i915_perf_stream *exclusive_stream; + + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + + struct i915_oa_config test_config; + + u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; + + struct i915_oa_ops ops; + const struct i915_oa_format *oa_formats; +}; + #endif /* _I915_PERF_TYPES_H_ */ From a4c969d107a66369e6b8bf6b2fa28ca01723133b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Oct 2019 22:09:42 +0100 Subject: [PATCH 011/159] drm/i915/perf: Set the exclusive stream under perf->lock The BKL struct_mutex is no more, the only serialisation we required for setting the exclusive stream is already managed by ce->pin_mutex in gen8_configure_all_contexts(). As such, we can manipulate i915_perf.exclusive_stream underneath our own (already held) perf->lock. Signed-off-by: Chris Wilson Cc: Umesh Nerlige Ramappa Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191007140812.10963-2-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20191007210942.18145-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5a25bbeb6c7a..e5973809b69a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1343,10 +1343,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ - mutex_lock(&perf->i915->drm.struct_mutex); perf->exclusive_stream = NULL; perf->ops.disable_metric_set(stream); - mutex_unlock(&perf->i915->drm.struct_mutex); free_oa_buffer(stream); @@ -1853,7 +1851,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); - lockdep_assert_held(&i915->drm.struct_mutex); + lockdep_assert_held(&stream->perf->lock); /* * The OA register config is setup through the context image. This image @@ -2220,10 +2218,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_oa_buf_alloc; - ret = i915_mutex_lock_interruptible(&stream->perf->i915->drm); - if (ret) - goto err_lock; - stream->ops = &i915_oa_stream_ops; perf->exclusive_stream = stream; @@ -2233,8 +2227,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto err_enable; } - mutex_unlock(&stream->perf->i915->drm.struct_mutex); - hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stream->poll_check_timer.function = oa_poll_check_timer_cb; @@ -2246,9 +2238,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, err_enable: perf->exclusive_stream = NULL; perf->ops.disable_metric_set(stream); - mutex_unlock(&stream->perf->i915->drm.struct_mutex); -err_lock: free_oa_buffer(stream); err_oa_buf_alloc: From e45e0003f60d3e9d62d9bb42d72efbbada7132bc Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Mon, 7 Oct 2019 15:16:07 +0530 Subject: [PATCH 012/159] drm/i915/tgl: Add DC3CO required register and bits Adding following definition to i915_reg.h 1. DC_STATE_EN register DC3CO bit fields and masks. DC3CO enable bit will be used by driver to make DC3CO ready for DMC f/w and status bit will be used as DC3CO entry status. 2. Transcoder EXITLINE register and its bit fields and mask. Transcoder EXITLINE enable bit represents PSR2 idle frame reset should be applied at exit line and exitlines mask represent required number of scanlines at which DC3CO exit happens. B.Specs:49196 v1: Use of REG_BIT and using extra space for EXITLINE_ macro definition. [Animesh] v2: Grouping EXITLINE reg bits with EXITLINE(trans) define, no functional change. [Ville] Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191007094607.2111-1-anshuman.gupta@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6d67bd238cfe..d252ee7803cc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4145,6 +4145,7 @@ enum { #define _VTOTAL_A 0x6000c #define _VBLANK_A 0x60010 #define _VSYNC_A 0x60014 +#define _EXITLINE_A 0x60018 #define _PIPEASRC 0x6001c #define _BCLRPAT_A 0x60020 #define _VSYNCSHIFT_A 0x60028 @@ -4196,6 +4197,11 @@ enum { #define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC) #define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A) +#define EXITLINE(trans) _MMIO_TRANS2(trans, _EXITLINE_A) +#define EXITLINE_ENABLE REG_BIT(31) +#define EXITLINE_MASK REG_GENMASK(12, 0) +#define EXITLINE_SHIFT 0 + /* * HSW+ eDP PSR registers * @@ -10291,6 +10297,8 @@ enum skl_power_gate { /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) #define DC_STATE_DISABLE 0 +#define DC_STATE_EN_DC3CO REG_BIT(30) +#define DC_STATE_DC3CO_STATUS REG_BIT(29) #define DC_STATE_EN_UPTO_DC5 (1 << 0) #define DC_STATE_EN_DC9 (1 << 3) #define DC_STATE_EN_UPTO_DC6 (2 << 0) From 19c79ff82b4ad95c6ce1a101dc238a643cea3835 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 3 Oct 2019 13:47:34 +0530 Subject: [PATCH 013/159] drm/i915/tgl: Add DC3CO mask to allowed_dc_mask and gen9_dc_mask Enable dc3co state in enable_dc module param and add dc3co enable mask to allowed_dc_mask and gen9_dc_mask. v1: Adding enable_dc=3,4 options to enable DC3CO with DC5 and DC6 independently. [Animesh] v2: Using a switch statement for cleaner code. [Animesh] Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191003081738.22101-3-anshuman.gupta@intel.com --- .../drm/i915/display/intel_display_power.c | 29 +++++++++++++++---- drivers/gpu/drm/i915/i915_params.c | 3 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0616284c6da6..c00cf061fab4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -695,7 +695,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) u32 mask; mask = DC_STATE_EN_UPTO_DC5; - if (INTEL_GEN(dev_priv) >= 11) + + if (INTEL_GEN(dev_priv) >= 12) + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6 + | DC_STATE_EN_DC9; + else if (IS_GEN(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; else if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_EN_DC9; @@ -3924,14 +3928,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (INTEL_GEN(dev_priv) >= 11) { - max_dc = 2; + if (INTEL_GEN(dev_priv) >= 12) { + max_dc = 4; /* * DC9 has a separate HW flow from the rest of the DC states, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. */ mask = DC_STATE_EN_DC9; + } else if (IS_GEN(dev_priv, 11)) { + max_dc = 2; + mask = DC_STATE_EN_DC9; } else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; @@ -3950,7 +3957,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = enable_dc; } else if (enable_dc == -1) { requested_dc = max_dc; - } else if (enable_dc > max_dc && enable_dc <= 2) { + } else if (enable_dc > max_dc && enable_dc <= 4) { DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n", enable_dc, max_dc); requested_dc = max_dc; @@ -3959,10 +3966,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = max_dc; } - if (requested_dc > 1) + switch (requested_dc) { + case 4: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6; + break; + case 3: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5; + break; + case 2: mask |= DC_STATE_EN_UPTO_DC6; - if (requested_dc > 0) + break; + case 1: mask |= DC_STATE_EN_UPTO_DC5; + break; + } DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 296452f9efe4..4f1806f65040 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400, i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. " - "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); + "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " + "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); i915_param_named_unsafe(enable_fbc, int, 0600, "Enable frame buffer compression for power savings " From 4645e906f2d40ff4000c78373bb932f713259611 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 3 Oct 2019 13:47:35 +0530 Subject: [PATCH 014/159] drm/i915/tgl: Enable DC3CO state in "DC Off" power well Add target_dc_state and used by set_target_dc_state API in order to enable DC3CO state with existing DC states. target_dc_state will enable/disable the desired DC state in DC_STATE_EN reg when "DC Off" power well gets disable/enable. v2: commit log improvement. v3: Used intel_wait_for_register to wait for DC3CO exit. [Imre] Used gen9_set_dc_state() to allow/disallow DC3CO. [Imre] Moved transcoder psr2 exit line enablement from tgl_allow_dc3co() to a appropriate place haswell_crtc_enable(). [Imre] Changed the DC3CO power well enabled call back logic as recommended in review comments. [Imre] v4: Used wait_for_us() instead of intel_wait_for_reg(). [Imre (IRC)] v5: using udelay() instead of waiting for DC3CO exit status. v6: Fixed minor unwanted change. v7: Removed DC3CO powerwell and POWER_DOMAIN_VIDEO. v8: Uniform checks by using only target_dc_state instead of allowed_dc_mask in "DC off" power well callback. [Imre] Adding "DC off" power well id to older platforms. [Imre] Removed psr2_deep_sleep flag from tgl_set_target_dc_state. [Imre] v9: Used switch case for target DC state in gen9_dc_off_power_well_disable(), checking DC3CO state against allowed DC mask, using WARN_ON() in tgl_set_target_dc_state(). [Imre] v10: Code refactoring and using sanitize_target_dc_state(). [Imre] Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191003081738.22101-4-anshuman.gupta@intel.com --- .../drm/i915/display/intel_display_power.c | 80 ++++++++++++++++--- .../drm/i915/display/intel_display_power.h | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c00cf061fab4..812081f5232f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -769,6 +769,52 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) dev_priv->csr.dc_state = val & mask; } +static u32 +sanitize_target_dc_state(struct drm_i915_private *dev_priv, + u32 target_dc_state) +{ + u32 states[] = { + DC_STATE_EN_UPTO_DC6, + DC_STATE_EN_UPTO_DC5, + DC_STATE_EN_DC3CO, + DC_STATE_DISABLE, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + if (target_dc_state != states[i]) + continue; + + if (dev_priv->csr.allowed_dc_mask & target_dc_state) + break; + + target_dc_state = states[i + 1]; + } + + return target_dc_state; +} + +static void tgl_enable_dc3co(struct drm_i915_private *dev_priv) +{ + DRM_DEBUG_KMS("Enabling DC3CO\n"); + gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_disable_dc3co(struct drm_i915_private *dev_priv) +{ + u32 val; + + DRM_DEBUG_KMS("Disabling DC3CO\n"); + val = I915_READ(DC_STATE_EN); + val &= ~DC_STATE_DC3CO_STATUS; + I915_WRITE(DC_STATE_EN, val); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* + * Delay of 200us DC3CO Exit time B.Spec 49196 + */ + usleep_range(200, 210); +} + static void bxt_enable_dc9(struct drm_i915_private *dev_priv) { assert_can_enable_dc9(dev_priv); @@ -936,7 +982,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0; + return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0); } static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) @@ -952,6 +999,11 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = {}; + if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) { + tgl_disable_dc3co(dev_priv); + return; + } + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); @@ -984,10 +1036,17 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, if (!dev_priv->csr.dmc_payload) return; - if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6) + switch (dev_priv->csr.target_dc_state) { + case DC_STATE_EN_DC3CO: + tgl_enable_dc3co(dev_priv); + break; + case DC_STATE_EN_UPTO_DC6: skl_enable_dc6(dev_priv); - else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5) + break; + case DC_STATE_EN_UPTO_DC5: gen9_enable_dc5(dev_priv); + break; + } } static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, @@ -2935,7 +2994,7 @@ static const struct i915_power_well_desc skl_power_wells[] = { .name = "DC off", .domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3017,7 +3076,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = { .name = "DC off", .domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3077,7 +3136,7 @@ static const struct i915_power_well_desc glk_power_wells[] = { .name = "DC off", .domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3246,7 +3305,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = { .name = "DC off", .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3374,7 +3433,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { .name = "DC off", .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3607,7 +3666,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .name = "DC off", .domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -4040,6 +4099,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); + dev_priv->csr.target_dc_state = + sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 737b5def7fc6..7d72faf474b2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -100,6 +100,7 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, + SKL_DISP_DC_OFF, }; #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd0eb8dc4491..887ea4a4d687 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -339,6 +339,7 @@ struct intel_csr { i915_reg_t mmioaddr[20]; u32 mmiodata[20]; u32 dc_state; + u32 target_dc_state; u32 allowed_dc_mask; intel_wakeref_t wakeref; }; From bdacf0871f8771c2763b53efb9f9aa9bdea7b570 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 3 Oct 2019 13:47:36 +0530 Subject: [PATCH 015/159] drm/i915/tgl: Do modeset to enable and configure DC3CO exitline DC3CO enabling B.Specs sequence requires to enable end configure exit scanlines to TRANS_EXITLINE register, programming this register has to be part of modeset sequence as this can't be change when transcoder or port is enabled. When system boots with only eDP panel there may not be real modeset as BIOS has already programmed the necessary registers, therefore it needs to force a modeset to enable and configure DC3CO exitline. v1: Computing dc3co_exitline crtc state from a DP encoder compute config. [Imre] Enabling and disabling DC3CO PSR2 transcoder exitline from encoder pre_enable and post_disable hooks. [Imre] Computing dc3co_exitline instead of has_dc3co_exitline bool. [Imre] v2: Code refactoring for symmetry and to avoid exported function. [Imre] Removing IS_TIGERLAKE check from compute_config, adding PIPE_A restriction and clearing dc3co_exitline state if crtc is not active or it is not PSR2 capable in dc3co exitline compute_config. [Imre] Using GEN >= 12 check in dc3co exitline get_config. [Imre] Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191003081738.22101-5-anshuman.gupta@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 93 ++++++++++++++++++- drivers/gpu/drm/i915/display/intel_display.c | 1 + .../drm/i915/display/intel_display_types.h | 1 + 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3c1e885e0187..6c1315c7bcde 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -45,6 +45,7 @@ #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sprite.h" #include "intel_tc.h" #include "intel_vdsc.h" @@ -3330,6 +3331,86 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, POSTING_READ(intel_dp->regs.dp_tp_ctl); } +static void +tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 val; + + if (!cstate->dc3co_exitline) + return; + + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void +tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + u32 val, exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + + if (!cstate->dc3co_exitline) + return; + + exit_scanlines = cstate->dc3co_exitline; + exit_scanlines <<= EXITLINE_SHIFT; + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + val |= exit_scanlines; + val |= EXITLINE_ENABLE; + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *cstate) +{ + u32 exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 crtc_vdisplay = cstate->base.adjusted_mode.crtc_vdisplay; + + cstate->dc3co_exitline = 0; + + if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO)) + return; + + /* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/ + if (to_intel_crtc(cstate->base.crtc)->pipe != PIPE_A || + encoder->port != PORT_A) + return; + + if (!cstate->has_psr2 || !cstate->base.active) + return; + + /* + * DC3CO Exit time 200us B.Spec 49196 + * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 + */ + exit_scanlines = + intel_usecs_to_scanlines(&cstate->base.adjusted_mode, 200) + 1; + + if (WARN_ON(exit_scanlines > crtc_vdisplay)) + return; + + cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines; + DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline); +} + +static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state) +{ + u32 val; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (INTEL_GEN(dev_priv) < 12) + return; + + val = I915_READ(EXITLINE(crtc_state->cpu_transcoder)); + + if (val & EXITLINE_ENABLE) + crtc_state->dc3co_exitline = val & EXITLINE_MASK; +} + static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -3342,6 +3423,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, int level = intel_ddi_dp_level(intel_dp); enum transcoder transcoder = crtc_state->cpu_transcoder; + tgl_set_psr2_transcoder_exitline(crtc_state); intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); @@ -3666,6 +3748,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); + tgl_clear_psr2_transcoder_exitline(old_crtc_state); } static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, @@ -4212,6 +4295,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, break; } + if (encoder->type == INTEL_OUTPUT_EDP) + tgl_dc3co_exitline_get_config(pipe_config); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); @@ -4289,10 +4375,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) { ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); - else + } else { ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + tgl_dc3co_exitline_compute_config(encoder, pipe_config); + } + if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 05fb672a00b9..1a533ccdb54f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12808,6 +12808,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(pixel_multiplier); PIPE_CONF_CHECK_I(output_format); + PIPE_CONF_CHECK_I(dc3co_exitline); PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1602aac7ca0f..40390d855815 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -870,6 +870,7 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + u32 dc3co_exitline; /* * Frequence the dpll for the port should run at. Differs from the From 1c4d821db919366034dc9ff4c01b4b0da0b8e30f Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 3 Oct 2019 13:47:37 +0530 Subject: [PATCH 016/159] drm/i915/tgl: Switch between dc3co and dc5 based on display idleness DC3CO is useful power state, when DMC detects PSR2 idle frame while an active video playback, playing 30fps video on 60hz panel is the classic example of this use case. B.Specs:49196 has a restriction to enable DC3CO only for Video Playback. It will be worthy to enable DC3CO after completion of each pageflip and switch back to DC5 when display is idle because driver doesn't differentiate between video playback and a normal pageflip. We will use Frontbuffer flush call tgl_dc3co_flush() to enable DC3CO state only for ORIGIN_FLIP flush call, because DC3CO state has primarily targeted for VPB use case. We are not interested here for frontbuffer invalidates calls because that triggers PSR2 exit, which will explicitly disable DC3CO. DC5 and DC6 saves more power, but can't be entered during video playback because there are not enough idle frames in a row to meet most PSR2 panel deep sleep entry requirement typically 4 frames. As PSR2 existing implementation is using minimum 6 idle frames for deep sleep, it is safer to enable DC5/6 after 6 idle frames (By scheduling a delayed work of 6 idle frames, once DC3CO has been enabled after a pageflip). After manually waiting for 6 idle frames DC5/6 will be enabled and PSR2 deep sleep idle frames will be restored to 6 idle frames, at this point DMC will triggers DC5/6 once PSR2 enters to deep sleep after 6 idle frames. In future when we will enable S/W PSR2 tracking, we can change the PSR2 required deep sleep idle frames to 1 so DMC can trigger the DC5/6 immediately after S/W manual waiting of 6 idle frames get complete. v2: calculated s/w state to switch over dc3co when there is an update. [Imre] Used cancel_delayed_work_sync() in order to avoid any race with already scheduled delayed work. [Imre] v3: Cancel_delayed_work_sync() may blocked the commit work. hence dropping it, dc5_idle_thread() checks the valid wakeref before putting the reference count, which avoids any chances of dropping a zero wakeref. [Imre (IRC)] v4: Used frontbuffer flush mechanism. [Imre] v5: Used psr.pipe to extract frontbuffer busy bits. [Imre] Used cancel_delayed_work_sync() in encoder disable path. [Imre] Used mod_delayed_work() instead of cancelling and scheduling a delayed work. [Imre] Used psr.lock in tgl_dc5_idle_thread() to enable psr2 deep sleep. [Imre] Removed DC5_REQ_IDLE_FRAMES macro. [Imre] v6: Used dc3co_exitline check instead of TGL and dc3co allowed_dc_mask checks, used delayed_work_pending with the psr lock and removed the psr2_deep_slp_disabled flag. [Imre] v7: Code refactoring, moved most of functional code to inte_psr.c [Imre] Using frontbuffer_bits on psr.pipe check instead of busy_frontbuffer_bits. [Imre] Calculating dc3co_exit_delay in intel_psr_enable_locked. [Imre] Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191003081738.22101-6-anshuman.gupta@intel.com --- .../drm/i915/display/intel_display_power.c | 45 +++++++ .../drm/i915/display/intel_display_power.h | 2 + drivers/gpu/drm/i915/display/intel_psr.c | 114 +++++++++++++++++- drivers/gpu/drm/i915/i915_drv.h | 3 + 4 files changed, 163 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 812081f5232f..6f9e7927e248 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -870,6 +870,51 @@ lookup_power_well(struct drm_i915_private *dev_priv, return &dev_priv->power_domains.power_wells[0]; } +/** + * intel_display_power_set_target_dc_state - Set target dc state. + * @dev_priv: i915 device + * @state: state which needs to be set as target_dc_state. + * + * This function set the "DC off" power well target_dc_state, + * based upon this target_dc_stste, "DC off" power well will + * enable desired DC state. + */ +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state) +{ + struct i915_power_well *power_well; + bool dc_off_enabled; + struct i915_power_domains *power_domains = &dev_priv->power_domains; + + mutex_lock(&power_domains->lock); + power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF); + + if (WARN_ON(!power_well)) + goto unlock; + + state = sanitize_target_dc_state(dev_priv, state); + + if (state == dev_priv->csr.target_dc_state) + goto unlock; + + dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv, + power_well); + /* + * If DC off power well is disabled, need to enable and disable the + * DC off power well to effect target DC state. + */ + if (!dc_off_enabled) + power_well->desc->ops->enable(dev_priv, power_well); + + dev_priv->csr.target_dc_state = state; + + if (!dc_off_enabled) + power_well->desc->ops->disable(dev_priv, power_well); + +unlock: + mutex_unlock(&power_domains->lock); +} + static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) { bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 7d72faf474b2..1da04f3e0fb3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -257,6 +257,8 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915); void intel_display_power_resume_early(struct drm_i915_private *i915); void intel_display_power_suspend(struct drm_i915_private *i915); void intel_display_power_resume(struct drm_i915_private *i915); +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state); const char * intel_display_power_domain_str(enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index b3c7eef53bf3..50f22abcd30e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -534,6 +534,73 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) return trans == TRANSCODER_EDP; } +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +{ + if (!cstate || !cstate->base.active) + return 0; + + return DIV_ROUND_UP(1000 * 1000, + drm_mode_vrefresh(&cstate->base.adjusted_mode)); +} + +static void psr2_program_idle_frames(struct drm_i915_private *dev_priv, + u32 idle_frames) +{ + u32 val; + + idle_frames <<= EDP_PSR2_IDLE_FRAME_SHIFT; + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + val &= ~EDP_PSR2_IDLE_FRAME_MASK; + val |= idle_frames; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv) +{ + psr2_program_idle_frames(dev_priv, 0); + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv) +{ + int idle_frames; + + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + /* + * Restore PSR2 idle frame let's use 6 as the minimum to cover all known + * cases including the off-by-one issue that HW has in some cases. + */ + idle_frames = max(6, dev_priv->vbt.psr.idle_frames); + idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1); + psr2_program_idle_frames(dev_priv, idle_frames); +} + +static void tgl_dc5_idle_thread(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.idle_work.work); + + mutex_lock(&dev_priv->psr.lock); + /* If delayed work is pending, it is not idle */ + if (delayed_work_pending(&dev_priv->psr.idle_work)) + goto unlock; + + DRM_DEBUG_KMS("DC5/6 idle thread\n"); + tgl_psr2_disable_dc3co(dev_priv); +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + +static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->psr.dc3co_enabled) + return; + + cancel_delayed_work(&dev_priv->psr.idle_work); + /* Before PSR2 exit disallow dc3co*/ + tgl_psr2_disable_dc3co(dev_priv); +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -746,6 +813,8 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state); dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline; + dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state); dev_priv->psr.transcoder = crtc_state->cpu_transcoder; /* @@ -829,6 +898,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) } if (dev_priv->psr.psr2_enabled) { + tgl_disallow_dc3co_on_psr2_exit(dev_priv); val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR2_ENABLE)); val &= ~EDP_PSR2_ENABLE; @@ -901,6 +971,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, mutex_unlock(&dev_priv->psr.lock); cancel_work_sync(&dev_priv->psr.work); + cancel_delayed_work_sync(&dev_priv->psr.idle_work); } static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) @@ -1208,6 +1279,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->psr.lock); } +/* + * When we will be completely rely on PSR2 S/W tracking in future, + * intel_psr_flush() will invalidate and flush the PSR for ORIGIN_FLIP + * event also therefore tgl_dc3co_flush() require to be changed + * accrodingly in future. + */ +static void +tgl_dc3co_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits, enum fb_op_origin origin) +{ + u32 delay; + + mutex_lock(&dev_priv->psr.lock); + + if (!dev_priv->psr.dc3co_enabled) + goto unlock; + + if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active) + goto unlock; + + /* + * At every frontbuffer flush flip event modified delay of delayed work, + * when delayed work schedules that means display has been idle. + */ + if (!(frontbuffer_bits & + INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe))) + goto unlock; + + tgl_psr2_enable_dc3co(dev_priv); + /* DC5/DC6 required idle frames = 6 */ + delay = 6 * dev_priv->psr.dc3co_exit_delay; + mod_delayed_work(system_wq, &dev_priv->psr.idle_work, + usecs_to_jiffies(delay)); + +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + /** * intel_psr_flush - Flush PSR * @dev_priv: i915 device @@ -1227,8 +1336,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; - if (origin == ORIGIN_FLIP) + if (origin == ORIGIN_FLIP) { + tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin); return; + } mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { @@ -1284,6 +1395,7 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; INIT_WORK(&dev_priv->psr.work, intel_psr_work); + INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread); mutex_init(&dev_priv->psr.lock); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 887ea4a4d687..bcfb355aab4d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -502,6 +502,9 @@ struct i915_psr { bool sink_not_reliable; bool irq_aux_error; u16 su_x_granularity; + bool dc3co_enabled; + u32 dc3co_exit_delay; + struct delayed_work idle_work; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) From 41286861b4c9abdb4e0b32c6caa0967780e6f803 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 3 Oct 2019 13:47:38 +0530 Subject: [PATCH 017/159] drm/i915/tgl: Add DC3CO counter in i915_dmc_info Adding DC3CO counter in i915_dmc_info debugfs will be useful for DC3CO validation. DMC firmware uses DMC_DEBUG3 register as DC3CO counter register on TGL, as per B.Specs DMC_DEBUG3 is general purpose register. v1: comment modification for DMC_DBUG3. using GEN >= 12 check instead of IS_TIGERLAKE() to print DMC_DEBUG3 counter value. Cc: Jani Nikula Cc: Imre Deak Cc: Animesh Manna Reviewed-by: Imre Deak Signed-off-by: Anshuman Gupta Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20191003081738.22101-7-anshuman.gupta@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +++++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 298a3e879e65..277f31297f29 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2405,6 +2405,13 @@ static int i915_dmc_info(struct seq_file *m, void *unused) if (INTEL_GEN(dev_priv) >= 12) { dc5_reg = TGL_DMC_DEBUG_DC5_COUNT; dc6_reg = TGL_DMC_DEBUG_DC6_COUNT; + /* + * NOTE: DMC_DEBUG3 is a general purpose reg. + * According to B.Specs:49196 DMC f/w reuses DC5/6 counter + * reg for DC3CO debugging and validation, + * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter. + */ + seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3)); } else { dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : SKL_CSR_DC3_DC5_COUNT; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d252ee7803cc..1dc067fc57ab 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7268,6 +7268,8 @@ enum { #define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) #define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) +#define DMC_DEBUG3 _MMIO(0x101090) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) From 20af04f3dd567970508d5ad756e1777ba1286815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Oct 2019 08:03:42 +0100 Subject: [PATCH 018/159] drm/i915/execlists: Assign virtual_engine->uncore from first sibling Copy across the engine->uncore shortcut to the virtual_engine from its first physical engine, similar to the handling of the engine->gt backpointer. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191008070342.4045-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 432b8b60c4c0..6db762c509b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4170,6 +4170,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.i915 = ctx->i915; ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; From 3de1627851321fc5589b2a49bf52755a60c8276f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Oct 2019 08:11:21 +0100 Subject: [PATCH 019/159] drm/i915/selftests: Assign the mock_engine->uncore shortcut Set up the engine->uncore shortcut on mock_engine creation. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191008071121.25088-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 5d43cbc3f345..3d88397c0dbb 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -248,6 +248,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; engine->base.gt = &i915->gt; + engine->base.uncore = i915->gt.uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); From d14a701b00706395de33c61866daa3d424d758fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Oct 2019 15:50:45 +0100 Subject: [PATCH 020/159] drm/i915/selftests: Assign the intel_runtime_pm pointer for mock_uncore Couple up our mock_uncore to know about the fake global device and its runtime powermanagement. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191008145045.23157-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- drivers/gpu/drm/i915/selftests/mock_uncore.c | 5 ++++- drivers/gpu/drm/i915/selftests/mock_uncore.h | 3 ++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 3d88397c0dbb..747f7c7790eb 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -240,6 +240,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); + GEM_BUG_ON(!i915->gt.uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 70a7026db08d..d14c6d8735fe 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -162,7 +162,7 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; - mock_uncore_init(&i915->uncore); + mock_uncore_init(&i915->uncore, i915); i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c index 49585f16d4a2..ca57e4008701 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.c +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -39,8 +39,11 @@ __nop_read(16) __nop_read(32) __nop_read(64) -void mock_uncore_init(struct intel_uncore *uncore) +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915) { + intel_uncore_init_early(uncore, i915); + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop); } diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h index dacb36b5ffcd..8a2cc553f466 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.h +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -25,6 +25,7 @@ #ifndef __MOCK_UNCORE_H #define __MOCK_UNCORE_H -void mock_uncore_init(struct intel_uncore *uncore); +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915); #endif /* !__MOCK_UNCORE_H */ From 23b9e41a3dbdad16266b16d0c16ac629f0b6d732 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 8 Oct 2019 15:01:11 +0100 Subject: [PATCH 021/159] drm/i915/perf: drop list of streams At some point in time there was the idea that we could have multiple stream from the same piece of HW but that never materialized and given the hard time we already have making everything work with the submission side, there is no real point having this list of 1 element around. Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191008140111.5437-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 16 +--------------- drivers/gpu/drm/i915/i915_perf_types.h | 6 ------ 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e5973809b69a..5a34cad7d824 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1407,9 +1407,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ stream->pollin = false; } @@ -1466,10 +1463,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* - * Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ stream->pollin = false; } @@ -2585,8 +2578,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) if (stream->ops->destroy) stream->ops->destroy(stream); - list_del(&stream->link); - if (stream->ctx) i915_gem_context_put(stream->ctx); @@ -2736,8 +2727,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, goto err_flags; } - list_add(&stream->link, &perf->streams); - if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) @@ -2746,7 +2735,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); if (stream_fd < 0) { ret = stream_fd; - goto err_open; + goto err_flags; } if (!(param->flags & I915_PERF_FLAG_DISABLED)) @@ -2759,8 +2748,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, return stream_fd; -err_open: - list_del(&stream->link); err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); @@ -3600,7 +3587,6 @@ void i915_perf_init(struct drm_i915_private *i915) } if (perf->ops.enable_metric_set) { - INIT_LIST_HEAD(&perf->streams); mutex_init(&perf->lock); oa_sample_rate_hard_limit = 1000 * diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 3c6246064a0b..2d17059d32ee 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -134,11 +134,6 @@ struct i915_perf_stream { */ struct intel_gt *gt; - /** - * @link: Links the stream into ``&drm_i915_private->streams`` - */ - struct list_head link; - /** * @wakeref: As we keep the device awake while the perf stream is * active, we track our runtime pm reference for later release. @@ -352,7 +347,6 @@ struct i915_perf { * except exclusive_stream. */ struct mutex lock; - struct list_head streams; /* * The stream currently using the OA unit. If accessed From d99f7b079c2a49b2278fe80192fbca083db87d18 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Oct 2019 11:56:55 +0100 Subject: [PATCH 022/159] drm/i915/gt: Flush submission tasklet before waiting/retiring A common bane of ours is arbitrary delays in ksoftirqd processing our submission tasklet. Give the submission tasklet a kick before we wait to avoid those delays eating into a tight timeout. Signed-off-by: Chris Wilson Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20191008105655.13256-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 33 +++++++++++++-------- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 12 ++++++++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c9e8c8ccbd47..d624752f2a92 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -407,8 +407,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, engine->serial++; /* contexts lost */ } -bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct intel_gt *gt); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +void intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 6220b7151bb9..7e2aa7a6bef0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1040,6 +1040,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } +void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (__tasklet_is_scheduled(t)) { + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + } + + /* Otherwise flush the tasklet if it was running on another cpu */ + tasklet_unlock_wait(t); +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1058,21 +1077,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Waiting to drain ELSP? */ if (execlists_active(&engine->execlists)) { - struct tasklet_struct *t = &engine->execlists.tasklet; - synchronize_hardirq(engine->i915->drm.pdev->irq); - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); + intel_engine_flush_submission(engine); if (execlists_active(&engine->execlists)) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index ca606b79fd5e..cbb4069b11e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,7 @@ * Copyright © 2019 Intel Corporation */ +#include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -19,6 +20,15 @@ static void retire_requests(struct intel_timeline *tl) break; } +static void flush_submission(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt->i915, id) + intel_engine_flush_submission(engine); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; @@ -32,6 +42,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) if (unlikely(timeout < 0)) timeout = -timeout, interruptible = false; + flush_submission(gt); /* kick the ksoftirqd tasklets */ + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) { From 232a6ebae419193f5b8da4fa869ae5089ab105c2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 8 Oct 2019 17:01:14 +0100 Subject: [PATCH 023/159] drm/i915: introduce intel_memory_region Support memory regions, as defined by a given (start, end), and allow creating GEM objects which are backed by said region. The immediate goal here is to have something to represent our device memory, but later on we also want to represent every memory domain with a region, so stolen, shmem, and of course device. At some point we are probably going to want use a common struct here, such that we are better aligned with say TTM. Signed-off-by: Matthew Auld Signed-off-by: Abdiel Janulgue Signed-off-by: Niranjana Vishwanathapura Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191008160116.18379-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/Makefile | 2 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 + drivers/gpu/drm/i915/gem/i915_gem_region.c | 145 +++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_region.h | 28 +++ .../gpu/drm/i915/gem/selftests/huge_pages.c | 78 +++++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_memory_region.c | 191 ++++++++++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 83 ++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + .../drm/i915/selftests/intel_memory_region.c | 115 +++++++++++ .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + drivers/gpu/drm/i915/selftests/mock_region.c | 59 ++++++ drivers/gpu/drm/i915/selftests/mock_region.h | 16 ++ 13 files changed, 729 insertions(+) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_region.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_region.h create mode 100644 drivers/gpu/drm/i915/intel_memory_region.c create mode 100644 drivers/gpu/drm/i915/intel_memory_region.h create mode 100644 drivers/gpu/drm/i915/selftests/intel_memory_region.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_region.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_region.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 45d3be12359d..b6a1fcb810c2 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -51,6 +51,7 @@ i915-y += i915_drv.o \ i915_utils.o \ intel_csr.o \ intel_device_info.o \ + intel_memory_region.o \ intel_pch.o \ intel_pm.o \ intel_runtime_pm.o \ @@ -121,6 +122,7 @@ gem-y += \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ + gem/i915_gem_region.o \ gem/i915_gem_shmem.o \ gem/i915_gem_shrinker.o \ gem/i915_gem_stolen.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index c00b4f077f9e..11390586cfe1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -160,6 +160,15 @@ struct drm_i915_gem_object { atomic_t pages_pin_count; atomic_t shrink_pin; + /** + * Memory region for this object. + */ + struct intel_memory_region *region; + /** + * List of memory region blocks allocated for this object. + */ + struct list_head blocks; + struct sg_table *pages; void *mapping; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c new file mode 100644 index 000000000000..6588e3c99e5d --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_gem_region.h" +#include "i915_drv.h" + +void +i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); + + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +int +i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + struct list_head *blocks = &obj->mm.blocks; + unsigned int flags = I915_ALLOC_MIN_PAGE_SIZE; + resource_size_t size = obj->base.size; + resource_size_t prev_end; + struct i915_buddy_block *block; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); + if (ret) + goto err_free_sg; + + GEM_BUG_ON(list_empty(blocks)); + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = i915_buddy_block_size(&mem->mm, block); + offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(overflows_type(block_size, sg->length)); + + if (offset != prev_end || + add_overflows_t(typeof(sg->length), sg->length, block_size)) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = block_size; + + sg->length = block_size; + + st->nents++; + } else { + sg->length += block_size; + sg_dma_len(sg) += block_size; + } + + prev_end = offset + block_size; + }; + + sg_page_sizes |= sg->length; + sg_mark_end(sg); + i915_sg_trim(st); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err_free_sg: + sg_free_table(st); + kfree(st); + return ret; +} + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem) +{ + INIT_LIST_HEAD(&obj->mm.blocks); + obj->mm.region = intel_memory_region_get(mem); +} + +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) +{ + intel_memory_region_put(obj->mm.region); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + + /* + * NB: Our use of resource_size_t for the size stems from using struct + * resource for the mem->region. We might need to revisit this in the + * future. + */ + + if (!mem) + return ERR_PTR(-ENODEV); + + size = round_up(size, mem->min_page_size); + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); + + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! + */ + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + return mem->ops->create_object(mem, size, flags); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h new file mode 100644 index 000000000000..ebddc86d78f7 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_REGION_H__ +#define __I915_GEM_REGION_H__ + +#include + +struct intel_memory_region; +struct drm_i915_gem_object; +struct sg_table; + +int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); +void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem); +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index e42abddd4a36..b4c390e9fa50 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,6 +8,7 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -17,6 +18,7 @@ #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -452,6 +454,81 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int page_size = BIT(bit); + resource_size_t phys; + + obj = i915_gem_object_create_region(mem, page_size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -1623,6 +1700,7 @@ int i915_gem_huge_page_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bcfb355aab4d..d284b04c492b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -85,6 +85,7 @@ #include "intel_device_info.h" #include "intel_pch.h" #include "intel_runtime_pm.h" +#include "intel_memory_region.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_wopcm.h" diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c new file mode 100644 index 000000000000..2ef67c397fca --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_drv.h" + +static u64 +intel_memory_region_free_pages(struct intel_memory_region *mem, + struct list_head *blocks) +{ + struct i915_buddy_block *block, *on; + u64 size = 0; + + list_for_each_entry_safe(block, on, blocks, link) { + size += i915_buddy_block_size(&mem->mm, block); + i915_buddy_free(&mem->mm, block); + } + INIT_LIST_HEAD(blocks); + + return size; +} + +void +__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks) +{ + mutex_lock(&mem->mm_lock); + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); +} + +void +__intel_memory_region_put_block_buddy(struct i915_buddy_block *block) +{ + struct list_head blocks; + + INIT_LIST_HEAD(&blocks); + list_add(&block->link, &blocks); + __intel_memory_region_put_pages_buddy(block->private, &blocks); +} + +int +__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks) +{ + unsigned long n_pages = size >> ilog2(mem->mm.chunk_size); + unsigned int min_order = 0; + + GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); + GEM_BUG_ON(!list_empty(blocks)); + + if (flags & I915_ALLOC_MIN_PAGE_SIZE) { + min_order = ilog2(mem->min_page_size) - + ilog2(mem->mm.chunk_size); + } + + mutex_lock(&mem->mm_lock); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mem->mm.max_order); + GEM_BUG_ON(order < min_order); + + do { + block = i915_buddy_alloc(&mem->mm, order); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) + goto err_free_blocks; + } while (1); + + n_pages -= BIT(order); + + block->private = mem; + list_add(&block->link, blocks); + + if (!n_pages) + break; + } while (1); + + mutex_unlock(&mem->mm_lock); + return 0; + +err_free_blocks: + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); + return -ENXIO; +} + +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct i915_buddy_block *block; + LIST_HEAD(blocks); + int ret; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks); + if (ret) + return ERR_PTR(ret); + + block = list_first_entry(&blocks, typeof(*block), link); + list_del_init(&block->link); + return block; +} + +int intel_memory_region_init_buddy(struct intel_memory_region *mem) +{ + return i915_buddy_init(&mem->mm, resource_size(&mem->region), + PAGE_SIZE); +} + +void intel_memory_region_release_buddy(struct intel_memory_region *mem) +{ + i915_buddy_fini(&mem->mm); +} + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops) +{ + struct intel_memory_region *mem; + int err; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + mem->i915 = i915; + mem->region = (struct resource)DEFINE_RES_MEM(start, size); + mem->io_start = io_start; + mem->min_page_size = min_page_size; + mem->ops = ops; + + mutex_init(&mem->mm_lock); + + if (ops->init) { + err = ops->init(mem); + if (err) + goto err_free; + } + + kref_init(&mem->kref); + return mem; + +err_free: + kfree(mem); + return ERR_PTR(err); +} + +static void __intel_memory_region_destroy(struct kref *kref) +{ + struct intel_memory_region *mem = + container_of(kref, typeof(*mem), kref); + + if (mem->ops->release) + mem->ops->release(mem); + + mutex_destroy(&mem->mm_lock); + kfree(mem); +} + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem) +{ + kref_get(&mem->kref); + return mem; +} + +void intel_memory_region_put(struct intel_memory_region *mem) +{ + kref_put(&mem->kref, __intel_memory_region_destroy); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_memory_region.c" +#include "selftests/mock_region.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h new file mode 100644 index 000000000000..2ea17d6c31ed --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_MEMORY_REGION_H__ +#define __INTEL_MEMORY_REGION_H__ + +#include +#include +#include +#include + +#include "i915_buddy.h" + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; +struct sg_table; + +#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) + +struct intel_memory_region_ops { + unsigned int flags; + + int (*init)(struct intel_memory_region *mem); + void (*release)(struct intel_memory_region *mem); + + struct drm_i915_gem_object * + (*create_object)(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +}; + +struct intel_memory_region { + struct drm_i915_private *i915; + + const struct intel_memory_region_ops *ops; + + struct io_mapping iomap; + struct resource region; + + struct i915_buddy_mm mm; + struct mutex mm_lock; + + struct kref kref; + + resource_size_t io_start; + resource_size_t min_page_size; + + unsigned int type; + unsigned int instance; + unsigned int id; +}; + +int intel_memory_region_init_buddy(struct intel_memory_region *mem); +void intel_memory_region_release_buddy(struct intel_memory_region *mem); + +int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks); +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks); +void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops); + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem); +void intel_memory_region_put(struct intel_memory_region *mem); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index b88084fe3269..aa5a0e7f5d9e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -26,3 +26,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) selftest(buddy, i915_buddy_mock_selftests) +selftest(memory_region, intel_memory_region_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c new file mode 100644 index 000000000000..89eaa419eaf2 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "mock_gem_device.h" +#include "mock_region.h" + +#include "gem/i915_gem_region.h" +#include "gem/selftests/mock_context.h" + +static void close_objects(struct intel_memory_region *mem, + struct list_head *objects) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + /* No polluting the memory region between tests */ + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); + } + + cond_resched(); + + i915_gem_drain_freed_objects(i915); +} + +static int igt_mock_fill(void *arg) +{ + struct intel_memory_region *mem = arg; + resource_size_t total = resource_size(&mem->region); + resource_size_t page_size; + resource_size_t rem; + unsigned long max_pages; + unsigned long page_num; + LIST_HEAD(objects); + int err = 0; + + page_size = mem->mm.chunk_size; + max_pages = div64_u64(total, page_size); + rem = total; + + for_each_prime_number_from(page_num, 1, max_pages) { + resource_size_t size = page_num * page_size; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_region(mem, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + rem -= size; + } + + if (err == -ENOMEM) + err = 0; + if (err == -ENXIO) { + if (page_num * page_size <= rem) { + pr_err("%s failed, space still left in region\n", + __func__); + err = -EINVAL; + } else { + err = 0; + } + } + + close_objects(mem, &objects); + + return err; +} + +int intel_memory_region_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_fill), + }; + struct intel_memory_region *mem; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_unref; + } + + err = i915_subtests(tests, mem); + + intel_memory_region_put(mem); +out_unref: + drm_dev_put(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d14c6d8735fe..9c22d41b30cd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -33,6 +33,7 @@ #include "mock_gem_device.h" #include "mock_gtt.h" #include "mock_uncore.h" +#include "mock_region.h" #include "gem/selftests/mock_context.h" #include "gem/selftests/mock_gem_object.h" diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c new file mode 100644 index 000000000000..0e9a575ede3b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_region.h" +#include "intel_memory_region.h" + +#include "mock_region.h" + +static const struct drm_i915_gem_object_ops mock_region_obj_ops = { + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +static struct drm_i915_gem_object * +mock_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &mock_region_obj_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem); + + return obj; +} + +static const struct intel_memory_region_ops mock_region_ops = { + .init = intel_memory_region_init_buddy, + .release = intel_memory_region_release_buddy, + .create_object = mock_object_create, +}; + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start) +{ + return intel_memory_region_create(i915, start, size, min_page_size, + io_start, &mock_region_ops); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h new file mode 100644 index 000000000000..24608089d833 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __MOCK_REGION_H +#define __MOCK_REGION_H + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start); + +#endif /* !__MOCK_REGION_H */ From 2f0b97ca02118630132dddf258fbdb5d5f5ec32a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 8 Oct 2019 17:01:15 +0100 Subject: [PATCH 024/159] drm/i915/region: support contiguous allocations Some kernel internal objects may need to be allocated as a contiguous block, also thinking ahead the various kernel io_mapping interfaces seem to expect it, although this is purely a limitation in the kernel API...so perhaps something to be improved. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Abdiel Janulgue Cc: Michael J Ruhl Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191008160116.18379-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 4 + drivers/gpu/drm/i915/gem/i915_gem_region.c | 15 +- drivers/gpu/drm/i915/gem/i915_gem_region.h | 3 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 83 +++++---- drivers/gpu/drm/i915/intel_memory_region.c | 9 +- drivers/gpu/drm/i915/intel_memory_region.h | 3 +- .../drm/i915/selftests/intel_memory_region.c | 165 ++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_region.c | 2 +- 9 files changed, 245 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 086a9bf5adcc..dfd16d65630f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -139,6 +139,12 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) return obj->base.vma_node.readonly; } +static inline bool +i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_CONTIGUOUS; +} + static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 11390586cfe1..c6a712cf7d7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -119,6 +119,10 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS) + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 6588e3c99e5d..d94914a86737 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -23,10 +23,10 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) { struct intel_memory_region *mem = obj->mm.region; struct list_head *blocks = &obj->mm.blocks; - unsigned int flags = I915_ALLOC_MIN_PAGE_SIZE; resource_size_t size = obj->base.size; resource_size_t prev_end; struct i915_buddy_block *block; + unsigned int flags; struct sg_table *st; struct scatterlist *sg; unsigned int sg_page_sizes; @@ -41,6 +41,10 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) return -ENOMEM; } + flags = I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); if (ret) goto err_free_sg; @@ -55,7 +59,8 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) list_for_each_entry(block, blocks, link) { u64 block_size, offset; - block_size = i915_buddy_block_size(&mem->mm, block); + block_size = min_t(u64, size, + i915_buddy_block_size(&mem->mm, block)); offset = i915_buddy_block_offset(block); GEM_BUG_ON(overflows_type(block_size, sg->length)); @@ -96,10 +101,12 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) } void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem) + struct intel_memory_region *mem, + unsigned long flags) { INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); + obj->flags |= flags; } void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) @@ -120,6 +127,8 @@ i915_gem_object_create_region(struct intel_memory_region *mem, * future. */ + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + if (!mem) return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index ebddc86d78f7..f2ff6f8bff74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -17,7 +17,8 @@ void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, struct sg_table *pages); void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem); + struct intel_memory_region *mem, + unsigned long flags); void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b4c390e9fa50..63a4743e5f54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -456,6 +456,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) static int igt_mock_memory_region_huge_pages(void *arg) { + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; struct i915_ppgtt *ppgtt = arg; struct drm_i915_private *i915 = ppgtt->vm.i915; unsigned long supported = INTEL_INFO(i915)->page_sizes; @@ -474,46 +475,52 @@ static int igt_mock_memory_region_huge_pages(void *arg) for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { unsigned int page_size = BIT(bit); resource_size_t phys; + int i; - obj = i915_gem_object_create_region(mem, page_size, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_region; + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); } - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_close; - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - phys = i915_gem_object_get_dma_address(obj, 0); - if (!IS_ALIGNED(phys, page_size)) { - pr_err("%s addr misaligned(%pa) page_size=%u\n", - __func__, &phys, page_size); - err = -EINVAL; - goto out_unpin; - } - - if (vma->page_sizes.gtt != page_size) { - pr_err("%s page_sizes.gtt=%u, expected=%u\n", - __func__, vma->page_sizes.gtt, page_size); - err = -EINVAL; - goto out_unpin; - } - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_put(obj); } goto out_region; diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 2ef67c397fca..98006618e871 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -47,8 +47,8 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, unsigned int flags, struct list_head *blocks) { - unsigned long n_pages = size >> ilog2(mem->mm.chunk_size); unsigned int min_order = 0; + unsigned long n_pages; GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); GEM_BUG_ON(!list_empty(blocks)); @@ -58,6 +58,13 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, ilog2(mem->mm.chunk_size); } + if (flags & I915_ALLOC_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mem->mm.chunk_size); + } + + n_pages = size >> ilog2(mem->mm.chunk_size); + mutex_lock(&mem->mm_lock); do { diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2ea17d6c31ed..29b86ca17dd9 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -18,7 +18,8 @@ struct drm_i915_gem_object; struct intel_memory_region; struct sg_table; -#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) +#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) +#define I915_ALLOC_CONTIGUOUS BIT(1) struct intel_memory_region_ops { unsigned int flags; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 89eaa419eaf2..4e44c81e8e5b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -13,6 +13,7 @@ #include "gem/i915_gem_region.h" #include "gem/selftests/mock_context.h" +#include "selftests/i915_random.h" static void close_objects(struct intel_memory_region *mem, struct list_head *objects) @@ -86,10 +87,174 @@ static int igt_mock_fill(void *arg) return err; } +static struct drm_i915_gem_object * +igt_object_create(struct intel_memory_region *mem, + struct list_head *objects, + u64 size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mem, size, flags); + if (IS_ERR(obj)) + return obj; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto put; + + list_add(&obj->st_link, objects); + return obj; + +put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static void igt_object_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); +} + +static int igt_mock_contiguous(void *arg) +{ + struct intel_memory_region *mem = arg; + struct drm_i915_gem_object *obj; + unsigned long n_objects; + LIST_HEAD(objects); + LIST_HEAD(holes); + I915_RND_STATE(prng); + resource_size_t target; + resource_size_t total; + resource_size_t min; + int err = 0; + + total = resource_size(&mem->region); + + /* Min size */ + obj = igt_object_create(mem, &objects, mem->mm.chunk_size, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s min object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Max size */ + obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s max object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Internal fragmentation should not bleed into the object size */ + target = round_up(prandom_u32_state(&prng) % total, PAGE_SIZE); + target = max_t(u64, PAGE_SIZE, target); + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != target) { + pr_err("%s obj->base.size(%llx) != target(%llx)\n", __func__, + (u64)obj->base.size, (u64)target); + err = -EINVAL; + goto err_close_objects; + } + + if (obj->mm.pages->nents != 1) { + pr_err("%s object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Try to fragment the address space, such that half of it is free, but + * the max contiguous block size is SZ_64K. + */ + + target = SZ_64K; + n_objects = div64_u64(total, target); + + while (n_objects--) { + struct list_head *list; + + if (n_objects % 2) + list = &holes; + else + list = &objects; + + obj = igt_object_create(mem, list, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + } + + close_objects(mem, &holes); + + min = target; + target = total >> 1; + + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Even though we have enough free space, we don't have a big enough + * contiguous block. Make sure that holds true. + */ + + do { + bool should_fail = target > min; + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (should_fail != IS_ERR(obj)) { + pr_err("%s target allocation(%llx) mismatch\n", + __func__, (u64)target); + err = -EINVAL; + goto err_close_objects; + } + + target >>= 1; + } while (target >= mem->mm.chunk_size); + +err_close_objects: + list_splice_tail(&holes, &objects); + close_objects(mem, &objects); + return err; +} + int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_fill), + SUBTEST(igt_mock_contiguous), }; struct intel_memory_region *mem; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 0e9a575ede3b..7b0c99ddc2d5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -36,7 +36,7 @@ mock_object_create(struct intel_memory_region *mem, i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_init_memory_region(obj, mem, flags); return obj; } From 7c98501acb94318819f5ea764fc3aae09f69aff6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 8 Oct 2019 17:01:16 +0100 Subject: [PATCH 025/159] drm/i915/region: support volatile objects Volatile objects are marked as DONTNEED while pinned, therefore once unpinned the backing store can be discarded. This is limited to kernel internal objects. Signed-off-by: Matthew Auld Signed-off-by: CQ Tang Cc: Joonas Lahtinen Cc: Abdiel Janulgue Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191008160116.18379-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 17 +++++++++-------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 ++++++++++++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++++++++- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 6 ++++++ drivers/gpu/drm/i915/gem/i915_gem_region.c | 17 ++++++++++++++++- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 12 ++++-------- drivers/gpu/drm/i915/intel_memory_region.c | 5 +++++ drivers/gpu/drm/i915/intel_memory_region.h | 6 ++++++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5 ++--- 9 files changed, 68 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 0c41e04ab8fa..5ae694c24df4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -117,13 +117,6 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) goto err; } - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { @@ -188,6 +180,15 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_object_internal_ops); + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj); + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index dfd16d65630f..c5e14c9c805c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -145,6 +145,18 @@ i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) return obj->flags & I915_BO_ALLOC_CONTIGUOUS; } +static inline bool +i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_VOLATILE; +} + +static inline void +i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) +{ + obj->flags |= I915_BO_ALLOC_VOLATILE; +} + static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index c6a712cf7d7a..a387e3ee728b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -121,7 +121,8 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) -#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) /* * Is the object to be mapped as read-only to the GPU @@ -172,6 +173,12 @@ struct drm_i915_gem_object { * List of memory region blocks allocated for this object. */ struct list_head blocks; + /** + * Element within memory_region->objects or region->purgeable + * if the object is marked as DONTNEED. Access is protected by + * region->obj_lock. + */ + struct list_head region_link; struct sg_table *pages; void *mapping; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 2e941f093a20..b0ec0959c13f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -18,6 +18,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->mm.lock); + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_DONTNEED; + /* Make the pages coherent with the GPU (flushing any swapin). */ if (obj->cache_dirty) { obj->write_domain = 0; @@ -160,6 +163,9 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (IS_ERR_OR_NULL(pages)) return pages; + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_WILLNEED; + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index d94914a86737..d3f7733bc7ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -107,11 +107,26 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); obj->flags |= flags; + + mutex_lock(&mem->objects.lock); + + if (obj->flags & I915_BO_ALLOC_VOLATILE) + list_add(&obj->mm.region_link, &mem->objects.purgeable); + else + list_add(&obj->mm.region_link, &mem->objects.list); + + mutex_unlock(&mem->objects.lock); } void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) { - intel_memory_region_put(obj->mm.region); + struct intel_memory_region *mem = obj->mm.region; + + mutex_lock(&mem->objects.lock); + list_del(&obj->mm.region_link); + mutex_unlock(&mem->objects.lock); + + intel_memory_region_put(mem); } struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 63a4743e5f54..f27772f6779a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -115,8 +115,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -137,7 +135,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -170,6 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -229,8 +228,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -263,8 +260,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -283,7 +278,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -323,6 +317,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) else i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 98006618e871..c0299435d75e 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -152,6 +152,10 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->min_page_size = min_page_size; mem->ops = ops; + mutex_init(&mem->objects.lock); + INIT_LIST_HEAD(&mem->objects.list); + INIT_LIST_HEAD(&mem->objects.purgeable); + mutex_init(&mem->mm_lock); if (ops->init) { @@ -177,6 +181,7 @@ static void __intel_memory_region_destroy(struct kref *kref) mem->ops->release(mem); mutex_destroy(&mem->mm_lock); + mutex_destroy(&mem->objects.lock); kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 29b86ca17dd9..52c141b6108c 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -52,6 +52,12 @@ struct intel_memory_region { unsigned int type; unsigned int instance; unsigned int id; + + struct { + struct mutex lock; /* Protects access to objects */ + struct list_head list; + struct list_head purgeable; + } objects; }; int intel_memory_region_init_buddy(struct intel_memory_region *mem); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 165b3a7f9744..ebe735df6504 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -82,8 +82,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) } GEM_BUG_ON(rem); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; @@ -95,7 +93,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj, { fake_free_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -122,6 +119,8 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; From 6ad145fe0246abe31a312ce19321bb5b11635802 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Oct 2019 19:59:41 +0100 Subject: [PATCH 026/159] drm/i915/gt: Give engine->kernel_context distinct timeline lock classes Assign a separate lockclass to the perma-pinned timelines of the kernel_context, such that we can use them from within the user timelines should we ever need to inject GPU operations to fixup faults during request construction. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191008185941.15228-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7e2aa7a6bef0..5aa1371f6a0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -736,6 +736,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { + static struct lock_class_key kernel; struct intel_context *ce; int err; @@ -751,6 +752,14 @@ create_kernel_context(struct intel_engine_cs *engine) return ERR_PTR(err); } + /* + * Give our perma-pinned kernel timelines a separate lockdep class, + * so that we can use them from within the normal user timelines + * should we need to inject GPU operations during their request + * construction. + */ + lockdep_set_class(&ce->timeline->mutex, &kernel); + return ce; } From 41f0bc49f7f2014feab8b278fea2adaea6ccaf4e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Oct 2019 07:17:59 +0100 Subject: [PATCH 027/159] drm/i915/selftests: Hold request reference over waits Take a reference on the request before submitting it to the HW and then waiting on it for selftest_workarounds. Once submitted, the request may be freed by a background worker, unless we take an extra reference for ourselves. References: https://bugs.freedesktop.org/show_bug.cgi?id=111926 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191009061759.3189-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gt/selftest_workarounds.c | 73 +++++++++---------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 74952bda9256..1048be646c35 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -33,6 +33,30 @@ struct wa_lists { } engine[I915_NUM_ENGINES]; }; +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + i915_request_put(rq); + + return err; +} + static void reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) { @@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; - intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); @@ -255,9 +278,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->uncore->rpm, wakeref) - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); @@ -267,13 +288,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, goto err; } - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - + err = request_add_spin(rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -586,15 +601,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = request_add_sync(rq, err); + if (err) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); intel_gt_set_wedged(&ctx->i915->gt); - err = -EIO; goto out_batch; } @@ -697,7 +708,6 @@ static int live_dirty_whitelist(void *arg) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; int err = 0; @@ -706,13 +716,9 @@ static int live_dirty_whitelist(void *arg) if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - file = mock_file(i915); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } + if (IS_ERR(file)) + return PTR_ERR(file); ctx = live_context(i915, file); if (IS_ERR(ctx)) { @@ -731,8 +737,6 @@ static int live_dirty_whitelist(void *arg) out_file: mock_file_free(i915, file); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; } @@ -807,12 +811,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, intel_ring_advance(rq, cs); err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; - - return err; + return request_add_sync(rq, err); } static int scrub_whitelisted_registers(struct i915_gem_context *ctx, @@ -872,9 +871,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, err = engine->emit_bb_start(rq, batch->node.start, 0, 0); err_request: - i915_request_add(rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; + err = request_add_sync(rq, err); err_unpin: i915_gem_object_unpin_map(batch->obj); @@ -1229,12 +1226,10 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { + ret = request_add_spin(rq, &spin); + if (ret) { pr_err("Spinner failed to start\n"); igt_spinner_fini(&spin); - ret = -ETIMEDOUT; goto err; } From d46e137c44974938bc08700000a4b1231491ca8a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 Oct 2019 11:00:24 +0100 Subject: [PATCH 028/159] drm/i915/selftests: fix null pointer dereference on pointer data In the case where data fails to be allocated the error exit path is via label 'out' where data is dereferenced in a for-loop. Fix this by exiting via the label 'out_file' instead to avoid the null pointer dereference. Addresses-Coverity: ("Dereference after null check") Fixes: 50d16d44cce4 ("drm/i915/selftests: Exercise context switching in parallel") Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191009100024.23077-1-colin.king@canonical.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index fb58c0919ea1..e5c235051ae5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -263,7 +263,7 @@ static int live_parallel_switch(void *arg) if (!data) { i915_gem_context_unlock_engines(ctx); err = -ENOMEM; - goto out; + goto out_file; } m = 0; /* Use the first context as our template for the engines */ From 9b000b47cc18fa3b078ce080cbcf84f1a8a497ad Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 9 Oct 2019 12:25:39 +0530 Subject: [PATCH 029/159] drm/i915/color: fix broken gamma state-checker during boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Premature gamma lut prepration and loading which was getting reflected in first modeset causing different colors on screen during boot. Issue: In BIOS, gamma is disabled by default. However, legacy read_luts() was setting crtc_state->base.gamma_lut and gamma_lut was programmed with junk values which led to visual artifacts (different colored screens instead of usual black during boot). Fix: Calling read_luts() only when gamma is enabled which will happen after first modeset. This fix is independent from the revert 1b8588741fdc ("Revert "drm/i915/color: Extract icl_read_luts()"") and should fix different colors on screen in legacy platforms too. v2: -Added gamma_enable checks inside read_luts() [Ville/Jani N] -Corrected gamma enable check for CHV [Ville] v3: -Added check in ilk_read_luts() [Ville] -Simplified gamma enable check for CHV [Ville] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111809 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111885 Tested-by: Jani Saarinen Signed-off-by: Swati Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191009065542.27415-2-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 9ab34902663e..08d020d4da35 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1613,6 +1613,9 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) static void i9xx_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); } @@ -1659,6 +1662,9 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) static void i965_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else @@ -1701,10 +1707,10 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) static void chv_read_luts(struct intel_crtc_state *crtc_state) { - if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); - else + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) crtc_state->base.gamma_lut = chv_read_cgm_lut(crtc_state); + else + i965_read_luts(crtc_state); } static struct drm_property_blob * @@ -1742,6 +1748,12 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) static void ilk_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else @@ -1788,6 +1800,9 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) static void glk_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else From d50341274d013e80d80d33e03f4573a34f4d7277 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 9 Oct 2019 12:25:40 +0530 Subject: [PATCH 030/159] drm/i915/color: move check of gamma_enable to specific func/platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moved common code to check gamma_enable to specific funcs per platform in bit_precision func. icl doesn't support that and chv has separate enable knob for CGM LUT. v2: -Simplified chv_gamma_precision() [Ville] Signed-off-by: Swati Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191009065542.27415-3-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 08d020d4da35..fa44eb73d088 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1420,6 +1420,9 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1433,6 +1436,9 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) return 0; @@ -1457,6 +1463,9 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1473,9 +1482,6 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (!crtc_state->gamma_enable) - return 0; - if (HAS_GMCH(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) return chv_gamma_precision(crtc_state); From 2a86972f60fcfaa0daa02b9fe461935ea2063791 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 8 Oct 2019 10:29:20 -0700 Subject: [PATCH 031/159] drm/i915: Select DPLL's via mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This slightly simplifies the EHL DPLL4 handling and also gives us more flexibility in the future in case we need to skip the use of specific PLL's (e.g., due to hardware workarounds and such). v2: - Replace GENMASK() with or'd BIT()'s to make the specific DPLLs more explicit. (Ville) - s/unsigned/unsigned long/. (Lucas) Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191008172920.11362-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 5e9e84c94a15..ec10fa7d3c69 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -247,8 +247,7 @@ static struct intel_shared_dpll * intel_find_shared_dpll(struct intel_atomic_state *state, const struct intel_crtc *crtc, const struct intel_dpll_hw_state *pll_state, - enum intel_dpll_id range_min, - enum intel_dpll_id range_max) + unsigned long dpll_mask) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll, *unused_pll = NULL; @@ -257,7 +256,9 @@ intel_find_shared_dpll(struct intel_atomic_state *state, shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - for (i = range_min; i <= range_max; i++) { + WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1)); + + for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) { pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ @@ -464,8 +465,8 @@ static bool ibx_get_dpll(struct intel_atomic_state *state, } else { pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_PCH_PLL_A, - DPLL_ID_PCH_PLL_B); + BIT(DPLL_ID_PCH_PLL_B) | + BIT(DPLL_ID_PCH_PLL_A)); } if (!pll) @@ -814,7 +815,8 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + BIT(DPLL_ID_WRPLL2) | + BIT(DPLL_ID_WRPLL1)); if (!pll) return NULL; @@ -877,7 +879,7 @@ static bool hsw_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SPLL, DPLL_ID_SPLL); + BIT(DPLL_ID_SPLL)); } else { return false; } @@ -1447,13 +1449,13 @@ static bool skl_get_dpll(struct intel_atomic_state *state, if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL0); + BIT(DPLL_ID_SKL_DPLL0)); else pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL1, - DPLL_ID_SKL_DPLL3); + BIT(DPLL_ID_SKL_DPLL3) | + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1)); if (!pll) return false; @@ -2401,8 +2403,9 @@ static bool cnl_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL2); + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1) | + BIT(DPLL_ID_SKL_DPLL0)); if (!pll) { DRM_DEBUG_KMS("No PLL selected\n"); return false; @@ -2975,7 +2978,7 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum port port = encoder->port; - bool has_dpll4 = false; + unsigned long dpll_mask; if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n"); @@ -2984,13 +2987,16 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, } if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) - has_dpll4 = true; + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + else + dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_DPLL0, - has_dpll4 ? DPLL_ID_EHL_DPLL4 - : DPLL_ID_ICL_DPLL1); + dpll_mask); if (!port_dpll->pll) { DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n", encoder->base.base.id, encoder->base.name); @@ -3023,8 +3029,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_TBTPLL, - DPLL_ID_ICL_TBTPLL); + BIT(DPLL_ID_ICL_TBTPLL)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No TBT-ALT PLL found\n"); return false; @@ -3043,8 +3048,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, encoder->port)); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - dpll_id, - dpll_id); + BIT(dpll_id)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No MG PHY PLL found\n"); goto err_unreference_tbt_pll; From c949ae431467764277cdd88d7c26ff963a9db40a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Oct 2019 11:09:54 +0100 Subject: [PATCH 032/159] drm/i915/execlists: Protect peeking at execlists->active Now that we dropped the engine->active.lock serialisation from around process_csb(), direct submission can run concurrently to the interrupt handler. As such execlists->active may be advanced as we dequeue, dropping the reference to the request. We need to employ our RCU request protection to ensure that the request is not freed too early. Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191009100955.21477-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6db762c509b8..7ea58335f04c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1289,7 +1289,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, static struct i915_request * last_active(const struct intel_engine_execlists *execlists) { - struct i915_request * const *last = execlists->active; + struct i915_request * const *last = READ_ONCE(execlists->active); while (*last && i915_request_completed(*last)) last++; @@ -1981,8 +1981,11 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } } /* From c36eebd9ba5d70b84e1e7408ccc7632566f285c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Oct 2019 17:09:06 +0100 Subject: [PATCH 033/159] drm/i915/gt: execlists->active is serialised by the tasklet The active/pending execlists is no longer protected by the engine->active.lock, but is serialised by the tasklet instead. Update the locking around the debug and stats to follow suit. v2: local_bh_disable() to prevent recursing into the tasklet in case we trigger a softirq (Tvrtko) Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191009160906.16195-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 14 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 16 +++++++--------- drivers/gpu/drm/i915/i915_gem.h | 6 ++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d624752f2a92..93ea367fe624 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists) return READ_ONCE(*execlists->active); } +static inline void +execlists_active_lock_bh(struct intel_engine_execlists *execlists) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&execlists->tasklet); +} + +static inline void +execlists_active_unlock_bh(struct intel_engine_execlists *execlists) +{ + tasklet_unlock(&execlists->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! */ +} + struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5aa1371f6a0f..c9d639c6becb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1245,9 +1245,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - unsigned long flags; + struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) @@ -1329,7 +1327,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - spin_lock_irqsave(&engine->active.lock, flags); + execlists_active_lock_bh(execlists); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1367,7 +1365,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } - spin_unlock_irqrestore(&engine->active.lock, flags); + execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1509,8 +1507,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->active.lock, flags); - write_seqlock(&engine->stats.lock); + execlists_active_lock_bh(execlists); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1538,8 +1536,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->active.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); + execlists_active_unlock_bh(execlists); return err; } diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 167a7b56ed5b..6795f1daa3d5 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -77,6 +77,12 @@ struct drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +static inline void tasklet_lock(struct tasklet_struct *t) +{ + while (!tasklet_trylock(t)) + cpu_relax(); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) From ba2c74da52eba388d959a1000a9dd08fa4857659 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 9 Oct 2019 16:04:23 -0700 Subject: [PATCH 034/159] drm/i915/tgl: the BCS engine supports relative MMIO The specs don't mention any specific HW limitation on the blitter and manual inspection shows that the HW does set the relative MMIO bit in the LRI of the blitter context image, so we can remove our limitations. Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: John Harrison Cc: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191009230424.6507-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7ea58335f04c..eb061f345166 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3451,7 +3451,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } - if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } From 9d41318c4e43da6b4c95a21cf366bf18179291fd Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 9 Oct 2019 16:04:24 -0700 Subject: [PATCH 035/159] drm/i915/tgl: simplify the lrc register list for !RCS There are small differences between the blitter and the video engines in the xcs context image (e.g. registers 0x200 and 0x204 only exist on the blitter). Since we never explicitly set a value for those register and given that we don't need to update the offsets in the lrc image when we change engine within the class for virtual engine because the HW can handle that, instead of having a separate define for the BCS we can just restrict the programming to the part we're interested in, which is common across the engines. Bspec: 45584 Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Mika Kuoppala Cc: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191009230424.6507-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 67 ++++------------------------- 1 file changed, 9 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eb061f345166..bdfd602b402a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -669,64 +669,6 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - NOP(13), - LRI(2, POSTED), - REG16(0x200), - REG16(0x204), - - NOP(11), - LRI(50, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - END(), }; @@ -857,6 +799,15 @@ static const u8 gen12_rcs_offsets[] = { static const u8 *reg_offsets(const struct intel_engine_cs *engine) { + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + if (engine->class == RENDER_CLASS) { if (INTEL_GEN(engine->i915) >= 12) return gen12_rcs_offsets; From 542a5c66e0ff6cf257600df1d7362735f8ebde35 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 Oct 2019 17:00:34 +0100 Subject: [PATCH 036/159] drm/i915/gt: Warn CI about an unrecoverable wedge If we have a wedged GPU that we need to recover, but fail, add a taint for CI to pickup and schedule a reboot. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Petri Latvala Reviewed-by: Janusz Krzysztofik Link: https://patchwork.freedesktop.org/patch/msgid/20191002160034.5121-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_reset.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 34791fc79dea..77445d100ca8 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -872,8 +872,14 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; - if (!ok) + if (!ok) { + /* + * Warn CI about the unrecoverable wedged condition. + * Time for a reboot. + */ + add_taint_for_CI(TAINT_WARN); return false; + } /* * Undo nop_submit_request. We prevent all new i915 requests from From bd9bec5b6a09a3a7656f096e3ff0ca6709f89770 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Oct 2019 09:32:42 +0100 Subject: [PATCH 037/159] drm/i915/execlists: Mark up expected state during reset Move the BUG_ON around slightly and add some explanations for each to try and capture the expected state more carefully. We want to compare the expected active state of our bookkeeping as compared to the tracked HW state. References: https://bugs.freedesktop.org/show_bug.cgi?id=111937 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191010083242.1387-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bdfd602b402a..bd8ba98e35df 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2728,8 +2728,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + ce = rq->hw_context; - GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); /* Proclaim we have exclusive access to the context image! */ @@ -2737,10 +2739,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) rq = active_request(rq); if (!rq) { + /* Idle context; tidy up the ring so we can restart afresh */ ce->ring->head = ce->ring->tail; goto out_replay; } + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* From 86027e312c36dfe6b614e32f7550d5fe20a6d9e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Oct 2019 12:02:52 +0100 Subject: [PATCH 038/159] drm/i915/selftests: Check that registers are preserved between virtual engines Make sure that we copy across the registers from one engine to the next, as we hop around a virtual engine. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191010110252.17289-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 180 +++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 198cf2f754f4..a691e429ca01 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -19,6 +19,33 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + static int live_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; @@ -2076,6 +2103,158 @@ static int live_virtual_mask(void *arg) return 0; } +static int preserved_virtual_engine(struct drm_i915_private *i915, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) + + struct i915_request *last = NULL; + struct i915_gem_context *ctx; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + const int num_gpr = 16 * 2; /* each GPR is 2 dwords */ + unsigned int n; + int err = 0; + + ctx = kernel_context(i915); + if (!ctx) + return -ENOMEM; + + scratch = create_scratch(siblings[0]->gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < num_gpr; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + u32 *cs; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % num_gpr); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + } else { + u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + + for (n = 0; n < num_gpr; n++) { + if (map[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + map[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + } + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(ctx); + return err; + +#undef CS_GPR +} + +static int live_virtual_preserved(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_gt *gt = &i915->gt; + unsigned int class, inst; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (USES_GUC_SUBMISSION(i915)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(i915, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + static int bond_virtual_engine(struct drm_i915_private *i915, unsigned int class, struct intel_engine_cs **siblings, @@ -2277,6 +2456,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), }; From 9a3a41dfe2239abe65944cfa1f8cd3aff5a03944 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 4 Oct 2019 13:34:52 +0200 Subject: [PATCH 039/159] drm/i915: Fix for_each_intel_plane_mask definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using for_each_intel_plane_mask() fails because of an extra bracket, remove the bracket so we can use it in the next commit. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191004113514.17064-3-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_display.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 2782f23ee887..4ded64fcbc6c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -350,7 +350,7 @@ enum phy_fia { &(dev)->mode_config.plane_list, \ base.head) \ for_each_if((plane_mask) & \ - drm_plane_mask(&intel_plane->base))) + drm_plane_mask(&intel_plane->base)) #define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ list_for_each_entry(intel_plane, \ From af9fbfa657c86dd0c5ee0e24da3e2da79353c0fb Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 4 Oct 2019 13:34:53 +0200 Subject: [PATCH 040/159] drm/i915: Introduce and use intel_atomic_crtc_state_for_each_plane_state. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of looking at drm_plane_state, look at intel_plane_state directly. This will allow us to make the watermarks bigjoiner aware, when we make it work for bigjoiner slave pipes as well. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191004113514.17064-4-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_display.h | 8 +++ drivers/gpu/drm/i915/intel_pm.c | 60 ++++++++------------ 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 4ded64fcbc6c..bc2cf4bec0e8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -440,6 +440,14 @@ enum phy_fia { (__i)--) \ for_each_if(crtc) +#define intel_atomic_crtc_state_for_each_plane_state( \ + plane, plane_state, \ + crtc_state) \ + for_each_intel_plane_mask(((crtc_state)->base.state->dev), (plane), \ + ((crtc_state)->base.plane_mask)) \ + for_each_if ((plane_state = \ + to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->base.state, &plane->base)))) + void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfcf03ab5245..6aeaad587a20 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3089,8 +3089,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane *plane; - const struct drm_plane_state *plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; const struct intel_plane_state *pristate = NULL; const struct intel_plane_state *sprstate = NULL; const struct intel_plane_state *curstate = NULL; @@ -3099,15 +3099,13 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) pipe_wm = &crtc_state->wm.ilk.optimal; - drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) { - const struct intel_plane_state *ps = to_intel_plane_state(plane_state); - - if (plane->type == DRM_PLANE_TYPE_PRIMARY) - pristate = ps; - else if (plane->type == DRM_PLANE_TYPE_OVERLAY) - sprstate = ps; - else if (plane->type == DRM_PLANE_TYPE_CURSOR) - curstate = ps; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) + pristate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY) + sprstate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + curstate = plane_state; } pipe_wm->pipe_enabled = crtc_state->base.active; @@ -4124,8 +4122,8 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, { struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + const struct intel_plane_state *plane_state; + struct intel_plane *plane; int crtc_clock, dotclk; u32 pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; @@ -4134,12 +4132,10 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, if (!crtc_state->base.enable) return 0; - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { uint_fixed_16_16_t plane_downscale; uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); int bpp; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); if (!intel_wm_plane_visible(crtc_state, plane_state)) continue; @@ -4227,18 +4223,16 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *uv_plane_data_rate) { struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - enum plane_id plane_id = to_intel_plane(plane)->id; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; /* packed/y */ @@ -4259,18 +4253,16 @@ static u64 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *plane_data_rate) { - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!crtc_state->base.state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); - enum plane_id plane_id = to_intel_plane(plane)->id; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; if (!plane_state->planar_linked_plane) { @@ -4282,7 +4274,7 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, /* * The slave plane might not iterate in - * drm_atomic_crtc_state_for_each_plane_state(), + * intel_atomic_crtc_state_for_each_plane_state(), * and needs the master plane state which may be * NULL if we try get_new_plane_state(), so we * always calculate from the master. @@ -5065,8 +5057,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; int ret; /* @@ -5075,10 +5067,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, - &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, + crtc_state) { if (INTEL_GEN(dev_priv) >= 11) ret = icl_build_plane_wm(crtc_state, plane_state); From 04c8b0bf4abc203df94f63f268a56a7a0efbb0f4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 4 Oct 2019 13:34:55 +0200 Subject: [PATCH 041/159] drm/i915: Use intel_plane_state in prepare and cleanup plane_fb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to look at the hw fb in the plane split, so replace all the places that use drm_plane_state with intel_plane_state. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191004113514.17064-6-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä [mlankhorst: Fix line wraps (Matt Roper)] Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_display.c | 30 ++++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1a533ccdb54f..23dc7ae1243a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14358,12 +14358,14 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) */ int intel_prepare_plane_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) + struct drm_plane_state *_new_plane_state) { + struct intel_plane_state *new_plane_state = + to_intel_plane_state(_new_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_state->state); + to_intel_atomic_state(new_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_framebuffer *fb = new_state->fb; + struct drm_framebuffer *fb = new_plane_state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; @@ -14394,9 +14396,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, } } - if (new_state->fence) { /* explicit fencing */ + if (new_plane_state->base.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready, - new_state->fence, + new_plane_state->base.fence, I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) @@ -14410,7 +14412,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); + ret = intel_plane_pin_fb(new_plane_state); i915_gem_object_unpin_pages(obj); if (ret) @@ -14419,7 +14421,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, fb_obj_bump_render_priority(obj); intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB); - if (!new_state->fence) { /* implicit fencing */ + if (!new_plane_state->base.fence) { /* implicit fencing */ struct dma_fence *fence; ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, @@ -14431,11 +14433,13 @@ intel_prepare_plane_fb(struct drm_plane *plane, fence = dma_resv_get_excl_rcu(obj->base.resv); if (fence) { - add_rps_boost_after_vblank(new_state->crtc, fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + fence); dma_fence_put(fence); } } else { - add_rps_boost_after_vblank(new_state->crtc, new_state->fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + new_plane_state->base.fence); } /* @@ -14463,10 +14467,12 @@ intel_prepare_plane_fb(struct drm_plane *plane, */ void intel_cleanup_plane_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) + struct drm_plane_state *_old_plane_state) { + struct intel_plane_state *old_plane_state = + to_intel_plane_state(_old_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state->state); + to_intel_atomic_state(old_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { @@ -14475,7 +14481,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! */ - intel_plane_unpin_fb(to_intel_plane_state(old_state)); + intel_plane_unpin_fb(old_plane_state); } int From d8bd3e157a17b03d2b0f380e143bf8204f75df94 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 4 Oct 2019 13:34:56 +0200 Subject: [PATCH 042/159] drm/i915: Remove begin/finish_crtc_commit, v4. This can all be done from the intel_update_crtc function. Split out the pipe update into a separate function, just like is done for the planes. Pull in all the changes done during fastset as well. It makes no sense for it to still exist as a separate function. Changes since v1: - Inline intel_update_pipe_config() Changes since v2: - Add comments suggested by matt. - Reorder commit_pipe_config() to remove all nesting. (Ville, Matt) - Use intel_set_pipe_src_size((). (Matt) Changes since v3: - Move atomic_update_watermarks closer to the plane calls. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191004113514.17064-7-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper [mlankhorst: Replace 8 spaces with tabs in comment] --- drivers/gpu/drm/i915/display/intel_display.c | 210 +++++++++---------- 1 file changed, 99 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 23dc7ae1243a..9db390188e9d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -135,8 +135,6 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); -static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); -static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); static void intel_crtc_init_scalers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state); @@ -4401,45 +4399,6 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) I915_WRITE(PIPE_CHICKEN(pipe), tmp); } -static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ - crtc->base.mode = new_crtc_state->base.mode; - - /* - * Update pipe size and adjust fitter if needed: the reason for this is - * that in compute_mode_changes we check the native mode (not the pfit - * mode) to see if we can flip rather than do a full mode set. In the - * fastboot case, we'll flip, but if we don't update the pipesrc and - * pfit state, we'll end up with a big fb scanned out into the wrong - * sized surface. - */ - - I915_WRITE(PIPESRC(crtc->pipe), - ((new_crtc_state->pipe_src_w - 1) << 16) | - (new_crtc_state->pipe_src_h - 1)); - - /* on skylake this is done by detaching scalers */ - if (INTEL_GEN(dev_priv) >= 9) { - skl_detach_scalers(new_crtc_state); - - if (new_crtc_state->pch_pfit.enabled) - skylake_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { - if (new_crtc_state->pch_pfit.enabled) - ironlake_pfit_enable(new_crtc_state); - else if (old_crtc_state->pch_pfit.enabled) - ironlake_pfit_disable(old_crtc_state); - } - - if (INTEL_GEN(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); -} - static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -13695,13 +13654,95 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) return crtc->base.funcs->get_vblank_counter(&crtc->base); } +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN(dev_priv, 2)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + +static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ + crtc->base.mode = new_crtc_state->base.mode; + + /* + * Update pipe size and adjust fitter if needed: the reason for this is + * that in compute_mode_changes we check the native mode (not the pfit + * mode) to see if we can flip rather than do a full mode set. In the + * fastboot case, we'll flip, but if we don't update the pipesrc and + * pfit state, we'll end up with a big fb scanned out into the wrong + * sized surface. + */ + intel_set_pipe_src_size(new_crtc_state); + + /* on skylake this is done by detaching scalers */ + if (INTEL_GEN(dev_priv) >= 9) { + skl_detach_scalers(new_crtc_state); + + if (new_crtc_state->pch_pfit.enabled) + skylake_pfit_enable(new_crtc_state); + } else if (HAS_PCH_SPLIT(dev_priv)) { + if (new_crtc_state->pch_pfit.enabled) + ironlake_pfit_enable(new_crtc_state); + else if (old_crtc_state->pch_pfit.enabled) + ironlake_pfit_disable(old_crtc_state); + } + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); +} + +static void commit_pipe_config(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + bool modeset = needs_modeset(new_crtc_state); + + /* + * During modesets pipe configuration was programmed as the + * CRTC was enabled. + */ + if (!modeset) { + if (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) + intel_color_commit(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_detach_scalers(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(new_crtc_state); + + if (new_crtc_state->update_pipe) + intel_pipe_fastset(old_crtc_state, new_crtc_state); + } + + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.atomic_update_watermarks(state, + new_crtc_state); +} + static void intel_update_crtc(struct intel_crtc *crtc, struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = state->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); bool modeset = needs_modeset(new_crtc_state); struct intel_plane_state *new_plane_state = intel_atomic_get_new_plane_state(state, @@ -13725,14 +13766,27 @@ static void intel_update_crtc(struct intel_crtc *crtc, else if (new_plane_state) intel_fbc_enable(crtc, new_crtc_state, new_plane_state); - intel_begin_crtc_commit(state, crtc); + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + + commit_pipe_config(state, old_crtc_state, new_crtc_state); if (INTEL_GEN(dev_priv) >= 9) skl_update_planes_on_crtc(state, crtc); else i9xx_update_planes_on_crtc(state, crtc); - intel_finish_crtc_commit(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. + */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } static void intel_old_crtc_state_disables(struct intel_atomic_state *state, @@ -14522,72 +14576,6 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, return max_scale; } -static void intel_begin_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - bool modeset = needs_modeset(new_crtc_state); - - /* Perform vblank evasion around commit operation */ - intel_pipe_update_start(new_crtc_state); - - if (modeset) - goto out; - - if (new_crtc_state->base.color_mgmt_changed || - new_crtc_state->update_pipe) - intel_color_commit(new_crtc_state); - - if (new_crtc_state->update_pipe) - intel_update_pipe_config(old_crtc_state, new_crtc_state); - else if (INTEL_GEN(dev_priv) >= 9) - skl_detach_scalers(new_crtc_state); - - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipemisc(new_crtc_state); - -out: - if (dev_priv->display.atomic_update_watermarks) - dev_priv->display.atomic_update_watermarks(state, - new_crtc_state); -} - -void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - if (!IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - - if (crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } -} - -static void intel_finish_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - - intel_pipe_update_end(new_crtc_state); - - if (new_crtc_state->update_pipe && - !needs_modeset(new_crtc_state) && - old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) - intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); -} - /** * intel_plane_destroy - destroy a plane * @plane: plane to destroy From 3a612765f4233993bf5cff302844bd3b6b5a7429 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 4 Oct 2019 13:34:54 +0200 Subject: [PATCH 043/159] drm/i915: Remove cursor use of properties for coordinates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a src and dect rectangle, use it instead of relying on the core drm properties. Because the core by default clips the src/dst properties, after the drm_atomic_helper_check_plane_state() we manually set the unclipped src/dst rectangles. We still need the call for visibility checks, but this way we are able to use the src/dst rects in the check/commit code. This removes the special case in the watermark code for cursor w/h. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191004113514.17064-5-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä [mlankhorst: Clarify commit message to state we use unclipped src/dst --- drivers/gpu/drm/i915/display/intel_display.c | 57 +++++++++++-------- drivers/gpu/drm/i915/intel_pm.c | 58 +++++++------------- 2 files changed, 53 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9db390188e9d..a146ec02a0c1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10538,16 +10538,16 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) /* ILK+ do this automagically */ if (HAS_GMCH(dev_priv) && plane_state->base.rotation & DRM_MODE_ROTATE_180) - base += (plane_state->base.crtc_h * - plane_state->base.crtc_w - 1) * fb->format->cpp[0]; + base += (drm_rect_height(&plane_state->base.dst) * + drm_rect_width(&plane_state->base.dst) - 1) * fb->format->cpp[0]; return base; } static u32 intel_cursor_position(const struct intel_plane_state *plane_state) { - int x = plane_state->base.crtc_x; - int y = plane_state->base.crtc_y; + int x = plane_state->base.dst.x1; + int y = plane_state->base.dst.y1; u32 pos = 0; if (x < 0) { @@ -10569,8 +10569,8 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) { const struct drm_mode_config *config = &plane_state->base.plane->dev->mode_config; - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); return width > 0 && width <= config->cursor_width && height > 0 && height <= config->cursor_height; @@ -10589,8 +10589,8 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) if (!plane_state->base.visible) return 0; - src_x = plane_state->base.src_x >> 16; - src_y = plane_state->base.src_y >> 16; + src_x = plane_state->base.src.x1 >> 16; + src_y = plane_state->base.src.y1 >> 16; intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&src_x, &src_y, @@ -10625,6 +10625,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* Use the unclipped src/dst rectangles, which we program to hw */ + plane_state->base.src = drm_plane_state_src(&plane_state->base); + plane_state->base.dst = drm_plane_state_dest(&plane_state->base); + ret = intel_cursor_check_surface(plane_state); if (ret) return ret; @@ -10667,7 +10671,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) { - int width = plane_state->base.crtc_w; + int width = drm_rect_width(&plane_state->base.dst); /* * 845g/865g are only limited by the width of their cursors, @@ -10693,8 +10697,8 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i845_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); return -EINVAL; } @@ -10727,8 +10731,8 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - unsigned int width = plane_state->base.crtc_w; - unsigned int height = plane_state->base.crtc_h; + unsigned int width = drm_rect_width(&plane_state->base.src); + unsigned int height = drm_rect_height(&plane_state->base.dst); cntl = plane_state->ctl | i845_cursor_ctl_crtc(crtc_state); @@ -10830,7 +10834,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) cntl |= MCURSOR_TRICKLE_FEED_DISABLE; - switch (plane_state->base.crtc_w) { + switch (drm_rect_width(&plane_state->base.dst)) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; break; @@ -10841,7 +10845,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, cntl |= MCURSOR_MODE_256_ARGB_AX; break; default: - MISSING_CASE(plane_state->base.crtc_w); + MISSING_CASE(drm_rect_width(&plane_state->base.dst)); return 0; } @@ -10855,8 +10859,8 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); if (!intel_cursor_size_ok(plane_state)) return false; @@ -10909,17 +10913,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i9xx_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); return -EINVAL; } WARN_ON(plane_state->base.visible && plane_state->color_plane[0].stride != fb->pitches[0]); - if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) { + if (fb->pitches[0] != + drm_rect_width(&plane_state->base.dst) * fb->format->cpp[0]) { DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n", - fb->pitches[0], plane_state->base.crtc_w); + fb->pitches[0], + drm_rect_width(&plane_state->base.dst)); return -EINVAL; } @@ -10934,7 +10940,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, * Refuse the put the cursor into that compromised position. */ if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && - plane_state->base.visible && plane_state->base.crtc_x < 0) { + plane_state->base.visible && plane_state->base.dst.x1 < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -10954,11 +10960,14 @@ static void i9xx_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { + unsigned width = drm_rect_width(&plane_state->base.dst); + unsigned height = drm_rect_height(&plane_state->base.dst); + cntl = plane_state->ctl | i9xx_cursor_ctl_crtc(crtc_state); - if (plane_state->base.crtc_h != plane_state->base.crtc_w) - fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); + if (width != height) + fbc_ctl = CUR_FBC_CTL_EN | (height - 1); base = intel_cursor_base(plane_state); pos = intel_cursor_position(plane_state); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6aeaad587a20..53358e33df1b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1117,10 +1117,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - if (plane->id == PLANE_CURSOR) - width = plane_state->base.crtc_w; - else - width = drm_rect_width(&plane_state->base.dst); + width = drm_rect_width(&plane_state->base.dst); if (plane->id == PLANE_CURSOR) { wm = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -2549,7 +2546,8 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state, return ilk_wm_method2(crtc_state->pixel_rate, crtc_state->base.adjusted_mode.crtc_htotal, - plane_state->base.crtc_w, cpp, mem_value); + drm_rect_width(&plane_state->base.dst), + cpp, mem_value); } /* Only for WM_LP. */ @@ -4046,7 +4044,6 @@ static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4054,27 +4051,17 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) return u32_to_fixed16(0); - /* n.b., src is 16.16 fixed point, dst is whole integer */ - if (plane->id == PLANE_CURSOR) { - /* - * Cursors only support 0/180 degree rotation, - * hence no need to account for rotation here. - */ - src_w = plane_state->base.src_w >> 16; - src_h = plane_state->base.src_h >> 16; - dst_w = plane_state->base.crtc_w; - dst_h = plane_state->base.crtc_h; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - src_w = drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; - dst_w = drm_rect_width(&plane_state->base.dst); - dst_h = drm_rect_height(&plane_state->base.dst); - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + * + * n.b., src is 16.16 fixed point, dst is whole integer. + */ + src_w = drm_rect_width(&plane_state->base.src) >> 16; + src_h = drm_rect_height(&plane_state->base.src) >> 16; + dst_w = drm_rect_width(&plane_state->base.dst); + dst_h = drm_rect_height(&plane_state->base.dst); fp_w_ratio = div_fixed16(src_w, dst_w); fp_h_ratio = div_fixed16(src_h, dst_h); @@ -4698,20 +4685,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, struct skl_wm_params *wp, int color_plane) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); const struct drm_framebuffer *fb = plane_state->base.fb; int width; - if (plane->id == PLANE_CURSOR) { - width = plane_state->base.crtc_w; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - width = drm_rect_width(&plane_state->base.src) >> 16; - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ + width = drm_rect_width(&plane_state->base.src) >> 16; return skl_compute_wm_params(crtc_state, width, fb->format, fb->modifier, From 9a61363a6310dda769b8c7601fc9de8ed81e6bc8 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 10 Oct 2019 16:05:19 +0100 Subject: [PATCH 044/159] drm/i915/perf: store the associated engine of a stream We'll use this information later to verify that a client trying to reconfigure the stream does so on the right engine. For now, we want to pull the knowledge of which engine we use into a central property. Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191010150520.26488-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 30 ++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_perf_types.h | 5 +++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5a34cad7d824..1a5c6591b9bb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -197,6 +197,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" @@ -347,6 +348,7 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this + * @engine: The engine (typically rcs0) being monitored by the OA unit * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -363,6 +365,8 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; + + struct intel_engine_cs *engine; }; static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); @@ -1205,7 +1209,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) int err; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (ce->engine->class != RENDER_CLASS) + if (ce->engine != stream->engine) /* first match! */ continue; /* @@ -2127,7 +2131,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, int format_size; int ret; - /* If the sysfs metrics/ directory wasn't registered for some + if (!props->engine) { + DRM_DEBUG("OA engine not specified\n"); + return -EINVAL; + } + + /* + * If the sysfs metrics/ directory wasn't registered for some * reason then don't let userspace try their luck with config * IDs */ @@ -2146,7 +2156,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -ENODEV; } - /* To avoid the complexity of having to accurately filter + /* + * To avoid the complexity of having to accurately filter * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ @@ -2160,6 +2171,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } + stream->engine = props->engine; + stream->gt = stream->engine->gt; + stream->sample_size = sizeof(struct drm_i915_perf_record_header); format_size = perf->oa_formats[props->oa_format].size; @@ -2711,7 +2725,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } stream->perf = perf; - stream->gt = &perf->i915->gt; stream->ctx = specific_ctx; ret = i915_oa_stream_init(stream, param, props); @@ -2796,6 +2809,15 @@ static int read_properties_unlocked(struct i915_perf *perf, return -EINVAL; } + /* At the moment we only support using i915-perf on the RCS. */ + props->engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0); + if (!props->engine) { + DRM_DEBUG("No RENDER-capable engines\n"); + return -EINVAL; + } + /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 2d17059d32ee..82cd3b295037 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -140,6 +140,11 @@ struct i915_perf_stream { */ intel_wakeref_t wakeref; + /** + * @engine: Engine associated with this performance stream. + */ + struct intel_engine_cs *engine; + /** * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` * properties given when opening a stream, representing the contents From 52111c4628a29973d8be266f45b464318912b199 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Oct 2019 16:05:20 +0100 Subject: [PATCH 045/159] drm/i915/perf: Store shortcut to intel_uncore Now that we have the engine stored in i915_perf, we have a means of accessing intel_gt should we require it. However, we are currently only using the intel_gt to find the right intel_uncore, so replace our i915_perf.gt pointer with the more useful i915_perf.uncore. Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191010150520.26488-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 48 +++++++++++++------------- drivers/gpu/drm/i915/i915_perf_types.h | 4 +-- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1a5c6591b9bb..77c3cef64548 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -419,14 +419,14 @@ static int get_oa_config(struct i915_perf *perf, static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; @@ -656,7 +656,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -866,7 +866,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 oastatus; int ret; @@ -945,7 +945,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -1077,7 +1077,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 oastatus1; int ret; @@ -1352,8 +1352,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(stream); - intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref); if (stream->ctx) oa_put_render_ctx_id(stream); @@ -1368,7 +1368,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; @@ -1416,7 +1416,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; @@ -1565,7 +1565,7 @@ static void delay_after_mux(void) static int hsw_enable_metric_set(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; const struct i915_oa_config *oa_config = stream->oa_config; /* @@ -1594,7 +1594,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) static void hsw_disable_metric_set(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_rmw(uncore, GEN6_UCGCTL1, GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); @@ -1911,7 +1911,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, static int gen8_enable_metric_set(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; const struct i915_oa_config *oa_config = stream->oa_config; int ret; @@ -1964,7 +1964,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) static void gen8_disable_metric_set(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); @@ -1974,7 +1974,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) static void gen10_disable_metric_set(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); @@ -1985,7 +1985,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; struct i915_gem_context *ctx = stream->ctx; u32 ctx_id = stream->specific_ctx_id; bool periodic = stream->periodic; @@ -2015,7 +2015,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) static void gen8_oa_enable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; u32 report_format = stream->oa_buffer.format; /* @@ -2060,7 +2060,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN7_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2071,7 +2071,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream) static void gen8_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->gt->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN8_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2172,7 +2172,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } stream->engine = props->engine; - stream->gt = stream->engine->gt; + stream->uncore = stream->engine->gt->uncore; stream->sample_size = sizeof(struct drm_i915_perf_record_header); @@ -2218,8 +2218,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - stream->wakeref = intel_runtime_pm_get(stream->gt->uncore->rpm); - intel_uncore_forcewake_get(stream->gt->uncore, FORCEWAKE_ALL); + stream->wakeref = intel_runtime_pm_get(stream->uncore->rpm); + intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); ret = alloc_oa_buffer(stream); if (ret) @@ -2251,8 +2251,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, err_oa_buf_alloc: put_oa_config(stream->oa_config); - intel_uncore_forcewake_put(stream->gt->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(stream->gt->uncore->rpm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref); err_config: if (stream->ctx) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 82cd3b295037..a91ae2d1a543 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -130,9 +130,9 @@ struct i915_perf_stream { struct i915_perf *perf; /** - * @gt: intel_gt container + * @uncore: mmio access path */ - struct intel_gt *gt; + struct intel_uncore *uncore; /** * @wakeref: As we keep the device awake while the perf stream is From b068a86071ca743de28a607a0b22b41b1a3f5fb3 Mon Sep 17 00:00:00 2001 From: James Ausmus Date: Wed, 9 Oct 2019 10:23:14 -0700 Subject: [PATCH 046/159] drm/i915: Move SAGV block time to dev_priv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In prep for newer platforms having more complicated ways to determine the SAGV block time, move the variable to dev_priv, and extract the setting to an initial setup function. While we're at it, update the if ladder to follow the new gen -> old gen order preference, and warn on any non-specified gen. v2: Shorten the function name (Ville), return directly (Ville), move sagv_block_time_us value to dev_priv (Ville) v3: Change sagv_block_time_us to u32 (Lucas), Change fallback value to -1 (Lucas), use intel_has_sagv for setup check rather than hand-rolling (Lucas) Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Cc: Lucas De Marchi Signed-off-by: James Ausmus Reviewed-by: Ville Syrjälä Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20191004221449.1317-1-james.ausmus@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20191009172315.11004-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_pm.c | 33 ++++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d284b04c492b..c46b339064c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1267,6 +1267,8 @@ struct drm_i915_private { I915_SAGV_NOT_CONTROLLED } sagv_status; + u32 sagv_block_time_us; + struct { /* * Raw watermark latency values: diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 53358e33df1b..f00e64f9320c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3638,6 +3638,26 @@ intel_has_sagv(struct drm_i915_private *dev_priv) dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } +static void +skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) +{ + if (IS_GEN(dev_priv, 11)) { + dev_priv->sagv_block_time_us = 10; + return; + } else if (IS_GEN(dev_priv, 10)) { + dev_priv->sagv_block_time_us = 20; + return; + } else if (IS_GEN(dev_priv, 9)) { + dev_priv->sagv_block_time_us = 30; + return; + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + } + + /* Default to an unusable block time */ + dev_priv->sagv_block_time_us = -1; +} + /* * SAGV dynamically adjusts the system agent voltage and clock frequencies * depending on power and performance requirements. The display engine access @@ -3726,18 +3746,10 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) struct intel_crtc_state *crtc_state; enum pipe pipe; int level, latency; - int sagv_block_time_us; if (!intel_has_sagv(dev_priv)) return false; - if (IS_GEN(dev_priv, 9)) - sagv_block_time_us = 30; - else if (IS_GEN(dev_priv, 10)) - sagv_block_time_us = 20; - else - sagv_block_time_us = 10; - /* * If there are no active CRTCs, no additional checks need be performed */ @@ -3784,7 +3796,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) * incur memory latencies higher than sagv_block_time_us we * can't enable SAGV. */ - if (latency < sagv_block_time_us) + if (latency < dev_priv->sagv_block_time_us) return false; } @@ -8985,6 +8997,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) else if (IS_GEN(dev_priv, 5)) i915_ironlake_get_mem_freq(dev_priv); + if (intel_has_sagv(dev_priv)) + skl_setup_sagv_block_time(dev_priv); + /* For FIFO watermark updates */ if (INTEL_GEN(dev_priv) >= 9) { skl_setup_wm_latency(dev_priv); From da80f04792fc0dddcc44201cb4ee96cb011755b6 Mon Sep 17 00:00:00 2001 From: James Ausmus Date: Wed, 9 Oct 2019 10:23:15 -0700 Subject: [PATCH 047/159] drm/i915/tgl: Read SAGV block time from PCODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from TGL, we now need to read the SAGV block time via a PCODE mailbox, rather than having a static value. BSpec: 49326 v2: Fix up pcode val data type (Ville), tighten variable scope (Ville) Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Cc: Lucas De Marchi Signed-off-by: James Ausmus Reviewed-by: Ville Syrjälä Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20191004221449.1317-2-james.ausmus@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20191009172315.11004-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1dc067fc57ab..0fb9030b89f1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8878,6 +8878,7 @@ enum { #define GEN9_SAGV_DISABLE 0x0 #define GEN9_SAGV_IS_DISABLED 0x1 #define GEN9_SAGV_ENABLE 0x3 +#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f00e64f9320c..b306e2338f5a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3641,7 +3641,20 @@ intel_has_sagv(struct drm_i915_private *dev_priv) static void skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) { - if (IS_GEN(dev_priv, 11)) { + if (INTEL_GEN(dev_priv) >= 12) { + u32 val = 0; + int ret; + + ret = sandybridge_pcode_read(dev_priv, + GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, + &val, NULL); + if (!ret) { + dev_priv->sagv_block_time_us = val; + return; + } + + DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n"); + } else if (IS_GEN(dev_priv, 11)) { dev_priv->sagv_block_time_us = 10; return; } else if (IS_GEN(dev_priv, 10)) { From 9c4a14f8cceeffa01334e3229888746aa7dd95fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Oct 2019 08:14:25 +0100 Subject: [PATCH 048/159] drm/i915: Note the addition of timeslicing to the pretend scheduler Since writing the comment that the scheduler is entirely passive, we've added minimal timeslicing which adds the most primitive of active elements (a timeout and reschedule). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Ramalingam C Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191010071434.31195-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler_types.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index aad81acba9dc..d18e70550054 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -49,6 +49,15 @@ struct i915_sched_attr { * DAG of each request, we are able to insert it into a sorted queue when it * is ready, and are able to reorder its portion of the graph to accommodate * dynamic priority changes. + * + * Ok, there is now one active element to the "scheduler" in the backends. + * We let a new context run for a small amount of time before re-evaluating + * the run order. As we re-evaluate, we maintain the strict ordering of + * dependencies, but attempt to rotate the active contexts (the current context + * is put to the back of its priority queue, then reshuffling its dependents). + * This provides minimal timeslicing and prevents a userspace hog (e.g. + * something waiting on a user semaphore [VkEvent]) from denying service to + * others. */ struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ From c97fb526ca0666e515e94f96e922dfc50467fd01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Oct 2019 08:14:26 +0100 Subject: [PATCH 049/159] drm/i915/execlists: Leave tell-tales as to why pending[] is bad Before we BUG out with bad pending state, leave a telltale as to which test failed. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191010071434.31195-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 30 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_gem.h | 11 +++++++---- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bd8ba98e35df..fd609ce4313a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1089,25 +1089,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); - if (!execlists->pending[0]) + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; + } - if (execlists->pending[execlists_num_ports(execlists)]) + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); return false; + } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) + if (ce == rq->hw_context) { + GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + port - execlists->pending); return false; + } ce = rq->hw_context; if (i915_request_completed(rq)) continue; - if (i915_active_is_idle(&ce->active)) + if (i915_active_is_idle(&ce->active)) { + GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + port - execlists->pending); return false; + } - if (!i915_vma_is_pinned(ce->state)) + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + port - execlists->pending); return false; + } + + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + port - execlists->pending); + return false; + } } return ce; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 6795f1daa3d5..db20d2b0842b 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -37,10 +37,8 @@ struct drm_i915_private; #define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - pr_err("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ - GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ BUG(); \ } \ } while(0) @@ -66,11 +64,16 @@ struct drm_i915_private; #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#define GEM_TRACE_ERR(...) do { \ + pr_err(__VA_ARGS__); \ + trace_printk(__VA_ARGS__); \ +} while (0) #define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) #define GEM_TRACE_DUMP_ON(expr) \ do { if (expr) ftrace_dump(DUMP_ALL); } while (0) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_ERR(...) do { } while (0) #define GEM_TRACE_DUMP() do { } while (0) #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif From cbbf2787782ca5d17b479deb962a07f0421a7932 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Oct 2019 11:33:45 +0100 Subject: [PATCH 050/159] drm/i915/execlists: Only mark incomplete requests as -EIO on cancelling Only the requests that have not completed do we want to change the status of to signal the -EIO when cancelling the inflight set of requests upon wedging. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191011103345.26013-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index fd609ce4313a..13fa61ed85de 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce) static void mark_eio(struct i915_request *rq) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); } From 54895010a893cdbf0aaf34fbd7719d750b557eb2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Oct 2019 18:38:23 +0100 Subject: [PATCH 051/159] drm/i915: Add an rcu_barrier option to i915_drop_caches Sometimes a test has to wait for RCU to complete a grace period and perform its callbacks, for example waiting for a close(fd) to actually perform the fput(filp) and so trigger all the callbacks such as closing GEM contexts. There is no trivial means of triggering an RCU barrier from userspace, so add one for our convenience in debugfs/i915_drop_caches Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191011173823.20432-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 277f31297f29..e575761550ac 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3599,6 +3599,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, #define DROP_IDLE BIT(6) #define DROP_RESET_ACTIVE BIT(7) #define DROP_RESET_SEQNO BIT(8) +#define DROP_RCU BIT(9) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ @@ -3607,7 +3608,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, DROP_SHRINK_ALL |\ DROP_IDLE | \ DROP_RESET_ACTIVE | \ - DROP_RESET_SEQNO) + DROP_RESET_SEQNO | \ + DROP_RCU) static int i915_drop_caches_get(void *data, u64 *val) { @@ -3659,6 +3661,9 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); + if (val & DROP_RCU) + rcu_barrier(); + if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); From cd9ba7b6e479ee5f71070f0c095f185e7cdc2eac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Oct 2019 20:36:20 +0100 Subject: [PATCH 052/159] drm/i915/selftests: Serialise write to scratch with its vma binding Add the missing serialisation on the request for a write into a vma to wait until that vma is bound before being executed by the GPU. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191011193620.14026-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 1048be646c35..dc11f7ad50a2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -786,6 +786,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, if (IS_ERR(rq)) return PTR_ERR(rq); + i915_vma_lock(results); + err = i915_request_await_object(rq, results->obj, true); + if (err == 0) + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(results); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) srm++; From a5efcde69b112d276416f8e6e5e9fd8ede13bfc5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Oct 2019 20:03:17 +0100 Subject: [PATCH 053/159] drm/i915/perf: Replace global wakeref tracking with engine-pm As we now have a specific engine to use OA on, exchange the top-level runtime-pm wakeref with the engine-pm. This still results in the same top-level runtime-pm, but with more nuances to keep the engine and its gt awake. Signed-off-by: Chris Wilson Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191011190325.10979-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 8 ++++---- drivers/gpu/drm/i915/i915_perf_types.h | 6 ------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 77c3cef64548..c4a436dfb7db 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,7 +196,7 @@ #include #include "gem/i915_gem_context.h" -#include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" @@ -1353,7 +1353,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(stream); intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref); + intel_engine_pm_put(stream->engine); if (stream->ctx) oa_put_render_ctx_id(stream); @@ -2218,7 +2218,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - stream->wakeref = intel_runtime_pm_get(stream->uncore->rpm); + intel_engine_pm_get(stream->engine); intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); ret = alloc_oa_buffer(stream); @@ -2252,7 +2252,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, put_oa_config(stream->oa_config); intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref); + intel_engine_pm_put(stream->engine); err_config: if (stream->ctx) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index a91ae2d1a543..eb8d1ebd5095 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -134,12 +134,6 @@ struct i915_perf_stream { */ struct intel_uncore *uncore; - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - /** * @engine: Engine associated with this performance stream. */ From d8ad5f52617b8a74cb3a511d06e24115360761c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Oct 2019 20:03:25 +0100 Subject: [PATCH 054/159] drm/i915/execlists: Prevent merging requests with conflicting flags We set out-of-bound parameters inside the i915_requests.flags field, such as disabling preemption or marking the end-of-context. We should not coalesce consecutive requests if they have differing instructions as we only inspect the last active request in a context. Thus if we allow a later request to be merged into the same execution context, it will mask any of the earlier flags. References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request") Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191011190325.10979-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 13fa61ed85de..7f2d28bf9ea3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1208,6 +1208,9 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; + if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT) + return false; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; From c3eb54aad982d05493ab374a6f0d0a6831dd2c6e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 12 Oct 2019 08:01:36 +0100 Subject: [PATCH 055/159] drm/i915: Mark up "sentinel" requests Sometimes we want to emit a terminator request, a request that flushes the pipeline and allows no request to come after it. This can be used for a "preempt-to-idle" to ensure that upon processing the context-switch to that request, all other active contexts have been flushed. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191012070136.32058-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++++- drivers/gpu/drm/i915/i915_request.h | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7f2d28bf9ea3..a07baeb897fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1208,7 +1208,8 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT) + if (unlikely((prev->flags ^ next->flags) & + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) return false; if (!can_merge_ctx(prev->hw_context, next->hw_context)) @@ -1659,6 +1660,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->hw_context == rq->hw_context) goto done; + if (i915_request_has_sentinel(last)) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. Note that we diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 6a95242b280d..96991d64759c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -216,8 +216,9 @@ struct i915_request { unsigned long emitted_jiffies; unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_WAITBOOST BIT(0) +#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_SENTINEL BIT(2) /** timeline->request entry for this request */ struct list_head link; @@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq) return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); } +static inline bool i915_request_has_sentinel(const struct i915_request *rq) +{ + return unlikely(rq->flags & I915_REQUEST_SENTINEL); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { From 6a45008ab7bb5e13b543de0c141b94aaa71d8397 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 12 Oct 2019 08:23:06 +0100 Subject: [PATCH 056/159] drm/i915/perf: allow for CS OA configs to be created lazily Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configuration without having to open/close the i915/perf stream. v2: No need for locking on object OA config object creation (Chris) Flush cpu mapping of OA config (Chris) v3: Properly deal with the perf_metric lock (Chris/Lionel) v4: Fix oa config unref/put when not found (Lionel) v5: Allocate BOs for configurations on the stream instead of globally (Lionel) v6: Fix 64bit division (Chris) v7: Store allocated config BOs into the stream (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191012072308.30312-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 109 +++++++++++-------- drivers/gpu/drm/i915/i915_perf.h | 24 ++++ drivers/gpu/drm/i915/i915_perf_types.h | 23 ++-- 4 files changed, 103 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index b0227ab2fe1b..0987100c786b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,6 +138,7 @@ /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c4a436dfb7db..50f2f972020d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -369,52 +369,52 @@ struct perf_open_properties { struct intel_engine_cs *engine; }; +struct i915_oa_config_bo { + struct llist_node node; + + struct i915_oa_config *oa_config; + struct i915_vma *vma; +}; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) kfree(oa_config->b_counter_regs); if (!PTR_ERR(oa_config->mux_regs)) kfree(oa_config->mux_regs); - kfree(oa_config); + + kfree_rcu(oa_config, rcu); } -static void put_oa_config(struct i915_oa_config *oa_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + struct i915_oa_config *oa_config; - free_oa_config(oa_config); -} - -static int get_oa_config(struct i915_perf *perf, - int metrics_set, - struct i915_oa_config **out_config) -{ - int ret; - - if (metrics_set == 1) { - *out_config = &perf->test_config; - atomic_inc(&perf->test_config.ref_count); - return 0; - } - - ret = mutex_lock_interruptible(&perf->metrics_lock); - if (ret) - return ret; - - *out_config = idr_find(&perf->metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; else - atomic_inc(&(*out_config)->ref_count); + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - mutex_unlock(&perf->metrics_lock); + return oa_config; +} - return ret; +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) @@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; } +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct i915_perf *perf = stream->perf; @@ -1358,7 +1368,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(stream->oa_config); + free_oa_configs(stream); if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -1505,10 +1515,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", - i915_ggtt_offset(stream->oa_buffer.vma), - stream->oa_buffer.vaddr); - return 0; err_unpin: @@ -2200,9 +2206,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(perf, props->metrics_set, &stream->oa_config); - if (ret) { + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); + if (!stream->oa_config) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); + ret = -EINVAL; goto err_config; } @@ -2234,6 +2241,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto err_enable; } + DRM_DEBUG("opening stream oa config uuid=%s\n", + stream->oa_config->uuid); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stream->poll_check_timer.function = oa_poll_check_timer_cb; @@ -2249,7 +2259,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(stream); err_oa_buf_alloc: - put_oa_config(stream->oa_config); + free_oa_configs(stream); intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); @@ -3057,7 +3067,8 @@ void i915_perf_register(struct drm_i915_private *i915) if (ret) goto sysfs_error; - atomic_set(&perf->test_config.ref_count, 1); + perf->test_config.perf = perf; + kref_init(&perf->test_config.ref); goto exit; @@ -3315,7 +3326,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - atomic_set(&oa_config->ref_count, 1); + oa_config->perf = perf; + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3414,7 +3426,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, sysfs_err: mutex_unlock(&perf->metrics_lock); reg_err: - put_oa_config(oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3450,13 +3462,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) - goto lock_err; + return ret; oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { DRM_DEBUG("Failed to remove unknown OA config\n"); ret = -ENOENT; - goto config_err; + goto err_unlock; } GEM_BUG_ON(*arg != oa_config->id); @@ -3466,13 +3478,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, idr_remove(&perf->metrics_idr, *arg); + mutex_unlock(&perf->metrics_lock); + DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(oa_config); + i915_oa_config_put(oa_config); -config_err: + return 0; + +err_unlock: mutex_unlock(&perf->metrics_lock); -lock_err: return ret; } @@ -3642,7 +3657,7 @@ void i915_perf_init(struct drm_i915_private *i915) static int destroy_config(int id, void *p, void *data) { - put_oa_config(p); + i915_oa_config_put(p); return 0; } diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index ff412fb0dbbf..98e16be48a73 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -6,6 +6,7 @@ #ifndef __I915_PERF_H__ #define __I915_PERF_H__ +#include #include #include "i915_perf_types.h" @@ -13,6 +14,7 @@ struct drm_device; struct drm_file; struct drm_i915_private; +struct i915_oa_config; struct intel_context; struct intel_engine_cs; @@ -27,7 +29,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + void i915_oa_init_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine); +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set); + +static inline struct i915_oa_config * +i915_oa_config_get(struct i915_oa_config *oa_config) +{ + if (kref_get_unless_zero(&oa_config->ref)) + return oa_config; + else + return NULL; +} + +void i915_oa_config_release(struct kref *ref); +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, i915_oa_config_release); +} + #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index eb8d1ebd5095..337cd7d2ad77 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,6 +22,7 @@ struct drm_i915_private; struct file; struct i915_gem_context; +struct i915_perf; struct i915_vma; struct intel_context; struct intel_engine_cs; @@ -37,6 +38,8 @@ struct i915_oa_reg { }; struct i915_oa_config { + struct i915_perf *perf; + char uuid[UUID_STRING_LEN + 1]; int id; @@ -51,7 +54,8 @@ struct i915_oa_config { struct attribute *attrs[2]; struct device_attribute sysfs_metric_id; - atomic_t ref_count; + struct kref ref; + struct rcu_head rcu; }; struct i915_perf_stream; @@ -177,6 +181,12 @@ struct i915_perf_stream { */ struct i915_oa_config *oa_config; + /** + * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily + * each time @oa_config changes. + */ + struct llist_head oa_config_bos; + /** * @pinned_ctx: The OA context specific information. */ @@ -331,13 +341,13 @@ struct i915_perf { /* * Lock associated with adding/modifying/removing OA configs - * in dev_priv->perf.metrics_idr. + * in perf->metrics_idr. */ struct mutex metrics_lock; /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. + * List of dynamic configurations (struct i915_oa_config), you + * need to hold perf->metrics_lock to access it. */ struct idr metrics_idr; @@ -350,8 +360,7 @@ struct i915_perf { /* * The stream currently using the OA unit. If accessed * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. + * descriptor. */ struct i915_perf_stream *exclusive_stream; From daed3e44396d178cf2098b754bb8ef5ca4e918bc Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 12 Oct 2019 08:23:07 +0100 Subject: [PATCH 057/159] drm/i915/perf: implement active wait for noa configurations NOA configuration take some amount of time to apply. That amount of time depends on the size of the GT. There is no documented time for this. For example, past experimentations with powergating configuration changes seem to indicate a 60~70us delay. We go with 500us as default for now which should be over the required amount of time (according to HW architects). v2: Don't forget to save/restore registers used for the wait (Chris) v3: Name used CS_GPR registers (Chris) Fix compile issue due to rebase (Lionel) v4: Fix save/restore helpers (Umesh) v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel) v6: Add missing struct declarations in i915_perf.h Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191012072308.30312-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 4 +- drivers/gpu/drm/i915/gt/intel_gt_types.h | 5 + drivers/gpu/drm/i915/i915_debugfs.c | 32 +++ drivers/gpu/drm/i915/i915_perf.c | 224 ++++++++++++++++++ drivers/gpu/drm/i915/i915_perf_types.h | 8 + drivers/gpu/drm/i915/i915_reg.h | 4 +- .../drm/i915/selftests/i915_live_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_perf.c | 216 +++++++++++++++++ 8 files changed, 492 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 0987100c786b..8e63cffcabe0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -163,7 +163,8 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) +#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) +#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ /* * 3D instructions used by the kernel @@ -224,6 +225,7 @@ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 802f516a3430..be4b263621c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -109,6 +109,11 @@ enum intel_gt_scratch_field { /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + /* 6 * 8 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, + + /* 4 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e575761550ac..a541b6ae534f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); +static int +i915_perf_noa_delay_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + + /* + * This would lead to infinite waits as we're doing timestamp + * difference on the CS with only 32bits. + */ + if (val > mul_u32_u32(U32_MAX, clk)) + return -EINVAL; + + atomic64_set(&i915->perf.noa_programming_delay, val); + return 0; +} + +static int +i915_perf_noa_delay_get(void *data, u64 *val) +{ + struct drm_i915_private *i915 = data; + + *val = atomic64_read(&i915->perf.noa_programming_delay); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops, + i915_perf_noa_delay_get, + i915_perf_noa_delay_set, + "%llu\n"); + #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -4345,6 +4376,7 @@ static const struct i915_debugfs_files { const char *name; const struct file_operations *fops; } i915_debugfs_files[] = { + {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 50f2f972020d..81e8a7934001 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -198,6 +198,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" @@ -1347,6 +1348,12 @@ free_oa_configs(struct i915_perf_stream *stream) free_oa_config_bo(oa_bo); } +static void +free_noa_wait(struct i915_perf_stream *stream) +{ + i915_vma_unpin_and_release(&stream->noa_wait, 0); +} + static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct i915_perf *perf = stream->perf; @@ -1369,6 +1376,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) oa_put_render_ctx_id(stream); free_oa_configs(stream); + free_noa_wait(stream); if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -1529,6 +1537,206 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) return ret; } +static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, + bool save, i915_reg_t reg, u32 offset, + u32 dword_count) +{ + u32 cmd; + u32 d; + + cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; + if (INTEL_GEN(stream->perf->i915) >= 8) + cmd++; + + for (d = 0; d < dword_count; d++) { + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg) + 4 * d; + *cs++ = intel_gt_scratch_offset(stream->engine->gt, + offset) + 4 * d; + *cs++ = 0; + } + + return cs; +} + +static int alloc_noa_wait(struct i915_perf_stream *stream) +{ + struct drm_i915_private *i915 = stream->perf->i915; + struct drm_i915_gem_object *bo; + struct i915_vma *vma; + const u64 delay_ticks = 0xffffffffffffffff - + DIV64_U64_ROUND_UP( + atomic64_read(&stream->perf->noa_programming_delay) * + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, + 1000000ull); + const u32 base = stream->engine->mmio_base; +#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) + u32 *batch, *ts0, *cs, *jump; + int ret, i; + enum { + START_TS, + NOW_TS, + DELTA_TS, + JUMP_PREDICATE, + DELTA_TARGET, + N_CS_GPR + }; + + bo = i915_gem_object_create_internal(i915, 4096); + if (IS_ERR(bo)) { + DRM_ERROR("Failed to allocate NOA wait batchbuffer\n"); + return PTR_ERR(bo); + } + + /* + * We pin in GGTT because we jump into this buffer now because + * multiple OA config BOs will have a jump to this address and it + * needs to be fixed during the lifetime of the i915/perf stream. + */ + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); + if (IS_ERR(batch)) { + ret = PTR_ERR(batch); + goto err_unpin; + } + + /* Save registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, true /* save */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, true /* save */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* First timestamp snapshot location. */ + ts0 = cs; + + /* + * Initial snapshot of the timestamp register to implement the wait. + * We work with 32b values, so clear out the top 32b bits of the + * register because the ALU works 64bits. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); + + /* + * This is the location we're going to jump back into until the + * required amount of time has passed. + */ + jump = cs; + + /* + * Take another snapshot of the timestamp register. Take care to clear + * up the top 32bits of CS_GPR(1) as we're using it for other + * operations below. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); + + /* + * Do a diff between the 2 timestamps and store the result back into + * CS_GPR(1). + */ + *cs++ = MI_MATH(5); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); + *cs++ = MI_MATH_SUB; + *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); + *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the + * timestamp have rolled over the 32bits) into the predicate register + * to be used for the predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Restart from the beginning if we had timestamps roll over. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; + *cs++ = 0; + + /* + * Now add the diff between to previous timestamps and add it to : + * (((1 * << 64) - 1) - delay_ns) + * + * When the Carry Flag contains 1 this means the elapsed time is + * longer than the expected delay, and we can exit the wait loop. + */ + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); + *cs++ = lower_32_bits(delay_ticks); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; + *cs++ = upper_32_bits(delay_ticks); + + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the result into the predicate register to be used for the + * predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Predicate the jump. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; + *cs++ = 0; + + /* Restore registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, false /* restore */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* And return to the ring. */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); + + i915_gem_object_flush_map(bo); + i915_gem_object_unpin_map(bo); + + stream->noa_wait = vma; + return 0; + +err_unpin: + __i915_vma_unpin(vma); +err_unref: + i915_gem_object_put(bo); + return ret; +} + static void config_oa_regs(struct intel_uncore *uncore, const struct i915_oa_reg *regs, u32 n_regs) @@ -2206,6 +2414,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } + ret = alloc_noa_wait(stream); + if (ret) { + DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); + goto err_noa_wait_alloc; + } + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); if (!stream->oa_config) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); @@ -2265,6 +2479,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, intel_engine_pm_put(stream->engine); err_config: + free_noa_wait(stream); + +err_noa_wait_alloc: if (stream->ctx) oa_put_render_ctx_id(stream); @@ -3651,6 +3868,9 @@ void i915_perf_init(struct drm_i915_private *i915) ratelimit_set_flags(&perf->spurious_report_rs, RATELIMIT_MSG_ON_RELEASE); + atomic64_set(&perf->noa_programming_delay, + 500 * 1000 /* 500us */); + perf->i915 = i915; } } @@ -3680,3 +3900,7 @@ void i915_perf_fini(struct drm_i915_private *i915) memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_perf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 337cd7d2ad77..d35a3c1946c3 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -266,6 +266,12 @@ struct i915_perf_stream { */ u32 head; } oa_buffer; + + /** + * A batch buffer doing a wait on the GPU for the NOA logic to be + * reprogrammed. + */ + struct i915_vma *noa_wait; }; /** @@ -385,6 +391,8 @@ struct i915_perf { struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; + + atomic64_t noa_programming_delay; }; #endif /* _I915_PERF_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0fb9030b89f1..e24991e54897 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) #define MI_PREDICATE_SRC1_UDW _MMIO(0x2408 + 4) - +#define MI_PREDICATE_DATA _MMIO(0x2410) +#define MI_PREDICATE_RESULT _MMIO(0x2418) +#define MI_PREDICATE_RESULT_1 _MMIO(0x241c) #define MI_PREDICATE_RESULT_2 _MMIO(0x2214) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 6713efea350b..6daf6599ec79 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -35,3 +35,4 @@ selftest(reset, intel_reset_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) +selftest(perf, i915_perf_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c new file mode 100644 index 000000000000..dc6d689e4251 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -0,0 +1,216 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + +#include "i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +static struct i915_perf_stream * +test_stream(struct i915_perf *perf) +{ + struct drm_i915_perf_open_param param = {}; + struct perf_open_properties props = { + .engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0), + .sample_flags = SAMPLE_OA_REPORT, + .oa_format = I915_OA_FORMAT_C4_B8, + .metrics_set = 1, + }; + struct i915_perf_stream *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + return NULL; + + stream->perf = perf; + + mutex_lock(&perf->lock); + if (i915_oa_stream_init(stream, ¶m, &props)) { + kfree(stream); + stream = NULL; + } + mutex_unlock(&perf->lock); + + return stream; +} + +static void stream_destroy(struct i915_perf_stream *stream) +{ + struct i915_perf *perf = stream->perf; + + mutex_lock(&perf->lock); + i915_perf_destroy_locked(stream); + mutex_unlock(&perf->lock); +} + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + + /* Quick check we can create a perf stream */ + + stream = test_stream(&i915->perf); + if (!stream) + return -EINVAL; + + stream_destroy(stream); + return 0; +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 *cs; + int len; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + len = 5; + if (INTEL_GEN(rq->i915) >= 8) + len++; + + *cs++ = GFX_OP_PIPE_CONTROL(len); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_WRITE_TIMESTAMP; + *cs++ = slot * sizeof(u32); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static ktime_t poll_status(struct i915_request *rq, int slot) +{ + while (!intel_read_status_page(rq->engine, slot) && + !i915_request_completed(rq)) + cpu_relax(); + + return ktime_get(); +} + +static int live_noa_delay(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + struct i915_request *rq; + ktime_t t0, t1; + u64 expected; + u32 delay; + int err; + int i; + + /* Check that the GPU delays matches expectations */ + + stream = test_stream(&i915->perf); + if (!stream) + return -ENOMEM; + + expected = atomic64_read(&stream->perf->noa_programming_delay); + + if (stream->engine->class != RENDER_CLASS) { + err = -ENODEV; + goto out; + } + + for (i = 0; i < 4; i++) + intel_write_status_page(stream->engine, 0x100 + i, 0); + + rq = i915_request_create(stream->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb && + i915_request_timeline(rq)->has_initial_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + err = write_timestamp(rq, 0x100); + if (err) { + i915_request_add(rq); + goto out; + } + + err = rq->engine->emit_bb_start(rq, + i915_ggtt_offset(stream->noa_wait), 0, + I915_DISPATCH_SECURE); + if (err) { + i915_request_add(rq); + goto out; + } + + err = write_timestamp(rq, 0x102); + if (err) { + i915_request_add(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + preempt_disable(); + t0 = poll_status(rq, 0x100); + t1 = poll_status(rq, 0x102); + preempt_enable(); + + pr_info("CPU delay: %lluns, expected %lluns\n", + ktime_sub(t1, t0), expected); + + delay = intel_read_status_page(stream->engine, 0x102); + delay -= intel_read_status_page(stream->engine, 0x100); + delay = div_u64(mul_u32_u32(delay, 1000 * 1000), + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + pr_info("GPU delay: %uns, expected %lluns\n", + delay, expected); + + if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { + pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", + delay / 1000, + div_u64(3 * expected, 4000), + div_u64(3 * expected, 2000)); + err = -EINVAL; + } + + i915_request_put(rq); +out: + stream_destroy(stream); + return err; +} + +int i915_perf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_noa_delay), + }; + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj || !perf->ops.enable_metric_set) + return 0; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} From 15d0ace1f876e01b9745cb22ee32e3770fe3a6d5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 12 Oct 2019 08:23:08 +0100 Subject: [PATCH 058/159] drm/i915/perf: execute OA configuration from command stream We haven't run into issues with programming the global OA/NOA registers configuration from CPU so far, but HW engineers actually recommend doing this from the command streamer. On TGL in particular one of the clock domain in which some of that programming goes might not be powered when we poke things from the CPU. Since we have a command buffer prepared for the execbuffer side of things, we can reuse that approach here too. This also allows us to significantly reduce the amount of time we hold the main lock. v2: Drop the global lock as much as possible v3: Take global lock to pin global v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel) v5: Move locking to the stream (Lionel) v6: Move active reconfiguration request into i915_perf_stream (Lionel) v7: Pin VMA outside request creation (Chris) Lock VMA before move to active (Chris) v8: Fix double free on stream->initial_oa_config_bo (Lionel) Don't allow interruption when waiting on active config request (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191012072308.30312-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 199 ++++++++++++++++++++++++------- 1 file changed, 156 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 81e8a7934001..74f50120c151 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1731,56 +1731,181 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return 0; err_unpin: - __i915_vma_unpin(vma); + i915_vma_unpin_and_release(&vma, 0); err_unref: i915_gem_object_put(bo); return ret; } -static void config_oa_regs(struct intel_uncore *uncore, - const struct i915_oa_reg *regs, - u32 n_regs) +static u32 *write_cs_mi_lri(u32 *cs, + const struct i915_oa_reg *reg_data, + u32 n_regs) { u32 i; for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, + n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); - intel_uncore_write(uncore, reg->addr, reg->value); + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; } + + return cs; } -static void delay_after_mux(void) +static int num_lri_dwords(int num_regs) { + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static struct i915_oa_config_bo * +alloc_oa_config_buffer(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config) +{ + struct drm_i915_gem_object *obj; + struct i915_oa_config_bo *oa_bo; + size_t config_length = 0; + u32 *cs; + int err; + + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); + + config_length += num_lri_dwords(oa_config->mux_regs_len); + config_length += num_lri_dwords(oa_config->b_counter_regs_len); + config_length += num_lri_dwords(oa_config->flex_regs_len); + config_length++; /* MI_BATCH_BUFFER_END */ + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_free; + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_oa_bo; + } + + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->b_counter_regs, + oa_config->b_counter_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->flex_regs, + oa_config->flex_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + oa_bo->vma = i915_vma_instance(obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->vma)) { + err = PTR_ERR(oa_bo->vma); + goto err_oa_bo; + } + + oa_bo->oa_config = i915_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; + +err_oa_bo: + i915_gem_object_put(obj); +err_free: + kfree(oa_bo); + return ERR_PTR(err); +} + +static struct i915_vma * +get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) +{ + struct i915_oa_config_bo *oa_bo; + /* - * It apparently takes a fairly long time for a new MUX - * configuration to be be applied after these register writes. - * This delay duration was derived empirically based on the - * render_basic config but hopefully it covers the maximum - * configuration latency. - * - * As a fallback, the checks in _append_oa_reports() to skip - * invalid OA reports do also seem to work to discard reports - * generated before this config has completed - albeit not - * silently. - * - * Unfortunately this is essentially a magic number, since we - * don't currently know of a reliable mechanism for predicting - * how long the MUX config will take to apply and besides - * seeing invalid reports we don't know of a reliable way to - * explicitly check that the MUX config has landed. - * - * It's even possible we've miss characterized the underlying - * problem - it just seems like the simplest explanation why - * a delay at this location would mitigate any invalid reports. + * Look for the buffer in the already allocated BOs attached + * to the stream. */ - usleep_range(15000, 20000); + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, + oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = alloc_oa_config_buffer(stream, oa_config); + if (IS_ERR(oa_bo)) + return ERR_CAST(oa_bo); + +out: + return i915_vma_get(oa_bo->vma); +} + +static int emit_oa_config(struct i915_perf_stream *stream, + struct intel_context *ce) +{ + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = get_oa_vma(stream, stream->oa_config); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, 0); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (err) + goto err_add_request; + + err = rq->engine->emit_bb_start(rq, + vma->node.start, 0, + I915_DISPATCH_SECURE); +err_add_request: + i915_request_add(rq); +err_vma_unpin: + i915_vma_unpin(vma); +err_vma_put: + i915_vma_put(vma); + return err; } static int hsw_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - const struct i915_oa_config *oa_config = stream->oa_config; /* * PRM: @@ -1797,13 +1922,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(uncore, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, stream->engine->kernel_context); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2167,13 +2286,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(uncore, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, stream->engine->kernel_context); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) From 5f5c382ecfdd06e17316d1c9f1362522c20cdfef Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 12 Oct 2019 10:10:56 +0100 Subject: [PATCH 059/159] drm/i915/perf: Prefer using the pinned_ctx for emitting delays on config When we are watching a particular context, we want the OA config to be applied inline with that context such that it takes effect before the next submission. Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191012091056.28686-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 74f50120c151..b4e2332d35cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1903,6 +1903,11 @@ static int emit_oa_config(struct i915_perf_stream *stream, return err; } +static struct intel_context *oa_context(struct i915_perf_stream *stream) +{ + return stream->pinned_ctx ?: stream->engine->kernel_context; +} + static int hsw_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -1922,7 +1927,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, stream->engine->kernel_context); + return emit_oa_config(stream, oa_context(stream)); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2286,7 +2291,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - return emit_oa_config(stream, stream->engine->kernel_context); + return emit_oa_config(stream, oa_context(stream)); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) From c2fba936d3047fc5ea1458549daa19844ef243a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 13 Oct 2019 10:52:11 +0100 Subject: [PATCH 060/159] drm/i915/perf: Avoid polluting the i915_oa_config with error pointers Use a local variable to track the allocation errors to avoid polluting the struct and keep the free simple. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191013095211.2922-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 52 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b4e2332d35cb..366580701ba2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -384,12 +384,9 @@ void i915_oa_config_release(struct kref *ref) struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref); - if (!PTR_ERR(oa_config->flex_regs)) - kfree(oa_config->flex_regs); - if (!PTR_ERR(oa_config->b_counter_regs)) - kfree(oa_config->b_counter_regs); - if (!PTR_ERR(oa_config->mux_regs)) - kfree(oa_config->mux_regs); + kfree(oa_config->flex_regs); + kfree(oa_config->b_counter_regs); + kfree(oa_config->mux_regs); kfree_rcu(oa_config, rcu); } @@ -3631,6 +3628,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_oa_config *args = data; struct i915_oa_config *oa_config, *tmp; + static struct i915_oa_reg *regs; int err, id; if (!perf->i915) { @@ -3676,30 +3674,30 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); oa_config->mux_regs_len = args->n_mux_regs; - oa_config->mux_regs = - alloc_oa_regs(perf, - perf->ops.is_valid_mux_reg, - u64_to_user_ptr(args->mux_regs_ptr), - args->n_mux_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); - if (IS_ERR(oa_config->mux_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for mux_regs\n"); - err = PTR_ERR(oa_config->mux_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->mux_regs = regs; oa_config->b_counter_regs_len = args->n_boolean_regs; - oa_config->b_counter_regs = - alloc_oa_regs(perf, - perf->ops.is_valid_b_counter_reg, - u64_to_user_ptr(args->boolean_regs_ptr), - args->n_boolean_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); - if (IS_ERR(oa_config->b_counter_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); - err = PTR_ERR(oa_config->b_counter_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->b_counter_regs = regs; if (INTEL_GEN(perf->i915) < 8) { if (args->n_flex_regs != 0) { @@ -3708,17 +3706,17 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } else { oa_config->flex_regs_len = args->n_flex_regs; - oa_config->flex_regs = - alloc_oa_regs(perf, - perf->ops.is_valid_flex_reg, - u64_to_user_ptr(args->flex_regs_ptr), - args->n_flex_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); - if (IS_ERR(oa_config->flex_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for flex_regs\n"); - err = PTR_ERR(oa_config->flex_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->flex_regs = regs; } err = mutex_lock_interruptible(&perf->metrics_lock); From 280bc0cecb77240bf6be7f86a9da90e3451829b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 13 Oct 2019 12:45:09 +0100 Subject: [PATCH 061/159] drm/i915/selftests: Fixup naked 64b divide drivers/gpu/drm/i915/intel_memory_region.o: in function `igt_mock_contiguous': drivers/gpu/drm/i915/selftests/intel_memory_region.c:166: undefined reference to `__umoddi3' v2: promote target to u64 for consistency across all builds Reported-by: kbuild test robot Fixes: 2f0b97ca0211 ("drm/i915/region: support contiguous allocations") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191013114509.3405-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 4e44c81e8e5b..56091e7e599e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -128,9 +128,9 @@ static int igt_mock_contiguous(void *arg) LIST_HEAD(objects); LIST_HEAD(holes); I915_RND_STATE(prng); - resource_size_t target; resource_size_t total; resource_size_t min; + u64 target; int err = 0; total = resource_size(&mem->region); @@ -163,7 +163,9 @@ static int igt_mock_contiguous(void *arg) igt_object_release(obj); /* Internal fragmentation should not bleed into the object size */ - target = round_up(prandom_u32_state(&prng) % total, PAGE_SIZE); + target = i915_prandom_u64_state(&prng); + div64_u64_rem(target, total, &target); + target = round_up(target, PAGE_SIZE); target = max_t(u64, PAGE_SIZE, target); obj = igt_object_create(mem, &objects, target, @@ -172,8 +174,8 @@ static int igt_mock_contiguous(void *arg) return PTR_ERR(obj); if (obj->base.size != target) { - pr_err("%s obj->base.size(%llx) != target(%llx)\n", __func__, - (u64)obj->base.size, (u64)target); + pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, + obj->base.size, target); err = -EINVAL; goto err_close_objects; } @@ -236,7 +238,7 @@ static int igt_mock_contiguous(void *arg) I915_BO_ALLOC_CONTIGUOUS); if (should_fail != IS_ERR(obj)) { pr_err("%s target allocation(%llx) mismatch\n", - __func__, (u64)target); + __func__, target); err = -EINVAL; goto err_close_objects; } From 1fd37669bc873cee180f1505a20b16e557b7b472 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 12 Oct 2019 09:02:08 +0100 Subject: [PATCH 062/159] drm/i915/display: Squelch kerneldoc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just a parameter rename, drivers/gpu/drm/i915/display/intel_display.c:14425: warning: Function parameter or member '_new_plane_state' not described in 'intel_prepare_plane_fb' drivers/gpu/drm/i915/display/intel_display.c:14425: warning: Excess function parameter 'new_state' description in 'intel_prepare_plane_fb' drivers/gpu/drm/i915/display/intel_display.c:14534: warning: Function parameter or member '_old_plane_state' not described in 'intel_cleanup_plane_fb' drivers/gpu/drm/i915/display/intel_display.c:14534: warning: Excess function parameter 'old_state' description in 'intel_cleanup_plane_fb' Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191012080208.18774-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a146ec02a0c1..3cf39fc153b3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14410,7 +14410,7 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for - * @new_state: the plane state being prepared + * @_new_plane_state: the plane state being prepared * * Prepares a framebuffer for usage on a display plane. Generally this * involves pinning the underlying object and updating the frontbuffer tracking @@ -14524,7 +14524,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, /** * intel_cleanup_plane_fb - Cleans up an fb after plane use * @plane: drm plane to clean up for - * @old_state: the state from the previous modeset + * @_old_plane_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. */ From 9c27462c896d4852ece6086338e52777a3cb110e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Oct 2019 10:07:48 +0100 Subject: [PATCH 063/159] drm/i915/selftests: Check known register values within the context Check the logical ring context by asserting that the registers hold expected start during execution. (It's a bit chicken-and-egg for how could we manage to execute our request if the registers were not being updated. Still, it's nice to verify that the HW is working as expected.) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191014090757.32111-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 126 +++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index a691e429ca01..0aa36b1b2389 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2599,10 +2599,136 @@ static int live_lrc_layout(void *arg) return err; } +static int __live_lrc_state(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. + */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_lrc_state(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_state), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) From 9506c23dfaf5980ab1eb33e26968bcf9b2269bb5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Oct 2019 10:07:49 +0100 Subject: [PATCH 064/159] drm/i915/selftests: Check that GPR are cleared for new contexts We want the general purpose registers to be clear in all new contexts so that we can be confident that no information is leaked from one to the next. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191014090757.32111-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 189 ++++++++++++++++++++++--- 1 file changed, 168 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 0aa36b1b2389..1276da059dc6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -19,6 +19,9 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ + static struct i915_vma *create_scratch(struct intel_gt *gt) { struct drm_i915_gem_object *obj; @@ -2107,16 +2110,14 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, struct intel_engine_cs **siblings, unsigned int nsibling) { -#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) - struct i915_request *last = NULL; struct i915_gem_context *ctx; struct intel_context *ve; struct i915_vma *scratch; struct igt_live_test t; - const int num_gpr = 16 * 2; /* each GPR is 2 dwords */ unsigned int n; int err = 0; + u32 *cs; ctx = kernel_context(i915); if (!ctx) @@ -2142,10 +2143,9 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, if (err) goto out_unpin; - for (n = 0; n < num_gpr; n++) { + for (n = 0; n < NUM_GPR_DW; n++) { struct intel_engine_cs *engine = siblings[n % nsibling]; struct i915_request *rq; - u32 *cs; rq = i915_request_create(ve); if (IS_ERR(rq)) { @@ -2169,7 +2169,7 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, *cs++ = 0; *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = CS_GPR(engine, (n + 1) % num_gpr); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); *cs++ = n + 1; *cs++ = MI_NOOP; @@ -2182,21 +2182,26 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, if (i915_request_wait(last, 0, HZ / 5) < 0) { err = -ETIME; - } else { - u32 *map = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - - for (n = 0; n < num_gpr; n++) { - if (map[n] != n) { - pr_err("Incorrect value[%d] found for GPR[%d]\n", - map[n], n); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_map(scratch->obj); + goto out_end; } + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + out_end: if (igt_live_test_end(&t)) err = -EIO; @@ -2210,8 +2215,6 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, out_close: kernel_context_close(ctx); return err; - -#undef CS_GPR } static int live_virtual_preserved(void *arg) @@ -2724,11 +2727,155 @@ static int live_lrc_state(void *arg) return err; } +static int gpr_make_dirty(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int __live_gpr_clear(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine); + if (err) + return err; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_put; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_gpr_clear(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. + */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_gpr_clear(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), SUBTEST(live_lrc_state), + SUBTEST(live_gpr_clear), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) From 89b6d1831d21554a5737a201b78a6ad6c5517b46 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 13 Oct 2019 21:30:12 +0100 Subject: [PATCH 065/159] drm/i915/execlists: Tweak virtual unsubmission Since commit e2144503bf3b ("drm/i915: Prevent bonded requests from overtaking each other on preemption") we have restricted requests to run on their chosen engine across preemption events. We can take this restriction into account to know that we will want to resubmit those requests onto the same physical engine, and so can shortcircuit the virtual engine selection process and keep the request on the same engine during unwind. References: e2144503bf3b ("drm/i915: Prevent bonded requests from overtaking each other on preemption") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Ramlingam C Link: https://patchwork.freedesktop.org/patch/msgid/20191013203012.25208-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++--- drivers/gpu/drm/i915/i915_request.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a07baeb897fe..ee21307ab5fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -847,7 +847,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; if (i915_request_completed(rq)) continue; /* XXX */ @@ -862,8 +861,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. */ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -874,6 +872,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + struct intel_engine_cs *owner = rq->hw_context->engine; + /* * Decouple the virtual breadcrumb before moving it * back to the virtual engine -- we don't want the diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 437f9fc6282e..b8a54572a4f8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -649,6 +649,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->gem_context = ce->gem_context; rq->engine = ce->engine; rq->ring = ce->ring; + rq->execution_mask = ce->engine->mask; rcu_assign_pointer(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; @@ -671,7 +672,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->batch = NULL; rq->capture_list = NULL; rq->flags = 0; - rq->execution_mask = ALL_ENGINES; INIT_LIST_HEAD(&rq->execute_cb); From 41e35ffb380bde1379e4030bb5b2ac824d5139cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Oct 2019 23:20:30 +0300 Subject: [PATCH 066/159] drm/i915: Favor last VBT child device with conflicting AUX ch/DDC pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first come first served apporoach to handling the VBT child device AUX ch conflicts has backfired. We have machines in the wild where the VBT specifies both port A eDP and port E DP (in that order) with port E being the real one. So let's try to flip the preference around and let the last child device win once again. Cc: stable@vger.kernel.org Cc: Jani Nikula Tested-by: Masami Ichikawa Tested-by: Torsten Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111966 Fixes: 36a0f92020dc ("drm/i915/bios: make child device order the priority order") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011202030.8829-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9628b485b179..f0307b04cc13 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " "disabling port %c DVI/HDMI support\n", port_name(port), info->alternate_ddc_pin, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedly sharing the @@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, * port. Otherwise they share the same ddc bin and * system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dvi = false; info->supports_hdmi = false; info->alternate_ddc_pin = 0; @@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " "disabling port %c DP support\n", port_name(port), info->alternate_aux_channel, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedlt sharing the @@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, * port. Otherwise they share the same aux channel * and system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dp = false; info->alternate_aux_channel = 0; } From 53448aed7b805cf36ecc59ed5fe8e53815ce4fe8 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Thu, 10 Oct 2019 17:26:18 -0700 Subject: [PATCH 067/159] drm/i915/ehl: Port C's hotplug interrupt is associated with TC1 bits On platforms that have the MCC PCH, Port C's hotplug interrupt bits are mapped to TC1 bits. Suggested-by: Matt Roper Signed-off-by: Vivek Kasireddy Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191011002618.3087-1-vivek.kasireddy@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 38aa09cfbed3..830c71e5c861 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5282,6 +5282,9 @@ static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, { enum port port = intel_dig_port->base.port; + if (HAS_PCH_MCC(dev_priv) && port == PORT_C) + return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3af7f7914c40..a7c968b01af3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2249,8 +2249,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { - ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; - tc_hotplug_trigger = 0; + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; @@ -3377,8 +3377,8 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_TGP, 0, - TGP_DDI_HPD_ENABLE_MASK, 0, + SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), hpd_icp); } From 3c00660db18371c632adae4836ed2b2d3d78ecb6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Oct 2019 13:13:36 +0100 Subject: [PATCH 068/159] drm/i915/execlists: Assert tasklet is locked for process_csb() We rely on only the tasklet being allowed to call into process_csb(), so assert that is locked when we do. As the tasklet uses a simple bitlock, there is no strong lockdep checking so we must make do with a plain assertion that the tasklet is running and assume that we are the tasklet! v2: Fixup intel_gt_sanitize() to prepare each engine for the reset so that the locks are marked as held during the reset v3: Check for existent function pointers for very early sanitisation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191014121336.30137-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 13 ++++++++++--- drivers/gpu/drm/i915/gt/intel_lrc.c | 7 +++++++ drivers/gpu/drm/i915/i915_gem.h | 5 +++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 87e34e0b6427..bd1bd3e00a94 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -136,11 +136,18 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) intel_uc_sanitize(>->uc); - if (!reset_engines(gt) && !force) - return; + for_each_engine(engine, gt->i915, id) + if (engine->reset.prepare) + engine->reset.prepare(engine); + + if (reset_engines(gt) || force) { + for_each_engine(engine, gt->i915, id) + __intel_engine_reset(engine, false); + } for_each_engine(engine, gt->i915, id) - __intel_engine_reset(engine, false); + if (engine->reset.finish) + engine->reset.finish(engine); } void intel_gt_pm_disable(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ee21307ab5fe..26b6b699193b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1842,6 +1842,13 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index db20d2b0842b..f6f9675848b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -86,6 +86,11 @@ static inline void tasklet_lock(struct tasklet_struct *t) cpu_relax(); } +static inline bool tasklet_is_locked(const struct tasklet_struct *t) +{ + return test_bit(TASKLET_STATE_RUN, &t->state); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) From b8d49f28aa03e4678e450e588b10c0faf96e4118 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:01 +0100 Subject: [PATCH 069/159] drm/i915/perf: introduce a versioning of the i915-perf uapi Reporting this version will help application figure out what level of the support the running kernel provides. v2: Add i915_perf_ioctl_version() (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_getparam.c | 4 ++++ drivers/gpu/drm/i915/i915_perf.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_perf.h | 1 + include/uapi/drm/i915_drm.h | 21 +++++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index f4b3cbb1adce..ad33fbe90a28 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -5,6 +5,7 @@ #include "gt/intel_engine_user.h" #include "i915_drv.h" +#include "i915_perf.h" int i915_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; break; + case I915_PARAM_PERF_REVISION: + value = i915_perf_ioctl_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 366580701ba2..220e3384af30 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4017,6 +4017,16 @@ void i915_perf_fini(struct drm_i915_private *i915) perf->i915 = NULL; } +/** + * i915_perf_ioctl_version - Version of the i915-perf subsystem + * + * This version number is used by userspace to detect available features. + */ +int i915_perf_ioctl_version(void) +{ + return 1; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_perf.c" #endif diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 98e16be48a73..4ceebce72060 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -22,6 +22,7 @@ void i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); +int i915_perf_ioctl_version(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 30c542144016..c50c712b3771 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait { * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. */ #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 + +/* + * Revision of the i915-perf uAPI. The value returned helps determine what + * i915-perf features are available. See drm_i915_perf_property_id. + */ +#define I915_PARAM_PERF_REVISION 54 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id { * Open the stream for a specific context handle (as used with * execbuffer2). A stream opened for a specific context this way * won't typically require root privileges. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_CTX_HANDLE = 1, /** * A value of 1 requests the inclusion of raw OA unit reports as * part of stream samples. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_SAMPLE_OA, /** * The value specifies which set of OA unit metrics should be * be configured, defining the contents of any OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_METRICS_SET, /** * The value specifies the size and layout of OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_FORMAT, @@ -1870,6 +1885,8 @@ enum drm_i915_perf_property_id { * from this exponent as follows: * * 80ns * 2^(period_exponent + 1) + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_EXPONENT, @@ -1901,6 +1918,8 @@ struct drm_i915_perf_open_param { * to close and re-open a stream with the same configuration. * * It's undefined whether any pending data for the stream will be lost. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) @@ -1908,6 +1927,8 @@ struct drm_i915_perf_open_param { * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) From 4f6ccc74a85cbb4cdd373c374dc76398dc7603a1 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:02 +0100 Subject: [PATCH 070/159] drm/i915: add support for perf configuration queries Listing configurations at the moment is supported only through sysfs. This might cause issues for applications wanting to list configurations from a container where sysfs isn't available. This change adds a way to query the number of configurations and their content through the i915 query uAPI. v2: Fix sparse warnings (Lionel) Add support to query configuration using uuid (Lionel) v3: Fix some inconsistency in uapi header (Lionel) Fix unlocking when not locked issue (Lionel) Add debug messages (Lionel) v4: Fix missing unlock (Dan) v5: Drop lock when copying config content to userspace (Chris) v6: Drop lock when copying config list to userspace (Chris) Fix deadlock when calling i915_perf_get_oa_config() under perf.metrics_lock (Lionel) Add i915_oa_config_get() (Chris) Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/i915_query.c | 296 ++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 62 ++++++- 3 files changed, 358 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 220e3384af30..109782ea30ad 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3806,8 +3806,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(perf->metrics_kobj, - &oa_config->sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); idr_remove(&perf->metrics_idr, *arg); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index abac5042da2b..c27cfef9281c 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -7,6 +7,7 @@ #include #include "i915_drv.h" +#include "i915_perf.h" #include "i915_query.h" #include @@ -140,10 +141,305 @@ query_engine_info(struct drm_i915_private *i915, return len; } +static int can_copy_perf_config_registers_or_number(u32 user_n_regs, + u64 user_regs_ptr, + u32 kernel_n_regs) +{ + /* + * We'll just put the number of registers, and won't copy the + * register. + */ + if (user_n_regs == 0) + return 0; + + if (user_n_regs < kernel_n_regs) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(user_regs_ptr), + 2 * sizeof(u32) * kernel_n_regs)) + return -EFAULT; + + return 0; +} + +static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs, + u32 kernel_n_regs, + u64 user_regs_ptr, + u32 *user_n_regs) +{ + u32 r; + + if (*user_n_regs == 0) { + *user_n_regs = kernel_n_regs; + return 0; + } + + *user_n_regs = kernel_n_regs; + + for (r = 0; r < kernel_n_regs; r++) { + u32 __user *user_reg_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2); + u32 __user *user_val_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 + + sizeof(u32)); + int ret; + + ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr), + user_reg_ptr); + if (ret) + return -EFAULT; + + ret = __put_user(kernel_regs[r].value, user_val_ptr); + if (ret) + return -EFAULT; + } + + return 0; +} + +static int query_perf_config_data(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item, + bool use_uuid) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_perf_oa_config __user *user_config_ptr = + u64_to_user_ptr(query_item->data_ptr + + sizeof(struct drm_i915_query_perf_config)); + struct drm_i915_perf_oa_config user_config; + struct i915_perf *perf = &i915->perf; + struct i915_oa_config *oa_config; + char uuid[UUID_STRING_LEN + 1]; + u64 config_id; + u32 flags, total_size; + int ret; + + if (!perf->i915) + return -ENODEV; + + total_size = + sizeof(struct drm_i915_query_perf_config) + + sizeof(struct drm_i915_perf_oa_config); + + if (query_item->length == 0) + return total_size; + + if (query_item->length < total_size) { + DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); + return -EINVAL; + } + + if (!access_ok(user_query_config_ptr, total_size)) + return -EFAULT; + + if (__get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (use_uuid) { + struct i915_oa_config *tmp; + int id; + + BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid)); + + memset(&uuid, 0, sizeof(uuid)); + if (__copy_from_user(uuid, user_query_config_ptr->uuid, + sizeof(user_query_config_ptr->uuid))) + return -EFAULT; + + oa_config = NULL; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, uuid)) { + oa_config = i915_oa_config_get(tmp); + break; + } + } + rcu_read_unlock(); + } else { + if (__get_user(config_id, &user_query_config_ptr->config)) + return -EFAULT; + + oa_config = i915_perf_get_oa_config(perf, config_id); + } + if (!oa_config) + return -ENOENT; + + if (__copy_from_user(&user_config, user_config_ptr, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs, + user_config.boolean_regs_ptr, + oa_config->b_counter_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs, + user_config.flex_regs_ptr, + oa_config->flex_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs, + user_config.mux_regs_ptr, + oa_config->mux_regs_len); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs, + oa_config->b_counter_regs_len, + user_config.boolean_regs_ptr, + &user_config.n_boolean_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->flex_regs, + oa_config->flex_regs_len, + user_config.flex_regs_ptr, + &user_config.n_flex_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->mux_regs, + oa_config->mux_regs_len, + user_config.mux_regs_ptr, + &user_config.n_mux_regs); + if (ret) + goto out; + + memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid)); + + if (__copy_to_user(user_config_ptr, &user_config, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = total_size; + +out: + i915_oa_config_put(oa_config); + return ret; +} + +static size_t sizeof_perf_config_list(size_t count) +{ + return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count; +} + +static size_t sizeof_perf_metrics(struct i915_perf *perf) +{ + struct i915_oa_config *tmp; + size_t i; + int id; + + i = 1; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) + i++; + rcu_read_unlock(); + + return sizeof_perf_config_list(i); +} + +static int query_perf_config_list(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct i915_perf *perf = &i915->perf; + u64 *oa_config_ids = NULL; + int alloc, n_configs; + u32 flags; + int ret; + + if (!perf->i915) + return -ENODEV; + + if (query_item->length == 0) + return sizeof_perf_metrics(perf); + + if (get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + n_configs = 1; + do { + struct i915_oa_config *tmp; + u64 *ids; + int id; + + ids = krealloc(oa_config_ids, + n_configs * sizeof(*oa_config_ids), + GFP_KERNEL); + if (!ids) + return -ENOMEM; + + alloc = fetch_and_zero(&n_configs); + + ids[n_configs++] = 1ull; /* reserved for test_config */ + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (n_configs < alloc) + ids[n_configs] = id; + n_configs++; + } + rcu_read_unlock(); + + oa_config_ids = ids; + } while (n_configs > alloc); + + if (query_item->length < sizeof_perf_config_list(n_configs)) { + DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); + kfree(oa_config_ids); + return -EINVAL; + } + + if (put_user(n_configs, &user_query_config_ptr->config)) { + kfree(oa_config_ids); + return -EFAULT; + } + + ret = copy_to_user(user_query_config_ptr + 1, + oa_config_ids, + n_configs * sizeof(*oa_config_ids)); + kfree(oa_config_ids); + if (ret) + return -EFAULT; + + return sizeof_perf_config_list(n_configs); +} + +static int query_perf_config(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + switch (query_item->flags) { + case DRM_I915_QUERY_PERF_CONFIG_LIST: + return query_perf_config_list(i915, query_item); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID: + return query_perf_config_data(i915, query_item, true); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID: + return query_perf_config_data(i915, query_item, false); + default: + return -EINVAL; + } +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, + query_perf_config, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c50c712b3771..0c7b2815fbf1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2005,6 +2005,7 @@ struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 /* Must be kept compact -- no holes and well documented */ /* @@ -2016,9 +2017,18 @@ struct drm_i915_query_item { __s32 length; /* - * Unused for now. Must be cleared to zero. + * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following : + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; +#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 /* * Data will be written at the location pointed by data_ptr when the @@ -2146,6 +2156,56 @@ struct drm_i915_query_engine_info { struct drm_i915_engine_info engines[]; }; +/* + * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. + */ +struct drm_i915_query_perf_config { + union { + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets + * this fields to the number of configurations available. + */ + __u64 n_configs; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + */ + __u64 config; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + * + * String formatted like "%08x-%04x-%04x-%04x-%012x" + */ + char uuid[36]; + }; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will + * write an array of __u64 of configuration identifiers. + * + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will + * write a struct drm_i915_perf_oa_config. If the following fields of + * drm_i915_perf_oa_config are set not set to 0, i915 will write into + * the associated pointers the values of submitted when the + * configuration was created : + * + * - n_mux_regs + * - n_boolean_regs + * - n_flex_regs + */ + __u8 data[]; +}; + #if defined(__cplusplus) } #endif From 7831e9a965ea2ca91855995d62197bc8078bb762 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Oct 2019 21:14:03 +0100 Subject: [PATCH 071/159] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Introduce a new perf_ioctl command to change the OA configuration of the active stream. This allows the OA stream to be reconfigured between batch buffers, giving greater flexibility in sampling. We inject a request into the OA context to reconfigure the stream asynchronously on the GPU in between and ordered with execbuffer calls. Original patch for dynamic reconfiguration by Lionel Landwerlin. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Chris Wilson Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_perf.c | 46 +++++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 13 +++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 109782ea30ad..372c91f6a28e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2862,6 +2862,40 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) stream->ops->disable(stream); } +static long i915_perf_config_locked(struct i915_perf_stream *stream, + unsigned long metrics_set) +{ + struct i915_oa_config *config; + long ret = stream->oa_config->id; + + config = i915_perf_get_oa_config(stream->perf, metrics_set); + if (!config) + return -EINVAL; + + if (config != stream->oa_config) { + int err; + + /* + * If OA is bound to a specific context, emit the + * reconfiguration inline from that context. The update + * will then be ordered with respect to submission on that + * context. + * + * When set globally, we use a low priority kernel context, + * so it will effectively take effect when idle. + */ + err = emit_oa_config(stream, oa_context(stream)); + if (err == 0) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + i915_oa_config_put(config); + + return ret; +} + /** * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs * @stream: An i915 perf stream @@ -2885,6 +2919,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, case I915_PERF_IOCTL_DISABLE: i915_perf_disable_locked(stream); return 0; + case I915_PERF_IOCTL_CONFIG: + return i915_perf_config_locked(stream, arg); } return -EINVAL; @@ -4023,7 +4059,15 @@ void i915_perf_fini(struct drm_i915_private *i915) */ int i915_perf_ioctl_version(void) { - return 1; + /* + * 1: Initial version + * I915_PERF_IOCTL_ENABLE + * I915_PERF_IOCTL_DISABLE + * + * 2: Added runtime modification of OA config. + * I915_PERF_IOCTL_CONFIG + */ + return 2; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0c7b2815fbf1..b008ce8b4e6f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1932,6 +1932,19 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) +/** + * Change metrics_set captured by a stream. + * + * If the stream is bound to a specific context, the configuration change + * will performed inline with that context such that it takes effect before + * the next execbuf submission. + * + * Returns the previously bound metrics set id, or a negative error code. + * + * This ioctl is available in perf revision 2. + */ +#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) + /** * Common to all i915 perf records */ From 9cd20ef7803cc53a00c6eb7198b3d870ac7b3766 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Oct 2019 21:14:04 +0100 Subject: [PATCH 072/159] drm/i915/perf: allow holding preemption on filtered ctx We would like to make use of perf in Vulkan. The Vulkan API is much lower level than OpenGL, with applications directly exposed to the concept of command buffers (pretty much equivalent to our batch buffers). In Vulkan, queries are always limited in scope to a command buffer. In OpenGL, the lack of command buffer concept meant that queries' duration could span multiple command buffers. With that restriction gone in Vulkan, we would like to simplify measuring performance just by measuring the deltas between the counter snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the more complex scheme we currently have in the GL driver, using 2 MI_RECORD_PERF_COUNT commands and doing some post processing on the stream of OA reports, coming from the global OA buffer, to remove any unrelated deltas in between the 2 MI_RECORD_PERF_COUNT. Disabling preemption only apply to a single context with which want to query performance counters for and is considered a privileged operation, by default protected by CAP_SYS_ADMIN. It is possible to enable it for a normal user by disabling the paranoid stream setting. v2: Store preemption setting in intel_context (Chris) v3: Use priorities to avoid preemption rather than the HW mechanism v4: Just modify the port priority reporting function v5: Add nopreempt flag on gem context and always flag requests appropriately, regarless of OA reconfiguration. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.h | 18 ++++++++++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 ++ drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++++-- drivers/gpu/drm/i915/i915_perf_types.h | 8 +++++ include/uapi/drm/i915_drm.h | 11 ++++++ 6 files changed, 72 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9234586830d1..cfe80590f0ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } +static inline bool +i915_gem_context_nopreempt(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index ab8e1367dfc8..fe97b8ba4fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -146,6 +146,7 @@ struct i915_gem_context { #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 +#define CONTEXT_NOPREEMPT 4 struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 580c15e74a0a..98cd92320afb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2079,6 +2079,9 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (i915_gem_context_nopreempt(eb->gem_context)) + eb->request->flags |= I915_REQUEST_NOPREEMPT; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 372c91f6a28e..54ec1c4190ac 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -344,6 +344,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format @@ -359,6 +361,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -2514,6 +2517,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; + stream->hold_preemption = props->hold_preemption; + stream->oa_buffer.format = perf->oa_formats[props->oa_format].format; @@ -2834,6 +2839,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) if (stream->ops->enable) stream->ops->enable(stream); + + if (stream->hold_preemption) + i915_gem_context_set_nopreempt(stream->ctx); } /** @@ -2858,6 +2866,9 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->disable() to refer to this */ stream->enabled = false; + if (stream->hold_preemption) + i915_gem_context_clear_nopreempt(stream->ctx); + if (stream->ops->disable) stream->ops->disable(stream); } @@ -3067,6 +3078,15 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3081,7 +3101,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(perf->i915) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option @@ -3091,7 +3111,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -3293,6 +3313,9 @@ static int read_properties_unlocked(struct i915_perf *perf, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = !!value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -4066,8 +4089,13 @@ int i915_perf_ioctl_version(void) * * 2: Added runtime modification of OA config. * I915_PERF_IOCTL_CONFIG + * + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold + * preemption on a particular context so that performance data is + * accessible from a delta of MI_RPC reports without looking at the + * OA buffer. */ - return 2; + return 3; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index d35a3c1946c3..a1f733fc905a 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -170,6 +170,14 @@ struct i915_perf_stream { */ bool enabled; + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + /** * @ops: The callbacks providing the implementation of this specific * type of configured stream. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b008ce8b4e6f..63d40cba97e0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1890,6 +1890,17 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property is only valid when specify a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. The execbuf2 submissions must include a + * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. + * + * This property is available in perf revision 3. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; From 56184a20a80037c39fccc78b683cf593a2db4c5f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Oct 2019 10:39:15 +0100 Subject: [PATCH 073/159] drm/i915: Drop obj.page_pin_count after a failed vma->set_pages() Before we attempt to set_pages on the vma, we claim a obj.pages_pin_count for it. If we subsequently fail to set the pages on the vma, we need to drop our pinning before returning the error. Reported-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191015093915.3995-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9fdcd4e2c799..7d936254bde7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -802,8 +802,11 @@ static int vma_get_pages(struct i915_vma *vma) } err = vma->ops->set_pages(vma); - if (err) + if (err) { + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); goto unlock; + } } atomic_inc(&vma->pages_count); From 454a325a9768aa9202843f2a4aea7bef94495d94 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Oct 2019 11:01:55 +0100 Subject: [PATCH 074/159] drm/i915: Remove leftover vma->obj->pages_pin_count on insert/remove We now do the page pin count upfront in vma_get_pages/vma_put_pages, so that we do the allocations before we enter the vm->mutex. Our vma page references we are tracked in vma->pages_count and the extra obj->pages_pin_count being performed later in i915_vma_insert and i915_vma_remove is redundant, and worse throws off the shrinker's logic on when it can free an object by unbinding it. Reported-by: Daniele Ceraolo Spurio Reported-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191015100155.10376-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7d936254bde7..e90c4d0af8fd 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -703,7 +703,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) list_add_tail(&vma->vm_link, &vma->vm->bound_list); if (vma->obj) { - atomic_inc(&vma->obj->mm.pages_pin_count); atomic_inc(&vma->obj->bind_count); assert_bind_count(vma->obj); } @@ -726,14 +725,12 @@ i915_vma_remove(struct i915_vma *vma) if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; - atomic_dec(&obj->bind_count); - /* * And finally now the object is completely decoupled from this * vma, we can drop its hold on the backing storage and allow * it to be reaped by the shrinker. */ - i915_gem_object_unpin_pages(obj); + atomic_dec(&obj->bind_count); assert_bind_count(obj); } From 8b390c15818ccf34a4e44e18ed590cb06bbbcb88 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Oct 2019 10:32:04 +0100 Subject: [PATCH 075/159] drm/i915/execlists: Clear semaphore immediately upon ELSP promotion There is no significance to our delay before clearing the semaphore the engine is waiting on, so release it as soon as we acknowledge the CS update following our preemption request. This should allow the GPU to resume work earlier, if it was stuck on the semaphore at the end of a request. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191015093204.25693-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 26b6b699193b..1e9e2cc168f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1910,6 +1910,9 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); + /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", execlists->active); while (*execlists->active) @@ -1926,9 +1929,6 @@ static void process_csb(struct intel_engine_cs *engine) if (enable_timeslice(execlists)) mod_timer(&execlists->timer, jiffies + 1); - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); - WRITE_ONCE(execlists->pending[0], NULL); } else { GEM_BUG_ON(!*execlists->active); From bb71fb0072f14611f6c5bb5a1a5bf8ac942fa1c3 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:04 +0300 Subject: [PATCH 076/159] drm/i915/dp: Extend program of VSC Header and DB for Colorimetry Format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It refactors and renames a function which handled vsc sdp header and data block setup for supporting colorimetry format. Function intel_dp_setup_vsc_sdp handles vsc sdp header and data block setup for pixel encoding / colorimetry format. In order to use colorspace information of a connector, it adds an argument of drm_connector_state type. Setup VSC header and data block in function intel_dp_setup_vsc_sdp for pixel encoding / colorimetry format as per dp 1.4a spec, section 2.2.5.7.1, table 2-119: VSC SDP Header Bytes, section 2.2.5.7.5, table 2-120: VSC SDP Payload for DB16 through DB18. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-2-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/intel_display.h | 2 - drivers/gpu/drm/i915/display/intel_dp.c | 68 ++++++++++++++++---- drivers/gpu/drm/i915/display/intel_dp.h | 3 + 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 6c1315c7bcde..079dae20deb5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3851,7 +3851,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); - intel_dp_ycbcr_420_enable(intel_dp, crtc_state); + intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index bc2cf4bec0e8..bed203ec1df8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -539,8 +539,6 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, enum link_m_n_set m_n); -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, struct dpll *best_clock); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6594f2af1257..7aa027881ff3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4474,8 +4474,9 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) } static void -intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct dp_sdp vsc_sdp = {}; @@ -4496,13 +4497,55 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, */ vsc_sdp.sdp_header.HB3 = 0x13; - /* - * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h - * DB16[3:0] DP 1.4a spec, Table 2-120 - */ - vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/ - /* RGB->YCBCR color conversion uses the BT.709 color space. */ - vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + /* DP 1.4a spec, Table 2-120 */ + switch (crtc_state->output_format) { + case INTEL_OUTPUT_FORMAT_YCBCR444: + vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */ + break; + case INTEL_OUTPUT_FORMAT_YCBCR420: + vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */ + break; + case INTEL_OUTPUT_FORMAT_RGB: + default: + /* RGB: DB16[7:4] = 0h */ + break; + } + + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_BT709_YCC: + vsc_sdp.db[16] |= 0x1; + break; + case DRM_MODE_COLORIMETRY_XVYCC_601: + vsc_sdp.db[16] |= 0x2; + break; + case DRM_MODE_COLORIMETRY_XVYCC_709: + vsc_sdp.db[16] |= 0x3; + break; + case DRM_MODE_COLORIMETRY_SYCC_601: + vsc_sdp.db[16] |= 0x4; + break; + case DRM_MODE_COLORIMETRY_OPYCC_601: + vsc_sdp.db[16] |= 0x5; + break; + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + case DRM_MODE_COLORIMETRY_BT2020_RGB: + vsc_sdp.db[16] |= 0x6; + break; + case DRM_MODE_COLORIMETRY_BT2020_YCC: + vsc_sdp.db[16] |= 0x7; + break; + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: + vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */ + break; + default: + /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */ + + /* RGB->YCBCR color conversion uses the BT.709 color space. */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + break; + } /* * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, @@ -4554,13 +4597,14 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); } -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) return; - intel_pixel_encoding_setup_vsc(intel_dp, crtc_state); + intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); } static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index a194b5b6da05..ce2b87f51a30 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -108,6 +108,9 @@ bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_digital_port_connected(struct intel_encoder *encoder); static inline unsigned int intel_dp_unused_lane_mask(int lane_count) From 0c06fa156006c746bbde36961119143596dd1425 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:05 +0300 Subject: [PATCH 077/159] drm/i915/dp: Add support of BT.2020 Colorimetry to DP MSA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When BT.2020 Colorimetry output is used for DP, we should program BT.2020 Colorimetry to MSA and VSC SDP. In order to handle colorspace of drm_connector_state, it moves a calling of intel_ddi_set_pipe_settings() function into intel_ddi_pre_enable_dp(). And it also rename intel_ddi_set_pipe_settings() to intel_ddi_set_dp_msa(). As per DP 1.4a spec section 2.2.4 [MSA Data Transport] The MSA data that the DP Source device transports for reproducing the main video stream. Attribute data is sent once per frame during the main video stream’s vertical blanking period. In order to distinguish needed colorimetry for VSC SDP, it adds intel_dp_needs_vsc_sdp function. If the output colorspace requires vsc sdp or output format is YCbCr 4:2:0, it uses MSA with VSC SDP. As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication of Color Encoding Format and Content Color Gamut] while sending BT.2020 Colorimetry signals we should program MSA MISC1 fields which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. v2: Remove useless parentheses v3: Addressed review comments from Ville - In order to checking output format and output colorspace on intel_dp_needs_vsc_sdp(), it passes entire intel_crtc_state struct value. - Remove a pointless variable. v9: Addressed review comments from Ville - Remove a duplicated output color space from intel_crtc_state. - In order to handle colorspace of drm_connector_state, it moves a calling of intel_ddi_set_pipe_settings() function into intel_ddi_pre_enable_dp(). Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-3-gwan-gyeong.mun@intel.com Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_ddi.c | 14 ++++++---- drivers/gpu/drm/i915/display/intel_ddi.h | 3 +- drivers/gpu/drm/i915/display/intel_display.c | 1 - drivers/gpu/drm/i915/display/intel_dp.c | 29 +++++++++++++++++++- drivers/gpu/drm/i915/display/intel_dp.h | 2 ++ 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 079dae20deb5..a8555e396b2a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1741,7 +1741,8 @@ static void intel_ddi_clock_get(struct intel_encoder *encoder, hsw_ddi_clock_get(encoder, pipe_config); } -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1792,11 +1793,12 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) /* * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication * of Color Encoding Format and Content Color Gamut] while sending - * YCBCR 420 signals we should program MSA MISC1 fields which - * indicate VSC SDP for the Pixel Encoding/Colorimetry Format. + * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields + * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. */ - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= TRANS_MSA_USE_VSC_SDP; + I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } @@ -3594,6 +3596,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state); else hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + + intel_ddi_set_dp_msa(crtc_state, conn_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, @@ -4009,7 +4013,7 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_ddi_set_pipe_settings(crtc_state); + intel_ddi_set_dp_msa(crtc_state, conn_state); intel_psr_update(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index a08365da2643..19aeab1246ee 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -30,7 +30,8 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3cf39fc153b3..5d77f5bae0d9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6468,7 +6468,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (INTEL_GEN(dev_priv) >= 11) icl_set_pipe_chicken(intel_crtc); - intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_transcoder_func(pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 7aa027881ff3..d8dd7035ca46 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2297,6 +2297,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + if (lspcon->active) lspcon_ycbcr420_config(&intel_connector->base, pipe_config); else @@ -4473,6 +4474,32 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } +bool +intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + /* + * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication + * of Color Encoding Format and Content Color Gamut], in order to + * sending YCBCR 420 or HDR BT.2020 signals we should use DP VSC SDP. + */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + return true; + + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_SYCC_601: + case DRM_MODE_COLORIMETRY_OPYCC_601: + case DRM_MODE_COLORIMETRY_BT2020_YCC: + case DRM_MODE_COLORIMETRY_BT2020_RGB: + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + return true; + default: + break; + } + + return false; +} + static void intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, @@ -4601,7 +4628,7 @@ void intel_dp_vsc_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) + if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) return; intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index ce2b87f51a30..b05b9f62347f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -108,6 +108,8 @@ bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_dp_vsc_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); From 9d1bb6f0222c425235ddb592e54c778c7b67b2fc Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:08 +0300 Subject: [PATCH 078/159] drm/i915/dp: Attach colorspace property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It attaches the colorspace connector property to a DisplayPort connector. Based on colorspace change, modeset will be triggered to switch to a new colorspace. And in order to distinguish colorspace bwtween DP and HDMI connector, it adds a handling of drm_mode_create_dp_colorspace_property() to intel_attach_colorspace_property(). Based on colorspace property value create a VSC SDP packet with appropriate colorspace. This would help to enable wider color gamut like BT2020 on a sink device. v9: Addressed review comments from Ville - Add a handling of drm_mode_create_dp_colorspace_property() to intel_attach_colorspace_property(). This hunk moved from the previous commit. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-6-gwan-gyeong.mun@intel.com --- .../gpu/drm/i915/display/intel_connector.c | 21 ++++++++++++++++--- drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index ba2ef165a01a..1133c4e97bb4 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -277,7 +277,22 @@ intel_attach_aspect_ratio_property(struct drm_connector *connector) void intel_attach_colorspace_property(struct drm_connector *connector) { - if (!drm_mode_create_hdmi_colorspace_property(connector)) - drm_object_attach_property(&connector->base, - connector->colorspace_property, 0); + switch (connector->connector_type) { + case DRM_MODE_CONNECTOR_HDMIA: + case DRM_MODE_CONNECTOR_HDMIB: + if (drm_mode_create_hdmi_colorspace_property(connector)) + return; + break; + case DRM_MODE_CONNECTOR_DisplayPort: + case DRM_MODE_CONNECTOR_eDP: + if (drm_mode_create_dp_colorspace_property(connector)) + return; + break; + default: + DRM_DEBUG_KMS("Colorspace property not supported\n"); + return; + } + + drm_object_attach_property(&connector->base, + connector->colorspace_property, 0); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d8dd7035ca46..4bc4afe96ba9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6476,6 +6476,8 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect else if (INTEL_GEN(dev_priv) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); + intel_attach_colorspace_property(connector); + if (intel_dp_is_edp(intel_dp)) { u32 allowed_scalers; From 922430dd4022eb7afef29c0f98972265a0053b8d Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:09 +0300 Subject: [PATCH 079/159] drm/i915: Add new GMP register size for GEN11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Bspec, GEN11 and prior GEN11 have different register size for HDR Metadata Infoframe SDP packet. It adds new VIDEO_DIP_GMP_DATA_SIZE for GEN11. And it makes handle different register size for HDMI_PACKET_TYPE_GAMUT_METADATA on hsw_dip_data_size() for each GEN platforms. It addresses Uma's review comments. v9: Add WARN_ON() when buffer size if larger than register size. [Ville] Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-7-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_hdmi.c | 12 ++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 0a6846c5ba95..92d1cbbbee2b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -189,13 +189,19 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, } } -static int hsw_dip_data_size(unsigned int type) +static int hsw_dip_data_size(struct drm_i915_private *dev_priv, + unsigned int type) { switch (type) { case DP_SDP_VSC: return VIDEO_DIP_VSC_DATA_SIZE; case DP_SDP_PPS: return VIDEO_DIP_PPS_DATA_SIZE; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + if (INTEL_GEN(dev_priv) >= 11) + return VIDEO_DIP_GMP_DATA_SIZE; + else + return VIDEO_DIP_DATA_SIZE; default: return VIDEO_DIP_DATA_SIZE; } @@ -514,7 +520,9 @@ static void hsw_write_infoframe(struct intel_encoder *encoder, int i; u32 val = I915_READ(ctl_reg); - data_size = hsw_dip_data_size(type); + data_size = hsw_dip_data_size(dev_priv, type); + + WARN_ON(len > data_size); val &= ~hsw_infoframe_enable(type); I915_WRITE(ctl_reg, val); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e24991e54897..d741d34e1895 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4692,6 +4692,7 @@ enum { * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte * of the infoframe structure specified by CEA-861. */ #define VIDEO_DIP_DATA_SIZE 32 +#define VIDEO_DIP_GMP_DATA_SIZE 36 #define VIDEO_DIP_VSC_DATA_SIZE 36 #define VIDEO_DIP_PPS_DATA_SIZE 132 #define VIDEO_DIP_CTL _MMIO(0x61170) From b246cf215e4c62a0baa4dfc35510198f228dc951 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:10 +0300 Subject: [PATCH 080/159] drm/i915/dp: Program an Infoframe SDP Header and DB for HDR Static Metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function intel_dp_setup_hdr_metadata_infoframe_sdp handles Infoframe SDP header and data block setup for HDR Static Metadata. It enables writing of HDR metadata infoframe SDP to panel. Support for HDR video was introduced in DisplayPort 1.4. It implements the CTA-861-G standard for transport of static HDR metadata. The HDR Metadata will be provided by userspace compositors, based on blending policies and passed to the driver through a blob property. Because each of GEN11 and prior GEN11 have different register size for HDR Metadata Infoframe SDP packet, it adds and uses different register size. Setup Infoframe SDP header and data block in function intel_dp_setup_hdr_metadata_infoframe_sdp for HDR Static Metadata as per dp 1.4 spec and CTA-861-F spec. As per DP 1.4 spec, 2.2.2.5 SDP Formats. It enables Dynamic Range and Mastering Infoframe for HDR content, which is defined in CTA-861-F spec. According to DP 1.4 spec and CEA-861-F spec Table 5, in order to transmit static HDR metadata, we have to use Non-audio INFOFRAME SDP v1.3. +--------------------------------+-------------------------------+ | [ Packet Type Value ] | [ Packet Type ] | +--------------------------------+-------------------------------+ | 80h + Non-audio INFOFRAME Type | CEA-861-F Non-audio INFOFRAME | +--------------------------------+-------------------------------+ | [Transmission Timing] | +----------------------------------------------------------------+ | As per CEA-861-F for INFOFRAME, including CEA-861.3 within | | which Dynamic Range and Mastering INFOFRAME are defined | +----------------------------------------------------------------+ v2: Add a missed blank line after function declaration. v3: Remove not handled return values from intel_dp_setup_hdr_metadata_infoframe_sdp(). [Uma] v9: Addressed review comments from Ville. - Add BUILD_BUG_ON to check a changing of struct dp_sdp size. - Change a passed size toward write_infoframe() for DP infoframe sdp packet for HDR static metadata. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-8-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 92 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 3 + 3 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a8555e396b2a..e9fca3b2f6ec 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3856,6 +3856,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); + intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4bc4afe96ba9..edb95a4cdb21 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4624,6 +4624,86 @@ intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); } +static void +intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct dp_sdp infoframe_sdp = {}; + struct hdmi_drm_infoframe drm_infoframe = {}; + const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE; + unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE]; + ssize_t len; + int ret; + + ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state); + if (ret) { + DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n"); + return; + } + + len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf)); + if (len < 0) { + DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n"); + return; + } + + if (len != infoframe_size) { + DRM_DEBUG_KMS("wrong static hdr metadata size\n"); + return; + } + + /* + * Set up the infoframe sdp packet for HDR static metadata. + * Prepare VSC Header for SU as per DP 1.4a spec, + * Table 2-100 and Table 2-101 + */ + + /* Packet ID, 00h for non-Audio INFOFRAME */ + infoframe_sdp.sdp_header.HB0 = 0; + /* + * Packet Type 80h + Non-audio INFOFRAME Type value + * HDMI_INFOFRAME_TYPE_DRM: 0x87, + */ + infoframe_sdp.sdp_header.HB1 = drm_infoframe.type; + /* + * Least Significant Eight Bits of (Data Byte Count – 1) + * infoframe_size - 1, + */ + infoframe_sdp.sdp_header.HB2 = 0x1D; + /* INFOFRAME SDP Version Number */ + infoframe_sdp.sdp_header.HB3 = (0x13 << 2); + /* CTA Header Byte 2 (INFOFRAME Version Number) */ + infoframe_sdp.db[0] = drm_infoframe.version; + /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ + infoframe_sdp.db[1] = drm_infoframe.length; + /* + * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after + * HDMI_INFOFRAME_HEADER_SIZE + */ + BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2); + memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE], + HDMI_DRM_INFOFRAME_SIZE); + + /* + * Size of DP infoframe sdp packet for HDR static metadata is consist of + * - DP SDP Header(struct dp_sdp_header): 4 bytes + * - Two Data Blocks: 2 bytes + * CTA Header Byte2 (INFOFRAME Version Number) + * CTA Header Byte3 (Length of INFOFRAME) + * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes + * + * Prior to GEN11's GMP register size is identical to DP HDR static metadata + * infoframe size. But GEN11+ has larger than that size, write_infoframe + * will pad rest of the size. + */ + intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state, + HDMI_PACKET_TYPE_GAMUT_METADATA, + &infoframe_sdp, + sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE); +} + void intel_dp_vsc_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -4634,6 +4714,18 @@ void intel_dp_vsc_enable(struct intel_dp *intel_dp, intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); } +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + if (!conn_state->hdr_output_metadata) + return; + + intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp, + crtc_state, + conn_state); +} + static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index b05b9f62347f..3da166054788 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -113,6 +113,9 @@ bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, void intel_dp_vsc_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_digital_port_connected(struct intel_encoder *encoder); static inline unsigned int intel_dp_unused_lane_mask(int lane_count) From 0299dfa7ad54f95ae9502fc425184f865d326d3b Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 19 Sep 2019 22:53:11 +0300 Subject: [PATCH 081/159] drm/i915/dp: Attach HDR metadata property to DP connector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It attaches HDR metadata property to DP connector on GLK+. It enables HDR metadata infoframe sdp on GLK+ to be used to send HDR metadata to DP sink. v2: Minor style fix Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919195311.13972-9-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index edb95a4cdb21..dd5dc1e38495 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6570,6 +6570,11 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect intel_attach_colorspace_property(connector); + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 11) + drm_object_attach_property(&connector->base, + connector->dev->mode_config.hdr_output_metadata_property, + 0); + if (intel_dp_is_edp(intel_dp)) { u32 allowed_scalers; From 3e706dff0891f56844166597aa4d37c43700e2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:47 +0300 Subject: [PATCH 082/159] drm/i915: Switch to using DP_MSA_MISC_* defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have standard defines for the MSA MISC bits lets use them on HSW+ where we program these directly into the TRANS_MSA_MISC register. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-7-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_ddi.c | 17 ++++++++--------- drivers/gpu/drm/i915/i915_reg.h | 13 +------------ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index e9fca3b2f6ec..80f8e2698be0 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1754,20 +1754,20 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, WARN_ON(transcoder_is_dsi(cpu_transcoder)); - temp = TRANS_MSA_SYNC_CLK; + temp = DP_MSA_MISC_SYNC_CLOCK; switch (crtc_state->pipe_bpp) { case 18: - temp |= TRANS_MSA_6_BPC; + temp |= DP_MSA_MISC_6_BPC; break; case 24: - temp |= TRANS_MSA_8_BPC; + temp |= DP_MSA_MISC_8_BPC; break; case 30: - temp |= TRANS_MSA_10_BPC; + temp |= DP_MSA_MISC_10_BPC; break; case 36: - temp |= TRANS_MSA_12_BPC; + temp |= DP_MSA_MISC_12_BPC; break; default: MISSING_CASE(crtc_state->pipe_bpp); @@ -1779,7 +1779,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); if (crtc_state->limited_color_range) - temp |= TRANS_MSA_CEA_RANGE; + temp |= DP_MSA_MISC_COLOR_CEA_RGB; /* * As per DP 1.2 spec section 2.3.4.3 while sending @@ -1787,8 +1787,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * colorspace information. */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) - temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR | - TRANS_MSA_YCBCR_BT709; + temp |= DP_MSA_MISC_COLOR_YCBCR_444_BT709; /* * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication @@ -1797,7 +1796,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. */ if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) - temp |= TRANS_MSA_USE_VSC_SDP; + temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d741d34e1895..7dd126cc3ac3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9759,18 +9759,7 @@ enum skl_power_gate { #define _TRANSC_MSA_MISC 0x62410 #define _TRANS_EDP_MSA_MISC 0x6f410 #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC) - -#define TRANS_MSA_SYNC_CLK (1 << 0) -#define TRANS_MSA_SAMPLING_444 (2 << 1) -#define TRANS_MSA_CLRSP_YCBCR (1 << 3) -#define TRANS_MSA_YCBCR_BT709 (1 << 4) -#define TRANS_MSA_6_BPC (0 << 5) -#define TRANS_MSA_8_BPC (1 << 5) -#define TRANS_MSA_10_BPC (2 << 5) -#define TRANS_MSA_12_BPC (3 << 5) -#define TRANS_MSA_16_BPC (4 << 5) -#define TRANS_MSA_CEA_RANGE (1 << 3) -#define TRANS_MSA_USE_VSC_SDP (1 << 14) +/* See DP_MSA_MISC_* for the bit definitions */ /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) From 131d3b1af10599b13c7bcd9bb137b5419cd12798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Jul 2019 15:53:14 +0300 Subject: [PATCH 083/159] drm/i915: Stop using drm_atomic_helper_check_planes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to insert stuff between the plane and crtc .atomic_check() drm_atomic_helper_check_planes() doesn't allow us to do that so stop using it and hand roll the loops instead. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190708125325.16576-9-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy Reviewed-by: Maarten Lankhorst Reviewed-by: Juha-Pekka Heikkila --- .../gpu/drm/i915/display/intel_atomic_plane.c | 10 +--- .../gpu/drm/i915/display/intel_atomic_plane.h | 2 + drivers/gpu/drm/i915/display/intel_display.c | 57 +++++++++++++++---- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 98b7766eaa7a..a6cff5a160fb 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -195,14 +195,11 @@ get_crtc_from_states(const struct intel_plane_state *old_plane_state, return NULL; } -static int intel_plane_atomic_check(struct drm_plane *_plane, - struct drm_plane_state *_new_plane_state) +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane) { - struct intel_plane *plane = to_intel_plane(_plane); - struct intel_atomic_state *state = - to_intel_atomic_state(_new_plane_state->state); struct intel_plane_state *new_plane_state = - to_intel_plane_state(_new_plane_state); + intel_atomic_get_new_plane_state(state, plane); const struct intel_plane_state *old_plane_state = intel_atomic_get_old_plane_state(state, plane); struct intel_crtc *crtc = @@ -369,5 +366,4 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state, const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, - .atomic_check = intel_plane_atomic_check, }; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index cb7ef4f9eafd..dc85af02e9b7 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -41,6 +41,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane); int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5d77f5bae0d9..00bee5abf87a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11783,15 +11783,14 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } -static int intel_crtc_atomic_check(struct drm_crtc *_crtc, - struct drm_crtc_state *_crtc_state) +static int intel_crtc_atomic_check(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(_crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = - to_intel_crtc_state(_crtc_state); - int ret; + intel_atomic_get_new_crtc_state(state, crtc); bool mode_changed = needs_modeset(crtc_state); + int ret; if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && mode_changed && !crtc_state->base.active) @@ -11863,10 +11862,6 @@ static int intel_crtc_atomic_check(struct drm_crtc *_crtc, return ret; } -static const struct drm_crtc_helper_funcs intel_helper_funcs = { - .atomic_check = intel_crtc_atomic_check, -}; - static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) { struct intel_connector *connector; @@ -13541,6 +13536,42 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->has_drrs = old_crtc_state->has_drrs; } +static int intel_atomic_check_planes(struct intel_atomic_state *state) +{ + struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i, ret; + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + ret = intel_plane_atomic_check(state, plane); + if (ret) { + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n", + plane->base.base.id, plane->base.name); + return ret; + } + } + + return 0; +} + +static int intel_atomic_check_crtcs(struct intel_atomic_state *state) +{ + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret = intel_crtc_atomic_check(state, crtc); + if (ret) { + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n", + crtc->base.base.id, crtc->base.name); + return ret; + } + } + + return 0; +} + /** * intel_atomic_check - validate state object * @dev: drm device @@ -13604,7 +13635,11 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = drm_atomic_helper_check_planes(dev, &state->base); + ret = intel_atomic_check_planes(state); + if (ret) + goto fail; + + ret = intel_atomic_check_crtcs(state); if (ret) goto fail; @@ -15175,8 +15210,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc; } - drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - intel_color_init(intel_crtc); WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe); From 3e30d70805d577e458592dcf59dcf8a2391d08a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Jul 2019 15:53:16 +0300 Subject: [PATCH 084/159] drm/i915: Make .modeset_calc_cdclk() mandatory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While not all platforms allow us to change the cdclk frequency we should still verify that the fixed cdclk frequency isn't too low. To that end let's cook up a .modeset_calc_cdclk() implementation that only does the min_cdclk vs. actual cdclk frequency check for such platforms. Also we mustn't forget about double wide pipe on gen2/3 when doing this. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190708125325.16576-11-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/display/intel_cdclk.c | 31 ++++++++++++++++---- drivers/gpu/drm/i915/display/intel_display.c | 8 ++--- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 43564295b864..3d867963a6d1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1900,9 +1900,11 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, intel_set_cdclk(dev_priv, new_state, pipe); } -static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, - int pixel_rate) +static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int pixel_rate = crtc_state->pixel_rate; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return DIV_ROUND_UP(pixel_rate, 2); else if (IS_GEN(dev_priv, 9) || @@ -1910,6 +1912,8 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, return pixel_rate; else if (IS_CHERRYVIEW(dev_priv)) return DIV_ROUND_UP(pixel_rate * 100, 95); + else if (crtc_state->double_wide) + return DIV_ROUND_UP(pixel_rate * 100, 90 * 2); else return DIV_ROUND_UP(pixel_rate * 100, 90); } @@ -1923,7 +1927,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) if (!crtc_state->base.enable) return 0; - min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate); + min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) @@ -2277,15 +2281,28 @@ static int intel_modeset_all_pipes(struct intel_atomic_state *state) return 0; } +static int fixed_modeset_calc_cdclk(struct intel_atomic_state *state) +{ + int min_cdclk; + + /* + * We can't change the cdclk frequency, but we still want to + * check that the required minimum frequency doesn't exceed + * the actual cdclk frequency. + */ + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; + + return 0; +} + int intel_modeset_calc_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); enum pipe pipe; int ret; - if (!dev_priv->display.modeset_calc_cdclk) - return 0; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret) return ret; @@ -2596,6 +2613,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv)) { dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; + } else { + dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 00bee5abf87a..1dc98a2d0b03 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16816,11 +16816,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_crtc_compute_pixel_rate(crtc_state); - if (dev_priv->display.modeset_calc_cdclk) { - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (WARN_ON(min_cdclk < 0)) - min_cdclk = 0; - } + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; drm_calc_timestamping_constants(&crtc->base, &crtc_state->base.adjusted_mode); From dcdef1abbc50c57c982be3964cf2250660ba906e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Sep 2019 16:42:13 +0300 Subject: [PATCH 085/159] drm/i915: Use drm_rect_translate_to() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the newly introduced drm_rect_translate_to() instead of hand rolling it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190930134214.24702-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1dc98a2d0b03..7cab9368e697 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3476,9 +3476,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. */ - drm_rect_translate(&plane_state->base.src, - (x << 16) - plane_state->base.src.x1, - (y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->base.src, + x << 16, y << 16); return 0; } @@ -3700,9 +3699,8 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. */ - drm_rect_translate(&plane_state->base.src, - (src_x << 16) - plane_state->base.src.x1, - (src_y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->base.src, + src_x << 16, src_y << 16); /* HSW/BDW do this automagically in hardware */ if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { From fc1a0fb53875724c3498e1402ac339de3fee20d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Sep 2019 16:42:14 +0300 Subject: [PATCH 086/159] drm/i915: Use drm_rect_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new drm_rect_init() helper where appropriate. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190930134214.24702-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 10 ++-------- drivers/gpu/drm/i915/display/intel_sprite.c | 6 ++---- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7cab9368e697..7d7d1859775a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2733,10 +2733,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, size++; /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; + drm_rect_init(&r, x, y, width, height); drm_rect_rotate(&r, rot_info->plane[i].width * tile_width, rot_info->plane[i].height * tile_height, @@ -2858,10 +2855,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state) struct drm_rect r; /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; + drm_rect_init(&r, x, y, width, height); drm_rect_rotate(&r, info->plane[i].width * tile_width, info->plane[i].height * tile_height, diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 633fa8069348..5ae12ab3c5b7 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -287,10 +287,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; + drm_rect_init(src, src_x << 16, src_y << 16, + src_w << 16, src_h << 16); if (!fb->format->is_yuv) return 0; From 193065024072c0d6f2972142d5df5a00b33324c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Oct 2019 14:26:06 +0100 Subject: [PATCH 087/159] drm/i915: Flush tasklet submission before sleeping on i915_request_wait If the system is being slow and userspace is racing ahead of the GPU and finds itself waiting for the GPU to catch up, before the process sleeps give the tasklet a kick, bypassing ksoftirqd. If the system is overloaded, then ksoftirqd may be delayed incurring additional latency to our user. This should not be a frequent problem, but in the past we have observed several hundred millisecond delays before ksoftirqd services an interrupt, so burn a few cycles to lend a helping hand. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191015132606.14349-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b8a54572a4f8..f1cadad4e81c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1496,6 +1496,7 @@ long i915_request_wait(struct i915_request *rq, break; } + intel_engine_flush_submission(rq->engine); timeout = io_schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); From da5d2ca8adde6adadcc42e3dd87bcc01e7e4f3f9 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:11 +0300 Subject: [PATCH 088/159] drm/i915/icl: Wa_1607087056 Avoid possible hang in tsg,vfe units by keeping l3 clocks runnings. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154411.9984-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ba65e5018978..81d299b27fbc 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -892,6 +892,11 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + + /* Wa_1607087056:icl */ + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7dd126cc3ac3..821159c4cd32 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4050,6 +4050,8 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) +#define L3_CLKGATE_DIS REG_BIT(16) +#define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) From 613716bbe721236a09167b64124c13c85ee69962 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:39 +0300 Subject: [PATCH 089/159] drm/i915/tgl: Add IS_TGL_REVID We are going to need this macro on limiting the workaround scope. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c46b339064c0..f6aee1e01a7f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1686,6 +1686,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_REVID(p, since, until) \ (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define TGL_REVID_A0 0x0 + +#define IS_TGL_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) From 62037ffff229b7d94f1db5ef8d2e2ec819832ef3 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:40 +0300 Subject: [PATCH 090/159] drm/i915/tgl: Include ro parts of l3 to invalidate Aim for completeness and invalidate also the ro parts in l3 cache. This might allow to get rid of the preparser disable/enable workaround on invalidation path. Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 8e63cffcabe0..afc869dc785f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -233,6 +233,7 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1e9e2cc168f7..217f6513e117 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3230,6 +3230,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; From 4aa0b5d457f5d29522b5db4aeb4587fc4dc24054 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:41 +0300 Subject: [PATCH 091/159] drm/i915/tgl: Add HDC Pipeline Flush Add hdc pipeline flush to ensure memory state is coherent in L3 when we are done. v2: Flush also in breadcrumbs (Chris) Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-3-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index afc869dc785f..4294f146f13c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -235,6 +235,7 @@ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ #define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 217f6513e117..d1b10f00d729 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3205,6 +3205,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -3415,7 +3416,8 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_HDC_PIPELINE_FLUSH); return gen12_emit_fini_breadcrumb_footer(request, cs); } From 36a6b5d964d995b536b1925ec42052ee40ba92c4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:42 +0300 Subject: [PATCH 092/159] drm/i915/tgl: Add extra hdc flush workaround In order to ensure constant caches are invalidated properly with a0, we need extra hdc flush after invalidation. v2: use IS_TGL_REVID (Chris) References: HSDES#1604544889 Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-4-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d1b10f00d729..62d5ece95c9d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3253,6 +3253,26 @@ static int gen12_emit_flush_render(struct i915_request *request, *cs++ = preparser_disable(false); intel_ring_advance(request, cs); + + /* + * Wa_1604544889:tgl + */ + if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { + flags = 0; + flags |= PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } } return 0; From 99739f9431f9fa8b4706dfa4c99389b400b8af23 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:43 +0300 Subject: [PATCH 093/159] drm/i915/tgl: Keep FF dop clock enabled for A0 To ensure correct state data for compute workloads, we need to keep the ff dop clock enabled. References: HSDES#1606700617 Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-5-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 ++++++++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 81d299b27fbc..bc5fdb4e47b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -567,7 +567,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - /* Wa_1409142259 */ + /* Wa_1409142259:tgl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } @@ -1265,6 +1265,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + /* Wa_1606700617:tgl */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_GEN(i915, 11)) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 821159c4cd32..4b58861b5114 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7673,6 +7673,7 @@ enum { #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE (1 << 10) #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) From 65df78bda385a905d086a787fd186506aea87df4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:44 +0300 Subject: [PATCH 094/159] drm/i915/tgl: Wa_1409420604 Avoid possible hang in CPSS unit. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-6-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ drivers/gpu/drm/i915/i915_reg.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index bc5fdb4e47b1..7fea61b00b99 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -902,6 +902,11 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { + /* Wa_1409420604:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); } static void diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4b58861b5114..449648a28a67 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4056,6 +4056,9 @@ enum { #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) +#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) From 2cbe2d8c562eab58347b39342e37c7a2adbe7fef Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:45 +0300 Subject: [PATCH 095/159] drm/i915/tgl: Wa_1409170338 Avoid possible hang in tsg,vfe units by keeping l3 clocks runnings. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-7-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 7fea61b00b99..4f9be2eee132 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -907,6 +907,12 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); + + /* Wa_1409180338:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void From 2e19af943822710cf00621eb32cc82394441bb60 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:46 +0300 Subject: [PATCH 096/159] drm/i915/tgl: Wa_1409600907 To avoid possible hang, we need to add depth stall if we flush the depth cache. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-8-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 62d5ece95c9d..d64b4d868b75 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3203,6 +3203,8 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; @@ -3435,6 +3437,8 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_HDC_PIPELINE_FLUSH); From 79bfa607e60f12daa3db8b1b3834219818a153e2 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:47 +0300 Subject: [PATCH 097/159] drm/i915/tgl: Wa_1607138336 Avoid possible deadlock on context switch. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-9-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 4f9be2eee132..483725137291 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1281,6 +1281,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_masked_en(wal, GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE); + + /* Wa_1607138336:tgl */ + wa_write_or(wal, + GEN9_CTX_PREEMPT_REG, + GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } if (IS_GEN(i915, 11)) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 449648a28a67..baf5939df1ec 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7678,6 +7678,8 @@ enum { #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) +#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11) + #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) From 99db8c59e056e1e1a97845dbd38a3a6f47be6b7d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:48 +0300 Subject: [PATCH 098/159] drm/i915/tgl: Wa_1607030317, Wa_1607186500, Wa_1607297627 Disable semaphore idle messages and wait for event power downs. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-10-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 483725137291..af8a8183154a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1286,6 +1286,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN9_CTX_PREEMPT_REG, GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); + + /* Wa_1607030317:tgl */ + /* Wa_1607186500:tgl */ + /* Wa_1607297627:tgl */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE); } if (IS_GEN(i915, 11)) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index baf5939df1ec..855db888516c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2885,6 +2885,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_RC_SLEEP_PSMI_CONTROL _MMIO(0x2050) #define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0) +#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1 << 10) From 08fff7aeddc9dd72161b4c8fc27fbab12b4b9352 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 15 Oct 2019 18:44:49 +0300 Subject: [PATCH 099/159] drm/i915/tgl: Wa_1607138340 Avoid possible cs hang with semaphores by disabling lite restore. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191015154449.10338-11-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d64b4d868b75..44f213fd7604 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1048,6 +1048,10 @@ static u64 execlists_update_context(const struct i915_request *rq) desc = ce->lrc_desc; ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + /* Wa_1607138340:tgl */ + if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) + desc |= CTX_DESC_FORCE_RESTORE; + return desc; } From 8574685547bdc4f11cc528f386e5396212409f9c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Oct 2019 09:59:11 +0100 Subject: [PATCH 100/159] drm/i915/selftests: Drop stale struct_mutex A lately added test was missed when applying the struct_mutex removal patches. Do so now. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191015085911.10317-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 1276da059dc6..9c1f34fb5882 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -107,7 +107,6 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct igt_spinner spin; int err = -ENOMEM; @@ -116,11 +115,8 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) * on the same engine from the same parent context. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin, &i915->gt)) - goto err_unlock; + return err; ctx = kernel_context(i915); if (!ctx) @@ -264,9 +260,6 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } From 2229adc81380c46908d46a91f05a6c1650456769 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 11:08:51 +0100 Subject: [PATCH 101/159] drm/i915/execlist: Trim immediate timeslice expiry We perform timeslicing immediately upon receipt of a request that may be put into the second ELSP slot. The idea behind this was that since we didn't install the timer if the second ELSP slot was empty, we would not have any idea of how long ELSP[0] had been running and so giving the newcomer a chance on the GPU was fair. However, this causes us extra busy work that we may be able to avoid if we wait a jiffie for the first timeslice as normal. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016100851.4979-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 30 ++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++-- drivers/gpu/drm/i915/i915_gem.h | 14 +++++++++++ 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c9d639c6becb..5051a1fd2565 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1241,6 +1241,17 @@ static struct intel_timeline *get_timeline(struct i915_request *rq) return tl; } +static const char *repr_timer(const struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return "inactive"; + + if (timer_pending(t)) + return "active"; + + return "expired"; +} + static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { @@ -1302,19 +1313,20 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", - ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), - ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), - num_entries); + drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n", + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.timer)); read = execlists->csb_head; write = READ_ONCE(*execlists->csb_write); - drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n", - read, write, - yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); + drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), + read, write, num_entries); + if (read >= num_entries) read = 0; if (write >= num_entries) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 44f213fd7604..e9fe9f79cedd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1473,7 +1473,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && - !timer_pending(&engine->execlists.timer)) { + timer_expired(&engine->execlists.timer)) { GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", engine->name, last->fence.context, @@ -1932,6 +1932,8 @@ static void process_csb(struct intel_engine_cs *engine) if (enable_timeslice(execlists)) mod_timer(&execlists->timer, jiffies + 1); + else + cancel_timer(&execlists->timer); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -3452,7 +3454,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) static void execlists_park(struct intel_engine_cs *engine) { - del_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.timer); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f6f9675848b8..2011f8e9a9f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -112,4 +112,18 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } +static inline void cancel_timer(struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return; + + del_timer(t); + WRITE_ONCE(t->expires, 0); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return READ_ONCE(t->expires) && !timer_pending(t); +} + #endif /* __I915_GEM_H__ */ From 993254292b9e0be97555cd3475bcdb946c558381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Oct 2019 14:49:43 +0300 Subject: [PATCH 102/159] drm/i915: Refactor timestamping constants update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we do the hw vs. uapi split we can no longer use drm_atomic_helper_calc_timestamping_constants() as it'll consult the uapi state instead of the hw state. So let's just update the vblank timestamping constants whenever we update the scanline offset. We use both to convert the hw scanline count to something which matches the software timing values. First I thought to put these into intel_crtc_vblank_on() but we may want to get the scanline counter value before that (eg. from some early tracepoints), so let's stick to updating them a bit earlier than intel_crtc_vblank_on(). Signed-off-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191007114943.29307-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7d7d1859775a..cc1cf9a4f609 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13316,10 +13316,15 @@ intel_modeset_verify_disabled(struct drm_i915_private *dev_priv, verify_disabled_dpll_state(dev_priv); } -static void update_scanline_offset(const struct intel_crtc_state *crtc_state) +static void +intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + + drm_calc_timestamping_constants(&crtc->base, adjusted_mode); /* * The scanline counter increments at the leading edge of hsync. @@ -13349,7 +13354,6 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) * answer that's slightly in the future. */ if (IS_GEN(dev_priv, 2)) { - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; int vtotal; vtotal = adjusted_mode->crtc_vtotal; @@ -13360,8 +13364,9 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) } else if (HAS_DDI(dev_priv) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { crtc->scanline_offset = 2; - } else + } else { crtc->scanline_offset = 1; + } } static void intel_modeset_clear_plls(struct intel_atomic_state *state) @@ -13784,7 +13789,8 @@ static void intel_update_crtc(struct intel_crtc *crtc, to_intel_plane(crtc->base.primary)); if (modeset) { - update_scanline_offset(new_crtc_state); + intel_crtc_update_active_timings(new_crtc_state); + dev_priv->display.crtc_enable(new_crtc_state, state); /* vblanks work again, re-enable pipe CRC. */ @@ -16812,9 +16818,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) if (WARN_ON(min_cdclk < 0)) min_cdclk = 0; - drm_calc_timestamping_constants(&crtc->base, - &crtc_state->base.adjusted_mode); - update_scanline_offset(crtc_state); + intel_crtc_update_active_timings(crtc_state); } dev_priv->min_cdclk[crtc->pipe] = min_cdclk; From 4078c983feb930bc2a3e3f8ffa6fa65fdf466c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Sep 2019 16:14:27 +0300 Subject: [PATCH 103/159] drm/i915: Switch intel_legacy_cursor_update() to intel_ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefer the intel_ types in intel_legacy_cursor_update() over the drm_ types. Should make it easier to adapt this to the uapi vs. hw state split. Signed-off-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190927131432.15978-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 84 ++++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cc1cf9a4f609..cc4489591721 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14704,8 +14704,8 @@ static const struct drm_plane_funcs i8xx_plane_funcs = { }; static int -intel_legacy_cursor_update(struct drm_plane *plane, - struct drm_crtc *crtc, +intel_legacy_cursor_update(struct drm_plane *_plane, + struct drm_crtc *_crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, @@ -14713,10 +14713,13 @@ intel_legacy_cursor_update(struct drm_plane *plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_plane_state *old_plane_state, *new_plane_state; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(_plane); + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + struct intel_plane_state *new_plane_state; struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->state); + to_intel_crtc_state(crtc->base.state); struct intel_crtc_state *new_crtc_state; int ret; @@ -14728,14 +14731,13 @@ intel_legacy_cursor_update(struct drm_plane *plane, crtc_state->update_pipe) goto slow; - old_plane_state = plane->state; /* * Don't do an async update if there is an outstanding commit modifying * the plane. This prevents our async update's changes from getting * overridden by a previous synchronous update's state. */ - if (old_plane_state->commit && - !try_wait_for_completion(&old_plane_state->commit->hw_done)) + if (old_plane_state->base.commit && + !try_wait_for_completion(&old_plane_state->base.commit->hw_done)) goto slow; /* @@ -14743,52 +14745,51 @@ intel_legacy_cursor_update(struct drm_plane *plane, * take the slowpath. Only changing fb or position should be * in the fastpath. */ - if (old_plane_state->crtc != crtc || - old_plane_state->src_w != src_w || - old_plane_state->src_h != src_h || - old_plane_state->crtc_w != crtc_w || - old_plane_state->crtc_h != crtc_h || - !old_plane_state->fb != !fb) + if (old_plane_state->base.crtc != &crtc->base || + old_plane_state->base.src_w != src_w || + old_plane_state->base.src_h != src_h || + old_plane_state->base.crtc_w != crtc_w || + old_plane_state->base.crtc_h != crtc_h || + !old_plane_state->base.fb != !fb) goto slow; - new_plane_state = intel_plane_duplicate_state(plane); + new_plane_state = to_intel_plane_state(intel_plane_duplicate_state(&plane->base)); if (!new_plane_state) return -ENOMEM; - new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc)); + new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(&crtc->base)); if (!new_crtc_state) { ret = -ENOMEM; goto out_free; } - drm_atomic_set_fb_for_plane(new_plane_state, fb); + drm_atomic_set_fb_for_plane(&new_plane_state->base, fb); - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; + new_plane_state->base.src_x = src_x; + new_plane_state->base.src_y = src_y; + new_plane_state->base.src_w = src_w; + new_plane_state->base.src_h = src_h; + new_plane_state->base.crtc_x = crtc_x; + new_plane_state->base.crtc_y = crtc_y; + new_plane_state->base.crtc_w = crtc_w; + new_plane_state->base.crtc_h = crtc_h; ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state, - to_intel_plane_state(old_plane_state), - to_intel_plane_state(new_plane_state)); + old_plane_state, new_plane_state); if (ret) goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); + ret = intel_plane_pin_fb(new_plane_state); if (ret) goto out_free; - intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP); - intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb), - to_intel_frontbuffer(fb), - intel_plane->frontbuffer_bit); + intel_frontbuffer_flush(to_intel_frontbuffer(new_plane_state->base.fb), ORIGIN_FLIP); + intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb), + to_intel_frontbuffer(new_plane_state->base.fb), + plane->frontbuffer_bit); /* Swap plane state */ - plane->state = new_plane_state; + plane->base.state = &new_plane_state->base; /* * We cannot swap crtc_state as it may be in use by an atomic commit or @@ -14802,25 +14803,24 @@ intel_legacy_cursor_update(struct drm_plane *plane, */ crtc_state->active_planes = new_crtc_state->active_planes; - if (plane->state->visible) - intel_update_plane(intel_plane, crtc_state, - to_intel_plane_state(plane->state)); + if (new_plane_state->base.visible) + intel_update_plane(plane, crtc_state, new_plane_state); else - intel_disable_plane(intel_plane, crtc_state); + intel_disable_plane(plane, crtc_state); - intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); + intel_plane_unpin_fb(old_plane_state); out_free: if (new_crtc_state) - intel_crtc_destroy_state(crtc, &new_crtc_state->base); + intel_crtc_destroy_state(&crtc->base, &new_crtc_state->base); if (ret) - intel_plane_destroy_state(plane, new_plane_state); + intel_plane_destroy_state(&plane->base, &new_plane_state->base); else - intel_plane_destroy_state(plane, old_plane_state); + intel_plane_destroy_state(&plane->base, &old_plane_state->base); return ret; slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, + return drm_atomic_helper_update_plane(&plane->base, &crtc->base, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, ctx); } From de3b67afc0602f81fc6309544f97d4425c2e2cf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Sep 2019 16:14:28 +0300 Subject: [PATCH 104/159] drm/i915: Prepare the connector/encoder mask readout for hw vs. uapi state split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare the connector/encoder mask readout for the uapi vs. hw state split. We'll want to do all readout into the hw state. Signed-off-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190927131432.15978-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cc4489591721..c72c9a4bb973 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16757,24 +16757,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->get_hw_state(connector)) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + connector->base.dpms = DRM_MODE_DPMS_ON; encoder = connector->encoder; connector->base.encoder = &encoder->base; - if (encoder->base.crtc && - encoder->base.crtc->state->active) { + crtc = to_intel_crtc(encoder->base.crtc); + crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL; + + if (crtc_state && crtc_state->base.active) { /* * This has to be done during hardware readout * because anything calling .crtc_disable may * rely on the connector_mask being accurate. */ - encoder->base.crtc->state->connector_mask |= + crtc_state->base.connector_mask |= drm_connector_mask(&connector->base); - encoder->base.crtc->state->encoder_mask |= + crtc_state->base.encoder_mask |= drm_encoder_mask(&encoder->base); } - } else { connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; From 3abe897787c2445fb1aa955ac229bdfe2ac786c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Sep 2019 16:14:30 +0300 Subject: [PATCH 105/159] drm/i915: Prepare the mode readout for hw vs. uapi state split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare the mode readout for the uapi vs. hw state split. We'll want to do all readout into the hw state. Signed-off-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190927131432.15978-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c72c9a4bb973..2309b349f4b5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16803,7 +16803,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->base.mode.hdisplay = crtc_state->pipe_src_w; crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); - WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &crtc->base.mode)); /* * The initial mode needs to be set in order to keep From fcb9bba47fb5ff39912f8f3b7b6d32992e461368 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 15 Oct 2019 09:11:31 -0700 Subject: [PATCH 106/159] drm/i915/ehl: Don't forget to set TC long detect function Since EHL's MCC PCH reuses one of the TC pins we need to supply a TC long detect function when handling the interrupts. Fixes: 53448aed7b80 ("drm/i915/ehl: Port C's hotplug interrupt is associated with TC1 bits") Reported-by: kbuild test robot Reported-by: Dan Carpenter Cc: Vivek Kasireddy Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191015161131.21239-1-matthew.d.roper@intel.com Reviewed-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a7c968b01af3..af7426cd8de9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2251,6 +2251,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; pins = hpd_icp; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; From 943682e3bd19385511171d730499120ab7245566 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 15 Oct 2019 09:28:54 -0700 Subject: [PATCH 107/159] drm/i915: Introduce Jasper Lake PCH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Jasper Lake PCH follows ICP/TGP's south display behavior and is identical to MCC graphics-wise except that it does not use the unusual (port C -> TC1) pin mapping that MCC does. Also, it turns out the extra PCH ID that we had previously thought was a form of MCC is actually a second ID for JSP (i.e., port C uses the port C pins instead of the TC1 pins). v2: - Also update the port masks (not just the pin table) in mcc_hpd_irq_setup. (Vivek) v3: - Break jsp_hpd_irq_setup out into its own function for clarity. (Vivek) Cc: José Roberto de Souza Cc: James Ausmus Cc: Vivek Kasireddy Signed-off-by: Matt Roper Reviewed-by: Vivek Kasireddy Link: https://patchwork.freedesktop.org/patch/msgid/20191015162854.30546-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 24 +++++++++++++++++++++++- drivers/gpu/drm/i915/intel_pch.c | 6 +++++- drivers/gpu/drm/i915/intel_pch.h | 5 ++++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index af7426cd8de9..ef09fbb36f37 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2248,12 +2248,19 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; + } else if (HAS_PCH_JSP(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; + tc_hotplug_trigger = 0; + pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; pins = hpd_icp; } else { + WARN(!HAS_PCH_ICP(dev_priv), + "Unrecognized PCH type 0x%x\n", INTEL_PCH_TYPE(dev_priv)); + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; @@ -3383,6 +3390,19 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) hpd_icp); } +/* + * JSP behaves exactly the same as MCC above except that port C is mapped to + * the DDI-C pins instead of the TC1 pins. This means we should follow TGP's + * masks & tables rather than ICP's masks & tables. + */ +static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_TGP, 0, + TGP_DDI_HPD_ENABLE_MASK, 0, + hpd_tgp); +} + static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -4314,7 +4334,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv)) dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; } else { - if (HAS_PCH_MCC(dev_priv)) + if (HAS_PCH_JSP(dev_priv)) + dev_priv->display.hpd_irq_setup = jsp_hpd_irq_setup; + else if (HAS_PCH_MCC(dev_priv)) dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv)); return PCH_ICP; case INTEL_PCH_MCC_DEVICE_ID_TYPE: - case INTEL_PCH_MCC2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); WARN_ON(!IS_ELKHARTLAKE(dev_priv)); return PCH_MCC; @@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv)); return PCH_TGP; + case INTEL_PCH_JSP_DEVICE_ID_TYPE: + case INTEL_PCH_JSP2_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Jasper Lake PCH\n"); + WARN_ON(!IS_ELKHARTLAKE(dev_priv)); + return PCH_JSP; default: return PCH_NONE; } diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index c29c81ec7971..f4dc18c34291 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -23,6 +23,7 @@ enum intel_pch { PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ PCH_CNP, /* Cannon/Comet Lake PCH */ PCH_ICP, /* Ice Lake PCH */ + PCH_JSP, /* Jasper Lake PCH */ PCH_MCC, /* Mule Creek Canyon PCH */ PCH_TGP, /* Tiger Lake PCH */ }; @@ -44,14 +45,16 @@ enum intel_pch { #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 -#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 +#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 +#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP) #define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC) #define HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP) #define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) From 1357fa8136ea0309890b0a8e5f7311e4f01f2b6a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 13:02:49 +0100 Subject: [PATCH 108/159] drm/i915/selftests: Teach execlists to take intel_gt as its argument The execlists selftests are hardware centric and so want to use the gt as its target. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016120249.22714-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 290 ++++++++++++------------- 1 file changed, 143 insertions(+), 147 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 9c1f34fb5882..2868371c609e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -51,20 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) static int live_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; struct igt_spinner spin; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; - if (igt_spinner_init(&spin, &i915->gt)) + if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) goto err_spin; @@ -81,13 +81,13 @@ static int live_sanitycheck(void *arg) if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx; } igt_spinner_end(&spin); - if (igt_flush_test(i915)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx; } @@ -102,7 +102,7 @@ static int live_sanitycheck(void *arg) return err; } -static int live_unlite_restore(struct drm_i915_private *i915, int prio) +static int live_unlite_restore(struct intel_gt *gt, int prio) { struct intel_engine_cs *engine; struct i915_gem_context *ctx; @@ -115,15 +115,15 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) * on the same engine from the same parent context. */ - if (igt_spinner_init(&spin, &i915->gt)) + if (igt_spinner_init(&spin, gt)) return err; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) goto err_spin; err = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; @@ -135,7 +135,7 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) if (!intel_engine_can_store_dword(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; break; } @@ -416,7 +416,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, static int live_timeslice_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct drm_i915_gem_object *obj; struct i915_vma *vma; void *vaddr; @@ -432,11 +432,11 @@ static int live_timeslice_preempt(void *arg) * ready task. */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -456,7 +456,7 @@ static int live_timeslice_preempt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (!intel_engine_has_preemption(engine)) continue; @@ -466,7 +466,7 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - if (igt_flush_test(i915)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_pin; } @@ -484,7 +484,7 @@ static int live_timeslice_preempt(void *arg) static int live_busywait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; @@ -498,19 +498,19 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. */ - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_ctx_lo; @@ -522,7 +522,7 @@ static int live_busywait_preempt(void *arg) goto err_obj; } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_map; @@ -532,7 +532,7 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *lo, *hi; struct igt_live_test t; u32 *cs; @@ -543,7 +543,7 @@ static int live_busywait_preempt(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_vma; } @@ -623,7 +623,7 @@ static int live_busywait_preempt(void *arg) i915_request_add(hi); if (i915_request_wait(lo, 0, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to preempt semaphore busywait!\n", engine->name); @@ -631,7 +631,7 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } @@ -677,45 +677,45 @@ spinner_create_request(struct igt_spinner *spin, static int live_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - if (igt_spinner_init(&spin_hi, &i915->gt)) + if (igt_spinner_init(&spin_hi, gt)) return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -731,7 +731,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -748,7 +748,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -776,7 +776,7 @@ static int live_preempt(void *arg) static int live_late_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; @@ -784,34 +784,34 @@ static int live_late_preempt(void *arg) enum intel_engine_id id; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (igt_spinner_init(&spin_hi, &i915->gt)) + if (igt_spinner_init(&spin_hi, gt)) return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ ctx_lo->sched.priority = I915_USER_PRIORITY(1); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -875,7 +875,7 @@ static int live_late_preempt(void *arg) err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -885,14 +885,13 @@ struct preempt_client { struct i915_gem_context *ctx; }; -static int preempt_client_init(struct drm_i915_private *i915, - struct preempt_client *c) +static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(i915); + c->ctx = kernel_context(gt->i915); if (!c->ctx) return -ENOMEM; - if (igt_spinner_init(&c->spin, &i915->gt)) + if (igt_spinner_init(&c->spin, gt)) goto err_ctx; return 0; @@ -910,7 +909,7 @@ static void preempt_client_fini(struct preempt_client *c) static int live_nopreempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client a, b; enum intel_engine_id id; @@ -921,16 +920,16 @@ static int live_nopreempt(void *arg) * that may be being observed and not want to be interrupted. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (preempt_client_init(i915, &a)) + if (preempt_client_init(gt, &a)) return -ENOMEM; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &b)) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *rq_a, *rq_b; if (!intel_engine_has_preemption(engine)) @@ -990,7 +989,7 @@ static int live_nopreempt(void *arg) goto err_wedged; } - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -1004,14 +1003,14 @@ static int live_nopreempt(void *arg) err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } static int live_suppress_self_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) @@ -1027,28 +1026,28 @@ static int live_suppress_self_preempt(void *arg) * completion event. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; /* presume black blox */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(gt->i915)) return 0; /* GVT forces single port & request submission */ - if (preempt_client_init(i915, &a)) + if (preempt_client_init(gt, &a)) return -ENOMEM; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &b)) goto err_client_a; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *rq_a, *rq_b; int depth; if (!intel_engine_has_preemption(engine)) continue; - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) goto err_wedged; intel_engine_pm_get(engine); @@ -1109,7 +1108,7 @@ static int live_suppress_self_preempt(void *arg) } intel_engine_pm_put(engine); - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -1123,7 +1122,7 @@ static int live_suppress_self_preempt(void *arg) err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } @@ -1183,7 +1182,7 @@ static void dummy_request_free(struct i915_request *dummy) static int live_suppress_wait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct preempt_client client[4]; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1196,19 +1195,19 @@ static int live_suppress_wait_preempt(void *arg) * not needlessly generate preempt-to-idle cycles. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ + if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ return -ENOMEM; - if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ + if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ goto err_client_0; - if (preempt_client_init(i915, &client[2])) /* head of queue */ + if (preempt_client_init(gt, &client[2])) /* head of queue */ goto err_client_1; - if (preempt_client_init(i915, &client[3])) /* bystander */ + if (preempt_client_init(gt, &client[3])) /* bystander */ goto err_client_2; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { int depth; if (!intel_engine_has_preemption(engine)) @@ -1263,7 +1262,7 @@ static int live_suppress_wait_preempt(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -1291,14 +1290,14 @@ static int live_suppress_wait_preempt(void *arg) err_wedged: for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; } static int live_chain_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; @@ -1310,16 +1309,16 @@ static int live_chain_preempt(void *arg) * the previously submitted spinner in B. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (preempt_client_init(i915, &hi)) + if (preempt_client_init(gt, &hi)) return -ENOMEM; - if (preempt_client_init(i915, &lo)) + if (preempt_client_init(gt, &lo)) goto err_client_hi; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; @@ -1350,7 +1349,7 @@ static int live_chain_preempt(void *arg) goto err_wedged; } - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_wedged; } @@ -1388,7 +1387,7 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to preempt over chain of %d\n", count); @@ -1404,7 +1403,7 @@ static int live_chain_preempt(void *arg) i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to flush low priority chain of %d requests\n", count); @@ -1430,45 +1429,45 @@ static int live_chain_preempt(void *arg) err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_lo; } static int live_preempt_hang(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!intel_has_reset_engine(&i915->gt)) + if (!intel_has_reset_engine(gt)) return 0; - if (igt_spinner_init(&spin_hi, &i915->gt)) + if (igt_spinner_init(&spin_hi, gt)) return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *rq; if (!intel_engine_has_preemption(engine)) @@ -1485,7 +1484,7 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -1507,28 +1506,28 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_reset(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - if (igt_flush_test(i915)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx_lo; } @@ -1557,7 +1556,7 @@ static int random_priority(struct rnd_state *rnd) } struct preempt_smoke { - struct drm_i915_private *i915; + struct intel_gt *gt; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; @@ -1657,7 +1656,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt->i915, id) { arg[id] = *smoke; arg[id].engine = engine; if (!(flags & BATCH)) @@ -1674,7 +1673,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) } count = 0; - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt->i915, id) { int status; if (IS_ERR_OR_NULL(tsk[id])) @@ -1691,7 +1690,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } @@ -1703,7 +1702,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count = 0; do { - for_each_engine(smoke->engine, smoke->i915, id) { + for_each_engine(smoke->engine, smoke->gt->i915, id) { struct i915_gem_context *ctx = smoke_context(smoke); int err; @@ -1719,14 +1718,14 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } static int live_preempt_smoke(void *arg) { struct preempt_smoke smoke = { - .i915 = arg, + .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), .ncontext = 1024, }; @@ -1736,7 +1735,7 @@ static int live_preempt_smoke(void *arg) u32 *cs; int n; - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) return 0; smoke.contexts = kmalloc_array(smoke.ncontext, @@ -1745,7 +1744,8 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); + smoke.batch = + i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); goto err_free; @@ -1762,13 +1762,13 @@ static int live_preempt_smoke(void *arg) i915_gem_object_flush_map(smoke.batch); i915_gem_object_unpin_map(smoke.batch); - if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { + if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { err = -EIO; goto err_batch; } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915); if (!smoke.contexts[n]) goto err_ctx; } @@ -1801,7 +1801,7 @@ static int live_preempt_smoke(void *arg) return err; } -static int nop_virtual_engine(struct drm_i915_private *i915, +static int nop_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling, unsigned int nctx, @@ -1820,7 +1820,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(i915); + ctx[n] = kernel_context(gt->i915); if (!ctx[n]) { err = -ENOMEM; nctx = n; @@ -1845,7 +1845,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, } } - err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); if (err) goto out; @@ -1892,7 +1892,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); break; } } @@ -1914,7 +1914,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { @@ -1927,19 +1927,18 @@ static int nop_virtual_engine(struct drm_i915_private *i915, static int live_virtual_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; - struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - for_each_engine(engine, i915, id) { - err = nop_virtual_engine(i915, &engine, 1, 1, 0); + for_each_engine(engine, gt->i915, id) { + err = nop_virtual_engine(gt, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); @@ -1961,13 +1960,13 @@ static int live_virtual_engine(void *arg) continue; for (n = 1; n <= nsibling + 1; n++) { - err = nop_virtual_engine(i915, siblings, nsibling, + err = nop_virtual_engine(gt, siblings, nsibling, n, 0); if (err) return err; } - err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); + err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); if (err) return err; } @@ -1975,7 +1974,7 @@ static int live_virtual_engine(void *arg) return 0; } -static int mask_virtual_engine(struct drm_i915_private *i915, +static int mask_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) { @@ -1991,7 +1990,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, * restrict it to our desired engine within the virtual engine. */ - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; @@ -2005,7 +2004,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, if (err) goto out_put; - err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); if (err) goto out_unpin; @@ -2036,7 +2035,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto out; } @@ -2052,7 +2051,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, err = igt_live_test_end(&t); out: - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -2069,13 +2068,12 @@ static int mask_virtual_engine(struct drm_i915_private *i915, static int live_virtual_mask(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -2091,7 +2089,7 @@ static int live_virtual_mask(void *arg) if (nsibling < 2) continue; - err = mask_virtual_engine(i915, siblings, nsibling); + err = mask_virtual_engine(gt, siblings, nsibling); if (err) return err; } @@ -2099,7 +2097,7 @@ static int live_virtual_mask(void *arg) return 0; } -static int preserved_virtual_engine(struct drm_i915_private *i915, +static int preserved_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) { @@ -2112,7 +2110,7 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, int err = 0; u32 *cs; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; @@ -2132,7 +2130,7 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, if (err) goto out_put; - err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); if (err) goto out_unpin; @@ -2212,9 +2210,8 @@ static int preserved_virtual_engine(struct drm_i915_private *i915, static int live_virtual_preserved(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; /* @@ -2223,11 +2220,11 @@ static int live_virtual_preserved(void *arg) * are preserved. */ - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; /* As we use CS_GPR we cannot run before they existed on all engines. */ - if (INTEL_GEN(i915) < 9) + if (INTEL_GEN(gt->i915) < 9) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -2243,7 +2240,7 @@ static int live_virtual_preserved(void *arg) if (nsibling < 2) continue; - err = preserved_virtual_engine(i915, siblings, nsibling); + err = preserved_virtual_engine(gt, siblings, nsibling); if (err) return err; } @@ -2251,7 +2248,7 @@ static int live_virtual_preserved(void *arg) return 0; } -static int bond_virtual_engine(struct drm_i915_private *i915, +static int bond_virtual_engine(struct intel_gt *gt, unsigned int class, struct intel_engine_cs **siblings, unsigned int nsibling, @@ -2267,13 +2264,13 @@ static int bond_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; err = 0; rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, i915, id) { + for_each_engine(master, gt->i915, id) { struct i915_sw_fence fence = {}; if (master->class == class) @@ -2378,7 +2375,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; kernel_context_close(ctx); @@ -2395,13 +2392,12 @@ static int live_virtual_bond(void *arg) { "schedule", BOND_SCHEDULE }, { }, }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -2420,7 +2416,7 @@ static int live_virtual_bond(void *arg) continue; for (p = phases; p->name; p++) { - err = bond_virtual_engine(i915, + err = bond_virtual_engine(gt, class, siblings, nsibling, p->flags); if (err) { @@ -2462,7 +2458,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } static void hexdump(const void *buf, size_t len) From 3b05c4f832fa9beeb8da4bafb3c2d115b77a8d5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 12:53:11 +0100 Subject: [PATCH 109/159] drm/i915/selftests: Teach guc to take intel_gt as its argument The guc selftests are hardware^W firmare centric and so want to use the gt as its target. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016115311.12894-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 42 ++++++++--------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index f927f851aadf..d8a80388bd31 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -108,22 +108,15 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client) * validating that the doorbells status expected by the driver matches what the * GuC/HW have. */ -static int igt_guc_clients(void *args) +static int igt_guc_clients(void *arg) { - struct drm_i915_private *dev_priv = args; + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; intel_wakeref_t wakeref; - struct intel_guc *guc; int err = 0; - GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->gt.uc.guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); err = check_all_doorbells(guc); if (err) @@ -188,7 +181,7 @@ static int igt_guc_clients(void *args) guc_clients_create(guc); guc_clients_enable(guc); unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } @@ -199,21 +192,14 @@ static int igt_guc_clients(void *args) */ static int igt_guc_doorbells(void *arg) { - struct drm_i915_private *dev_priv = arg; + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; intel_wakeref_t wakeref; - struct intel_guc *guc; int i, err = 0; u16 db_id; - GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->gt.uc.guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); err = check_all_doorbells(guc); if (err) @@ -295,19 +281,19 @@ static int igt_guc_doorbells(void *arg) guc_client_free(clients[i]); } unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } -int intel_guc_live_selftest(struct drm_i915_private *dev_priv) +int intel_guc_live_selftest(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_guc_clients), SUBTEST(igt_guc_doorbells), }; - if (!USES_GUC_SUBMISSION(dev_priv)) + if (!USES_GUC_SUBMISSION(i915)) return 0; - return i915_subtests(tests, dev_priv); + return intel_gt_live_subtests(tests, &i915->gt); } From bb3d4c9d636b3304da95facd172323f9e86e1262 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 12:49:02 +0100 Subject: [PATCH 110/159] drm/i915/selftests: Teach workarounds to take intel_gt as its argument The workarounds selftests are hardware centric and so want to use the gt as its target. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016114902.24388-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gt/selftest_workarounds.c | 128 +++++++++--------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index dc11f7ad50a2..8856c6c46cc4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -58,7 +58,7 @@ static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) } static void -reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -66,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); - gt_init_workarounds(i915, &lists->gt_wa_list); + gt_init_workarounds(gt->i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; wa_init_start(wal, "REF", engine->name); @@ -83,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) } static void -reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) intel_wa_list_free(&lists->engine[id].wa_list); intel_wa_list_free(&lists->gt_wa_list); @@ -215,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */ + intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (intel_gt_is_wedged(&ctx->i915->gt)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; if (err) goto out_put; @@ -605,7 +605,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, if (err) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - intel_gt_set_wedged(&ctx->i915->gt); + intel_gt_set_wedged(engine->gt); goto out_batch; } @@ -704,7 +704,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, static int live_dirty_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; @@ -713,20 +713,20 @@ static int live_dirty_whitelist(void *arg) /* Can the user write to the whitelisted registers? */ - if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - ctx = live_context(i915, file); + ctx = live_context(gt->i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (engine->whitelist.count == 0) continue; @@ -736,41 +736,43 @@ static int live_dirty_whitelist(void *arg) } out_file: - mock_file_free(i915, file); + mock_file_free(gt->i915, file); return err; } static int live_reset_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ + igt_global_reset_lock(gt); - if (!engine || engine->whitelist.count == 0) - return 0; + for_each_engine(engine, gt->i915, id) { + if (engine->whitelist.count == 0) + continue; - igt_global_reset_lock(&i915->gt); + if (intel_has_reset_engine(gt)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + if (err) + goto out; + } - if (intel_has_reset_engine(&i915->gt)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); - if (err) - goto out; - } - - if (intel_has_gpu_reset(&i915->gt)) { - err = check_whitelist_across_reset(engine, - do_device_reset, - "device"); - if (err) - goto out; + if (intel_has_gpu_reset(gt)) { + err = check_whitelist_across_reset(engine, + do_device_reset, + "device"); + if (err) + goto out; + } } out: - igt_global_reset_unlock(&i915->gt); + igt_global_reset_unlock(gt); return err; } @@ -996,7 +998,7 @@ check_whitelisted_registers(struct intel_engine_cs *engine, static int live_isolated_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct { struct i915_gem_context *ctx; struct i915_vma *scratch[2]; @@ -1010,17 +1012,14 @@ static int live_isolated_whitelist(void *arg) * invisible to a second context. */ - if (!intel_engines_has_context_isolation(i915)) - return 0; - - if (!i915->kernel_context->vm) + if (!intel_engines_has_context_isolation(gt->i915)) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { struct i915_address_space *vm; struct i915_gem_context *c; - c = kernel_context(i915); + c = kernel_context(gt->i915); if (IS_ERR(c)) { err = PTR_ERR(c); goto err; @@ -1049,7 +1048,10 @@ static int live_isolated_whitelist(void *arg) i915_vm_put(vm); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { + if (!engine->kernel_context->vm) + continue; + if (!whitelist_writable_count(engine)) continue; @@ -1103,7 +1105,7 @@ static int live_isolated_whitelist(void *arg) kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; @@ -1138,16 +1140,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, static int live_gpu_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; - if (!intel_has_gpu_reset(&i915->gt)) + if (!intel_has_gpu_reset(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1155,25 +1157,25 @@ live_gpu_reset_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); - igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) goto out; - intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); out: i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); return ok ? 0 : -ESRCH; } @@ -1181,7 +1183,7 @@ live_gpu_reset_workarounds(void *arg) static int live_engine_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; @@ -1191,17 +1193,17 @@ live_engine_reset_workarounds(void *arg) struct wa_lists lists; int ret = 0; - if (!intel_has_reset_engine(&i915->gt)) + if (!intel_has_reset_engine(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct intel_engine_cs *engine = ce->engine; @@ -1254,12 +1256,12 @@ live_engine_reset_workarounds(void *arg) } err: i915_gem_context_unlock_engines(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); kernel_context_close(ctx); - igt_flush_test(i915); + igt_flush_test(gt->i915); return ret; } @@ -1277,5 +1279,5 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } From 5f65d5a6e4bddd8212a101fd566e1554cfd5e355 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 12:38:40 +0100 Subject: [PATCH 111/159] drm/i915/selftests: Teach timelines to take intel_gt as its argument The timelines selftests are [mostly] hardware centric and so want to use the gt as its target. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016113840.1106-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_timeline.c | 48 +++++++++++---------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index d6df40cdc8a6..473a56b0ae46 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -35,7 +35,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl) #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) struct mock_hwsp_freelist { - struct drm_i915_private *i915; + struct intel_gt *gt; struct radix_tree_root cachelines; struct intel_timeline **history; unsigned long count, max; @@ -68,7 +68,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = intel_timeline_create(&state->i915->gt, NULL); + tl = intel_timeline_create(state->gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -106,6 +106,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, static int mock_hwsp_freelist(void *arg) { struct mock_hwsp_freelist state; + struct drm_i915_private *i915; const struct { const char *name; unsigned int flags; @@ -117,12 +118,14 @@ static int mock_hwsp_freelist(void *arg) unsigned int na; int err = 0; + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; + state.gt = &i915->gt; /* * Create a bunch of timelines and check that their HWSP do not overlap. @@ -151,7 +154,7 @@ static int mock_hwsp_freelist(void *arg) __mock_hwsp_record(&state, na, NULL); kfree(state.history); err_put: - drm_dev_put(&state.i915->drm); + drm_dev_put(&i915->drm); return err; } @@ -476,11 +479,11 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) } static struct intel_timeline * -checked_intel_timeline_create(struct drm_i915_private *i915) +checked_intel_timeline_create(struct intel_gt *gt) { struct intel_timeline *tl; - tl = intel_timeline_create(&i915->gt, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return tl; @@ -497,7 +500,7 @@ checked_intel_timeline_create(struct drm_i915_private *i915) static int live_hwsp_engine(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -516,7 +519,7 @@ static int live_hwsp_engine(void *arg) return -ENOMEM; count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (!intel_engine_can_store_dword(engine)) continue; @@ -526,7 +529,7 @@ static int live_hwsp_engine(void *arg) struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); break; @@ -548,7 +551,7 @@ static int live_hwsp_engine(void *arg) break; } - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -570,7 +573,7 @@ static int live_hwsp_engine(void *arg) static int live_hwsp_alternate(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -591,14 +594,14 @@ static int live_hwsp_alternate(void *arg) count = 0; for (n = 0; n < NUM_TIMELINES; n++) { - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct intel_timeline *tl; struct i915_request *rq; if (!intel_engine_can_store_dword(engine)) continue; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { intel_engine_pm_put(engine); err = PTR_ERR(tl); @@ -620,7 +623,7 @@ static int live_hwsp_alternate(void *arg) } out: - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -641,8 +644,7 @@ static int live_hwsp_alternate(void *arg) static int live_hwsp_wrap(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; @@ -740,7 +742,7 @@ static int live_hwsp_wrap(void *arg) } out: - if (igt_flush_test(i915)) + if (igt_flush_test(gt->i915)) err = -EIO; intel_timeline_unpin(tl); @@ -751,7 +753,7 @@ static int live_hwsp_wrap(void *arg) static int live_hwsp_recycle(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned long count; @@ -764,7 +766,7 @@ static int live_hwsp_recycle(void *arg) */ count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { IGT_TIMEOUT(end_time); if (!intel_engine_can_store_dword(engine)) @@ -776,7 +778,7 @@ static int live_hwsp_recycle(void *arg) struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); break; @@ -831,5 +833,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } From eca0b72089695d5b19c8c2b287ac3f6fbe79197e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 10:07:49 +0100 Subject: [PATCH 112/159] drm/i915: Do initial mocs configuration directly Now that we record the default "goldenstate" context, we do not need to emit the mocs registers at the start of each context and can simply do mmio before the first context and capture the registers as part of its default image. As a consequence, this means that we repeat the mmio after each engine reset, fixing up any platform and registers that were zapped by the reset (for those platforms with global not context-saved settings). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111723 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111645 Signed-off-by: Chris Wilson Cc: Prathap Kumar Valsan Reviewed-by: Prathap Kumar Valsan Link: https://patchwork.freedesktop.org/patch/msgid/20191016090749.7092-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_mocs.c | 318 ++++++++------------------- drivers/gpu/drm/i915/gt/intel_mocs.h | 3 - drivers/gpu/drm/i915/i915_gem.c | 9 - 3 files changed, 88 insertions(+), 242 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 728704bbbe18..5bac3966906b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -287,10 +287,9 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = { GEN11_MOCS_ENTRIES }; -static bool get_mocs_settings(struct intel_gt *gt, +static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - struct drm_i915_private *i915 = gt->i915; bool result = false; if (INTEL_GEN(i915) >= 12) { @@ -331,9 +330,9 @@ static bool get_mocs_settings(struct intel_gt *gt, return result; } -static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) +static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) { - switch (engine_id) { + switch (engine->id) { case RCS0: return GEN9_GFX_MOCS(index); case VCS0: @@ -347,7 +346,7 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) case VCS2: return GEN11_MFX2_MOCS(index); default: - MISSING_CASE(engine_id); + MISSING_CASE(engine->id); return INVALID_MMIO_REG; } } @@ -365,47 +364,95 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table, return table->table[I915_MOCS_PTE].control_value; } -/** - * intel_mocs_init_engine() - emit the mocs control table - * @engine: The engine for whom to emit the registers. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - */ -void intel_mocs_init_engine(struct intel_engine_cs *engine) +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - struct intel_gt *gt = engine->gt; - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; - u32 unused_value; - - /* Platforms with global MOCS do not need per-engine initialization. */ - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - return; - - /* Called under a blanket forcewake */ - assert_forcewakes_active(uncore, FORCEWAKE_ALL); - - if (!get_mocs_settings(gt, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].control_value; - - for (index = 0; index < table.size; index++) { - u32 value = get_entry_control(&table, index); + struct intel_uncore *uncore = engine->uncore; + u32 unused_value = table->table[I915_MOCS_PTE].control_value; + unsigned int i; + for (i = 0; i < table->size; i++) intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), - value); + mocs_register(engine, i), + get_entry_control(table, i)); + + /* All remaining entries are unused */ + for (; i < table->n_entries; i++) + intel_uncore_write_fw(uncore, + mocs_register(engine, i), + unused_value); +} + +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + +static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, + u16 low, + u16 high) +{ + return low | (u32)high << 16; +} + +static void init_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + struct intel_uncore *uncore = engine->uncore; + u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value; + unsigned int i; + + for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); + + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, high)); + } + + /* Odd table size - 1 left over */ + if (table->size & 1) { + u16 low = get_entry_l3cc(table, 2 * i); + + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, unused_value)); + i++; } /* All remaining entries are also unused */ - for (; index < table.n_entries; index++) - intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), - unused_value); + for (; i < table->n_entries / 2; i++) + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, unused_value, + unused_value)); +} + +void intel_mocs_init_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_mocs_table table; + + /* Called under a blanket forcewake */ + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + + if (!get_mocs_settings(engine->i915, &table)) + return; + + /* Platforms with global MOCS do not need per-engine initialization. */ + if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915)) + init_mocs_table(engine, &table); + + if (engine->class == RENDER_CLASS) + init_l3cc_table(engine, &table); } static void intel_mocs_init_global(struct intel_gt *gt) @@ -416,7 +463,7 @@ static void intel_mocs_init_global(struct intel_gt *gt) GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - if (!get_mocs_settings(gt, &table)) + if (!get_mocs_settings(gt->i915, &table)) return; if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) @@ -438,197 +485,8 @@ static void intel_mocs_init_global(struct intel_gt *gt) table.table[0].control_value); } -static int emit_mocs_control_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - enum intel_engine_id engine = rq->engine->id; - unsigned int index; - u32 unused_value; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].control_value; - - cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); - - for (index = 0; index < table->size; index++) { - u32 value = get_entry_control(table, index); - - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = value; - } - - /* All remaining entries are also unused */ - for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = unused_value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -/* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. - */ -static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, - unsigned int index) -{ - if (table->table[index].used) - return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; -} - -static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, - u16 low, - u16 high) -{ - return low | high << 16; -} - -static int emit_mocs_l3cc_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - u16 unused_value; - unsigned int i; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].l3cc_value; - - cs = intel_ring_begin(rq, 2 + table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); - - for (i = 0; i < table->size / 2; i++) { - u16 low = get_entry_l3cc(table, 2 * i); - u16 high = get_entry_l3cc(table, 2 * i + 1); - - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, high); - } - - /* Odd table size - 1 left over */ - if (table->size & 0x01) { - u16 low = get_entry_l3cc(table, 2 * i); - - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, unused_value); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) { - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, unused_value, unused_value); - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static void intel_mocs_init_l3cc_table(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int i; - u16 unused_value; - - if (!get_mocs_settings(gt, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].l3cc_value; - - for (i = 0; i < table.size / 2; i++) { - u16 low = get_entry_l3cc(&table, 2 * i); - u16 high = get_entry_l3cc(&table, 2 * i + 1); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); - } - - /* Odd table size - 1 left over */ - if (table.size & 0x01) { - u16 low = get_entry_l3cc(&table, 2 * i); - - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); - i++; - } - - /* All remaining entries are also unused */ - for (; i < table.n_entries / 2; i++) - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, - unused_value)); -} - -/** - * intel_mocs_emit() - program the MOCS register. - * @rq: Request to use to set up the MOCS tables. - * - * This function will emit a batch buffer with the values required for - * programming the MOCS register values for all the currently supported - * rings. - * - * These registers are partially stored in the RCS context, so they are - * emitted at the same time so that when a context is created these registers - * are set up. These registers have to be emitted into the start of the - * context as setting the ELSP will re-init some of these registers back - * to the hw values. - * - * Return: 0 on success, otherwise the error status. - */ -int intel_mocs_emit(struct i915_request *rq) -{ - struct drm_i915_mocs_table t; - int ret; - - if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) || - rq->engine->class != RENDER_CLASS) - return 0; - - if (get_mocs_settings(rq->engine->gt, &t)) { - /* Program the RCS control registers */ - ret = emit_mocs_control_table(rq, &t); - if (ret) - return ret; - - /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(rq, &t); - if (ret) - return ret; - } - - return 0; -} - void intel_mocs_init(struct intel_gt *gt) { - intel_mocs_init_l3cc_table(gt); - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) intel_mocs_init_global(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 2ae816b7ca19..83371f3e6ba1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,13 +49,10 @@ * context handling keep the MOCS in step. */ -struct i915_request; struct intel_engine_cs; struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); -int intel_mocs_emit(struct i915_request *rq); - #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0ddbd3a5fb8d..3694c7ff6d23 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1111,15 +1111,6 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_rq; - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So warn the user and carry on. - */ - err = intel_mocs_emit(rq); - if (err) - dev_notice(i915->drm.dev, - "Failed to program MOCS registers; expect performance issues.\n"); - err = intel_renderstate_emit(rq); if (err) goto err_rq; From e9d4c9245f54cd50b9bdbdf216a9c0d6404ced7b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 15:32:33 +0100 Subject: [PATCH 113/159] drm/i915: Store i915_ggtt as the backpointer on fence registers Now that i915_ggtt knows everything about its own paths to perform mmio, we can use that as our primary backpointer for individual fence registers. This reduces the amount of pointer dancing we have to perform on the common paths, but more importantly finishes our fence register encapsulation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016143234.4075-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gvt/aperture_gm.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 6 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 74 ++++++++++++++--------- drivers/gpu/drm/i915/i915_gem_fence_reg.h | 7 +-- drivers/gpu/drm/i915/selftests/i915_gem.c | 2 +- 7 files changed, 54 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 77445d100ca8..477bfafdb103 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -715,7 +715,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) for_each_engine(engine, gt->i915, id) __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(gt->i915); + i915_gem_restore_fences(gt->ggtt); return err; } diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index d996bbc7ea59..771420453f82 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -198,7 +198,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) mutex_lock(&dev_priv->ggtt.vm.mutex); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { - reg = i915_reserve_fence(dev_priv); + reg = i915_reserve_fence(&dev_priv->ggtt); if (IS_ERR(reg)) goto out_free_fence; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9354924576c4..fd532b8cfdb8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1804,7 +1804,7 @@ static int i915_drm_resume(struct drm_device *dev) DRM_ERROR("failed to re-enable GGTT\n"); i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); intel_csr_ucode_resume(dev_priv); @@ -2502,7 +2502,7 @@ static int intel_runtime_suspend(struct device *kdev) intel_gt_runtime_resume(&dev_priv->gt); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); enable_rpm_wakeref_asserts(rpm); @@ -2582,7 +2582,7 @@ static int intel_runtime_resume(struct device *kdev) * we can do is to hope that things will still work (and disable RPM). */ intel_gt_runtime_resume(&dev_priv->gt); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); /* * On VLV/CHV display interrupts are part of the display diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3694c7ff6d23..aef5cf2227dd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1360,7 +1360,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) /* Minimal basic recovery for KMS */ ret = i915_ggtt_enable_hw(dev_priv); i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); intel_init_clock_gating(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 487b7261f7ed..83b450d9ce84 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -59,6 +59,16 @@ #define pipelined 0 +static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.i915; +} + +static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.gt->uncore; +} + static void i965_write_fence_reg(struct i915_fence_reg *fence, struct i915_vma *vma) { @@ -66,7 +76,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, int fence_pitch_shift; u64 val; - if (INTEL_GEN(fence->i915) >= 6) { + if (INTEL_GEN(fence_to_i915(fence)) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; @@ -95,7 +105,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); /* * To w/a incoherency with non-atomic 64-bit register updates, @@ -132,7 +142,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence))) stride /= 128; else stride /= 512; @@ -148,7 +158,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -180,7 +190,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -191,15 +201,17 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, static void fence_write(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct drm_i915_private *i915 = fence_to_i915(fence); + /* * Previous access through the fence register is marshalled by * the mb() inside the fault handlers (i915_gem_release_mmaps) * and explicitly managed for internal users. */ - if (IS_GEN(fence->i915, 2)) + if (IS_GEN(i915, 2)) i830_write_fence_reg(fence, vma); - else if (IS_GEN(fence->i915, 3)) + else if (IS_GEN(i915, 3)) i915_write_fence_reg(fence, vma); else i965_write_fence_reg(fence, vma); @@ -215,6 +227,8 @@ static void fence_write(struct i915_fence_reg *fence, static int fence_update(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct i915_ggtt *ggtt = fence->ggtt; + struct intel_uncore *uncore = fence_to_uncore(fence); intel_wakeref_t wakeref; struct i915_vma *old; int ret; @@ -256,7 +270,7 @@ static int fence_update(struct i915_fence_reg *fence, old->fence = NULL; } - list_move(&fence->link, &fence->i915->ggtt.fence_list); + list_move(&fence->link, &ggtt->fence_list); } /* @@ -269,7 +283,7 @@ static int fence_update(struct i915_fence_reg *fence, * be cleared before we can use any other fences to ensure that * the new fences do not overlap the elided clears, confusing HW. */ - wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); if (!wakeref) { GEM_BUG_ON(vma); return 0; @@ -280,10 +294,10 @@ static int fence_update(struct i915_fence_reg *fence, if (vma) { vma->fence = fence; - list_move_tail(&fence->link, &fence->i915->ggtt.fence_list); + list_move_tail(&fence->link, &ggtt->fence_list); } - intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref); + intel_runtime_pm_put(uncore->rpm, wakeref); return 0; } @@ -312,11 +326,11 @@ int i915_vma_revoke_fence(struct i915_vma *vma) return fence_update(fence, NULL); } -static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) +static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { struct i915_fence_reg *fence; - list_for_each_entry(fence, &i915->ggtt.fence_list, link) { + list_for_each_entry(fence, &ggtt->fence_list, link) { GEM_BUG_ON(fence->vma && fence->vma->fence != fence); if (atomic_read(&fence->pin_count)) @@ -326,7 +340,7 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) } /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(i915)) + if (intel_has_pending_fb_unpin(ggtt->vm.i915)) return ERR_PTR(-EAGAIN); return ERR_PTR(-EDEADLK); @@ -351,7 +365,7 @@ int __i915_vma_pin_fence(struct i915_vma *vma) return 0; } } else if (set) { - fence = fence_find(vma->vm->i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -402,7 +416,7 @@ int i915_vma_pin_fence(struct i915_vma *vma) * Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. */ - assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); + assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!i915_vma_is_ggtt(vma)); @@ -418,14 +432,13 @@ int i915_vma_pin_fence(struct i915_vma *vma) /** * i915_reserve_fence - Reserve a fence for vGPU - * @i915: i915 device private + * @ggtt: Global GTT * * This function walks the fence regs looking for a free one and remove * it from the fence_list. It is used to reserve fence for vGPU to use. */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; struct i915_fence_reg *fence; int count; int ret; @@ -439,7 +452,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) if (count <= 1) return ERR_PTR(-ENOSPC); - fence = fence_find(i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return fence; @@ -463,7 +476,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) */ void i915_unreserve_fence(struct i915_fence_reg *fence) { - struct i915_ggtt *ggtt = &fence->i915->ggtt; + struct i915_ggtt *ggtt = fence->ggtt; lockdep_assert_held(&ggtt->vm.mutex); @@ -472,19 +485,19 @@ void i915_unreserve_fence(struct i915_fence_reg *fence) /** * i915_gem_restore_fences - restore fence state - * @i915: i915 device private + * @ggtt: Global GTT * * Restore the hw fence state to match the software tracking again, to be called * after a gpu reset and on resume. Note that on runtime suspend we only cancel * the fences, to be reacquired by the user later. */ -void i915_gem_restore_fences(struct drm_i915_private *i915) +void i915_gem_restore_fences(struct i915_ggtt *ggtt) { int i; rcu_read_lock(); /* keep obj alive as we dereference */ - for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + for (i = 0; i < ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &ggtt->fence_regs[i]; struct i915_vma *vma = READ_ONCE(reg->vma); GEM_BUG_ON(vma && vma->fence != reg); @@ -550,7 +563,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915) */ /** - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern + * detect_bit_6_swizzle - detect bit 6 swizzling pattern * @i915: i915 device private * * Detects bit 6 swizzling of address lookup between IGD access and CPU @@ -822,12 +835,13 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, void i915_ggtt_init_fences(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; int num_fences; int i; INIT_LIST_HEAD(&ggtt->fence_list); INIT_LIST_HEAD(&ggtt->userfault_list); - intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm); + intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); detect_bit_6_swizzle(i915); @@ -842,20 +856,20 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) num_fences = 8; if (intel_vgpu_active(i915)) - num_fences = intel_uncore_read(&i915->uncore, + num_fences = intel_uncore_read(uncore, vgtif_reg(avail_rs.fence_num)); /* Initialize fence registers to zero */ for (i = 0; i < num_fences; i++) { struct i915_fence_reg *fence = &ggtt->fence_regs[i]; - fence->i915 = i915; + fence->ggtt = ggtt; fence->id = i; list_add_tail(&fence->link, &ggtt->fence_list); } ggtt->num_fences = num_fences; - i915_gem_restore_fences(i915); + i915_gem_restore_fences(ggtt); } void intel_gt_init_swizzling(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99866fb9d94f..7bd521cd7cd7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -29,7 +29,6 @@ #include struct drm_i915_gem_object; -struct drm_i915_private; struct i915_ggtt; struct i915_vma; struct intel_gt; @@ -39,7 +38,7 @@ struct sg_table; struct i915_fence_reg { struct list_head link; - struct drm_i915_private *i915; + struct i915_ggtt *ggtt; struct i915_vma *vma; atomic_t pin_count; int id; @@ -55,10 +54,10 @@ struct i915_fence_reg { }; /* i915_gem_fence_reg.c */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915); +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt); void i915_unreserve_fence(struct i915_fence_reg *fence); -void i915_gem_restore_fences(struct drm_i915_private *i915); +void i915_gem_restore_fences(struct i915_ggtt *ggtt); void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, struct sg_table *pages); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bfa40a5b6d98..97f89f744ee2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -120,7 +120,7 @@ static void pm_resume(struct drm_i915_private *i915) i915_gem_sanitize(i915); i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); + i915_gem_restore_fences(&i915->ggtt); i915_gem_resume(i915); } From 972c646f1cfed31ec9661ee7abc161b3ccf21fdd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 15:32:34 +0100 Subject: [PATCH 114/159] drm/i915: Move swizzle_bit under i915_ggtt The HW performs swizzling as part of its fence tiling inside the Global GTT. We already do the probing of the HW settings from the GGTT setup, complete the picture by storing the information as part of the GGTT. The primary benefit is the consistency of our probe routines do not break the i915_ggtt encapsulation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016143234.4075-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 8 ++++---- .../gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 ++++---- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 9 ++------- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 ++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++++ 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index dc2a83ce44d5..1fa592d82af5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -348,9 +348,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x; else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -421,10 +421,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, switch (args->tiling_mode) { case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y; break; default: case I915_TILING_NONE: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index cfa52c525691..65d4dbf91999 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -357,10 +357,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } @@ -474,10 +474,10 @@ static int igt_smoke_tiling(void *arg) break; case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a541b6ae534f..ada57eee914a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1660,9 +1660,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data) wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_x)); + swizzle_string(dev_priv->ggtt.bit_6_swizzle_x)); seq_printf(m, "bit6 swizzle for Y-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_y)); + swizzle_string(dev_priv->ggtt.bit_6_swizzle_y)); if (IS_GEN_RANGE(dev_priv, 3, 4)) { seq_printf(m, "DDC = 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f6aee1e01a7f..88956f37d96c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -695,11 +695,6 @@ struct i915_gem_mm { */ struct workqueue_struct *userptr_wq; - /** Bit 6 swizzling required for X tiling */ - u32 bit_6_swizzle_x; - /** Bit 6 swizzling required for Y tiling */ - u32 bit_6_swizzle_y; - /* shrinker accounting, also useful for userland debugging */ u64 shrink_memory; u32 shrink_count; @@ -2014,9 +2009,9 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv, /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); - return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && i915_gem_object_is_tiled(obj); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 83b450d9ce84..321189e1b0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -564,14 +564,15 @@ void i915_gem_restore_fences(struct i915_ggtt *ggtt) /** * detect_bit_6_swizzle - detect bit 6 swizzling pattern - * @i915: i915 device private + * @ggtt: Global GGTT * * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. */ -static void detect_bit_6_swizzle(struct drm_i915_private *i915) +static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; @@ -733,8 +734,8 @@ static void detect_bit_6_swizzle(struct drm_i915_private *i915) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } - i915->mm.bit_6_swizzle_x = swizzle_x; - i915->mm.bit_6_swizzle_y = swizzle_y; + i915->ggtt.bit_6_swizzle_x = swizzle_x; + i915->ggtt.bit_6_swizzle_y = swizzle_y; } /* @@ -843,7 +844,7 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) INIT_LIST_HEAD(&ggtt->userfault_list); intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); - detect_bit_6_swizzle(i915); + detect_bit_6_swizzle(ggtt); if (INTEL_GEN(i915) >= 7 && !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) @@ -878,7 +879,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; if (INTEL_GEN(i915) < 5 || - i915->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0a18fdfe63ff..f074f1de66e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -411,6 +411,11 @@ struct i915_ggtt { int mtrr; + /** Bit 6 swizzling required for X tiling */ + u32 bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + u32 bit_6_swizzle_y; + u32 pin_bias; unsigned int num_fences; From eb8de23c955bd4dae47bfdac61179010d3e451d8 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Tue, 8 Oct 2019 15:09:05 -0700 Subject: [PATCH 115/159] drm/i915/tgl: Enable DDI/Port G In TGL there we are missing the initialization of port G. Do the same as for other ports. Signed-off-by: Khaled Almahallawy Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20191008220905.18278-1-khaled.almahallawy@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 4 ++++ drivers/gpu/drm/i915/display/intel_display.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ drivers/gpu/drm/i915/display/intel_vbt_defs.h | 3 +++ 5 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index f0307b04cc13..63c1bd4c2954 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1399,6 +1399,7 @@ static enum port dvo_port_to_port(u8 dvo_port) [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1}, [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1}, + [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1}, }; enum port port; int i; @@ -2258,6 +2259,9 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, case DP_AUX_F: aux_ch = AUX_CH_F; break; + case DP_AUX_G: + aux_ch = AUX_CH_G; + break; default: MISSING_CASE(info->alternate_aux_channel); aux_ch = AUX_CH_A; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2309b349f4b5..164ded862148 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6689,6 +6689,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_E_LANES; case PORT_F: return POWER_DOMAIN_PORT_DDI_F_LANES; + case PORT_G: + return POWER_DOMAIN_PORT_DDI_G_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -6712,6 +6714,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_E_TBT; case AUX_CH_F: return POWER_DOMAIN_AUX_F_TBT; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G_TBT; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_C_TBT; @@ -6731,6 +6735,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_E; case AUX_CH_F: return POWER_DOMAIN_AUX_F; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_A; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index bed203ec1df8..90807603987c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -271,6 +271,7 @@ enum aux_ch { AUX_CH_D, AUX_CH_E, /* ICL+ */ AUX_CH_F, + AUX_CH_G, }; #define aux_ch_name(a) ((a) + 'A') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index dd5dc1e38495..c4c081c79dd9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1644,6 +1644,7 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_CTL(aux_ch); default: MISSING_CASE(aux_ch); @@ -1664,6 +1665,7 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_DATA(aux_ch, index); default: MISSING_CASE(aux_ch); diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index dfcd156b5094..e3045ced4bfe 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -291,6 +291,8 @@ struct bdb_general_features { #define DVO_PORT_HDMIE 12 /* 193 */ #define DVO_PORT_DPF 13 /* N/A */ #define DVO_PORT_HDMIF 14 /* N/A */ +#define DVO_PORT_DPG 15 +#define DVO_PORT_HDMIG 16 #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -325,6 +327,7 @@ enum vbt_gmbus_ddi { #define DP_AUX_D 0x30 #define DP_AUX_E 0x50 #define DP_AUX_F 0x60 +#define DP_AUX_G 0x70 #define VBT_DP_MAX_LINK_RATE_HBR3 0 #define VBT_DP_MAX_LINK_RATE_HBR2 1 From 05488673a4d41383f9dd537f298e525e6b00fb93 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2019 10:38:02 +0100 Subject: [PATCH 116/159] drm/i915/pmu: Support multiple GPUs With discrete graphics system can have both integrated and discrete GPU handled by i915. Currently we use a fixed name ("i915") when registering as the uncore PMU provider which stops working in this case. To fix this we add the PCI device name string to non-integrated devices handled by us. Integrated devices keep the legacy name preserving backward compatibility. v2: * Detect IGP and keep legacy name. (Michal) * Use PCI device name as suffix. (Michal, Chris) v3: * Constify the name. (Chris) * Use pci_domain_nr. (Chris) v4: * Fix kfree_const usage. (Chris) v5: * kfree_const does not work for modules. (Chris) * Changed is_igp helper to take i915. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191016093802.12483-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 29 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_pmu.h | 4 ++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d0508719492e..00fe401868c4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1080,6 +1080,17 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) cpuhp_remove_multi_state(cpuhp_slot); } +static bool is_igp(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* IGP is 0000:00:02.0 */ + return pci_domain_nr(pdev->bus) == 0 && + pdev->bus->number == 0 && + PCI_SLOT(pdev->devfn) == 2 && + PCI_FUNC(pdev->devfn) == 0; +} + void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; @@ -1110,10 +1121,19 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; - ret = perf_pmu_register(&pmu->base, "i915", -1); - if (ret) + if (!is_igp(i915)) + pmu->name = kasprintf(GFP_KERNEL, + "i915-%s", + dev_name(i915->drm.dev)); + else + pmu->name = "i915"; + if (!pmu->name) goto err; + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_name; + ret = i915_pmu_register_cpuhp_state(pmu); if (ret) goto err_unreg; @@ -1122,6 +1142,9 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_name: + if (!is_igp(i915)) + kfree(pmu->name); err: pmu->base.event_init = NULL; free_event_attributes(pmu); @@ -1143,5 +1166,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&pmu->base); pmu->base.event_init = NULL; + if (!is_igp(i915)) + kfree(pmu->name); free_event_attributes(pmu); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 067dbbf3bdff..bf52e3983631 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -46,6 +46,10 @@ struct i915_pmu { * @base: PMU base. */ struct pmu base; + /** + * @name: Name as registered with perf core. + */ + const char *name; /** * @lock: Lock protecting enable mask and ref count handling. */ From d4a415dcda35d298b0048eb9c1a9bc04a5007fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 22:05:38 +0300 Subject: [PATCH 117/159] drm/i915: Fix MST oops due to MSA changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MSA MISC computation now depends on the connector state, and we do it from the DDI .pre_enable() hook. All that is fine for DP SST but with MST we don't actually pass the connector state to the dig port's .pre_enable() hook which leads to an oops. Need to think more how to solve this in a cleaner fashion, but for now let's just add a NULL check to stop the oopsing. Cc: Gwan-gyeong Mun Cc: Uma Shankar Fixes: 0c06fa156006 ("drm/i915/dp: Add support of BT.2020 Colorimetry to DP MSA") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015190538.27539-1-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 80f8e2698be0..4c81449ec144 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1794,8 +1794,10 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * of Color Encoding Format and Content Color Gamut] while sending * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. + * + * FIXME MST doesn't pass in the conn_state */ - if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); From 493065e24eb13cf1e974ef9fc0185d6266d16067 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 14 Oct 2019 11:36:00 -0700 Subject: [PATCH 118/159] drm/i915: Add microcontrollers documentation section To better organize the information, add a microcontrollers section and move/link the GuC, HuC and DMC documentation under it. Also add a small intro. Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Acked-by: Anna Karas Reviewed-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20191014183602.3643-1-daniele.ceraolospurio@intel.com --- Documentation/gpu/i915.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 465779670fd4..f1bae7867045 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -415,6 +415,15 @@ Object Tiling IOCTLs .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c :doc: buffer object tiling +Microcontrollers +================ + +Starting from gen9, three microcontrollers are available on the HW: the +graphics microcontroller (GuC), the HEVC/H.265 microcontroller (HuC) and the +display microcontroller (DMC). The driver is responsible for loading the +firmwares on the microcontrollers; the GuC and HuC firmwares are transferred +to WOPCM using the DMA engine, while the DMC firmware is written through MMIO. + WOPCM ----- @@ -454,6 +463,15 @@ GuC Address Space .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC Address Space +HuC +--- +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c + :doc: HuC Firmware + +DMC +--- +See `CSR firmware support for DMC`_ + Tracing ======= From 218151e997449717cbb70f304b7b8f9918ba6af4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 14 Oct 2019 11:36:01 -0700 Subject: [PATCH 119/159] drm/i915/guc: improve documentation Add a short description of what we expect from GuC and some minor improvements to existing documentation. Also remove a comment about a difference between GuC and HuC that is not true anymore. v2: add that the GuC is not mandatory (Martin) v3: add extra newline for better text organization (Martin) Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: Matthew Brost Cc: Martin Peres Acked-by: Anna Karas Reviewed-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20191014183602.3643-2-daniele.ceraolospurio@intel.com --- Documentation/gpu/i915.rst | 22 ++++++++----- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 31 +++++++++++++++++-- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 3 -- 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index f1bae7867045..357e9dfa7de1 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -436,12 +436,24 @@ WOPCM Layout GuC --- -Firmware Layout -~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c + :doc: GuC + +GuC Firmware Layout +~~~~~~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h :doc: Firmware Layout +GuC Memory Management +~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c + :doc: GuC Memory Management +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c + :functions: intel_guc_allocate_vma + + GuC-specific firmware loader ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -457,12 +469,6 @@ GuC-based command submission .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :internal: -GuC Address Space -~~~~~~~~~~~~~~~~~ - -.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c - :doc: GuC Address Space - HuC --- .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 249c747e9756..37f7bcbf7dac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -9,6 +9,27 @@ #include "intel_guc_submission.h" #include "i915_drv.h" +/** + * DOC: GuC + * + * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is + * designed to offload some of the functionality usually performed by the host + * driver; currently the main operations it can take care of are: + * + * - Authentication of the HuC, which is required to fully enable HuC usage. + * - Low latency graphics context scheduling (a.k.a. GuC submission). + * - GT Power management. + * + * The enable_guc module parameter can be used to select which of those + * operations to enable within GuC. Note that not all the operations are + * supported on all gen9+ platforms. + * + * Enabling the GuC is not mandatory and therefore the firmware is only loaded + * if at least one of the operations is selected. However, not loading the GuC + * might result in the loss of some features that do require the GuC (currently + * just the HuC, but more are expected to land in the future). + */ + static void gen8_guc_raise_irq(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -548,9 +569,15 @@ int intel_guc_resume(struct intel_guc *guc) } /** - * DOC: GuC Address Space + * DOC: GuC Memory Management * - * The layout of GuC address space is shown below: + * GuC can't allocate any memory for its own usage, so all the allocations must + * be handled by the host driver. GuC accesses the memory via the GGTT, with the + * exception of the top and bottom parts of the 4GB address space, which are + * instead re-mapped by the GuC HW to memory location of the FW itself (WOPCM) + * or other parts of the HW. The driver must take care not to place objects that + * the GuC is going to access in these reserved ranges. The layout of the GuC + * address space is shown below: * * :: * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f325d3dd564f..849a44add424 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -29,6 +29,12 @@ enum { /** * DOC: GuC-based command submission * + * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC + * firmware is moving to an updated submission interface and we plan to + * turn submission back on when that lands. The below documentation (and related + * code) matches the old submission model and will be updated as part of the + * upgrade to the new flow. + * * GuC client: * A intel_guc_client refers to a submission path through GuC. Currently, there * is only one client, which is charged with all submissions to the GuC. This diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index f8f6c91a0df6..029214cdedd5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -39,9 +39,6 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. - * - * The only difference between GuC and HuC firmwares is how the version - * information is saved. */ struct uc_css_header { From 0b23e2a6ed5cb1196dae7d34c5910193d86244a9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 14 Oct 2019 11:36:02 -0700 Subject: [PATCH 120/159] drm/i915/huc: improve documentation Better explain the usage of the microcontroller and what i915 is responsible of. While at it, fix the documentation for the auth function, which doesn't do any pinning anymore. v2: add a comment on HuC being optional and descrive how HuC accesses memory (Martin) v3: add extra newline for better text organization (Martin) Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: Martin Peres Acked-by: Anna Karas Reviewed-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20191014183602.3643-3-daniele.ceraolospurio@intel.com --- Documentation/gpu/i915.rst | 16 +++++++++-- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 35 ++++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 15 ---------- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 357e9dfa7de1..60bd6e6403da 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -471,8 +471,20 @@ GuC-based command submission HuC --- -.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c - :doc: HuC Firmware +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c + :doc: HuC +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c + :functions: intel_huc_auth + +HuC Memory Management +~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c + :doc: HuC Memory Management + +HuC Firmware Layout +~~~~~~~~~~~~~~~~~~~ +The HuC FW layout is the same as the GuC one, see `GuC Firmware Layout`_ DMC --- diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 33608a114d4e..8be515c8d0f0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -9,6 +9,34 @@ #include "intel_huc.h" #include "i915_drv.h" +/** + * DOC: HuC + * + * The HuC is a dedicated microcontroller for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can directly use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * The kernel driver is only responsible for loading the HuC firmware and + * triggering its security authentication, which is performed by the GuC. For + * The GuC to correctly perform the authentication, the HuC binary must be + * loaded before the GuC one. Loading the HuC is optional; however, not using + * the HuC might negatively impact power usage and/or performance of media + * workloads, depending on the use-cases. + * + * See https://github.com/intel/media-driver for the latest details on HuC + * functionality. + */ + +/** + * DOC: HuC Memory Management + * + * Similarly to the GuC, the HuC can't do any memory allocations on its own, + * with the difference being that the allocations for HuC usage are handled by + * the userspace driver instead of the kernel one. The HuC accesses the memory + * via the PPGTT belonging to the context loaded on the VCS executing the + * HuC-specific commands. + */ + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc) * * Called after HuC and GuC firmware loading during intel_uc_init_hw(). * - * This function pins HuC firmware image object into GGTT. - * Then it invokes GuC action to authenticate passing the offset to RSA - * signature through intel_guc_auth_huc(). It then waits for 50ms for - * firmware verification ACK and unpins the object. + * This function invokes the GuC action to authenticate the HuC firmware, + * passing the offset of the RSA signature to intel_guc_auth_huc(). It then + * waits for up to 50ms for firmware verification ACK. */ int intel_huc_auth(struct intel_huc *huc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 74602487ed67..d654340d4d03 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -7,21 +7,6 @@ #include "intel_huc_fw.h" #include "i915_drv.h" -/** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading. - */ - /** * intel_huc_fw_init_early() - initializes HuC firmware struct * @huc: intel_huc struct From e9768bfe875f1c54d70d5a512e734993996513a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 13:52:36 +0100 Subject: [PATCH 121/159] drm/i915/selftests: Teach requests to use all available engines The request selftests straddle the boundary between checking the driver and the hardware. They are subject to the quirks of the underlying HW, but operate on top of the backend abstractions. The tests focus on the scheduler elements and so should check for interactions of the scheduler across all exposed engines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191016125236.17960-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_request.c | 287 +++++++++++------- 1 file changed, 175 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 0897a7b04944..30ae34f62176 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -37,6 +37,18 @@ #include "mock_drm.h" #include "mock_gem_device.h" +static unsigned int num_uabi_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + return count; +} + static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; @@ -281,7 +293,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) * that the fences were marked as signaled. */ - requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); if (!requests) return -ENOMEM; @@ -422,12 +434,11 @@ static int mock_breadcrumbs_smoketest(void *arg) * See __igt_breadcrumbs_smoketest(); */ - threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); if (!threads) return -ENOMEM; - t.contexts = - kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); if (!t.contexts) { ret = -ENOMEM; goto out_threads; @@ -511,15 +522,15 @@ static int live_nop_request(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct igt_live_test t; - unsigned int id; int err = -ENODEV; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. */ - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { unsigned long n, prime; IGT_TIMEOUT(end_time); ktime_t times[2] = {}; @@ -539,7 +550,8 @@ static int live_nop_request(void *arg) if (IS_ERR(request)) return PTR_ERR(request); - /* This space is left intentionally blank. + /* + * This space is left intentionally blank. * * We do not actually want to perform any * action with this request, we just want @@ -657,10 +669,10 @@ static int live_empty_request(void *arg) struct intel_engine_cs *engine; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; int err = 0; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. */ @@ -669,7 +681,7 @@ static int live_empty_request(void *arg) if (IS_ERR(batch)) return PTR_ERR(batch); - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); struct i915_request *request; unsigned long n, prime; @@ -801,63 +813,73 @@ static int recursive_batch_resolve(struct i915_vma *batch) static int live_all_engines(void *arg) { struct drm_i915_private *i915 = arg; + const unsigned int nengines = num_uabi_engines(i915); struct intel_engine_cs *engine; - struct i915_request *request[I915_NUM_ENGINES]; + struct i915_request **request; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines simultaneously. We + /* + * Check we can submit requests to all engines simultaneously. We * send a recursive batch to each engine - checking that we don't * block doing so, and that they don't complete too soon. */ + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - return err; + goto out_free; batch = recursive_batch(i915); if (IS_ERR(batch)) { err = PTR_ERR(batch); pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - return err; + goto out_free; } - for_each_engine(engine, i915, id) { - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + request[idx] = i915_request_create(engine->kernel_context); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed with err=%d\n", __func__, err); goto out_request; } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_request_await_object(request[id], batch->obj, 0); + err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); + idx++; } - for_each_engine(engine, i915, id) { - if (i915_request_completed(request[id])) { + idx = 0; + for_each_uabi_engine(engine, i915) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } + idx++; } err = recursive_batch_resolve(batch); @@ -866,10 +888,11 @@ static int live_all_engines(void *arg) goto out_request; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -878,43 +901,56 @@ static int live_all_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); - i915_request_put(request[id]); - request[id] = NULL; + GEM_BUG_ON(!i915_request_completed(request[idx])); + i915_request_put(request[idx]); + request[idx] = NULL; + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) - if (request[id]) - i915_request_put(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + if (request[idx]) + i915_request_put(request[idx]); + idx++; + } i915_vma_unpin(batch); i915_vma_put(batch); +out_free: + kfree(request); return err; } static int live_sequential_engines(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_request *request[I915_NUM_ENGINES] = {}; + const unsigned int nengines = num_uabi_engines(i915); + struct i915_request **request; struct i915_request *prev = NULL; struct intel_engine_cs *engine; struct igt_live_test t; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines sequentially, such + /* + * Check we can submit requests to all engines sequentially, such * that each successive request waits for the earlier ones. This * tests that we don't execute requests out of order, even though * they are running on independent engines. */ + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - return err; + goto out_free; - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { struct i915_vma *batch; batch = recursive_batch(i915); @@ -922,66 +958,69 @@ static int live_sequential_engines(void *arg) err = PTR_ERR(batch); pr_err("%s: Unable to create batch for %s, err=%d\n", __func__, engine->name, err); - return err; + goto out_free; } - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + request[idx] = i915_request_create(engine->kernel_context); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } if (prev) { - err = i915_request_await_dma_fence(request[id], + err = i915_request_await_dma_fence(request[idx], &prev->fence); if (err) { - i915_request_add(request[id]); + i915_request_add(request[idx]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_request_await_object(request[id], batch->obj, false); + err = i915_request_await_object(request[idx], + batch->obj, false); if (err == 0) - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); - prev = request[id]; + prev = request[idx]; + idx++; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - if (i915_request_completed(request[id])) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } - err = recursive_batch_resolve(request[id]->batch); + err = recursive_batch_resolve(request[idx]->batch); if (err) { pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); goto out_request; } - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -990,30 +1029,35 @@ static int live_sequential_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); + GEM_BUG_ON(!i915_request_completed(request[idx])); + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { u32 *cmd; - if (!request[id]) + if (!request[idx]) break; - cmd = i915_gem_object_pin_map(request[id]->batch->obj, + cmd = i915_gem_object_pin_map(request[idx]->batch->obj, I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; intel_gt_chipset_flush(engine->gt); - i915_gem_object_unpin_map(request[id]->batch->obj); + i915_gem_object_unpin_map(request[idx]->batch->obj); } - i915_vma_put(request[id]->batch); - i915_request_put(request[id]); + i915_vma_put(request[idx]->batch); + i915_request_put(request[idx]); + idx++; } +out_free: + kfree(request); return err; } @@ -1079,9 +1123,10 @@ static int live_parallel_engines(void *arg) __live_parallel_engineN, NULL, }; + const unsigned int nengines = num_uabi_engines(i915); struct intel_engine_cs *engine; - enum intel_engine_id id; int (* const *fn)(void *arg); + struct task_struct **tsk; int err = 0; /* @@ -1089,42 +1134,49 @@ static int live_parallel_engines(void *arg) * tests that we load up the system maximally. */ + tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + for (fn = func; !err && *fn; fn++) { - struct task_struct *tsk[I915_NUM_ENGINES] = {}; struct igt_live_test t; + unsigned int idx; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) break; - for_each_engine(engine, i915, id) { - tsk[id] = kthread_run(*fn, engine, - "igt/parallel:%s", - engine->name); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + tsk[idx] = kthread_run(*fn, engine, + "igt/parallel:%s", + engine->name); + if (IS_ERR(tsk[idx])) { + err = PTR_ERR(tsk[idx]); break; } - get_task_struct(tsk[id]); + get_task_struct(tsk[idx++]); } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { int status; - if (IS_ERR_OR_NULL(tsk[id])) - continue; + if (IS_ERR(tsk[idx])) + break; - status = kthread_stop(tsk[id]); + status = kthread_stop(tsk[idx]); if (status && !err) err = status; - put_task_struct(tsk[id]); + put_task_struct(tsk[idx++]); } if (igt_live_test_end(&t)) err = -EIO; } + kfree(tsk); return err; } @@ -1168,16 +1220,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) static int live_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; - struct smoketest t[I915_NUM_ENGINES]; - unsigned int ncpus = num_online_cpus(); + const unsigned int nengines = num_uabi_engines(i915); + const unsigned int ncpus = num_online_cpus(); unsigned long num_waits, num_fences; struct intel_engine_cs *engine; struct task_struct **threads; struct igt_live_test live; - enum intel_engine_id id; intel_wakeref_t wakeref; struct drm_file *file; - unsigned int n; + struct smoketest *smoke; + unsigned int n, idx; int ret = 0; /* @@ -1196,28 +1248,31 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_rpm; } - threads = kcalloc(ncpus * I915_NUM_ENGINES, - sizeof(*threads), - GFP_KERNEL); - if (!threads) { + smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); + if (!smoke) { ret = -ENOMEM; goto out_file; } - memset(&t[0], 0, sizeof(t[0])); - t[0].request_alloc = __live_request_alloc; - t[0].ncontexts = 64; - t[0].contexts = kmalloc_array(t[0].ncontexts, - sizeof(*t[0].contexts), - GFP_KERNEL); - if (!t[0].contexts) { + threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_smoke; + } + + smoke[0].request_alloc = __live_request_alloc; + smoke[0].ncontexts = 64; + smoke[0].contexts = kcalloc(smoke[0].ncontexts, + sizeof(*smoke[0].contexts), + GFP_KERNEL); + if (!smoke[0].contexts) { ret = -ENOMEM; goto out_threads; } - for (n = 0; n < t[0].ncontexts; n++) { - t[0].contexts[n] = live_context(i915, file); - if (!t[0].contexts[n]) { + for (n = 0; n < smoke[0].ncontexts; n++) { + smoke[0].contexts[n] = live_context(i915, file); + if (!smoke[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; } @@ -1227,42 +1282,47 @@ static int live_breadcrumbs_smoketest(void *arg) if (ret) goto out_contexts; - for_each_engine(engine, i915, id) { - t[id] = t[0]; - t[id].engine = engine; - t[id].max_batch = max_batches(t[0].contexts[0], engine); - if (t[id].max_batch < 0) { - ret = t[id].max_batch; + idx = 0; + for_each_uabi_engine(engine, i915) { + smoke[idx] = smoke[0]; + smoke[idx].engine = engine; + smoke[idx].max_batch = + max_batches(smoke[0].contexts[0], engine); + if (smoke[idx].max_batch < 0) { + ret = smoke[idx].max_batch; goto out_flush; } /* One ring interleaved between requests from all cpus */ - t[id].max_batch /= num_online_cpus() + 1; + smoke[idx].max_batch /= num_online_cpus() + 1; pr_debug("Limiting batches to %d requests on %s\n", - t[id].max_batch, engine->name); + smoke[idx].max_batch, engine->name); for (n = 0; n < ncpus; n++) { struct task_struct *tsk; tsk = kthread_run(__igt_breadcrumbs_smoketest, - &t[id], "igt/%d.%d", id, n); + &smoke[idx], "igt/%d.%d", idx, n); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); goto out_flush; } get_task_struct(tsk); - threads[id * ncpus + n] = tsk; + threads[idx * ncpus + n] = tsk; } + + idx++; } msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: + idx = 0; num_waits = 0; num_fences = 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (n = 0; n < ncpus; n++) { - struct task_struct *tsk = threads[id * ncpus + n]; + struct task_struct *tsk = threads[idx * ncpus + n]; int err; if (!tsk) @@ -1275,17 +1335,20 @@ static int live_breadcrumbs_smoketest(void *arg) put_task_struct(tsk); } - num_waits += atomic_long_read(&t[id].num_waits); - num_fences += atomic_long_read(&t[id].num_fences); + num_waits += atomic_long_read(&smoke[idx].num_waits); + num_fences += atomic_long_read(&smoke[idx].num_fences); + idx++; } pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); ret = igt_live_test_end(&live) ?: ret; out_contexts: - kfree(t[0].contexts); + kfree(smoke[0].contexts); out_threads: kfree(threads); +out_smoke: + kfree(smoke); out_file: mock_file_free(i915, file); out_rpm: From 1dfffa0051eae890ce36924651ecff60df5d779e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Oct 2019 18:13:52 +0200 Subject: [PATCH 122/159] drm/i915: Don't disable interrupts independently of the lock The locks (active.lock and rq->lock) need to be taken with disabled interrupts. This is done in i915_request_retire() by disabling the interrupts independently of the locks itself. While local_irq_disable()+spin_lock() equals spin_lock_irq() on vanilla it does not on PREEMPT_RT. Chris Wilson confirmed that local_irq_disable() was just introduced as an optimisation to avoid enabling/disabling interrupts during lock/unlock combo. Enable/disable interrupts as part of the locking instruction. Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191017161352.e5z3ugse7gxl5ari@linutronix.de --- drivers/gpu/drm/i915/i915_request.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f1cadad4e81c..4575f368455d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -206,14 +206,14 @@ static void remove_from_engine(struct i915_request *rq) * check that the rq still belongs to the newly locked engine. */ locked = READ_ONCE(rq->engine); - spin_lock(&locked->active.lock); + spin_lock_irq(&locked->active.lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { spin_unlock(&locked->active.lock); spin_lock(&engine->active.lock); locked = engine; } list_del(&rq->sched.link); - spin_unlock(&locked->active.lock); + spin_unlock_irq(&locked->active.lock); } bool i915_request_retire(struct i915_request *rq) @@ -242,8 +242,6 @@ bool i915_request_retire(struct i915_request *rq) &i915_request_timeline(rq)->requests)); rq->ring->head = rq->postfix; - local_irq_disable(); - /* * We only loosely track inflight requests across preemption, * and so we may find ourselves attempting to retire a _completed_ @@ -252,7 +250,7 @@ bool i915_request_retire(struct i915_request *rq) */ remove_from_engine(rq); - spin_lock(&rq->lock); + spin_lock_irq(&rq->lock); i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); @@ -267,9 +265,7 @@ bool i915_request_retire(struct i915_request *rq) __notify_execute_cb(rq); } GEM_BUG_ON(!list_empty(&rq->execute_cb)); - spin_unlock(&rq->lock); - - local_irq_enable(); + spin_unlock_irq(&rq->lock); remove_from_client(rq); list_del(&rq->link); From a50134b1983b8860e0e74e41579cbb19a7304ca7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 17 Oct 2019 17:18:52 +0100 Subject: [PATCH 123/159] drm/i915: Make for_each_engine_masked work on intel_gt Medium term goal is to eliminate the i915->engine[] array and to get there we have recently introduced equivalent array in intel_gt. Now we need to migrate the code further towards this state. This next step is to eliminate usage of i915->engines[] from the for_each_engine_masked iterator. For this to work we also need to use engine->id as index when populating the gt->engine[] array and adjust the default engine set indexing to use engine->legacy_idx instead of assuming gt->engines[] indexing. v2: * Populate gt->engine[] earlier. * Check that we don't duplicate engine->legacy_idx v3: * Work around the initialization order issue between default_engines() and intel_engines_driver_register() which sets engine->legacy_idx for now. It will be fixed properly later. v4: * Merge with forgotten v2.5. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191017161852.8836-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 ++++++++++--- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 +++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_user.c | 18 +++++------------- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 12 ++++++------ drivers/gpu/drm/i915/gvt/execlist.c | 4 ++-- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/i915_active.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 6 +++--- 11 files changed, 37 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5d8221c7ba83..7b01f4605f21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -203,15 +203,22 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + if (engine->legacy_idx == INVALID_ENGINE) + continue; + + GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES); + GEM_BUG_ON(e->engines[engine->legacy_idx]); + ce = intel_context_create(ctx, engine); if (IS_ERR(ce)) { - __free_engines(e, id); + __free_engines(e, e->num_engines + 1); return ERR_CAST(ce); } - e->engines[id] = ce; - e->num_engines = id + 1; + e->engines[engine->legacy_idx] = ce; + e->num_engines = max(e->num_engines, engine->legacy_idx); } + e->num_engines++; return e; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5051a1fd2565..e514c68b0713 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -277,6 +277,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) + return -EINVAL; + if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; @@ -293,6 +296,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); engine->id = id; + engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); engine->i915 = gt->i915; engine->gt = gt; @@ -328,6 +332,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) intel_engine_sanitize_mmio(engine); gt->engine_class[info->class][info->instance] = engine; + gt->engine[id] = engine; intel_engine_add_user(engine); gt->i915->engine[id] = engine; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 6199064f332b..3451be034caf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -148,6 +148,7 @@ enum intel_engine_id { VECS1, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES +#define INVALID_ENGINE ((enum intel_engine_id)-1) }; struct st_preempt_hang { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 77cd5de83930..7f7150a733f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -160,10 +160,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring) }; if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) - return -1; + return INVALID_ENGINE; if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max)) - return -1; + return INVALID_ENGINE; return map[ring->class].base + ring->instance; } @@ -171,23 +171,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring) static void add_legacy_ring(struct legacy_ring *ring, struct intel_engine_cs *engine) { - int idx; - if (engine->gt != ring->gt || engine->class != ring->class) { ring->gt = engine->gt; ring->class = engine->class; ring->instance = 0; } - idx = legacy_ring_idx(ring); - if (unlikely(idx == -1)) - return; - - GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine)); - ring->gt->engine[idx] = engine; - ring->instance++; - - engine->legacy_idx = idx; + engine->legacy_idx = legacy_ring_idx(ring); + if (engine->legacy_idx != INVALID_ENGINE) + ring->instance++; } void intel_engines_driver_register(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index b3619a2a5d0e..c99b6b2f38c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -186,7 +186,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine_masked(engine, i915, engine_mask, id) + for_each_engine_masked(engine, gt, engine_mask, id) gen8_clear_engine_error_register(engine); } } diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index c14dbeb3ccc3..b2af73984f93 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -237,7 +237,7 @@ static void hangcheck_declare_hang(struct intel_gt *gt, hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, gt->i915, hung, tmp) + for_each_engine_masked(engine, gt, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 477bfafdb103..b191b0745703 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -298,7 +298,7 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } @@ -432,7 +432,7 @@ static int gen11_reset_engines(struct intel_gt *gt, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; ret = gen11_lock_sfc(engine, &hw_mask); @@ -451,7 +451,7 @@ static int gen11_reset_engines(struct intel_gt *gt, * expiration). */ if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -510,7 +510,7 @@ static int gen8_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -536,7 +536,7 @@ static int gen8_reset_engines(struct intel_gt *gt, ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; @@ -1206,7 +1206,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * single reset fails. */ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f21b8fb5b37e..d6e7a1189bad 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -534,7 +534,7 @@ static void clean_execlist(struct intel_vgpu *vgpu, struct intel_vgpu_submission *s = &vgpu->submission; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { kfree(s->ring_scan_buffer[engine->id]); s->ring_scan_buffer[engine->id] = NULL; s->ring_scan_buffer_size[engine->id] = 0; @@ -548,7 +548,7 @@ static void reset_execlist(struct intel_vgpu *vgpu, struct intel_engine_cs *engine; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6850f1f40241..9ebb2534558b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -887,7 +887,7 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, intel_engine_mask_t tmp; /* free the unsubmited workloads in the queues. */ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index aa37c07004b9..7927b1a0c7a6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -590,8 +590,8 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; intel_engine_mask_t tmp, mask = engine->mask; + struct intel_gt *gt = engine->gt; struct llist_node *pos, *next; int err; @@ -603,7 +603,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, * We can then use the preallocated nodes in * i915_active_acquire_barrier() */ - for_each_engine_masked(engine, i915, mask, tmp) { + for_each_engine_masked(engine, gt, mask, tmp) { u64 idx = engine->kernel_context->timeline->fence_context; struct active_node *node; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 88956f37d96c..40e923b0c2c8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1415,10 +1415,10 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) /* Iterator over subset of engines selected by mask */ -#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \ +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ + for ((tmp__) = (mask__) & INTEL_INFO((gt__)->i915)->engine_mask; \ (tmp__) ? \ - ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ + ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) #define rb_to_uabi_engine(rb) \ From 5d904e3c5d40c3939d53659e9c473500c3c24039 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 17 Oct 2019 10:45:00 +0100 Subject: [PATCH 124/159] drm/i915: Pass in intel_gt at some for_each_engine sites Where the function, or code segment, operates on intel_gt, we need to start passing it instead of i915 to for_each_engine(_masked). This is another partial step in migration of i915->engines[] to gt->engines[]. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191017094500.21831-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 8 ++-- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- drivers/gpu/drm/i915/gt/intel_hangcheck.c | 4 +- drivers/gpu/drm/i915/gt/intel_reset.c | 18 ++++----- drivers/gpu/drm/i915/gt/selftest_context.c | 6 +-- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 20 +++++----- drivers/gpu/drm/i915/gt/selftest_lrc.c | 38 +++++++++---------- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 8 ++-- .../gpu/drm/i915/gt/selftest_workarounds.c | 10 ++--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +-- drivers/gpu/drm/i915/selftests/igt_reset.c | 4 +- 15 files changed, 67 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e514c68b0713..051734c9b733 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1123,7 +1123,7 @@ bool intel_engines_are_idle(struct intel_gt *gt) if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1136,7 +1136,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->set_default_submission(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index c99b6b2f38c2..1c4b6c9642ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -197,7 +197,7 @@ static void gen6_check_faults(struct intel_gt *gt) enum intel_engine_id id; u32 fault; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { fault = GEN6_RING_FAULT_REG_READ(engine); if (fault & RING_FAULT_VALID) { DRM_DEBUG_DRIVER("Unexpected fault\n" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index bd1bd3e00a94..b866d5b1eee0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -136,16 +136,16 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) intel_uc_sanitize(>->uc); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) if (engine->reset.prepare) engine->reset.prepare(engine); if (reset_engines(gt) || force) { - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); } - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); } @@ -177,7 +177,7 @@ int intel_gt_resume(struct intel_gt *gt) intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce; intel_engine_pm_get(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index cbb4069b11e1..b73229a84d85 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -25,7 +25,7 @@ static void flush_submission(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) intel_engine_flush_submission(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index b2af73984f93..0fdef00af9e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -281,7 +281,7 @@ static void hangcheck_elapsed(struct work_struct *work) */ intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct hangcheck hc; intel_engine_breadcrumbs_irq(engine); @@ -303,7 +303,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (GEM_SHOW_DEBUG() && (hung | stuck)) { struct drm_printer p = drm_debug_printer("hangcheck"); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_is_idle(engine)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b191b0745703..bf8d1ed4b1d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -682,7 +682,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); @@ -712,7 +712,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); i915_gem_restore_fences(gt->ggtt); @@ -733,7 +733,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { reset_finish_engine(engine); if (awake & engine->mask) intel_engine_pm_put(engine); @@ -769,7 +769,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -786,7 +786,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) __intel_gt_reset(gt, ALL_ENGINES); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->submit_request = nop_submit_request; /* @@ -798,7 +798,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->cancel_requests(engine); reset_finish(gt, awake); @@ -934,7 +934,7 @@ static int resume(struct intel_gt *gt) enum intel_engine_id id; int ret; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { ret = engine->resume(engine); if (ret) return ret; @@ -1234,7 +1234,7 @@ void intel_gt_handle_error(struct intel_gt *gt, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, gt->i915, tmp) { + for_each_engine(engine, gt, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) wait_on_bit(>->reset.flags, @@ -1244,7 +1244,7 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, gt->i915, tmp) + for_each_engine(engine, gt, tmp) clear_bit_unlock(I915_RESET_ENGINE + engine->id, >->reset.flags); clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 7c838a57e174..f63a26a3e620 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -159,7 +159,7 @@ static int live_context_size(void *arg) if (IS_ERR(fixme)) return PTR_ERR(fixme); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct { struct drm_i915_gem_object *state; void *pinned; @@ -305,7 +305,7 @@ static int live_active_context(void *arg) goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_active_context(engine, fixme); if (err) break; @@ -415,7 +415,7 @@ static int live_remote_context(void *arg) goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_remote_context(engine, fixme); if (err) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 3a1419376912..20b9c83f43ad 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -25,7 +25,7 @@ static int live_engine_pm(void *arg) } GEM_BUG_ON(intel_gt_pm_is_awake(gt)); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { const typeof(*igt_atomic_phases) *p; for (p = igt_atomic_phases; p->name; p++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 569a4105d49e..8e0016464325 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -323,7 +323,7 @@ static int igt_hang_sanitycheck(void *arg) if (err) return err; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_wedge_me w; long timeout; @@ -400,7 +400,7 @@ static int igt_reset_nop(void *arg) reset_count = i915_reset_count(global); count = 0; do { - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { int i; for (i = 0; i < 16; i++) { @@ -471,7 +471,7 @@ static int igt_reset_nop_engine(void *arg) } i915_gem_context_clear_bannable(ctx); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned int count; IGT_TIMEOUT(end_time); @@ -560,7 +560,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) return err; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; IGT_TIMEOUT(end_time); @@ -782,7 +782,7 @@ static int __igt_reset_engines(struct intel_gt *gt, h.ctx->sched.priority = 1024; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; @@ -800,7 +800,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { struct task_struct *tsk; threads[tmp].resets = @@ -914,7 +914,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } unwind: - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { int ret; if (!threads[tmp].task) @@ -1335,7 +1335,7 @@ static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (engine == exclude) continue; @@ -1363,7 +1363,7 @@ static int igt_reset_queue(void *arg) if (err) goto unlock; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1651,7 +1651,7 @@ static int igt_reset_engines_atomic(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 2868371c609e..007d6203fe0e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -123,7 +123,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) goto err_spin; err = 0; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce[2] = {}; struct i915_request *rq[2]; struct igt_live_test t; @@ -384,7 +384,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, return PTR_ERR(head); i915_request_get(head); - for_each_engine(engine, outer->i915, id) { + for_each_engine(engine, outer->gt, id) { for (i = 0; i < count; i++) { struct i915_request *rq; @@ -456,7 +456,7 @@ static int live_timeslice_preempt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_has_preemption(engine)) continue; @@ -532,7 +532,7 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *lo, *hi; struct igt_live_test t; u32 *cs; @@ -708,7 +708,7 @@ static int live_preempt(void *arg) ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; @@ -804,7 +804,7 @@ static int live_late_preempt(void *arg) /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ ctx_lo->sched.priority = I915_USER_PRIORITY(1); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; @@ -929,7 +929,7 @@ static int live_nopreempt(void *arg) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; if (!intel_engine_has_preemption(engine)) @@ -1040,7 +1040,7 @@ static int live_suppress_self_preempt(void *arg) if (preempt_client_init(gt, &b)) goto err_client_a; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; int depth; @@ -1207,7 +1207,7 @@ static int live_suppress_wait_preempt(void *arg) if (preempt_client_init(gt, &client[3])) /* bystander */ goto err_client_2; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { int depth; if (!intel_engine_has_preemption(engine)) @@ -1318,7 +1318,7 @@ static int live_chain_preempt(void *arg) if (preempt_client_init(gt, &lo)) goto err_client_hi; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; @@ -1467,7 +1467,7 @@ static int live_preempt_hang(void *arg) ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq; if (!intel_engine_has_preemption(engine)) @@ -1656,7 +1656,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - for_each_engine(engine, smoke->gt->i915, id) { + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; if (!(flags & BATCH)) @@ -1673,7 +1673,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) } count = 0; - for_each_engine(engine, smoke->gt->i915, id) { + for_each_engine(engine, smoke->gt, id) { int status; if (IS_ERR_OR_NULL(tsk[id])) @@ -1702,7 +1702,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count = 0; do { - for_each_engine(smoke->engine, smoke->gt->i915, id) { + for_each_engine(smoke->engine, smoke->gt, id) { struct i915_gem_context *ctx = smoke_context(smoke); int err; @@ -1937,7 +1937,7 @@ static int live_virtual_engine(void *arg) if (USES_GUC_SUBMISSION(gt->i915)) return 0; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = nop_virtual_engine(gt, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", @@ -2270,7 +2270,7 @@ static int bond_virtual_engine(struct intel_gt *gt, err = 0; rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, gt->i915, id) { + for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; if (master->class == class) @@ -2508,7 +2508,7 @@ static int live_lrc_layout(void *arg) return -ENOMEM; err = 0; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { u32 *hw, *lrc; int dw; @@ -2701,7 +2701,7 @@ static int live_lrc_state(void *arg) goto out_close; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_lrc_state(fixme, engine, scratch); if (err) break; @@ -2844,7 +2844,7 @@ static int live_gpr_clear(void *arg) goto out_close; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_gpr_clear(fixme, engine, scratch); if (err) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 419b38fa7828..6efb9221b7fa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -125,7 +125,7 @@ static int igt_atomic_engine_reset(void *arg) if (!igt_force_reset(gt)) goto out_unlock; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { tasklet_disable_nosync(&engine->execlists.tasklet); intel_engine_pm_get(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 473a56b0ae46..dac86f699a4c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -519,7 +519,7 @@ static int live_hwsp_engine(void *arg) return -ENOMEM; count = 0; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_can_store_dword(engine)) continue; @@ -594,7 +594,7 @@ static int live_hwsp_alternate(void *arg) count = 0; for (n = 0; n < NUM_TIMELINES; n++) { - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_timeline *tl; struct i915_request *rq; @@ -666,7 +666,7 @@ static int live_hwsp_wrap(void *arg) if (err) goto out_free; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -766,7 +766,7 @@ static int live_hwsp_recycle(void *arg) */ count = 0; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { IGT_TIMEOUT(end_time); if (!intel_engine_can_store_dword(engine)) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 8856c6c46cc4..ef02920cec29 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -69,7 +69,7 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) gt_init_workarounds(gt->i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; wa_init_start(wal, "REF", engine->name); @@ -88,7 +88,7 @@ reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) intel_wa_list_free(&lists->engine[id].wa_list); intel_wa_list_free(&lists->gt_wa_list); @@ -726,7 +726,7 @@ static int live_dirty_whitelist(void *arg) goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (engine->whitelist.count == 0) continue; @@ -750,7 +750,7 @@ static int live_reset_whitelist(void *arg) /* If we reset the gpu, we should not lose the RING_NONPRIV */ igt_global_reset_lock(gt); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (engine->whitelist.count == 0) continue; @@ -1048,7 +1048,7 @@ static int live_isolated_whitelist(void *arg) i915_vm_put(vm); } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!engine->kernel_context->vm) continue; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 849a44add424..009e54a3764f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1020,7 +1020,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) * to GuC */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route USER_INTERRUPT to Host, all others are sent to GuC. */ @@ -1068,7 +1068,7 @@ static void guc_interrupts_release(struct intel_gt *gt) */ irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route all GT interrupts to the host */ @@ -1151,7 +1151,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { engine->set_default_submission = guc_set_default_submission; engine->set_default_submission(engine); } diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 7ec8f8b049c6..9f8590b868a9 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -22,7 +22,7 @@ void igt_global_reset_lock(struct intel_gt *gt) wait_event(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags)); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { while (test_and_set_bit(I915_RESET_ENGINE + id, >->reset.flags)) wait_on_bit(>->reset.flags, I915_RESET_ENGINE + id, @@ -35,7 +35,7 @@ void igt_global_reset_unlock(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) clear_bit(I915_RESET_ENGINE + id, >->reset.flags); clear_bit(I915_RESET_BACKOFF, >->reset.flags); From fb26eee0600dbd6b2eb20356b7d425cb71413ea2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Oct 2019 10:05:14 +0100 Subject: [PATCH 125/159] drm/i915/pmu: Fix uninitialized variable on error path If name allocation failed the log message will contain an uninitialized error code which can be confusing. Fixes: 05488673a4d4 ("drm/i915/pmu: Support multiple GPUs") Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191018090514.1818-1-tvrtko.ursulin@linux.intel.com [tursulin: Commit message spelling fix.] --- drivers/gpu/drm/i915/i915_pmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 00fe401868c4..144c32eed045 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1094,7 +1094,7 @@ static bool is_igp(struct drm_i915_private *i915) void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - int ret; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { dev_info(i915->drm.dev, "PMU not supported for this GPU."); @@ -1102,10 +1102,8 @@ void i915_pmu_register(struct drm_i915_private *i915) } i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) { - ret = -ENOMEM; + if (!i915_pmu_events_attr_group.attrs) goto err; - } pmu->base.attr_groups = i915_pmu_attr_groups; pmu->base.task_ctx_nr = perf_invalid_context; From 253a774bb08b549488047c3fb4afc0faf6f194ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Oct 2019 08:20:27 +0100 Subject: [PATCH 126/159] drm/i915/execlists: Don't merely skip submission if maybe timeslicing Normally, we try and skip submission if ELSP[1] is filled. However, we may desire to enable timeslicing due to the queue priority, even if ELSP[1] itself does not require timeslicing. That is the queue is equal priority to ELSP[0] and higher priority then ELSP[1]. Previously, we would wait until the context switch to preempt the current ELSP[1], but with timeslicing, we want to preempt ELSP[0] and replace it with the queue. In writing the test case, it become quickly apparent that we were also suppressing the tasklet during promotion and so failing to notice when the queue started requiring timeslicing. Fixes: 2229adc81380 ("drm/i915/execlist: Trim immediate timeslice expiry") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191018072027.31948-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 22 +++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 168 ++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_scheduler.c | 17 ++- drivers/gpu/drm/i915/i915_scheduler.h | 18 --- 4 files changed, 196 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e9fe9f79cedd..d0088d020220 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -352,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * However, the priority hint is a mere hint that we may need to * preempt. If that hint is stale or we may be trying to preempt * ourselves, ignore the request. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. */ - last_prio = effective_prio(rq); - if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) return false; /* @@ -1509,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * submission. */ if (!list_is_last(&last->sched.link, - &engine->active.requests)) + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ + if (!execlists->timer.expires && + need_timeslice(engine, last)) + mod_timer(&execlists->timer, + jiffies + 1); return; + } /* * WaIdleLiteRestore:bdw,skl diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 007d6203fe0e..5dc679781a08 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -325,7 +325,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) if (IS_ERR(rq)) goto out_ctx; - err = emit_semaphore_chain(rq, vma, idx); + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = emit_semaphore_chain(rq, vma, idx); + if (err == 0) + i915_request_get(rq); i915_request_add(rq); if (err) rq = ERR_PTR(err); @@ -338,10 +344,10 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) static int release_queue(struct intel_engine_cs *engine, struct i915_vma *vma, - int idx) + int idx, int prio) { struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + .priority = prio, }; struct i915_request *rq; u32 *cs; @@ -362,9 +368,15 @@ release_queue(struct intel_engine_cs *engine, *cs++ = 1; intel_ring_advance(rq, cs); + + i915_request_get(rq); i915_request_add(rq); + local_bh_disable(); engine->schedule(rq, &attr); + local_bh_enable(); /* kick tasklet */ + + i915_request_put(rq); return 0; } @@ -383,7 +395,6 @@ slice_semaphore_queue(struct intel_engine_cs *outer, if (IS_ERR(head)) return PTR_ERR(head); - i915_request_get(head); for_each_engine(engine, outer->gt, id) { for (i = 0; i < count; i++) { struct i915_request *rq; @@ -393,10 +404,12 @@ slice_semaphore_queue(struct intel_engine_cs *outer, err = PTR_ERR(rq); goto out; } + + i915_request_put(rq); } } - err = release_queue(outer, vma, n); + err = release_queue(outer, vma, n, INT_MAX); if (err) goto out; @@ -482,6 +495,150 @@ static int live_timeslice_preempt(void *arg) return err; } +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static void wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + do { + cond_resched(); + intel_engine_flush_submission(engine); + } while (!i915_request_is_active(rq)); +} + +static int live_timeslice_queue(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * Make sure that even if ELSP[0] and ELSP[1] are filled with + * timeslicing between them disabled, we *do* enable timeslicing + * if the queue demands it. (Normally, we do not submit if + * ELSP[1] is already occupied, so must rely on timeslicing to + * eject ELSP[0] in favour of the queue.) + */ + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq, *nop; + + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + /* ELSP[0]: semaphore wait */ + rq = semaphore_queue(engine, vma, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + engine->schedule(rq, &attr); + wait_for_submit(engine, rq); + + /* ELSP[1]: nop request */ + nop = nop_request(engine); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_request_put(rq); + goto err_pin; + } + wait_for_submit(engine, nop); + i915_request_put(nop); + + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Queue: semaphore signal, matching priority as semaphore */ + err = release_queue(engine, vma, 1, effective_prio(rq)); + if (err) { + i915_request_put(rq); + goto err_pin; + } + + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.timer.expires) && + !i915_request_completed(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + GEM_TRACE_DUMP(); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EINVAL; + } + + /* Timeslice every jiffie, so within 2 we should signal */ + if (i915_request_wait(rq, 0, 3) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to timeslice into queue\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EIO; + } + i915_request_put(rq); + if (err) + break; + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return err; +} + static int live_busywait_preempt(void *arg) { struct intel_gt *gt = arg; @@ -2437,6 +2594,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 7b84ebca2901..0ca40f6bf08c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -177,9 +177,22 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority | __NO_PREEMPTION; } +static inline bool need_preempt(int prio, int active) +{ + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + */ + return prio >= max(I915_PRIORITY_NORMAL, active); +} + static void kick_submission(struct intel_engine_cs *engine, int prio) { - const struct i915_request *inflight = *engine->execlists.active; + const struct i915_request *inflight = + execlists_active(&engine->execlists); /* * If we are already the currently executing context, don't @@ -188,7 +201,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio) * tasklet, i.e. we have not change the priority queue * sufficiently to oust the running context. */ - if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight))) + if (!inflight || !need_preempt(prio, rq_prio(inflight))) return; tasklet_hi_schedule(&engine->execlists.tasklet); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 7eefccff39bf..07d243acf553 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -52,22 +52,4 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } -static inline bool i915_scheduler_need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. - */ - return prio > max(I915_PRIORITY_NORMAL - 1, active); -} - #endif /* _I915_SCHEDULER_H_ */ From 3aae9d08532c8e542ad2787b3f1c6b4ee14db32b Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Fri, 18 Oct 2019 10:07:49 +0100 Subject: [PATCH 127/159] drm/i915: enumerate and init each supported region Nothing to enumerate yet... Signed-off-by: Abdiel Janulgue Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191018090751.28295-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 7 +++ drivers/gpu/drm/i915/i915_gem_gtt.c | 56 ++++++++++++++++--- drivers/gpu/drm/i915/intel_device_info.h | 2 + drivers/gpu/drm/i915/intel_memory_region.h | 4 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ++ 5 files changed, 68 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 40e923b0c2c8..faf645802710 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -684,6 +684,8 @@ struct i915_gem_mm { */ struct vfsmount *gemfs; + struct intel_memory_region *regions[INTEL_REGION_UNKNOWN]; + struct notifier_block oom_notifier; struct notifier_block vmap_notifier; struct shrinker shrinker; @@ -1783,6 +1785,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) +#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) + #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) /* Having GuC is not the same as using GuC */ @@ -2001,6 +2005,9 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915); +int i915_gem_init_memory_regions(struct drm_i915_private *i915); + /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7b15bb891970..7b8e30c72f86 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2744,6 +2744,52 @@ int i915_init_ggtt(struct drm_i915_private *i915) return 0; } +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915) +{ + int i; + + i915_gem_cleanup_stolen(i915); + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { + struct intel_memory_region *region = i915->mm.regions[i]; + + if (region) + intel_memory_region_put(region); + } +} + +int i915_gem_init_memory_regions(struct drm_i915_private *i915) +{ + int err, i; + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. + */ + /* XXX: stolen will become a region at some point */ + err = i915_gem_init_stolen(i915); + if (err) + return err; + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { + struct intel_memory_region *mem = ERR_PTR(-ENODEV); + + if (!HAS_REGION(i915, BIT(i))) + continue; + + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + goto out_cleanup; + } + } + + return 0; + +out_cleanup: + i915_gem_cleanup_memory_regions(i915); + return err; +} + static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; @@ -2781,6 +2827,8 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) { struct pagevec *pvec; + i915_gem_cleanup_memory_regions(i915); + fini_aliasing_ppgtt(&i915->ggtt); ggtt_cleanup_hw(&i915->ggtt); @@ -2790,8 +2838,6 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) set_pages_array_wb(pvec->pages, pvec->nr); __pagevec_release(pvec); } - - i915_gem_cleanup_stolen(i915); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -3240,11 +3286,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. - */ - ret = i915_gem_init_stolen(dev_priv); + ret = i915_gem_init_memory_regions(dev_priv); if (ret) goto out_gtt_cleanup; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0cdc2465534b..e9940f932d26 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -160,6 +160,8 @@ struct intel_device_info { unsigned int page_sizes; /* page sizes supported by the HW */ + u32 memory_regions; /* regions supported by the HW */ + u32 display_mmio_offset; u8 pipe_mask; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 52c141b6108c..2c165a7a5ab4 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -18,6 +18,10 @@ struct drm_i915_gem_object; struct intel_memory_region; struct sg_table; +enum intel_region_id { + INTEL_REGION_UNKNOWN = 0, /* Should be last */ +}; + #define I915_ALLOC_MIN_PAGE_SIZE BIT(0) #define I915_ALLOC_CONTIGUOUS BIT(1) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 9c22d41b30cd..bc5e3c67409f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -74,6 +74,8 @@ static void mock_device_release(struct drm_device *dev) i915_gemfs_fini(i915); + i915_gem_cleanup_memory_regions(i915); + drm_mode_config_cleanup(&i915->drm); drm_dev_fini(&i915->drm); @@ -196,6 +198,10 @@ struct drm_i915_private *mock_gem_device(void) WARN_ON(i915_gemfs_init(i915)); + err = i915_gem_init_memory_regions(i915); + if (err) + goto err_context; + return i915; err_context: From da1184cd41d4c6b316a937ac1da5825807e8f6fb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 18 Oct 2019 10:07:50 +0100 Subject: [PATCH 128/159] drm/i915: treat shmem as a region Convert shmem to an intel_memory_region. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Abdiel Janulgue Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191018090751.28295-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 5 +- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 5 +- drivers/gpu/drm/i915/gem/i915_gem_region.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 79 ++++++++++++++----- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 9 --- drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++++ drivers/gpu/drm/i915/i915_pci.c | 29 +++++-- drivers/gpu/drm/i915/intel_memory_region.c | 10 +++ drivers/gpu/drm/i915/intel_memory_region.h | 30 ++++++- .../gpu/drm/i915/selftests/mock_gem_device.c | 7 +- 11 files changed, 150 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index c5e14c9c805c..85921796851f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -25,10 +25,11 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size); +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, - const void *data, size_t size); + const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 768356908160..8043ff63d73f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -16,6 +16,7 @@ #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_object.h" +#include "i915_gem_region.h" #include "i915_scatterlist.h" static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) @@ -191,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) /* Perma-pin (until release) the physical set of pages */ __i915_gem_object_pin_pages(obj); - if (!IS_ERR_OR_NULL(pages)) + if (!IS_ERR_OR_NULL(pages)) { i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_release_memory_region(obj); + } mutex_unlock(&obj->mm.lock); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index d3f7733bc7ed..2f7bcfb9c964 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -6,6 +6,7 @@ #include "intel_memory_region.h" #include "i915_gem_region.h" #include "i915_drv.h" +#include "i915_trace.h" void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, @@ -165,5 +166,9 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (overflows_type(size, obj->base.size)) return ERR_PTR(-E2BIG); - return mem->ops->create_object(mem, size, flags); + obj = mem->ops->create_object(mem, size, flags); + if (!IS_ERR(obj)) + trace_i915_gem_object_create(obj); + + return obj; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4c4954e8ce0a..be68b76e13b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -7,7 +7,9 @@ #include #include +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gemfs.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_trace.h" @@ -26,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec) static int shmem_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; const unsigned long page_count = obj->base.size / PAGE_SIZE; unsigned long i; struct address_space *mapping; @@ -52,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * If there's no chance of allocating enough pages for the whole * object, bail early. */ - if (page_count > totalram_pages()) + if (obj->base.size > resource_size(&mem->region)) return -ENOMEM; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -417,6 +420,8 @@ shmem_pwrite(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { + i915_gem_object_release_memory_region(obj); + fput(obj->base.filp); } @@ -434,9 +439,9 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .release = shmem_release, }; -static int create_shmem(struct drm_i915_private *i915, - struct drm_gem_object *obj, - size_t size) +static int __create_shmem(struct drm_i915_private *i915, + struct drm_gem_object *obj, + resource_size_t size) { unsigned long flags = VM_NORESERVE; struct file *filp; @@ -455,31 +460,23 @@ static int create_shmem(struct drm_i915_private *i915, return 0; } -struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) +static struct drm_i915_gem_object * +create_shmem(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - /* There is a prevalence of the assumption that we fit the object's - * page count inside a 32bit _signed_ variable. Let's document this and - * catch if we ever need to fix it. In the meantime, if you do spot - * such a local variable, please consider fixing! - */ - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - obj = i915_gem_object_alloc(); if (!obj) return ERR_PTR(-ENOMEM); - ret = create_shmem(i915, &obj->base, size); + ret = __create_shmem(i915, &obj->base, size); if (ret) goto fail; @@ -518,7 +515,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) i915_gem_object_set_cache_coherency(obj, cache_level); - trace_i915_gem_object_create(obj); + i915_gem_object_init_memory_region(obj, mem, 0); return obj; @@ -527,14 +524,22 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) return ERR_PTR(ret); } +struct drm_i915_gem_object * +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + size, 0); +} + /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, - const void *data, size_t size) + const void *data, resource_size_t size) { struct drm_i915_gem_object *obj; struct file *file; - size_t offset; + resource_size_t offset; int err; obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); @@ -577,3 +582,35 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, i915_gem_object_put(obj); return ERR_PTR(err); } + +static int init_shmem(struct intel_memory_region *mem) +{ + int err; + + err = i915_gemfs_init(mem->i915); + if (err) { + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", + err); + } + + return 0; /* Don't error, we can simply fallback to the kernel mnt */ +} + +static void release_shmem(struct intel_memory_region *mem) +{ + i915_gemfs_fini(mem->i915); +} + +static const struct intel_memory_region_ops shmem_region_ops = { + .init = init_shmem, + .release = release_shmem, + .create_object = create_shmem, +}; + +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + &shmem_region_ops); +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index faf645802710..db3c3a025a03 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1876,6 +1876,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915); + static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aef5cf2227dd..dd0a3271b4e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -45,7 +45,6 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" -#include "gem/i915_gemfs.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -1432,16 +1431,10 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) void i915_gem_init_early(struct drm_i915_private *dev_priv) { - int err; - i915_gem_init__mm(dev_priv); i915_gem_init__pm(dev_priv); spin_lock_init(&dev_priv->fb_tracking.lock); - - err = i915_gemfs_init(dev_priv); - if (err) - DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) @@ -1450,8 +1443,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.shrink_count); - - i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7b8e30c72f86..263f1cea0b64 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2773,14 +2773,29 @@ int i915_gem_init_memory_regions(struct drm_i915_private *i915) for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { struct intel_memory_region *mem = ERR_PTR(-ENODEV); + u32 type; if (!HAS_REGION(i915, BIT(i))) continue; + type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + switch (type) { + case INTEL_MEMORY_SYSTEM: + mem = i915_gem_shmem_setup(i915); + break; + } + if (IS_ERR(mem)) { err = PTR_ERR(mem); + DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); goto out_cleanup; } + + mem->id = intel_region_map[i]; + mem->type = type; + mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + + i915->mm.regions[i] = mem; } return 0; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1cbf3998b361..e55ab2724996 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -151,6 +151,9 @@ #define GEN_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K +#define GEN_DEFAULT_REGIONS \ + .memory_regions = REGION_SMEM + #define I830_FEATURES \ GEN(2), \ .is_mobile = 1, \ @@ -168,7 +171,8 @@ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define I845_FEATURES \ GEN(2), \ @@ -185,7 +189,8 @@ I845_PIPE_OFFSETS, \ I845_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i830_info = { I830_FEATURES, @@ -219,7 +224,8 @@ static const struct intel_device_info intel_i865g_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i915g_info = { GEN3_FEATURES, @@ -304,7 +310,8 @@ static const struct intel_device_info intel_pineview_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I965_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i965g_info = { GEN4_FEATURES, @@ -354,7 +361,8 @@ static const struct intel_device_info intel_gm45_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_ironlake_d_info = { GEN5_FEATURES, @@ -384,7 +392,8 @@ static const struct intel_device_info intel_ironlake_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define SNB_D_PLATFORM \ GEN6_FEATURES, \ @@ -432,7 +441,8 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define IVB_D_PLATFORM \ GEN7_FEATURES, \ @@ -493,6 +503,7 @@ static const struct intel_device_info intel_valleyview_info = { I9XX_CURSOR_OFFSETS, I965_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; #define G75_FEATURES \ @@ -587,6 +598,7 @@ static const struct intel_device_info intel_cherryview_info = { CHV_CURSOR_OFFSETS, CHV_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; #define GEN9_DEFAULT_PAGE_SIZES \ @@ -661,7 +673,8 @@ static const struct intel_device_info intel_skylake_gt4_info = { HSW_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN9_DEFAULT_PAGE_SIZES + GEN9_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_broxton_info = { GEN9_LP_FEATURES, diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index c0299435d75e..72f98a111de1 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -6,6 +6,16 @@ #include "intel_memory_region.h" #include "i915_drv.h" +/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. */ +#define REGION_MAP(type, inst) \ + BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst) + +const u32 intel_region_map[] = { + [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0), + [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0), + [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), +}; + static u64 intel_memory_region_free_pages(struct intel_memory_region *mem, struct list_head *blocks) diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2c165a7a5ab4..49b059a2be70 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -18,13 +18,39 @@ struct drm_i915_gem_object; struct intel_memory_region; struct sg_table; -enum intel_region_id { - INTEL_REGION_UNKNOWN = 0, /* Should be last */ +/** + * Base memory type + */ +enum intel_memory_type { + INTEL_MEMORY_SYSTEM = 0, + INTEL_MEMORY_LOCAL, + INTEL_MEMORY_STOLEN, }; +enum intel_region_id { + INTEL_REGION_SMEM = 0, + INTEL_REGION_LMEM, + INTEL_REGION_STOLEN, + INTEL_REGION_UNKNOWN, /* Should be last */ +}; + +#define REGION_SMEM BIT(INTEL_REGION_SMEM) +#define REGION_LMEM BIT(INTEL_REGION_LMEM) +#define REGION_STOLEN BIT(INTEL_REGION_STOLEN) + +#define INTEL_MEMORY_TYPE_SHIFT 16 + +#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT)) +#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff)) + #define I915_ALLOC_MIN_PAGE_SIZE BIT(0) #define I915_ALLOC_CONTIGUOUS BIT(1) +/** + * Memory regions encoded as type | instance + */ +extern const u32 intel_region_map[]; + struct intel_memory_region_ops { unsigned int flags; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index bc5e3c67409f..cb8c3a501cc7 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -72,8 +72,6 @@ static void mock_device_release(struct drm_device *dev) mock_fini_ggtt(&i915->ggtt); destroy_workqueue(i915->wq); - i915_gemfs_fini(i915); - i915_gem_cleanup_memory_regions(i915); drm_mode_config_cleanup(&i915->drm); @@ -165,7 +163,10 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; + mkwrite_device_info(i915)->memory_regions = REGION_SMEM; + mock_uncore_init(&i915->uncore, i915); + i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ @@ -196,8 +197,6 @@ struct drm_i915_private *mock_gem_device(void) intel_engines_driver_register(i915); - WARN_ON(i915_gemfs_init(i915)); - err = i915_gem_init_memory_regions(i915); if (err) goto err_context; From 72405c3d7850c85dccbe4629720004ae933c2add Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 18 Oct 2019 10:07:51 +0100 Subject: [PATCH 129/159] drm/i915: treat stolen as a region Convert stolen memory over to a region object. Still leaves open the question with what to do with pre-allocated objects... Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Abdiel Janulgue Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191018090751.28295-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 65 +++++++++++++++++++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 3 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +---- drivers/gpu/drm/i915/i915_pci.c | 2 +- 4 files changed, 61 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index c76260ce13e3..57cd8bc2657c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -10,6 +10,7 @@ #include #include +#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gem_stolen.h" @@ -150,7 +151,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, return 0; } -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +static void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) { if (!drm_mm_initialized(&dev_priv->mm.stolen)) return; @@ -355,7 +356,7 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; @@ -539,6 +540,9 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); + + if (obj->mm.region) + i915_gem_object_release_memory_region(obj); } static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { @@ -548,8 +552,9 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { }; static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) +__i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + struct drm_mm_node *stolen, + struct intel_memory_region *mem) { struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -571,6 +576,9 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, if (err) goto cleanup; + if (mem) + i915_gem_object_init_memory_region(obj, mem, 0); + return obj; cleanup: @@ -579,10 +587,12 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, return ERR_PTR(err); } -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *dev_priv = mem->i915; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; @@ -603,7 +613,7 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, goto err_free; } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); + obj = __i915_gem_object_create_stolen(dev_priv, stolen, mem); if (IS_ERR(obj)) goto err_remove; @@ -616,6 +626,43 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, return obj; } +struct drm_i915_gem_object * +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size) +{ + return i915_gem_object_create_region(dev_priv->mm.regions[INTEL_REGION_STOLEN], + size, I915_BO_ALLOC_CONTIGUOUS); +} + +static int init_stolen(struct intel_memory_region *mem) +{ + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. + */ + return i915_gem_init_stolen(mem->i915); +} + +static void release_stolen(struct intel_memory_region *mem) +{ + i915_gem_cleanup_stolen(mem->i915); +} + +static const struct intel_memory_region_ops i915_region_stolen_ops = { + .init = init_stolen, + .release = release_stolen, + .create_object = _i915_gem_object_create_stolen, +}; + +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, + intel_graphics_stolen_res.start, + resource_size(&intel_graphics_stolen_res), + PAGE_SIZE, 0, + &i915_region_stolen_ops); +} + struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, resource_size_t stolen_offset, @@ -655,7 +702,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv return ERR_PTR(ret); } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); + obj = __i915_gem_object_create_stolen(dev_priv, stolen, NULL); if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 2289644d8604..c1040627fbf3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -21,8 +21,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, u64 end); void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node); -int i915_gem_init_stolen(struct drm_i915_private *dev_priv); -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv); +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915); struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, resource_size_t size); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 263f1cea0b64..0df057838a24 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2748,8 +2748,6 @@ void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915) { int i; - i915_gem_cleanup_stolen(i915); - for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { struct intel_memory_region *region = i915->mm.regions[i]; @@ -2762,15 +2760,6 @@ int i915_gem_init_memory_regions(struct drm_i915_private *i915) { int err, i; - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. - */ - /* XXX: stolen will become a region at some point */ - err = i915_gem_init_stolen(i915); - if (err) - return err; - for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { struct intel_memory_region *mem = ERR_PTR(-ENODEV); u32 type; @@ -2783,6 +2772,9 @@ int i915_gem_init_memory_regions(struct drm_i915_private *i915) case INTEL_MEMORY_SYSTEM: mem = i915_gem_shmem_setup(i915); break; + case INTEL_MEMORY_STOLEN: + mem = i915_gem_stolen_setup(i915); + break; } if (IS_ERR(mem)) { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e55ab2724996..f9a3bfe68689 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -152,7 +152,7 @@ .page_sizes = I915_GTT_PAGE_SIZE_4K #define GEN_DEFAULT_REGIONS \ - .memory_regions = REGION_SMEM + .memory_regions = REGION_SMEM | REGION_STOLEN #define I830_FEATURES \ GEN(2), \ From bcce7d90d120cddc726d5c98aa46188daef141f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Oct 2019 14:07:03 +0100 Subject: [PATCH 130/159] drm/i915/selftests: Add the mock engine to the gt->engine[] Remember to include the newly created mock engine in the list of available engines inside the gt. Fixes: a50134b1983b ("drm/i915: Make for_each_engine_masked work on intel_gt") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191018130703.31125-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/mock_engine.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 747f7c7790eb..123db2c3f956 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -253,6 +253,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); + engine->base.legacy_idx = INVALID_ENGINE; engine->base.instance = id; engine->base.status_page.addr = (void *)(engine + 1); @@ -267,6 +268,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.reset.finish = mock_reset_finish; engine->base.cancel_requests = mock_cancel_requests; + i915->gt.engine[id] = &engine->base; + i915->gt.engine_class[0][id] = &engine->base; + /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); From c6e07ada8eaa3ff66868a8bd6b1b9cdfa6d9fe21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Oct 2019 12:53:31 +0100 Subject: [PATCH 131/159] drm/i915/gt: Convert the leftover for_each_engine(gt) Use the local gt for iterating over the available set of engines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191018115331.8980-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rc6.c | 12 ++++++------ drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 71184aa72896..70f0e01a38b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -65,7 +65,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GUC_MAX_IDLE_COUNT, 0xA); @@ -133,7 +133,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GUC_MAX_IDLE_COUNT, 0xA); @@ -192,7 +192,7 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ @@ -219,7 +219,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); - for_each_engine(engine, i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); @@ -344,7 +344,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); @@ -371,7 +371,7 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC6_THRESHOLD, 0x557); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 311fdc0a21bc..bf631f15aa78 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1609,7 +1609,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1663,7 +1663,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1676,7 +1676,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0df057838a24..3148d5946b63 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1569,7 +1569,7 @@ static void gen7_ppgtt_enable(struct intel_gt *gt) } intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { /* GFX_MODE is per-ring on gen7+ */ ENGINE_WRITE(engine, RING_MODE_GEN7, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 144c32eed045..85912917c062 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -301,7 +301,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_engine_pmu *pmu = &engine->pmu; unsigned long flags; bool busy; From 30a027dcccc27051b3145ff9bb756d79038ad320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 15 Oct 2019 18:27:57 +0300 Subject: [PATCH 132/159] drm/i915: Move the cursor rotation handling into intel_cursor_check_surface() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike other planes the cursor currently handles 180 degree rotation adjustment during the hardware programming phase. Let's move that stuff into intel_cursor_check_surface() to match how we do things with other plane types. And while at we'll plop in the final src x/y coordinates (which will actually always be zero) into the src rect and color_plane[0].x/y, just for some extra consistency. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015152757.12231-1-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/intel_display.c | 31 ++++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 164ded862148..96de58281301 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10530,15 +10530,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) else base = intel_plane_ggtt_offset(plane_state); - base += plane_state->color_plane[0].offset; - - /* ILK+ do this automagically */ - if (HAS_GMCH(dev_priv) && - plane_state->base.rotation & DRM_MODE_ROTATE_180) - base += (drm_rect_height(&plane_state->base.dst) * - drm_rect_width(&plane_state->base.dst) - 1) * fb->format->cpp[0]; - - return base; + return base + plane_state->color_plane[0].offset; } static u32 intel_cursor_position(const struct intel_plane_state *plane_state) @@ -10575,6 +10567,9 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) static int intel_cursor_check_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + unsigned int rotation = plane_state->base.rotation; int src_x, src_y; u32 offset; int ret; @@ -10598,7 +10593,25 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) return -EINVAL; } + /* + * Put the final coordinates back so that the src + * coordinate checks will see the right values. + */ + drm_rect_translate_to(&plane_state->base.src, + src_x << 16, src_y << 16); + + /* ILK+ do this automagically in hardware */ + if (HAS_GMCH(dev_priv) && rotation & DRM_MODE_ROTATE_180) { + const struct drm_framebuffer *fb = plane_state->base.fb; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + + offset += (src_h * src_w - 1) * fb->format->cpp[0]; + } + plane_state->color_plane[0].offset = offset; + plane_state->color_plane[0].x = src_x; + plane_state->color_plane[0].y = src_y; return 0; } From c08f995a284dc202a934530ee73cc7136665af38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 Oct 2019 19:25:00 +0300 Subject: [PATCH 133/159] drm/i915: Polish possible_clones setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled stuff with drm_encoder_mask() when populating possible_clones, and rename the function to intel_encoder_possible_clones() to make it clear what it's used for. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191002162505.30716-1-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/intel_display.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 96de58281301..0233831f559a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15261,21 +15261,18 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, return 0; } -static int intel_encoder_clones(struct intel_encoder *encoder) +static u32 intel_encoder_possible_clones(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct intel_encoder *source_encoder; - int index_mask = 0; - int entry = 0; + u32 possible_clones = 0; for_each_intel_encoder(dev, source_encoder) { if (encoders_cloneable(encoder, source_encoder)) - index_mask |= (1 << entry); - - entry++; + possible_clones |= drm_encoder_mask(&source_encoder->base); } - return index_mask; + return possible_clones; } static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) @@ -15595,7 +15592,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) encoder->base.possible_crtcs = intel_encoder_possible_crtcs(encoder); encoder->base.possible_clones = - intel_encoder_clones(encoder); + intel_encoder_possible_clones(encoder); } intel_init_pch_refclk(dev_priv); From 98c93394ba907e6b741c67dc6affea78c2a940a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:43:39 +0300 Subject: [PATCH 134/159] drm/i915: Refuse modes with hdisplay==4096 on pre-HSW DP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP port/pipe goes wonky if we try to use timings with hdisplay==4096 on pre-HSW platforms. The link fails to train and the pipe may not signal vblank interrupts. On HDMI such at mode works just fine (tested on ELK/SNB/CHV). So let's refuse such modes on DP on older platforms. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718144340.1114-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare --- drivers/gpu/drm/i915/display/intel_dp.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c4c081c79dd9..3792d143bea9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -591,6 +591,25 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } +static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, + int hdisplay) +{ + /* + * Older platforms don't like hdisplay==4096 with DP. + * + * On ILK/SNB/IVB the pipe seems to be somewhat running (scanline + * and frame counter increment), but we don't get vblank interrupts, + * and the pipe underruns immediately. The link also doesn't seem + * to get trained properly. + * + * On CHV the vblank interrupts don't seem to disappear but + * otherwise the symptoms are similar. + * + * TODO: confirm the behaviour on HSW+ + */ + return hdisplay == 4096 && !HAS_DDI(dev_priv); +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -626,6 +645,9 @@ intel_dp_mode_valid(struct drm_connector *connector, max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(target_clock, 18); + if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay)) + return MODE_H_ILLEGAL; + /* * Output bpp is stored in 6.4 format so right shift by 4 to get the * integer value since we support only integer values of bpp. @@ -2345,6 +2367,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; + if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay)) + return -EINVAL; + ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; From d7a145849b680075ce94d8b341905104e2ee58d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Oct 2019 23:09:42 +0300 Subject: [PATCH 135/159] drm/i915: Nuke the useless changed param from skl_ddb_add_affected_pipes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit changed==true just means we have some crtcs in the state. All the stuff following this only operates on crtcs in the state anyway so there is no point in having this bool. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011200949.7839-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b306e2338f5a..49568270a89d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5424,35 +5424,14 @@ skl_print_wm_changes(struct intel_atomic_state *state) } static int -skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) +skl_ddb_add_affected_pipes(struct intel_atomic_state *state) { struct drm_device *dev = state->base.dev; const struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; u32 realloc_pipes = pipes_modified(state); - int ret, i; - - /* - * When we distrust bios wm we always need to recompute to set the - * expected DDB allocations for each CRTC. - */ - if (dev_priv->wm.distrust_bios_wm) - (*changed) = true; - - /* - * If this transaction isn't actually touching any CRTC's, don't - * bother with watermark calculation. Note that if we pass this - * test, we're guaranteed to hold at least one CRTC state mutex, - * which means we can safely use values like dev_priv->active_pipes - * since any racing commits that want to update them would need to - * hold _all_ CRTC state mutexes. - */ - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) - (*changed) = true; - - if (!*changed) - return 0; + int ret; /* * If this is our first atomic update following hardware readout, @@ -5576,14 +5555,13 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc_state *new_crtc_state; struct intel_crtc_state *old_crtc_state; struct skl_ddb_values *results = &state->wm_results; - bool changed = false; int ret, i; /* Clear all dirty flags */ results->dirty_pipes = 0; - ret = skl_ddb_add_affected_pipes(state, &changed); - if (ret || !changed) + ret = skl_ddb_add_affected_pipes(state); + if (ret) return ret; /* From 49e0ed3848d0e3ef22c3a458ded8847cbf08330e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Oct 2019 23:09:43 +0300 Subject: [PATCH 136/159] drm/i915: Nuke 'realloc_pipes' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'realloc_pipes' bitmask is pointless. It is either: a) the set of pipes which are already part of the state, in which case adding them again is entirely redundant b) the set of all pipes which we then add to the state Also the fact that 'realloc_pipes' uses the crtc indexes is going to bite is at some point so best get rid of it quick. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011200949.7839-3-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 50 ++++++++++++++------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 49568270a89d..3536c2e975e7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5235,19 +5235,6 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb, return false; } -static u32 -pipes_modified(struct intel_atomic_state *state) -{ - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - u32 i, ret = 0; - - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) - ret |= drm_crtc_mask(&crtc->base); - - return ret; -} - static int skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) @@ -5423,14 +5410,26 @@ skl_print_wm_changes(struct intel_atomic_state *state) } } +static int intel_add_all_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + return 0; +} + static int skl_ddb_add_affected_pipes(struct intel_atomic_state *state) { - struct drm_device *dev = state->base.dev; - const struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - u32 realloc_pipes = pipes_modified(state); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); int ret; /* @@ -5440,7 +5439,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state) * ensure a full DDB recompute. */ if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, state->base.acquire_ctx); if (ret) return ret; @@ -5471,18 +5470,11 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state) * to grab the lock on *all* CRTC's. */ if (state->active_pipe_changes || state->modeset) { - realloc_pipes = ~0; state->wm_results.dirty_pipes = ~0; - } - /* - * We're not recomputing for the pipes not included in the commit, so - * make sure we start with the current state. - */ - for_each_intel_crtc_mask(dev, crtc, realloc_pipes) { - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); + ret = intel_add_all_pipes(state); + if (ret) + return ret; } return 0; From 36b53a291b6a9213b7c053c7d72931bb89587440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Oct 2019 23:09:44 +0300 Subject: [PATCH 137/159] drm/i915: Make dirty_pipes refer to pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite the its name dirty_pipes refers to crtc indexes. Let's change its behaviout to match the name. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011200949.7839-4-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 9 +++------ drivers/gpu/drm/i915/intel_pm.c | 13 ++++++------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0233831f559a..23450f16df66 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13934,7 +13934,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) struct intel_crtc_state *old_crtc_state, *new_crtc_state; unsigned int updated = 0; bool progress; - enum pipe pipe; int i; u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; u8 required_slices = state->wm_results.ddb.enabled_slices; @@ -13959,12 +13958,10 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) progress = false; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + enum pipe pipe = crtc->pipe; bool vbl_wait = false; - unsigned int cmask = drm_crtc_mask(&crtc->base); - pipe = crtc->pipe; - - if (updated & cmask || !new_crtc_state->base.active) + if (updated & BIT(crtc->pipe) || !new_crtc_state->base.active) continue; if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, @@ -13972,7 +13969,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) INTEL_NUM_PIPES(dev_priv), i)) continue; - updated |= cmask; + updated |= BIT(pipe); entries[i] = new_crtc_state->wm.skl.ddb; /* diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3536c2e975e7..2b71d52a4ede 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5575,7 +5575,7 @@ skl_compute_wm(struct intel_atomic_state *state) if (!skl_pipe_wm_equals(crtc, &old_crtc_state->wm.skl.optimal, &new_crtc_state->wm.skl.optimal)) - results->dirty_pipes |= drm_crtc_mask(&crtc->base); + results->dirty_pipes |= BIT(crtc->pipe); } ret = skl_compute_ddb(state); @@ -5595,7 +5595,7 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; enum pipe pipe = crtc->pipe; - if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) + if ((state->wm_results.dirty_pipes & BIT(crtc->pipe)) == 0) return; I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); @@ -5604,12 +5604,11 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, static void skl_initial_wm(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct skl_ddb_values *results = &state->wm_results; - if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) + if ((results->dirty_pipes & BIT(crtc->pipe)) == 0) return; mutex_lock(&dev_priv->wm.wm_mutex); @@ -5758,7 +5757,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); if (crtc->active) - hw->dirty_pipes |= drm_crtc_mask(&crtc->base); + hw->dirty_pipes |= BIT(crtc->pipe); } if (dev_priv->active_pipes) { From 2edb3de9e599e79db4028335df2a64f3ad014b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Oct 2019 17:51:23 +0300 Subject: [PATCH 138/159] drm/i915: Shrink eDRAM ways/sets arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the ways/sets arrays static cosnt u8 to shrink things a bit. text data bss dec hex filename - 23935 629 128 24692 6074 i915_drv.o + 23818 629 128 24575 5fff i915_drv.o Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191010145127.7487-1-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fd532b8cfdb8..b3a92951f671 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1073,8 +1073,8 @@ intel_get_dram_info(struct drm_i915_private *dev_priv) static u32 gen9_edram_size_mb(struct drm_i915_private *dev_priv, u32 cap) { - const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; - const unsigned int sets[4] = { 1, 1, 2, 2 }; + static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; + static const u8 sets[4] = { 1, 1, 2, 2 }; return EDRAM_NUM_BANKS(cap) * ways[EDRAM_WAYS_IDX(cap)] * From 67fdd8ea53195ee265cf4b8bf5498a549cebcd71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Oct 2019 17:51:24 +0300 Subject: [PATCH 139/159] drm/i915: s/hdcp2_hdmi_msg_data/hdcp2_hdmi_msg_timeout/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The array is there only for timeout, "data" doesn't mean anything so let's rename the thing to be more descriptive. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191010145127.7487-2-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/display/intel_hdmi.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 92d1cbbbee2b..f779f60a1160 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1535,13 +1535,13 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) return true; } -struct hdcp2_hdmi_msg_data { +struct hdcp2_hdmi_msg_timeout { u8 msg_id; u32 timeout; u32 timeout2; }; -static const struct hdcp2_hdmi_msg_data hdcp2_msg_data[] = { +static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { { HDCP_2_2_AKE_INIT, 0, 0 }, { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, { HDCP_2_2_AKE_NO_STORED_KM, 0, 0 }, @@ -1572,12 +1572,12 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired) { int i; - for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++) - if (hdcp2_msg_data[i].msg_id == msg_id && + for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) + if (hdcp2_msg_timeout[i].msg_id == msg_id && (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired)) - return hdcp2_msg_data[i].timeout; - else if (hdcp2_msg_data[i].msg_id == msg_id) - return hdcp2_msg_data[i].timeout2; + return hdcp2_msg_timeout[i].timeout; + else if (hdcp2_msg_timeout[i].msg_id == msg_id) + return hdcp2_msg_timeout[i].timeout2; return -EINVAL; } From 770ce5a07190e26b18f46037f357c1870ce29b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Oct 2019 17:51:25 +0300 Subject: [PATCH 140/159] drm/i915: Remove dead weight from hdcp2_msg_timeout[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .read_2_2() hooks is never called for any of the message types with a zero timeout. So it's all just dead weight which we can chuck. text data bss dec hex filename - 34701 360 0 35061 88f5 intel_hdmi.o + 34633 360 0 34993 88b1 intel_hdmi.o Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191010145127.7487-3-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/display/intel_hdmi.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f779f60a1160..ef2c19470b92 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1542,19 +1542,12 @@ struct hdcp2_hdmi_msg_timeout { }; static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { - { HDCP_2_2_AKE_INIT, 0, 0 }, { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, - { HDCP_2_2_AKE_NO_STORED_KM, 0, 0 }, - { HDCP_2_2_AKE_STORED_KM, 0, 0 }, { HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, - { HDCP_2_2_LC_INIT, 0, 0 }, { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0 }, - { HDCP_2_2_SKE_SEND_EKS, 0, 0 }, { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_SEND_ACK, 0, 0 }, - { HDCP_2_2_REP_STREAM_MANAGE, 0, 0 }, { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, }; From eac03efdc85f062b16c72dae848b56aba261d566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Oct 2019 17:51:26 +0300 Subject: [PATCH 141/159] drm/i915: Remove hdcp2_hdmi_msg_timeout.timeout2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only reason for the timeout2 value in the array is the HDCP_2_2_AKE_SEND_HPRIME message. But that one still needs special casing inside the loop, and so just ends up making the code harder to read. Let's just remove this leaky timeout2 abstraction and special case that one command in a way that is easy to understand. We can then remove the timeout2 member from struct entirely. text data bss dec hex filename - 34633 360 0 34993 88b1 intel_hdmi.o + 34521 360 0 34881 8841 intel_hdmi.o Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191010145127.7487-4-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/display/intel_hdmi.c | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index ef2c19470b92..3b88d7b0aac5 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1538,17 +1538,14 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) struct hdcp2_hdmi_msg_timeout { u8 msg_id; u32 timeout; - u32 timeout2; }; static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { - { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, - { HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, - { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, - { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, + { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, }, + { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, }, + { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, }, + { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, }, }; static @@ -1565,12 +1562,17 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired) { int i; - for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) - if (hdcp2_msg_timeout[i].msg_id == msg_id && - (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired)) + if (msg_id == HDCP_2_2_AKE_SEND_HPRIME) { + if (is_paired) + return HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS; + else + return HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS; + } + + for (i = 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) { + if (hdcp2_msg_timeout[i].msg_id == msg_id) return hdcp2_msg_timeout[i].timeout; - else if (hdcp2_msg_timeout[i].msg_id == msg_id) - return hdcp2_msg_timeout[i].timeout2; + } return -EINVAL; } From 0b7b6966403d75472eb2dc6082a3d483c219b173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Oct 2019 17:51:27 +0300 Subject: [PATCH 142/159] drm/i915: Make hdcp2_msg_timeout.timeout u16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the timeout values fit in u16, so let's shrink the structure a bit. This ends up actually increasing the .text size a bit due to some changes in instructions (constant imul+small jmps replaced with mov+bigger jmpqs). Seems pretty arbitrary to me so I'll just pretend I didn't see it. text data bss dec hex filename - 34521 360 0 34881 8841 intel_hdmi.o + 34537 360 0 34897 8851 intel_hdmi.o Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191010145127.7487-5-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/display/intel_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 3b88d7b0aac5..b54ccbb5aad5 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1537,7 +1537,7 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) struct hdcp2_hdmi_msg_timeout { u8 msg_id; - u32 timeout; + u16 timeout; }; static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { From e83c467358c1d99f235af8374c0eca0b0c91b329 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Wed, 16 Oct 2019 11:35:14 -0700 Subject: [PATCH 143/159] drm/i915: Correct the PCH type in irq postinstall JasperLake PCH (JSP) has DDI HPD pin mappings similar to TGP and not MCC. Also add the correct HPD pin mappings for the MCC PCH. Cc: Matt Roper Signed-off-by: Vivek Kasireddy Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191016183514.11128-1-vivek.kasireddy@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ef09fbb36f37..e618f4621308 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3803,8 +3803,11 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv) if (HAS_PCH_TGP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); - else if (HAS_PCH_MCC(dev_priv)) + else if (HAS_PCH_JSP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); + else if (HAS_PCH_MCC(dev_priv)) + icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE(PORT_TC1)); else icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); From 789fa8746daf8561e8cd0870a66ba7a5bb05fbf7 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 18 Oct 2019 12:07:10 +0200 Subject: [PATCH 144/159] drm/i915: Restore full symmetry in i915_driver_modeset_probe/remove Commit 2d6f6f359fd8 ("drm/i915: add i915_driver_modeset_remove()") claimed removal of asymmetry in probe() and remove() calls, however, it didn't take care of calling intel_irq_uninstall() on driver remove. That doesn't hurt as long as we still call it from intel_modeset_driver_remove() but in order to have full symmetry we should call it again from i915_driver_modeset_remove(). Note that it's safe to call intel_irq_uninstall() twice thanks to commit b318b82455bd ("drm/i915: Nuke drm_driver irq vfuncs"). We may only want to mention the case we are adding in a related FIXME comment provided by that commit. While being at it, update the name of function mentioned as calling it out of sequence as that name has been changed meanwhile by commit 78dae1ac35dd ("drm/i915: Propagate "_remove" function name suffix down"). Suggested-by: Michal Wajdeczko Signed-off-by: Janusz Krzysztofik Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/6250061.7lZMOAyebC@jkrzyszt-desk.ger.corp.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/i915_irq.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b3a92951f671..157ed22052a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -354,6 +354,8 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915) { intel_modeset_driver_remove(i915); + intel_irq_uninstall(i915); + intel_bios_driver_remove(i915); i915_switcheroo_unregister(i915); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e618f4621308..572a5c37cc61 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4486,10 +4486,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) int irq = dev_priv->drm.pdev->irq; /* - * FIXME we can get called twice during driver load - * error handling due to intel_modeset_cleanup() - * calling us out of sequence. Would be nice if - * it didn't do that... + * FIXME we can get called twice during driver probe + * error handling as well as during driver remove due to + * intel_modeset_driver_remove() calling us out of sequence. + * Would be nice if it didn't do that... */ if (!dev_priv->drm.irq_enabled) return; From bfb926e3238580a5b4c13509c6aa73d84643d75d Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:20 -0700 Subject: [PATCH 145/159] drm/i915/display/icl: Save Master transcoder in slave's crtc_state for Transcoder Port Sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of tiled displays when the two tiles are sent across two CRTCs over two separate DP SST connectors, we need a mechanism to synchronize the two CRTCs and their corresponding transcoders. So use the master-slave mode where there is one master corresponding to last horizontal and vertical tile that needs to be genlocked with all other slave tiles. This patch identifies saves the master transcoder in all the slave CRTC states. This is needed to select the master CRTC/transcoder while configuring transcoder port sync for the corresponding slaves. v6: Rebase (manasi) v5: * Address Ville's comments * Just pass crtc_state, no need to check GEN (Ville) v4: * Rebase v3: * Use master_tramscoder instead of master_crtc for valid HW state readouts (Ville) v2: * Move this to intel_mode_set_pipe_config(Jani N, Ville) * Use slave_bitmask to save associated slaves in master crtc state (Ville) Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-1-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 118 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 2 + .../drm/i915/display/intel_display_types.h | 6 + 3 files changed, 126 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 23450f16df66..a1595f50edf6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -519,6 +519,20 @@ needs_modeset(const struct intel_crtc_state *state) return drm_atomic_crtc_needs_modeset(&state->base); } +bool +is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state) +{ + return (crtc_state->master_transcoder != INVALID_TRANSCODER || + crtc_state->sync_mode_slaves_mask); +} + +static bool +is_trans_port_sync_master(const struct intel_crtc_state *crtc_state) +{ + return (crtc_state->master_transcoder == INVALID_TRANSCODER && + crtc_state->sync_mode_slaves_mask); +} + /* * Platform specific helpers to calculate the port PLL loopback- (clock.m), * and post-divider (clock.p) values, pre- (clock.vco) and post-divided fast @@ -11794,6 +11808,91 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } +static int icl_add_sync_mode_crtcs(struct intel_crtc_state *crtc_state) +{ + struct drm_crtc *crtc = crtc_state->base.crtc; + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state); + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_connector *master_connector, *connector; + struct drm_connector_state *connector_state; + struct drm_connector_list_iter conn_iter; + struct drm_crtc *master_crtc = NULL; + struct drm_crtc_state *master_crtc_state; + struct intel_crtc_state *master_pipe_config; + int i, tile_group_id; + + if (INTEL_GEN(dev_priv) < 11) + return 0; + + /* + * In case of tiled displays there could be one or more slaves but there is + * only one master. Lets make the CRTC used by the connector corresponding + * to the last horizonal and last vertical tile a master/genlock CRTC. + * All the other CRTCs corresponding to other tiles of the same Tile group + * are the slave CRTCs and hold a pointer to their genlock CRTC. + */ + for_each_new_connector_in_state(&state->base, connector, connector_state, i) { + if (connector_state->crtc != crtc) + continue; + if (!connector->has_tile) + continue; + if (crtc_state->base.mode.hdisplay != connector->tile_h_size || + crtc_state->base.mode.vdisplay != connector->tile_v_size) + return 0; + if (connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + continue; + crtc_state->sync_mode_slaves_mask = 0; + tile_group_id = connector->tile_group->id; + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(master_connector, &conn_iter) { + struct drm_connector_state *master_conn_state = NULL; + + if (!master_connector->has_tile) + continue; + if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 || + master_connector->tile_v_loc != master_connector->num_v_tile - 1) + continue; + if (master_connector->tile_group->id != tile_group_id) + continue; + + master_conn_state = drm_atomic_get_connector_state(&state->base, + master_connector); + if (IS_ERR(master_conn_state)) { + drm_connector_list_iter_end(&conn_iter); + return PTR_ERR(master_conn_state); + } + if (master_conn_state->crtc) { + master_crtc = master_conn_state->crtc; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (!master_crtc) { + DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n", + connector_state->crtc->base.id); + return -EINVAL; + } + + master_crtc_state = drm_atomic_get_crtc_state(&state->base, + master_crtc); + if (IS_ERR(master_crtc_state)) + return PTR_ERR(master_crtc_state); + + master_pipe_config = to_intel_crtc_state(master_crtc_state); + crtc_state->master_transcoder = master_pipe_config->cpu_transcoder; + master_pipe_config->sync_mode_slaves_mask |= + BIT(crtc_state->cpu_transcoder); + DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n", + transcoder_name(crtc_state->master_transcoder), + crtc_state->base.crtc->base.id, + master_pipe_config->sync_mode_slaves_mask); + } + + return 0; +} + static int intel_crtc_atomic_check(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -12292,6 +12391,13 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) saved_state->wm = crtc_state->wm; + /* + * Save the slave bitmask which gets filled for master crtc state during + * slave atomic check call. + */ + if (is_trans_port_sync_master(crtc_state)) + saved_state->sync_mode_slaves_mask = + crtc_state->sync_mode_slaves_mask; /* Keep base drm_crtc_state intact, only clear our extended struct */ BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); @@ -12385,6 +12491,15 @@ intel_modeset_pipe_config(struct intel_crtc_state *pipe_config) drm_mode_set_crtcinfo(&pipe_config->base.adjusted_mode, CRTC_STEREO_DOUBLE); + /* Set the crtc_state defaults for trans_port_sync */ + pipe_config->master_transcoder = INVALID_TRANSCODER; + ret = icl_add_sync_mode_crtcs(pipe_config); + if (ret) { + DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n", + ret); + return ret; + } + /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. @@ -12900,6 +13015,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_INFOFRAME(hdmi); PIPE_CONF_CHECK_INFOFRAME(drm); + PIPE_CONF_CHECK_I(sync_mode_slaves_mask); + PIPE_CONF_CHECK_I(master_transcoder); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 90807603987c..db149cb56ec5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -93,6 +93,7 @@ enum pipe { #define pipe_name(p) ((p) + 'A') enum transcoder { + INVALID_TRANSCODER = -1, /* * The following transcoders have a 1:1 transcoder -> pipe mapping, * keep their values fixed: the code assumes that TRANSCODER_A=0, the @@ -462,6 +463,7 @@ enum drm_mode_status intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); +bool is_trans_port_sync_mode(const struct intel_crtc_state *state); void intel_plane_destroy(struct drm_plane *plane); void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 40390d855815..8358152e403e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -991,6 +991,12 @@ struct intel_crtc_state { /* Forward Error correction State */ bool fec_enable; + + /* Pointer to master transcoder in case of tiled displays */ + enum transcoder master_transcoder; + + /* Bitmask to indicate slaves attached */ + u8 sync_mode_slaves_mask; }; struct intel_crtc { From 705135bd734c949495f6c347dbfcca14cd2389b9 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:21 -0700 Subject: [PATCH 146/159] drm/i915/display/icl: Enable TRANSCODER PORT SYNC for tiled displays across separate ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of tiled displays where different tiles are displayed across different ports, we need to synchronize the transcoders involved. This patch implements the transcoder port sync feature for synchronizing one master transcoder with one or more slave transcoders. This is only enbaled in slave transcoder and the master transcoder is unaware that it is operating in this mode. This has been tested with tiled display connected to ICL. v7: * Rebase on Maarten's patches v6: * Use master_trans +1 and address missing trans_edp case (Ville) v5: * Add TRANSCODER_D case and MISSING_CASE (Maarten) v4: Rebase v3: * Check of DP_MST moved to atomic_check (Maarten) v2: * Do not use RMW, just write to the register in commit (Jani N) Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-2-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a1595f50edf6..4033e410b1df 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4405,6 +4405,36 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) I915_WRITE(PIPE_CHICKEN(pipe), tmp); } +static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 trans_ddi_func_ctl2_val; + u8 master_select; + + /* + * Configure the master select and enable Transcoder Port Sync for + * Slave CRTCs transcoder. + */ + if (crtc_state->master_transcoder == INVALID_TRANSCODER) + return; + + if (crtc_state->master_transcoder == TRANSCODER_EDP) + master_select = 0; + else + master_select = crtc_state->master_transcoder + 1; + + /* Set the master select bits for Tranascoder Port Sync */ + trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) & + PORT_SYNC_MODE_MASTER_SELECT_MASK) << + PORT_SYNC_MODE_MASTER_SELECT_SHIFT; + /* Enable Transcoder Port Sync */ + trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE; + + I915_WRITE(TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder), + trans_ddi_func_ctl2_val); +} + static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -6429,6 +6459,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (!transcoder_is_dsi(cpu_transcoder)) intel_set_pipe_timings(pipe_config); + if (INTEL_GEN(dev_priv) >= 11) + icl_enable_trans_port_sync(pipe_config); + intel_set_pipe_src_size(pipe_config); if (cpu_transcoder != TRANSCODER_EDP && From ba5f1ae95d28cf3d5358fa7375578a1ad3b424e1 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:22 -0700 Subject: [PATCH 147/159] drm/i915/display/icl: HW state readout for transcoder port sync config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the state is committed, we readout the HW registers and compare the HW state with the SW state that we just committed. For Transcdoer port sync, we add master_transcoder and the salves bitmask to the crtc_state, hence we need to read those during the HW state readout to avoid pipe state mismatch. v11: * Move master trans init to get pipe_Config hooks (Ville) v10: * Initialize master_tarnscoder readout for all platforms (Ville) v9: * Initialize master_transcoder = INVALID at get config (Ville) v8: * Use master_select -1, address TRANS_EDP case (Ville) * Rename master_transcoder to _readout (Lucas) v7: * NDont read HW state for DSI v6: * Go through both parts of HW readout (Maarten) * Add a WARN if the same trans configured as master and slave (Ville, Maarten) v5: * Add return INVALID in defaut case (Maarten) v4: * Get power domains in master loop for get_config (Ville) v3: * Add TRANSCODER_D (Maarten) * v3 Reviewed-by: Maarten Lankhorst v2: * Add Transcoder_D and MISSING_CASE (Maarten) Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-3-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 61 ++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4033e410b1df..30595d19d0f9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -8798,6 +8798,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; @@ -9987,6 +9988,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; tmp = I915_READ(PIPECONF(crtc->pipe)); @@ -10437,6 +10439,59 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, } } +static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) +{ + u32 trans_port_sync, master_select; + + trans_port_sync = I915_READ(TRANS_DDI_FUNC_CTL2(cpu_transcoder)); + + if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0) + return INVALID_TRANSCODER; + + master_select = trans_port_sync & + PORT_SYNC_MODE_MASTER_SELECT_MASK; + if (master_select == 0) + return TRANSCODER_EDP; + else + return master_select - 1; +} + +static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 transcoders; + enum transcoder cpu_transcoder; + + crtc_state->master_transcoder = transcoder_master_readout(dev_priv, + crtc_state->cpu_transcoder); + + transcoders = BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C) | + BIT(TRANSCODER_D); + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + enum intel_display_power_domain power_domain; + intel_wakeref_t trans_wakeref; + + power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); + trans_wakeref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + + if (!trans_wakeref) + continue; + + if (transcoder_master_readout(dev_priv, cpu_transcoder) == + crtc_state->cpu_transcoder) + crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder); + + intel_display_power_put(dev_priv, power_domain, trans_wakeref); + } + + WARN_ON(crtc_state->master_transcoder != INVALID_TRANSCODER && + crtc_state->sync_mode_slaves_mask); +} + static bool haswell_get_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -10448,6 +10503,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, intel_crtc_init_scalers(crtc, pipe_config); + pipe_config->master_transcoder = INVALID_TRANSCODER; + power_domain = POWER_DOMAIN_PIPE(crtc->pipe); wf = intel_display_power_get_if_enabled(dev_priv, power_domain); if (!wf) @@ -10556,6 +10613,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, pipe_config->pixel_multiplier = 1; } + if (INTEL_GEN(dev_priv) >= 11 && + !transcoder_is_dsi(pipe_config->cpu_transcoder)) + icelake_get_trans_port_sync_config(pipe_config); + out: for_each_power_domain(power_domain, power_domain_mask) intel_display_power_put(dev_priv, From eadf6f9170d5e72ce571ce914638316863d512e7 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:23 -0700 Subject: [PATCH 148/159] drm/i915/display/icl: Enable master-slaves in trans port sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the display enable sequence, we need to follow the enable sequence for slaves first with DP_TP_CTL set to Idle and configure the transcoder port sync register to select the corersponding master, then follow the enable sequence for master leaving DP_TP_CTL to idle. At this point the transcoder port sync mode is configured and enabled and the Vblanks of both ports are synchronized so then set DP_TP_CTL for the slave and master to Normal and do post crtc enable updates. v11: * Rebase (Manasi) v10: * in trans sync mode, dont stop link train for tgl (Manasi) v9: Remove update_scanline_offset to rebase on Maarten's patch (Manasi) v8: * Rebase on Maarten's patches (Manasi) v7: * Use ffs(slaves) to get slave crtc (Ville) v6: * Modeset implies active_changed, remove one condition (Maarten) v5: * Fix checkpatch warning (Manasi) v4: * Reuse skl_commit_modeset_enables() hook (Maarten) * Obtain slave crtc and states from master (Maarten) v3: * Rebase on drm-tip (Manasi) v2: * Create a icl_update_crtcs hook (Maarten, Danvet) * This sequence only for CRTCs in trans port sync mode (Maarten) Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-4-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 +- drivers/gpu/drm/i915/display/intel_display.c | 136 ++++++++++++++++++- drivers/gpu/drm/i915/display/intel_display.h | 2 + 3 files changed, 139 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 4c81449ec144..46a7b2ca2782 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3500,7 +3500,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_dp_start_link_train(intel_dp); /* 7.k */ - intel_dp_stop_link_train(intel_dp); + if (!is_trans_port_sync_mode(crtc_state)) + intel_dp_stop_link_train(intel_dp); /* * TODO: enable clock gating @@ -3574,7 +3575,8 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); intel_dp_start_link_train(intel_dp); - if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) + if ((port != PORT_A || INTEL_GEN(dev_priv) >= 9) && + !is_trans_port_sync_mode(crtc_state)) intel_dp_stop_link_train(intel_dp); intel_ddi_enable_fec(encoder, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 30595d19d0f9..17c259d4922a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14061,6 +14061,18 @@ static void intel_update_crtc(struct intel_crtc *crtc, intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } +static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(new_crtc_state->base.crtc->dev); + enum transcoder slave_transcoder; + + WARN_ON(!is_power_of_2(new_crtc_state->sync_mode_slaves_mask)); + + slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1; + return intel_get_crtc_for_pipe(dev_priv, + (enum pipe)slave_transcoder); +} + static void intel_old_crtc_state_disables(struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state, @@ -14139,6 +14151,113 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state) } } +static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_update_active_timings(new_crtc_state); + dev_priv->display.crtc_enable(new_crtc_state, state); + intel_crtc_enable_pipe_crc(crtc); +} + +static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct drm_connector *conn; + struct intel_dp *intel_dp; + int i; + + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { + if (conn_state->crtc == &crtc->base) + break; + } + intel_dp = enc_to_intel_dp(&intel_attached_encoder(conn)->base); + intel_dp_stop_link_train(intel_dp); +} + +static void intel_post_crtc_enable_updates(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, + to_intel_plane(crtc->base.primary)); + bool modeset = needs_modeset(new_crtc_state); + + if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc) + intel_fbc_disable(crtc); + else if (new_plane_state) + intel_fbc_enable(crtc, new_crtc_state, new_plane_state); + + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + commit_pipe_config(state, old_crtc_state, new_crtc_state); + skl_update_planes_on_crtc(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. + */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); +} + +static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state); + struct intel_crtc_state *new_slave_crtc_state = + intel_atomic_get_new_crtc_state(state, slave_crtc); + struct intel_crtc_state *old_slave_crtc_state = + intel_atomic_get_old_crtc_state(state, slave_crtc); + + WARN_ON(!slave_crtc || !new_slave_crtc_state || + !old_slave_crtc_state); + + DRM_DEBUG_KMS("Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n", + crtc->base.base.id, crtc->base.name, slave_crtc->base.base.id, + slave_crtc->base.name); + + /* Enable seq for slave with with DP_TP_CTL left Idle until the + * master is ready + */ + intel_crtc_enable_trans_port_sync(slave_crtc, + state, + new_slave_crtc_state); + + /* Enable seq for master with with DP_TP_CTL left Idle */ + intel_crtc_enable_trans_port_sync(crtc, + state, + new_crtc_state); + + /* Set Slave's DP_TP_CTL to Normal */ + intel_set_dp_tp_ctl_normal(slave_crtc, + state); + + /* Set Master's DP_TP_CTL To Normal */ + usleep_range(200, 400); + intel_set_dp_tp_ctl_normal(crtc, + state); + + /* Now do the post crtc enable for all master and slaves */ + intel_post_crtc_enable_updates(slave_crtc, + state); + intel_post_crtc_enable_updates(crtc, + state); +} + static void skl_commit_modeset_enables(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -14172,6 +14291,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { enum pipe pipe = crtc->pipe; bool vbl_wait = false; + bool modeset = needs_modeset(new_crtc_state); if (updated & BIT(crtc->pipe) || !new_crtc_state->base.active) continue; @@ -14192,12 +14312,22 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) */ if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb, &old_crtc_state->wm.skl.ddb) && - !new_crtc_state->base.active_changed && + !modeset && state->wm_results.dirty_pipes != updated) vbl_wait = true; - intel_update_crtc(crtc, state, old_crtc_state, - new_crtc_state); + if (modeset && is_trans_port_sync_mode(new_crtc_state)) { + if (is_trans_port_sync_master(new_crtc_state)) + intel_update_trans_port_sync_crtcs(crtc, + state, + old_crtc_state, + new_crtc_state); + else + continue; + } else { + intel_update_crtc(crtc, state, old_crtc_state, + new_crtc_state); + } if (vbl_wait) intel_wait_for_vblank(dev_priv, pipe); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index db149cb56ec5..7dcb176d91b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -27,6 +27,7 @@ #include #include +#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; @@ -54,6 +55,7 @@ struct intel_plane; struct intel_plane_state; struct intel_remapped_info; struct intel_rotation_info; +struct intel_crtc_state; enum i915_gpio { GPIOA, From 51528afe7c5ed6fcd6ed4cfb046b74659d21738f Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:24 -0700 Subject: [PATCH 149/159] drm/i915/display/icl: Disable transcoder port sync as part of crtc_disable() sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This clears the transcoder port sync bits of the TRANS_DDI_FUNC_CTL2 register during crtc_disable(). v3: * Rebase on maarten's patches v2: * Directly write the trans_port_sync reg value (Maarten) Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-5-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 17c259d4922a..1ea8f37448ba 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4435,6 +4435,25 @@ static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state trans_ddi_func_ctl2_val); } +static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + i915_reg_t reg; + u32 trans_ddi_func_ctl2_val; + + if (old_crtc_state->master_transcoder == INVALID_TRANSCODER) + return; + + DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n", + transcoder_name(old_crtc_state->cpu_transcoder)); + + reg = TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder); + trans_ddi_func_ctl2_val = ~(PORT_SYNC_MODE_ENABLE | + PORT_SYNC_MODE_MASTER_SELECT_MASK); + I915_WRITE(reg, trans_ddi_func_ctl2_val); +} + static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -6639,6 +6658,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(old_crtc_state, false); + if (INTEL_GEN(dev_priv) >= 11) + icl_disable_transcoder_port_sync(old_crtc_state); + if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(old_crtc_state); From a6c948f98239cbec1f5f0dc741b5841008c264ff Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 18 Oct 2019 10:27:25 -0700 Subject: [PATCH 150/159] drm/i915/display/icl: In port sync mode disable slaves first then master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the transcoder port sync mode, the slave transcoders mask their vblanks until master transcoder's vblank so while disabling them, make sure slaves are disabled first and then the masters. v5: * Dont pass dev priv to get_slave_crtc (Ville) v4: * Obtain slave state from master (Maarten) v3: * Rebase v2: * Use the intel_old_crtc_state_disables() helper Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20191018172725.1338-6-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 58 ++++++++++++++++++-- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1ea8f37448ba..ff97423ea7ed 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14130,6 +14130,37 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state, new_crtc_state); } +static void intel_trans_port_sync_modeset_disables(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state); + struct intel_crtc_state *new_slave_crtc_state = + intel_atomic_get_new_crtc_state(state, slave_crtc); + struct intel_crtc_state *old_slave_crtc_state = + intel_atomic_get_old_crtc_state(state, slave_crtc); + + WARN_ON(!slave_crtc || !new_slave_crtc_state || + !old_slave_crtc_state); + + /* Disable Slave first */ + intel_pre_plane_update(old_slave_crtc_state, new_slave_crtc_state); + if (old_slave_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_slave_crtc_state, + new_slave_crtc_state, + slave_crtc); + + /* Disable Master */ + intel_pre_plane_update(old_crtc_state, new_crtc_state); + if (old_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_crtc_state, + new_crtc_state, + crtc); +} + static void intel_commit_modeset_disables(struct intel_atomic_state *state) { struct intel_crtc_state *new_crtc_state, *old_crtc_state; @@ -14148,13 +14179,28 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) if (!needs_modeset(new_crtc_state)) continue; - intel_pre_plane_update(old_crtc_state, new_crtc_state); + /* In case of Transcoder port Sync master slave CRTCs can be + * assigned in any order and we need to make sure that + * slave CRTCs are disabled first and then master CRTC since + * Slave vblanks are masked till Master Vblanks. + */ + if (is_trans_port_sync_mode(new_crtc_state)) { + if (is_trans_port_sync_master(new_crtc_state)) + intel_trans_port_sync_modeset_disables(state, + crtc, + old_crtc_state, + new_crtc_state); + else + continue; + } else { + intel_pre_plane_update(old_crtc_state, new_crtc_state); - if (old_crtc_state->base.active) - intel_old_crtc_state_disables(state, - old_crtc_state, - new_crtc_state, - crtc); + if (old_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_crtc_state, + new_crtc_state, + crtc); + } } } From 327f8d8c336d2f7a037e26cdbd7f372d993a138f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2019 18:09:01 -0700 Subject: [PATCH 151/159] drm/i915: simplify setting of ddi_io_power_domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of the ever growing switch, just compute the ddi io power domain based on the port number. Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191011010907.103309-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 43 ++---------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 46a7b2ca2782..9ba794cb9b4f 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4758,46 +4758,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->update_complete = intel_ddi_update_complete; } - switch (port) { - case PORT_A: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_A_IO; - break; - case PORT_B: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_B_IO; - break; - case PORT_C: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_C_IO; - break; - case PORT_D: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_D_IO; - break; - case PORT_E: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_E_IO; - break; - case PORT_F: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_F_IO; - break; - case PORT_G: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_G_IO; - break; - case PORT_H: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_H_IO; - break; - case PORT_I: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_I_IO; - break; - default: - MISSING_CASE(port); - } + WARN_ON(port > PORT_I); + intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_A_IO + + port - PORT_A; if (init_dp) { if (!intel_ddi_init_dp_connector(intel_dig_port)) From 10d987fd1b7baceaafa78d805e71427ab735b4e4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2019 18:09:03 -0700 Subject: [PATCH 152/159] drm/i915: fix port checks for MST support on gen >= 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both Ice Lake and Elkhart Lake (gen 11) support MST on all external connections except DDI A. Tiger Lake (gen 12) supports on all external connections. Move the check to happen inside intel_dp_mst_encoder_init() and add specific platform checks. v2: Replace != with == checks for ports on gen < 11 (Ville) Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015164029.18431-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 7 ++----- drivers/gpu/drm/i915/display/intel_dp_mst.c | 22 +++++++++++++++------ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3792d143bea9..3696623ce443 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -7468,11 +7468,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_connector->get_hw_state = intel_connector_get_hw_state; /* init MST on ports that can support it */ - if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) && - (port == PORT_B || port == PORT_C || - port == PORT_D || port == PORT_F)) - intel_dp_mst_encoder_init(intel_dig_port, - intel_connector->base.base.id); + intel_dp_mst_encoder_init(intel_dig_port, + intel_connector->base.base.id); if (!intel_edp_init_connector(intel_dp, intel_connector)) { intel_dp_aux_fini(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2203be28ea01..bbcab27644dc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -655,21 +655,31 @@ intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port) int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_base_id) { + struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; + enum port port = intel_dig_port->base.port; int ret; - intel_dp->can_mst = true; + if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp)) + return 0; + + if (INTEL_GEN(i915) < 12 && port == PORT_A) + return 0; + + if (INTEL_GEN(i915) < 11 && port == PORT_E) + return 0; + intel_dp->mst_mgr.cbs = &mst_cbs; /* create encoders */ intel_dp_create_fake_mst_encoders(intel_dig_port); - ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, dev, + ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm, &intel_dp->aux, 16, 3, conn_base_id); - if (ret) { - intel_dp->can_mst = false; + if (ret) return ret; - } + + intel_dp->can_mst = true; + return 0; } From cbd9b9f2e7b1f2e34478e0b1d944d521063e430d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2019 18:09:04 -0700 Subject: [PATCH 153/159] drm/i915: remove extra new line on pipe_config mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new line is already added by pipe_config_mismatch(), so the callers shouldn't add it. Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011010907.103309-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ff97423ea7ed..87e42faeed98 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12817,7 +12817,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(expected 0x%08x, found 0x%08x)\n", \ + "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12827,7 +12827,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12837,7 +12837,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %s, found %s)\n", \ + "(expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12854,7 +12854,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(name); \ } else { \ pipe_config_mismatch(fastset, __stringify(name), \ - "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \ + "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12864,7 +12864,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %p, found %p)\n", \ + "(expected %p, found %p)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12877,7 +12877,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, !fastset)) { \ pipe_config_mismatch(fastset, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12905,7 +12905,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, pipe_config_mismatch(fastset, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ "or tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12928,7 +12928,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(%x) (expected %i, found %i)\n", \ + "(%x) (expected %i, found %i)", \ (mask), \ current_config->name & (mask), \ pipe_config->name & (mask)); \ @@ -12939,7 +12939,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12959,7 +12959,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ if (current_config->name1 != pipe_config->name1) { \ pipe_config_mismatch(fastset, __stringify(name1), \ - "(expected %i, found %i, won't compare lut values)\n", \ + "(expected %i, found %i, won't compare lut values)", \ current_config->name1, \ pipe_config->name1); \ ret = false;\ @@ -12968,7 +12968,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, pipe_config->name2, pipe_config->name1, \ bit_precision)) { \ pipe_config_mismatch(fastset, __stringify(name2), \ - "hw_state doesn't match sw_state\n"); \ + "hw_state doesn't match sw_state"); \ ret = false; \ } \ } \ From 73cefd903de77ac084b278e152248d6923d400e5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2019 18:09:05 -0700 Subject: [PATCH 154/159] drm/i915: add pipe id/name to pipe mismatch logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way it's easier to figure out what didn't match when we have multiple pipes enabled. v2: pass drm_crtc and use the more common [CRTC:%d:%s] format (Ville) v3: use struct intel_crtc type to pass crtc around (Ville) Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191015164029.18431-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 34 +++++++++++--------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 87e42faeed98..2912abd85148 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12762,8 +12762,9 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, } } -static void __printf(3, 4) -pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) +static void __printf(4, 5) +pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, + const char *name, const char *format, ...) { struct va_format vaf; va_list args; @@ -12773,9 +12774,11 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) vaf.va = &args; if (fastset) - DRM_DEBUG_KMS("fastset mismatch in %s %pV\n", name, &vaf); + DRM_DEBUG_KMS("[CRTC:%d:%s] fastset mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); else - DRM_ERROR("mismatch in %s %pV\n", name, &vaf); + DRM_ERROR("[CRTC:%d:%s] mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); va_end(args); } @@ -12803,6 +12806,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, bool fastset) { struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); bool ret = true; u32 bp_gamma = 0; bool fixup_inherited = fastset && @@ -12816,7 +12820,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ pipe_config->name); \ @@ -12826,7 +12830,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ @@ -12836,7 +12840,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ @@ -12853,7 +12857,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ @@ -12863,7 +12867,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %p, found %p)", \ current_config->name, \ pipe_config->name); \ @@ -12875,7 +12879,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ @@ -12902,7 +12906,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, &pipe_config->name, !fastset) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ &pipe_config->name, !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ "or tu %i gmch %i/%i link %i/%i, " \ "found tu %i, gmch %i/%i link %i/%i)", \ @@ -12927,7 +12931,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(%x) (expected %i, found %i)", \ (mask), \ current_config->name & (mask), \ @@ -12938,7 +12942,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ @@ -12958,7 +12962,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ if (current_config->name1 != pipe_config->name1) { \ - pipe_config_mismatch(fastset, __stringify(name1), \ + pipe_config_mismatch(fastset, crtc, __stringify(name1), \ "(expected %i, found %i, won't compare lut values)", \ current_config->name1, \ pipe_config->name1); \ @@ -12967,7 +12971,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!intel_color_lut_equal(current_config->name2, \ pipe_config->name2, pipe_config->name1, \ bit_precision)) { \ - pipe_config_mismatch(fastset, __stringify(name2), \ + pipe_config_mismatch(fastset, crtc, __stringify(name2), \ "hw_state doesn't match sw_state"); \ ret = false; \ } \ From 7acf6c9495d075a2f05e90915d03027227b0a1d5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Oct 2019 18:09:06 -0700 Subject: [PATCH 155/159] drm/i915: prettify MST debug message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/?/:/ so it gets correctly colored by dmesg. Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191011010907.103309-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3696623ce443..5eeafa45831a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4478,7 +4478,7 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) &dp_to_dig_port(intel_dp)->base; bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); - DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support? port: %s, sink: %s, modparam: %s\n", + DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", encoder->base.base.id, encoder->base.name, yesno(intel_dp->can_mst), yesno(sink_can_mst), yesno(i915_modparams.enable_dp_mst)); From 8814c6d01f7e82e6be70ac04d8bca0fc90757418 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 20 Oct 2019 00:46:47 +0300 Subject: [PATCH 156/159] drm/i915/perf: fix oa config reconfiguration The current logic just reapplies the same configuration already stored into stream->oa_config instead of the newly selected one. Signed-off-by: Lionel Landwerlin Fixes: 7831e9a965ea ("drm/i915/perf: Allow dynamic reconfiguration of the OA stream") Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191019214647.27866-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 54ec1c4190ac..d2ac51fe4f04 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1863,13 +1863,14 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) } static int emit_oa_config(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config, struct intel_context *ce) { struct i915_request *rq; struct i915_vma *vma; int err; - vma = get_oa_vma(stream, stream->oa_config); + vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1927,7 +1928,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, oa_context(stream)); + return emit_oa_config(stream, stream->oa_config, oa_context(stream)); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2250,7 +2251,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, static int gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - const struct i915_oa_config *oa_config = stream->oa_config; + struct i915_oa_config *oa_config = stream->oa_config; int ret; /* @@ -2291,7 +2292,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - return emit_oa_config(stream, oa_context(stream)); + return emit_oa_config(stream, oa_config, oa_context(stream)); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -2895,7 +2896,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - err = emit_oa_config(stream, oa_context(stream)); + err = emit_oa_config(stream, config, oa_context(stream)); if (err == 0) config = xchg(&stream->oa_config, config); else From 0dc3c562aa95e029f40779c5c9709c7c1bdeb415 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Sun, 20 Oct 2019 19:41:39 +0100 Subject: [PATCH 157/159] drm/i915: Extract GT ring management Although the ring management is much smaller compared to the other GT power management functions, continue the theme of extracting it out of the huge intel_pm.c for maintenance. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191020184139.9145-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + drivers/gpu/drm/i915/gt/intel_llc.c | 161 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_llc.h | 15 ++ drivers/gpu/drm/i915/gt/intel_llc_types.h | 13 ++ drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 9 ++ drivers/gpu/drm/i915/gt/selftest_llc.c | 77 +++++++++++ drivers/gpu/drm/i915/gt/selftest_llc.h | 14 ++ drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_pm.c | 120 +--------------- 10 files changed, 297 insertions(+), 120 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_llc.c create mode 100644 drivers/gpu/drm/i915/gt/intel_llc.h create mode 100644 drivers/gpu/drm/i915/gt/intel_llc_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_llc.c create mode 100644 drivers/gpu/drm/i915/gt/selftest_llc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e791d9323b51..a16a2daef977 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -87,6 +87,7 @@ gt-y += \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ gt/intel_hangcheck.o \ + gt/intel_llc.o \ gt/intel_lrc.o \ gt/intel_rc6.o \ gt/intel_renderstate.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index be4b263621c8..ae4aaf75ac78 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,6 +17,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" #include "intel_wakeref.h" @@ -79,6 +80,7 @@ struct intel_gt { */ intel_wakeref_t awake; + struct intel_llc llc; struct intel_rc6 rc6; struct blocking_notifier_head pm_notifications; diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c new file mode 100644 index 000000000000..35093eb5f24e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_sideband.h" + +struct ia_constants { + unsigned int min_gpu_freq; + unsigned int max_gpu_freq; + + unsigned int min_ring_freq; + unsigned int max_ia_freq; +}; + +static struct intel_gt *llc_to_gt(struct intel_llc *llc) +{ + return container_of(llc, struct intel_gt, llc); +} + +static unsigned int cpu_max_MHz(void) +{ + struct cpufreq_policy *policy; + unsigned int max_khz; + + policy = cpufreq_cpu_get(0); + if (policy) { + max_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_khz = tsc_khz; + } + + return max_khz / 1000; +} + +static bool get_ia_constants(struct intel_llc *llc, + struct ia_constants *consts) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct intel_rps *rps = &i915->gt_pm.rps; + + if (rps->max_freq <= rps->min_freq) + return false; + + consts->max_ia_freq = cpu_max_MHz(); + + consts->min_ring_freq = + intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); + + consts->min_gpu_freq = rps->min_freq; + consts->max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + consts->min_gpu_freq /= GEN9_FREQ_SCALER; + consts->max_gpu_freq /= GEN9_FREQ_SCALER; + } + + return true; +} + +static void calc_ia_freq(struct intel_llc *llc, + unsigned int gpu_freq, + const struct ia_constants *consts, + unsigned int *out_ia_freq, + unsigned int *out_ring_freq) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + const int diff = consts->max_gpu_freq - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; + + if (INTEL_GEN(i915) >= 9) { + /* + * ring_freq = 2 * GT. ring_freq is in 100MHz units + * No floor required for ring frequency on SKL. + */ + ring_freq = gpu_freq; + } else if (INTEL_GEN(i915) >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(consts->min_ring_freq, gpu_freq); + } else if (IS_HASWELL(i915)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(consts->min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + const int min_freq = 15; + const int scale = 180; + + /* + * On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. + */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = consts->max_ia_freq - diff * scale / 2; + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } + + *out_ia_freq = ia_freq; + *out_ring_freq = ring_freq; +} + +static void gen6_update_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + unsigned int gpu_freq; + + if (!get_ia_constants(llc, &consts)) + return; + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = consts.max_gpu_freq; + gpu_freq >= consts.min_gpu_freq; + gpu_freq--) { + unsigned int ia_freq, ring_freq; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + sandybridge_pcode_write(i915, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); + } +} + +void intel_llc_enable(struct intel_llc *llc) +{ + if (HAS_LLC(llc_to_gt(llc)->i915)) + gen6_update_ring_freq(llc); +} + +void intel_llc_disable(struct intel_llc *llc) +{ + /* Currently there is no HW configuration to be done to disable. */ +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_llc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h new file mode 100644 index 000000000000..ef09a890d2b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_H +#define INTEL_LLC_H + +struct intel_llc; + +void intel_llc_enable(struct intel_llc *llc); +void intel_llc_disable(struct intel_llc *llc); + +#endif /* INTEL_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h new file mode 100644 index 000000000000..ecad4687b930 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_TYPES_H +#define INTEL_LLC_TYPES_H + +struct intel_llc { +}; + +#endif /* INTEL_LLC_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 87985bd46423..5d429037cdad 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -5,6 +5,8 @@ * Copyright © 2019 Intel Corporation */ +#include "selftest_llc.h" + static int live_gt_resume(void *arg) { struct intel_gt *gt = arg; @@ -32,6 +34,13 @@ static int live_gt_resume(void *arg) err = -EINVAL; break; } + + err = st_llc_verify(>->llc); + if (err) { + pr_err("llc state not restored upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + break; + } } while (!__igt_timeout(end_time, NULL)); return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c new file mode 100644 index 000000000000..a7057785e420 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -0,0 +1,77 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_pm.h" /* intel_gpu_freq() */ +#include "selftest_llc.h" + +static int gen6_verify_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + intel_wakeref_t wakeref; + unsigned int gpu_freq; + int err = 0; + + wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm); + + if (!get_ia_constants(llc, &consts)) { + err = -ENODEV; + goto out_rpm; + } + + for (gpu_freq = consts.min_gpu_freq; + gpu_freq <= consts.max_gpu_freq; + gpu_freq++) { + unsigned int ia_freq, ring_freq, found; + u32 val; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + + val = gpu_freq; + if (sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &val, NULL)) { + pr_err("Failed to read freq table[%d], range [%d, %d]\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq); + err = -ENXIO; + break; + } + + found = (val >> 0) & 0xff; + if (found != ia_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ia_freq); + err = -EINVAL; + break; + } + + found = (val >> 8) & 0xff; + if (found != ring_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ring_freq); + err = -EINVAL; + break; + } + } + +out_rpm: + intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref); + return err; +} + +int st_llc_verify(struct intel_llc *llc) +{ + int err = 0; + + if (HAS_LLC(llc_to_gt(llc)->i915)) + err = gen6_verify_ring_freq(llc); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h new file mode 100644 index 000000000000..873f896e72f2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_LLC_H +#define SELFTEST_LLC_H + +struct intel_llc; + +int st_llc_verify(struct intel_llc *llc); + +#endif /* SELFTEST_LLC_H */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db3c3a025a03..16b85c7f7f21 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -605,13 +605,8 @@ struct intel_rps { struct intel_rps_ei ei; }; -struct intel_llc_pstate { - bool enabled; -}; - struct intel_gen6_power_mgmt { struct intel_rps rps; - struct intel_llc_pstate llc_pstate; }; /* defined intel_pm.c */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2b71d52a4ede..362234449087 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -25,7 +25,6 @@ * */ -#include #include #include @@ -38,6 +37,8 @@ #include "display/intel_fbc.h" #include "display/intel_sprite.h" +#include "gt/intel_llc.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_trace.h" @@ -7030,93 +7031,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const int min_freq = 15; - const int scaling_factor = 180; - unsigned int gpu_freq; - unsigned int max_ia_freq, min_ring_freq; - unsigned int max_gpu_freq, min_gpu_freq; - struct cpufreq_policy *policy; - - lockdep_assert_held(&rps->lock); - - if (rps->max_freq <= rps->min_freq) - return; - - policy = cpufreq_cpu_get(0); - if (policy) { - max_ia_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } else { - /* - * Default to measured freq if none found, PCU will ensure we - * don't go over - */ - max_ia_freq = tsc_khz; - } - - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; - - min_ring_freq = I915_READ(DCLK) & 0xf; - /* convert DDR frequency from units of 266.6MHz to bandwidth */ - min_ring_freq = mult_frac(min_ring_freq, 8, 3); - - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Convert GT frequency to 50 HZ units */ - min_gpu_freq /= GEN9_FREQ_SCALER; - max_gpu_freq /= GEN9_FREQ_SCALER; - } - - /* - * For each potential GPU frequency, load a ring frequency we'd like - * to use for memory access. We do this by specifying the IA frequency - * the PCU should use as a reference to determine the ring frequency. - */ - for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { - const int diff = max_gpu_freq - gpu_freq; - unsigned int ia_freq = 0, ring_freq = 0; - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* - * ring_freq = 2 * GT. ring_freq is in 100MHz units - * No floor required for ring frequency on SKL. - */ - ring_freq = gpu_freq; - } else if (INTEL_GEN(dev_priv) >= 8) { - /* max(2 * GT, DDR). NB: GT is 50MHz units */ - ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev_priv)) { - ring_freq = mult_frac(gpu_freq, 5, 4); - ring_freq = max(min_ring_freq, ring_freq); - /* leave ia_freq as the default, chosen by cpufreq */ - } else { - /* On older processors, there is no separate ring - * clock domain, so in order to boost the bandwidth - * of the ring, we need to upclock the CPU (ia_freq). - * - * For GPU frequencies less than 750MHz, - * just use the lowest ring freq. - */ - if (gpu_freq < min_freq) - ia_freq = 800; - else - ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); - } - - sandybridge_pcode_write(dev_priv, - GEN6_PCODE_WRITE_MIN_FREQ_TABLE, - ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | - ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | - gpu_freq); - } -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; @@ -7965,18 +7879,6 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (!i915->gt_pm.llc_pstate.enabled) - return; - - /* Currently there is no HW configuration to be done to disable. */ - - i915->gt_pm.llc_pstate.enabled = false; -} - static void intel_disable_rps(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->gt_pm.rps.lock); @@ -8004,23 +7906,11 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) - intel_disable_llc_pstate(dev_priv); + intel_llc_disable(&dev_priv->gt.llc); mutex_unlock(&dev_priv->gt_pm.rps.lock); } -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (i915->gt_pm.llc_pstate.enabled) - return; - - gen6_update_ring_freq(i915); - - i915->gt_pm.llc_pstate.enabled = true; -} - static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8064,8 +7954,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_RPS(dev_priv)) intel_enable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_enable_llc_pstate(dev_priv); + + intel_llc_enable(&dev_priv->gt.llc); mutex_unlock(&dev_priv->gt_pm.rps.lock); } From 952f89f098c75ae4147fc440aaa3b9a209240cea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2019 19:39:01 +0100 Subject: [PATCH 158/159] drm/i915/gvt: Wean off struct_mutex Use the local vgpu_lock while preparing workloads to avoid taking the obsolete i915->drm.struct_mutex Signed-off-by: Chris Wilson Reviewed-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20191016183902.13614-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9ebb2534558b..36bb7639e82f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -415,10 +415,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&vgpu->vgpu_lock); if (workload->shadow) return 0; @@ -580,8 +579,6 @@ static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_shadow_bb *bb, *pos; if (list_empty(&workload->shadow_bb)) @@ -590,8 +587,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb = list_first_entry(&workload->shadow_bb, struct intel_vgpu_shadow_bb, list); - mutex_lock(&dev_priv->drm.struct_mutex); - list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { if (bb->accessing) @@ -609,8 +604,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) list_del(&bb->list); kfree(bb); } - - mutex_unlock(&dev_priv->drm.struct_mutex); } static int prepare_workload(struct intel_vgpu_workload *workload) @@ -685,7 +678,6 @@ static int prepare_workload(struct intel_vgpu_workload *workload) static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -694,7 +686,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ring_id, workload); mutex_lock(&vgpu->vgpu_lock); - mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_workload_req_alloc(workload); if (ret) @@ -729,7 +720,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) err_req: if (ret) workload->status = ret; - mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -1594,9 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&vgpu->vgpu_lock); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&vgpu->vgpu_lock); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } From ce53908bba6fa6e905d8fe81da4591d3e7a65878 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 21 Oct 2019 12:56:07 +0300 Subject: [PATCH 159/159] drm/i915: Update DRIVER_DATE to 20191021 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 16b85c7f7f21..8882c0908c3b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20191007" -#define DRIVER_TIMESTAMP 1570451087 +#define DRIVER_DATE "20191021" +#define DRIVER_TIMESTAMP 1571651766 struct drm_i915_gem_object;