0455-drm-vc4-Implement-precise-vblank-timestamping.patch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. From 50cb4c343d766b0a3efa441a2c62fb890f0b3e45 Mon Sep 17 00:00:00 2001
  2. From: Mario Kleiner <mario.kleiner.de@gmail.com>
  3. Date: Thu, 23 Jun 2016 08:17:50 +0200
  4. Subject: [PATCH] drm/vc4: Implement precise vblank timestamping.
  5. Precise vblank timestamping is implemented via the
  6. usual scanout position based method. On VC4 the
  7. pixelvalves PV do not have a scanout position
  8. register. Only the hardware video scaler HVS has a
  9. similar register which describes which scanline for
  10. the output is currently composited and stored in the
  11. HVS fifo for later consumption by the PV.
  12. This causes a problem in that the HVS runs at a much
  13. faster clock (system clock / audio gate) than the PV
  14. which runs at video mode dot clock, so unless the
  15. fifo between HVS and PV is full, the HVS will progress
  16. faster in its observable read line position than video
  17. scan rate, so the HVS position reading can't be directly
  18. translated into a scanout position for timestamp correction.
  19. Additionally when the PV is in vblank, it doesn't consume
  20. from the fifo, so the fifo gets full very quickly and then
  21. the HVS stops compositing until the PV enters active scanout
  22. and starts consuming scanlines from the fifo again, making
  23. new space for the HVS to composite.
  24. Therefore a simple translation of HVS read position into
  25. elapsed time since (or to) start of active scanout does
  26. not work, but for the most interesting cases we can still
  27. get useful and sufficiently accurate results:
  28. 1. The PV enters active scanout of a new frame with the
  29. fifo of the HVS completely full, and the HVS can refill
  30. any fifo line which gets consumed and thereby freed up by
  31. the PV during active scanout very quickly. Therefore the
  32. PV and HVS work effectively in lock-step during active
  33. scanout with the fifo never having more than 1 scanline
  34. freed up by the PV before it gets refilled. The PV's
  35. real scanout position is therefore trailing the HVS
  36. compositing position as scanoutpos = hvspos - fifosize
  37. and we can get the true scanoutpos as HVS readpos minus
  38. fifo size, so precise timestamping works while in active
  39. scanout, except for the last few scanlines of the frame,
  40. when the HVS reaches end of frame, stops compositing and
  41. the PV catches up and drains the fifo. This special case
  42. would only introduce minor errors though.
  43. 2. If we are in vblank, then we can only guess something
  44. reasonable. If called from vblank irq, we assume the irq is
  45. usually dispatched with minimum delay, so we can use a
  46. timestamp taken at entry into the vblank irq handler as a
  47. baseline and then add a full vblank duration until the
  48. guessed start of active scanout. As irq dispatch is usually
  49. pretty low latency this works with relatively low jitter and
  50. good results.
  51. If we aren't called from vblank then we could be anywhere
  52. within the vblank interval, so we return a neutral result,
  53. simply the current system timestamp, and hope for the best.
  54. Measurement shows the generated timestamps to be rather precise,
  55. and at least never off by more than 1 vblank duration worst-case.
  56. Limitations: Doesn't work well yet for interlaced video modes,
  57. therefore disabled in interlaced mode for now.
  58. v2: Use the DISPBASE registers to determine the FIFO size (changes
  59. by anholt)
  60. Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
  61. Signed-off-by: Eric Anholt <eric@anholt.net>
  62. Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
  63. (cherry picked from commit 1bf59f1dcbe25272f6b5d870054647e58a8a9c55)
  64. ---
  65. drivers/gpu/drm/vc4/vc4_crtc.c | 162 +++++++++++++++++++++++++++++++++++++++++
  66. drivers/gpu/drm/vc4/vc4_drv.c | 2 +
  67. drivers/gpu/drm/vc4/vc4_drv.h | 7 ++
  68. drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++-
  69. 4 files changed, 192 insertions(+), 1 deletion(-)
  70. --- a/drivers/gpu/drm/vc4/vc4_crtc.c
  71. +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
  72. @@ -47,12 +47,17 @@ struct vc4_crtc {
  73. const struct vc4_crtc_data *data;
  74. void __iomem *regs;
  75. + /* Timestamp at start of vblank irq - unaffected by lock delays. */
  76. + ktime_t t_vblank;
  77. +
  78. /* Which HVS channel we're using for our CRTC. */
  79. int channel;
  80. u8 lut_r[256];
  81. u8 lut_g[256];
  82. u8 lut_b[256];
  83. + /* Size in pixels of the COB memory allocated to this CRTC. */
  84. + u32 cob_size;
  85. struct drm_pending_vblank_event *event;
  86. };
  87. @@ -134,6 +139,144 @@ int vc4_crtc_debugfs_regs(struct seq_fil
  88. }
  89. #endif
  90. +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
  91. + unsigned int flags, int *vpos, int *hpos,
  92. + ktime_t *stime, ktime_t *etime,
  93. + const struct drm_display_mode *mode)
  94. +{
  95. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  96. + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
  97. + u32 val;
  98. + int fifo_lines;
  99. + int vblank_lines;
  100. + int ret = 0;
  101. +
  102. + /*
  103. + * XXX Doesn't work well in interlaced mode yet, partially due
  104. + * to problems in vc4 kms or drm core interlaced mode handling,
  105. + * so disable for now in interlaced mode.
  106. + */
  107. + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
  108. + return ret;
  109. +
  110. + /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  111. +
  112. + /* Get optional system timestamp before query. */
  113. + if (stime)
  114. + *stime = ktime_get();
  115. +
  116. + /*
  117. + * Read vertical scanline which is currently composed for our
  118. + * pixelvalve by the HVS, and also the scaler status.
  119. + */
  120. + val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
  121. +
  122. + /* Get optional system timestamp after query. */
  123. + if (etime)
  124. + *etime = ktime_get();
  125. +
  126. + /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  127. +
  128. + /* Vertical position of hvs composed scanline. */
  129. + *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
  130. +
  131. + /* No hpos info available. */
  132. + if (hpos)
  133. + *hpos = 0;
  134. +
  135. + /* This is the offset we need for translating hvs -> pv scanout pos. */
  136. + fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
  137. +
  138. + if (fifo_lines > 0)
  139. + ret |= DRM_SCANOUTPOS_VALID;
  140. +
  141. + /* HVS more than fifo_lines into frame for compositing? */
  142. + if (*vpos > fifo_lines) {
  143. + /*
  144. + * We are in active scanout and can get some meaningful results
  145. + * from HVS. The actual PV scanout can not trail behind more
  146. + * than fifo_lines as that is the fifo's capacity. Assume that
  147. + * in active scanout the HVS and PV work in lockstep wrt. HVS
  148. + * refilling the fifo and PV consuming from the fifo, ie.
  149. + * whenever the PV consumes and frees up a scanline in the
  150. + * fifo, the HVS will immediately refill it, therefore
  151. + * incrementing vpos. Therefore we choose HVS read position -
  152. + * fifo size in scanlines as an estimate of the real scanout
  153. + * position of the PV.
  154. + */
  155. + *vpos -= fifo_lines + 1;
  156. + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
  157. + *vpos /= 2;
  158. +
  159. + ret |= DRM_SCANOUTPOS_ACCURATE;
  160. + return ret;
  161. + }
  162. +
  163. + /*
  164. + * Less: This happens when we are in vblank and the HVS, after getting
  165. + * the VSTART restart signal from the PV, just started refilling its
  166. + * fifo with new lines from the top-most lines of the new framebuffers.
  167. + * The PV does not scan out in vblank, so does not remove lines from
  168. + * the fifo, so the fifo will be full quickly and the HVS has to pause.
  169. + * We can't get meaningful readings wrt. scanline position of the PV
  170. + * and need to make things up in a approximative but consistent way.
  171. + */
  172. + ret |= DRM_SCANOUTPOS_IN_VBLANK;
  173. + vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
  174. +
  175. + if (flags & DRM_CALLED_FROM_VBLIRQ) {
  176. + /*
  177. + * Assume the irq handler got called close to first
  178. + * line of vblank, so PV has about a full vblank's
  179. + * worth of scanlines to go, and as a base timestamp use the
  180. + * one taken at entry into vblank irq handler, so it
  181. + * is not affected by random delays due to lock
  182. + * contention on event_lock or vblank_time lock in
  183. + * the core.
  184. + */
  185. + *vpos = -vblank_lines;
  186. +
  187. + if (stime)
  188. + *stime = vc4_crtc->t_vblank;
  189. + if (etime)
  190. + *etime = vc4_crtc->t_vblank;
  191. +
  192. + /*
  193. + * If the HVS fifo is not yet full then we know for certain
  194. + * we are at the very beginning of vblank, as the hvs just
  195. + * started refilling, and the stime and etime timestamps
  196. + * truly correspond to start of vblank.
  197. + */
  198. + if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
  199. + ret |= DRM_SCANOUTPOS_ACCURATE;
  200. + } else {
  201. + /*
  202. + * No clue where we are inside vblank. Return a vpos of zero,
  203. + * which will cause calling code to just return the etime
  204. + * timestamp uncorrected. At least this is no worse than the
  205. + * standard fallback.
  206. + */
  207. + *vpos = 0;
  208. + }
  209. +
  210. + return ret;
  211. +}
  212. +
  213. +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
  214. + int *max_error, struct timeval *vblank_time,
  215. + unsigned flags)
  216. +{
  217. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  218. + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
  219. + struct drm_crtc *crtc = &vc4_crtc->base;
  220. + struct drm_crtc_state *state = crtc->state;
  221. +
  222. + /* Helper routine in DRM core does all the work: */
  223. + return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
  224. + vblank_time, flags,
  225. + &state->adjusted_mode);
  226. +}
  227. +
  228. static void vc4_crtc_destroy(struct drm_crtc *crtc)
  229. {
  230. drm_crtc_cleanup(crtc);
  231. @@ -535,6 +678,7 @@ static irqreturn_t vc4_crtc_irq_handler(
  232. irqreturn_t ret = IRQ_NONE;
  233. if (stat & PV_INT_VFP_START) {
  234. + vc4_crtc->t_vblank = ktime_get();
  235. CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
  236. drm_crtc_handle_vblank(&vc4_crtc->base);
  237. vc4_crtc_handle_page_flip(vc4_crtc);
  238. @@ -759,6 +903,22 @@ static void vc4_set_crtc_possible_masks(
  239. }
  240. }
  241. +static void
  242. +vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
  243. +{
  244. + struct drm_device *drm = vc4_crtc->base.dev;
  245. + struct vc4_dev *vc4 = to_vc4_dev(drm);
  246. + u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
  247. + /* Top/base are supposed to be 4-pixel aligned, but the
  248. + * Raspberry Pi firmware fills the low bits (which are
  249. + * presumably ignored).
  250. + */
  251. + u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
  252. + u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
  253. +
  254. + vc4_crtc->cob_size = top - base + 4;
  255. +}
  256. +
  257. static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
  258. {
  259. struct platform_device *pdev = to_platform_device(dev);
  260. @@ -835,6 +995,8 @@ static int vc4_crtc_bind(struct device *
  261. crtc->cursor = cursor_plane;
  262. }
  263. + vc4_crtc_get_cob_allocation(vc4_crtc);
  264. +
  265. CRTC_WRITE(PV_INTEN, 0);
  266. CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
  267. ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
  268. --- a/drivers/gpu/drm/vc4/vc4_drv.c
  269. +++ b/drivers/gpu/drm/vc4/vc4_drv.c
  270. @@ -116,6 +116,8 @@ static struct drm_driver vc4_drm_driver
  271. .enable_vblank = vc4_enable_vblank,
  272. .disable_vblank = vc4_disable_vblank,
  273. .get_vblank_counter = drm_vblank_no_hw_counter,
  274. + .get_scanout_position = vc4_crtc_get_scanoutpos,
  275. + .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
  276. #if defined(CONFIG_DEBUG_FS)
  277. .debugfs_init = vc4_debugfs_init,
  278. --- a/drivers/gpu/drm/vc4/vc4_drv.h
  279. +++ b/drivers/gpu/drm/vc4/vc4_drv.h
  280. @@ -419,6 +419,13 @@ int vc4_enable_vblank(struct drm_device
  281. void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
  282. void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
  283. int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
  284. +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
  285. + unsigned int flags, int *vpos, int *hpos,
  286. + ktime_t *stime, ktime_t *etime,
  287. + const struct drm_display_mode *mode);
  288. +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
  289. + int *max_error, struct timeval *vblank_time,
  290. + unsigned flags);
  291. /* vc4_debugfs.c */
  292. int vc4_debugfs_init(struct drm_minor *minor);
  293. --- a/drivers/gpu/drm/vc4/vc4_regs.h
  294. +++ b/drivers/gpu/drm/vc4/vc4_regs.h
  295. @@ -368,7 +368,6 @@
  296. # define SCALER_DISPBKGND_FILL BIT(24)
  297. #define SCALER_DISPSTAT0 0x00000048
  298. -#define SCALER_DISPBASE0 0x0000004c
  299. # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
  300. # define SCALER_DISPSTATX_MODE_SHIFT 30
  301. # define SCALER_DISPSTATX_MODE_DISABLED 0
  302. @@ -377,6 +376,24 @@
  303. # define SCALER_DISPSTATX_MODE_EOF 3
  304. # define SCALER_DISPSTATX_FULL BIT(29)
  305. # define SCALER_DISPSTATX_EMPTY BIT(28)
  306. +# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
  307. +# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
  308. +# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
  309. +# define SCALER_DISPSTATX_LINE_SHIFT 0
  310. +
  311. +#define SCALER_DISPBASE0 0x0000004c
  312. +/* Last pixel in the COB (display FIFO memory) allocated to this HVS
  313. + * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
  314. + * next COB base).
  315. + */
  316. +# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
  317. +# define SCALER_DISPBASEX_TOP_SHIFT 16
  318. +/* First pixel in the COB (display FIFO memory) allocated to this HVS
  319. + * channel. Must be 4-pixel aligned.
  320. + */
  321. +# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
  322. +# define SCALER_DISPBASEX_BASE_SHIFT 0
  323. +
  324. #define SCALER_DISPCTRL1 0x00000050
  325. #define SCALER_DISPBKGND1 0x00000054
  326. #define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
  327. @@ -387,6 +404,9 @@
  328. (x) * (SCALER_DISPSTAT1 - \
  329. SCALER_DISPSTAT0))
  330. #define SCALER_DISPBASE1 0x0000005c
  331. +#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
  332. + (x) * (SCALER_DISPBASE1 - \
  333. + SCALER_DISPBASE0))
  334. #define SCALER_DISPCTRL2 0x00000060
  335. #define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
  336. (x) * (SCALER_DISPCTRL1 - \