0477-drm-vc4-Add-a-bitmap-of-branch-targets-during-shader.patch 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. From a2be1b8d84ab4ad9a1721fd67824f1e164d5862b Mon Sep 17 00:00:00 2001
  2. From: Eric Anholt <eric@anholt.net>
  3. Date: Sat, 2 Jul 2016 10:10:24 -0700
  4. Subject: [PATCH] drm/vc4: Add a bitmap of branch targets during shader
  5. validation.
  6. This isn't used yet; it's just a first step toward loop validation.
  7. During the main parsing of instructions, we need to know when we hit a
  8. new basic block so that we can reset validated state.
  9. v2: Fix a stray semicolon after an if block (caught by kbuild test).
  10. Signed-off-by: Eric Anholt <eric@anholt.net>
  11. (cherry picked from commit 93aa9ae3e5523e49e4e5abacd4dbee0e4ab2d931)
  12. ---
  13. drivers/gpu/drm/vc4/vc4_qpu_defines.h | 12 +++
  14. drivers/gpu/drm/vc4/vc4_validate_shaders.c | 114 ++++++++++++++++++++++++++++-
  15. 2 files changed, 124 insertions(+), 2 deletions(-)
  16. --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
  17. +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
  18. @@ -230,6 +230,15 @@ enum qpu_unpack_r4 {
  19. #define QPU_COND_MUL_SHIFT 46
  20. #define QPU_COND_MUL_MASK QPU_MASK(48, 46)
  21. +#define QPU_BRANCH_COND_SHIFT 52
  22. +#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52)
  23. +
  24. +#define QPU_BRANCH_REL ((uint64_t)1 << 51)
  25. +#define QPU_BRANCH_REG ((uint64_t)1 << 50)
  26. +
  27. +#define QPU_BRANCH_RADDR_A_SHIFT 45
  28. +#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45)
  29. +
  30. #define QPU_SF ((uint64_t)1 << 45)
  31. #define QPU_WADDR_ADD_SHIFT 38
  32. @@ -261,4 +270,7 @@ enum qpu_unpack_r4 {
  33. #define QPU_OP_ADD_SHIFT 24
  34. #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
  35. +#define QPU_BRANCH_TARGET_SHIFT 0
  36. +#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
  37. +
  38. #endif /* VC4_QPU_DEFINES_H */
  39. --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
  40. +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
  41. @@ -59,6 +59,13 @@ struct vc4_shader_validation_state {
  42. */
  43. uint32_t live_min_clamp_offsets[32 + 32 + 4];
  44. bool live_max_clamp_regs[32 + 32 + 4];
  45. +
  46. + /* Bitfield of which IPs are used as branch targets.
  47. + *
  48. + * Used for validation that the uniform stream is updated at the right
  49. + * points and clearing the texturing/clamping state.
  50. + */
  51. + unsigned long *branch_targets;
  52. };
  53. static uint32_t
  54. @@ -418,13 +425,104 @@ check_instruction_reads(uint64_t inst,
  55. return true;
  56. }
  57. +/* Make sure that every branch is relative (QPU_BRANCH_REL set, QPU_BRANCH_REG clear) and lands within the shader, and
  58. + * record both the branch target and the fall-through IP after the delay slots in branch_targets. Returns false on the first violation.
  59. + */
  60. +static bool
  61. +vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
  62. +{
  63. + uint32_t max_branch_target = 0;
  64. + bool found_shader_end = false;
  65. + int ip;
  66. + int shader_end_ip = 0;
  67. + int last_branch = -2;
  68. +
  69. + for (ip = 0; ip < validation_state->max_ip; ip++) {
  70. + uint64_t inst = validation_state->shader[ip];
  71. + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
  72. + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  73. + uint32_t after_delay_ip = ip + 4;
  74. + uint32_t branch_target_ip;
  75. +
  76. + if (sig == QPU_SIG_PROG_END) {
  77. + shader_end_ip = ip;
  78. + found_shader_end = true;
  79. + continue;
  80. + }
  81. +
  82. + if (sig != QPU_SIG_BRANCH)
  83. + continue;
  84. +
  85. + if (ip - last_branch < 4) {
  86. + DRM_ERROR("Branch at %d during delay slots\n", ip);
  87. + return false;
  88. + }
  89. + last_branch = ip;
  90. +
  91. + if (inst & QPU_BRANCH_REG) {
  92. + DRM_ERROR("branching from register relative "
  93. + "not supported\n");
  94. + return false;
  95. + }
  96. +
  97. + if (!(inst & QPU_BRANCH_REL)) {
  98. + DRM_ERROR("relative branching required\n");
  99. + return false;
  100. + }
  101. +
  102. + /* The actual branch target is the instruction after the delay
  103. + * slots, plus whatever byte offset is in the low 32 bits of
  104. + * the instruction (it must be a multiple of sizeof(inst); ">> 3" turns it into an instruction count).
  105. + * Make sure we're not branching beyond the end of the shader object.
  106. + */
  107. + if (branch_imm % sizeof(inst) != 0) {
  108. + DRM_ERROR("branch target not aligned\n");
  109. + return false;
  110. + }
  111. +
  112. + branch_target_ip = after_delay_ip + (branch_imm >> 3);
  113. + if (branch_target_ip >= validation_state->max_ip) {
  114. + DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
  115. + ip, branch_target_ip,
  116. + validation_state->max_ip);
  117. + return false;
  118. + }
  119. + set_bit(branch_target_ip, validation_state->branch_targets);
  120. +
  121. + /* Make sure that the non-branching path is also not outside
  122. + * the shader.
  123. + */
  124. + if (after_delay_ip >= validation_state->max_ip) {
  125. + DRM_ERROR("Branch at %d continues past shader end "
  126. + "(%d/%d)\n",
  127. + ip, after_delay_ip, validation_state->max_ip);
  128. + return false;
  129. + }
  130. + set_bit(after_delay_ip, validation_state->branch_targets);
  131. + max_branch_target = max(max_branch_target, after_delay_ip);
  132. +
  133. + /* There are two delay slots after program end is signaled that are still executed, then we're finished.
  134. + * NOTE(review): non-branch instructions "continue" above, so this check only runs when ip holds a branch -- confirm intent.
  135. + */
  136. + if (found_shader_end && ip == shader_end_ip + 2)
  137. + break;
  138. + }
  139. +
  140. + if (max_branch_target > shader_end_ip) {
  141. + DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
  142. + return false;
  143. + }
  144. +
  145. + return true;
  146. +}
  147. +
  148. struct vc4_validated_shader_info *
  149. vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
  150. {
  151. bool found_shader_end = false;
  152. int shader_end_ip = 0;
  153. uint32_t ip;
  154. - struct vc4_validated_shader_info *validated_shader;
  155. + struct vc4_validated_shader_info *validated_shader = NULL;
  156. struct vc4_shader_validation_state validation_state;
  157. int i;
  158. @@ -437,9 +535,18 @@ vc4_validate_shader(struct drm_gem_cma_o
  159. for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
  160. validation_state.live_min_clamp_offsets[i] = ~0;
  161. + validation_state.branch_targets =
  162. + kcalloc(BITS_TO_LONGS(validation_state.max_ip),
  163. + sizeof(unsigned long), GFP_KERNEL);
  164. + if (!validation_state.branch_targets)
  165. + goto fail;
  166. +
  167. validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
  168. if (!validated_shader)
  169. - return NULL;
  170. + goto fail;
  171. +
  172. + if (!vc4_validate_branches(&validation_state))
  173. + goto fail;
  174. for (ip = 0; ip < validation_state.max_ip; ip++) {
  175. uint64_t inst = validation_state.shader[ip];
  176. @@ -508,9 +615,12 @@ vc4_validate_shader(struct drm_gem_cma_o
  177. (validated_shader->uniforms_size +
  178. 4 * validated_shader->num_texture_samples);
  179. + kfree(validation_state.branch_targets);
  180. +
  181. return validated_shader;
  182. fail:
  183. + kfree(validation_state.branch_targets);
  184. if (validated_shader) {
  185. kfree(validated_shader->texture_samples);
  186. kfree(validated_shader);