knc-asic.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. /*
  2. * library for KnCminer devices
  3. *
  4. * Copyright 2014 KnCminer
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License as published by the Free
  8. * Software Foundation; either version 3 of the License, or (at your option)
  9. * any later version. See COPYING for more details.
  10. */
  11. #include <stdlib.h>
  12. #include <assert.h>
  13. #include <fcntl.h>
  14. #include <limits.h>
  15. #include <unistd.h>
  16. #include <sys/ioctl.h>
  17. #include <linux/types.h>
  18. #include <linux/spi/spidev.h>
  19. #include <stdint.h>
  20. #include <string.h>
  21. #include <zlib.h>
  22. #include "miner.h"
  23. #include "logging.h"
  24. #include "knc-transport.h"
  25. #include "knc-asic.h"
  26. /* Control Commands
  27. *
  28. * SPI command on channel. 1-
  29. * 1'b1 3'channel 12'msglen_in_bits SPI message data
  30. * Sends the supplied message on selected SPI bus
  31. *
  32. * Communication test
  33. * 16'h1 16'x
  34. * Simple test of SPI communication
  35. *
  36. * LED control
  37. * 4'h1 4'red 4'green 4'blue
  38. * Sets led colour
  39. *
  40. * Clock frequency
  41. * 4'h2 12'msglen_in_bits 4'channel 4'die 16'MHz 512'x
  42. * Configures the hashing clock rate
  43. */
  44. /* ASIC Command structure
  45. * command 8 bits
  46. * chip 8 bits
  47. * core 16 bits
  48. * data [command dependent]
  49. * CRC32 32 bits (Neptune)
  50. *
  51. * ASIC response starts immediately after core address bits.
  52. *
  53. * response data
  54. * CRC32 32 bits (Neptune)
  55. * STATUS 8 bits 1 0 ~CRC_OK 0 0 ACCEPTED_WORK 0 1 (Neptune)
  56. *
  57. * Requests
  58. *
  59. * SETWORK (Jupiter)
  60. * midstate 256 bits
  61. * data 96 bits
  62. *
  63. * SETWORK/SETWORK_CLEAN (Neptune)
  64. * slot | 0xf0 8 bits
  65. * precalc_midstate 192 bits
  66. * precalc_data 96 bits
  67. * midstate 256 bits
  68. *
  69. * Returns REPORT response on Neptune
  70. *
  71. * Responses
  72. *
  73. * GETINFO
  74. *
  75. * (core field unused)
  76. *
  77. * cores 16 bits
  78. * version 16 bits
  79. * reserved 60 bits (Neptune)
  80. * die_status 4 bits (Neptune)
  81. * 1' pll_locked
  82. * 1' hash_reset_n 1 if cores have been reset since last report
  83. * 1' pll_reset_n 1 if PLL have been reset since last report
  84. * 1' pll_power_down
  85. * core_status cores * 2 bits (Neptune) rounded up to bytes
  86. * 1' want_work
  87. * 1' has_report (unreliable)
  88. *
  89. * REPORT
  90. *
  91. * reserved 2 bits
  92. * next_state 1 bit next work state loaded
  93. * state 1 bit hashing (0 on Jupiter)
  94. * next_slot 4 bit slot id of next work state (0 on Jupiter)
  95. * progress 8 bits upper 8 bits of nonce counter
  96. * active_slot 4 bits slot id of current work state
  97. * nonce_slot 4 bits slot id of found nonce
  98. * nonce 32 bits
  99. *
  100. * reserved 4 bits
  101. * nonce_slot 4 bits
  102. * nonce 32 bits
  103. *
  104. * repeat for 5 nonce entries in total on Neptune
  105. * Jupiter only has first nonce entry
  106. */
  107. // Precalculate first 3 rounds of SHA256 - as much as possible
  108. // Macro routines copied from sha2.c
  109. static void knc_prepare_neptune_work(unsigned char *out, struct work *work) {
  110. const uint8_t *midstate = work->midstate;
  111. const uint8_t *data = work->data + 16*4;
  112. #ifndef GET_ULONG_BE
  113. #define GET_ULONG_BE(b,i) \
  114. (( (uint32_t) (b)[(i) ] << 24 ) \
  115. | ( (uint32_t) (b)[(i) + 1] << 16 ) \
  116. | ( (uint32_t) (b)[(i) + 2] << 8 ) \
  117. | ( (uint32_t) (b)[(i) + 3] ))
  118. #endif
  119. #ifndef GET_ULONG_LE
  120. #define GET_ULONG_LE(b,i) \
  121. (( (uint32_t) (b)[(i) + 3] << 24 ) \
  122. | ( (uint32_t) (b)[(i) + 2] << 16 ) \
  123. | ( (uint32_t) (b)[(i) + 1] << 8 ) \
  124. | ( (uint32_t) (b)[(i) + 0] ))
  125. #endif
  126. #ifndef PUT_ULONG_BE
  127. #define PUT_ULONG_BE(n,b,i) \
  128. { \
  129. (b)[(i) ] = (unsigned char) ( (n) >> 24 ); \
  130. (b)[(i) + 1] = (unsigned char) ( (n) >> 16 ); \
  131. (b)[(i) + 2] = (unsigned char) ( (n) >> 8 ); \
  132. (b)[(i) + 3] = (unsigned char) ( (n) ); \
  133. }
  134. #endif
  135. #ifndef PUT_ULONG_LE
  136. #define PUT_ULONG_LE(n,b,i) \
  137. { \
  138. (b)[(i) + 3] = (unsigned char) ( (n) >> 24 ); \
  139. (b)[(i) + 2] = (unsigned char) ( (n) >> 16 ); \
  140. (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \
  141. (b)[(i) + 0] = (unsigned char) ( (n) ); \
  142. }
  143. #endif
  144. #define SHR(x,n) ((x & 0xFFFFFFFF) >> n)
  145. #define ROTR(x,n) (SHR(x,n) | (x << (32 - n)))
  146. #define S0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
  147. #define S1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
  148. #define S2(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
  149. #define S3(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
  150. #define F0(x,y,z) ((x & y) | (z & (x | y)))
  151. #define F1(x,y,z) (z ^ (x & (y ^ z)))
  152. #define R(t) \
  153. ( \
  154. W[t] = S1(W[t - 2]) + W[t - 7] + \
  155. S0(W[t - 15]) + W[t - 16] \
  156. )
  157. #define P(a,b,c,d,e,f,g,h,x,K) \
  158. { \
  159. temp1 = h + S3(e) + F1(e,f,g) + K + x; \
  160. temp2 = S2(a) + F0(a,b,c); \
  161. d += temp1; h = temp1 + temp2; \
  162. }
  163. uint32_t temp1, temp2, W[16+3];
  164. uint32_t A, B, C, D, E, F, G, H;
  165. W[0] = GET_ULONG_LE(data, 0*4 );
  166. W[1] = GET_ULONG_LE(data, 1*4 );
  167. W[2] = GET_ULONG_LE(data, 2*4 );
  168. W[3] = 0; // since S0(0)==0, this must be 0. S0(nonce) is added in hardware.
  169. W[4] = 0x80000000;
  170. W[5] = 0;
  171. W[6] = 0;
  172. W[7] = 0;
  173. W[8] = 0;
  174. W[9] = 0;
  175. W[10] = 0;
  176. W[11] = 0;
  177. W[12] = 0;
  178. W[13] = 0;
  179. W[14] = 0;
  180. W[15] = 0x00000280;
  181. R(16); // Expand W 14, 9, 1, 0
  182. R(17); // 15, 10, 2, 1
  183. R(18); // 16, 11, 3, 2
  184. A = GET_ULONG_LE(midstate, 0*4 );
  185. B = GET_ULONG_LE(midstate, 1*4 );
  186. C = GET_ULONG_LE(midstate, 2*4 );
  187. D = GET_ULONG_LE(midstate, 3*4 );
  188. E = GET_ULONG_LE(midstate, 4*4 );
  189. F = GET_ULONG_LE(midstate, 5*4 );
  190. G = GET_ULONG_LE(midstate, 6*4 );
  191. H = GET_ULONG_LE(midstate, 7*4 );
  192. uint32_t D_ = D, H_ = H;
  193. P( A, B, C, D_, E, F, G, H_, W[ 0], 0x428A2F98 );
  194. uint32_t C_ = C, G_ = G;
  195. P( H_, A, B, C_, D_, E, F, G_, W[ 1], 0x71374491 );
  196. uint32_t B_ = B, F_ = F;
  197. P( G_, H_, A, B_, C_, D_, E, F_, W[ 2], 0xB5C0FBCF );
  198. PUT_ULONG_BE( D_, out, 0*4 );
  199. PUT_ULONG_BE( C_, out, 1*4 );
  200. PUT_ULONG_BE( B_, out, 2*4 );
  201. PUT_ULONG_BE( H_, out, 3*4 );
  202. PUT_ULONG_BE( G_, out, 4*4 );
  203. PUT_ULONG_BE( F_, out, 5*4 );
  204. PUT_ULONG_BE( W[18], out, 6*4 ); // This is partial S0(nonce) added by hardware
  205. PUT_ULONG_BE( W[17], out, 7*4 );
  206. PUT_ULONG_BE( W[16], out, 8*4 );
  207. PUT_ULONG_BE( H, out, 9*4 );
  208. PUT_ULONG_BE( G, out, 10*4 );
  209. PUT_ULONG_BE( F, out, 11*4 );
  210. PUT_ULONG_BE( E, out, 12*4 );
  211. PUT_ULONG_BE( D, out, 13*4 );
  212. PUT_ULONG_BE( C, out, 14*4 );
  213. PUT_ULONG_BE( B, out, 15*4 );
  214. PUT_ULONG_BE( A, out, 16*4 );
  215. }
  216. static void knc_prepare_jupiter_work(unsigned char *out, struct work *work) {
  217. int i;
  218. for (i = 0; i < 8 * 4; i++)
  219. out[i] = work->midstate[8 * 4 - i - 1];
  220. for (i = 0; i < 3 * 4; i++)
  221. out[8 * 4 + i] = work->data[16 * 4 + 3 * 4 - i - 1];
  222. }
  /*
   * Write the 4-byte ASIC command header into request[]:
   * command byte, die byte, then the core number as a big-endian 16-bit value.
   */
  static void knc_prepare_core_command(uint8_t *request, int command, int die, int core)
  {
      uint8_t *p = request;

      *p++ = command;
      *p++ = die;
      *p++ = core >> 8;
      *p = core & 0xff;
  }
  230. int knc_prepare_report(uint8_t *request, int die, int core)
  231. {
  232. knc_prepare_core_command(request, KNC_ASIC_CMD_REPORT, die, core);
  233. return 4;
  234. }
  235. int knc_prepare_info(uint8_t *request, int die, struct knc_die_info *die_info, int *response_size)
  236. {
  237. request[0] = KNC_ASIC_CMD_GETINFO;
  238. request[1] = die;
  239. request[2] = 0;
  240. request[3] = 0;
  241. switch (die_info->version) {
  242. case KNC_VERSION_JUPITER:
  243. *response_size = 4;
  244. break;
  245. default:
  246. *response_size = 12 + (KNC_MAX_CORES_PER_DIE*2 + 7) / 8;
  247. break;
  248. case KNC_VERSION_NEPTUNE:
  249. *response_size = 12 + (die_info->cores*2 + 7) / 8;
  250. break;
  251. }
  252. return 4;
  253. }
  254. int knc_prepare_neptune_setwork(uint8_t *request, int die, int core, int slot, struct work *work, int clean)
  255. {
  256. if (!clean)
  257. knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK, die, core);
  258. else
  259. knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK_CLEAN, die, core);
  260. request[4] = slot | 0xf0;
  261. if (work)
  262. knc_prepare_neptune_work(request + 4 + 1, work);
  263. else
  264. memset(request + 4 + 1, 0, 6*4 + 3*4 + 8*4);
  265. return 4 + 1 + 6*4 + 3*4 + 8*4;
  266. }
  267. int knc_prepare_jupiter_setwork(uint8_t *request, int die, int core, int slot, struct work *work)
  268. {
  269. knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK, die, core);
  270. request[4] = slot | 0xf0;
  271. if (work)
  272. knc_prepare_jupiter_work(request + 4 + 1, work);
  273. else
  274. memset(request + 4 + 1, 0, 8*4 + 3*4);
  275. return 4 + 1 + 8*4 + 3*4;
  276. }
  277. int knc_prepare_jupiter_halt(uint8_t *request, int die, int core)
  278. {
  279. knc_prepare_core_command(request, KNC_ASIC_CMD_HALT, die, core);
  280. return 4;
  281. }
  282. int knc_prepare_neptune_halt(uint8_t *request, int die, int core)
  283. {
  284. knc_prepare_core_command(request, KNC_ASIC_CMD_HALT, die, core);
  285. request[4] = 0 | 0xf0;
  286. memset(request + 4 + 1, 0, 6*4 + 3*4 + 8*4);
  287. return 4 + 1 + 6*4 + 3*4 + 8*4;
  288. }
  289. void knc_prepare_neptune_message(int request_length, const uint8_t *request, uint8_t *buffer)
  290. {
  291. uint32_t crc;
  292. memcpy(buffer, request, request_length);
  293. buffer += request_length;
  294. crc = crc32(0, Z_NULL, 0);
  295. crc = crc32(crc, request, request_length);
  296. PUT_ULONG_BE(crc, buffer, 0);
  297. }
  /*
   * Total number of bytes exchanged for one ASIC transaction:
   *
   *   2                      FPGA control word
   *   max(request,           request header + body, or
   *       4 + response)      request header + response, whichever is longer
   *   4 + 1 + 3              CRC, ACK byte, extra trailing bytes
   */
  int knc_transfer_length(int request_length, int response_length)
  {
      const int fpga_control_len = 2;
      const int trailer_len = 4 + 1 + 3;
      const int body_len = MAX(request_length, 4 + response_length );

      return fpga_control_len + body_len + trailer_len;
  }
  303. int knc_prepare_transfer(uint8_t *txbuf, int offset, int size, int channel, int request_length, const uint8_t *request, int response_length)
  304. {
  305. /* FPGA control, request header, request body/response, CRC(4), ACK(1), EXTRA(3) */
  306. int msglen = MAX(request_length, 4 + response_length ) + 4 + 1 + 3;
  307. int len = 2 + msglen;
  308. txbuf += offset;
  309. if (len + offset > size) {
  310. applog(LOG_DEBUG, "KnC SPI buffer full");
  311. return -1;
  312. }
  313. txbuf[0] = 1 << 7 | (channel+1) << 4 | (msglen * 8) >> 8;
  314. txbuf[1] = (msglen * 8);
  315. knc_prepare_neptune_message(request_length, request, txbuf+2);
  316. return offset + len;
  317. }
  318. /* red, green, blue valid range 0 - 15 */
  319. int knc_prepare_led(uint8_t *txbuf, int offset, int size, int red, int green, int blue)
  320. {
  321. /* 4'h1 4'red 4'green 4'blue */
  322. int len = 2;
  323. txbuf += offset;
  324. if (len + offset > size) {
  325. applog(LOG_DEBUG, "KnC SPI buffer full");
  326. return -1;
  327. }
  328. txbuf[0] = 1 << 4 | red;
  329. txbuf[1] = green << 4 | blue;
  330. return offset + len;
  331. }
  332. /* reset controller */
  333. int knc_prepare_reset(uint8_t *txbuf, int offset, int size)
  334. {
  335. /* 16'h0002 16'unused */
  336. int len = 4;
  337. txbuf += offset;
  338. if (len + offset > size) {
  339. applog(LOG_DEBUG, "KnC SPI buffer full");
  340. return -1;
  341. }
  342. txbuf[0] = (0x0002) >> 8;
  343. txbuf[1] = (0x0002) & 0xff;
  344. txbuf[2] = 0;
  345. txbuf[3] = 0;
  346. return offset + len;
  347. }
  348. /* request_length = 0 disables communication checks, i.e. Jupiter protocol */
  349. int knc_decode_response(uint8_t *rxbuf, int request_length, uint8_t **response, int response_length)
  350. {
  351. int ret = 0;
  352. int len = knc_transfer_length(request_length, response_length);
  353. if (request_length > 0 && response_length > 0) {
  354. uint32_t crc, recv_crc;
  355. crc = crc32(0, Z_NULL, 0);
  356. crc = crc32(crc, rxbuf + 2 + 4, response_length);
  357. recv_crc = GET_ULONG_BE(rxbuf + 2 + 4, response_length);
  358. if (crc != recv_crc)
  359. ret |= KNC_ERR_CRC;
  360. }
  361. if (response) {
  362. if (response_length > 0) {
  363. *response = rxbuf + 2 + 4;
  364. } else {
  365. *response = NULL;
  366. }
  367. }
  368. if (response_length == 0)
  369. return 0;
  370. uint8_t ack = rxbuf[len - 4];
  371. if ((ack & KNC_ASIC_ACK_MASK) != KNC_ASIC_ACK_MATCH)
  372. ret |= KNC_ERR_ACK;
  373. if ((ack & KNC_ASIC_ACK_CRC))
  374. ret |= KNC_ERR_CRCACK;
  375. if ((ack & KNC_ASIC_ACK_ACCEPT))
  376. ret |= KNC_ACCEPTED;
  377. if (ret && memcmp(&rxbuf[len-4], "\377\377\377\377", 4) == 0)
  378. ret = KNC_ERR_UNAVAIL;
  379. return ret;
  380. }
  /*
   * Perform one complete request/response exchange on the given channel.
   *
   * Builds the frame for request[0..request_length), sends it through
   * knc_trnsp_transfer(), decodes the reply and, if response is non-NULL,
   * copies response_length payload bytes into it.
   *
   * Returns the value produced by knc_decode_response().
   */
  int knc_syncronous_transfer(void *ctx, int channel, int request_length, const uint8_t *request, int response_length, uint8_t *response)
  {
      int len = knc_transfer_length(request_length, response_length);
      uint8_t txbuf[len];
      uint8_t rxbuf[len];
      uint8_t *response_buf = NULL;
      int rc;

      memset(txbuf, 0, len);
      /*
       * Start from a known receive buffer so the decoder never inspects
       * indeterminate stack bytes.
       * NOTE(review): the result of knc_trnsp_transfer() is not checked here;
       * confirm whether it can fail or fill rxbuf only partially.
       */
      memset(rxbuf, 0, len);

      knc_prepare_transfer(txbuf, 0, len, channel, request_length, request, response_length);
      knc_trnsp_transfer(ctx, txbuf, rxbuf, len);

      rc = knc_decode_response(rxbuf, request_length, &response_buf, response_length);

      /*
       * The decoder hands back NULL when there is no payload; memcpy() must
       * not be given a NULL source (even for zero bytes) or a negative length.
       */
      if (response && response_buf && response_length > 0)
          memcpy(response, response_buf, response_length);

      return rc;
  }
  395. int knc_decode_info(uint8_t *response, struct knc_die_info *die_info)
  396. {
  397. int cores_in_die = response[0]<<8 | response[1];
  398. int version = response[2]<<8 | response[3];
  399. if (version == KNC_ASIC_VERSION_JUPITER && cores_in_die <= 48) {
  400. die_info->version = KNC_VERSION_JUPITER;
  401. die_info->cores = cores_in_die;
  402. memset(die_info->want_work, -1, cores_in_die);
  403. die_info->pll_power_down = -1;
  404. die_info->pll_reset_n = -1;
  405. die_info->hash_reset_n = -1;
  406. die_info->pll_locked = -1;
  407. return 0;
  408. } else if (version == KNC_ASIC_VERSION_NEPTUNE && cores_in_die <= KNC_MAX_CORES_PER_DIE) {
  409. die_info->version = KNC_VERSION_NEPTUNE;
  410. die_info->cores = cores_in_die;
  411. int core;
  412. for (core = 0; core < cores_in_die; core++)
  413. die_info->want_work[core] = ((response[12 + core/4] >> ((3-(core % 4)) * 2)) >> 1) & 1;
  414. int die_status = response[11] & 0xf;
  415. die_info->pll_power_down = (die_status >> 0) & 1;
  416. die_info->pll_reset_n = (die_status >> 1) & 1;
  417. die_info->hash_reset_n = (die_status >> 2) & 1;
  418. die_info->pll_locked = (die_status >> 3) & 1;
  419. return 0;
  420. } else {
  421. return -1;
  422. }
  423. }
  424. int knc_decode_report(uint8_t *response, struct knc_report *report, int version)
  425. {
  426. /*
  427. * reserved 2 bits
  428. * next_state 1 bit next work state loaded
  429. * state 1 bit hashing (0 on Jupiter)
  430. * next_slot 4 bit slot id of next work state (0 on Jupiter)
  431. * progress 8 bits upper 8 bits of nonce counter
  432. * active_slot 4 bits slot id of current work state
  433. * nonce_slot 4 bits slot id of found nonce
  434. * nonce 32 bits
  435. *
  436. * reserved 4 bits
  437. * nonce_slot 4 bits
  438. * nonce 32 bits
  439. */
  440. report->next_state = (response[0] >> 5) & 1;
  441. if (version != KNC_VERSION_JUPITER) {
  442. report->state = (response[0] >> 4) & 1;
  443. report->next_slot = response[0] & ((1<<4)-1);
  444. } else {
  445. report->state = -1;
  446. report->next_slot = -1;
  447. }
  448. report->progress = (uint32_t)response[1] << 24;
  449. report->active_slot = (response[2] >> 4) & ((1<<4)-1);
  450. int n;
  451. int n_nonces = version == KNC_VERSION_JUPITER ? 1 : 5;
  452. for (n = 0; n < n_nonces; n++) {
  453. report->nonce[n].slot = response[2+n*5] & ((1<<4)-1);
  454. report->nonce[n].nonce =
  455. (uint32_t)response[3+n*5] << 24 |
  456. (uint32_t)response[4+n*5] << 16 |
  457. (uint32_t)response[5+n*5] << 8 |
  458. (uint32_t)response[6+n*5] << 0 |
  459. 0;
  460. }
  461. for (; n < KNC_NONCES_PER_REPORT; n++) {
  462. report->nonce[n].slot = -1;
  463. report->nonce[n].nonce = 0;
  464. }
  465. return 0;
  466. }
  467. int knc_detect_die(void *ctx, int channel, int die, struct knc_die_info *die_info)
  468. {
  469. uint8_t request[4];
  470. int response_len = 2 + 2 + 4 + 4 + (KNC_MAX_CORES_PER_DIE*2 + 7) / 8;
  471. uint8_t response[response_len];
  472. int request_len = knc_prepare_info(request, die, die_info, &response_len);
  473. int status = knc_syncronous_transfer(ctx, channel, request_len, request, response_len, response);
  474. /* Workaround for pre-ASIC version */
  475. int cores_in_die = response[0]<<8 | response[1];
  476. int version = response[2]<<8 | response[3];
  477. if (version == KNC_ASIC_VERSION_NEPTUNE && cores_in_die < KNC_MAX_CORES_PER_DIE) {
  478. applog(LOG_DEBUG, "KnC %d-%d: Looks like a NEPTUNE die with %d cores", channel, die, cores_in_die);
  479. /* Try again with right response size */
  480. response_len = 2 + 2 + 4 + 4 + (cores_in_die*2 + 7) / 8;
  481. status = knc_syncronous_transfer(ctx, channel, request_len, request, response_len, response);
  482. }
  483. int rc = -1;
  484. if (version == KNC_ASIC_VERSION_JUPITER || status == 0)
  485. rc = knc_decode_info(response, die_info);
  486. if (rc == 0)
  487. applog(LOG_INFO, "KnC %d-%d: Found %s die with %d cores", channel, die,
  488. die_info->version == KNC_VERSION_NEPTUNE ? "NEPTUNE" :
  489. die_info->version == KNC_VERSION_JUPITER ? "JUPITER" :
  490. "UNKNOWN",
  491. cores_in_die);
  492. else
  493. applog(LOG_DEBUG, "KnC %d-%d: No KnC chip found", channel, die);
  494. return rc;
  495. }