/* * library for KnCminer devices * * Copyright 2014 KnCminer * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 3 of the License, or (at your option) * any later version. See COPYING for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include "miner.h" #include "logging.h" #include "knc-transport.h" #include "knc-asic.h" /* Control Commands * * SPI command on channel. 1- * 1'b1 3'channel 12'msglen_in_bits SPI message data * Sends the supplied message on selected SPI bus * * Communication test * 16'h1 16'x * Simple test of SPI communication * * LED control * 4'h1 4'red 4'green 4'blue * Sets led colour * * Clock frequency * 4'h2 12'msglen_in_bits 4'channel 4'die 16'MHz 512'x * Configures the hashing clock rate */ /* ASIC Command structure * command 8 bits * chip 8 bits * core 16 bits * data [command dependent] * CRC32 32 bits (Neptune) * * ASIC response starts immediately after core address bits. * * response data * CRC32 32 bits (Neptune) * STATUS 8 bits 1 0 ~CRC_OK 0 0 ACCEPTED_WORK 0 1 (Neptune) * * Requests * * SETWORK (Jupiter) * midstate 256 bits * data 96 bits * * SETWORK/SETWORK_CLEAN (Neptune) * slot | 0xf0 8 bits * precalc_midstate 192 bits * precalc_data 96 bits * midstate 256 bits * * Returns REPORT response on Neptune * * Responses * * GETINFO * * (core field unused) * * cores 16 bits * version 16 bits * reserved 60 bits (Neptune) * die_status 4 bits (Neptune) * 1' pll_locked * 1' hash_reset_n 1 if cores have been reset since last report * 1' pll_reset_n 1 if PLL have been reset since last report * 1' pll_power_down * core_status cores * 2 bits (Neptune) rounded up to bytes * 1' want_work * 1' has_report (unreliable) * * REPORT * * reserved 2 bits * next_state 1 bit next work state loaded * state 1 bit hashing (0 on Jupiter) * next_slot 4 bit slot id of next work state (0 on Jupiter) * progress 8 bits upper 8 bits of nonce counter * active_slot 4 bits slot id of current work state * nonce_slot 4 bits slot id of found nonce * nonce 32 bits * * reserved 4 bits * nonce_slot 4 bits * nonce 32 bits * * repeat for 5 nonce entries in total on Neptune * Jupiter only has first nonce entry */ // Precalculate first 3 rounds of SHA256 - as much as possible // Macro routines copied from sha2.c static void knc_prepare_neptune_work(unsigned char *out, struct work *work) { const uint8_t *midstate = work->midstate; const uint8_t *data = work->data + 16*4; #ifndef GET_ULONG_BE #define GET_ULONG_BE(b,i) \ (( (uint32_t) (b)[(i) ] << 24 ) \ | ( (uint32_t) (b)[(i) + 1] << 16 ) \ | ( (uint32_t) (b)[(i) + 2] << 8 ) \ | ( (uint32_t) (b)[(i) + 3] )) #endif #ifndef GET_ULONG_LE #define GET_ULONG_LE(b,i) \ (( (uint32_t) (b)[(i) + 3] << 24 ) \ | ( (uint32_t) (b)[(i) + 2] << 16 ) \ | ( (uint32_t) (b)[(i) + 1] << 8 ) \ | ( (uint32_t) (b)[(i) + 0] )) #endif #ifndef PUT_ULONG_BE #define PUT_ULONG_BE(n,b,i) \ { \ (b)[(i) ] = (unsigned char) ( (n) >> 24 ); \ (b)[(i) + 1] = (unsigned char) ( (n) >> 16 ); \ (b)[(i) + 2] = (unsigned char) ( (n) >> 8 ); \ (b)[(i) + 3] = (unsigned char) ( (n) ); \ } #endif #ifndef PUT_ULONG_LE #define PUT_ULONG_LE(n,b,i) \ { \ (b)[(i) + 3] = (unsigned char) ( (n) >> 24 ); \ (b)[(i) + 2] = (unsigned char) ( (n) >> 16 ); \ (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \ (b)[(i) + 0] = (unsigned char) ( (n) ); \ } #endif #define SHR(x,n) ((x & 0xFFFFFFFF) >> n) #define ROTR(x,n) (SHR(x,n) | (x << (32 - n))) #define S0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3)) #define S1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10)) #define S2(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22)) #define S3(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25)) #define F0(x,y,z) ((x & y) | (z & (x | y))) #define F1(x,y,z) (z ^ (x & (y ^ z))) #define R(t) \ ( \ W[t] = S1(W[t - 2]) + W[t - 7] + \ S0(W[t - 15]) + W[t - 16] \ ) #define P(a,b,c,d,e,f,g,h,x,K) \ { \ temp1 = h + S3(e) + F1(e,f,g) + K + x; \ temp2 = S2(a) + F0(a,b,c); \ d += temp1; h = temp1 + temp2; \ } uint32_t temp1, temp2, W[16+3]; uint32_t A, B, C, D, E, F, G, H; W[0] = GET_ULONG_LE(data, 0*4 ); W[1] = GET_ULONG_LE(data, 1*4 ); W[2] = GET_ULONG_LE(data, 2*4 ); W[3] = 0; // since S0(0)==0, this must be 0. S0(nonce) is added in hardware. W[4] = 0x80000000; W[5] = 0; W[6] = 0; W[7] = 0; W[8] = 0; W[9] = 0; W[10] = 0; W[11] = 0; W[12] = 0; W[13] = 0; W[14] = 0; W[15] = 0x00000280; R(16); // Expand W 14, 9, 1, 0 R(17); // 15, 10, 2, 1 R(18); // 16, 11, 3, 2 A = GET_ULONG_LE(midstate, 0*4 ); B = GET_ULONG_LE(midstate, 1*4 ); C = GET_ULONG_LE(midstate, 2*4 ); D = GET_ULONG_LE(midstate, 3*4 ); E = GET_ULONG_LE(midstate, 4*4 ); F = GET_ULONG_LE(midstate, 5*4 ); G = GET_ULONG_LE(midstate, 6*4 ); H = GET_ULONG_LE(midstate, 7*4 ); uint32_t D_ = D, H_ = H; P( A, B, C, D_, E, F, G, H_, W[ 0], 0x428A2F98 ); uint32_t C_ = C, G_ = G; P( H_, A, B, C_, D_, E, F, G_, W[ 1], 0x71374491 ); uint32_t B_ = B, F_ = F; P( G_, H_, A, B_, C_, D_, E, F_, W[ 2], 0xB5C0FBCF ); PUT_ULONG_BE( D_, out, 0*4 ); PUT_ULONG_BE( C_, out, 1*4 ); PUT_ULONG_BE( B_, out, 2*4 ); PUT_ULONG_BE( H_, out, 3*4 ); PUT_ULONG_BE( G_, out, 4*4 ); PUT_ULONG_BE( F_, out, 5*4 ); PUT_ULONG_BE( W[18], out, 6*4 ); // This is partial S0(nonce) added by hardware PUT_ULONG_BE( W[17], out, 7*4 ); PUT_ULONG_BE( W[16], out, 8*4 ); PUT_ULONG_BE( H, out, 9*4 ); PUT_ULONG_BE( G, out, 10*4 ); PUT_ULONG_BE( F, out, 11*4 ); PUT_ULONG_BE( E, out, 12*4 ); PUT_ULONG_BE( D, out, 13*4 ); PUT_ULONG_BE( C, out, 14*4 ); PUT_ULONG_BE( B, out, 15*4 ); PUT_ULONG_BE( A, out, 16*4 ); } static void knc_prepare_jupiter_work(unsigned char *out, struct work *work) { int i; for (i = 0; i < 8 * 4; i++) out[i] = work->midstate[8 * 4 - i - 1]; for (i = 0; i < 3 * 4; i++) out[8 * 4 + i] = work->data[16 * 4 + 3 * 4 - i - 1]; } static void knc_prepare_core_command(uint8_t *request, int command, int die, int core) { request[0] = command; request[1] = die; request[2] = core >> 8; request[3] = core & 0xff; } int knc_prepare_report(uint8_t *request, int die, int core) { knc_prepare_core_command(request, KNC_ASIC_CMD_REPORT, die, core); return 4; } int knc_prepare_info(uint8_t *request, int die, struct knc_die_info *die_info, int *response_size) { request[0] = KNC_ASIC_CMD_GETINFO; request[1] = die; request[2] = 0; request[3] = 0; switch (die_info->version) { case KNC_VERSION_JUPITER: *response_size = 4; break; default: *response_size = 12 + (KNC_MAX_CORES_PER_DIE*2 + 7) / 8; break; case KNC_VERSION_NEPTUNE: *response_size = 12 + (die_info->cores*2 + 7) / 8; break; } return 4; } int knc_prepare_neptune_setwork(uint8_t *request, int die, int core, int slot, struct work *work, int clean) { if (!clean) knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK, die, core); else knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK_CLEAN, die, core); request[4] = slot | 0xf0; if (work) knc_prepare_neptune_work(request + 4 + 1, work); else memset(request + 4 + 1, 0, 6*4 + 3*4 + 8*4); return 4 + 1 + 6*4 + 3*4 + 8*4; } int knc_prepare_jupiter_setwork(uint8_t *request, int die, int core, int slot, struct work *work) { knc_prepare_core_command(request, KNC_ASIC_CMD_SETWORK, die, core); request[4] = slot | 0xf0; if (work) knc_prepare_jupiter_work(request + 4 + 1, work); else memset(request + 4 + 1, 0, 8*4 + 3*4); return 4 + 1 + 8*4 + 3*4; } int knc_prepare_jupiter_halt(uint8_t *request, int die, int core) { knc_prepare_core_command(request, KNC_ASIC_CMD_HALT, die, core); return 4; } int knc_prepare_neptune_halt(uint8_t *request, int die, int core) { knc_prepare_core_command(request, KNC_ASIC_CMD_HALT, die, core); request[4] = 0 | 0xf0; memset(request + 4 + 1, 0, 6*4 + 3*4 + 8*4); return 4 + 1 + 6*4 + 3*4 + 8*4; } void knc_prepare_neptune_message(int request_length, const uint8_t *request, uint8_t *buffer) { uint32_t crc; memcpy(buffer, request, request_length); buffer += request_length; crc = crc32(0, Z_NULL, 0); crc = crc32(crc, request, request_length); PUT_ULONG_BE(crc, buffer, 0); } int knc_transfer_length(int request_length, int response_length) { /* FPGA control, request header, request body/response, CRC(4), ACK(1), EXTRA(3) */ return 2 + MAX(request_length, 4 + response_length ) + 4 + 1 + 3; } int knc_prepare_transfer(uint8_t *txbuf, int offset, int size, int channel, int request_length, const uint8_t *request, int response_length) { /* FPGA control, request header, request body/response, CRC(4), ACK(1), EXTRA(3) */ int msglen = MAX(request_length, 4 + response_length ) + 4 + 1 + 3; int len = 2 + msglen; txbuf += offset; if (len + offset > size) { applog(LOG_DEBUG, "KnC SPI buffer full"); return -1; } txbuf[0] = 1 << 7 | (channel+1) << 4 | (msglen * 8) >> 8; txbuf[1] = (msglen * 8); knc_prepare_neptune_message(request_length, request, txbuf+2); return offset + len; } /* red, green, blue valid range 0 - 15 */ int knc_prepare_led(uint8_t *txbuf, int offset, int size, int red, int green, int blue) { /* 4'h1 4'red 4'green 4'blue */ int len = 2; txbuf += offset; if (len + offset > size) { applog(LOG_DEBUG, "KnC SPI buffer full"); return -1; } txbuf[0] = 1 << 4 | red; txbuf[1] = green << 4 | blue; return offset + len; } /* reset controller */ int knc_prepare_reset(uint8_t *txbuf, int offset, int size) { /* 16'h0002 16'unused */ int len = 4; txbuf += offset; if (len + offset > size) { applog(LOG_DEBUG, "KnC SPI buffer full"); return -1; } txbuf[0] = (0x0002) >> 8; txbuf[1] = (0x0002) & 0xff; txbuf[2] = 0; txbuf[3] = 0; return offset + len; } /* request_length = 0 disables communication checks, i.e. Jupiter protocol */ int knc_decode_response(uint8_t *rxbuf, int request_length, uint8_t **response, int response_length) { int ret = 0; int len = knc_transfer_length(request_length, response_length); if (request_length > 0 && response_length > 0) { uint32_t crc, recv_crc; crc = crc32(0, Z_NULL, 0); crc = crc32(crc, rxbuf + 2 + 4, response_length); recv_crc = GET_ULONG_BE(rxbuf + 2 + 4, response_length); if (crc != recv_crc) ret |= KNC_ERR_CRC; } if (response) { if (response_length > 0) { *response = rxbuf + 2 + 4; } else { *response = NULL; } } if (response_length == 0) return 0; uint8_t ack = rxbuf[len - 4]; if ((ack & KNC_ASIC_ACK_MASK) != KNC_ASIC_ACK_MATCH) ret |= KNC_ERR_ACK; if ((ack & KNC_ASIC_ACK_CRC)) ret |= KNC_ERR_CRCACK; if ((ack & KNC_ASIC_ACK_ACCEPT)) ret |= KNC_ACCEPTED; if (ret && memcmp(&rxbuf[len-4], "\377\377\377\377", 4) == 0) ret = KNC_ERR_UNAVAIL; return ret; } int knc_syncronous_transfer(void *ctx, int channel, int request_length, const uint8_t *request, int response_length, uint8_t *response) { int len = knc_transfer_length(request_length, response_length); uint8_t txbuf[len]; uint8_t rxbuf[len]; memset(txbuf, 0, len); knc_prepare_transfer(txbuf, 0, len, channel, request_length, request, response_length); knc_trnsp_transfer(ctx, txbuf, rxbuf, len); uint8_t *response_buf; int rc = knc_decode_response(rxbuf, request_length, &response_buf, response_length); if (response) memcpy(response, response_buf, response_length); return rc; } int knc_decode_info(uint8_t *response, struct knc_die_info *die_info) { int cores_in_die = response[0]<<8 | response[1]; int version = response[2]<<8 | response[3]; if (version == KNC_ASIC_VERSION_JUPITER && cores_in_die <= 48) { die_info->version = KNC_VERSION_JUPITER; die_info->cores = cores_in_die; memset(die_info->want_work, -1, cores_in_die); die_info->pll_power_down = -1; die_info->pll_reset_n = -1; die_info->hash_reset_n = -1; die_info->pll_locked = -1; return 0; } else if (version == KNC_ASIC_VERSION_NEPTUNE && cores_in_die <= KNC_MAX_CORES_PER_DIE) { die_info->version = KNC_VERSION_NEPTUNE; die_info->cores = cores_in_die; int core; for (core = 0; core < cores_in_die; core++) die_info->want_work[core] = ((response[12 + core/4] >> ((3-(core % 4)) * 2)) >> 1) & 1; int die_status = response[11] & 0xf; die_info->pll_power_down = (die_status >> 0) & 1; die_info->pll_reset_n = (die_status >> 1) & 1; die_info->hash_reset_n = (die_status >> 2) & 1; die_info->pll_locked = (die_status >> 3) & 1; return 0; } else { return -1; } } int knc_decode_report(uint8_t *response, struct knc_report *report, int version) { /* * reserved 2 bits * next_state 1 bit next work state loaded * state 1 bit hashing (0 on Jupiter) * next_slot 4 bit slot id of next work state (0 on Jupiter) * progress 8 bits upper 8 bits of nonce counter * active_slot 4 bits slot id of current work state * nonce_slot 4 bits slot id of found nonce * nonce 32 bits * * reserved 4 bits * nonce_slot 4 bits * nonce 32 bits */ report->next_state = (response[0] >> 5) & 1; if (version != KNC_VERSION_JUPITER) { report->state = (response[0] >> 4) & 1; report->next_slot = response[0] & ((1<<4)-1); } else { report->state = -1; report->next_slot = -1; } report->progress = (uint32_t)response[1] << 24; report->active_slot = (response[2] >> 4) & ((1<<4)-1); int n; int n_nonces = version == KNC_VERSION_JUPITER ? 1 : 5; for (n = 0; n < n_nonces; n++) { report->nonce[n].slot = response[2+n*5] & ((1<<4)-1); report->nonce[n].nonce = (uint32_t)response[3+n*5] << 24 | (uint32_t)response[4+n*5] << 16 | (uint32_t)response[5+n*5] << 8 | (uint32_t)response[6+n*5] << 0 | 0; } for (; n < KNC_NONCES_PER_REPORT; n++) { report->nonce[n].slot = -1; report->nonce[n].nonce = 0; } return 0; } int knc_detect_die(void *ctx, int channel, int die, struct knc_die_info *die_info) { uint8_t request[4]; int response_len = 2 + 2 + 4 + 4 + (KNC_MAX_CORES_PER_DIE*2 + 7) / 8; uint8_t response[response_len]; int request_len = knc_prepare_info(request, die, die_info, &response_len); int status = knc_syncronous_transfer(ctx, channel, request_len, request, response_len, response); /* Workaround for pre-ASIC version */ int cores_in_die = response[0]<<8 | response[1]; int version = response[2]<<8 | response[3]; if (version == KNC_ASIC_VERSION_NEPTUNE && cores_in_die < KNC_MAX_CORES_PER_DIE) { applog(LOG_DEBUG, "KnC %d-%d: Looks like a NEPTUNE die with %d cores", channel, die, cores_in_die); /* Try again with right response size */ response_len = 2 + 2 + 4 + 4 + (cores_in_die*2 + 7) / 8; status = knc_syncronous_transfer(ctx, channel, request_len, request, response_len, response); } int rc = -1; if (version == KNC_ASIC_VERSION_JUPITER || status == 0) rc = knc_decode_info(response, die_info); if (rc == 0) applog(LOG_INFO, "KnC %d-%d: Found %s die with %d cores", channel, die, die_info->version == KNC_VERSION_NEPTUNE ? "NEPTUNE" : die_info->version == KNC_VERSION_JUPITER ? "JUPITER" : "UNKNOWN", cores_in_die); else applog(LOG_DEBUG, "KnC %d-%d: No KnC chip found", channel, die); return rc; }