|
|
|
@ -39,36 +39,36 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
namespace RandomX {
|
|
|
|
|
|
|
|
|
|
// NOTE(review): this span interleaves two revisions of initBlock, apparently
// taken from a diff view (it sits under a hunk header). One signature takes an
// extra `keys` argument and accumulates into r0..r7; the other has no `keys`
// and accumulates into c0..c7. Presumably the first line of each pair is the
// pre-change text and the second the post-change text -- confirm against the
// real file before relying on either.
//
// In both revisions the visible statements fill one 64-byte output block:
//   cache       - base pointer of the buffer the mix lines are read from
//   out         - destination; eight 64-bit words are stored at offsets 0..56
//   blockNumber - index of the block; seeds the first accumulator
//   keys        - (first signature only) not referenced by any visible statement
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
|
|
|
|
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
|
|
|
|
|
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) {
|
|
|
|
|
uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
|
|
|
|
|
|
|
|
|
|
|
// Seed: the first accumulator is 4 * blockNumber (computed in 64 bits via the
// ULL literal); the remaining seven start at zero.
r0 = 4ULL * blockNumber;
|
|
|
|
|
r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
|
|
|
|
|
c0 = 4ULL * blockNumber;
|
|
|
|
|
c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
|
|
|
|
|
|
|
|
|
|
|
// Mask applied to the first accumulator to obtain a byte offset into `cache`.
// Presumably this keeps the offset inside the cache and aligned to a cache
// line -- that assumes CacheSize is a power of two; TODO confirm.
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
|
|
|
|
|
|
|
|
|
|
|
// Runs DatasetIterations rounds. Note that `auto i = 0` deduces `int`.
for (auto i = 0; i < DatasetIterations; ++i) {
|
|
|
|
|
// The mix line address is taken from the first accumulator *before* that
// accumulator is re-hashed below.
const uint8_t* mixBlock = cache + (r0 & mask);
|
|
|
|
|
const uint8_t* mixBlock = cache + (c0 & mask);
|
|
|
|
|
// Prefetch hint for the line about to be read (macro defined elsewhere).
PREFETCHNTA(mixBlock);
|
|
|
|
|
// Re-hash the first accumulator, then XOR eight consecutive 64-bit words of
// the mix line (byte offsets 0..56) into the eight accumulators.
r0 = squareHash(r0);
|
|
|
|
|
r0 ^= load64(mixBlock + 0);
|
|
|
|
|
r1 ^= load64(mixBlock + 8);
|
|
|
|
|
r2 ^= load64(mixBlock + 16);
|
|
|
|
|
r3 ^= load64(mixBlock + 24);
|
|
|
|
|
r4 ^= load64(mixBlock + 32);
|
|
|
|
|
r5 ^= load64(mixBlock + 40);
|
|
|
|
|
r6 ^= load64(mixBlock + 48);
|
|
|
|
|
r7 ^= load64(mixBlock + 56);
|
|
|
|
|
c0 = squareHash(c0);
|
|
|
|
|
c0 ^= load64(mixBlock + 0);
|
|
|
|
|
c1 ^= load64(mixBlock + 8);
|
|
|
|
|
c2 ^= load64(mixBlock + 16);
|
|
|
|
|
c3 ^= load64(mixBlock + 24);
|
|
|
|
|
c4 ^= load64(mixBlock + 32);
|
|
|
|
|
c5 ^= load64(mixBlock + 40);
|
|
|
|
|
c6 ^= load64(mixBlock + 48);
|
|
|
|
|
c7 ^= load64(mixBlock + 56);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Write the eight accumulators to `out` as consecutive 64-bit words
// (byte offsets 0..56, 64 bytes in total).
store64(out + 0, r0);
|
|
|
|
|
store64(out + 8, r1);
|
|
|
|
|
store64(out + 16, r2);
|
|
|
|
|
store64(out + 24, r3);
|
|
|
|
|
store64(out + 32, r4);
|
|
|
|
|
store64(out + 40, r5);
|
|
|
|
|
store64(out + 48, r6);
|
|
|
|
|
store64(out + 56, r7);
|
|
|
|
|
store64(out + 0, c0);
|
|
|
|
|
store64(out + 8, c1);
|
|
|
|
|
store64(out + 16, c2);
|
|
|
|
|
store64(out + 24, c3);
|
|
|
|
|
store64(out + 32, c4);
|
|
|
|
|
store64(out + 40, c5);
|
|
|
|
|
store64(out + 48, c6);
|
|
|
|
|
store64(out + 56, c7);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
|
|
|
|
@ -86,7 +86,7 @@ namespace RandomX {
|
|
|
|
|
memory.mx &= CacheLineAlignMask; //align to cache line
|
|
|
|
|
Cache* cache = memory.ds.cache;
|
|
|
|
|
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
|
|
|
|
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
|
|
|
|
|
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize);
|
|
|
|
|
for (int i = 0; i < RegistersCount; ++i)
|
|
|
|
|
reg[i] ^= datasetLine[i];
|
|
|
|
|
std::swap(memory.mx, memory.ma);
|
|
|
|
@ -119,31 +119,12 @@ namespace RandomX {
|
|
|
|
|
|
|
|
|
|
// Fills dataset blocks [startBlock, startBlock + blockCount) by calling
// initBlock once per block index, writing each block at
// ds.dataset + i * CacheLineSize and reading from cache->getCache().
//
// NOTE(review): two revisions of the initBlock call are interleaved below
// (with and without cache->getKeys()); presumably the first is the pre-change
// line and the second the post-change line -- confirm against the real file.
// NOTE(review): `i` is uint32_t, so `startBlock + blockCount` wraps modulo
// 2^32, and `i * CacheLineSize` may also be evaluated in 32 bits depending on
// the type of CacheLineSize (not visible here) -- verify for large datasets.
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
|
|
|
|
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
|
|
|
|
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
|
|
|
|
|
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Creates the Cache object for a dataset and initializes it from a seed.
//   seed       - passed to Cache::initialize together with SeedSize
//   ds         - receives the new Cache pointer in ds.cache
//   largePages - forwarded to Cache::alloc
//
// NOTE(review): two revisions of the initialize call are interleaved below
// (`initialize<softAes>` and plain `initialize`); presumably the first is the
// pre-change line and the second the post-change line -- confirm against the
// real file. The same likely applies to the `template<bool softAes>` header
// and the explicit instantiations that follow the function.
template<bool softAes>
|
|
|
|
|
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
|
|
|
|
|
// Placement form of new: the Cache is constructed using the result of
// Cache::alloc(largePages). No matching release is visible in this span.
ds.cache = new(Cache::alloc(largePages)) Cache();
|
|
|
|
|
ds.cache->initialize<softAes>(seed, SeedSize);
|
|
|
|
|
ds.cache->initialize(seed, SeedSize);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Explicit instantiations of datasetInitCache for softAes = false and true.
template
|
|
|
|
|
void datasetInitCache<false>(const void*, dataset_t&, bool);
|
|
|
|
|
|
|
|
|
|
|
template
|
|
|
|
|
void datasetInitCache<true>(const void*, dataset_t&, bool);
|
|
|
|
|
|
|
|
|
|
// Calls the four-argument initBlock `blockCount` times on a local
// CacheLineSize-byte buffer, always with block number 0; by its name this is
// presumably a benchmark loop.
//
// NOTE(review): `softAes` is not referenced in the visible body.
// NOTE(review): neither `keys` nor `buffer` is initialized before use here.
// NOTE(review): `buffer` is passed as both `cache` and `out`. The initBlock
// body shown in this file reads from cache + (x & mask) with a mask derived
// from CacheSize, which could address bytes beyond a CacheLineSize-byte
// buffer -- verify which initBlock revision this call was paired with.
template<bool softAes>
|
|
|
|
|
void aesBench(uint32_t blockCount) {
|
|
|
|
|
alignas(16) KeysContainer keys;
|
|
|
|
|
alignas(16) uint8_t buffer[CacheLineSize];
|
|
|
|
|
for (uint32_t block = 0; block < blockCount; ++block) {
|
|
|
|
|
initBlock(buffer, buffer, 0, keys);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Explicit instantiations of aesBench for softAes = false and true.
template void aesBench<false>(uint32_t blockCount);
|
|
|
|
|
template void aesBench<true>(uint32_t blockCount);
|
|
|
|
|
}
|
|
|
|
|