ESP32-S3 SIMD Minimal Example
In my recent blog post, I described the ESP32-S3 SIMD instructions and how I figured out how to use them. This post is to provide the simplest working example to serve as a starting point for your own projects. The code was designed for the Arduino IDE, but will work similarly on Espressif's ESP-IDF. To add assembly language files (.S) to your Arduino project, just place them in the same folder as the .ino file. Here are the 2 files needed for this project, along with the output on the serial terminal of the results when you run it. Simply create an empty project and copy/paste this code into it:
The Arduino sketch
//
// ESP32-S3 minimal SIMD example
// Written by Larry Bank
// Copyright (c) 2024 BitBank Software, Inc.
//
// The purpose of this example is to show how to make use of ESP32-S3 SIMD instructions
// in your Arduino or ESP-IDF projects. The code is not comprehensive and just provides
// a starting point for someone wanting to learn how to use them. I wrote this because
// I couldn't find such an example and thought that people would appreciate saving some
// time with the research I did.
//
// The ee.vadds.s16 instruction used here saturates its results, so notice what happens to
// value 7 in the output: 0x7fff + 0x4000 is clamped to 0x7fff instead of wrapping around
// Assembly routine from s3_simd.S, declared with C linkage so the symbol name is not mangled.
// Adds the eight signed 16-bit values at pA to the eight at pB (with saturation) and stores
// the eight sums at pC. All three buffers must be 16-byte aligned. Returns 0.
extern "C" int s3_add16x8(int16_t *pA, int16_t *pB, int16_t *pC);
// The 128-bit (16-byte) vector loads and stores require 16-byte aligned addresses,
// so every 8-lane buffer below is aligned to 16 bytes.
// Input operand A (8 signed 16-bit lanes)
alignas(16) int16_t u16_A[8] = {
    0x0000, -0x0100, 0x0000, 0x1111, 0x0000, 0x1234, 0x0000, 0x7fff};
// Input operand B (8 signed 16-bit lanes)
alignas(16) int16_t u16_B[8] = {
    0x0000, 0x3000, 0x0000, 0x2222, 0x0000, 0x4321, 0x0000, 0x4000};
// Result buffer C, zero-initialized
alignas(16) int16_t u16_C[8] = {};
// Arduino entry point: calls the SIMD add routine once on the global buffers
// and prints the eight 16-bit results as hex on the serial port.
void setup() {
  Serial.begin(115200);
  delay(3000); // wait for USB-CDC to start
  Serial.println("About to call Asm code");
  // u16_C = u16_A + u16_B, lane by lane; the return value is not used here.
  s3_add16x8(u16_A, u16_B, u16_C);
  Serial.println("Returned from Asm code");
  for (int i = 0; i < 8; i++) {
    // Print through uint16_t: an int16_t passed to a variadic function is
    // promoted to int, so a negative lane would otherwise be shown
    // sign-extended (e.g. 0xffffff00) instead of as a 16-bit value.
    Serial.printf("value %d = 0x%04x\n", i, static_cast<uint16_t>(u16_C[i]));
  }
} /* setup() */
// Arduino main loop: intentionally empty, all of the work happens once in setup().
void loop()
{
  // nothing to do
} /* loop() */
The s3_simd.S file
//
// ESP32-S3 SIMD example
// Written by Larry Bank
// Copyright (c) 2024 BitBank Software, Inc.
//
#include "dsps_fft2r_platform.h"
// Assemble the routine only when the included platform header sets this flag to 1.
// NOTE(review): presumably that is the case only for ESP32-S3 builds -- confirm
// against the header. When the flag is not 1, this file defines no s3_add16x8 symbol.
#if (dsps_fft2r_sc16_aes3_enabled == 1)
.text
.align 4
// Simple signed 16-bit x 8 add
// registers with the args: A2 A3 A4
// Call as int s3_add16x8(int16_t *pA, int16_t *pB, int16_t *pC);
// A2 = pA, A3 = pB, A4 = pC on entry; the return value (always 0) is placed in A2.
// Each pointer is used for one 128-bit (16-byte) load or store, i.e. 8 lanes of 16 bits.
// The C side notes that these accesses need 16-byte aligned buffers.
.global s3_add16x8
.type s3_add16x8,@function
s3_add16x8:
entry a1,16 # prepare windowed registers and reserve 16 bytes of stack
ee.vld.128.ip q0,a2,16 # load 8 "A" values into Q0 from A2, then add 16 to A2
ee.vld.128.ip q1,a3,16 # load 8 "B" values into Q1 from A3, then add 16 to A3
ee.vadds.s16 q2,q0,q1 # C = A+B (with saturation)
ee.vst.128.ip q2,a4,16 # store the 8 "C" values, then add 16 to A4
movi.n a2,0 # return value of 0
retw.n # restore state (windowed registers) and return to caller
#endif // dsps_fft2r_sc16_aes3_enabled
The Serial Terminal Output
About to call Asm code
Returned from Asm code
value 0 = 0x0000
value 1 = 0x2f00
value 2 = 0x0000
value 3 = 0x3333
value 4 = 0x0000
value 5 = 0x5555
value 6 = 0x0000
value 7 = 0x7fff
value 0 = 0x0000
value 1 = 0x2f00
value 2 = 0x0000
value 3 = 0x3333
value 4 = 0x0000
value 5 = 0x5555
value 6 = 0x0000
value 7 = 0x7fff
Comments
Post a Comment