PULP DSP  Version 1.0
Digital Signal Processing library for PULP processors (pulp-platform.org)
 All Classes Files Functions Groups Pages
plp_math.h
Go to the documentation of this file.
1 
7 /*
8  * Copyright (C) 2019 ETH Zurich and University of Bologna. All rights reserved.
9  *
10  * SPDX-License-Identifier: Apache-2.0
11  *
12  * Licensed under the Apache License, Version 2.0 (the License); you may
13  * not use this file except in compliance with the License.
14  * You may obtain a copy of the License at
15  *
16  * www.apache.org/licenses/LICENSE-2.0
17  *
18  * Unless required by applicable law or agreed to in writing, software
19  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  * See the License for the specific language governing permissions and
22  * limitations under the License.
23  */
24 
110 #ifndef __PLP_MATH_H__
111 #define __PLP_MATH_H__
112 
113 #include "rt/rt_api.h"
114 #include "math.h"
115 
116 typedef float float32_t;
117 
118 #define PLP_MATH_IBEX // previously called zero-riscy
119 //#define PLP_MATH_RISCY
120 #define PLP_MATH_LOOPUNROLL
121 
122 
132 typedef struct
133 {
134  int32_t * pSrcA; // pointer to the first vector
135  int32_t * pSrcB; // pointer to the second vector
136  uint32_t blkSizePE; // number of samples in each vector
137  uint32_t nPE; // number of processing units
138  int32_t * resBuffer; // pointer to result vector
140 
149 typedef struct
150 {
151  int32_t * pSrcA; // pointer to the first vector
152  int32_t * pSrcB; // pointer to the second vector
153  uint32_t blkSizePE; // number of samples in each vector
154  uint32_t deciPoint; // decimal point for right shift
155  uint32_t nPE; // number of processing units
156  int32_t * resBuffer; // pointer to result vector
158 
168 typedef struct {
169  const int32_t * pSrcA; // pointer to the first vector
170  uint32_t srcALen;
171  const int32_t * pSrcB; // pointer to the second vector
172  uint32_t srcBLen; // number of samples in the second vector
173  uint8_t nPE; // number of processing units
174  int32_t * pRes; // pointer to result vector
176 
186 typedef struct {
187  const int16_t * pSrcA; // pointer to the first vector
188  uint32_t srcALen;
189  const int16_t * pSrcB; // pointer to the second vector
190  uint32_t srcBLen; // number of samples in the second vector
191  uint8_t nPE; // number of processing units
192  int32_t * pRes; // pointer to result vector
194 
204 typedef struct {
205  const int8_t * pSrcA; // pointer to the first vector
206  uint32_t srcALen;
207  const int8_t * pSrcB; // pointer to the second vector
208  uint32_t srcBLen; // number of samples in the second vector
209  uint8_t nPE; // number of processing units
210  int32_t * pRes; // pointer to result vector
212 
223 typedef struct{
224  uint32_t addOffset;
225  uint32_t addLengthfirst;
226  uint32_t addLengthsecond;
227  uint32_t numVectors;
228  uint32_t blockOffset;
229  int32_t* pRes;
230  uint8_t coresPerVector;
232 
247 typedef struct{
248  uint32_t FFTLength;
249  uint8_t bitReverseFlag;
250  const float32_t * pTwiddleFactors;
251  const uint16_t * pBitReverseLUT;
253 
254 typedef struct{
256  const float32_t * pSrc;
257  const uint32_t nPE;
258  float32_t * pDst;
260 
261 typedef struct{
262  float32_t re;
263  float32_t im;
265 
269 typedef struct
270 {
271  const int8_t * __restrict__ pSrcA;
272  const int8_t * __restrict__ pSrcB;
273  uint32_t M;
274  uint32_t N;
275  uint32_t O;
276  uint32_t nPE;
277  int32_t * __restrict__ pDstC;
279 
280 
281 
285 typedef struct
286 {
287  const int16_t * __restrict__ pSrcA;
288  const int16_t * __restrict__ pSrcB;
289  uint32_t M;
290  uint32_t N;
291  uint32_t O;
292  uint32_t nPE;
293  int32_t * __restrict__ pDstC;
295 
296 
297 
301 typedef struct
302 {
303  const int32_t * __restrict__ pSrcA;
304  const int32_t * __restrict__ pSrcB;
305  uint32_t M;
306  uint32_t N;
307  uint32_t O;
308  uint32_t nPE;
309  int32_t * __restrict__ pDstC;
311 
312 
313 
325  const int32_t * __restrict__ pSrcA,
326  const int32_t * __restrict__ pSrcB,
327  uint32_t blockSize,
328  uint32_t nPE,
329  int32_t * __restrict__ pRes);
330 
331 
332 
345  const int32_t * __restrict__ pSrcA,
346  const int32_t * __restrict__ pSrcB,
347  uint32_t blockSize,
348  uint32_t deciPoint,
349  uint32_t nPE,
350  int32_t * __restrict__ pRes);
351 
352 
353 
363 void plp_dot_prod_i32p_xpulpv2(void * S);
364 
365 
366 
377 void plp_dot_prod_q32p_xpulpv2(void * S);
378 
379 
380 
388 void plp_dot_prod_i32(
389  const int32_t * __restrict__ pSrcA,
390  const int32_t * __restrict__ pSrcB,
391  uint32_t blockSize,
392  int32_t * __restrict__ pRes);
393 
394 
395 
404  const int32_t * __restrict__ pSrcA,
405  const int32_t * __restrict__ pSrcB,
406  uint32_t blockSize,
407  int32_t * __restrict__ pRes);
408 
409 
410 
419  const int32_t * __restrict__ pSrcA,
420  const int32_t * __restrict__ pSrcB,
421  uint32_t blockSize,
422  int32_t * __restrict__ pRes);
423 
424 
425 
436 void plp_dot_prod_q32(
437  const int32_t * __restrict__ pSrcA,
438  const int32_t * __restrict__ pSrcB,
439  uint32_t blockSize,
440  uint32_t deciPoint,
441  int32_t * __restrict__ pRes);
442 
443 
444 
456  const int32_t * __restrict__ pSrcA,
457  const int32_t * __restrict__ pSrcB,
458  uint32_t blockSize,
459  uint32_t deciPoint,
460  int32_t * __restrict__ pRes);
461 
462 
463 
475  const int32_t * __restrict__ pSrcA,
476  const int32_t * __restrict__ pSrcB,
477  uint32_t blockSize,
478  uint32_t deciPoint,
479  int32_t * __restrict__ pRes);
480 
481 
482 
493 void plp_dot_prod_i16(
494  const int16_t * pSrcA,
495  const int16_t * pSrcB,
496  uint32_t blockSize,
497  int32_t * __restrict__ pRes);
498 
499 
500 
514  const int16_t * __restrict__ pSrcA,
515  const int16_t * __restrict__ pSrcB,
516  uint32_t blockSize,
517  int32_t * __restrict__ pRes);
518 
519 
520 
534  const int16_t * __restrict__ pSrcA,
535  const int16_t * __restrict__ pSrcB,
536  uint32_t blockSize,
537  int32_t * __restrict__ pRes);
538 
539 
540 
554 void plp_dot_prod_q16(
555  const int16_t * __restrict__ pSrcA,
556  const int16_t * __restrict__ pSrcB,
557  uint32_t blockSize,
558  uint32_t deciPoint,
559  int32_t * __restrict__ pRes);
560 
561 
562 
577  const int16_t * __restrict__ pSrcA,
578  const int16_t * __restrict__ pSrcB,
579  uint32_t blockSize,
580  uint32_t deciPoint,
581  int32_t * __restrict__ pRes);
582 
583 
584 
599  const int16_t * __restrict__ pSrcA,
600  const int16_t * __restrict__ pSrcB,
601  uint32_t blockSize,
602  uint32_t deciPoint,
603  int32_t * __restrict__ pRes);
604 
605 
606 
619 void plp_dot_prod_i8(
620  const int8_t * __restrict__ pSrcA,
621  const int8_t * __restrict__ pSrcB,
622  uint32_t blockSize,
623  int32_t * __restrict__ pRes);
624 
625 
626 
640  const int8_t * __restrict__ pSrcA,
641  const int8_t * __restrict__ pSrcB,
642  uint32_t blockSize,
643  int32_t * __restrict__ pRes);
644 
645 
646 
660  const int8_t * __restrict__ pSrcA,
661  const int8_t * __restrict__ pSrcB,
662  uint32_t blockSize,
663  int32_t * __restrict__ pRes);
664 
665 
666 
680 void plp_dot_prod_q8(
681  const int8_t * __restrict__ pSrcA,
682  const int8_t * __restrict__ pSrcB,
683  uint32_t blockSize,
684  uint32_t deciPoint,
685  int32_t * __restrict__ pRes);
686 
687 
688 
703  const int8_t * __restrict__ pSrcA,
704  const int8_t * __restrict__ pSrcB,
705  uint32_t blockSize,
706  uint32_t deciPoint,
707  int32_t * __restrict__ pRes);
708 
709 
710 
725  const int8_t * __restrict__ pSrcA,
726  const int8_t * __restrict__ pSrcB,
727  uint32_t blockSize,
728  uint32_t deciPoint,
729  int32_t * __restrict__ pRes);
738 void plp_fill_i32(
739  int32_t value,
740  int32_t * __restrict__ pDst,
741  uint32_t blockSize);
742 
743 
744 
754  int32_t value,
755  int32_t * __restrict__ pDst,
756  uint32_t blockSize);
757 
758 
759 
769  int32_t value,
770  int32_t * __restrict__ pDst,
771  uint32_t blockSize);
772 
773 
774 
783 void plp_copy_i32(
784  int32_t * __restrict__ pSrc,
785  int32_t * __restrict__ pDst,
786  uint32_t blockSize);
787 
788 
789 
799  int32_t * __restrict__ pSrc,
800  int32_t * __restrict__ pDst,
801  uint32_t blockSize);
802 
803 
804 
814  int32_t * __restrict__ pSrc,
815  int32_t * __restrict__ pDst,
816  uint32_t blockSize);
817 
818 
819 
828 void plp_mean_i32(
829  const int32_t * __restrict__ pSrc,
830  uint32_t blockSize,
831  int32_t * __restrict__ pRes);
832 
833 
834 
844  const int32_t * __restrict__ pSrc,
845  uint32_t blockSize,
846  int32_t * __restrict__ pRes);
847 
848 
849 
859  const int32_t * __restrict__ pSrc,
860  uint32_t blockSize,
861  int32_t * __restrict__ pRes);
862 
873 void plp_conv_i32(
874  const int32_t * pSrcA,
875  const uint32_t srcALen,
876  const int32_t * pSrcB,
877  const uint32_t srcBLen,
878  int32_t * pRes);
879 
890 void plp_conv_i32s_rv32im(const int32_t * pSrcA,
891  const uint32_t srcALen,
892  const int32_t * pSrcB,
893  const uint32_t srcBLen,
894  int32_t * pRes);
895 
905 void plp_conv_i32s_xpulpv2(const int32_t * __restrict__ pSrcA,
906  const uint32_t srcALen,
907  const int32_t * __restrict__ pSrcB,
908  const uint32_t srcBLen,
909  int32_t * __restrict__ pRes);
910 
911 
922 void plp_conv_i16(const int16_t * pSrcA,
923  const uint32_t srcALen,
924  const int16_t * pSrcB,
925  const uint32_t srcBLen,
926  int32_t * pRes);
927 
938 void plp_conv_i16s_xpulpv2(const int16_t * pSrcA,
939  const uint32_t srcALen,
940  const int16_t * pSrcB,
941  const uint32_t srcBLen,
942  int32_t * pRes);
943 
954 void plp_conv_i16s_rv32im(const int16_t * pSrcA,
955  const uint32_t srcALen,
956  const int16_t * pSrcB,
957  const uint32_t srcBLen,
958  int32_t * pRes);
959 
970 void plp_conv_i8(const int8_t * pSrcA,
971  const uint32_t srcALen,
972  const int8_t * pSrcB,
973  const uint32_t srcBLen,
974  int32_t * pRes);
975 
986 void plp_conv_i8s_xpulpv2(const int8_t * pSrcA,
987  const uint32_t srcALen,
988  const int8_t * pSrcB,
989  const uint32_t srcBLen,
990  int32_t * pRes);
991 
1002 void plp_conv_i8s_rv32im(const int8_t * pSrcA,
1003  const uint32_t srcALen,
1004  const int8_t * pSrcB,
1005  const uint32_t srcBLen,
1006  int32_t * pRes);
1007 
1020  const int32_t * pSrcA,
1021  const uint32_t srcALen,
1022  const int32_t * pSrcB,
1023  const uint32_t srcBLen,
1024  const uint8_t nPE,
1025  int32_t * pRes);
1026 
1033 void plp_conv_i32p_xpulpv2(void* task_args);
1034 
1047  const int16_t * pSrcA,
1048  const uint32_t srcALen,
1049  const int16_t * pSrcB,
1050  const uint32_t srcBLen,
1051  const uint8_t nPE,
1052  int32_t * pRes);
1059 void plp_conv_i16p_xpulpv2(void* task_args);
1060 
1073  const int8_t * pSrcA,
1074  const uint32_t srcALen,
1075  const int8_t * pSrcB,
1076  const uint32_t srcBLen,
1077  const uint8_t nPE,
1078  int32_t * pRes);
1085 void plp_conv_i8p_xpulpv2(void* task_args);
1086 
1096 void plp_conv_parallel_OLA(uint32_t nPE, uint32_t srcALen, uint32_t srcBLen, int32_t* resultsBuffer);
1097 
1103 void plp_conv_parallel_OLA_kernel(void* task_args);
1104 
1105 
1117 void plp_mat_mult_i32(
1118  const int32_t * __restrict__ pSrcA,
1119  const int32_t * __restrict__ pSrcB,
1120  uint32_t M,
1121  uint32_t N,
1122  uint32_t O,
1123  int32_t * __restrict__ pDstC);
1124 
1125 
1126 
1127 
1140  const int32_t * __restrict__ pSrcA,
1141  const int32_t * __restrict__ pSrcB,
1142  uint32_t M,
1143  uint32_t N,
1144  uint32_t O,
1145  int32_t * __restrict__ pDstC);
1146 
1147 
1148 
1161  const int32_t * __restrict__ pSrcA,
1162  const int32_t * __restrict__ pSrcB,
1163  uint32_t M,
1164  uint32_t N,
1165  uint32_t O,
1166  int32_t * __restrict__ pDstC);
1167 
1168 
1169 
1181 void plp_mat_mult_i16(
1182  const int16_t * __restrict__ pSrcA,
1183  const int16_t * __restrict__ pSrcB,
1184  uint32_t M,
1185  uint32_t N,
1186  uint32_t O,
1187  int32_t * __restrict__ pDstC);
1188 
1189 
1190 
1203  const int16_t * __restrict__ pSrcA,
1204  const int16_t * __restrict__ pSrcB,
1205  uint32_t M,
1206  uint32_t N,
1207  uint32_t O,
1208  int32_t * __restrict__ pDstC);
1209 
1210 
1211 
1227  const int16_t * __restrict__ pSrcA,
1228  const int16_t * __restrict__ pSrcB,
1229  uint32_t M,
1230  uint32_t N,
1231  uint32_t O,
1232  int32_t * __restrict__ pDstC);
1233 
1234 
1235 
1247 void plp_mat_mult_i8(
1248  const int8_t * __restrict__ pSrcA,
1249  const int8_t * __restrict__ pSrcB,
1250  uint32_t M,
1251  uint32_t N,
1252  uint32_t O,
1253  int32_t * __restrict__ pDstC);
1254 
1255 
1256 
1269  const int8_t * __restrict__ pSrcA,
1270  const int8_t * __restrict__ pSrcB,
1271  uint32_t M,
1272  uint32_t N,
1273  uint32_t O,
1274  int32_t * __restrict__ pDstC);
1275 
1276 
1277 
1293  const int8_t * __restrict__ pSrcA,
1294  const int8_t * __restrict__ pSrcB,
1295  uint32_t M,
1296  uint32_t N,
1297  uint32_t O,
1298  int32_t * __restrict__ pDstC);
1299 
1300 
1301 
1315  const int32_t * __restrict__ pSrcA,
1316  const int32_t * __restrict__ pSrcB,
1317  uint32_t M,
1318  uint32_t N,
1319  uint32_t O,
1320  uint32_t nPE,
1321  int32_t * __restrict__ pDstC);
1322 
1323 
1324 
1332  void* args);
1333 
1334 
1348  const int16_t * __restrict__ pSrcA,
1349  const int16_t * __restrict__ pSrcB,
1350  uint32_t M,
1351  uint32_t N,
1352  uint32_t O,
1353  uint32_t nPE,
1354  int32_t * __restrict__ pDstC);
1355 
1356 
1357 
1368  void* args);
1369 
1370 
1371 
1385  const int8_t * __restrict__ pSrcA,
1386  const int8_t * __restrict__ pSrcB,
1387  uint32_t M,
1388  uint32_t N,
1389  uint32_t O,
1390  uint32_t nPE,
1391  int32_t * __restrict__ pDstC);
1392 
1393 
1394 
1405  void* args);
1406 
1419  const int32_t * __restrict__ pSrcA,
1420  const int32_t * __restrict__ pSrcB,
1421  uint32_t M,
1422  uint32_t N,
1423  uint32_t O,
1424  int32_t * __restrict__ pDstC);
1425 
1426 
1427 
1440  const int32_t * __restrict__ pSrcA,
1441  const int32_t * __restrict__ pSrcB,
1442  uint32_t M,
1443  uint32_t N,
1444  uint32_t O,
1445  int32_t * __restrict__ pDstC);
1446 
1447 
1448 
1461  const int32_t * __restrict__ pSrcA,
1462  const int32_t * __restrict__ pSrcB,
1463  uint32_t M,
1464  uint32_t N,
1465  uint32_t O,
1466  int32_t * __restrict__ pDstC);
1467 
1468 
1469 
1482  const int16_t * __restrict__ pSrcA,
1483  const int16_t * __restrict__ pSrcB,
1484  uint32_t M,
1485  uint32_t N,
1486  uint32_t O,
1487  int32_t * __restrict__ pDstC);
1488 
1489 
1490 
1503  const int16_t * __restrict__ pSrcA,
1504  const int16_t * __restrict__ pSrcB,
1505  uint32_t M,
1506  uint32_t N,
1507  uint32_t O,
1508  int32_t * __restrict__ pDstC);
1509 
1510 
1511 
1527  const int16_t * __restrict__ pSrcA,
1528  const int16_t * __restrict__ pSrcB,
1529  uint32_t M,
1530  uint32_t N,
1531  uint32_t O,
1532  int32_t * __restrict__ pDstC);
1533 
1534 
1535 
1548  const int8_t * __restrict__ pSrcA,
1549  const int8_t * __restrict__ pSrcB,
1550  uint32_t M,
1551  uint32_t N,
1552  uint32_t O,
1553  int32_t * __restrict__ pDstC);
1554 
1555 
1556 
1569  const int8_t * __restrict__ pSrcA,
1570  const int8_t * __restrict__ pSrcB,
1571  uint32_t M,
1572  uint32_t N,
1573  uint32_t O,
1574  int32_t * __restrict__ pDstC);
1575 
1576 
1577 
1593  const int8_t * __restrict__ pSrcA,
1594  const int8_t * __restrict__ pSrcB,
1595  uint32_t M,
1596  uint32_t N,
1597  uint32_t O,
1598  int32_t * __restrict__ pDstC);
1599 
1600 
1601 
1615  const int32_t * __restrict__ pSrcA,
1616  const int32_t * __restrict__ pSrcB,
1617  uint32_t M,
1618  uint32_t N,
1619  uint32_t O,
1620  uint32_t nPE,
1621  int32_t * __restrict__ pDstC);
1622 
1623 
1624 
1632  void* args);
1633 
1634 
1648  const int16_t * __restrict__ pSrcA,
1649  const int16_t * __restrict__ pSrcB,
1650  uint32_t M,
1651  uint32_t N,
1652  uint32_t O,
1653  uint32_t nPE,
1654  int32_t * __restrict__ pDstC);
1655 
1656 
1657 
1668  void* args);
1669 
1670 
1671 
1685  const int8_t * __restrict__ pSrcA,
1686  const int8_t * __restrict__ pSrcB,
1687  uint32_t M,
1688  uint32_t N,
1689  uint32_t O,
1690  uint32_t nPE,
1691  int32_t * __restrict__ pDstC);
1692 
1693 
1694 
1705  void* args);
1706 
1714 void plp_rfft_f32(
1715  const plp_rfft_instance_f32 *S,
1716  const float32_t * __restrict__ pSrc,
1717  float32_t * __restrict__ pDst);
1718 
1719 
1729  const plp_rfft_instance_f32 *S,
1730  const float32_t * __restrict__ pSrc,
1731  const uint32_t nPE,
1732  float32_t * __restrict__ pDst);
1733 
1742  const plp_rfft_instance_f32 *S,
1743  const float32_t * __restrict__ pSrc,
1744  float32_t * __restrict__ pDst);
1745 
1752 
1753 
1754 #endif // __PLP_MATH_H__
void plp_mat_mult_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix-matrix multiplication of 32-bit integer matrices.
Definition: plp_mat_mult_i32.c:82
Instance structure for basic integer convolution.
Definition: plp_math.h:168
void plp_dot_prod_q16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Vectorized dot product of 16-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q16v_xpulpv2.c:57
void plp_copy_i32s_xpulpv2(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Copies the elements of a 32-bit integer vector for XPULPV2 extension.
Definition: plp_copy_i32s_xpulpv2.c:50
void plp_conv_i8(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 8-bit integer vectors.
Definition: plp_conv_i8.c:55
void plp_mat_mult_trans_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 32-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i32s_rv32im.c:112
void plp_rfft_f32_xpulpv2_parallel(plp_rfft_parallel_arg_f32 *arg)
Floating-point FFT on real input data for XPULPV2 extension (parallel version).
Definition: plp_rfft_f32_xpulpv2.c:179
void plp_conv_i8s_rv32im(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 8-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i8s_rv32im.c:56
void plp_dot_prod_q8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 8-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q8s_rv32im.c:56
void plp_mat_mult_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 16-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i16v_xpulpv2.c:84
void plp_dot_prod_q32p_xpulpv2(void *S)
Scalar dot product with interleaved access of 32-bit fixed point vectors kernel for XPULPV2 extension...
Definition: plp_dot_prod_q32p_xpulpv2.c:50
void plp_dot_prod_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Vectorized dot product of 16-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i16v_xpulpv2.c:56
void plp_mat_mult_trans_i16_parallel(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of 16-bit integer matrices...
Definition: plp_mat_mult_trans_i16_parallel.c:55
void plp_mat_mult_trans_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 16-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i16v_xpulpv2.c:112
void plp_conv_i8_parallel(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 8-bit integer vectors.
Definition: plp_conv_i8_parallel.c:56
void plp_mat_mult_trans_i8vp_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of 8-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i8vp_xpulpv2.c:120
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:285
void plp_mat_mult_trans_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_trans_i8.c:54
void plp_rfft_f32_xpulpv2(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst)
Floating-point FFT on real input data for XPULPV2 extension.
Definition: plp_rfft_f32_xpulpv2.c:65
void plp_dot_prod_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Vectorized dot product of 8-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i8v_xpulpv2.c:56
void plp_copy_i32s_rv32im(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Copies the elements of a 32-bit integer vector for RV32IM extension.
Definition: plp_copy_i32s_rv32im.c:74
void plp_dot_prod_q32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 32-bit fixed point vectors.
Definition: plp_dot_prod_q32.c:53
void plp_mat_mult_trans_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i32s_xpulpv2.c:112
void plp_mat_mult_trans_i16vp_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of 16-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i16vp_xpulpv2.c:120
void plp_conv_parallel_OLA(uint32_t nPE, uint32_t srcALen, uint32_t srcBLen, int32_t *resultsBuffer)
Helper function for parallelized overlap-adding of partial convolution results.
Definition: plp_conv_parallel_OLA.c:52
Definition: plp_math.h:254
void plp_mat_mult_trans_i16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 16-bit integer matrices.
Definition: plp_mat_mult_trans_i16.c:54
void plp_dot_prod_q32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q32s_xpulpv2.c:54
Instance structure for basic integer convolution.
Definition: plp_math.h:186
void plp_conv_i16s_rv32im(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 16-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i16s_rv32im.c:56
void plp_conv_i8s_xpulpv2(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 8-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_i8s_xpulpv2.c:60
Instance structure for integer parallel dot product.
Definition: plp_math.h:132
Instance structure for basic integer convolution.
Definition: plp_math.h:204
void plp_dot_prod_q32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q32s_rv32im.c:53
void plp_dot_prod_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 32-bit integer vectors.
Definition: plp_dot_prod_i32.c:80
void plp_rfft_f32_parallel(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, const uint32_t nPE, float32_t *__restrict__ pDst)
Floating-point FFT on real input data (parallel version).
Definition: plp_rfft_f32_parallel.c:58
void plp_conv_i16(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 16-bit integer vectors.
Definition: plp_conv_i16.c:55
void plp_conv_i16_parallel(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 16-bit integer vectors.
Definition: plp_conv_i16_parallel.c:56
void plp_mat_mult_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix-matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_i8.c:55
void plp_mat_mult_trans_i8_parallel(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of 8-bit integer matrices...
Definition: plp_mat_mult_trans_i8_parallel.c:55
void plp_mat_mult_i8vp_xpulpv2(void *args)
Parallel matrix multiplication of 8-bit integer matrices kernel for XPULPV2 extension.
Definition: plp_mat_mult_i8vp_xpulpv2.c:92
void plp_mat_mult_i32p_xpulpv2(void *args)
Parallel matrix-matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i32p_xpulpv2.c:88
Instance structure for fixed point parallel dot product.
Definition: plp_math.h:149
void plp_mat_mult_trans_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 32-bit integer matrices.
Definition: plp_mat_mult_trans_i32.c:82
void plp_mat_mult_trans_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 8-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i8v_xpulpv2.c:112
void plp_dot_prod_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i32s_xpulpv2.c:53
void plp_dot_prod_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t nPE, int32_t *__restrict__ pRes)
Glue code for parallel dot product of 32-bit integer vectors.
Definition: plp_dot_prod_i32_parallel.c:54
void plp_dot_prod_q16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 16-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q16s_rv32im.c:56
void plp_conv_i32p_xpulpv2(void *task_args)
Setup code for parallel convolution of 32-bit integer vectors.
Definition: plp_conv_i32p_xpulpv2.c:52
void plp_mean_i32s_xpulpv2(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Mean value of a 32-bit integer vector for XPULPV2 extension.
Definition: plp_mean_i32s_xpulpv2.c:52
void plp_conv_i32s_rv32im(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 32-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i32s_rv32im_2.c:56
Instance structure for floating-point FFT.
Definition: plp_math.h:247
void plp_conv_i16p_xpulpv2(void *task_args)
Setup code for parallel convolution of 16-bit integer vectors.
Definition: plp_conv_i16p_xpulpv2.c:53
void plp_conv_i8p_xpulpv2(void *task_args)
Setup code for parallel convolution of 8-bit integer vectors.
Definition: plp_conv_i8p_xpulpv2.c:52
void plp_conv_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const uint32_t srcALen, const int32_t *__restrict__ pSrcB, const uint32_t srcBLen, int32_t *__restrict__ pRes)
Convolution of 32-bit integer vectors kernel for XPULPV2 extension.
void plp_mat_mult_i8_parallel(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix-matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_i8_parallel.c:56
void plp_mat_mult_trans_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 8-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i8s_rv32im.c:112
void plp_conv_i32_parallel(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 32-bit integer vectors.
Definition: plp_conv_i32_parallel.c:58
void plp_dot_prod_q16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 16-bit fixed point vectors.
Definition: plp_dot_prod_q16.c:56
void plp_dot_prod_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 8-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i8s_rv32im.c:56
void plp_dot_prod_q8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 8-bit fixed point vectors.
Definition: plp_dot_prod_q8.c:56
void plp_fill_i32(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Glue code for filling a constant value into a 32-bit integer vector.
Definition: plp_fill_i32.c:74
void plp_mat_mult_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 8-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i8v_xpulpv2.c:89
void plp_mean_i32(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for mean value of a 32-bit integer vector.
Definition: plp_mean_i32.c:77
void plp_mat_mult_trans_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of 32-bit integer matrices. ...
Definition: plp_mat_mult_trans_i32_parallel.c:55
void plp_mat_mult_i16_parallel(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix-matrix multiplication of 16-bit integer matrices. ...
Definition: plp_mat_mult_i16_parallel.c:56
Instance structure for basic integer convolution.
Definition: plp_math.h:223
void plp_conv_i16s_xpulpv2(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 16-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_i16s_xpulpv2.c:59
void plp_rfft_f32(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst)
Floating-point FFT on real input data.
Definition: plp_rfft_f32.c:57
void plp_mat_mult_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 16-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i16s_rv32im.c:112
void plp_conv_i32(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 32-bit integer vectors.
void plp_fill_i32s_xpulpv2(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Fills a constant value into a 32-bit integer vector for XPULPV2 extension.
Definition: plp_fill_i32s_xpulpv2.c:50
void plp_dot_prod_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 16-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i16s_rv32im.c:56
void plp_fill_i32s_rv32im(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Fills a constant value into a 32-bit integer vector for RV32IM extension.
Definition: plp_fill_i32s_rv32im.c:74
void plp_mat_mult_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 32-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i32s_rv32im.c:112
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:269
void plp_dot_prod_q8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Vectorized dot product of 8-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q8v_xpulpv2.c:57
void plp_dot_prod_q32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, uint32_t nPE, int32_t *__restrict__ pRes)
Glue code for parallel dot product of 32-bit fixed point vectors.
Definition: plp_dot_prod_q32_parallel.c:55
void plp_conv_parallel_OLA_kernel(void *task_args)
Helper function for parallelized overlap-adding of partial convolution results.
Definition: plp_conv_parallel_OLA_kernel.c:49
void plp_mat_mult_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i32s_xpulpv2.c:84
void plp_mean_i32s_rv32im(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Mean value of a 32-bit integer vector for RV32IM extension.
Definition: plp_mean_i32s_rv32im.c:76
void plp_mat_mult_trans_i32p_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of 32-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i32p_xpulpv2.c:116
Definition: plp_math.h:261
void plp_mat_mult_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix-matrix multiplication of 32-bit integer matrices. ...
Definition: plp_mat_mult_i32_parallel.c:56
void plp_dot_prod_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i32s_rv32im.c:78
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:301
void plp_mat_mult_i16vp_xpulpv2(void *args)
Parallel matrix multiplication of 16-bit integer matrices kernel for XPULPV2 extension.
Definition: plp_mat_mult_i16vp_xpulpv2.c:89
void plp_mat_mult_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 8-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i8s_rv32im.c:112
void plp_dot_prod_i32p_xpulpv2(void *S)
Scalar dot product with interleaved access of 32-bit integer vectors kernel for XPULPV2 extension...
Definition: plp_dot_prod_i32p_xpulpv2.c:50
void plp_copy_i32(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Glue code for copying the elements of a 32-bit integer vector.
Definition: plp_copy_i32.c:74
void plp_mat_mult_trans_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 16-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i16s_rv32im.c:112
void plp_dot_prod_i16(const int16_t *pSrcA, const int16_t *pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 16-bit integer vectors.
void plp_mat_mult_i16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix-matrix multiplication of 16-bit integer matrices.
Definition: plp_mat_mult_i16.c:54
void plp_dot_prod_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 8-bit integer vectors.
Definition: plp_dot_prod_i8.c:56