PULP DSP  Version 1.0
Digital Signal Processing library for PULP processors (pulp-platform.org)
 All Classes Files Functions Groups Pages
plp_math.h
Go to the documentation of this file.
1 
7 /*
8  * Copyright (C) 2019 ETH Zurich and University of Bologna. All rights reserved.
9  *
10  * SPDX-License-Identifier: Apache-2.0
11  *
12  * Licensed under the Apache License, Version 2.0 (the License); you may
13  * not use this file except in compliance with the License.
14  * You may obtain a copy of the License at
15  *
16  * www.apache.org/licenses/LICENSE-2.0
17  *
18  * Unless required by applicable law or agreed to in writing, software
19  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  * See the License for the specific language governing permissions and
22  * limitations under the License.
23  */
24 
110 #ifndef __PLP_MATH_H__
111 #define __PLP_MATH_H__
112 
113 #include "rt/rt_api.h"
114 #include "math.h"
115 
116 typedef float float32_t;
117 
118 #define PLP_MATH_IBEX // previously called zero-riscy
119 //#define PLP_MATH_RISCY
120 #define PLP_MATH_LOOPUNROLL
121 
122 
132 typedef struct
133 {
134  int32_t * pSrcA; // pointer to the first vector
135  int32_t * pSrcB; // pointer to the second vector
136  uint32_t blkSizePE; // number of samples in each vector
137  uint32_t nPE; // number of processing units
138  int32_t * resBuffer; // pointer to result vector
140 
149 typedef struct
150 {
151  int32_t * pSrcA; // pointer to the first vector
152  int32_t * pSrcB; // pointer to the second vector
153  uint32_t blkSizePE; // number of samples in each vector
154  uint32_t deciPoint; // decimal point for right shift
155  uint32_t nPE; // number of processing units
156  int32_t * resBuffer; // pointer to result vector
158 
168 typedef struct
169 {
170  float32_t * pSrcA; // pointer to the first vector
171  float32_t * pSrcB; // pointer to the second vector
172  uint32_t blkSizePE; // number of samples in each vector
173  uint32_t nPE; // number of processing units
174  float32_t * resBuffer; // pointer to result vector
176 
186 typedef struct {
187  const int32_t * pSrcA; // pointer to the first vector
188  uint32_t srcALen;
189  const int32_t * pSrcB; // pointer to the second vector
190  uint32_t srcBLen; // number of samples in each vector
191  uint8_t nPE; // number of processing units
192  int32_t * pRes; // pointer to result vector
194 
204 typedef struct {
205  const int16_t * pSrcA; // pointer to the first vector
206  uint32_t srcALen;
207  const int16_t * pSrcB; // pointer to the second vector
208  uint32_t srcBLen; // number of samples in each vector
209  uint8_t nPE; // number of processing units
210  int32_t * pRes; // pointer to result vector
212 
222 typedef struct {
223  const int8_t * pSrcA; // pointer to the first vector
224  uint32_t srcALen;
225  const int8_t * pSrcB; // pointer to the second vector
226  uint32_t srcBLen; // number of samples in each vector
227  uint8_t nPE; // number of processing units
228  int32_t * pRes; // pointer to result vector
230 
241 typedef struct{
242  uint32_t addOffset;
243  uint32_t addLengthfirst;
244  uint32_t addLengthsecond;
245  uint32_t numVectors;
246  uint32_t blockOffset;
247  int32_t* pRes;
248  uint8_t coresPerVector;
250 
265 typedef struct{
266  uint32_t FFTLength;
267  uint8_t bitReverseFlag;
268  const float32_t * pTwiddleFactors;
269  const uint16_t * pBitReverseLUT;
271 
272 typedef struct{
274  const float32_t * pSrc;
275  const uint32_t nPE;
276  float32_t * pDst;
278 
279 typedef struct{
280  float32_t re;
281  float32_t im;
283 
287 typedef struct
288 {
289  const int8_t * __restrict__ pSrcA;
290  const int8_t * __restrict__ pSrcB;
291  uint32_t M;
292  uint32_t N;
293  uint32_t O;
294  uint32_t nPE;
295  int32_t * __restrict__ pDstC;
297 
298 
299 
303 typedef struct
304 {
305  const int16_t * __restrict__ pSrcA;
306  const int16_t * __restrict__ pSrcB;
307  uint32_t M;
308  uint32_t N;
309  uint32_t O;
310  uint32_t nPE;
311  int32_t * __restrict__ pDstC;
313 
314 
315 
319 typedef struct
320 {
321  const int32_t * __restrict__ pSrcA;
322  const int32_t * __restrict__ pSrcB;
323  uint32_t M;
324  uint32_t N;
325  uint32_t O;
326  uint32_t nPE;
327  int32_t * __restrict__ pDstC;
329 
330 
331 
343  const int32_t * __restrict__ pSrcA,
344  const int32_t * __restrict__ pSrcB,
345  uint32_t blockSize,
346  uint32_t nPE,
347  int32_t * __restrict__ pRes);
348 
349 
350 
363  const int32_t * __restrict__ pSrcA,
364  const int32_t * __restrict__ pSrcB,
365  uint32_t blockSize,
366  uint32_t deciPoint,
367  uint32_t nPE,
368  int32_t * __restrict__ pRes);
369 
370 
382  const float32_t * __restrict__ pSrcA,
383  const float32_t * __restrict__ pSrcB,
384  uint32_t blockSize,
385  uint32_t nPE,
386  float32_t * __restrict__ pRes);
387 
388 
395 void plp_dot_prod_i32p_xpulpv2(void * S);
396 
397 
398 
/**
 * Parallel dot product with interleaved access of 32-bit fixed point vectors;
 * kernel for the XPULPV2 extension (implemented in plp_dot_prod_q32p_xpulpv2.c).
 *
 * @param S  untyped argument pointer.
 *           NOTE(review): presumably points to the fixed point parallel dot
 *           product instance structure declared in this header - confirm.
 */
405 void plp_dot_prod_q32p_xpulpv2(void * S);
406 
407 
408 
/**
 * Parallel dot product with interleaved access of 32-bit float vectors;
 * kernel for the XPULPV2 extension (implemented in plp_dot_prod_f32p_xpulpv2.c).
 *
 * @param S  untyped argument pointer.
 *           NOTE(review): presumably points to the float parallel dot product
 *           instance structure declared in this header - confirm.
 */
415 void plp_dot_prod_f32p_xpulpv2(void * S);
416 
417 
418 
/**
 * Glue code for dot product of 32-bit integer vectors
 * (implemented in plp_dot_prod_i32.c).
 *
 * @param pSrcA      first input vector (not written through this pointer)
 * @param pSrcB      second input vector (not written through this pointer)
 * @param blockSize  NOTE(review): presumably the number of samples in each
 *                   vector - confirm against the implementation
 * @param pRes       location receiving the 32-bit integer result
 */
426 void plp_dot_prod_i32(
427  const int32_t * __restrict__ pSrcA,
428  const int32_t * __restrict__ pSrcB,
429  uint32_t blockSize,
430  int32_t * __restrict__ pRes);
431 
432 
433 
442  const int32_t * __restrict__ pSrcA,
443  const int32_t * __restrict__ pSrcB,
444  uint32_t blockSize,
445  int32_t * __restrict__ pRes);
446 
447 
448 
457  const int32_t * __restrict__ pSrcA,
458  const int32_t * __restrict__ pSrcB,
459  uint32_t blockSize,
460  int32_t * __restrict__ pRes);
461 
462 
/**
 * Glue code for dot product of 32-bit fixed point vectors
 * (implemented in plp_dot_prod_q32.c).
 *
 * @param pSrcA      first input vector (not written through this pointer)
 * @param pSrcB      second input vector (not written through this pointer)
 * @param blockSize  NOTE(review): presumably the number of samples in each
 *                   vector - confirm against the implementation
 * @param deciPoint  NOTE(review): presumably the decimal point used for the
 *                   right shift, as in the fixed point instance structure - confirm
 * @param pRes       location receiving the 32-bit result
 */
473 void plp_dot_prod_q32(
474  const int32_t * __restrict__ pSrcA,
475  const int32_t * __restrict__ pSrcB,
476  uint32_t blockSize,
477  uint32_t deciPoint,
478  int32_t * __restrict__ pRes);
479 
480 
481 
493  const int32_t * __restrict__ pSrcA,
494  const int32_t * __restrict__ pSrcB,
495  uint32_t blockSize,
496  uint32_t deciPoint,
497  int32_t * __restrict__ pRes);
498 
499 
500 
512  const int32_t * __restrict__ pSrcA,
513  const int32_t * __restrict__ pSrcB,
514  uint32_t blockSize,
515  uint32_t deciPoint,
516  int32_t * __restrict__ pRes);
517 
518 
/**
 * Glue code for dot product of 32-bit float vectors
 * (implemented in plp_dot_prod_f32.c).
 *
 * @param pSrcA      first input vector (not written through this pointer)
 * @param pSrcB      second input vector (not written through this pointer)
 * @param blockSize  NOTE(review): presumably the number of samples in each
 *                   vector - confirm against the implementation
 * @param pRes       location receiving the float32_t result
 */
528 void plp_dot_prod_f32(
529  const float32_t * __restrict__ pSrcA,
530  const float32_t * __restrict__ pSrcB,
531  uint32_t blockSize,
532  float32_t * __restrict__ pRes);
533 
534 
545  const float32_t * __restrict__ pSrcA,
546  const float32_t * __restrict__ pSrcB,
547  uint32_t blockSize,
548  float32_t * __restrict__ pRes);
549 
550 
561 void plp_dot_prod_i16(
562  const int16_t * pSrcA,
563  const int16_t * pSrcB,
564  uint32_t blockSize,
565  int32_t * __restrict__ pRes);
566 
567 
568 
582  const int16_t * __restrict__ pSrcA,
583  const int16_t * __restrict__ pSrcB,
584  uint32_t blockSize,
585  int32_t * __restrict__ pRes);
586 
587 
588 
602  const int16_t * __restrict__ pSrcA,
603  const int16_t * __restrict__ pSrcB,
604  uint32_t blockSize,
605  int32_t * __restrict__ pRes);
606 
607 
608 
622 void plp_dot_prod_q16(
623  const int16_t * __restrict__ pSrcA,
624  const int16_t * __restrict__ pSrcB,
625  uint32_t blockSize,
626  uint32_t deciPoint,
627  int32_t * __restrict__ pRes);
628 
629 
630 
645  const int16_t * __restrict__ pSrcA,
646  const int16_t * __restrict__ pSrcB,
647  uint32_t blockSize,
648  uint32_t deciPoint,
649  int32_t * __restrict__ pRes);
650 
651 
652 
667  const int16_t * __restrict__ pSrcA,
668  const int16_t * __restrict__ pSrcB,
669  uint32_t blockSize,
670  uint32_t deciPoint,
671  int32_t * __restrict__ pRes);
672 
673 
674 
687 void plp_dot_prod_i8(
688  const int8_t * __restrict__ pSrcA,
689  const int8_t * __restrict__ pSrcB,
690  uint32_t blockSize,
691  int32_t * __restrict__ pRes);
692 
693 
694 
708  const int8_t * __restrict__ pSrcA,
709  const int8_t * __restrict__ pSrcB,
710  uint32_t blockSize,
711  int32_t * __restrict__ pRes);
712 
713 
714 
728  const int8_t * __restrict__ pSrcA,
729  const int8_t * __restrict__ pSrcB,
730  uint32_t blockSize,
731  int32_t * __restrict__ pRes);
732 
733 
734 
748 void plp_dot_prod_q8(
749  const int8_t * __restrict__ pSrcA,
750  const int8_t * __restrict__ pSrcB,
751  uint32_t blockSize,
752  uint32_t deciPoint,
753  int32_t * __restrict__ pRes);
754 
755 
756 
771  const int8_t * __restrict__ pSrcA,
772  const int8_t * __restrict__ pSrcB,
773  uint32_t blockSize,
774  uint32_t deciPoint,
775  int32_t * __restrict__ pRes);
776 
777 
778 
793  const int8_t * __restrict__ pSrcA,
794  const int8_t * __restrict__ pSrcB,
795  uint32_t blockSize,
796  uint32_t deciPoint,
797  int32_t * __restrict__ pRes);
806 void plp_fill_i32(
807  int32_t value,
808  int32_t * __restrict__ pDst,
809  uint32_t blockSize);
810 
811 
812 
822  int32_t value,
823  int32_t * __restrict__ pDst,
824  uint32_t blockSize);
825 
826 
827 
837  int32_t value,
838  int32_t * __restrict__ pDst,
839  uint32_t blockSize);
840 
841 
842 
851 void plp_copy_i32(
852  int32_t * __restrict__ pSrc,
853  int32_t * __restrict__ pDst,
854  uint32_t blockSize);
855 
856 
857 
867  int32_t * __restrict__ pSrc,
868  int32_t * __restrict__ pDst,
869  uint32_t blockSize);
870 
871 
872 
882  int32_t * __restrict__ pSrc,
883  int32_t * __restrict__ pDst,
884  uint32_t blockSize);
885 
886 
895 void plp_copy_f32(
896  float32_t * __restrict__ pSrc,
897  float32_t * __restrict__ pDst,
898  uint32_t blockSize);
899 
900 
910  float32_t * __restrict__ pSrc,
911  float32_t * __restrict__ pDst,
912  uint32_t blockSize);
913 
914 
915 
924 void plp_mean_i32(
925  const int32_t * __restrict__ pSrc,
926  uint32_t blockSize,
927  int32_t * __restrict__ pRes);
928 
929 
930 
940  const int32_t * __restrict__ pSrc,
941  uint32_t blockSize,
942  int32_t * __restrict__ pRes);
943 
944 
945 
955  const int32_t * __restrict__ pSrc,
956  uint32_t blockSize,
957  int32_t * __restrict__ pRes);
958 
969 void plp_conv_i32(
970  const int32_t * pSrcA,
971  const uint32_t srcALen,
972  const int32_t * pSrcB,
973  const uint32_t srcBLen,
974  int32_t * pRes);
975 
986 void plp_conv_valid_i32(
987  const int32_t * pSrcA,
988  const uint32_t srcALen,
989  const int32_t * pSrcB,
990  const uint32_t srcBLen,
991  int32_t * pRes);
992 
/**
 * Convolution of 32-bit integer vectors; kernel for the RV32IM extension
 * (implemented in plp_conv_i32s_rv32im_2.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer receiving the 32-bit results
 */
1003 void plp_conv_i32s_rv32im(const int32_t * pSrcA,
1004  const uint32_t srcALen,
1005  const int32_t * pSrcB,
1006  const uint32_t srcBLen,
1007  int32_t * pRes);
1008 
/**
 * Convolution of 32-bit integer vectors; kernel for the XPULPV2 extension.
 *
 * All three pointers are declared __restrict__.
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer receiving the 32-bit results
 */
1018 void plp_conv_i32s_xpulpv2(const int32_t * __restrict__ pSrcA,
1019  const uint32_t srcALen,
1020  const int32_t * __restrict__ pSrcB,
1021  const uint32_t srcBLen,
1022  int32_t * __restrict__ pRes);
1023 
/**
 * Convolution (valid) of 32-bit integer vectors; kernel for the XPULPV2 extension.
 *
 * All three pointers are declared __restrict__.
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer receiving the 32-bit results
 */
1033 void plp_conv_valid_i32s_xpulpv2(const int32_t * __restrict__ pSrcA,
1034  const uint32_t srcALen,
1035  const int32_t * __restrict__ pSrcB,
1036  const uint32_t srcBLen,
1037  int32_t * __restrict__ pRes);
1038 
1039 
/**
 * Glue code for convolution of 16-bit integer vectors
 * (implemented in plp_conv_i16.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1050 void plp_conv_i16(const int16_t * pSrcA,
1051  const uint32_t srcALen,
1052  const int16_t * pSrcB,
1053  const uint32_t srcBLen,
1054  int32_t * pRes);
1055 
1056 
/**
 * Glue code for convolution (valid) of 16-bit integer vectors
 * (implemented in plp_conv_valid_i16.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1067 void plp_conv_valid_i16(const int16_t * pSrcA,
1068  const uint32_t srcALen,
1069  const int16_t * pSrcB,
1070  const uint32_t srcBLen,
1071  int32_t * pRes);
1072 
1073 
1084 void plp_conv_valid_rep_i16(const int16_t * pSrcA,
1085  const uint32_t srcALen,
1086  const int16_t * pSrcB,
1087  const uint32_t srcBLen,
1088  int32_t * pRes);
1089 
1100 void plp_conv_i16s_xpulpv2(const int16_t * pSrcA,
1101  const uint32_t srcALen,
1102  const int16_t * pSrcB,
1103  const uint32_t srcBLen,
1104  int32_t * pRes);
1105 
/**
 * Convolution (valid) of 16-bit integer vectors; kernel for the XPULPV2
 * extension (implemented in plp_conv_valid_i16s_xpulpv2.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1116 void plp_conv_valid_i16s_xpulpv2(const int16_t * pSrcA,
1117  const uint32_t srcALen,
1118  const int16_t * pSrcB,
1119  const uint32_t srcBLen,
1120  int32_t * pRes);
1121 
1122 
1134 void plp_conv_valid_rep_i16s_xpulpv2(const int16_t * pSrcA,
1135  const uint32_t srcALen,
1136  const uint32_t srcAMem,
1137  const int16_t* pSrcB,
1138  const uint32_t srcBLen,
1139  int32_t * pRes);
1140 
/**
 * Convolution of 16-bit integer vectors; kernel for the RV32IM extension
 * (implemented in plp_conv_i16s_rv32im.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1151 void plp_conv_i16s_rv32im(const int16_t * pSrcA,
1152  const uint32_t srcALen,
1153  const int16_t * pSrcB,
1154  const uint32_t srcBLen,
1155  int32_t * pRes);
1156 
/**
 * Glue code for convolution of 8-bit integer vectors
 * (implemented in plp_conv_i8.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1167 void plp_conv_i8(const int8_t * pSrcA,
1168  const uint32_t srcALen,
1169  const int8_t * pSrcB,
1170  const uint32_t srcBLen,
1171  int32_t * pRes);
1172 
1173 
1184 void plp_conv_valid_i8(const int8_t * pSrcA,
1185  const uint32_t srcALen,
1186  const int8_t * pSrcB,
1187  const uint32_t srcBLen,
1188  int32_t * pRes);
1189 
1190 
1201 void plp_conv_valid_rep_i8(const int8_t * pSrcA,
1202  const uint32_t srcALen,
1203  const int8_t * pSrcB,
1204  const uint32_t srcBLen,
1205  int32_t * pRes);
1206 
1207 
/**
 * Convolution of 8-bit integer vectors; kernel for the XPULPV2 extension
 * (implemented in plp_conv_i8s_xpulpv2.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1218 void plp_conv_i8s_xpulpv2(const int8_t * pSrcA,
1219  const uint32_t srcALen,
1220  const int8_t * pSrcB,
1221  const uint32_t srcBLen,
1222  int32_t * pRes);
1223 
1224 
/**
 * Convolution (valid) of 8-bit integer vectors; kernel for the XPULPV2
 * extension (implemented in plp_conv_valid_i8s_xpulpv2.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1235 void plp_conv_valid_i8s_xpulpv2(const int8_t * pSrcA,
1236  const uint32_t srcALen,
1237  const int8_t * pSrcB,
1238  const uint32_t srcBLen,
1239  int32_t * pRes);
1240 
1241 
1253 void plp_conv_valid_rep_i8s_xpulpv2(const int8_t * pSrcA,
1254  const uint32_t srcALen,
1255  const uint32_t srcAMem,
1256  const int8_t * pSrcB,
1257  const uint32_t srcBLen,
1258  int32_t * pRes);
1259 
1260 
/**
 * Convolution of 8-bit integer vectors; kernel for the RV32IM extension
 * (implemented in plp_conv_i8s_rv32im.c).
 *
 * @param pSrcA    first input vector (not written through this pointer)
 * @param srcALen  NOTE(review): presumably the element count of pSrcA - confirm
 * @param pSrcB    second input vector (not written through this pointer)
 * @param srcBLen  NOTE(review): presumably the element count of pSrcB - confirm
 * @param pRes     output buffer; results are stored as 32-bit integers
 */
1271 void plp_conv_i8s_rv32im(const int8_t * pSrcA,
1272  const uint32_t srcALen,
1273  const int8_t * pSrcB,
1274  const uint32_t srcBLen,
1275  int32_t * pRes);
1276 
1289  const int32_t * pSrcA,
1290  const uint32_t srcALen,
1291  const int32_t * pSrcB,
1292  const uint32_t srcBLen,
1293  const uint8_t nPE,
1294  int32_t * pRes);
1295 
/**
 * Setup code for parallel convolution of 32-bit integer vectors
 * (implemented in plp_conv_i32p_xpulpv2.c).
 *
 * @param task_args  untyped argument pointer.
 *                   NOTE(review): presumably points to the 32-bit convolution
 *                   instance structure declared in this header - confirm.
 */
1302 void plp_conv_i32p_xpulpv2(void* task_args);
1303 
1316  const int16_t * pSrcA,
1317  const uint32_t srcALen,
1318  const int16_t * pSrcB,
1319  const uint32_t srcBLen,
1320  const uint8_t nPE,
1321  int32_t * pRes);
/**
 * Setup code for parallel convolution of 16-bit integer vectors
 * (implemented in plp_conv_i16p_xpulpv2.c).
 *
 * @param task_args  untyped argument pointer.
 *                   NOTE(review): presumably points to the 16-bit convolution
 *                   instance structure declared in this header - confirm.
 */
1328 void plp_conv_i16p_xpulpv2(void* task_args);
1329 
1342  const int8_t * pSrcA,
1343  const uint32_t srcALen,
1344  const int8_t * pSrcB,
1345  const uint32_t srcBLen,
1346  const uint8_t nPE,
1347  int32_t * pRes);
/**
 * Setup code for parallel convolution of 8-bit integer vectors
 * (implemented in plp_conv_i8p_xpulpv2.c).
 *
 * @param task_args  untyped argument pointer.
 *                   NOTE(review): presumably points to the 8-bit convolution
 *                   instance structure declared in this header - confirm.
 */
1354 void plp_conv_i8p_xpulpv2(void* task_args);
1355 
/**
 * Helper function for parallelized overlap-adding of partial convolution
 * results (implemented in plp_conv_parallel_OLA.c).
 *
 * @param nPE            NOTE(review): presumably the number of processing
 *                       units, as in the instance structures - confirm
 * @param srcALen        NOTE(review): presumably the length of the first
 *                       convolution input - confirm
 * @param srcBLen        NOTE(review): presumably the length of the second
 *                       convolution input - confirm
 * @param resultsBuffer  buffer of 32-bit integers; NOTE(review): presumably
 *                       holds the partial results that are combined - confirm
 */
1365 void plp_conv_parallel_OLA(uint32_t nPE, uint32_t srcALen, uint32_t srcBLen, int32_t* resultsBuffer);
1366 
1372 void plp_conv_parallel_OLA_kernel(void* task_args);
1373 
1374 
/**
 * Glue code for matrix-matrix multiplication of 32-bit integer matrices
 * (implemented in plp_mat_mult_i32.c).
 *
 * @param pSrcA  first input matrix (not written through this pointer)
 * @param pSrcB  second input matrix (not written through this pointer)
 * @param M      NOTE(review): M, N and O are presumably the matrix
 * @param N      dimensions; which dimension each one denotes is not visible
 * @param O      in this header - confirm against the implementation
 * @param pDstC  output matrix of 32-bit integers
 */
1386 void plp_mat_mult_i32(
1387  const int32_t * __restrict__ pSrcA,
1388  const int32_t * __restrict__ pSrcB,
1389  uint32_t M,
1390  uint32_t N,
1391  uint32_t O,
1392  int32_t * __restrict__ pDstC);
1393 
1394 
1395 
1396 
1409  const int32_t * __restrict__ pSrcA,
1410  const int32_t * __restrict__ pSrcB,
1411  uint32_t M,
1412  uint32_t N,
1413  uint32_t O,
1414  int32_t * __restrict__ pDstC);
1415 
1416 
1417 
1430  const int32_t * __restrict__ pSrcA,
1431  const int32_t * __restrict__ pSrcB,
1432  uint32_t M,
1433  uint32_t N,
1434  uint32_t O,
1435  int32_t * __restrict__ pDstC);
1436 
1437 
1438 
1450 void plp_mat_mult_i16(
1451  const int16_t * __restrict__ pSrcA,
1452  const int16_t * __restrict__ pSrcB,
1453  uint32_t M,
1454  uint32_t N,
1455  uint32_t O,
1456  int32_t * __restrict__ pDstC);
1457 
1458 
1459 
1472  const int16_t * __restrict__ pSrcA,
1473  const int16_t * __restrict__ pSrcB,
1474  uint32_t M,
1475  uint32_t N,
1476  uint32_t O,
1477  int32_t * __restrict__ pDstC);
1478 
1479 
1480 
1496  const int16_t * __restrict__ pSrcA,
1497  const int16_t * __restrict__ pSrcB,
1498  uint32_t M,
1499  uint32_t N,
1500  uint32_t O,
1501  int32_t * __restrict__ pDstC);
1502 
1503 
1504 
/**
 * Glue code for matrix-matrix multiplication of 8-bit integer matrices
 * (implemented in plp_mat_mult_i8.c).
 *
 * @param pSrcA  first input matrix (not written through this pointer)
 * @param pSrcB  second input matrix (not written through this pointer)
 * @param M      NOTE(review): M, N and O are presumably the matrix
 * @param N      dimensions; which dimension each one denotes is not visible
 * @param O      in this header - confirm against the implementation
 * @param pDstC  output matrix; results are stored as 32-bit integers
 */
1516 void plp_mat_mult_i8(
1517  const int8_t * __restrict__ pSrcA,
1518  const int8_t * __restrict__ pSrcB,
1519  uint32_t M,
1520  uint32_t N,
1521  uint32_t O,
1522  int32_t * __restrict__ pDstC);
1523 
1524 
1525 
1538  const int8_t * __restrict__ pSrcA,
1539  const int8_t * __restrict__ pSrcB,
1540  uint32_t M,
1541  uint32_t N,
1542  uint32_t O,
1543  int32_t * __restrict__ pDstC);
1544 
1545 
1546 
1562  const int8_t * __restrict__ pSrcA,
1563  const int8_t * __restrict__ pSrcB,
1564  uint32_t M,
1565  uint32_t N,
1566  uint32_t O,
1567  int32_t * __restrict__ pDstC);
1568 
1569 
1570 
1584  const int32_t * __restrict__ pSrcA,
1585  const int32_t * __restrict__ pSrcB,
1586  uint32_t M,
1587  uint32_t N,
1588  uint32_t O,
1589  uint32_t nPE,
1590  int32_t * __restrict__ pDstC);
1591 
1592 
1593 
1601  void* args);
1602 
1603 
1617  const int16_t * __restrict__ pSrcA,
1618  const int16_t * __restrict__ pSrcB,
1619  uint32_t M,
1620  uint32_t N,
1621  uint32_t O,
1622  uint32_t nPE,
1623  int32_t * __restrict__ pDstC);
1624 
1625 
1626 
1637  void* args);
1638 
1639 
1640 
1654  const int8_t * __restrict__ pSrcA,
1655  const int8_t * __restrict__ pSrcB,
1656  uint32_t M,
1657  uint32_t N,
1658  uint32_t O,
1659  uint32_t nPE,
1660  int32_t * __restrict__ pDstC);
1661 
1662 
1663 
1674  void* args);
1675 
1688  const int32_t * __restrict__ pSrcA,
1689  const int32_t * __restrict__ pSrcB,
1690  uint32_t M,
1691  uint32_t N,
1692  uint32_t O,
1693  int32_t * __restrict__ pDstC);
1694 
1695 
1696 
1709  const int32_t * __restrict__ pSrcA,
1710  const int32_t * __restrict__ pSrcB,
1711  uint32_t M,
1712  uint32_t N,
1713  uint32_t O,
1714  int32_t * __restrict__ pDstC);
1715 
1716 
1717 
1730  const int32_t * __restrict__ pSrcA,
1731  const int32_t * __restrict__ pSrcB,
1732  uint32_t M,
1733  uint32_t N,
1734  uint32_t O,
1735  int32_t * __restrict__ pDstC);
1736 
1737 
1738 
1751  const int16_t * __restrict__ pSrcA,
1752  const int16_t * __restrict__ pSrcB,
1753  uint32_t M,
1754  uint32_t N,
1755  uint32_t O,
1756  int32_t * __restrict__ pDstC);
1757 
1758 
1759 
1772  const int16_t * __restrict__ pSrcA,
1773  const int16_t * __restrict__ pSrcB,
1774  uint32_t M,
1775  uint32_t N,
1776  uint32_t O,
1777  int32_t * __restrict__ pDstC);
1778 
1779 
1780 
1796  const int16_t * __restrict__ pSrcA,
1797  const int16_t * __restrict__ pSrcB,
1798  uint32_t M,
1799  uint32_t N,
1800  uint32_t O,
1801  int32_t * __restrict__ pDstC);
1802 
1803 
1804 
1817  const int8_t * __restrict__ pSrcA,
1818  const int8_t * __restrict__ pSrcB,
1819  uint32_t M,
1820  uint32_t N,
1821  uint32_t O,
1822  int32_t * __restrict__ pDstC);
1823 
1824 
1825 
1838  const int8_t * __restrict__ pSrcA,
1839  const int8_t * __restrict__ pSrcB,
1840  uint32_t M,
1841  uint32_t N,
1842  uint32_t O,
1843  int32_t * __restrict__ pDstC);
1844 
1845 
1846 
1862  const int8_t * __restrict__ pSrcA,
1863  const int8_t * __restrict__ pSrcB,
1864  uint32_t M,
1865  uint32_t N,
1866  uint32_t O,
1867  int32_t * __restrict__ pDstC);
1868 
1869 
1870 
1884  const int32_t * __restrict__ pSrcA,
1885  const int32_t * __restrict__ pSrcB,
1886  uint32_t M,
1887  uint32_t N,
1888  uint32_t O,
1889  uint32_t nPE,
1890  int32_t * __restrict__ pDstC);
1891 
1892 
1893 
1901  void* args);
1902 
1903 
1917  const int16_t * __restrict__ pSrcA,
1918  const int16_t * __restrict__ pSrcB,
1919  uint32_t M,
1920  uint32_t N,
1921  uint32_t O,
1922  uint32_t nPE,
1923  int32_t * __restrict__ pDstC);
1924 
1925 
1926 
1937  void* args);
1938 
1939 
1940 
1954  const int8_t * __restrict__ pSrcA,
1955  const int8_t * __restrict__ pSrcB,
1956  uint32_t M,
1957  uint32_t N,
1958  uint32_t O,
1959  uint32_t nPE,
1960  int32_t * __restrict__ pDstC);
1961 
1962 
1963 
1974  void* args);
1975 
1983 void plp_rfft_f32(
1984  const plp_rfft_instance_f32 *S,
1985  const float32_t * __restrict__ pSrc,
1986  float32_t * __restrict__ pDst);
1987 
1988 
1998  const plp_rfft_instance_f32 *S,
1999  const float32_t * __restrict__ pSrc,
2000  const uint32_t nPE,
2001  float32_t * __restrict__ pDst);
2002 
2011  const plp_rfft_instance_f32 *S,
2012  const float32_t * __restrict__ pSrc,
2013  float32_t * __restrict__ pDst);
2014 
2021 
2022 
2023 #endif // __PLP_MATH_H__
void plp_mat_mult_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix-matrix multiplication of 32-bit integer matrices.
Definition: plp_mat_mult_i32.c:82
Instance structure for basic integer convolution.
Definition: plp_math.h:186
void plp_dot_prod_q16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Vectorized dot product of 16-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q16v_xpulpv2.c:57
void plp_copy_i32s_xpulpv2(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Copies the elements of a 32-bit integer vector for XPULPV2 extension.
Definition: plp_copy_i32s_xpulpv2.c:50
void plp_conv_i8(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 8-bit integer vectors.
Definition: plp_conv_i8.c:55
void plp_conv_valid_i16(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution (valid) of 16-bit integer vectors.
Definition: plp_conv_valid_i16.c:53
void plp_mat_mult_trans_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 32-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i32s_rv32im.c:112
void plp_rfft_f32_xpulpv2_parallel(plp_rfft_parallel_arg_f32 *arg)
Floating-point FFT on real input data for XPULPV2 extension (parallel version).
Definition: plp_rfft_f32_xpulpv2.c:179
void plp_conv_i8s_rv32im(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 8-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i8s_rv32im.c:56
void plp_dot_prod_q8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 8-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q8s_rv32im.c:56
void plp_mat_mult_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix-matrix multiplication of 16-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i16v_xpulpv2.c:84
void plp_dot_prod_q32p_xpulpv2(void *S)
Parallel dot product with interleaved access of 32-bit fixed point vectors kernel for XPULPV2 extensi...
Definition: plp_dot_prod_q32p_xpulpv2.c:50
void plp_dot_prod_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Vectorized dot product of 16-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i16v_xpulpv2.c:56
void plp_mat_mult_trans_i16_parallel(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of a 16-bit integer matrices...
Definition: plp_mat_mult_trans_i16_parallel.c:55
void plp_dot_prod_f32(const float32_t *__restrict__ pSrcA, const float32_t *__restrict__ pSrcB, uint32_t blockSize, float32_t *__restrict__ pRes)
Glue code for dot product of 32-bit float vectors.
Definition: plp_dot_prod_f32.c:52
void plp_conv_valid_i16s_xpulpv2(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution (valid) of 16-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_valid_i16s_xpulpv2.c:60
void plp_mat_mult_trans_i16v_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 16-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i16v_xpulpv2.c:112
void plp_conv_i8_parallel(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 8-bit integer vectors.
Definition: plp_conv_i8_parallel.c:56
void plp_mat_mult_trans_i8vp_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of a 8-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i8vp_xpulpv2.c:120
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:303
void plp_mat_mult_trans_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_trans_i8.c:54
void plp_rfft_f32_xpulpv2(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst)
Floating-point FFT on real input data for XPULPV2 extension.
Definition: plp_rfft_f32_xpulpv2.c:65
void plp_dot_prod_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Vectorized dot product of 8-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i8v_xpulpv2.c:56
void plp_copy_i32s_rv32im(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Copies the elements of a 32-bit integer vector for RV32IM extension.
Definition: plp_copy_i32s_rv32im.c:74
void plp_conv_valid_i8s_xpulpv2(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution (valid) of 8-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_valid_i8s_xpulpv2.c:61
void plp_conv_valid_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const uint32_t srcALen, const int32_t *__restrict__ pSrcB, const uint32_t srcBLen, int32_t *__restrict__ pRes)
Convolution (valid) of 32-bit integer vectors kernel for XPULPV2 extension.
void plp_dot_prod_q32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 32-bit fixed point vectors.
Definition: plp_dot_prod_q32.c:53
void plp_mat_mult_trans_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i32s_xpulpv2.c:112
void plp_mat_mult_trans_i16vp_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of a 16-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i16vp_xpulpv2.c:120
void plp_conv_parallel_OLA(uint32_t nPE, uint32_t srcALen, uint32_t srcBLen, int32_t *resultsBuffer)
Helper function for parallelized overlap-adding of partial convolution results.
Definition: plp_conv_parallel_OLA.c:52
Definition: plp_math.h:272
void plp_mat_mult_trans_i16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 16-bit integer matrices.
Definition: plp_mat_mult_trans_i16.c:54
void plp_dot_prod_q32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q32s_xpulpv2.c:54
Instance structure for basic integer convolution.
Definition: plp_math.h:204
void plp_conv_i16s_rv32im(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 16-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i16s_rv32im.c:56
void plp_conv_i8s_xpulpv2(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 8-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_i8s_xpulpv2.c:60
Instance structure for float parallel dot product.
Definition: plp_math.h:168
Instance structure for integer parallel dot product.
Definition: plp_math.h:132
Instance structure for basic integer convolution.
Definition: plp_math.h:222
void plp_dot_prod_q32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q32s_rv32im.c:53
void plp_dot_prod_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 32-bit integer vectors.
Definition: plp_dot_prod_i32.c:80
void plp_rfft_f32_parallel(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, const uint32_t nPE, float32_t *__restrict__ pDst)
Floating-point FFT on real input data (parallel version).
Definition: plp_rfft_f32_parallel.c:58
void plp_dot_prod_f32_parallel(const float32_t *__restrict__ pSrcA, const float32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t nPE, float32_t *__restrict__ pRes)
Glue code for parallel dot product of 32-bit float vectors.
Definition: plp_dot_prod_f32_parallel.c:54
void plp_conv_i16(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 16-bit integer vectors.
Definition: plp_conv_i16.c:55
void plp_conv_i16_parallel(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 16-bit integer vectors.
Definition: plp_conv_i16_parallel.c:56
void plp_mat_mult_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix-matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_i8.c:55
void plp_dot_prod_f32p_xpulpv2(void *S)
Parallel dot product with interleaved access of 32-bit float vectors kernel for XPULPV2 extension...
Definition: plp_dot_prod_f32p_xpulpv2.c:50
void plp_mat_mult_trans_i8_parallel(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of a 8-bit integer matrices...
Definition: plp_mat_mult_trans_i8_parallel.c:55
void plp_mat_mult_i8vp_xpulpv2(void *args)
Parallel matrix multiplication of 8-bit integer matrices kernel for XPULPV2 extension.
Definition: plp_mat_mult_i8vp_xpulpv2.c:92
void plp_mat_mult_i32p_xpulpv2(void *args)
Parallel matrix-matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i32p_xpulpv2.c:88
Instance structure for fixed point parallel dot product.
Definition: plp_math.h:149
void plp_mat_mult_trans_i32(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix transposed matrix multiplication of 32-bit integer matrices.
Definition: plp_mat_mult_trans_i32.c:82
void plp_mat_mult_trans_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 8-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_trans_i8v_xpulpv2.c:112
void plp_dot_prod_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_i32s_xpulpv2.c:53
void plp_dot_prod_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t nPE, int32_t *__restrict__ pRes)
Glue code for parallel dot product of 32-bit integer vectors.
Definition: plp_dot_prod_i32_parallel.c:54
void plp_dot_prod_q16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Scalar dot product of 16-bit fixed point vectors kernel for RV32IM extension.
Definition: plp_dot_prod_q16s_rv32im.c:56
void plp_conv_i32p_xpulpv2(void *task_args)
Setup code for parallel convolution of 32-bit integer vectors.
Definition: plp_conv_i32p_xpulpv2.c:52
void plp_mean_i32s_xpulpv2(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Mean value of a 32-bit integer vector for XPULPV2 extension.
Definition: plp_mean_i32s_xpulpv2.c:52
void plp_conv_i32s_rv32im(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 32-bit integer vectors kernel for RV32IM extension.
Definition: plp_conv_i32s_rv32im_2.c:56
Instance structure for floating-point FFT.
Definition: plp_math.h:265
void plp_conv_i16p_xpulpv2(void *task_args)
Setup code for parallel convolution of 16-bit integer vectors.
Definition: plp_conv_i16p_xpulpv2.c:53
void plp_conv_i8p_xpulpv2(void *task_args)
Setup code for parallel convolution of 8-bit integer vectors.
Definition: plp_conv_i8p_xpulpv2.c:52
void plp_conv_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const uint32_t srcALen, const int32_t *__restrict__ pSrcB, const uint32_t srcBLen, int32_t *__restrict__ pRes)
Convolution of 32-bit integer vectors kernel for XPULPV2 extension.
void plp_mat_mult_i8_parallel(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix matrix multiplication of 8-bit integer matrices.
Definition: plp_mat_mult_i8_parallel.c:56
void plp_mat_mult_trans_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 8-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i8s_rv32im.c:112
void plp_conv_i32_parallel(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, const uint8_t nPE, int32_t *pRes)
Glue code for parallel convolution of 32-bit integer vectors.
Definition: plp_conv_i32_parallel.c:58
void plp_dot_prod_q16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 16-bit fixed point vectors.
Definition: plp_dot_prod_q16.c:56
void plp_dot_prod_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 8-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i8s_rv32im.c:56
void plp_dot_prod_q8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Glue code for dot product of 8-bit fixed point vectors.
Definition: plp_dot_prod_q8.c:56
void plp_fill_i32(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Glue code for filling a constant value into a 32-bit integer vector.
Definition: plp_fill_i32.c:74
void plp_mat_mult_i8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix matrix multiplication of 8-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i8v_xpulpv2.c:89
void plp_copy_f32s_xpulpv2(float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst, uint32_t blockSize)
Copies the elements of a 32-bit float vector for XPULPV2 extension.
Definition: plp_copy_f32s_xpulpv2.c:50
void plp_mean_i32(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for mean value of a 32-bit integer vector.
Definition: plp_mean_i32.c:77
void plp_conv_valid_rep_i16s_xpulpv2(const int16_t *pSrcA, const uint32_t srcALen, const uint32_t srcAMem, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution (valid with data replication) of 16-bit integer vectors kernel for XPULPV2 extension...
Definition: plp_conv_valid_rep_i16s_xpulpv2.c:63
void plp_mat_mult_trans_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix transposed matrix multiplication of 32-bit integer matrices. ...
Definition: plp_mat_mult_trans_i32_parallel.c:55
void plp_mat_mult_i16_parallel(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix matrix multiplication of 16-bit integer matrices. ...
Definition: plp_mat_mult_i16_parallel.c:56
void plp_conv_valid_i32(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution (valid) of 32-bit integer vectors.
Definition: plp_conv_valid_i32.c:52
Instance structure for basic integer convolution.
Definition: plp_math.h:241
void plp_conv_i16s_xpulpv2(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution of 16-bit integer vectors kernel for XPULPV2 extension.
Definition: plp_conv_i16s_xpulpv2.c:59
void plp_rfft_f32(const plp_rfft_instance_f32 *S, const float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst)
Floating-point FFT on real input data.
Definition: plp_rfft_f32.c:57
void plp_mat_mult_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix matrix multiplication of 16-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i16s_rv32im.c:112
void plp_conv_i32(const int32_t *pSrcA, const uint32_t srcALen, const int32_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution of 32-bit integer vectors.
void plp_fill_i32s_xpulpv2(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Fills a constant value into a 32-bit integer vector for XPULPV2 extension.
Definition: plp_fill_i32s_xpulpv2.c:50
void plp_dot_prod_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 16-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i16s_rv32im.c:56
void plp_fill_i32s_rv32im(int32_t value, int32_t *__restrict__ pDst, uint32_t blockSize)
Fills a constant value into a 32-bit integer vector for RV32IM extension.
Definition: plp_fill_i32s_rv32im.c:74
void plp_mat_mult_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix matrix multiplication of 32-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i32s_rv32im.c:112
void plp_dot_prod_f32s_xpulpv2(const float32_t *__restrict__ pSrcA, const float32_t *__restrict__ pSrcB, uint32_t blockSize, float32_t *__restrict__ pRes)
Scalar dot product of 32-bit float vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_f32s_xpulpv2.c:52
void plp_conv_valid_i8(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution (valid) of 8-bit integer vectors.
Definition: plp_conv_valid_i8.c:52
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:287
void plp_dot_prod_q8v_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, int32_t *__restrict__ pRes)
Vectorized dot product of 8-bit fixed point vectors kernel for XPULPV2 extension.
Definition: plp_dot_prod_q8v_xpulpv2.c:57
void plp_dot_prod_q32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, uint32_t deciPoint, uint32_t nPE, int32_t *__restrict__ pRes)
Glue code for parallel dot product of 32-bit fixed point vectors.
Definition: plp_dot_prod_q32_parallel.c:55
void plp_conv_parallel_OLA_kernel(void *task_args)
Helper function for parallelized overlap-adding of partial convolution results.
Definition: plp_conv_parallel_OLA_kernel.c:49
void plp_conv_valid_rep_i16(const int16_t *pSrcA, const uint32_t srcALen, const int16_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution (valid with data replication) of 16-bit integer vectors.
Definition: plp_conv_valid_rep_i16.c:53
void plp_mat_mult_i32s_xpulpv2(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix matrix multiplication of 32-bit integer matrices for XPULPV2 extension.
Definition: plp_mat_mult_i32s_xpulpv2.c:84
void plp_mean_i32s_rv32im(const int32_t *__restrict__ pSrc, uint32_t blockSize, int32_t *__restrict__ pRes)
Mean value of a 32-bit integer vector for RV32IM extension.
Definition: plp_mean_i32s_rv32im.c:76
void plp_mat_mult_trans_i32p_xpulpv2(void *args)
Parallel matrix transposed matrix multiplication of 32-bit integer matrices for XPULPV2 extension...
Definition: plp_mat_mult_trans_i32p_xpulpv2.c:116
Definition: plp_math.h:279
void plp_mat_mult_i32_parallel(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, uint32_t nPE, int32_t *__restrict__ pDstC)
Glue code for parallel matrix matrix multiplication of 32-bit integer matrices. ...
Definition: plp_mat_mult_i32_parallel.c:56
void plp_dot_prod_i32s_rv32im(const int32_t *__restrict__ pSrcA, const int32_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Scalar dot product of 32-bit integer vectors kernel for RV32IM extension.
Definition: plp_dot_prod_i32s_rv32im.c:78
void plp_copy_f32(float32_t *__restrict__ pSrc, float32_t *__restrict__ pDst, uint32_t blockSize)
Glue code for copying the elements of a 32-bit float vector.
Definition: plp_copy_f32.c:50
Instance structure for integer parallel matrix multiplication.
Definition: plp_math.h:319
void plp_mat_mult_i16vp_xpulpv2(void *args)
Parallel matrix multiplication of 16-bit integer matrices kernel for XPULPV2 extension.
Definition: plp_mat_mult_i16vp_xpulpv2.c:89
void plp_conv_valid_rep_i8s_xpulpv2(const int8_t *pSrcA, const uint32_t srcALen, const uint32_t srcAMem, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Convolution (valid with data replication) of 8-bit integer vectors kernel for XPULPV2 extension...
Definition: plp_conv_valid_rep_i8s_xpulpv2.c:61
void plp_mat_mult_i8s_rv32im(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix matrix multiplication of 8-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_i8s_rv32im.c:112
void plp_dot_prod_i32p_xpulpv2(void *S)
Parallel dot product with interleaved access of 32-bit integer vectors kernel for XPULPV2 extension...
Definition: plp_dot_prod_i32p_xpulpv2.c:50
void plp_copy_i32(int32_t *__restrict__ pSrc, int32_t *__restrict__ pDst, uint32_t blockSize)
Glue code for copying the elements of a 32-bit integer vector.
Definition: plp_copy_i32.c:74
void plp_mat_mult_trans_i16s_rv32im(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Matrix transposed matrix multiplication of 16-bit integer matrices for RV32IM extension.
Definition: plp_mat_mult_trans_i16s_rv32im.c:112
void plp_conv_valid_rep_i8(const int8_t *pSrcA, const uint32_t srcALen, const int8_t *pSrcB, const uint32_t srcBLen, int32_t *pRes)
Glue code for convolution (valid with data replication) of 8-bit integer vectors. ...
Definition: plp_conv_valid_rep_i8.c:53
void plp_dot_prod_i16(const int16_t *pSrcA, const int16_t *pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 16-bit integer vectors.
void plp_mat_mult_i16(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, uint32_t M, uint32_t N, uint32_t O, int32_t *__restrict__ pDstC)
Glue code for matrix matrix multiplication of 16-bit integer matrices.
Definition: plp_mat_mult_i16.c:54
void plp_dot_prod_i8(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, uint32_t blockSize, int32_t *__restrict__ pRes)
Glue code for dot product of 8-bit integer vectors.
Definition: plp_dot_prod_i8.c:56