SHL 2.2.x
Loading...
Searching...
No Matches
shl_c906.h
1/*
2 * Copyright (C) 2016-2023 T-Head Semiconductor Co., Ltd. All rights reserved.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19/* SHL version 2.2.x */
20
21#ifndef INCLUDE_SHL_C906_H_
22#define INCLUDE_SHL_C906_H_
23
24#include "csi_nn.h"
25#include "shl_gref.h"
26#include "shl_ref.h"
27#include "shl_thead_rvv.h"
28
29/************************** f32 func declaration ***************************/
30int shl_c906_abs_f32(struct csinn_tensor *input, struct csinn_tensor *output,
31 struct csinn_siso_params *params);
32
33int shl_c906_add_f32(struct csinn_tensor *input0, struct csinn_tensor *input1,
34 struct csinn_tensor *output, struct csinn_diso_params *params);
35
36int shl_c906_sub_f32(struct csinn_tensor *input0, struct csinn_tensor *input1,
37 struct csinn_tensor *output, struct csinn_diso_params *params);
38
39int shl_c906_mul_f32(struct csinn_tensor *input0, struct csinn_tensor *input1,
40 struct csinn_tensor *output, struct csinn_diso_params *params);
41
42int shl_c906_minimum_f32(struct csinn_tensor *input0, struct csinn_tensor *input1,
43 struct csinn_tensor *output, struct csinn_diso_params *params);
44
45int shl_c906_broadcast_to_f32(struct csinn_tensor *input, struct csinn_tensor *output,
46 struct csinn_broadcast_to_params *params);
47
48int shl_c906_clip_f32(struct csinn_tensor *input, struct csinn_tensor *output,
49 struct csinn_clip_params *params);
50
51int shl_c906_concat_f32(struct csinn_tensor **input, struct csinn_tensor *output,
52 struct csinn_concat_params *params);
53
54int shl_c906_split_f32(struct csinn_tensor *input, struct csinn_tensor **output,
55 struct csinn_split_params *params);
56
57int shl_c906_fullyconnected_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
58 struct csinn_tensor *weights, struct csinn_tensor *bias,
59 struct csinn_fc_params *params);
60
61int shl_c906_pad_f32(struct csinn_tensor *input, struct csinn_tensor *output,
62 struct csinn_pad_params *params);
63
64int shl_c906_prelu_f32(struct csinn_tensor *input, struct csinn_tensor *alpha,
65 struct csinn_tensor *output, struct csinn_prelu_params *params);
66
67int shl_c906_relu_f32(struct csinn_tensor *input, struct csinn_tensor *output,
68 struct csinn_relu_params *params);
69
70int shl_c906_relu1_f32(struct csinn_tensor *input, struct csinn_tensor *output,
71 struct csinn_relu_params *params);
72
73int shl_c906_relu6_f32(struct csinn_tensor *input, struct csinn_tensor *output,
74 struct csinn_relu_params *params);
75
76int shl_c906_leaky_relu_f32(struct csinn_tensor *input, struct csinn_tensor *output,
77 struct csinn_relu_params *params);
78
79int shl_c906_conv1d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
80 struct csinn_tensor *kernel, struct csinn_tensor *bias,
81 struct csinn_conv1d_params *params);
82int shl_c906_conv1d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
83 struct csinn_tensor *kernel, struct csinn_tensor *bias,
84 struct csinn_conv1d_params *params);
85
86int shl_c906_depthwise_conv1d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
87 struct csinn_tensor *kernel, struct csinn_tensor *bias,
88 struct csinn_conv1d_params *params);
89
90int shl_c906_conv2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
91 struct csinn_tensor *kernel, struct csinn_tensor *bias,
92 struct csinn_conv2d_params *params);
93int shl_c906_conv2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
94 struct csinn_tensor *kernel, struct csinn_tensor *bias,
95 struct csinn_conv2d_params *params);
96
97int shl_c906_depthwise_conv2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
98 struct csinn_tensor *kernel, struct csinn_tensor *bias,
99 struct csinn_conv2d_params *params);
100int shl_c906_depthwise_conv2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
101 struct csinn_tensor *kernel, struct csinn_tensor *bias,
102 struct csinn_conv2d_params *params);
103
104int shl_c906_maxpool2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
105 struct csinn_pool_params *params);
106
107int shl_c906_maxpool2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
108 struct csinn_pool_params *params);
109
110int shl_c906_global_maxpool2d_f32(struct csinn_tensor *input, struct csinn_tensor *output,
111 struct csinn_pool_params *params);
112
113int shl_c906_avgpool2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
114 struct csinn_pool_params *params);
115
116int shl_c906_avgpool2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
117 struct csinn_pool_params *params);
118
119int shl_c906_global_avgpool2d_f32(struct csinn_tensor *input, struct csinn_tensor *output,
120 struct csinn_pool_params *params);
121
122int shl_c906_div_init_fp16(struct csinn_tensor *input0, struct csinn_tensor *input1,
123 struct csinn_tensor *output, struct csinn_diso_params *params);
124
125int shl_c906_div_init_fp32(struct csinn_tensor *input0, struct csinn_tensor *input1,
126 struct csinn_tensor *output, struct csinn_diso_params *params);
127
128int shl_c906_matmul_init_fp16(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
129 struct csinn_tensor *output, struct csinn_matmul_params *params);
130
131/* pack */
132void shl_c906_reorder_kernel(float *a, float *sa, int m, int k, int ldx);
133
134void shl_c906_reorder_input(float *b, float *sb, int k, int n, int ldx);
135
136void shl_c906_reorder_input_1(float *b, float *sb, int k, int n, int ldx);
137
138/* gemm */
139void shl_c906_sgemm_kernel_f32(float *dst, const float *sa, const float *sb, int m, int k, int n,
140 int ldc, float *bias, bool fuse_relu);
141
142/* kernel transform */
143void shl_c906_conv1x1s1_sgemm_transform_kernel(struct csinn_tensor *kernel,
144 struct csinn_conv2d_params *params);
145
146void shl_c906_conv_im2col_sgemm_transform_kernel(struct csinn_tensor *kernel,
147 struct csinn_conv2d_params *params);
148
149void shl_c906_conv3x3s1_winograd23_transform_kernel(struct csinn_tensor *o_kernel,
150 struct csinn_tensor *t_kernel);
151
152void shl_c906_conv3x3s1_winograd43_transform_kernel(struct csinn_tensor *o_kernel,
153 struct csinn_tensor *t_kernel);
154
155void shl_c906_conv3x3s1_winograd64_transform_kernel(struct csinn_tensor *o_kernel,
156 struct csinn_tensor *t_kernel);
157
158void shl_c906_conv3x3s1_winograd64_transform_kernel_1(struct csinn_tensor *o_kernel,
159 struct csinn_tensor *t_kernel);
160
161void shl_c906_conv3x3s1_winograd64_transform_kernel_pack4(struct csinn_tensor *o_kernel,
162 struct csinn_tensor *t_kernel);
163
164void shl_c906_conv3x3s1_winograd43_transform_kernel_pack4(struct csinn_tensor *o_kernel,
165 struct csinn_tensor *t_kernel);
166
167/* convolution optimization */
168int shl_c906_conv1x1s1_sgemm(struct csinn_tensor *input, struct csinn_tensor *output,
169 struct csinn_tensor *kernel, struct csinn_tensor *bias,
170 struct csinn_conv2d_params *params);
171
172int shl_c906_conv1x1s1_sgemm_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
173 struct csinn_tensor *kernel, struct csinn_tensor *bias,
174 struct csinn_conv2d_params *params);
175
176int shl_c906_conv_im2col_sgemm(struct csinn_tensor *input, struct csinn_tensor *output,
177 struct csinn_tensor *kernel, struct csinn_tensor *bias,
178 struct csinn_conv2d_params *params);
179
180int shl_c906_conv_im2col_sgemm_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
181 struct csinn_tensor *kernel, struct csinn_tensor *bias,
182 struct csinn_conv2d_params *params);
183
184int shl_c906_conv3x3s1_winograd23(struct csinn_tensor *input, struct csinn_tensor *output,
185 struct csinn_tensor *kernel, struct csinn_tensor *bias,
186 struct csinn_conv2d_params *params);
187
188int shl_c906_conv3x3s1_winograd43(struct csinn_tensor *input, struct csinn_tensor *output,
189 struct csinn_tensor *kernel, struct csinn_tensor *bias,
190 struct csinn_conv2d_params *params);
191
192int shl_c906_conv3x3s1_winograd64(struct csinn_tensor *input, struct csinn_tensor *output,
193 struct csinn_tensor *kernel, struct csinn_tensor *bias,
194 struct csinn_conv2d_params *params);
195
196int shl_c906_conv3x3s1_winograd64_1(struct csinn_tensor *input, struct csinn_tensor *output,
197 struct csinn_tensor *kernel, struct csinn_tensor *bias,
198 struct csinn_conv2d_params *params);
199
200int shl_c906_conv3x3s1_winograd64_pack4(struct csinn_tensor *input, struct csinn_tensor *output,
201 struct csinn_tensor *kernel, struct csinn_tensor *bias,
202 struct csinn_conv2d_params *params);
203
204int shl_c906_conv3x3s1_winograd43_pack4(struct csinn_tensor *input, struct csinn_tensor *output,
205 struct csinn_tensor *kernel, struct csinn_tensor *bias,
206 struct csinn_conv2d_params *params);
207
208void shl_c906_conv3x3s1(struct csinn_tensor *input, struct csinn_tensor *output,
209 struct csinn_tensor *kernel, struct csinn_tensor *bias,
210 struct csinn_conv2d_params *params);
211
212void shl_c906_conv3x3s2(struct csinn_tensor *input, struct csinn_tensor *output,
213 struct csinn_tensor *kernel, struct csinn_tensor *bias,
214 struct csinn_conv2d_params *params);
215
216/* depthwise convolution optimization */
217int shl_c906_dwconv3x3s1(struct csinn_tensor *input, struct csinn_tensor *output,
218 struct csinn_tensor *kernel, struct csinn_tensor *bias,
219 struct csinn_conv2d_params *params);
220
221int shl_c906_dwconv3x3s2(struct csinn_tensor *input, struct csinn_tensor *output,
222 struct csinn_tensor *kernel, struct csinn_tensor *bias,
223 struct csinn_conv2d_params *params);
224
225int shl_c906_dwconv5x5s1(struct csinn_tensor *input, struct csinn_tensor *output,
226 struct csinn_tensor *kernel, struct csinn_tensor *bias,
227 struct csinn_conv2d_params *params);
228
229int shl_c906_dwconv5x5s2(struct csinn_tensor *input, struct csinn_tensor *output,
230 struct csinn_tensor *kernel, struct csinn_tensor *bias,
231 struct csinn_conv2d_params *params);
232
233int shl_c906_dwconv3x3s1_pack4(struct csinn_tensor *input, struct csinn_tensor *output,
234 struct csinn_tensor *kernel, struct csinn_tensor *bias,
235 struct csinn_conv2d_params *params);
236
237int shl_c906_dwconv3x3s2_pack4(struct csinn_tensor *input, struct csinn_tensor *output,
238 struct csinn_tensor *kernel, struct csinn_tensor *bias,
239 struct csinn_conv2d_params *params);
240
241/* depthwise convolution fuse relu */
242int shl_c906_dwconv3x3s1_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
243 struct csinn_tensor *kernel, struct csinn_tensor *bias,
244 struct csinn_conv2d_params *params);
245
246int shl_c906_dwconv3x3s2_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
247 struct csinn_tensor *kernel, struct csinn_tensor *bias,
248 struct csinn_conv2d_params *params);
249
250int shl_c906_dwconv5x5s1_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
251 struct csinn_tensor *kernel, struct csinn_tensor *bias,
252 struct csinn_conv2d_params *params);
253
254int shl_c906_dwconv5x5s2_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
255 struct csinn_tensor *kernel, struct csinn_tensor *bias,
256 struct csinn_conv2d_params *params);
257
258int shl_c906_dwconv3x3s1_pack4_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
259 struct csinn_tensor *kernel, struct csinn_tensor *bias,
260 struct csinn_conv2d_params *params);
261
262int shl_c906_dwconv3x3s2_pack4_fuse_relu(struct csinn_tensor *input, struct csinn_tensor *output,
263 struct csinn_tensor *kernel, struct csinn_tensor *bias,
264 struct csinn_conv2d_params *params);
265
266int shl_c906_dwconv2d_s1_pad0_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
267 struct csinn_tensor *kernel, struct csinn_tensor *bias,
268 struct csinn_conv2d_params *params);
269
270/************************** fp16 func declaration ***************************/
271int shl_c906_add_fp16(struct csinn_tensor *input0, struct csinn_tensor *input1,
272 struct csinn_tensor *output, struct csinn_diso_params *params);
273
274int shl_c906_sub_fp16(struct csinn_tensor *input0, struct csinn_tensor *input1,
275 struct csinn_tensor *output, struct csinn_diso_params *params);
276
277int shl_c906_mul_fp16(struct csinn_tensor *input0, struct csinn_tensor *input1,
278 struct csinn_tensor *output, struct csinn_diso_params *params);
279
280int shl_c906_minimum_fp16(struct csinn_tensor *input0, struct csinn_tensor *input1,
281 struct csinn_tensor *output, struct csinn_diso_params *params);
282
283int shl_c906_global_avgpool2d_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
284 struct csinn_pool_params *params);
285
286int shl_c906_global_maxpool2d_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
287 struct csinn_pool_params *params);
288
289int shl_c906_pad_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
290 struct csinn_pad_params *params);
291
292int shl_c906_relu_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
293 struct csinn_relu_params *params);
294
295int shl_c906_relu1_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
296 struct csinn_relu_params *params);
297
298int shl_c906_relu6_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
299 struct csinn_relu_params *params);
300
301int shl_c906_prelu_fp16(struct csinn_tensor *input, struct csinn_tensor *alpha,
302 struct csinn_tensor *output, struct csinn_prelu_params *params);
303
304int shl_c906_leaky_relu_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
305 struct csinn_relu_params *params);
306
307int shl_c906_abs_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
308 struct csinn_siso_params *params);
309
310int shl_c906_clip_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
311 struct csinn_clip_params *params);
312
313int shl_c906_concat_fp16(struct csinn_tensor **input, struct csinn_tensor *output,
314 struct csinn_concat_params *params);
315
316int shl_c906_split_fp16(struct csinn_tensor *input, struct csinn_tensor **output,
317 struct csinn_split_params *params);
318
319int shl_c906_fullyconnected_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
320 struct csinn_tensor *weights, struct csinn_tensor *bias,
321 struct csinn_fc_params *params);
322
323int shl_c906_fullyconnected_pack8_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
324 struct csinn_tensor *weights, struct csinn_tensor *bias,
325 struct csinn_fc_params *params);
326
327int shl_c906_fullyconnected_pack8_fp16_1(struct csinn_tensor *input, struct csinn_tensor *output,
328 struct csinn_tensor *weights, struct csinn_tensor *bias,
329 struct csinn_fc_params *params);
330
331int shl_c906_fullyconnected_pack16_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
332 struct csinn_tensor *weights, struct csinn_tensor *bias,
333 struct csinn_fc_params *params);
334
335int shl_c906_fullyconnected_pack16_output16_fp16(struct csinn_tensor *input,
336 struct csinn_tensor *output,
337 struct csinn_tensor *weights,
338 struct csinn_tensor *bias,
339 struct csinn_fc_params *params);
340
341void shl_c906_reorder_weight_n8_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx);
342
343void shl_c906_reorder_weight_n16_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx);
344
345/* pack fp16 */
346void shl_c906_reorder_kernel_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx);
347void shl_c906_reorder_input_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx);
348
349void shl_c906_reorder_input_fp16_1(__fp16 *b, __fp16 *sb, int k, int n, int ldx);
350
351void shl_c906_reorder_matrix_z8_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx);
352void shl_c906_reorder_matrix_z16_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx);
353
354/* gemm fp16 */
355void shl_c906_sgemm_kernel_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k,
356 int n, int ldc, __fp16 *bias);
357void shl_c906_sgemm_kernel_fp16_1(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k,
358 int n, int ldc, __fp16 *bias);
359
360/* gemv fp16 */
361void shl_c906_gemv_pack8_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n,
362 int ldc, __fp16 *bias);
363void shl_c906_gemv_pack16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n,
364 int ldc, __fp16 *bias);
365
366void shl_c906_gemv_trans_pack8_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n,
367 int ldc, __fp16 *bias);
368void shl_c906_gemv_trans_pack16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n,
369 int ldc, __fp16 *bias);
370
371/* kernel transform fp16 */
372void shl_c906_conv1x1s1_sgemm_transform_kernel_fp16(struct csinn_tensor *kernel,
373 struct csinn_conv2d_params *params);
374void shl_c906_conv1x1s1_sgemm_transform_kernel_fp16_w_int8(struct csinn_tensor *kernel,
375 struct csinn_conv2d_params *params);
376void shl_c906_conv_im2col_sgemm_transform_kernel_fp16(struct csinn_tensor *kernel,
377 struct csinn_conv2d_params *params);
378
379void shl_c906_conv3x3s1_winograd43_transform_kernel_pack8_fp16(struct csinn_tensor *o_kernel,
380 struct csinn_tensor *t_kernel);
381
382void shl_c906_conv3x3s1_winograd64_transform_kernel_pack8_fp16(struct csinn_tensor *o_kernel,
383 struct csinn_tensor *t_kernel);
384
385/* convolution optimization fp16 */
386int shl_c906_conv1x1s1_sgemm_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
387 struct csinn_tensor *kernel, struct csinn_tensor *bias,
388 struct csinn_conv2d_params *params);
389
390int shl_c906_conv1x1s1_batch_gemv_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
391 struct csinn_tensor *kernel, struct csinn_tensor *bias,
392 struct csinn_conv2d_params *params);
393
394int shl_c906_conv_im2col_sgemm_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
395 struct csinn_tensor *kernel, struct csinn_tensor *bias,
396 struct csinn_conv2d_params *params);
397
398int shl_c906_conv3x3s1_winograd43_pack8_fp16(struct csinn_tensor *input,
399 struct csinn_tensor *output,
400 struct csinn_tensor *kernel, struct csinn_tensor *bias,
401 struct csinn_conv2d_params *params);
402
403int shl_c906_conv3x3s1_winograd64_pack8_fp16(struct csinn_tensor *input,
404 struct csinn_tensor *output,
405 struct csinn_tensor *kernel, struct csinn_tensor *bias,
406 struct csinn_conv2d_params *params);
407
408void shl_c906_conv3x3s1_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
409 struct csinn_tensor *kernel, struct csinn_tensor *bias,
410 struct csinn_conv2d_params *params);
411
412void shl_c906_conv3x3s2_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
413 struct csinn_tensor *kernel, struct csinn_tensor *bias,
414 struct csinn_conv2d_params *params);
415
416/* depthwise convolution optimization for fp16 */
417int shl_c906_dwconv3x3s1_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
418 struct csinn_tensor *kernel, struct csinn_tensor *bias,
419 struct csinn_conv2d_params *params);
420
421int shl_c906_dwconv3x3s2_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
422 struct csinn_tensor *kernel, struct csinn_tensor *bias,
423 struct csinn_conv2d_params *params);
424
425int shl_c906_dwconv3x3s1_pack8_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
426 struct csinn_tensor *kernel, struct csinn_tensor *bias,
427 struct csinn_conv2d_params *params);
428
429int shl_c906_dwconv3x3s2_pack8_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
430 struct csinn_tensor *kernel, struct csinn_tensor *bias,
431 struct csinn_conv2d_params *params);
432
433/* utils */
434void shl_c906_memcpy(void *dst, const void *src, size_t n);
435
436void shl_c906_pad_input(const float *input, float *input_padded, int inc, int inh, int inw,
437 int padded_h, int padded_w, int pad_top, int pad_left);
438
439void shl_c906_crop_output(float *output_trans, float *output, int out_c, int out_h, int out_w,
440 int wino_h, int wino_w);
441
442void shl_c906_pad_input_fp16(const __fp16 *input, __fp16 *input_padded, int inc, int inh, int inw,
443 int padded_h, int padded_w, int pad_top, int pad_left);
444
445void shl_c906_crop_output_fp16(__fp16 *output_trans, __fp16 *output, int out_c, int out_h,
446 int out_w, int wino_h, int wino_w);
447
448/* asr related functions */
449int shl_c906_cache_matmul_init(struct csinn_tensor *input, struct csinn_tensor *output,
450 struct csinn_tensor *weight, struct csinn_tensor *bias,
451 struct csinn_cache_matmul_params *params);
452
453int shl_c906_cache_matmul_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
454 struct csinn_tensor *weight, struct csinn_tensor *bias,
455 struct csinn_cache_matmul_params *params);
456
457int shl_c906_matmul_init(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
458 struct csinn_tensor *output, struct csinn_matmul_params *params);
459
460int shl_c906_matmul_fp16(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
461 struct csinn_tensor *output, struct csinn_matmul_params *params);
462
463int shl_c906_reshape_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
464 struct csinn_reshape_params *params);
465
466int shl_c906_cache_conv1d_init(struct csinn_tensor *input, struct csinn_tensor *output,
467 struct csinn_tensor *weight, struct csinn_tensor *bias,
468 struct csinn_cache_conv1d_params *params);
469
470int shl_c906_cache_conv1d_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
471 struct csinn_tensor *weight, struct csinn_tensor *bias,
472 struct csinn_cache_conv1d_params *params);
473
474int shl_c906_lrn_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
475 struct csinn_lrn_params *params);
476
/*
 * Helpers operating on a struct csinn_asr_buffer_t (declared elsewhere).
 * The descriptions below are inferred from the function names only; confirm against
 * the implementation before relying on them.
 *
 * NOTE(review): `data_lenth` is a misspelling of `data_length`. Parameter names in a
 * prototype do not affect callers, but rename it together with the definition.
 */
/* Presumably initializes `buffer` from `buffer_size` and `data_lenth` (units not visible here). */
477void asr_buffer_init_c906(struct csinn_asr_buffer_t *buffer, size_t buffer_size, size_t data_lenth);
478
/* Presumably inserts `len` units of `input` at the front; meaning of the returned pointer: TODO confirm. */
479void *asr_buffer_insert_c906_front(struct csinn_asr_buffer_t *buffer, void *input, size_t len);
480
/* Presumably inserts `len` units of `input` at the back; meaning of the returned pointer: TODO confirm. */
481void *asr_buffer_insert_c906_back(struct csinn_asr_buffer_t *buffer, void *input, size_t len);
482
/* Returns a pointer associated with `buffer` (presumably its current data; ownership: TODO confirm). */
483void *asr_buffer_get_buffer_c906(struct csinn_asr_buffer_t *buffer);
484
/* Presumably resets `buffer` to its initial state. */
485void asr_buffer_reset_c906(struct csinn_asr_buffer_t *buffer);
486
/*
 * fcsr helpers. "fcsr" is presumably the RISC-V floating-point control and status
 * register; confirm in the implementation.
 *
 * Both take no arguments. They are declared with an explicit (void) so that this is a
 * real prototype: an empty `()` in C before C23 leaves the parameters unspecified and
 * lets calls with stray arguments compile silently.
 */
/* Presumably resets fcsr. */
void shl_c906_reset_fcsr(void);
/* Presumably returns the current fcsr value as an int (bit layout not visible from this header). */
int shl_c906_get_fcsr(void);
489
490/* hardware performance */
/**
 * Aggregate of hardware performance counter values, returned by value from
 * shl_c906_get_hw_perf().
 *
 * Field meanings are inferred from the field names and the matching
 * shl_c906_get_*() accessors; confirm against the implementation.
 * There are accessors for cb_miss / cb_inst below, but this struct has no
 * corresponding fields.
 */
struct shl_c906_hpm {
    size_t inst;              /* presumably instruction count */
    size_t cycle;             /* presumably cycle count */
    size_t l1_icache_access;  /* presumably L1 instruction-cache accesses */
    size_t l1_icache_miss;    /* presumably L1 instruction-cache misses */
    size_t store_inst;        /* presumably store-instruction count */
    size_t l1_dcache_raccess; /* presumably L1 data-cache read accesses */
    size_t l1_dcache_rmiss;   /* presumably L1 data-cache read misses */
    size_t l1_dcache_waccess; /* presumably L1 data-cache write accesses */
    size_t l1_dcache_wmiss;   /* presumably L1 data-cache write misses */
};

/*
 * Individual counter accessors. Each takes no arguments and returns a 64-bit value.
 * They are declared with an explicit (void): an empty `()` is not a prototype in C
 * before C23 and would disable argument checking at call sites.
 */
uint64_t shl_c906_get_inst(void);
uint64_t shl_c906_get_cycle(void);
uint64_t shl_c906_get_l1_icache_access(void);
uint64_t shl_c906_get_l1_icache_miss(void);
uint64_t shl_c906_get_cb_miss(void);
uint64_t shl_c906_get_cb_inst(void);
uint64_t shl_c906_get_store_inst(void);
uint64_t shl_c906_get_l1_dcache_raccess(void);
uint64_t shl_c906_get_l1_dcache_rmiss(void);
uint64_t shl_c906_get_l1_dcache_waccess(void);
uint64_t shl_c906_get_l1_dcache_wmiss(void);

/* Returns a struct shl_c906_hpm by value (presumably a snapshot of all counters above). */
struct shl_c906_hpm shl_c906_get_hw_perf(void);
516
517int shl_c906_reduce_sum_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
518 struct csinn_reduce_params *params);
519
/*
 * Conversions between 8-bit integer buffers (uint8_t / int8_t) and float buffers.
 * Each takes a const input pointer, a writable output pointer and a `length`.
 * `offset` and `scale` are presumably the quantization zero point and scale, and
 * `length` presumably an element count. TODO confirm against the implementation,
 * including whether `scale` points to one value or an array.
 */
520void shl_c906_u8_to_f32(const uint8_t *input, float *output, int32_t offset, float *scale,
521 uint32_t length);
522void shl_c906_i8_to_f32(const int8_t *input, float *output, int32_t offset, float *scale,
523 uint32_t length);
524void shl_c906_f32_to_u8(const float *input, uint8_t *output, int32_t offset, float *scale,
525 uint32_t length);
526void shl_c906_f32_to_i8(const float *input, int8_t *output, int32_t offset, float *scale,
527 uint32_t length);
528
/*
 * Operator callback lookup and registration for the C906 backend.
 * The descriptions are inferred from names and signatures; confirm in the implementation.
 */
/* Returns the struct csinn_callback for an (op, dtype) pair; both are passed as plain ints here. */
529struct csinn_callback *shl_cb_map_c906(int op, int dtype);
/* Presumably registers `init` and `exec` (untyped pointers) for the given dtype and operator. */
530int shl_c906_reg_op(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *init,
531 void *exec);
/* Presumably registers `est` (untyped pointer) for the given dtype and operator. */
532int shl_c906_reg_op_est(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *est);
533
/*
 * Option set for the C906 backend. It currently has a single member, `base`, of type
 * struct shl_rvv_option (declared outside this header).
 */
534struct shl_c906_option {
535 struct shl_rvv_option base;
536};
537
538int shl_c906_set_packn_layout(struct csinn_session *sess, bool packn_layout);
539struct shl_c906_option *shl_c906_get_graph_option(struct csinn_session *sess);
540
541#endif // INCLUDE_SHL_C906_H_
csinn_op_enum
Definition: csinn_data_structure.h:127
csinn_dtype_enum
Definition: csinn_data_structure.h:39
Definition: csinn_data_structure.h:1074
Definition: csinn_data_structure.h:524
int(* est)()
Definition: csinn_data_structure.h:526
int(* init)()
Definition: csinn_data_structure.h:525
int(* exec)()
Definition: csinn_data_structure.h:527
Definition: csinn_data_structure.h:1081
Definition: csinn_data_structure.h:780
Definition: csinn_data_structure.h:1162
Definition: csinn_data_structure.h:553
Definition: csinn_data_structure.h:753
Definition: csinn_data_structure.h:596
Definition: csinn_data_structure.h:727
Definition: csinn_data_structure.h:746
Definition: csinn_data_structure.h:763
Definition: csinn_data_structure.h:605
Definition: csinn_data_structure.h:686
Definition: csinn_data_structure.h:1033
Definition: csinn_data_structure.h:676
Definition: csinn_data_structure.h:825
Definition: csinn_data_structure.h:502
Definition: csinn_data_structure.h:661
Definition: csinn_data_structure.h:871
Definition: csinn_data_structure.h:475