21#ifndef INCLUDE_SHL_C908_H_
22#define INCLUDE_SHL_C908_H_
27#include "shl_thead_rvv.h"
/*
 * Kernel-reorder entry points whose names refer to the im2col + GEMM
 * convolution path. One prototype per data-type suffix (fp32 / fp16 / int8)
 * and per layout suffix (none, packn, pack1ton, packnto1).
 *
 * NOTE(review): every prototype in this group is cut off after its first
 * parameter (`struct csinn_tensor *kernel,`) and has a listing line number
 * fused onto `void` (e.g. `79void`). The remaining parameters are not visible
 * in this listing; restore them from the original header before compiling.
 */
79void shl_c908_conv_im2col_gemm_reorder_kernel_fp32(
struct csinn_tensor *kernel,
81void shl_c908_conv_im2col_gemm_reorder_kernel_fp16(
struct csinn_tensor *kernel,
83void shl_c908_conv_im2col_gemm_reorder_kernel_int8(
struct csinn_tensor *kernel,
96void shl_c908_conv_im2col_gemm_reorder_kernel_packn_fp32(
struct csinn_tensor *kernel,
98void shl_c908_conv_im2col_gemm_reorder_kernel_packn_fp16(
struct csinn_tensor *kernel,
100void shl_c908_conv_im2col_gemm_reorder_kernel_packn_int8(
struct csinn_tensor *kernel,
113void shl_c908_conv_im2col_gemm_reorder_kernel_pack1ton_fp32(
struct csinn_tensor *kernel,
115void shl_c908_conv_im2col_gemm_reorder_kernel_pack1ton_fp16(
struct csinn_tensor *kernel,
117void shl_c908_conv_im2col_gemm_reorder_kernel_pack1ton_int8(
struct csinn_tensor *kernel,
130void shl_c908_conv_im2col_gemm_reorder_kernel_packnto1_fp32(
struct csinn_tensor *kernel,
132void shl_c908_conv_im2col_gemm_reorder_kernel_packnto1_fp16(
struct csinn_tensor *kernel,
134void shl_c908_conv_im2col_gemm_reorder_kernel_packnto1_int8(
struct csinn_tensor *kernel,
/*
 * Kernel-reorder entry points whose names refer to the conv1x1s1 GEMM path.
 * One prototype per data-type suffix (fp32 / fp16 / int8) and per layout
 * suffix (none, packn, pack1ton, packnto1).
 *
 * NOTE(review): every prototype in this group is cut off after its first
 * parameter (`struct csinn_tensor *kernel,`) and has a listing line number
 * fused onto `void` (e.g. `148void`). The remaining parameters are not
 * visible in this listing; restore them from the original header.
 */
148void shl_c908_conv1x1s1_gemm_reorder_kernel_fp32(
struct csinn_tensor *kernel,
150void shl_c908_conv1x1s1_gemm_reorder_kernel_fp16(
struct csinn_tensor *kernel,
152void shl_c908_conv1x1s1_gemm_reorder_kernel_int8(
struct csinn_tensor *kernel,
165void shl_c908_conv1x1s1_gemm_reorder_kernel_packn_fp32(
struct csinn_tensor *kernel,
167void shl_c908_conv1x1s1_gemm_reorder_kernel_packn_fp16(
struct csinn_tensor *kernel,
169void shl_c908_conv1x1s1_gemm_reorder_kernel_packn_int8(
struct csinn_tensor *kernel,
182void shl_c908_conv1x1s1_gemm_reorder_kernel_pack1ton_fp32(
struct csinn_tensor *kernel,
184void shl_c908_conv1x1s1_gemm_reorder_kernel_pack1ton_fp16(
struct csinn_tensor *kernel,
186void shl_c908_conv1x1s1_gemm_reorder_kernel_pack1ton_int8(
struct csinn_tensor *kernel,
199void shl_c908_conv1x1s1_gemm_reorder_kernel_packnto1_fp32(
struct csinn_tensor *kernel,
201void shl_c908_conv1x1s1_gemm_reorder_kernel_packnto1_fp16(
struct csinn_tensor *kernel,
203void shl_c908_conv1x1s1_gemm_reorder_kernel_packnto1_int8(
struct csinn_tensor *kernel,
/*
 * Kernel-transform entry points with `wg_b6f3s1` / `wg_b4f3s1` in their names
 * (presumably Winograd variants -- TODO confirm), for the pack8 and pack16
 * layouts and the fp32 / fp16 / int8 data-type suffixes.
 *
 * NOTE(review): every prototype in this group is cut off after its first
 * parameter (`struct csinn_tensor *src_kernel,`) and has a listing line
 * number fused onto `void`. The remaining parameters are not visible here.
 */
217void shl_c908_wg_b6f3s1_trans_kernel_pack8_fp32(
struct csinn_tensor *src_kernel,
219void shl_c908_wg_b6f3s1_trans_kernel_pack8_fp16(
struct csinn_tensor *src_kernel,
221void shl_c908_wg_b6f3s1_trans_kernel_pack16_fp16(
struct csinn_tensor *src_kernel,
224void shl_c908_wg_b4f3s1_trans_kernel_pack8_fp32(
struct csinn_tensor *src_kernel,
226void shl_c908_wg_b4f3s1_trans_kernel_pack8_fp16(
struct csinn_tensor *src_kernel,
228void shl_c908_wg_b4f3s1_trans_kernel_pack16_fp16(
struct csinn_tensor *src_kernel,
230void shl_c908_wg_b4f3s1_trans_kernel_pack8_int8(
struct csinn_tensor *src_kernel,
/*
 * Kernel-transform entry points with `ncxhwx_wg_b6f3s1` / `ncxhwx_wg_b4f3s1`
 * in their names, for the packn layout and the fp32 / fp16 / int8 data-type
 * suffixes.
 *
 * NOTE(review): every prototype in this group is cut off after its first
 * parameter (`struct csinn_tensor *src_kernel,`) and has a listing line
 * number fused onto `void`. The remaining parameters are not visible here.
 */
256void shl_c908_ncxhwx_wg_b6f3s1_trans_kernel_packn_fp32(
struct csinn_tensor *src_kernel,
258void shl_c908_ncxhwx_wg_b6f3s1_trans_kernel_packn_fp16(
struct csinn_tensor *src_kernel,
268void shl_c908_ncxhwx_wg_b4f3s1_trans_kernel_packn_fp32(
struct csinn_tensor *src_kernel,
270void shl_c908_ncxhwx_wg_b4f3s1_trans_kernel_packn_fp16(
struct csinn_tensor *src_kernel,
272void shl_c908_ncxhwx_wg_b4f3s1_trans_kernel_packn_int8(
struct csinn_tensor *src_kernel,
/**
 * Declaration of the fp32 GEMM routine named "ncxhwx ... 12xpack2n".
 *
 * NOTE(review): only the prototype is visible in this header; the parameter
 * roles below are inferred from the names and const-qualifiers and should be
 * confirmed against the definition.
 *
 * @param dst        destination buffer (non-const, presumably written)
 * @param sa         first input operand, read-only
 * @param sb         second input operand, read-only
 * @param bias       bias values, read-only
 * @param m          presumably a GEMM dimension -- TODO confirm
 * @param k          presumably a GEMM dimension -- TODO confirm
 * @param n          presumably a GEMM dimension -- TODO confirm
 * @param fuse_relu  presumably selects a fused ReLU on the result -- TODO confirm
 */
void shl_c908_ncxhwx_gemm_12xpack2n_fp32(float *dst, const float *sa, const float *sb,
                                         const float *bias, int m, int k, int n, bool fuse_relu);
288void shl_c908_ncxhwx_gemm_12xpack2n_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
289 const __fp16 *bias,
int m,
int k,
int n,
bool fuse_relu);
/**
 * Declaration of the int8 GEMM routine named "ncxhwx ... 4xpack2n".
 *
 * NOTE(review): only the prototype is visible in this header; the parameter
 * roles below are inferred from the names and const-qualifiers and should be
 * confirmed against the definition.
 *
 * @param dst     destination buffer (non-const, presumably written)
 * @param sa      first input operand, read-only
 * @param sb      second input operand, read-only
 * @param bias    32-bit bias values, read-only
 * @param m       presumably a GEMM dimension -- TODO confirm
 * @param k       presumably a GEMM dimension -- TODO confirm
 * @param n       presumably a GEMM dimension -- TODO confirm
 * @param out_zp  presumably the output zero point -- TODO confirm
 * @param mult    presumably requantization multipliers -- TODO confirm
 * @param shift   presumably requantization shifts -- TODO confirm
 */
void shl_c908_ncxhwx_gemm_4xpack2n_int8(int8_t *dst, const int8_t *sa, const int8_t *sb,
                                        const int32_t *bias, int m, int k, int n, int32_t out_zp,
                                        int32_t *mult, int32_t *shift);
/**
 * Declaration of the int8 GEMM routine named "ncxhwx ... 12xpackn ... dot".
 *
 * NOTE(review): only the prototype is visible in this header; the parameter
 * roles below are inferred from the names and const-qualifiers and should be
 * confirmed against the definition.
 *
 * @param dst     destination buffer (non-const, presumably written)
 * @param sa      first input operand, read-only
 * @param sb      second input operand, read-only
 * @param bias    32-bit bias values, read-only
 * @param m       presumably a GEMM dimension -- TODO confirm
 * @param k       presumably a GEMM dimension -- TODO confirm
 * @param n       presumably a GEMM dimension -- TODO confirm
 * @param out_zp  presumably the output zero point -- TODO confirm
 * @param mult    presumably requantization multipliers -- TODO confirm
 * @param shift   presumably requantization shifts -- TODO confirm
 */
void shl_c908_ncxhwx_gemm_12xpackn_int8_dot(int8_t *dst, const int8_t *sa, const int8_t *sb,
                                            const int32_t *bias, int m, int k, int n,
                                            int32_t out_zp, int32_t *mult, int32_t *shift);
/*
 * int16-input GEMM routine writing to an int32_t destination.
 *
 * NOTE(review): this prototype is cut off after `int m,` in this listing and
 * has a listing line number fused onto `void`. The remaining parameters are
 * not visible here; restore them from the original header.
 */
297void shl_c908_ncxhwx_gemm_12xpackn_int16(int32_t *dst,
const int16_t *sa,
const int16_t *sb,
int m,
/**
 * Declaration of the fp32 "n8" kernel-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param m    presumably the kernel row count -- TODO confirm
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_kernel_n8_fp32(float *src, float *dst, int m, int k, int ldc);
/**
 * Declaration of the fp32 "z12" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z12_fp32(float *src, float *dst, int k, int n, int ldc);
/**
 * Declaration of the fp32 GEMM routine named "8x12".
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from names and const-qualifiers -- confirm against the
 * definition.
 *
 * @param dst   destination buffer (non-const, presumably written)
 * @param sa    first input operand, read-only
 * @param sb    second input operand, read-only
 * @param bias  bias values (declared non-const here)
 * @param m     presumably a GEMM dimension -- TODO confirm
 * @param k     presumably a GEMM dimension -- TODO confirm
 * @param n     presumably a GEMM dimension -- TODO confirm
 * @param ldc   presumably the leading dimension of dst -- TODO confirm
 */
void shl_c908_gemm_8x12_fp32(float *dst, const float *sa, const float *sb, float *bias, int m,
                             int k, int n, int ldc);
305void shl_c908_reorder_kernel_n8_fp16(__fp16 *src, __fp16 *dst,
int m,
int k,
int ldc);
306void shl_c908_reorder_input_z24_fp16(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldc);
307void shl_c908_gemm_8x24_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb, __fp16 *bias,
int m,
308 int k,
int n,
int ldc);
/**
 * Declaration of the int8 "n8 ... dot" kernel-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param m    presumably the kernel row count -- TODO confirm
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_kernel_n8_int8_dot(int8_t *src, int8_t *dst, int m, int k, int ldc);
/**
 * Declaration of the int8 "z8 ... dot" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z8_int8_dot(int8_t *src, int8_t *dst, int k, int n, int ldc);
/*
 * int8 GEMM routine named "8x8 ... dot".
 *
 * NOTE(review): this prototype is cut off after `int32_t *mult,` in this
 * listing and carries fused listing line numbers (`312void`, `313 int m`).
 * Any trailing parameters and the closing `);` are not visible here; restore
 * them from the original header.
 */
312void shl_c908_gemm_8x8_int8_dot(int8_t *dst,
const int8_t *sa,
const int8_t *sb, int32_t *bias,
313 int m,
int k,
int n,
int ldc, int32_t out_zp, int32_t *mult,
/**
 * Declaration of the int8 "z12" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z12_int8(int8_t *src, int8_t *dst, int k, int n, int ldc);
/**
 * Declaration of the fp32 "z16 ... v256" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names, and "v256" presumably refers to a 256-bit
 * vector length -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z16_fp32_v256(float *src, float *dst, int k, int n, int ldc);
/**
 * Declaration of the fp32 GEMM routine named "8x16 ... v256".
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from names and const-qualifiers -- confirm against the
 * definition.
 *
 * @param dst   destination buffer (non-const, presumably written)
 * @param sa    first input operand, read-only
 * @param sb    second input operand, read-only
 * @param bias  bias values (declared non-const here)
 * @param m     presumably a GEMM dimension -- TODO confirm
 * @param k     presumably a GEMM dimension -- TODO confirm
 * @param n     presumably a GEMM dimension -- TODO confirm
 * @param ldc   presumably the leading dimension of dst -- TODO confirm
 */
void shl_c908_gemm_8x16_fp32_v256(float *dst, const float *sa, const float *sb, float *bias,
                                  int m, int k, int n, int ldc);
325void shl_c908_reorder_input_z32_fp16_v256(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldc);
326void shl_c908_gemm_8x32_fp16_v256(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb, __fp16 *bias,
327 int m,
int k,
int n,
int ldc);
/**
 * Declaration of the int8 "z16 ... v256 ... dot" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z16_int8_v256_dot(int8_t *src, int8_t *dst, int k, int n, int ldc);
/**
 * Declaration of the int8 GEMM routine named "8x16 ... v256 ... dot".
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from names and const-qualifiers -- confirm against the
 * definition.
 *
 * @param dst     destination buffer (non-const, presumably written)
 * @param sa      first input operand, read-only
 * @param sb      second input operand, read-only
 * @param bias    32-bit bias values (declared non-const here)
 * @param m       presumably a GEMM dimension -- TODO confirm
 * @param k       presumably a GEMM dimension -- TODO confirm
 * @param n       presumably a GEMM dimension -- TODO confirm
 * @param ldc     presumably the leading dimension of dst -- TODO confirm
 * @param out_zp  presumably the output zero point -- TODO confirm
 * @param mult    presumably requantization multipliers -- TODO confirm
 * @param shift   presumably requantization shifts -- TODO confirm
 */
void shl_c908_gemm_8x16_int8_v256_dot(int8_t *dst, const int8_t *sa, const int8_t *sb,
                                      int32_t *bias, int m, int k, int n, int ldc, int32_t out_zp,
                                      int32_t *mult, int32_t *shift);
/*
 * float -> uint8_t and float -> int8_t conversion routines (per their names
 * and the input/output pointer types).
 *
 * NOTE(review): both prototypes are cut off after `float *scale,` in this
 * listing and have a listing line number fused onto `void`. Any trailing
 * parameters and the closing `);` are not visible here; restore them from
 * the original header.
 */
334void shl_c908_f32_to_u8(
const float *input, uint8_t *output, int32_t offset,
float *scale,
336void shl_c908_f32_to_i8(
const float *input, int8_t *output, int32_t offset,
float *scale,
339#ifdef SHL_UNUSED_REGISTER_BLK
/**
 * Declaration of the fp32 "z8" input-reorder routine (inside the
 * SHL_UNUSED_REGISTER_BLK conditional section).
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z8_fp32(float *src, float *dst, int k, int n, int ldc);
/*
 * fp32 GEMM routine named "8x8".
 *
 * NOTE(review): this prototype is cut off after `int k,` in this listing and
 * has a listing line number fused onto `void`. The remaining parameters and
 * the closing `);` are not visible here; restore them from the original
 * header.
 */
341void shl_c908_gemm_8x8_fp32(
float *dst,
const float *sa,
const float *sb,
float *bias,
int m,
int k,
343void shl_c908_reorder_input_z16_fp16(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldc);
344void shl_c908_gemm_8x16_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb, __fp16 *bias,
int m,
345 int k,
int n,
int ldc);
/**
 * Declaration of the fp32 "z24 ... v256" input-reorder routine.
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from their names -- confirm against the definition.
 *
 * @param src  source buffer
 * @param dst  destination buffer (presumably receives the reordered data)
 * @param k    presumably the reduction dimension -- TODO confirm
 * @param n    presumably the input column count -- TODO confirm
 * @param ldc  presumably a leading-dimension / stride value -- TODO confirm
 */
void shl_c908_reorder_input_z24_fp32_v256(float *src, float *dst, int k, int n, int ldc);
/**
 * Declaration of the fp32 GEMM routine named "8x24 ... v256".
 *
 * NOTE(review): only the prototype is visible in this header; parameter roles
 * are inferred from names and const-qualifiers -- confirm against the
 * definition.
 *
 * @param dst   destination buffer (non-const, presumably written)
 * @param sa    first input operand, read-only
 * @param sb    second input operand, read-only
 * @param bias  bias values (declared non-const here)
 * @param m     presumably a GEMM dimension -- TODO confirm
 * @param k     presumably a GEMM dimension -- TODO confirm
 * @param n     presumably a GEMM dimension -- TODO confirm
 * @param ldc   presumably the leading dimension of dst -- TODO confirm
 */
void shl_c908_gemm_8x24_fp32_v256(float *dst, const float *sa, const float *sb, float *bias,
                                  int m, int k, int n, int ldc);
350void shl_c908_reorder_input_z48_fp16_v256(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldc);
351void shl_c908_gemm_8x48_fp16_v256(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb, __fp16 *bias,
352 int m,
int k,
int n,
int ldc);
355#ifdef SHL_USE_DOT_INT4
/*
 * C908-specific option record; its first member is a `struct shl_rvv_option`
 * named `base`.
 *
 * NOTE(review): this definition is cut off in this listing -- any further
 * members and the closing `};` are not visible, and listing line numbers are
 * fused onto both lines (`365struct`, `366 struct`). Restore the full
 * definition from the original header.
 */
365struct shl_c908_option {
366 struct shl_rvv_option base;
/**
 * Declaration of the packn-layout setter for a session.
 *
 * NOTE(review): only the prototype is visible in this header; the meaning of
 * the return value is not shown here -- confirm against the definition.
 *
 * @param sess          session to operate on
 * @param packn_layout  presumably enables (true) or disables (false) the
 *                      packn layout for the session's graph -- TODO confirm
 * @return an int status/result whose encoding is not visible in this header
 */
int shl_c908_set_packn_layout(struct csinn_session *sess, bool packn_layout);
/**
 * Declaration of the accessor returning the C908 option record associated
 * with a session.
 *
 * NOTE(review): only the prototype is visible in this header; ownership of
 * the returned pointer and whether it can be NULL are not shown here --
 * confirm against the definition.
 *
 * @param sess  session to query
 * @return pointer to a `struct shl_c908_option`
 */
struct shl_c908_option *shl_c908_get_graph_option(struct csinn_session *sess);
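/*
 * Doxygen cross-reference residue from the source listing (not part of the
 * header itself):
 *   Definition: csinn_data_structure.h:553
 *   Definition: csinn_data_structure.h:596
 *   Definition: csinn_data_structure.h:605
 *   Definition: csinn_data_structure.h:502
 *   Definition: csinn_data_structure.h:475
 */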