21#ifndef INCLUDE_SHL_C906_H_
22#define INCLUDE_SHL_C906_H_
27#include "shl_thead_rvv.h"
/**
 * Prototype of the C906 float32 kernel-reorder routine (defined elsewhere).
 *
 * NOTE(review): only the signature is visible here; the parameter roles below
 * are inferred from the names and must be confirmed against the definition.
 *
 * @param a    presumably the source kernel matrix
 * @param sa   presumably the destination for the reordered kernel
 * @param m    presumably the number of rows
 * @param k    presumably the number of columns / reduction length
 * @param ldx  presumably the leading dimension of the source
 */
void shl_c906_reorder_kernel(float *a, float *sa, int m, int k, int ldx);
/**
 * Prototype of the C906 float32 input-reorder routine (defined elsewhere).
 *
 * NOTE(review): only the signature is visible here; the parameter roles below
 * are inferred from the names and must be confirmed against the definition.
 *
 * @param b    presumably the source input matrix
 * @param sb   presumably the destination for the reordered input
 * @param k    presumably the number of rows / reduction length
 * @param n    presumably the number of columns
 * @param ldx  presumably the leading dimension of the source
 */
void shl_c906_reorder_input(float *b, float *sb, int k, int n, int ldx);
/**
 * Prototype of a second C906 float32 input-reorder routine (defined
 * elsewhere). It shares the signature of shl_c906_reorder_input.
 *
 * NOTE(review): how this variant differs from shl_c906_reorder_input is not
 * visible from the declaration; parameter roles are inferred from the names.
 *
 * @param b    presumably the source input matrix
 * @param sb   presumably the destination for the reordered input
 * @param k    presumably the number of rows / reduction length
 * @param n    presumably the number of columns
 * @param ldx  presumably the leading dimension of the source
 */
void shl_c906_reorder_input_1(float *b, float *sb, int k, int n, int ldx);
/**
 * Prototype of the C906 float32 GEMM kernel (defined elsewhere).
 *
 * NOTE(review): only the signature is visible here; roles marked "presumably"
 * are inferred from the names and must be confirmed against the definition.
 *
 * @param dst        output buffer (non-const)
 * @param sa         read-only operand (const-qualified); presumably the
 *                   reordered kernel
 * @param sb         read-only operand (const-qualified); presumably the
 *                   reordered input
 * @param m          presumably the number of output rows
 * @param k          presumably the reduction length
 * @param n          presumably the number of output columns
 * @param ldc        presumably the leading dimension of dst
 * @param bias       presumably an optional per-row bias -- confirm whether
 *                   NULL is accepted
 * @param fuse_relu  presumably enables a fused ReLU on the result
 */
void shl_c906_sgemm_kernel_f32(float *dst, const float *sa, const float *sb,
                               int m, int k, int n, int ldc, float *bias,
                               bool fuse_relu);
143void shl_c906_conv1x1s1_sgemm_transform_kernel(
struct csinn_tensor *kernel,
146void shl_c906_conv_im2col_sgemm_transform_kernel(
struct csinn_tensor *kernel,
149void shl_c906_conv3x3s1_winograd23_transform_kernel(
struct csinn_tensor *o_kernel,
152void shl_c906_conv3x3s1_winograd43_transform_kernel(
struct csinn_tensor *o_kernel,
155void shl_c906_conv3x3s1_winograd64_transform_kernel(
struct csinn_tensor *o_kernel,
158void shl_c906_conv3x3s1_winograd64_transform_kernel_1(
struct csinn_tensor *o_kernel,
161void shl_c906_conv3x3s1_winograd64_transform_kernel_pack4(
struct csinn_tensor *o_kernel,
164void shl_c906_conv3x3s1_winograd43_transform_kernel_pack4(
struct csinn_tensor *o_kernel,
335int shl_c906_fullyconnected_pack16_output16_fp16(
struct csinn_tensor *input,
341void shl_c906_reorder_weight_n8_fp16(__fp16 *src, __fp16 *dst,
int m,
int k,
int ldx);
343void shl_c906_reorder_weight_n16_fp16(__fp16 *src, __fp16 *dst,
int m,
int k,
int ldx);
346void shl_c906_reorder_kernel_fp16(__fp16 *a, __fp16 *sa,
int m,
int k,
int ldx);
347void shl_c906_reorder_input_fp16(__fp16 *b, __fp16 *sb,
int k,
int n,
int ldx);
349void shl_c906_reorder_input_fp16_1(__fp16 *b, __fp16 *sb,
int k,
int n,
int ldx);
351void shl_c906_reorder_matrix_z8_fp16(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldx);
352void shl_c906_reorder_matrix_z16_fp16(__fp16 *src, __fp16 *dst,
int k,
int n,
int ldx);
355void shl_c906_sgemm_kernel_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int m,
int k,
356 int n,
int ldc, __fp16 *bias);
357void shl_c906_sgemm_kernel_fp16_1(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int m,
int k,
358 int n,
int ldc, __fp16 *bias);
361void shl_c906_gemv_pack8_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int k,
int n,
362 int ldc, __fp16 *bias);
363void shl_c906_gemv_pack16_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int k,
int n,
364 int ldc, __fp16 *bias);
366void shl_c906_gemv_trans_pack8_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int k,
int n,
367 int ldc, __fp16 *bias);
368void shl_c906_gemv_trans_pack16_fp16(__fp16 *dst,
const __fp16 *sa,
const __fp16 *sb,
int k,
int n,
369 int ldc, __fp16 *bias);
372void shl_c906_conv1x1s1_sgemm_transform_kernel_fp16(
struct csinn_tensor *kernel,
374void shl_c906_conv1x1s1_sgemm_transform_kernel_fp16_w_int8(
struct csinn_tensor *kernel,
376void shl_c906_conv_im2col_sgemm_transform_kernel_fp16(
struct csinn_tensor *kernel,
379void shl_c906_conv3x3s1_winograd43_transform_kernel_pack8_fp16(
struct csinn_tensor *o_kernel,
382void shl_c906_conv3x3s1_winograd64_transform_kernel_pack8_fp16(
struct csinn_tensor *o_kernel,
398int shl_c906_conv3x3s1_winograd43_pack8_fp16(
struct csinn_tensor *input,
403int shl_c906_conv3x3s1_winograd64_pack8_fp16(
struct csinn_tensor *input,
/**
 * Prototype of the C906 memory-copy routine (defined elsewhere).
 *
 * The parameter list mirrors the standard memcpy, but this function returns
 * nothing.
 *
 * NOTE(review): whether overlapping regions are supported, and whether n is
 * a byte count as in memcpy, is not visible from the declaration -- confirm.
 *
 * @param dst  destination buffer
 * @param src  source buffer (read-only, const-qualified)
 * @param n    presumably the number of bytes to copy
 */
void shl_c906_memcpy(void *dst, const void *src, size_t n);
/**
 * Prototype of the C906 float32 input-padding routine (defined elsewhere).
 *
 * NOTE(review): roles marked "presumably" are inferred from the names only;
 * the tensor layout and the fill value are not visible here -- confirm
 * against the definition.
 *
 * @param input         source tensor (read-only, const-qualified)
 * @param input_padded  destination buffer for the padded tensor
 * @param inc           presumably the input channel count
 * @param inh           presumably the input height
 * @param inw           presumably the input width
 * @param padded_h      presumably the height after padding
 * @param padded_w      presumably the width after padding
 * @param pad_top       presumably the rows of padding above the data
 * @param pad_left      presumably the columns of padding left of the data
 */
void shl_c906_pad_input(const float *input, float *input_padded, int inc,
                        int inh, int inw, int padded_h, int padded_w,
                        int pad_top, int pad_left);
/**
 * Prototype of the C906 float32 output-cropping routine (defined elsewhere).
 *
 * NOTE(review): roles marked "presumably" are inferred from the names only
 * ("wino" presumably refers to the Winograd-sized intermediate) -- confirm
 * against the definition.
 *
 * @param output_trans  presumably the oversized intermediate output
 * @param output        presumably the destination for the cropped output
 * @param out_c         presumably the output channel count
 * @param out_h         presumably the final output height
 * @param out_w         presumably the final output width
 * @param wino_h        presumably the height of output_trans
 * @param wino_w        presumably the width of output_trans
 */
void shl_c906_crop_output(float *output_trans, float *output, int out_c,
                          int out_h, int out_w, int wino_h, int wino_w);
442void shl_c906_pad_input_fp16(
const __fp16 *input, __fp16 *input_padded,
int inc,
int inh,
int inw,
443 int padded_h,
int padded_w,
int pad_top,
int pad_left);
445void shl_c906_crop_output_fp16(__fp16 *output_trans, __fp16 *output,
int out_c,
int out_h,
446 int out_w,
int wino_h,
int wino_w);
451 struct csinn_cache_matmul_params *params);
455 struct csinn_cache_matmul_params *params);
468 struct csinn_cache_conv1d_params *params);
472 struct csinn_cache_conv1d_params *params);
/**
 * Prototype of the C906 ASR buffer initialiser (defined elsewhere).
 *
 * NOTE(review): units and semantics of the two sizes are not visible from
 * the declaration -- confirm against the definition. The spelling
 * "data_lenth" is kept as it appears in the original declaration.
 *
 * @param buffer       buffer object to initialise
 * @param buffer_size  presumably the total capacity of the buffer
 * @param data_lenth   presumably the length of the valid data window
 */
void asr_buffer_init_c906(struct csinn_asr_buffer_t *buffer, size_t buffer_size,
                          size_t data_lenth);
/**
 * Prototype of the C906 ASR buffer "insert front" routine (defined
 * elsewhere).
 *
 * NOTE(review): what the returned pointer refers to, and the unit of len, are
 * not visible from the declaration -- confirm against the definition.
 *
 * @param buffer  buffer object to insert into
 * @param input   data to insert
 * @param len     presumably the size of input
 * @return        untyped pointer; presumably into the buffer's storage
 */
void *asr_buffer_insert_c906_front(struct csinn_asr_buffer_t *buffer,
                                   void *input, size_t len);
/**
 * Prototype of the C906 ASR buffer "insert back" routine (defined elsewhere).
 *
 * NOTE(review): what the returned pointer refers to, and the unit of len, are
 * not visible from the declaration -- confirm against the definition.
 *
 * @param buffer  buffer object to insert into
 * @param input   data to insert
 * @param len     presumably the size of input
 * @return        untyped pointer; presumably into the buffer's storage
 */
void *asr_buffer_insert_c906_back(struct csinn_asr_buffer_t *buffer,
                                  void *input, size_t len);
/**
 * Prototype of the C906 ASR buffer accessor (defined elsewhere).
 *
 * NOTE(review): ownership and lifetime of the returned pointer are not
 * visible from the declaration -- confirm against the definition.
 *
 * @param buffer  buffer object to query
 * @return        untyped pointer; presumably to the buffer's current data
 */
void *asr_buffer_get_buffer_c906(struct csinn_asr_buffer_t *buffer);
/**
 * Prototype of the C906 ASR buffer reset routine (defined elsewhere).
 *
 * NOTE(review): which fields are reset is not visible from the declaration.
 *
 * @param buffer  buffer object to reset
 */
void asr_buffer_reset_c906(struct csinn_asr_buffer_t *buffer);
/**
 * Prototype of the routine that resets the FCSR (defined elsewhere).
 *
 * NOTE(review): "fcsr" presumably names the RISC-V floating-point control and
 * status register; the value it is reset to is not visible here.
 */
void shl_c906_reset_fcsr();
/**
 * Prototype of the routine that reads the FCSR (defined elsewhere).
 *
 * NOTE(review): "fcsr" presumably names the RISC-V floating-point control and
 * status register -- confirm against the definition.
 *
 * @return  an int; presumably the current register value
 */
int shl_c906_get_fcsr();
494 size_t l1_icache_access;
495 size_t l1_icache_miss;
497 size_t l1_dcache_raccess;
498 size_t l1_dcache_rmiss;
499 size_t l1_dcache_waccess;
500 size_t l1_dcache_wmiss;
/*
 * Prototypes of the C906 counter getters (defined elsewhere). Each takes no
 * listed arguments and returns a 64-bit unsigned value.
 *
 * NOTE(review): the names suggest hardware performance counters (retired
 * instructions, cycles, L1 I-cache / D-cache accesses and misses, and
 * "cb" / store instruction counts). Which counter each one reads, and what
 * "cb" stands for, must be confirmed against the definitions.
 */
uint64_t shl_c906_get_inst();
uint64_t shl_c906_get_cycle();
uint64_t shl_c906_get_l1_icache_access();
uint64_t shl_c906_get_l1_icache_miss();
uint64_t shl_c906_get_cb_miss();
uint64_t shl_c906_get_cb_inst();
uint64_t shl_c906_get_store_inst();
uint64_t shl_c906_get_l1_dcache_raccess();
uint64_t shl_c906_get_l1_dcache_rmiss();
uint64_t shl_c906_get_l1_dcache_waccess();
uint64_t shl_c906_get_l1_dcache_wmiss();
/**
 * Prototype of the routine that returns a struct shl_c906_hpm by value
 * (defined elsewhere).
 *
 * NOTE(review): presumably a snapshot of the hardware performance counters;
 * which fields are populated is not visible from the declaration.
 */
struct shl_c906_hpm shl_c906_get_hw_perf();
520void shl_c906_u8_to_f32(
const uint8_t *input,
float *output, int32_t offset,
float *scale,
522void shl_c906_i8_to_f32(
const int8_t *input,
float *output, int32_t offset,
float *scale,
524void shl_c906_f32_to_u8(
const float *input, uint8_t *output, int32_t offset,
float *scale,
526void shl_c906_f32_to_i8(
const float *input, int8_t *output, int32_t offset,
float *scale,
534struct shl_c906_option {
535 struct shl_rvv_option base;
/**
 * Prototype of the setter for the session's "packn layout" flag (defined
 * elsewhere).
 *
 * NOTE(review): the meaning of the int return value (status code vs. previous
 * setting) is not visible from the declaration -- confirm against the
 * definition.
 *
 * @param sess          session to configure
 * @param packn_layout  new value of the flag
 * @return              an int; presumably a status code
 */
int shl_c906_set_packn_layout(struct csinn_session *sess, bool packn_layout);
/**
 * Prototype of the accessor for a session's C906 option block (defined
 * elsewhere).
 *
 * NOTE(review): ownership of the returned pointer, and whether it can be
 * NULL, are not visible from the declaration -- confirm against the
 * definition.
 *
 * @param sess  session to query
 * @return      pointer to a struct shl_c906_option
 */
struct shl_c906_option *shl_c906_get_graph_option(struct csinn_session *sess);
csinn_op_enum
Definition: csinn_data_structure.h:127
csinn_dtype_enum
Definition: csinn_data_structure.h:39
Definition: csinn_data_structure.h:1074
Definition: csinn_data_structure.h:524
int(* est)()
Definition: csinn_data_structure.h:526
int(* init)()
Definition: csinn_data_structure.h:525
int(* exec)()
Definition: csinn_data_structure.h:527
Definition: csinn_data_structure.h:1081
Definition: csinn_data_structure.h:780
Definition: csinn_data_structure.h:1162
Definition: csinn_data_structure.h:553
Definition: csinn_data_structure.h:753
Definition: csinn_data_structure.h:596
Definition: csinn_data_structure.h:727
Definition: csinn_data_structure.h:746
Definition: csinn_data_structure.h:763
Definition: csinn_data_structure.h:605
Definition: csinn_data_structure.h:686
Definition: csinn_data_structure.h:1033
Definition: csinn_data_structure.h:676
Definition: csinn_data_structure.h:825
Definition: csinn_data_structure.h:502
Definition: csinn_data_structure.h:661
Definition: csinn_data_structure.h:871
Definition: csinn_data_structure.h:475