Still optimizing IRQ latency on the STM32H743 @ 480 MHz. The 70 ns vs. 100 ns overhead mystery is solved: I had not properly relocated the vector table to Tightly-Coupled Memory; it was still in Flash. The STM32 HAL macro USER_VECT_TAB_ADDRESS is a flag, not a memory address! In fact, only a few hardcoded addresses are available; a real user override is not provided (the name "user" is a lie). Solution: just set VTOR manually and don't trust the startup code. I'm now getting a 70-ns IRQ latency without the CPU cache.

/*
 * Vector-table relocation settings.
 *
 * NOTE: USER_VECT_TAB_ADDRESS is only tested with defined(); it acts as an
 * on/off switch and any value assigned to it is never read. When it is
 * defined, VECT_TAB_BASE_ADDRESS is picked from a fixed set of bases:
 *   - DUAL_CORE && CORE_CM4: D2_AXISRAM_BASE if VECT_TAB_SRAM is defined,
 *                            otherwise FLASH_BANK2_BASE;
 *   - all other builds:      D1_AXISRAM_BASE if VECT_TAB_SRAM is defined,
 *                            otherwise FLASH_BANK1_BASE.
 * VECT_TAB_OFFSET defaults to 0x00000000U unless it is already defined.
 * No other base address can be selected through these macros.
 */
/*!< Uncomment the following line if you need to relocate the vector table
     anywhere in FLASH BANK1 or AXI SRAM, else the vector table is kept at the automatic
     remap of boot address selected */
/* #define USER_VECT_TAB_ADDRESS */

#if defined(USER_VECT_TAB_ADDRESS)
#if defined(DUAL_CORE) && defined(CORE_CM4)
/*!< Uncomment the following line if you need to relocate your vector Table
     in D2 AXI SRAM else user remap will be done in FLASH BANK2. */
/* #define VECT_TAB_SRAM */
#if defined(VECT_TAB_SRAM)
#define VECT_TAB_BASE_ADDRESS   D2_AXISRAM_BASE   /*!< Vector Table base address field.
                                                       This value must be a multiple of 0x400. */
#else
#define VECT_TAB_BASE_ADDRESS   FLASH_BANK2_BASE  /*!< Vector Table base address field.
                                                       This value must be a multiple of 0x400. */
#endif /* VECT_TAB_SRAM */
#else
/*!< Uncomment the following line if you need to relocate your vector Table
     in D1 AXI SRAM else user remap will be done in FLASH BANK1. */
/* #define VECT_TAB_SRAM */
#if defined(VECT_TAB_SRAM)
#define VECT_TAB_BASE_ADDRESS   D1_AXISRAM_BASE   /*!< Vector Table base address field.
                                                       This value must be a multiple of 0x400. */
#else
#define VECT_TAB_BASE_ADDRESS   FLASH_BANK1_BASE  /*!< Vector Table base address field.
                                                       This value must be a multiple of 0x400. */
#endif /* VECT_TAB_SRAM */
#endif /* DUAL_CORE && CORE_CM4 */

#if !defined(VECT_TAB_OFFSET)
#define VECT_TAB_OFFSET         0x00000000U       /*!< Vector Table base offset field.
                                                       This value must be a multiple of 0x400. */
#endif /* VECT_TAB_OFFSET */

#endif /* USER_VECT_TAB_ADDRESS */void relocate_to_itcm(void)
{
        extern volatile char _si_isr_vector;
        extern volatile char __isr_vector_start, __isr_vector_end;

        volatile char *flash_isr_vector_start = &_si_isr_vector;
        volatile char *ram_isr_vector_start = &__isr_vector_start;
        volatile char *ram_isr_vector_end = &__isr_vector_end;
        size_t len = ram_isr_vector_end - ram_isr_vector_start;

        for (size_t i = 0; i < len; i++) {
                ram_isr_vector_start[i] = flash_isr_vector_start[i];
        }

#ifdef SEMIHOSTING
        printf("relocate %p-%p to %p, %d bytes\n", flash_isr_vector_start, flash_isr_vector_start + len, ram_isr_vector_start, len);
#endif

        extern volatile char _si_itcm_text;
        extern volatile char __itcm_text_start, __itcm_text_end;

        volatile char *flash_itcm_text_start = &_si_itcm_text;
        volatile char *ram_itcm_text_start = &__itcm_text_start;
        volatile char *ram_itcm_text_end = &__itcm_text_end;
        len = ram_itcm_text_end - ram_itcm_text_start;

        for (size_t i = 0; i < len; i++) {
                ram_itcm_text_start[i] = flash_itcm_text_start[i];
        }

#ifdef SEMIHOSTING
        printf("relocate %p-%p to %p, %d bytes\n", flash_itcm_text_start, flash_itcm_text_start + len, ram_itcm_text_start, len);
#endif

        SCB->VTOR = D1_ITCMRAM_BASE;
}71.60 ns input-to-output latency observed on the oscilloscope even without any CPU cache.
0

If you have a fediverse account, you can quote this note from your own instance. Search https://mk.absturztau.be/notes/ajvb448y305b01i4 on your instance and quote it. (Note that quoting is not supported in Mastodon.)