Für die Codegröße lohnt es sich, die Saturierung in eine eigene Funktion auszulagern – zumindest dann, wenn sie öfter gebraucht wird, was ja wohl der Fall ist:

Code:
#include <avr/io.h>
#include <avr/interrupt.h>

/* From here on every `inline` expands to GCC's always_inline attribute;
   it is applied to the declaration of foo() below. */
#define inline __attribute__((always_inline))

/* Saturation limits as 16-bit two's-complement bit patterns.  The range is
   symmetric: the lower limit is -32767, not -32768. */
#define MAX_FIX_VALUE 0x7fff    /* +32767, largest value  */
#define MIN_FIX_VALUE 0x8001    /* -32767, smallest value */

static inline uint8_t foo (uint8_t op1, uint8_t op2);
void __attribute__((naked)) saturate16 (void);

/*
 * saturate16 -- shared tail of the saturating multiply in foo().
 *
 * Register interface (not a C calling convention; the function is meant
 * to be reached through the call inside foo()'s asm statement):
 *
 *   in:   r1:r0    16-bit value to be saturated
 *         V, C     status flags as left by the caller
 *   out:  r27:r26  = r1:r0          if V is clear
 *                  = MIN_FIX_VALUE  if V is set and C is clear
 *                  = MAX_FIX_VALUE  if V is set and C is set
 *         r1       = 0 (__zero_reg__ restored for compiled code)
 *
 * The function is naked: the compiler emits no prologue or epilogue, so
 * the asm text carries its own ret.
 *
 * The instruction sequence is the same as before the limit macros were
 * given names that match their values; only the operand names moved.
 *
 * NOTE(review): foo() calls this directly after muls.  According to the
 * AVR instruction set manual muls updates only C and Z, so V would be
 * whatever an earlier instruction left behind -- confirm where V is
 * supposed to come from.
 */
void saturate16 (void)
{
    asm volatile ("; r27:r26 = sat(r1:r0), r1=0  " "\n\t"
        /* default result: the unmodified value */
        "movw    r26, r0"                          "\n\t"
        "brvc    0f"                               "\n\t"
        /* V set: assume the lower limit ... */
        "ldi     r26, lo8(%[min])"                 "\n\t"
        "ldi     r27, hi8(%[min])"                 "\n\t"
        "brcc    0f"                               "\n\t"
        /* ... unless C is set as well: upper limit */
        "ldi     r26, lo8(%[max])"                 "\n\t"
        "ldi     r27, hi8(%[max])"                 "\n"
        "0:\tclr     __zero_reg__"                 "\n\t"
        "ret"
        :: [max] "i" (MAX_FIX_VALUE),  [min] "i" (MIN_FIX_VALUE) );
}

/*
 * foo -- signed 8x8 multiply whose 16-bit product is handed to
 * saturate16().
 *
 * op1, op2: the two factors, passed to muls.
 * Returns:  the value saturate16() leaves in r27:r26, converted to the
 *           uint8_t return type (i.e. only its low byte).
 *
 * NOTE(review): the asm statement overwrites r0 (via muls) and the status
 * flags without listing any clobbers -- confirm this is acceptable for
 * the toolchain in use.
 */
uint8_t foo (uint8_t op1, uint8_t op2)
{
    /* 16-bit value delivered by saturate16() */
    uint16_t result;
    
    asm volatile (
        /* muls writes the product to r1:r0, which saturate16() reads */
        "muls    %[op1], %[op2] "                "\n\t"
        /* %~ selects the call flavour; the atmega8 listing shows rcall */
        "%~call   saturate16 ; r27:r26 = sat(r1:r0), r1=0  "
        /* "x" binds result to r27:r26, where saturate16() puts its output */
        : [result] "=x" (result)
        /* "a" restricts both factors to r16..r23 (avr-gcc constraint) */
        : [op1] "a" (op1), [op2] "a" (op2)
        );

    /* implicit uint16_t -> uint8_t conversion: low byte only */
    return result;
}


/*
 * mul5 -- chain five factors through foo():
 *         foo (foo (foo (foo (a, b), c), d), e)
 *
 * Each intermediate value is the uint8_t result of the previous foo() call.
 */
uint8_t mul5 (uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e)
{
    uint8_t acc;

    acc = foo (a, b);
    acc = foo (acc, c);
    acc = foo (acc, d);

    return foo (acc, e);
}

/*
 * Handler for SIG_INTERRUPT0.  It calls foo() from interrupt context to
 * show that the handler's prologue/epilogue save and restore exactly the
 * general-purpose registers the inlined code uses.
 */
SIGNAL (SIG_INTERRUPT0)
{
    /* The product itself is not needed here, so it is discarded. */
    (void) foo (1, 2);
}
ergibt:

Code:
	.file	"fix.c"
	.arch atmega8
 ;  Symbol definitions used by the code below: __SREG__ is the I/O address
 ;  operand of the in/out instructions in __vector_1; __tmp_reg__ and
 ;  __zero_reg__ stand for register numbers 0 and 1.
 ;  __SP_H__ / __SP_L__ are not referenced in this listing (presumably the
 ;  two stack pointer bytes -- confirm against the device documentation).
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
 ;  References to two externally defined symbols (presumably start-up
 ;  routines that the linker should pull in -- confirm).
	.global __do_copy_data
	.global __do_clear_bss
 ;  GNU C version 3.4.6 (avr)
 ; 	compiled by GNU C version 3.3 20030226 (prerelease) (SuSE Linux).
 ;  GGC heuristics: --param ggc-min-expand=99 --param ggc-min-heapsize=129491
 ;  options passed:  -fpreprocessed -mmcu=atmega8 -auxbase -Os
 ;  -fverbose-asm
 ;  options enabled:  -feliminate-unused-debug-types -fdefer-pop
 ;  -fomit-frame-pointer -foptimize-sibling-calls -funit-at-a-time
 ;  -fcse-follow-jumps -fcse-skip-blocks -fexpensive-optimizations
 ;  -fthread-jumps -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse
 ;  -fkeep-static-consts -fcaller-saves -freg-struct-return -fgcse
 ;  -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize -fcrossjumping
 ;  -fif-conversion -fif-conversion2 -frerun-cse-after-loop
 ;  -frerun-loop-opt -fdelete-null-pointer-checks -fsched-interblock
 ;  -fsched-spec -fsched-stalled-insns -fsched-stalled-insns-dep
 ;  -fbranch-count-reg -freorder-functions -fcprop-registers -fcommon
 ;  -fverbose-asm -fregmove -foptimize-register-move -fargument-alias
 ;  -fstrict-aliasing -fmerge-constants -fzero-initialized-in-bss -fident
 ;  -fpeephole2 -fguess-branch-probability -fmath-errno -ftrapping-math
 ;  -minit-stack=__stack -mmcu=atmega8

	.text
 ;  saturate16: helper reached via rcall right after each muls below.
 ;    in:   r1:r0 (16-bit value), status flags V and C
 ;    out:  r27:r26 = r1:r0 when V is clear,
 ;                    -32767 when V is set and C is clear,
 ;                     32767 when V and C are both set;
 ;          r1 (__zero_reg__) = 0
 ;  The function is naked: prologue and epilogue are empty and the ret
 ;  comes from the inline asm text itself.
.global	saturate16
	.type	saturate16, @function
saturate16:
/* prologue: frame size=0 */
/* prologue: naked */
/* prologue end (size=0) */
/* #APP */
	; r27:r26 = sat(r1:r0), r1=0  
 ;  default result: copy r1:r0 to r27:r26
	movw    r26, r0
 ;  V clear: keep the copy
	brvc    0f
 ;  V set: load the negative limit; it stays if C is clear
	ldi     r26, lo8(-32767)	 ; 
	ldi     r27, hi8(-32767)	 ; 
	brcc    0f
 ;  V and C set: replace it by the positive limit
	ldi     r26, lo8(32767)	 ; 
	ldi     r27, hi8(32767)	 ; 
 ;  common exit: r1 is set back to zero before returning
0:	clr     __zero_reg__
	ret
/* #NOAPP */
/* epilogue: frame size=0 */
/* epilogue: naked */
/* epilogue end (size=0) */
/* function saturate16 size 20 (20) */
	.size	saturate16, .-saturate16
 ;  mul5: four inlined copies of foo() (muls + rcall saturate16), chained.
 ;  According to the operand comments below, the arguments arrive as
 ;  a = r24, b = r22, c = r20, d = r18, e = r16; the result leaves in r24
 ;  with r25 cleared.
.global	mul5
	.type	mul5, @function
mul5:
/* prologue: frame size=0 */
 ;  r16 (argument e) is saved and restored around the body
	push r16
/* prologue end (size=1) */
 ;  move a, b and d to other registers; r18 is reused for the running
 ;  result below
	mov r19,r22	 ;  b, b
	mov r21,r18	 ;  d, d
	mov r23,r24	 ; , a
/* #APP */
 ;  foo (a, b)
	muls    r23, r19 	 ; , b
	rcall   saturate16 ; r27:r26 = sat(r1:r0), r1=0  
/* #NOAPP */
 ;  copy r27:r26 to r19:r18; only r18 (the low byte) feeds the next muls
	movw r18,r26	 ;  result,
/* #APP */
 ;  foo (x, c)
	muls    r18, r20 	 ;  result, c
	rcall   saturate16 ; r27:r26 = sat(r1:r0), r1=0  
/* #NOAPP */
	movw r18,r26	 ;  result,
/* #APP */
 ;  foo (x, d)
	muls    r18, r21 	 ;  result, d
	rcall   saturate16 ; r27:r26 = sat(r1:r0), r1=0  
/* #NOAPP */
	movw r18,r26	 ;  result,
/* #APP */
 ;  foo (..., e)
	muls    r18, r16 	 ;  result, e
	rcall   saturate16 ; r27:r26 = sat(r1:r0), r1=0  
/* #NOAPP */
 ;  return value: low byte of the last result in r24, r25 cleared
	mov r24,r26	 ;  result, result
	clr r25	 ;  <result>
/* epilogue: frame size=0 */
	pop r16
	ret
/* epilogue end (size=2) */
/* function mul5 size 27 (24) */
	.size	mul5, .-mul5
 ;  __vector_1: code generated for the SIGNAL (SIG_INTERRUPT0) handler.
 ;  foo (1, 2) is inlined; the prologue saves r1, r0, the status register
 ;  and the four registers the inlined code touches (r18, r19, r26, r27),
 ;  and the epilogue restores them in reverse order.
.global	__vector_1
	.type	__vector_1, @function
__vector_1:
/* prologue: frame size=0 */
	push __zero_reg__
	push __tmp_reg__
 ;  the status register is read through r0 and pushed
	in __tmp_reg__,__SREG__
	push __tmp_reg__
 ;  r1 is cleared after it has been saved
	clr __zero_reg__
	push r18
	push r19
	push r26
	push r27
/* prologue end (size=9) */
 ;  the constant arguments of foo (1, 2)
	ldi r18,lo8(1)	 ;  op1,
	ldi r19,lo8(2)	 ;  op2,
/* #APP */
	muls    r18, r19 	 ;  op1, op2
	rcall   saturate16 ; r27:r26 = sat(r1:r0), r1=0  
/* #NOAPP */
 ;  the value in r27:r26 is not used; restore the saved registers
/* epilogue: frame size=0 */
	pop r27
	pop r26
	pop r19
	pop r18
 ;  restore the status register, then r0 and r1
	pop __tmp_reg__
	out __SREG__,__tmp_reg__
	pop __tmp_reg__
	pop __zero_reg__
	reti
/* epilogue end (size=9) */
/* function __vector_1 size 24 (6) */
	.size	__vector_1, .-__vector_1
/* File "fix.c": code   71 = 0x0047 (  50), prologues  10, epilogues  11 */
Sieht doch ganz gut aus.

Ich hoffe, ich verwirre dich nicht zu sehr :P