Um Codegröße zu sparen, lohnt es sich, die Saturierung in eine eigene Funktion auszulagern – zumindest dann, wenn sie öfter gebraucht wird, was hier ja wohl der Fall ist:
Code:
#include <avr/io.h>
#include <avr/interrupt.h>
/* Forces inlining without redefining the `inline` keyword itself. */
#define ALWAYS_INLINE __inline__ __attribute__((always_inline))

/* Saturation limits of the signed 16-bit result (symmetric range);
   each value carries the sign its name implies. */
#define MAX_FIX_VALUE 0x7fff    /* +32767 */
#define MIN_FIX_VALUE 0x8001    /* -32767 */

static ALWAYS_INLINE uint8_t foo (uint8_t op1, uint8_t op2);
void __attribute__((naked)) saturate16 (void);

/*
 * saturate16 - shared, out-of-line saturation step for inline-asm callers.
 *
 * Uses a private register convention instead of the normal C ABI, so it is
 * meant to be reached from inline asm only:
 *
 *   In:   r1:r0    16-bit value to saturate
 *         SREG     V and C exactly as the caller left them
 *   Out:  r27:r26  r1:r0 unchanged        if V is clear
 *                  MIN_FIX_VALUE          if V is set and C is clear
 *                  MAX_FIX_VALUE          if V is set and C is set
 *         r1       cleared (__zero_reg__)
 *
 * The function is naked: the compiler emits no prologue/epilogue, which is
 * why the asm template supplies its own `ret`.
 */
void saturate16 (void)
{
    asm volatile ("; r27:r26 = sat(r1:r0), r1=0 "  "\n\t"
                  "movw r26, r0"                   "\n\t"  /* default: pass the value through */
                  "brvc 0f"                        "\n\t"  /* V clear: nothing to saturate    */
                  "ldi r26, lo8(%[min])"           "\n\t"
                  "ldi r27, hi8(%[min])"           "\n\t"
                  "brcc 0f"                        "\n\t"  /* C clear: keep the lower limit   */
                  "ldi r26, lo8(%[max])"           "\n\t"
                  "ldi r27, hi8(%[max])"           "\n"
                  "0:\tclr __zero_reg__"           "\n\t"  /* hand r1 back as zero            */
                  "ret"
                  :: [max] "i" (MAX_FIX_VALUE), [min] "i" (MIN_FIX_VALUE) );
}
/*
 * foo - signed 8x8 multiply followed by the shared saturation helper.
 *
 * op1, op2: the two factors, handed to `muls` in registers.
 * Returns:  `result` converted to uint8_t, i.e. only the low byte of the
 *           16-bit value that saturate16 leaves in r27:r26.
 *
 * NOTE(review): according to the AVR instruction-set manual `muls` updates
 * only the C and Z flags, so the V flag tested inside saturate16 would be
 * left over from earlier code -- confirm before relying on the saturation.
 */
uint8_t foo (uint8_t op1, uint8_t op2)
{
uint16_t result;
asm volatile (
/* The product is left in r1:r0, the input pair of saturate16. */
"muls %[op1], %[op2] " "\n\t"
/* %~ lets the compiler choose the call flavour; the atmega8 build emits rcall. */
"%~call saturate16 ; r27:r26 = sat(r1:r0), r1=0 "
/* "=x": the output is taken from the X register pair (r27:r26). */
: [result] "=x" (result)
/* "a": NOTE(review) avr-gcc documents this class as r16..r23 -- confirm. */
: [op1] "a" (op1), [op2] "a" (op2)
/* No clobber list: the helper clears r1 again before returning.
   NOTE(review): r0 is overwritten by muls; confirm avr-gcc allows inline asm
   to use __tmp_reg__ without declaring it. */
);
return result;
}
/*
 * mul5 - folds five bytes into one by applying foo() four times in a row:
 * foo(foo(foo(foo(a, b), c), d), e).
 *
 * Every intermediate value is the uint8_t returned by foo(), so each step
 * feeds only that byte into the next multiplication.
 */
uint8_t mul5 (uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e)
{
    uint8_t ab   = foo (a, b);
    uint8_t abc  = foo (ab, c);
    uint8_t abcd = foo (abc, d);

    return foo (abcd, e);
}
/*
 * Handler for SIG_INTERRUPT0 (emitted as __vector_1 in the atmega8 build).
 *
 * Exists to show that calling foo() from an interrupt is safe: the generated
 * prologue/epilogue saves and restores r0, r1, SREG, r18, r19, r26 and r27,
 * i.e. every register the inlined multiply and saturate16 touch.
 *
 * NOTE(review): newer avr-libc releases prefer ISR(INT0_vect) over
 * SIGNAL()/SIG_* -- check against the toolchain in use.
 */
SIGNAL (SIG_INTERRUPT0)
{
    /* Only the register save/restore matters here; the product is dropped. */
    (void) foo (1, 2);
}
ergibt:
Code:
.file "fix.c"
.arch atmega8
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
.global __do_copy_data
.global __do_clear_bss
; GNU C version 3.4.6 (avr)
; compiled by GNU C version 3.3 20030226 (prerelease) (SuSE Linux).
; GGC heuristics: --param ggc-min-expand=99 --param ggc-min-heapsize=129491
; options passed: -fpreprocessed -mmcu=atmega8 -auxbase -Os
; -fverbose-asm
; options enabled: -feliminate-unused-debug-types -fdefer-pop
; -fomit-frame-pointer -foptimize-sibling-calls -funit-at-a-time
; -fcse-follow-jumps -fcse-skip-blocks -fexpensive-optimizations
; -fthread-jumps -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse
; -fkeep-static-consts -fcaller-saves -freg-struct-return -fgcse
; -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize -fcrossjumping
; -fif-conversion -fif-conversion2 -frerun-cse-after-loop
; -frerun-loop-opt -fdelete-null-pointer-checks -fsched-interblock
; -fsched-spec -fsched-stalled-insns -fsched-stalled-insns-dep
; -fbranch-count-reg -freorder-functions -fcprop-registers -fcommon
; -fverbose-asm -fregmove -foptimize-register-move -fargument-alias
; -fstrict-aliasing -fmerge-constants -fzero-initialized-in-bss -fident
; -fpeephole2 -fguess-branch-probability -fmath-errno -ftrapping-math
; -minit-stack=__stack -mmcu=atmega8
.text
; saturate16: r27:r26 = sat(r1:r0), then r1 = 0.
;   In:   r1:r0   = 16-bit value; SREG V and C as left by the caller
;   Out:  r27:r26 = input value, -32767 or +32767 (see branches below)
;         r1 (__zero_reg__) cleared
; Naked function: no compiler prologue/epilogue, the body is the inline asm.
.global saturate16
.type saturate16, @function
saturate16:
/* prologue: frame size=0 */
/* prologue: naked */
/* prologue end (size=0) */
/* #APP */
; r27:r26 = sat(r1:r0), r1=0
; Default result: the unmodified input.
movw r26, r0
; V clear -> nothing to saturate, keep the copy.
brvc 0f
; V set: preload -32767 ...
ldi r26, lo8(-32767) ;
ldi r27, hi8(-32767) ;
; ... and keep it when C is clear.
brcc 0f
; V and C both set: use +32767 instead.
ldi r26, lo8(32767) ;
ldi r27, hi8(32767) ;
; Common exit: restore the zero register before returning.
0: clr __zero_reg__
ret
/* #NOAPP */
/* epilogue: frame size=0 */
/* epilogue: naked */
/* epilogue end (size=0) */
/* function saturate16 size 20 (20) */
.size saturate16, .-saturate16
; mul5(a, b, c, d, e): four inlined copies of foo(), each one a
; "muls" followed by "rcall saturate16".
;   In (per the verbose-asm annotations): a=r24, b=r22, c=r20, d=r18, e=r16
;   Out: r24 = low byte of the last result, r25 = 0
; r16 is saved on entry and restored before returning.
.global mul5
.type mul5, @function
mul5:
/* prologue: frame size=0 */
push r16
/* prologue end (size=1) */
; Move a, b and d out of their incoming registers; r18 (d) is reused
; below for the intermediate results.
mov r19,r22 ; b, b
mov r21,r18 ; d, d
mov r23,r24 ; , a
/* #APP */
; Step 1: a * b
muls r23, r19 ; , b
rcall saturate16 ; r27:r26 = sat(r1:r0), r1=0
/* #NOAPP */
; Copy X to r19:r18; only r18 (the low byte) feeds the next multiply.
movw r18,r26 ; result,
/* #APP */
; Step 2: (low byte of step 1) * c
muls r18, r20 ; result, c
rcall saturate16 ; r27:r26 = sat(r1:r0), r1=0
/* #NOAPP */
movw r18,r26 ; result,
/* #APP */
; Step 3: (low byte of step 2) * d
muls r18, r21 ; result, d
rcall saturate16 ; r27:r26 = sat(r1:r0), r1=0
/* #NOAPP */
movw r18,r26 ; result,
/* #APP */
; Step 4: (low byte of step 3) * e
muls r18, r16 ; result, e
rcall saturate16 ; r27:r26 = sat(r1:r0), r1=0
/* #NOAPP */
; Return value: low byte of X in r24, high byte r25 zeroed.
mov r24,r26 ; result, result
clr r25 ; <result>
/* epilogue: frame size=0 */
pop r16
ret
/* epilogue end (size=2) */
/* function mul5 size 27 (24) */
.size mul5, .-mul5
; __vector_1: interrupt handler generated for SIGNAL (SIG_INTERRUPT0).
; Saves r1, r0, SREG, r18, r19, r26 and r27, runs one inlined foo (1, 2)
; (muls + rcall saturate16), restores everything in reverse order and
; returns with reti.
.global __vector_1
.type __vector_1, @function
__vector_1:
/* prologue: frame size=0 */
; Save r1 and r0, then SREG by way of r0.
push __zero_reg__
push __tmp_reg__
in __tmp_reg__,__SREG__
push __tmp_reg__
; Establish r1 = 0 for the handler body.
clr __zero_reg__
; Save the registers used by the inlined multiply and by saturate16.
push r18
push r19
push r26
push r27
/* prologue end (size=9) */
; Constant operands of foo (1, 2).
ldi r18,lo8(1) ; op1,
ldi r19,lo8(2) ; op2,
/* #APP */
muls r18, r19 ; op1, op2
rcall saturate16 ; r27:r26 = sat(r1:r0), r1=0
/* #NOAPP */
; The result in r27:r26 is not used; restore in reverse push order.
/* epilogue: frame size=0 */
pop r27
pop r26
pop r19
pop r18
; Restore SREG through r0, then r0 and r1 themselves.
pop __tmp_reg__
out __SREG__,__tmp_reg__
pop __tmp_reg__
pop __zero_reg__
reti
/* epilogue end (size=9) */
/* function __vector_1 size 24 (6) */
.size __vector_1, .-__vector_1
/* File "fix.c": code 71 = 0x0047 ( 50), prologues 10, epilogues 11 */
Sieht doch ganz gut aus.
Ich hoffe, ich verwirre dich nicht zu sehr :P
Lesezeichen