Jak se prelozi

Jan Waclawek konfera na efton.sk
Čtvrtek Duben 25 18:27:55 CEST 2024


<__TEXT_REGION_LENGTH__+0x7f0447>
   fe:    40 91 a8 04     lds    r20, 0x04A8    ; 0x8004a8 <__TEXT_REGION_LENGTH__+0x7f04a8>
  102:    30 91 a8 04     lds    r19, 0x04A8    ; 0x8004a8 
[...]
<__TEXT_REGION_LENGTH__+0x7f0444>
  10e:    46 fd           sbrc    r20, 6
  110:    90 93 44 04     sts    0x0444, r25    ; 0x800444 <__TEXT_REGION_LENGTH__+0x7f0444>
  114:    37 fd           sbrc    r19, 7
  116:    20 93 44 04     sts    0x0444, r18    ; 0x800444 <__TEXT_REGION_LENGTH__+0x7f0444>


Skoda, ze v tom listingu nie su zdrojove riadky, to je C++ artefakt, alebo
len zle nastaveny nejaky -gX switch pri preklade?

Kazdopadne, v tomto priklade je uplne vyoptimalizovany samotny struct - v
prvych dvoch riadkoch sa precita port (musi sa citat dvakrat, lebo
volatile); a vo zvysku sa na zaklade tych precitanych hodnot priamo v
registroch zapise do vystupneho portu.


Ja som to skusil s volatile structom, aby ho prekladac nevyoptimalizoval (a
v cistom C, nie v C++):

#include <stdint.h>
#include <stdbool.h>  // bring in bool, which is not a native type in C

// Two one-bit boolean flags packed into a single bit-field struct.
// The object is declared volatile so that the compiler has to keep it in
// memory and perform every access instead of optimising the struct away.
struct {
  bool b0:1;  // first 1-bit flag
  bool b1:1;  // second 1-bit flag
} volatile b;

// Volatile byte used as a stand-in for an I/O port: main() reads it as the
// input and writes the result back to it.
volatile uint8_t k;

// Test program for bit-field code generation: stores the truth value of k
// into both flags of b, then writes 10 and/or 20 back to k depending on
// which flags are set.
int main(void) {
  // k is volatile, so each of the two assignments does its own read of k.
  // The cast to bool turns any non-zero value into 1 before it is stored
  // in the 1-bit field.
  b.b0 = (bool)k;
  b.b1 = (bool)k;
  // b is volatile as well, so each test below re-reads the struct.
  if(b.b0) {
    k = 10;
  }; 
  if(b.b1) {
    k = 20;
  }; 
}


avr-gcc 4.2.2:
int main(void) {
  b.b0 = (bool)k;
 112:	90 91 01 02 	lds	r25, 0x0201
 116:	91 11       	cpse	r25, r1
 118:	91 e0       	ldi	r25, 0x01	; 1
 11a:	91 70       	andi	r25, 0x01	; 1
 11c:	80 91 00 02 	lds	r24, 0x0200
 120:	8e 7f       	andi	r24, 0xFE	; 254
 122:	89 2b       	or	r24, r25
 124:	80 93 00 02 	sts	0x0200, r24
  b.b1 = (bool)k;
 128:	90 91 01 02 	lds	r25, 0x0201
 12c:	91 11       	cpse	r25, r1
 12e:	91 e0       	ldi	r25, 0x01	; 1
 130:	91 70       	andi	r25, 0x01	; 1
 132:	99 0f       	add	r25, r25
 134:	80 91 00 02 	lds	r24, 0x0200
 138:	8d 7f       	andi	r24, 0xFD	; 253
 13a:	89 2b       	or	r24, r25
 13c:	80 93 00 02 	sts	0x0200, r24
  if(b.b0) {
 140:	80 91 00 02 	lds	r24, 0x0200
 144:	80 ff       	sbrs	r24, 0
 146:	03 c0       	rjmp	.+6      	; 0x14e <main+0x3c>
    k = 10;
 148:	8a e0       	ldi	r24, 0x0A	; 10
 14a:	80 93 01 02 	sts	0x0201, r24
  }; 
  if(b.b1) {
 14e:	80 91 00 02 	lds	r24, 0x0200
 152:	81 ff       	sbrs	r24, 1
 154:	03 c0       	rjmp	.+6      	; 0x15c <main+0x4a>
    k = 20;
 156:	84 e1       	ldi	r24, 0x14	; 20
 158:	80 93 01 02 	sts	0x0201, r24
  }; 
}
 15c:	08 95       	ret


avr-gcc 8.0:
int main(void) {
  b.b0 = (bool)k;
  f8:	80 91 01 02 	lds	r24, 0x0201	; 0x800201 <k>
  fc:	91 e0       	ldi	r25, 0x01	; 1
  fe:	81 11       	cpse	r24, r1
 100:	01 c0       	rjmp	.+2      	; 0x104 <main+0xc>
 102:	90 e0       	ldi	r25, 0x00	; 0
 104:	80 91 00 02 	lds	r24, 0x0200	; 0x800200 <_edata>
 108:	90 fb       	bst	r25, 0
 10a:	80 f9       	bld	r24, 0
 10c:	80 93 00 02 	sts	0x0200, r24	; 0x800200 <_edata>
  b.b1 = (bool)k;
 110:	80 91 01 02 	lds	r24, 0x0201	; 0x800201 <k>
 114:	91 e0       	ldi	r25, 0x01	; 1
 116:	81 11       	cpse	r24, r1
 118:	01 c0       	rjmp	.+2      	; 0x11c <main+0x24>
 11a:	90 e0       	ldi	r25, 0x00	; 0
 11c:	80 91 00 02 	lds	r24, 0x0200	; 0x800200 <_edata>
 120:	90 fb       	bst	r25, 0
 122:	81 f9       	bld	r24, 1
 124:	80 93 00 02 	sts	0x0200, r24	; 0x800200 <_edata>
  if(b.b0) {
 128:	80 91 00 02 	lds	r24, 0x0200	; 0x800200 <_edata>
 12c:	80 ff       	sbrs	r24, 0
 12e:	03 c0       	rjmp	.+6      	; 0x136 <main+0x3e>
    k = 10;
 130:	8a e0       	ldi	r24, 0x0A	; 10
 132:	80 93 01 02 	sts	0x0201, r24	; 0x800201 <k>
  }; 
  if(b.b1) {
 136:	80 91 00 02 	lds	r24, 0x0200	; 0x800200 <_edata>
 13a:	81 ff       	sbrs	r24, 1
 13c:	03 c0       	rjmp	.+6      	; 0x144 <main+0x4c>
    k = 20;
 13e:	84 e1       	ldi	r24, 0x14	; 20
 140:	80 93 01 02 	sts	0x0201, r24	; 0x800201 <k>
  }; 
}
 144:	90 e0       	ldi	r25, 0x00	; 0
 146:	80 e0       	ldi	r24, 0x00	; 0
 148:	08 95       	ret


cize viacmenej identicky, najprv sa cita z "portu" (volatile premennej na
adrese 0x0201) do registra (r25 v 4.2.2, r24 v 8.0), v r25 sa vyrobi 0
alebo 1 (t.j. bool), a potom sa ten bit ulozi(*) do toho structu co je na
adrese 0x200; to iste sa urobi s druhym bitom; no a nakoniec sa pouzije
sbrs (t.j. bitovy test a obskocenie, take to PICkarske btfss).

Jediny rozdiel je, ze to ulozenie bitu do structu (*) je v jednom pripade
(4.2.2) and/or, a v druhom (8.0) sa vyuziva bit T v stavovom slove a
instrukcie bst a bld.

Zlata '51 s bitovou pamatou... a este aj ten bit-banding (a
bitfield-clear-set-copy instrukcie) v Cortex-M3/M4... :-) ale pouzitelne
to aj na tom AVR je.

wek




Další informace o konferenci Hw-list