Jak jsem si naběhnul aneb double v Pascalu na AVR
Josef Štengl
ok1ced na nagano.cz
Úterý Červenec 22 08:48:44 CEST 2014
Jsem líný to spouštět a měřit, ale pro informaci aspoň lehké nakouknutí k ARMu. Ona ta potvora má cache a podobné vychytávky,
takže by měření bylo stejně jen orientační.
ARM Cortex-R4
VFP3D16 (single precision FPU - je to koprocesor paralelní k CPU (možné užití jako periférie))
CPU_cycle/Result_latency
int
mul 1..2/2..3 CPU (jak která instrukce)
div 4..20/? CPU cyklů (závisí na počtu nul před první jedničkou)
f32 (float)
mul 1/5
div 2/16
f64 (double)
mul 13/19
div 3/96
Result_latency - all result latencies are given as
the number of cycles until the register is available for a following instruction in the Ex2 stage.
Most ALU operations require their source registers at the start of the Ex2 stage, and have a result
latency of one.
Takže když následující instrukce výsledný registr nepotřebuje, může být dělení v FPU i rychlejší než celočíselné. Nebo
také několikanásobně pomalejší :-).
ced
Dne 22.7.2014 07:19, Jan Waclawek napsal(a):
>> Skoro uz jsem chtel udelat test
>> rychlosti ale zrovna jsem nemel po ruce AVR...
>
> Na to predsa nepotrebujete AVR, staci (ba dokonca je lepsi) simulator.
>
> Vid dole.
>
> Ta celociselna varianta je asi 4060 cyklov, ta s double asi 27000 s
> WinAVR20100110 (t.j. avr-gcc 4.3.3). A to som sa prilis nenamahal
> optimalizovat... ;-)
>
> Schvalne som nepouzival type punning, aby sa to dalo 1:1 prepisat do
> Pascalu - pri volaniach treba pouzit referencie miesto pointrov, ->
> nahradit ^., poupravovat definicie, nemalo by to dat privela roboty.
>
> Na druhej strane som v tej variante s double, co som vykopiroval z appnote,
> musel upravit dva riadky, inak to vracia dvojnasobny vysledok - nechapem
> preco to tak napisal ten co to napisal.
>
> Mozete skusit ten ARM, ked uz ho mate poruke :-)
>
> wek
>
>
>
> #include <stdint.h>
>
/*
 * Word-style spellings of the C shift and bitwise operators. The accompanying
 * message says the code is meant to be rewritable 1:1 into Pascal, which is
 * presumably why these are used instead of the native operators.
 */
#define SHR >>
#define SHL <<
#define OR |
#define AND &

/*
 * h64_t: a 64-bit quantity stored as four 16-bit halfwords, h0 being the
 * least significant and h3 the most significant one.
 *
 * Enable DEBUG to additionally overlay the halfwords with a single uint64_t
 * member `f` (handy for inspecting the value as one number; that the two
 * views agree assumes a little-endian target - TODO confirm).
 */
// #define DEBUG
#if defined(DEBUG)
typedef union {
    struct __attribute__((packed)) {
        uint16_t h0, h1, h2, h3;
    };
    uint64_t f;
} h64_t;
#else
typedef struct __attribute__((packed)) {
    uint16_t h0, h1, h2, h3;
} h64_t;
#endif
>
> void umul16_32(h64_t * c, uint16_t b, uint32_t a) {
> uint32_t t;
> t = (a AND 0xFFFF) * b;
> c->h0 = t AND 0xFFFF;
> c->h1 = t SHR 16;
> t = (a SHR 16) * b;
> c->h2 = t SHR 16;
> t = (t AND 0xFFFF) + c->h1;
> c->h1 = t AND 0xFFFF;
> t = (t SHR 16) + c->h2;
> c->h2 = t AND 0xFFFF;
> c->h3 = t SHR 16; // this is always 0 anyway...
> }
>
> void umul32_64(h64_t * c, uint32_t b, h64_t * a) {
> h64_t t;
> uint32_t tl;
>
> umul16_32(&t, a->h0, b);
> c->h0 = t.h0; c->h1 = t.h1; c->h2 = t.h2; c->h3 = t.h3;
> umul16_32(&t, a->h1, b);
> tl = (uint32_t)c->h1 + t.h0;
> c->h1 = tl AND 0xFFFF;
> tl = (tl SHR 16) + c->h2 + t.h1;
> c->h2 = tl AND 0xFFFF;
> tl = (tl SHR 16) + c->h3 + t.h2;
> c->h3 = tl AND 0xFFFF;
> umul16_32(&t, a->h2, b);
> tl = (uint32_t)c->h2 + t.h0;
> c->h2 = tl AND 0xFFFF;
> tl = (tl SHR 16) + c->h3 + t.h1;
> c->h3 = tl AND 0xFFFF;
> umul16_32(&t, a->h2, b);
> tl = (uint32_t)c->h3 + t.h0;
> c->h3 = tl AND 0xFFFF;
> }
>
> void shr64(h64_t * c, uint8_t s) {
> if (s < 16) {
> c->h0 = (c->h0 SHR s) OR (c->h1 SHL (16 - s));
> c->h1 = (c->h1 SHR s) OR (c->h2 SHL (16 - s));
> c->h2 = (c->h2 SHR s) OR (c->h3 SHL (16 - s));
> c->h3 = (c->h3 SHR s);
> } else { // we won't need s = 16 nor s > 32
> s = s - 16;
> c->h0 = (c->h1 SHR s) OR (c->h2 SHL (16 - s));
> c->h1 = (c->h2 SHR s) OR (c->h3 SHL (16 - s));
> c->h2 = (c->h3 SHR s);
> c->h3 = 0;
> }
> }
>
> void neg64(h64_t * c) {
> c->h0 = ~c->h0 + 1;
> c->h1 = ~c->h1;
> if (c->h0 == 0) c->h1++;
> c->h2 = ~c->h2;
> if (c->h1 == 0) c->h2++;
> c->h3 = ~c->h3;
> if (c->h2 == 0) c->h3++;
> }
>
> void add64(h64_t * c, h64_t * b, h64_t * a) {
> uint32_t t;
> t = a->h0 + b->h0;
> c->h0 = t AND 0xFFFF;
> t = (t SHR 16) + a->h1 + b->h1;
> c->h1 = t AND 0xFFFF;
> t = (t SHR 16) + a->h2 + b->h2;
> c->h2 = t AND 0xFFFF;
> t = (t SHR 16) + a->h3 + b->h3;
> c->h3 = t AND 0xFFFF;
> }
>
>
> uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
> __attribute__((noinline));
> uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
> uint32_t temp32, dt;
> _Bool minus;
> h64_t temp64, off, sens, p;
>
> temp32 = (uint32_t)c5 SHL 8;
> minus = (temp32 > d2);
> if (minus) {
> dt = temp32 - d2;
> } else {
> dt = d2 - temp32;
> }
>
> umul16_32(&off, c4, dt);
> shr64(&off, 7);
> if (minus) {
> neg64(&off);
> }
> temp64.h0 = 0; temp64.h1 = c2; temp64.h2 = 0; temp64.h3 = 0;
> add64(&off, &off, &temp64);
>
> umul16_32(&sens, c3, dt);
> shr64(&sens, 8);
> if (minus) {
> neg64(&sens);
> }
> temp32 = (uint32_t)c1 SHL 15;
> temp64.h0 = temp32 AND 0xFFFF; temp64.h1 = temp32 SHR 16; temp64.h2 = 0;
> temp64.h3 = 0;
> add64(&sens, &sens, &temp64);
>
> minus = ((sens.h3 AND 0x8000) != 0);
> if (minus) neg64(&sens);
> umul32_64(&p, d1, &sens);
> if (minus) neg64(&p);
> shr64(&p, 21);
> neg64(&off);
> add64(&p, &p, &off);
> shr64(&p,15);
>
> return ((uint32_t)p.h1 SHL 16) + p.h0;
> }
>
> #include <math.h>
>
double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
                     uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
    __attribute__((noinline));

/*
 * Floating-point reference version of calcPressure().
 *
 * Calculates the 1st order compensated pressure (the source comment names the
 * MS5607 1st order algorithm):
 *     dT   = d2 - c5 * 2^8
 *     OFF  = c2 * 2^16 + dT * c4 / 2^7
 *     SENS = c1 * 2^15 + dT * c3 / 2^8
 *     P    = ((d1 * SENS / 2^21 - OFF) / 2^15) / 100
 *
 * The formulas this was copied from used 2^17 and 2^6 for OFF and 2^16 and
 * 2^7 for SENS; according to the accompanying message those return twice the
 * expected result, which is why the exponents above are used instead.
 *
 * d1, d2 are the raw readings and c1..c5 the coefficients; c6 is only needed
 * for the temperature output, which is not computed here.
 *
 * The powers of two are written as exact literals rather than pow(2, n)
 * calls, so no run-time pow() evaluation is involved.
 */
double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
                     uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
    double dT;   /* difference between actual and reference temperature */
    double OFF;  /* offset at actual temperature */
    double SENS; /* sensitivity at actual temperature */

    (void)c6; /* unused: T = (2000 + dT * c6 / 2^23) / 100 is not needed */

    dT = d2 - c5 * 256.0;                            /* 2^8 */
    OFF = (double)c2 * 65536.0 + dT * c4 / 128.0;    /* 2^16, 2^7 */
    SENS = c1 * 32768.0 + dT * c3 / 256.0;           /* 2^15, 2^8 */

    /* 2^21, 2^15; the final /100 scales the result down by 100. */
    return (((d1 * SENS) / 2097152.0 - OFF) / 32768.0) / 100;
}
>
>
/*
 * Results of the two variants. Declared volatile, presumably so that the
 * compiler has to perform both calls and store their results.
 */
volatile uint32_t p;
volatile double pp;

/*
 * Test driver: runs the integer and the double variant once each on the same
 * input values, then spins forever. The nop after each call is presumably a
 * convenient breakpoint location for counting cycles in a simulator.
 */
int main(void) {
    p = calcPressure(9085466, 8569150,
                     40127, 36924, 23317, 23282, 33464, 28312);
    __asm("nop");

    pp = calcPressure2(9085466, 8569150,
                       40127, 36924, 23317, 23282, 33464, 28312);
    __asm("nop");

    for (;;) {
        /* nothing left to do */
    }
}
>
>
> _______________________________________________
> HW-list mailing list - sponsored by www.HW.cz
> Hw-list na list.hw.cz
> http://list.hw.cz/mailman/listinfo/hw-list
>
Další informace o konferenci Hw-list