Jak jsem si nabehnul aneb double v pascalu na AVR

Jaroslav Buchta jaroslav.buchta na hascomp.cz
Úterý Červenec 22 09:07:25 CEST 2014

OK, tak jednotlive operace mi vychazeji takto, optimalizace je O1.
Rekl bych, ze mezi pouzitim int32_t a float neni propastny rozdil,
nejvetsi rozdil je logicky u scitani:

volatile int32_t ri;
volatile float rf;

// Cycle-count comparison of 32-bit integer and float arithmetic on AVR,
// built with optimization level O1. All operands and results are volatile,
// so every measured operation really loads its inputs and stores its result.
// The cycle counts in the trailing comments are the values reported by the
// author of the measurement.
int main(void) {
     volatile int32_t i = 14;
     volatile float f = 12.3;

     volatile int32_t ki = 1234;
     volatile float kf = 3.1415926;

     ri = i + ki;    // 27 cycles

     ri = i * ki;    // 73 cycles

     ri = ri / ki;    // 638 cycles

     rf = f + kf;    // 128 cycles

     rf = f * kf;    // 154 cycles

     rf = rf / kf;    // 651 cycles
}

Dne 22. 7. 2014 7:19, Jan Waclawek napsal(a):
>> Skoro uz jsem chtel udelat test
>> rychlosti ale zrovna jsem nemel po ruce AVR...
> Na to predsa nepotrebujete AVR, staci (ba dokonca je lepsi) simulator.
> Vid dole.
> Ta celociselna varianta je asi 4060 cyklov, ta s double asi 27000 s
> WinAVR20100110 (t.j. avr-gcc 4.3.3). A to som sa prilis nenamahal
> optimalizovat... ;-)
> Schvalne som nepouzival type punning, aby sa to dalo 1:1 prepisat do
> Pascalu - pri volaniach treba pouzit referencie miesto pointrov, ->
> nahradit ^., poupravovat definicie, nemalo by to dat privela roboty.
> Na druhej strane som v tej variante s double, co som vykopiroval z appnote,
> musel upravit dva riadky, inak to vracia dvojnasobny vysledok - nechapem
> preco to tak napisal ten co to napisal.
> Mozete skusit ten ARM, ked uz ho mate poruke :-)
> wek
> #include <stdint.h>
> #define SHR >>
> #define SHL <<
> #define OR  |
> #define AND &
> // #define DEBUG
> // h64_t: a 64-bit unsigned quantity stored as four 16-bit limbs. The
> // arithmetic helpers in this file treat h0 as the least significant limb
> // and h3 as the most significant one.
> #ifdef DEBUG
> // Debug variant: the limbs are overlaid with a single uint64_t member f,
> // presumably so the whole value can be inspected at once.
> // NOTE(review): f matches the limb order only on a little-endian target -
> // confirm for the platform in use.
> typedef union {
>    struct __attribute__((packed)) {
>      uint16_t h0;
>      uint16_t h1;
>      uint16_t h2;
>      uint16_t h3;
>    };
>    uint64_t f;
> } h64_t;
> #else
> // Normal variant: just the four limbs, no 64-bit overlay.
> typedef struct __attribute__((packed)) {
>    uint16_t h0;
>    uint16_t h1;
>    uint16_t h2;
>    uint16_t h3;
> } h64_t;
> #endif
> // Multiplies the 32-bit value a by the 16-bit value b and stores the
> // product in *c as 16-bit limbs (h0 = least significant). The product
> // needs at most 48 bits, so h3 always ends up 0.
> void umul16_32(h64_t * c, uint16_t b, uint32_t a) {
>    uint32_t t;
>    t = (a AND 0xFFFF) * b;      // low half of a times b
>    c->h0 = t AND 0xFFFF;
>    c->h1 = t SHR 16;            // provisional h1: upper half of the low product
>    t = (a SHR 16) * b;          // high half of a times b, weighted by 2^16
>    c->h2 = t SHR 16;            // provisional h2: upper half of the high product
>    t = (t AND 0xFFFF) + c->h1;  // sum both contributions to limb 1
>    c->h1 = t AND 0xFFFF;
>    t = (t SHR 16) + c->h2;      // propagate the carry into limb 2
>    c->h2 = t AND 0xFFFF;
>    c->h3 = t SHR 16; // this is always 0 anyway...
> }
> // Multiplies the 64-bit value *a (four 16-bit limbs, h0 least significant)
> // by the 32-bit value b and stores the low 64 bits of the product in *c.
> // Each limb of a is multiplied by b with umul16_32 and the partial product
> // is added into c at that limb's offset; anything above bit 63 is dropped.
> // c must not point to the same object as a, because c is written before
> // the upper limbs of a are read.
> void umul32_64(h64_t * c, uint32_t b, h64_t * a) {
>    h64_t t;
>    uint32_t tl;
>    // a->h0 * b, weight 2^0: initializes all four limbs of c
>    umul16_32(&t, a->h0, b);
>    c->h0 = t.h0; c->h1 = t.h1; c->h2 = t.h2; c->h3 = t.h3;
>    // a->h1 * b, weight 2^16
>    umul16_32(&t, a->h1, b);
>    tl = (uint32_t)c->h1 + t.h0;
>    c->h1 = tl AND 0xFFFF;
>    tl = (tl SHR 16) + c->h2 + t.h1;
>    c->h2 = tl AND 0xFFFF;
>    tl = (tl SHR 16) + c->h3 + t.h2;
>    c->h3 = tl AND 0xFFFF;
>    // a->h2 * b, weight 2^32
>    umul16_32(&t, a->h2, b);
>    tl = (uint32_t)c->h2 + t.h0;
>    c->h2 = tl AND 0xFFFF;
>    tl = (tl SHR 16) + c->h3 + t.h1;
>    c->h3 = tl AND 0xFFFF;
>    // a->h3 * b, weight 2^48: only the lowest limb of the partial product
>    // still fits into the 64-bit result
>    umul16_32(&t, a->h3, b);
>    tl = (uint32_t)c->h3 + t.h0;
>    c->h3 = tl AND 0xFFFF;
> }
> // Shifts the 64-bit value *c right by s bits in place, filling the vacated
> // upper bits with zeros (a logical shift, not an arithmetic one).
> // Supported shift counts are 1..15 and 17..32: the expressions below shift
> // a 16-bit limb left by (16 - s), which is a shift by the full limb width
> // for s == 0 or s == 16 and a negative shift count for s > 32.
> void shr64(h64_t * c, uint8_t s) {
>    if (s < 16) {
>      // Each limb receives its own upper bits plus the low bits of the limb
>      // above; limbs are updated from h0 upwards so every source limb is
>      // still unmodified when it is read.
>      c->h0 = (c->h0 SHR s) OR (c->h1 SHL (16 - s));
>      c->h1 = (c->h1 SHR s) OR (c->h2 SHL (16 - s));
>      c->h2 = (c->h2 SHR s) OR (c->h3 SHL (16 - s));
>      c->h3 = (c->h3 SHR s);
>    } else {  // we won't need s = 16 nor s > 32
>      // Shift by one whole limb (16 bits) plus the remaining s - 16 bits.
>      s = s - 16;
>      c->h0 = (c->h1 SHR s) OR (c->h2 SHL (16 - s));
>      c->h1 = (c->h2 SHR s) OR (c->h3 SHL (16 - s));
>      c->h2 = (c->h3 SHR s);
> 	c->h3 = 0;
>    }
> }
> // Replaces the 64-bit value *c by its two's complement (0 - *c) in place.
> // All limbs are inverted and 1 is added to the lowest limb; the carry is
> // passed to the next limb only when the limb below actually wrapped to 0
> // because of that carry. A limb that is 0 merely because its original
> // value was 0xFFFF must not trigger a carry on its own.
> void neg64(h64_t * c) {
>    c->h0 = ~c->h0 + 1;
>    c->h1 = ~c->h1;
>    c->h2 = ~c->h2;
>    c->h3 = ~c->h3;
>    if (c->h0 == 0) {        // +1 overflowed limb 0: carry into limb 1
>      c->h1++;
>      if (c->h1 == 0) {      // carry overflowed limb 1: carry into limb 2
>        c->h2++;
>        if (c->h2 == 0) {    // carry overflowed limb 2: carry into limb 3
>          c->h3++;
>        }
>      }
>    }
> }
> // Adds the 64-bit values *a and *b limb by limb and stores the low 64 bits
> // of the sum in *c; the carry out of the top limb is discarded.
> // c may point to the same object as a or b, because every limb of the
> // inputs is read before the corresponding limb of c is written.
> void add64(h64_t * c, h64_t * b, h64_t * a) {
>    uint32_t t;
>    // The cast forces a 32-bit addition. Where int is only 16 bits wide (as
>    // with avr-gcc) the sum of two uint16_t values would otherwise wrap and
>    // lose the carry into limb 1.
>    t = (uint32_t)a->h0 + b->h0;
>    c->h0 = t AND 0xFFFF;
>    t = (t SHR 16) + a->h1 + b->h1;   // t SHR 16 is the carry from the limb below
>    c->h1 = t AND 0xFFFF;
>    t = (t SHR 16) + a->h2 + b->h2;
>    c->h2 = t AND 0xFFFF;
>    t = (t SHR 16) + a->h3 + b->h3;
>    c->h3 = t AND 0xFFFF;
> }
> // Integer-only pressure calculation built on the 64-bit limb helpers.
> // It evaluates
> //   dT   = d2 - c5 * 2^8
> //   OFF  = c2 * 2^16 + dT * c4 / 2^7
> //   SENS = c1 * 2^15 + dT * c3 / 2^8
> //   P    = (d1 * SENS / 2^21 - OFF) / 2^15
> // and returns the low 32 bits of P. Unlike calcPressure2 there is no final
> // division by 100. c6 is accepted but not used.
> // NOTE(review): d1/d2 are presumably the raw pressure/temperature readings
> // and c1..c6 the sensor calibration words - confirm against the datasheet.
> // noinline is requested on the prototype, presumably so the function stays
> // a separate unit when cycles are counted.
> uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
> __attribute__((noinline));
> uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
>    uint32_t temp32, dt;
>    _Bool minus;
>    h64_t temp64, off, sens, p;
>    // dT is kept as sign (minus) and magnitude (dt) because the multiply and
>    // shift helpers are unsigned.
>    temp32 = (uint32_t)c5 SHL 8;
>    minus = (temp32 > d2);
>    if (minus) {
>      dt = temp32 - d2;
>    } else {
>      dt = d2 - temp32;
>    }
>    // off = dT * c4 / 2^7: multiply and shift the magnitude, then apply the sign
>    umul16_32(&off, c4, dt);
>    shr64(&off, 7);
>    if (minus) {
>      neg64(&off);
>    }
>    // off += c2 * 2^16 (c2 is placed directly into limb h1)
>    temp64.h0 = 0; temp64.h1 = c2; temp64.h2 = 0; temp64.h3 = 0;
>    add64(&off, &off, &temp64);
>    // sens = dT * c3 / 2^8, again via magnitude and sign
>    umul16_32(&sens, c3, dt);
>    shr64(&sens, 8);
>    if (minus) {
>      neg64(&sens);
>    }
>    // sens += c1 * 2^15
>    temp32 = (uint32_t)c1 SHL 15;
>    temp64.h0 = temp32 AND 0xFFFF; temp64.h1 = temp32 SHR 16; temp64.h2 = 0;
> temp64.h3 = 0;
>    add64(&sens, &sens, &temp64);
>    // p = d1 * sens: multiply the magnitude of sens and restore its sign
>    minus = ((sens.h3 AND 0x8000) != 0);
>    if (minus) neg64(&sens);
>    umul32_64(&p, d1, &sens);
>    if (minus) neg64(&p);
>    // NOTE(review): shr64 shifts in zeros, so this division by 2^21 is only
>    // correct while p is non-negative.
>    shr64(&p, 21);
>    // p -= off (off is negated in place and then added)
>    neg64(&off);
>    add64(&p, &p, &off);
>    shr64(&p,15);
>    // the result is the low 32 bits of p
>    return ((uint32_t)p.h1 SHL 16) + p.h0;
> }
> #include <math.h>
> // Floating-point version of the first-order pressure calculation (MS5607
> // 1st order algorithm). Returns the compensated pressure P divided by 100.
> // c6 is only referenced by the commented-out temperature formula and is
> // therefore unused. The commented-out OFF and SENS lines keep the earlier
> // scaling (2^17 / 2^6 and 2^16 / 2^7) next to the formulas actually used.
> double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
> __attribute__((noinline));
> double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
> uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
>    double P;  // compensated pressure value
> //  double T; // compensated temperature value
>    double dT;  // difference between actual and measured temperature
>    double OFF;  // offset at actual temperature
>    double SENS;  // sensitivity at actual temperature
>    // calculate 1st order pressure and temperature
>    // (MS5607 1st order algorithm)
>    dT=d2-c5*pow(2,8);
> //  OFF=(double)c2*pow(2,17)+dT*c4/pow(2,6);
>    OFF=(double)c2*pow(2,16)+dT*c4/pow(2,7);
> //  SENS=c1*pow(2,16)+dT*c3/pow(2,7);
>    SENS=c1*pow(2,15)+dT*c3/pow(2,8);
>    // T=(2000+(dT*c6)/pow(2,23))/100;
>    P=(((d1*SENS)/pow(2,21)-OFF)/pow(2,15))/100;
>    return P;
> }
> // Results are written to volatile globals, so the stores (and with them the
> // two calls) cannot be removed by the optimizer.
> volatile uint32_t p;
> volatile double pp;
> // Runs the integer and the floating-point pressure calculation once each
> // with the same fixed input values, then loops forever.
> int main(void) {
>    p = calcPressure(9085466, 8569150, 40127, 36924, 23317, 23282, 33464,
> 28312);
>    // The nop instructions presumably serve as markers/breakpoint locations
>    // for counting cycles in a simulator.
>    __asm("nop");
>    pp = calcPressure2(9085466, 8569150, 40127, 36924, 23317, 23282, 33464,
> 28312);
>    __asm("nop");
>    while(1);
> }
> _______________________________________________
> HW-list mailing list  -  sponsored by www.HW.cz
> Hw-list na list.hw.cz
> http://list.hw.cz/mailman/listinfo/hw-list

This email is free from viruses and malware because avast! Antivirus protection is active.

Další informace o konferenci Hw-list