Jak jsem si nabehnul aneb double v Pascalu na AVR

Úterý Červenec 22 07:19:02 CEST 2014

> Skoro uz jsem chtel udelat test 
> rychlosti ale zrovna jsem nemel po ruce AVR... 

Na to predsa nepotrebujete AVR, staci (ba dokonca je lepsi) simulator.

Vid dole.

Ta celociselna varianta je asi 4060 cyklov, ta s double asi 27000 s
WinAVR20100110 (t.j. avr-gcc 4.3.3). A to som sa prilis nenamahal
optimalizovat... ;-)

Schvalne som nepouzival type punning, aby sa to dalo 1:1 prepisat do
Pascalu - pri volaniach treba pouzit referencie miesto pointrov, ->
nahradit ^., poupravovat definicie, nemalo by to dat privela roboty.

Na druhej strane som v tej variante s double, co som vykopiroval z appnote,
musel upravit dva riadky, inak to vracia dvojnasobny vysledok - nechapem
preco to tak napisal ten co to napisal.

Mozete skusit ten ARM, ked uz ho mate poruke :-)

wek

#include <stdint.h>

// Word-style aliases for the C shift and bitwise operators.
// Presumably chosen to mirror Pascal's shr/shl/or/and so that the code below
// can be transcribed to Pascal almost 1:1 (see the note at the top of the file).
// They expand to the bare operators, so the usual C precedence of >>, <<, |
// and & still applies: parenthesise any mixed expression.
#define SHR >>
#define SHL <<
#define OR  |
#define AND &

// h64_t: a 64-bit quantity stored as four 16-bit limbs.
// The arithmetic helpers in this file treat h0 as the least significant limb
// and h3 as the most significant one.
//
// Uncomment the DEBUG definition to overlay the limbs with a single uint64_t
// member `f`, which lets a debugger show the whole value at once (the overlay
// matches the limb order only on a little-endian target).
// #define DEBUG
#ifndef DEBUG
typedef struct __attribute__((packed)) {
  uint16_t h0, h1, h2, h3;
} h64_t;
#else
typedef union {
  struct __attribute__((packed)) {
    uint16_t h0, h1, h2, h3;
  };
  uint64_t f;
} h64_t;
#endif

// Unsigned multiply: *c = b * a, with b 16 bits wide and a 32 bits wide.
// The product (at most 48 bits) is written limb by limb into *c, h0 being
// the least significant limb.
void umul16_32(h64_t * c, uint16_t b, uint32_t a) {
  uint32_t lo_prod = (a AND 0xFFFF) * b;  // low half of a times b, weight 2^0
  uint32_t hi_prod = (a SHR 16) * b;      // high half of a times b, weight 2^16
  uint32_t mid;                           // limb 1 sum, carry in bit 16
  uint32_t top;                           // limbs 2 and 3

  // The upper half of the low product overlaps the lower half of the high one.
  mid = (hi_prod AND 0xFFFF) + (lo_prod SHR 16);
  top = (mid SHR 16) + (hi_prod SHR 16);

  c->h0 = lo_prod AND 0xFFFF;
  c->h1 = mid AND 0xFFFF;
  c->h2 = top AND 0xFFFF;
  c->h3 = top SHR 16;  // a 16x32 product never reaches this limb, so it is 0
}

// Unsigned multiply: *c = b * (*a), truncated to the low 64 bits.
//
//   c - receives the product; must not be the same object as a, because the
//       limbs of c are written before the upper limbs of a are read
//   b - 32-bit multiplier
//   a - 64-bit multiplicand stored as four 16-bit limbs (h0 least significant)
//
// Each limb of a is multiplied by b with umul16_32() and the partial product
// is accumulated at that limb's position; anything that would land above
// limb h3 is discarded.
void umul32_64(h64_t * c, uint32_t b, h64_t * a) {
  h64_t t;
  uint32_t tl;

  // a->h0 * b, weight 2^0: becomes the initial value of the result.
  umul16_32(&t, a->h0, b);
  c->h0 = t.h0; c->h1 = t.h1; c->h2 = t.h2; c->h3 = t.h3;

  // a->h1 * b, weight 2^16: added into limbs 1..3 with carry propagation.
  umul16_32(&t, a->h1, b);
  tl = (uint32_t)c->h1 + t.h0;
  c->h1 = tl AND 0xFFFF;
  tl = (tl SHR 16) + c->h2 + t.h1;
  c->h2 = tl AND 0xFFFF;
  tl = (tl SHR 16) + c->h3 + t.h2;
  c->h3 = tl AND 0xFFFF;

  // a->h2 * b, weight 2^32: added into limbs 2..3.
  umul16_32(&t, a->h2, b);
  tl = (uint32_t)c->h2 + t.h0;
  c->h2 = tl AND 0xFFFF;
  tl = (tl SHR 16) + c->h3 + t.h1;
  c->h3 = tl AND 0xFFFF;

  // a->h3 * b, weight 2^48: only its lowest limb still fits into the result.
  umul16_32(&t, a->h3, b);
  tl = (uint32_t)c->h3 + t.h0;
  c->h3 = tl AND 0xFFFF;
}

// Logical (zero-filling) right shift of *c by s bits, in place.
//
// Any shift count is accepted: whole multiples of 16 are handled by moving
// limbs, the remaining 0..15 bits by shifting across limb boundaries. Counts
// of 64 or more leave *c zero.
void shr64(h64_t * c, uint8_t s) {
  // Whole-limb part of the shift: every pass moves the value down 16 bits.
  while (s >= 16) {
    c->h0 = c->h1;
    c->h1 = c->h2;
    c->h2 = c->h3;
    c->h3 = 0;
    s = s - 16;
  }

  // Remaining 1..15 bits. This is skipped for 0 because the cross-limb term
  // would then shift by 16, which is undefined where int is only 16 bits wide.
  if (s != 0) {
    c->h0 = (c->h0 SHR s) OR (c->h1 SHL (16 - s));
    c->h1 = (c->h1 SHR s) OR (c->h2 SHL (16 - s));
    c->h2 = (c->h2 SHR s) OR (c->h3 SHL (16 - s));
    c->h3 = (c->h3 SHR s);
  }
}

// Two's-complement negation of *c in place: *c = ~(*c) + 1 (mod 2^64).
//
// The +1 is carried upwards through an explicit carry held in the upper half
// of t. Testing "lower limb == 0" instead is not sufficient: a limb whose
// original value was 0xFFFF becomes 0 by inversion alone, without any carry
// having entered it, and would wrongly pass a carry on to the next limb.
void neg64(h64_t * c) {
  uint32_t t;

  t = (uint32_t)(uint16_t)~c->h0 + 1;
  c->h0 = t AND 0xFFFF;
  t = (t SHR 16) + (uint16_t)~c->h1;
  c->h1 = t AND 0xFFFF;
  t = (t SHR 16) + (uint16_t)~c->h2;
  c->h2 = t AND 0xFFFF;
  t = (t SHR 16) + (uint16_t)~c->h3;
  c->h3 = t AND 0xFFFF;  // carry out of the top limb is dropped (mod 2^64)
}

// 64-bit addition on 16-bit limbs: *c = *a + *b (mod 2^64).
//
//   c    - receives the sum; it may be the same object as a or b, because each
//          input limb is read before the matching limb of c is written
//   b, a - the two addends
//
// The carry travels in the upper half of the 32-bit accumulator t; the carry
// out of the top limb is discarded.
void add64(h64_t * c, h64_t * b, h64_t * a) {
  uint32_t t;

  // The cast forces a 32-bit addition. Without it both operands are only
  // promoted to int, and where int is 16 bits wide the carry out of limb 0
  // would be lost.
  t = (uint32_t)a->h0 + b->h0;
  c->h0 = t AND 0xFFFF;
  t = (t SHR 16) + a->h1 + b->h1;
  c->h1 = t AND 0xFFFF;
  t = (t SHR 16) + a->h2 + b->h2;
  c->h2 = t AND 0xFFFF;
  t = (t SHR 16) + a->h3 + b->h3;
  c->h3 = t AND 0xFFFF;
}

uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
__attribute__((noinline));
uint32_t calcPressure(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
  uint32_t temp32, dt;
  _Bool minus;
  h64_t temp64, off, sens, p;

  temp32 = (uint32_t)c5 SHL 8;
  minus = (temp32 > d2);
  if (minus) {
    dt = temp32 - d2;
  } else {
    dt = d2 - temp32;
  }

  umul16_32(&off, c4, dt);
  shr64(&off, 7);
  if (minus) {
    neg64(&off);
  }
  temp64.h0 = 0; temp64.h1 = c2; temp64.h2 = 0; temp64.h3 = 0;
  add64(&off, &off, &temp64);

  umul16_32(&sens, c3, dt);
  shr64(&sens, 8);
  if (minus) {
    neg64(&sens);
  }
  temp32 = (uint32_t)c1 SHL 15;
  temp64.h0 = temp32 AND 0xFFFF; temp64.h1 = temp32 SHR 16; temp64.h2 = 0;
temp64.h3 = 0;
  add64(&sens, &sens, &temp64);

  minus = ((sens.h3 AND 0x8000) != 0);
  if (minus) neg64(&sens);
  umul32_64(&p, d1, &sens);
  if (minus) neg64(&p);
  shr64(&p, 21);
  neg64(&off);
  add64(&p, &p, &off);
  shr64(&p,15);

  return ((uint32_t)p.h1 SHL 16) + p.h0;
}

#include <math.h>

// Floating-point reference version of the first-order pressure calculation.
//
//   d1, d2 - raw readings; d2 is compared against c5 * 2^8
//   c1..c5 - calibration coefficients
//   c6     - temperature coefficient, unused because the temperature is not
//            computed here; it would be T = (2000 + dT * c6 / 2^23) / 100
//
// Returns ((d1 * SENS / 2^21 - OFF) / 2^15) / 100.
// NOTE(review): the /100 presumably converts the result to mbar - confirm
// against the sensor datasheet.
//
// The scale factors used are OFF = c2 * 2^16 + dT * c4 / 2^7 and
// SENS = c1 * 2^15 + dT * c3 / 2^8. The application-note code this was copied
// from used OFF = c2 * 2^17 + dT * c4 / 2^6 and
// SENS = c1 * 2^16 + dT * c3 / 2^7, which gave twice the expected result for
// the sample data in main().
// NOTE(review): the factors used here look like the MS5611 variant of the
// algorithm rather than the MS5607 one - confirm against the datasheets.
//
// Powers of two are written as exact constants instead of pow(2, n); the
// values are identical and no runtime pow() call is needed.
double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
                     uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6)
    __attribute__((noinline));

double calcPressure2(uint32_t d1, uint32_t d2, uint16_t c1, uint16_t c2,
                     uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6) {
  double P;     // compensated pressure value
  double dT;    // d2 minus c5 * 2^8
  double OFF;   // offset term
  double SENS;  // sensitivity term

  (void)c6;  // only needed for the temperature, which is not calculated

  // First-order pressure calculation.
  dT = d2 - c5 * 256.0;                                  // 2^8
  OFF = (double)c2 * 65536.0 + dT * c4 / 128.0;          // 2^16, 2^7
  SENS = c1 * 32768.0 + dT * c3 / 256.0;                 // 2^15, 2^8
  P = (((d1 * SENS) / 2097152.0 - OFF) / 32768.0) / 100; // 2^21, 2^15
  return P;
}

// Result sinks. They are volatile, presumably so that the compiler cannot
// discard the two calls whose results are stored here.
volatile uint32_t p;
volatile double pp;

// Benchmark driver: runs the integer and the floating-point calculation once
// each on the same sample data, then idles forever.
// The nop after each call presumably serves as a breakpoint anchor for reading
// the cycle counter in the simulator.
int main(void) {
  p = calcPressure(9085466, 8569150,
                   40127, 36924, 23317, 23282, 33464, 28312);
  __asm("nop");

  pp = calcPressure2(9085466, 8569150,
                     40127, 36924, 23317, 23282, 33464, 28312);
  __asm("nop");

  for (;;) {
    // nothing left to do; spin so that main never returns
  }
}