Random size optimizations

2011-03-24 06:01:28 +00:00 · 2011-03-24 06:01:28 +00:00 · 9605f56e5f
parent 6ad70d0e71
commit 9605f56e5f
4 changed files with 108 additions and 143 deletions
--- a/examples/pong/main.c
+++ b/examples/pong/main.c
@ -13,6 +13,7 @@
 #include "timer.h"
 #include "keyboard.h"
 #include "vmbackdoor.h"
 #include "math.h"
 #define PONG_DOT_SIZE           8
 #define PONG_DIGIT_PIXEL_SIZE   10
@ -21,6 +22,9 @@
 #define PONG_PLAYFIELD_COLOR    0xAAAAAA
 #define PONG_FRAME_RATE         60
 #define MODE_WIDTH              800
 #define MODE_HEIGHT             600
 #define MAX_DIRTY_RECTS         128
 #define MAX_SPRITES             8
@ -118,7 +122,7 @@ BackFill(FillRect fr)   // IN
   int i, j;
   for (i = 0; i < fr.r.h; i++) {
-      uint32 *line = &back.buffer[(fr.r.y + i) * gSVGA.width + fr.r.x];
+      uint32 *line = &back.buffer[(fr.r.y + i) * MODE_WIDTH + fr.r.x];
      for (j = 0; j < fr.r.w; j++) {
         line[j] = fr.color;
@ -170,7 +174,7 @@ BackUpdate()  // IN
      uint32 i, j;
      for (i = 0; i < rect.h; i++) {
-         uint32 offset = (rect.y + i) * gSVGA.width + rect.x;
+         uint32 offset = (rect.y + i) * MODE_WIDTH + rect.x;
         uint32 *src = &back.buffer[offset];
         uint32 *dest = &((uint32*) gSVGA.fbMem)[offset];
@ -306,7 +310,7 @@ PongDrawPlayfield()
    * Clear the screen
    */
   FillRect background = {
-      {0, 0, gSVGA.width, gSVGA.height},
+      {0, 0, MODE_WIDTH, MODE_HEIGHT},
      PONG_BG_COLOR,
   };
   BackFill(background);
@ -315,10 +319,10 @@ PongDrawPlayfield()
    * Draw the dotted dividing line
    */
   for (i = PONG_DOT_SIZE;
-        i <= gSVGA.height - PONG_DOT_SIZE * 2;
+        i <= MODE_HEIGHT - PONG_DOT_SIZE * 2;
        i += PONG_DOT_SIZE * 2) {
      FillRect dot = {
-         {(gSVGA.width - PONG_DOT_SIZE) / 2, i,
+         {(MODE_WIDTH - PONG_DOT_SIZE) / 2, i,
          PONG_DOT_SIZE, PONG_DOT_SIZE},
         PONG_PLAYFIELD_COLOR,
      };
@ -347,7 +351,7 @@ PongDrawPlayfield()
      }
      *(p++) = DecDigit(pong.scores[1], 1, FALSE);
-      PongDrawString(gSVGA.width/2, PONG_DIGIT_PIXEL_SIZE,
+      PongDrawString(MODE_WIDTH/2, PONG_DIGIT_PIXEL_SIZE,
                     scoreStr, sizeof scoreStr);
   }
 }
@ -378,7 +382,7 @@ PongDrawScreen()
   PongDrawPlayfield();
   if (pong.playfieldDirty) {
-      Rect r = {0, 0, gSVGA.width, gSVGA.height};
+      Rect r = {0, 0, MODE_WIDTH, MODE_HEIGHT};
      BackMarkDirty(r);
      pong.playfieldDirty = FALSE;
   }
@ -414,50 +418,13 @@ PongDrawScreen()
 static void
 PongLaunchBall()
 {
-   /* sin() from 0 to PI/2 */
+   float angle = Random32() * 1.4629e-9;  // (PI * 2 / MAX_UINT32)
   static const float sineTable[64] = {
      0.000000, 0.024931, 0.049846, 0.074730, 0.099568, 0.124344, 0.149042, 0.173648,
      0.198146, 0.222521, 0.246757, 0.270840, 0.294755, 0.318487, 0.342020, 0.365341,
      0.388435, 0.411287, 0.433884, 0.456211, 0.478254, 0.500000, 0.521435, 0.542546,
      0.563320, 0.583744, 0.603804, 0.623490, 0.642788, 0.661686, 0.680173, 0.698237,
      0.715867, 0.733052, 0.749781, 0.766044, 0.781831, 0.797133, 0.811938, 0.826239,
      0.840026, 0.853291, 0.866025, 0.878222, 0.889872, 0.900969, 0.911506, 0.921476,
      0.930874, 0.939693, 0.947927, 0.955573, 0.962624, 0.969077, 0.974928, 0.980172,
      0.984808, 0.988831, 0.992239, 0.995031, 0.997204, 0.998757, 0.999689, 1.000000,
   };
   int t;
   float sinT, cosT;
-   pong.ballPos.x = gSVGA.width / 2;
+   pong.ballPos.x = MODE_WIDTH / 2;
-   pong.ballPos.y = gSVGA.height / 2;
+   pong.ballPos.y = MODE_HEIGHT / 2;
-   /* Limit the random angle to avoid those within 45 degrees of vertical */
+   pong.ballVelocity.x = sinf(angle) * pong.ballSpeed;
-   t = 32 + (Random32() & 31);
+   pong.ballVelocity.y = cosf(angle) * pong.ballSpeed;
   sinT = sineTable[t];
   cosT = -sineTable[(t + 32) & 63];
   sinT *= pong.ballSpeed;
   cosT *= pong.ballSpeed;
   switch (Random32() & 3) {
   case 0:
      pong.ballVelocity.x = sinT;
      pong.ballVelocity.y = cosT;
      break;
   case 1:
      pong.ballVelocity.x = -sinT;
      pong.ballVelocity.y = cosT;
      break;
   case 2:
      pong.ballVelocity.x = -sinT;
      pong.ballVelocity.y = -cosT;
      break;
   case 3:
      pong.ballVelocity.x = sinT;
      pong.ballVelocity.y = -cosT;
      break;
   }
 }
@ -479,14 +446,14 @@ PongInit()
   pong.scores[1] = 0;
   pong.playfieldDirty = TRUE;
-   pong.paddlePos[0] = pong.paddlePos[1] = gSVGA.height / 2;
+   pong.paddlePos[0] = pong.paddlePos[1] = MODE_HEIGHT / 2;
   pong.paddles[0].r.x = 10;
   pong.paddles[0].r.w = 16;
   pong.paddles[0].r.h = 64;
   pong.paddles[0].color = PONG_SPRITE_COLOR;
-   pong.paddles[1].r.x = gSVGA.width - 16 - 10;
+   pong.paddles[1].r.x = MODE_WIDTH - 16 - 10;
   pong.paddles[1].r.w = 16;
   pong.paddles[1].r.h = 64;
   pong.paddles[1].color = PONG_SPRITE_COLOR;
@ -514,8 +481,8 @@ PongInit()
 static void
 PongUpdateMotion(float dt)  // IN
 {
-   int playableWidth = gSVGA.width - pong.ball.r.w;
+   int playableWidth = MODE_WIDTH - pong.ball.r.w;
-   int playableHeight = gSVGA.height - pong.ball.r.h;
+   int playableHeight = MODE_HEIGHT - pong.ball.r.h;
   int i;
   pong.ballPos.x += pong.ballVelocity.x * dt;
@ -523,7 +490,7 @@ PongUpdateMotion(float dt)  // IN
   for (i = 0; i < 2; i++) {
      int pos = pong.paddlePos[i] + pong.paddleVelocities[i] * dt;
-      pong.paddlePos[i] = MIN(gSVGA.height - pong.paddles[i].r.h, MAX(0, pos));
+      pong.paddlePos[i] = MIN(MODE_HEIGHT - pong.paddles[i].r.h, MAX(0, pos));
      pong.paddles[i].r.y = (int)pong.paddlePos[i];
   }
@ -572,7 +539,7 @@ PongUpdateMotion(float dt)  // IN
       * Only bounce off the paddle when we're moving toward it, to
       * prevent the ball from getting stuck inside the paddle
       */
-      if ((pong.paddles[i].r.x > gSVGA.width / 2) == (pong.ballVelocity.x > 0) &&
+      if ((pong.paddles[i].r.x > MODE_WIDTH / 2) == (pong.ballVelocity.x > 0) &&
          RectTestIntersection(&pong.ball.r, &pong.paddles[i].r)) {
         /*
          * Boing! The ball bounces back, plus it gets a little spin
@ -638,9 +605,9 @@ PongAbsMousePlayer(int playerNum)   // IN
   Bool mouseMoved = FALSE;
   while (VMBackdoor_MouseGetPacket(&p)) {
-      newY = (p.y * gSVGA.height / 0xFFFF) - pong.paddles[playerNum].r.h / 2;
+      newY = (p.y * MODE_HEIGHT / 0xFFFF) - pong.paddles[playerNum].r.h / 2;
      newY = MAX(0, newY);
-      newY = MIN(gSVGA.height - pong.paddles[playerNum].r.h, newY);
+      newY = MIN(MODE_HEIGHT - pong.paddles[playerNum].r.h, newY);
      mouseMoved = TRUE;
   }
@ -668,7 +635,7 @@ PongComputerPlayer(int playerNum,   // IN
   int ballCenter = pong.ball.r.y + pong.ball.r.h / 2;
   int distance = ballCenter - paddleCenter;
-   pong.paddleVelocities[playerNum] = distance / (float)gSVGA.height * maxSpeed;
+   pong.paddleVelocities[playerNum] = distance * maxSpeed * (1.0f / MODE_HEIGHT);
 }
@ -687,8 +654,8 @@ main(void)
 {
   Intr_Init();
   SVGA_Init();
-   SVGA_SetMode(800, 600, 32);
+   SVGA_SetMode(MODE_WIDTH, MODE_HEIGHT, 32);
-   back.buffer = (uint32*) (gSVGA.fbMem + gSVGA.width * gSVGA.height * sizeof(uint32));
+   back.buffer = (uint32*) (gSVGA.fbMem + MODE_WIDTH * MODE_HEIGHT * sizeof(uint32));
   Keyboard_Init();
   VMBackdoor_MouseInit(TRUE);
--- a/lib/Makefile.rules
+++ b/lib/Makefile.rules
@ -24,10 +24,13 @@ LDFLAGS := -nostdlib -Wl,-T,$(LIB_DIR)/metalkit/image.ld
 CFLAGS += -Wall -Werror
 # Size Optimizations.
-CFLAGS += -Os -Wl,--gc-sections -ffunction-sections -fdata-sections
+CFLAGS += -Os -fweb
 # Linker optimizations
 CFLAGS += -Wl,--gc-sections -ffunction-sections -fdata-sections
 # This enables extra gcc builtins for floating point math.
-CFLAGS += -march=i686 -ffast-math
+CFLAGS += -march=i686 -ffast-math -funsafe-math-optimizations
 # Generate debug symbols. These only show up in the .elf file, not the
 # final image. Recent versions of VMware have a gdb debug stub that
--- a/lib/metalkit/intr.c
+++ b/lib/metalkit/intr.c
@ -35,15 +35,6 @@
 #include "boot.h"
 #include "io.h"
 /*
 * Definitions for the two PIC chips.
 */
 #define PIC1_COMMAND_PORT  0x20
 #define PIC1_DATA_PORT     0x21
 #define PIC2_COMMAND_PORT  0xA0
 #define PIC2_DATA_PORT     0xA1
 /*
 * IDT table and IDT table descriptor. The table itself lives in the
@ -78,29 +69,7 @@ const struct {
   .address = IDT,
 };
-/*
+IntrTrampolineType ALIGNED(4) IntrTrampoline[NUM_INTR_VECTORS];
 * To save space, we don't include assembly-language trampolines for
 * each interrupt vector. Instead, we allocate a table in the BSS
 * segment which we can fill in at runtime with simple trampoline
 * functions. This structure actually describes executable 32-bit
 * code.
 */
 typedef struct {
   uint16      code1;
   uint32      arg;
   uint8       code2;
   IntrHandler handler;
   uint32      code3;
   uint32      code4;
   uint32      code5;
   uint32      code6;
   uint32      code7;
   uint32      code8;
 } PACKED IntrTrampolineType;
 static IntrTrampolineType ALIGNED(4) IntrTrampoline[NUM_INTR_VECTORS];
 /*
 * IntrDefaultHandler --
@ -253,52 +222,6 @@ Intr_Init(void)
 }
 /*
 * Intr_SetHandler --
 *
 *    Set a C-language interrupt handler for a particular vector.
 *    Note that the argument is a vector number, not an IRQ.
 */
 fastcall void
 Intr_SetHandler(int vector, IntrHandler handler)
 {
   IntrTrampoline[vector].handler = handler;
 }
 /*
 * Intr_SetMask --
 *
 *    (Un)mask a particular IRQ.
 */
 fastcall void
 Intr_SetMask(int irq, Bool enable)
 {
   uint8 port, bit, mask;
   if (irq >= 8) {
      bit = 1 << (irq - 8);
      port = PIC2_DATA_PORT;
   } else {
      bit = 1 << irq;
      port = PIC1_DATA_PORT;
   }
   mask = IO_In8(port);
   /* A '1' bit in the mask inhibits the interrupt. */
   if (enable) {
      mask &= ~bit;
   } else {
      mask |= bit;
   }
   IO_Out8(port, mask);
 }
 /*
 * Intr_SetFaultHandlers --
 *
--- a/lib/metalkit/intr.h
+++ b/lib/metalkit/intr.h
@ -34,6 +34,7 @@
 #define __INTR_H__
 #include "types.h"
 #include "io.h"
 #define NUM_INTR_VECTORS    256
 #define NUM_FAULT_VECTORS   0x20
@ -64,13 +65,16 @@
 #define FAULT_MC            0x12    // Machine check
 #define FAULT_XM            0x13    // SIMD floating point exception
 #define PIC1_COMMAND_PORT  0x20
 #define PIC1_DATA_PORT     0x21
 #define PIC2_COMMAND_PORT  0xA0
 #define PIC2_DATA_PORT     0xA1
 typedef void (*IntrHandler)(int vector);
 typedef void (*IntrContextFn)(void);
 fastcall void Intr_Init(void);
 fastcall void Intr_SetFaultHandlers(IntrHandler handler);
 fastcall void Intr_SetHandler(int vector, IntrHandler handler);
 fastcall void Intr_SetMask(int irq, Bool enable);
 static inline void
 Intr_Enable(void) {
@ -173,4 +177,72 @@ uint32 Intr_SaveContext(IntrContext *ctx);
 void Intr_RestoreContext(IntrContext *ctx);
 fastcall void Intr_InitContext(IntrContext *ctx, uint32 *stack, IntrContextFn main);
 /*
 * To save space, we don't include assembly-language trampolines for
 * each interrupt vector. Instead, we allocate a table in the BSS
 * segment which we can fill in at runtime with simple trampoline
 * functions. This structure actually describes executable 32-bit
 * code.
 */
 typedef struct {
   uint16      code1;
   uint32      arg;
   uint8       code2;
   IntrHandler handler;
   uint32      code3;
   uint32      code4;
   uint32      code5;
   uint32      code6;
   uint32      code7;
   uint32      code8;
 } PACKED IntrTrampolineType;
 extern IntrTrampolineType ALIGNED(4) IntrTrampoline[NUM_INTR_VECTORS];
 /*
 * Intr_SetHandler --
 *
 *    Set a C-language interrupt handler for a particular vector.
 *    Note that the argument is a vector number, not an IRQ.
 */
 static inline void
 Intr_SetHandler(int vector, IntrHandler handler)
 {
   IntrTrampoline[vector].handler = handler;
 }
 /*
 * Intr_SetMask --
 *
 *    (Un)mask a particular IRQ.
 */
 static inline void
 Intr_SetMask(int irq, Bool enable)
 {
   uint8 port, bit, mask;
   if (irq >= 8) {
      bit = 1 << (irq - 8);
      port = PIC2_DATA_PORT;
   } else {
      bit = 1 << irq;
      port = PIC1_DATA_PORT;
   }
   mask = IO_In8(port);
   /* A '1' bit in the mask inhibits the interrupt. */
   if (enable) {
      mask &= ~bit;
   } else {
      mask |= bit;
   }
   IO_Out8(port, mask);
 }
 #endif /* __INTR_H__ */