C Code for Reducing Precision Floating Point

Reduced Precision Floating Point in C Assignment Sample

A normal floating-point value is 32 bits long and uses 1 bit for the sign, 23 bits for the mantissa, and 8 bits for the exponent. Negative values for the exponent mean the value is less than 1, and positive values for the exponent mean the value is greater than 1. You need to write a simple program that can read in code such as a = 1.4 b = 3.4 c = a+b print c. There are 26 variables, each a single lowercase letter, and the only operations are * and +. Code is already provided to parse the input, so all you need is the functions to convert a value to and from the reduced float format, and perform addition and multiplication. To simplify things, the values are all positive and use 4 bits for the exponent and 8 bits for the mantissa. You do not need to convert values from strings, but only to and from a regular floating-point value. This is a more advanced C programming assignment that we can provide help with.

Solution:

fp.h

#define EXPONENT_BITS 4

#define FRACTION_BITS 8

fp_functs.c

#include

#include “fp.h”

int computeFP(float val)

{

// input: float value to be represented

// output: integer version in our representation

// Perform this the same way we did in class –

// either dividing or multiplying the value by 2

// until it is in the correct range (between 1 and 2).

// Your exponent is the number of times this operation

// was performed.

// Deal with rounding by simply truncating the number.

// You will only have Positive Values. Sign will always be 0.

// Check for overflow –

// with 4 exponent bits, we have overflow if the number to be

// stored is:

// for overflow (exp > 14), store the value as Positive Infinity

// Check for Denormalized values as well.

// If M is in the form of 1.X and E is < 1-Bias, it will be Denormalized

// If the number is too small to encode as denormalized, return 0.

// initialize E

int E = 0;

// check val if > 1 or < 1

// and increment E accordingly

if (val > 1.0)

{

while (val >= 2)

{

val = val / 2;

E++;

}

else if (val < 1.0)

{

while (val <= 1)

{

val = val * 2;

E–;

}

// check for overflow and underflow

if (E > 14)

return -1;

if (E == 0 || E < -14)

return 0;

// initialize

int fract_size = (1 << FRACTION_BITS);

int bias = (1 << (EXPONENT_BITS – 1)) – 1;

int exponent = E + bias;

int frac = 0;

int j;

for (j = 0; j < fract_size; j++)

{

float M = (1 + (float) j / fract_size);

float M1 = (1 + (float) (j + 1) / fract_size);

if (M == val)

{

frac = j;

break;

}

if (val > M && val < M1)

{

frac = j;

break;

}

int x = 0 | exponent;

x = x << 8;

x = x | frac;

//return

return x;

}

float getFP(int val)

{

// Using the defined representation, compute the floating point

// value

// For 0, simply return 0.

// For Infinity, return -1;

int fract_size = (1 << FRACTION_BITS);

int bias = (1 << (EXPONENT_BITS – 1)) – 1;

int exponent = (val & 0x0F00) >> 8;

int fraction = val & 0x00FF;

float E = powf(2.0, (float) exponent – bias);

float M = (1 + (float) fraction / fract_size);

float x = M * E;

// return

return x;

}

int multVals(int source1, int source2)

{

// You must implement this by using the algorithm

// described in class:

// Add the exponents: E = E1+E2

// multiply the fractional values: M = M1*M2

// if M too large, divide it by 2 and increment E

// save the result

// You will only deal with positive source values

// Be sure to check for overflow – store value as Infinity

// If the value is too small for denormalized, return 0

int fract_size = (1 << FRACTION_BITS);

int bias = (1 << (EXPONENT_BITS – 1)) – 1;

int exponent1 = (source1 & 0x0F00) >> 8;

int fraction1 = source1 & 0x00FF;

int exponent2 = (source2 & 0x0F00) >> 8;

int fraction2 = source2 & 0x00FF;

int E1 = exponent1 – bias;

int E2 = exponent2 – bias;

int E = E1 + E2;

float M1 = (1 + (float) fraction1 / fract_size);

float M2 = (1 + (float) fraction2 / fract_size);

float M = M1 * M2;

if (M > 2)

{

M = M / 2;

E++;

}

if (E > 14)

return -1;

if (E == 0 || E < -14)

return 0;

int frac = 0;

int exponentMulti = E + bias;

int j;

for (j = 0; j < fract_size; j++)

{

float newM = (1 + (float) j / fract_size);

float newM1 = (1 + (float) (j + 1) / fract_size);

if (newM == M)

{

frac = j;

break;

}

if (M > newM && M < newM1)

{

frac = j;

break;

}

int x = 0 | exponentMulti;

x = x << 8;

x = x | frac;

// return

return x;

}

int addVals(int source1, int source2)

{

// Do this function last – it is the most difficult!

// You must implement this as described in class:

// If needed, adjust one of the two number so that

// they have the same exponent E

// Add the two fractional parts: F1′ + F2 = F

// (assumes F1′ is the adjusted F1)

// Adjust the sum F and E so that F is in the correct range

// As described in the handout, you only need to implement this for

// positive source values

// If the sum results in overflow, return Infinity

// If the sum is 0, return 0

int fract_size = (1 << FRACTION_BITS);

int bias = (1 << (EXPONENT_BITS – 1)) – 1;

int exponent1 = (source1 & 0x0F00) >> 8;

int fraction1 = source1 & 0x00FF;

int exponent2 = (source2 & 0x0F00) >> 8;

int fraction2 = source2 & 0x00FF;

int E1 = exponent1 – bias;

int E2 = exponent2 – bias;

int Ediff;

int E;

float M1 = (1 + (float) fraction1 / fract_size);

float M2 = (1 + (float) fraction2 / fract_size);

float M;

if (E1 == E2)

{

M = M1 + M2;

}

if (E1 > E2)

{

Ediff = E1 – E2;

E = E2;

M1 = M1 * powf(2.0, Ediff);

M = M1 + M2;

}

else

{

Ediff = E2 – E1;

E = E1;

M2 = M2 * powf(2.0, Ediff);

M = M1 + M2;

}

if (M > 1.0)

{

while (M >= 2)

{

M = M / 2;

E++;

}

else if (M < 1.0)

{

while (M < 1)

{

M = M * 2;

E–;

}

if (E > 14)

return -1;

if (E == 0 || E < -14)

return 0;

int exponentAdd = E + bias;

int j;

int frac = 0;

for (j = 0; j < fract_size; j++)

{

float newM = (1 + (float) j / fract_size);

float newM1 = (1 + (float) (j + 1) / fract_size);

if (newM == M)

{

frac = j;

break;

}

if (M > newM && M < newM1)

{

frac = j;

break;

}

int x = 0 | exponentAdd;

x = x << 8;

x = x | frac;

// return

return x;

}

fp_program.c

#include

#include “fpParse.h”

typedef enum {PRINT_OP, ASSIGN_OP, ADD_OP, MULT_OP} operation;

typedef struct l {

operation op;

int id1,id2,id3;

float fpv;

} op_rec;

// Prototypes

void execute_op();

void match(int token);

int is_id(int token);

void line();

void print();

void assign();

extern int yylex();

extern void test_it();

// shared with the lexer

extern int lineno;

extern float fp_val;

// functions you will implement are:

extern int computeFP(float val);

extern float getFP(int val) ;

extern int addVals(int source1, int source2);

extern int multVals(int source1, int source2);

int variable[27];

op_rec current_operation;

int lookahead;

int main(int argc, char**argv) {

if ((argc == 2) && (strcmp(argv[1],”-t”)==0)) {

test_it(); exit(1);

}

printf(“> “);

lookahead = yylex();

while (is_id(lookahead) || (lookahead==PRINT)) {line(); }

printf(“Exiting\n”);

return 0;

}

// Calls the functions you will implement based on the input program

void execute_op() {

switch(current_operation.op) {

case ASSIGN_OP:

variable[current_operation.id1] = computeFP(current_operation.fpv);

break;

case ADD_OP:

variable[current_operation.id1] =

addVals(variable[current_operation.id2],

variable[current_operation.id3]);

break;

case MULT_OP:

variable[current_operation.id1] =

multVals(variable[current_operation.id2],

variable[current_operation.id3]);

break;

}

// The code below is a recursive descent parser that processes the input

// program and builds the current_operation information. If there is a

// syntax error in the input, the program will halt. Once a line has

// been processed, execute_op() is called.

void match(int token) {

if (token == lookahead) lookahead = yylex();

else {

printf(“Unexpected input\n”);

exit(2);

}

int is_id(int token) {

return ((token > 0) && (token <=26));

}

void line() {

if (lookahead == PRINT) print(); else {

assign();

execute_op();

}

void print() {

match(PRINT);

current_operation.op = PRINT_OP;

current_operation.id1 = lookahead;

if (is_id(lookahead)) match(lookahead);

else {printf(“ID expected line %d\n”,lineno); exit(3);}

printf(“%c = %0.10f\n”,current_operation.id1+’a’ -1,

getFP(variable[current_operation.id1]));

printf(“> “);

match(EOLN);

}

void assign() {

current_operation.id1 = lookahead;

if (is_id(lookahead)) match(lookahead);

else {printf(“ID expected line %d\n”,lineno); exit(3);}

match(ASSIGN);

if (lookahead == FLOAT) {

current_operation.op = ASSIGN_OP;

current_operation.fpv = fp_val;

match(FLOAT);

} else {

current_operation.id2 = lookahead;

if (is_id(lookahead)) match(lookahead);

else {printf(“ID expected line %d\n”,lineno); exit(3);}

if (lookahead == PLUS) {

match(PLUS);

current_operation.op = ADD_OP;

} else {

match(MULT);

current_operation.op = MULT_OP;

}

current_operation.id3 = lookahead;

if (is_id(lookahead)) match(lookahead);

else {printf(“ID expected line %d\n”,lineno); exit(3);}

}

printf(“> “);

match(EOLN);

}

fpParse.h

#define ASSIGN 31

#define PLUS 27

#define PRINT 28

#define EOLN 29

#define FLOAT 30

#define MULT 32

Elaborative reduced precision floating point in C assignment sample

Reduced Precision Floating Point in C Assignment Sample

Solution: