You can create ANSI C programs on the UWO Engineering Linux server. (ssh login to linux01.eng.uwo.ca)
[10:00pm linux01] cat > distrib.c
int p,q,r,s,t;
main() {
p=(q+r)*(s-t);
}
Programs can be cross-compiled for 8086 machines using bcc:
[10:02pm linux01] bcc -S -o distrib.s distrib.c
bcc produces a human-readable assembler output
when the -S switch is used. You may cat the file:
[10:02pm linux01] cat distrib.s
! 1
! 1 int p,q,r,s,t;
! 2 main() {
export _main
_main:
! 3 p=(q+r)*(s-t);
push bp
mov bp,sp
push di
push si
mov ax,[_s]
sub ax,[_t]
push ax
mov ax,[_q]
add ax,[_r]
mov cx,-6[bp]
imul cx
inc sp
inc sp
mov [_p],ax
! 4 }
pop si
pop di
pop bp
ret
! 5
.data
.bss
.comm _t,2
.comm _q,2
.comm _s,2
.comm _p,2
.comm _r,2
Here is an explanation of the .s listing:
The integer declarations cause the compiler to emit .comm directives, each of which allocates a two-byte variable in the common block:
int p,q,r,s,t;
.data
.bss
.comm _t,2
.comm _q,2
.comm _s,2
.comm _p,2
.comm _r,2
The main function assembles to the exported symbol _main, whose code begins by saving the BP register on the stack and setting BP to the current stack pointer. Then DI and SI are pushed, using up two 2-byte words:
export _main
_main:
main() {
push bp
mov bp,sp
push di
push si
The expression is implemented by first subtracting (s-t) and pushing that result onto the stack. Subsequently, after (q+r) has been computed in AX, the saved difference is retrieved from its stack location (-6 bytes from where the base pointer BP points: DI was pushed at -2, SI at -4, and the temporary at -6). This temporary 2-byte word is deallocated from the stack by incrementing SP twice. Finally, the result of the operations is stored in the variable (int p), which the assembler names _p.
p=(q+r)*(s-t);
mov ax,[_s]
sub ax,[_t]
push ax
mov ax,[_q]
add ax,[_r]
mov cx,-6[bp]
imul cx
inc sp
inc sp
mov [_p],ax
Finally, at the end of the C program, the stack is re-balanced as the registers SI, DI, and BP are popped (in the reverse of the order in which they were pushed).
pop si
pop di
pop bp
ret
Consider the following program which will read a character and convert it to uppercase if it was a lowercase character:
[9:14pm linux01] cat > upcase1.c
char ch,out;
main() {
ch = getchar();
if (('a' <= ch) && (ch <= 'z'))
out = ch + 'A' - 'a';
else
out = ch;
putchar(out);
}
This program compiles to the following
essential assembler instructions:
call _getchar
mov [_ch],al
mov al,[_ch]
cmp al,*$61
jb .1
.3:
mov al,[_ch]
cmp al,*$7A
ja .1
.2:
mov al,[_ch]
xor ah,ah
add ax,*-$20
mov [_out],al
jmp .4
.1:
mov al,[_ch]
mov [_out],al
.4:
mov al,[_out]
xor ah,ah
push ax
call _putchar
[9:14pm linux01] !bc
bcc -S -o upcase1.s upcase1.c
[9:14pm linux01] cat upcase1.s
! 1
! 1 char ch,out;
! 2 main() {
export _main
_main:
push bp
mov bp,sp
push di
push si
call _getchar
mov [_ch],al
mov al,[_ch]
cmp al,*$61
jb .1
.3:
mov al,[_ch]
cmp al,*$7A
ja .1
.2:
mov al,[_ch]
xor ah,ah
add ax,*-$20
mov [_out],al
jmp .4
.1:
mov al,[_ch]
mov [_out],al
! 8 putchar(out);
.4:
mov al,[_out]
xor ah,ah
push ax
call _putchar
inc sp
inc sp
! 9 }
pop si
pop di
pop bp
ret
.data
.bss
.comm _ch,1
.comm _out,1
The following ANSI C program makes use of `short hand' notations (an assignment inside an expression and the ?: conditional operator), but this leads to only slightly smaller machine code:
[9:17pm linux01] cat > upcase2.c
char ch;
main() {
putchar((('a'<= (ch=getchar()))&&(ch<='z')) ?
(ch+'A'-'a') : ch );
}
call _getchar
mov [_ch],al
cmp al,*$61
jb .1
.3:
mov al,[_ch]
cmp al,*$7A
ja .1
.2:
mov al,[_ch]
xor ah,ah
add ax,*-$20
jmp .4
.1:
mov al,[_ch]
xor ah,ah
.4:
push ax
call _putchar
[9:17pm linux01] cat > upcase2.c
char ch;
main() {
putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? (ch+'A'-'a') : ch );
}
[9:17pm linux01] bcc -S -o upcase2.s upcase2.c
[9:17pm linux01] cat upcase2.s
! 1 char ch;
! 2 main() {
export _main
_main:
! 3 putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? (ch+'A'-'a') : ch );
push bp
mov bp,sp
push di
push si
call _getchar
mov [_ch],al
cmp al,*$61
jb .1
.3:
mov al,[_ch]
cmp al,*$7A
ja .1
.2:
mov al,[_ch]
xor ah,ah
add ax,*-$20
jmp .4
.1:
mov al,[_ch]
xor ah,ah
.4:
push ax
call _putchar
inc sp
inc sp
! 4 }
pop si
pop di
pop bp
ret
.data
.bss
.comm _ch,1
Consider the following ANSI C program in order
to explain the functionality of each of the lines:
char n;
int i,j=0;
main() {
n = 7&i;
n = ( ~0xf)^n;
n = sizeof(i);
for ( i=1; i != 0; i = i << 1 ) j++;
}
n = 7&i;
mov al,[_i]
and al,*7
mov [_n],al
n = ( ~0xf)^n;
mov ax,*-$10
xor al,[_n]
mov [_n],al
n = sizeof(i);
mov al,*2
mov [_n],al
for ( i=1; i != 0; i = i << 1 ) j++;
mov ax,*1
mov [_i],ax
jmp .3
.4:
mov ax,[_j]
inc ax
mov [_j],ax
.2:
mov ax,[_i]
shl ax,*1
mov [_i],ax
.3:
mov ax,[_i]
test ax,ax
jne .4
OPERATOR PRECEDENCE
precedence (highest first;
all associate left to right except as indicated)
=====================================================
scoping (C++ only) ::
postfix function() array[] -> . (C++: ++ --)
unary (RtoL) ! ~ ++ -- - * & sizeof +
(and C++: new delete)
casting (type)
(C++ only) .* ->*
arithmetic binary * / %
arithmetic binary + -
shift << >>
relational < <= > >=
relational == !=
bit ops and &
ex or ^
or |
logical &&
||
(cond? T : F) (RtoL) ?:
assignment(RtoL) = += -= *= /= %= &= ^= |= <<= >>=
expression sequence ,
======================================================
Notes:
"function()" refers to function invocation
"(type)" refers to type casting
unary +, - and * have higher precedence than
the corresponding binary operators
Note that because the precedence of bitwise
operators &, ^ and | is lower than that of the
equality operators (== and !=),
bit-testing expressions must be parenthesised
to give proper results: e.g.
if ((value & BITMASK) == REQUIREDBITS) {...}
The comma between parameters of a function call is not an operator and is different from the comma operator.
Unary + is not available in old C.
Example unary ops: *argv++ means *(argv++)
Example: *a[3] means *(a[3]) and *a.p means *(a.p)
More common, however, is a->p which abbreviates (*a).p
Grouping ()'s override precedence but do NOT force order of evaluation. In old C
a+(b+c) could actually be executed as (a+b)+c. To force order of evaluation
use: a+ +(b+c) or (t=b+c, a+t). This applies to commutative and associative
operations: + * & ^ | In ANSI C the order in which operands are evaluated is
unspecified, but the combination must be done as the brackets imply.
The precedence and associativity are natural except for the bit and shift
ops. Note especially that the unary operations are very high and are all
at the same level. It could be argued that the bit ops should have been placed
with the arithmetic ops and the shifts just below the unary ops - treat
these with caution; extra ()'s may be wise. Examples:
x & y == 0 means x & (y == 0) whereas (x & y) == 0 is probably intended.
x<<4 + y means x<< (4+y) whereas (x<<4) + y might be intended.
The latter example also shows spacing does not imply precedence.
++ -- and assignments are the only operators that change a stored named value;
the others just change temporary/scratch values.
SEQUENCE POINTS
Sequence points in ANSI C: These are points at which everything before the
point must be done before the compiler/machine can go on to do anything after it.
1) at operators: && || ?: , (the comma-operator), after the first operand is evaluated. Note that unary + is NOT a sequence point.
2) an expression must be completely evaluated before proceeding if:
it is an ordinary statement, that is, everything before a ";" or "}"
is done before proceeding.
it is the control expression in an: if, for, do, while, switch.
it is an initializer of an auto object.
3) all parameters to a function are evaluated before the function is called
and the return expression is completely evaluated before the return is taken.
Note that assignment is NOT a sequence point, thus constructions like
a[i++] = i; should never be used. Also bad: f(i++) + g(i)
DECLARATIONS
Interpret declarations by reading from the name outward using:
[N] = " ... array of size N, each of whose elements is ... "
(x) = " ... function (with parameters x) returning value ... "
In Old C, x is not given.
In Ansi C use "void" to indicate no parameters.
A comma will always signal a parameter list.
* = " ... pointer to ... " This has lowest precedence!
In C++, mainly for call by reference formal param.: & = "...reference to..."
( ) may also indicate grouping. Unlike in other contexts, extra ( )'s are
potentially harmful: extra grouping is NOT allowed where
this may lead to ambiguity - see example below.
In casts the name is dropped and enclosing () are used; if in doubt read from outside inward.
typedefs are strongly recommended for complicated situations. EXAMPLES:
int *g[N]; int *(g[N]); /* array of pointers */
g is an array of size N each of whose elements is a pointer to int
int (*g)[N]; /* pointer to an array */
g is a pointer to: an array of size N each of whose elements is an int
int *f(void); /* function returning pointer */
f is a function with no parameters returning value a pointer to int
int (*f)();
f is a pointer to a function with unspecified parameters which returns an int
(int ())
a cast to a function returning an integer: obtained by stripping
f from int f(); and then enclosing result in ( )
Note that int x; and int (x); are same declarations but the latter has
unnecessary parentheses so the cast to int is: (int) NOT (int ()) !!!
void (*signal(int,void(*)(int)))(int); /* very confusing */
Use a typedef instead:
typedef void (*HANDLER)(int); /* define intermediate type HANDLER */
HANDLER signal(int,HANDLER); /* equivalent to the above */
This says that HANDLER is a pointer to a function that takes an
int value and returns void. signal is a function that takes an
int and a HANDLER and returns a HANDLER.
NOTES:
In expressions read * (dereferencing) as " ... value at address ... "
and & (referencing) as " ... address of ... "
[12:11am linux01] cat > line1.c
int i=5;
char line[80];
main() {
line[i] = 7;
}
[12:11am linux01] bcc -S -o line1.s line1.c
[12:12am linux01] cat line1.s
! 1
! 1 int i=5;
.data
export _i
_i:
.word 5
! 2 char line[80];
! 3 main() {
.text
export _main
_main:
! 4 line[i] = 7;
push bp
mov bp,sp
push di
push si
mov bx,[_i]
mov al,*7
mov _line[bx],al
! 5 }
pop si
pop di
pop bp
ret
! 6
.data
.bss
.comm _line,$50
! 0 errors detected
[12:13am linux01] cat > data2.c
int i=5,j=12;
char data[20][30];
main() {
data[i][j] = 99;
}
[12:13am linux01] bcc -S -o data2.s data2.c
[12:13am linux01] cat data2.s
! 1 int i=5,j=12;
.data
export _i
_i:
.word 5
export _j
_j:
.word $C
! 2 char data[20][30];
! 3 main() {
.text
export _main
_main:
! 4 data[i][j] = 99;
push bp
mov bp,sp
push di
push si
mov bx,[_i]
mov ax,bx
mov cx,*$1E
imul cx
mov bx,ax
mov ax,[_j]
add bx,ax
mov al,*$63
mov _data[bx],al
! 5 }
pop si
pop di
pop bp
ret
! 6
.data
.bss
.comm _data,$258
! 0 errors detected
int i,j[100],k;
main() {
for (i=0; i<=99; i++) k = k+j[i];
}
xor ax,ax
mov [_i],ax
jmp .3
.4:
mov bx,[_i]
shl bx,*1
mov ax,[_k]
add ax,_j[bx]
mov [_k],ax
.2:
mov ax,[_i]
inc ax
mov [_i],ax
.3:
mov ax,[_i]
cmp ax,*$63
jle .4
[12:07am linux01] cat simplearray.c
int i,j[100],k;
main() {
for (i=0; i<=99; i++) k = k+j[i];
}
[12:07am linux01] cat simplearray.s
! 1 int i,j[100],k;
! 2 main() {
export _main
_main:
! 3 for (i=0; i<=99; i++) k = k+j[i];
push bp
mov bp,sp
push di
push si
xor ax,ax
mov [_i],ax
jmp .3
.4:
mov bx,[_i]
shl bx,*1
mov ax,[_k]
add ax,_j[bx]
mov [_k],ax
! 4 }
.2:
mov ax,[_i]
inc ax
mov [_i],ax
.3:
mov ax,[_i]
cmp ax,*$63
jle .4
.5:
.1:
pop si
pop di
pop bp
ret
! 5
.data
.bss
.comm _j,$C8
.comm _i,2
.comm _k,2
[4:34pm linux01] cat > simpleptr.c
int i, *ip, j;
main() {
ip = &i;
j = *ip + 5;
}
The two lines in the program are
equivalent to the following 8086 assembly:
mov bx,#_i
mov [_ip],bx
mov bx,[_ip]
mov bx,[bx]
add bx,*5
mov [_j],bx
In fact the whole program listing is as follows:
[4:38pm linux01] bcc -S -o simpleptr.s simpleptr.c
[4:39pm linux01] cat simpleptr.s
! 1 int i, *ip, j;
! 2 main() {
export _main
_main:
! 3 ip = &i;
push bp
mov bp,sp
push di
push si
mov bx,#_i
mov [_ip],bx
! 4 j = *ip + 5;
mov bx,[_ip]
mov bx,[bx]
add bx,*5
mov [_j],bx
! 5 }
pop si
pop di
pop bp
ret
.data
.bss
.comm _j,2
.comm _i,2
.comm _ip,2
What do these lines mean?
int i,j, *ip;
main() {
ip = &i;
j= j + *ip;
ip++;
((void *) ip)++;
((long int *) ip)++;
((int) ip)++;
}
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
[12:20am linux01] cat > interesting.c
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
[12:26am linux01] bcc -S -o interesting.s interesting.c
[12:26am linux01] cat interesting.s
! 1
! 1 char c1,c[100],*cp;
! 2 int i;
! 3 main() {
export _main
_main:
! 4 cp = c;
push bp
mov bp,sp
push di
push si
mov bx,#_c
mov [_cp],bx
! 5 cp = &c[0];
mov bx,#_c
mov [_cp],bx
! 6 c1 = c[i];
mov bx,[_i]
mov al,_c[bx]
mov [_c1],al
! 7 c1 = *(cp+i);
mov ax,[_i]
add ax,[_cp]
mov bx,ax
mov al,[bx]
mov [_c1],al
! 8 c1 = i[c];
mov bx,[_i]
mov al,_c[bx]
mov [_c1],al
! 9 c1 = "abcdef"[i];
mov bx,[_i]
mov al,.1[bx]
mov [_c1],al
! 10 }
pop si
pop di
pop bp
ret
! 11
.data
.1:
.2:
.ascii "abcdef"
.byte 0
.bss
.comm _cp,2
.comm _i,2
.comm _c1,1
.comm _c,$64
! 0 errors detected
[12:26am linux01]
[12:20am linux01] cat > interesting.c
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
cp = c;
mov bx,#_c
mov [_cp],bx
cp = &c[0];
mov bx,#_c
mov [_cp],bx
c1 = *(cp+i);
mov ax,[_i]
add ax,[_cp]
mov bx,ax
mov al,[bx]
mov [_c1],al
c1 = c[i];
mov bx,[_i]
mov al,_c[bx]
mov [_c1],al
c1 = i[c];
mov bx,[_i]
mov al,_c[bx]
mov [_c1],al
c1 = "abcdef"[i];
mov bx,[_i]
mov al,.1[bx]
mov [_c1],al
.1:
.ascii "abcdef"
.byte 0
.bss
.comm _cp,2
.comm _i,2
.comm _c1,1
.comm _c,$64
Share with your friends: |