I just started learning assembly and wrote some custom loops for swapping two variables, using C++'s `__asm {}` blocks with the Digital Mars compiler in C-Free 5.0.
I enabled the -o (optimization) switch
and got these results:
 time of for-loop(cycles)        844
 time of while-loop(cycles)      735
 time of custom-loop-1(cycles)   562
 time of custom-loop-2(cycles)   469
I couldn't find an "asm output" option in the Digital Mars compiler to compare the generated code, and there are no other optimisation options in the build options. Should I change my compiler? If yes, to which one? Can you look at the code below and tell me why the custom loops are faster?
here is the standard for loop:
// Baseline: time 200,000,000 swaps of a and b (through temp) in a plain for loop.
t1=clock(); 
for(int i=0;i<200000000;i++)
{
    temp=a;// save the old value of a
    a=b;// a takes the value of b
    b=temp;// b takes the saved a; three C statements, not necessarily three machine instructions
}   
t2=clock();
// The difference of two clock() values is in clock ticks (CLOCKS_PER_SEC per second), not CPU cycles.
printf("\n time of for-loop(increasing) %i  \n",(t2-t1));
here is the standard while loop:
// Same swap benchmark as a while loop counting j up to 200,000,000.
// NOTE(review): j is not reset in this snippet; the iteration count relies on j being 0 on entry.
t1=clock();
while(j<200000000)
{
    temp=a;// save the old value of a (again three C statements for the swap)
    a=b;// a takes the value of b
    b=temp; // b takes the saved a
            j++;// loop counter
}
t2=clock();
// The difference of two clock() values is in clock ticks, not CPU cycles.
printf("\n time of while-loop(cycles)  %i  \n",(t2-t1));
here is my custom loop 1:
// Hand-written loop 1: swap a and b through memory on every iteration.
// Register roles inside the block: ecx = iteration counter, edx = iteration limit (copied from j),
// eax/ebx = scratch copies of a and b.
t1=clock();
j=200000000;//iteration count, loaded into edx below
    __asm
    {
        pushf           //save flags; restored by popf at the end
        push eax        //save every register the loop uses
        push ebx        //
        push ecx        //
        push edx        //
        mov ecx,0       //ecx = loop counter, counts 0 .. j-1
        mov edx,j       //edx = limit, loaded once so the compare is register-to-register
        do_it_again:    //loop top; the test is at the bottom, so the body runs at least once
        mov eax,a       //load both variables from memory ...
        mov ebx,b       
        mov b,eax       //... and store them back exchanged
        mov a,ebx       //four moves per swap (two loads, two stores)
        inc ecx         //counter++
        cmp ecx,edx     //counter < limit ?
        jb do_it_again  //unsigned "below": repeat while ecx < edx
        pop edx     //restore registers in reverse order of the pushes
        pop ecx         
        pop ebx         
        pop eax         
        popf            //restore flags
    }
t2=clock();
// The difference of two clock() values is in clock ticks, not CPU cycles.
printf("\n time of custom-loop-1(cycles)   %i   \n",(t2-t1));
here is my second custom loop:
// Hand-written loop 2: swap a and b entirely in registers, without a temporary.
// Register roles inside the block: ecx = iteration counter, edx = iteration limit (copied from j),
// eax = working copy of a, ebx = working copy of b.
//
// Fixes relative to the first version:
//  * the last step of the swap used "xor eax,80000000h", which only flips the sign bit;
//    turning -b into b in two's complement requires "neg eax";
//  * the swapped values are written back to a and b after the loop, so the
//    variables actually reflect the work done (loop 1 updates them in memory).
t1=clock();
j=200000000;//iteration count, loaded into edx below
    __asm
    {
        pushf               //save flags; restored by popf at the end
        push eax            //save every register the loop uses
        push ebx
        push ecx
        push edx
        mov ecx,0           //ecx = loop counter, counts 0 .. j-1
        mov edx,j           //edx = limit
        mov eax,a           //load the variables once, before the loop
        mov ebx,b
        do_it_again2:       //loop top; test is at the bottom, so the body runs at least once
        //arithmetic swap using only the two registers (wrap-around is harmless)
        sub eax,ebx         //eax = a-b
        add ebx,eax         //ebx = b+(a-b) = a
        sub eax,ebx         //eax = (a-b)-a = -b
        neg eax             //eax = b ; swap complete, four instructions total
        inc ecx             //counter++
        cmp ecx,edx         //counter < limit ?
        jb do_it_again2     //unsigned "below": repeat while ecx < edx
        mov a,eax           //write the results back before the saved registers are restored
        mov b,ebx
        pop edx             //restore registers in reverse order of the pushes
        pop ecx
        pop ebx
        pop eax
        popf                //restore flags
    }
t2=clock();
// The difference of two clock() values is in clock ticks, not CPU cycles.
printf("\n time of custom-loop-2(cycles)  %i   \n",(t2-t1));
full code:
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
/*
 * Micro-benchmark comparing four ways of swapping two ints 200,000,000 times:
 *   1. a C++ for loop, 2. a C++ while loop,
 *   3. inline asm swapping through memory, 4. inline asm swapping in registers.
 *
 * Each section prints the elapsed clock() ticks (CLOCKS_PER_SEC per second;
 * the labels say "cycles" but these are not CPU cycles).
 * Targets 32-bit x86 with Digital Mars style __asm blocks.
 * Returns 0.
 */
int main()
{
    int j=0;                    // counter for the while loop, later the asm iteration limit
    int a=0,b=0,temp=0;         // the two values being swapped and the C++ temporary
    srand((unsigned)time(0));
    clock_t t1=0;               // clock() returns clock_t, not time_t
    clock_t t2=0;

    // --- 1. plain for loop ---
    t1=clock();
    for(int i=0;i<200000000;i++)
    {
        temp=a; // three C statements per swap (not necessarily three instructions)
        a=b;
        b=temp;
    }
    t2=clock();
    // cast so the argument really is an int, as "%i" requires
    printf("\n time of for-loop(cycles) %i  \n",(int)(t2-t1));

    // --- 2. while loop; relies on j still being 0 here ---
    t1=clock();
    while(j<200000000)
    {
        temp=a;
        a=b;
        b=temp;
        j++;
    }
    t2=clock();
    printf("\n time of while-loop(cycles)  %i  \n",(int)(t2-t1));

    // --- 3. inline asm, swapping through memory on every iteration ---
    // ecx = counter, edx = limit, eax/ebx = scratch copies of a and b
    t1=clock();
    j=200000000;                // iteration count, loaded into edx below
    __asm
    {
        pushf                   //save flags and every register the loop uses
        push eax
        push ebx
        push ecx
        push edx
        mov ecx,0               //ecx = loop counter
        mov edx,j               //edx = limit
        do_it_again:            //test is at the bottom: body runs at least once
        mov eax,a               //load both variables ...
        mov ebx,b
        mov b,eax               //... and store them back exchanged
        mov a,ebx
        inc ecx                 //counter++
        cmp ecx,edx
        jb do_it_again          //unsigned: repeat while ecx < edx
        pop edx                 //restore in reverse order of the pushes
        pop ecx
        pop ebx
        pop eax
        popf
    }
    t2=clock();
    printf("\n time of custom-loop-1(cycles)   %i   \n",(int)(t2-t1));

    // --- 4. inline asm, swapping in registers without a temporary ---
    // ecx = counter, edx = limit, eax = working a, ebx = working b
    t1=clock();
    j=200000000;                // iteration count, loaded into edx below
    __asm
    {
        pushf                   //save flags and every register the loop uses
        push eax
        push ebx
        push ecx
        push edx
        mov ecx,0               //ecx = loop counter
        mov edx,j               //edx = limit
        mov eax,a               //load the variables once, before the loop
        mov ebx,b
        do_it_again2:           //test is at the bottom: body runs at least once
        sub eax,ebx             //eax = a-b
        add ebx,eax             //ebx = b+(a-b) = a
        sub eax,ebx             //eax = (a-b)-a = -b
        neg eax                 //eax = b  (was "xor eax,80000000h", which only flips the sign bit)
        inc ecx                 //counter++
        cmp ecx,edx
        jb do_it_again2         //unsigned: repeat while ecx < edx
        mov a,eax               //write the swapped values back to the variables
        mov b,ebx
        pop edx                 //restore in reverse order of the pushes
        pop ecx
        pop ebx
        pop eax
        popf
    }
    t2=clock();
    printf("\n time of custom-loop-2(cycles)  %i   \n",(int)(t2-t1));

    return 0;
}
I am just learning C++ and assembly and wondered how things work under the hood. Thank you.
Environment: Windows XP, Pentium 4 (2 GHz), Digital Mars compiler in C-Free.
 
     
     
     
     
     
    