I am new to SSE2 instructions. I have found an instruction _mm_add_epi8 which can add two array elements. But I want an SSE instruction which can add all elements of an array.
I was trying to develop this concept using this code:
#include <iostream>
#include <conio.h>
#include <emmintrin.h>
void sse(unsigned char* a,unsigned char* b); 
void main()
{
    /*unsigned char *arr;
    arr=(unsigned char *)malloc(50);*/
    unsigned char arr[]={'a','b','c','d','e','f','i','j','k','l','m','n','o','p','q','r','a','b','c','d','e','f','i','j','k','l','m','n','o','p','q','r'};
    unsigned char *next_arr=arr+16;
    for(int i=0;i<16;i++)
          printf("%d,%c   ",next_arr[i],next_arr[i]);
    sse(arr,next_arr);
    getch();
}
void sse(unsigned char* a,unsigned char* b)                                                                                                                                                                          
{                                                                                                                                                                                                                                                                                                                                                                                            
  __m128i* l = (__m128i*)a;                                                                                                                                                                                      
  __m128i* r = (__m128i*)b; 
  __m128i result;
      result= _mm_add_epi8(*l, *r);
      unsigned char *p;
         p=(unsigned char *)&result;
        for(int i=0;i<16;i++)
          printf("%d ",p[i]);
         printf("\n");
         l=(__m128i*)p;
         r=(__m128i*)(p+8);         
         result=_mm_add_epi8(*l, *r);
         p=(unsigned char *)&result;
         printf("%d ",p[0]);
         l=(__m128i*)p;
         r=(__m128i*)(p+4);
         result=_mm_add_epi8(*l, *r);
         p=(unsigned char *)&result;
         l=(__m128i*)p;
         r=(__m128i*)(p+2);
         result=_mm_add_epi8(*l, *r);
         p=(unsigned char *)&result;
         l=(__m128i*)p;
         r=(__m128i*)(p+1);
         result=_mm_add_epi8(*l, *r);
          p=(unsigned char *)&result;
            printf("result =%d ",p[0]);
}
So can anybody please tell me how it is possible to add all elements of an array using SSE2 instructions ?
Any help will be appreciated.
 
    