www.delorie.com/archives/browse.cgi   search  
Mail Archives: pgcc/1999/07/16/09:45:05

Sender: jgbauman AT rommel DOT stw DOT uni-erlangen DOT de
Message-ID: <378F37A6.E18DC7E0@stud.informatik.uni-erlangen.de>
Date: Fri, 16 Jul 1999 15:46:14 +0200
From: Joerg Baumann <joerg DOT baumann AT stud DOT informatik DOT uni-erlangen DOT de>
X-Mailer: Mozilla 4.04 [en] (X11; I; Linux 2.2.9 i686)
MIME-Version: 1.0
To: pgcc AT delorie DOT com
Subject: not really a bug, but performance problem
Reply-To: pgcc AT delorie DOT com

This is a multi-part message in MIME format.
--------------DF8856AA3EBE5425158BD648
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi,

Normally you expect C++ to be slower than C.
But for small test programs you would think you can estimate whether there
will be a performance penalty for using C++ or not.
In an undergraduate course (pattern matching) at my university 
I heard of an example which runs fast as a C program, but is much slower
when written as a C++ program. 
If you look at c_.c and c_.C you can see that they are very similar, and 
you would think that they should execute at the same speed, but many
C++ compilers don't get it right.
 
I tried SUN Workshop compilers 4.2 on an UltraSparc 10 => same speed
        gcc and g++ 2.7.2.3                "           => same speed
        pgcc-2.91.66 on Linux Pentium II 333           => C 1.5 times
faster than C++
  
But the most astonishing thing was this:
  c_.c compiled with gcc-2.7.2.1 was 1.15 times faster than c_.c
compiled with pgcc-2.91.66.

The generated assembler files and timing results are in t13.s (gcc-2.7.2.1) and t10.s (pgcc-2.91.66).

I hope you'll find this interesting, and please excuse my bad English.

   joerg

--------------------------------------------------------------------
 cpu time/usefulness ratio too high -- core dumped.

  Joerg Baumann    (joerg DOT baumann AT stud DOT informatik DOT uni-erlangen DOT de)
--------------DF8856AA3EBE5425158BD648
Content-Type: text/plain; charset=us-ascii; name="c_.C"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="c_.C"

#include <iostream.h>
#include <stdlib.h>
#include <assert.h>

// Side length of the square pic array that main() walks.
const int MAX=512;

// Table of 256 int counters reached through operator[].
//
// The object owns the heap array: it is allocated and zeroed in the
// constructor and released in the destructor.  The member order (size, then
// h) is kept as it was so the object layout does not change.
class H{
  enum { BINS=256 };   // one slot for every possible unsigned char value
  int size;            // number of slots in h
  int * const h;       // owned counter array of BINS ints

  // Copying would leave two objects deleting the same array, so both copy
  // operations are declared private and left undefined.
  H(const H&);
  H& operator=(const H&);
public:
  // Allocates the counter array and clears every slot, so that the first
  // increment of a slot yields 1 instead of an indeterminate value.
  H():size(BINS),h(new int[BINS]){
    // Only meaningful with an operator new that returns NULL on failure;
    // kept from the original for such compilers.
    assert(h!=NULL);
    for (int n=0;n<BINS;n++)
      h[n]=0;
  }
  // Releases the counter array.
  ~H(){
    delete [] h;
  }
  // Returns a modifiable reference to slot i.  No bounds check is done; the
  // caller must keep i within 0..255.  The operator is const because the
  // pointer itself is not changed, only the ints it points at.
  inline int& operator[](int i) const {
    return h[i];
  }
};

// Benchmark driver: argv[1] gives the number of passes.  On every pass each
// byte of the MAX x MAX array pic selects the counter in H that is
// incremented.  pic is never written, so its contents are whatever the
// stack happens to hold.
int main(int argc,char * argv[]){
  unsigned char pic[MAX][MAX];
  H h;

  if (argc!=2) {
    cout << "usage: c_ loop\n"<<endl;
    exit(-1);
  }

  // All counter accesses go through a const reference to h.
  const H& counts=h;
  const int passes=atoi(argv[1]);

  for (int pass=0;pass<passes;++pass) {
    for (int row=0;row<MAX;++row) {
      const unsigned char *line=pic[row];
      for (int col=0;col<MAX;++col) {
        ++counts[line[col]];
      }
    }
  }
  return 0;
}


--------------DF8856AA3EBE5425158BD648
Content-Type: text/plain; charset=us-ascii; name="c_.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="c_.c"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Side length of the square pic array that main() walks. */
#define MAX 512

/*
 * Plain-C counterpart of the C++ class H: an int followed by a pointer to
 * the counter array.
 */
struct _H {
  int s;   /* never read or written by main() in this file */
  int *h;  /* counter array; allocated in main() */
};
typedef struct _H H;

/*
 * Benchmark driver: argv[1] gives the number of passes.  On every pass each
 * byte of the MAX x MAX array pic selects the counter in h.h that is
 * incremented.
 *
 * The counter array has one slot per possible unsigned char value.  The
 * earlier malloc(sizeof(int)*4) provided only 4 slots, so any byte value
 * above 3 wrote past the end of the allocation.  calloc also zeroes the
 * slots, so the increments no longer start from indeterminate values.
 *
 * pic is still never written; its contents are whatever the stack holds.
 *
 * Exits with status -1 after printing a usage line unless exactly one
 * argument is given; otherwise returns 0.
 */
int main(int argc,char * argv[]){
  enum { BINS = 256 };  /* number of distinct unsigned char values */
  int i,j,k,m;
  unsigned char pic[MAX][MAX];
  H h;

  /* Check the command line first so nothing is allocated on the usage path. */
  if (argc!=2) {
    printf("usage: c_ loop\n");
    exit(-1);
  }

  h.h=calloc(BINS,sizeof *h.h);
  assert(h.h!=NULL);

  m=atoi(argv[1]);
  for (k=0;k<m;k++)
    for (i=0;i<MAX;i++)
      for (j=0;j<MAX;j++)
        h.h[pic[i][j]]++;

  free(h.h);
  return 0;
}

--------------DF8856AA3EBE5425158BD648
Content-Type: text/plain; charset=us-ascii; name="t13.s"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="t13.s"

#gcc -bi486-linux -V2.7.2.1 -O6  c_.c -S -o t13.s 
#time t13 5000
#real    0m16.243s
#user    0m16.230s
#sys     0m0.000s
	.file	"c_.c"
	.version	"01.01"
gcc2_compiled.:
.section	.rodata
.LC0:
	.string	"main"
.LC1:
	.string	"c_.c"
.LC2:
	.string	"h.h!=((void *)0)"
.LC3:
	.string	"usage: c_ loop\n"
.text
	.align 16
.globl main
	.type	 main,@function
	# main as compiled from c_.c by gcc 2.7.2.1 (see the .ident line below).
	# A frame pointer is kept in %ebp; 262148 bytes of locals are reserved:
	# 262144 (512*512) addressed from -262144(%ebp) for pic, plus one 4-byte
	# slot at -262148(%ebp) that holds the loop bound m.
main:
	pushl %ebp
	movl %esp,%ebp
	subl $262148,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	# %esi = second argument of main (argv)
	movl 12(%ebp),%esi
	# %ebx = malloc(16), then drop the argument from the stack
	pushl $16
	call malloc
	movl %eax,%ebx
	addl $4,%esp
	# assert(h.h!=NULL): on a NULL result call
	# __assert_fail(.LC2, .LC1, 17, .LC0), i.e. expression, file, line, function
	testl %ebx,%ebx
	jne .L43
	pushl $.LC0
	pushl $17
	pushl $.LC1
	pushl $.LC2
	call __assert_fail
	.align 16
.L43:
	# if (argc!=2): print the usage string and exit(-1)
	cmpl $2,8(%ebp)
	je .L45
	pushl $.LC3
	call printf
	pushl $-1
	call exit
	.align 16
.L45:
	# m = __strtol_internal(argv[1], 0, 10, 0); presumably the inline
	# expansion of atoi() from the C library headers
	movl 4(%esi),%eax
	pushl $0
	pushl $10
	pushl $0
	pushl %eax
	call __strtol_internal
	movl %eax,-262148(%ebp)
	# %edi = k = 0; skip all loops when 0 >= m
	xorl %edi,%edi
	cmpl %eax,%edi
	jge .L49
	# %esi = h.h (the malloc result) for the whole loop nest
	movl %ebx,%esi
	.align 4
.L51:
	# outer loop body: %ecx = i = 0
	xorl %ecx,%ecx
	.align 4
.L55:
	# middle loop body: %edx = j = 0, %ebx = address of row i (pic + i*512)
	xorl %edx,%edx
	movl %ecx,%eax
	sall $9,%eax
	leal -262144(%ebp,%eax),%ebx
	.align 4
.L59:
	# inner loop: load pic[i][j] zero-extended, increment h.h[that byte],
	# j++, repeat while j <= 511 (five instructions per iteration)
	movzbl (%edx,%ebx),%eax
	incl (%esi,%eax,4)
	incl %edx
	cmpl $511,%edx
	jle .L59
	# i++, repeat while i <= 511
	incl %ecx
	cmpl $511,%ecx
	jle .L55
	# k++, repeat while m > k
	incl %edi
	cmpl %edi,-262148(%ebp)
	jg .L51
.L49:
	# return 0: point %esp at the three saved registers
	# (262148 + 12 bytes below %ebp), restore them and the frame
	xorl %eax,%eax
	leal -262160(%ebp),%esp
	popl %ebx
	popl %esi
	popl %edi
	movl %ebp,%esp
	popl %ebp
	ret
.Lfe1:
	.size	 main,.Lfe1-main
	.ident	"GCC: (GNU) 2.7.2.1"










--------------DF8856AA3EBE5425158BD648
Content-Type: text/plain; charset=us-ascii; name="t10.s"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="t10.s"

# gcc -O6 -mpentiumpro c_.c -S -o t10
# time t10 5000
# real    0m18.843s		
# user    0m18.840s		
# sys     0m0.000s
	.file	"c_.c"
	.version	"01.01"
gcc2_compiled.:
.section	.rodata
.LC0:
	.string	"main"
.LC1:
	.string	"c_.c"
.LC2:
	.string	"h.h!=((void *)0)"
.LC3:
	.string	"usage: c_ loop\n"
.text
	.align 16
.globl main
	.type	 main,@function
	# main as compiled from c_.c by pgcc-2.91.66 (see the .ident line below).
	# No frame pointer is set up: locals and arguments are addressed from
	# %esp, and %ebp is saved and later used as an ordinary register.
main:
	subl $262148,%esp
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	# %ebx = malloc(16), then drop the argument from the stack
	pushl $16
	call malloc
	movl %eax,%ebx
	addl $4,%esp
	# assert(h.h!=NULL): on a NULL result call
	# __assert_fail(.LC2, .LC1, 17, .LC0), i.e. expression, file, line, function
	testl %ebx,%ebx
	jne .L22
	pushl $.LC0
	pushl $17
	pushl $.LC1
	pushl $.LC2
	call __assert_fail
	.p2align 4,,7
.L22:
	# if (argc!=2): argc sits above the 262148 bytes of locals, the four
	# saved registers (16 bytes) and the return address (4 bytes)
	cmpl $2,262168(%esp)
	je .L24
	pushl $.LC3
	call printf
	pushl $-1
	call exit
	.p2align 4,,7
.L24:
	# %eax = argv, %edi = k = 0, %eax = argv[1]
	movl 262172(%esp),%eax
	xorl %edi,%edi
	movl 4(%eax),%eax
	# m = __strtol_internal(argv[1], 0, 10, 0); presumably the inline
	# expansion of atoi() from the C library headers
	pushl $0
	pushl $10
	pushl $0
	pushl %eax
	call __strtol_internal
	# store m; after the 16 argument bytes are popped the slot is 16(%esp)
	movl %eax,32(%esp)
	addl $16,%esp
	# skip all loops when 0 >= m
	cmpl %eax,%edi
	jge .L28
	# %ebp = base address of pic, %esi = h.h (the malloc result)
	leal 20(%esp),%ebp
	movl %ebx,%esi
	.p2align 4,,7
.L30:
	# outer loop body: %ebx counts the 512 rows from -512 up to 0,
	# %ecx = byte offset of the current row within pic
	movl $-512,%ebx
	xorl %ecx,%ecx
	.p2align 4,,7
.L34:
	# middle loop body: %edx = j = 0
	xorl %edx,%edx
	.p2align 4,,7
.L38:
	# inner loop: form row offset + j, load that byte of pic zero-extended,
	# j++, increment h.h[that byte], repeat while j <= 511
	# (six instructions per iteration)
	leal (%ecx,%edx),%eax
	movzbl (%eax,%ebp),%eax
	incl %edx
	incl (%esi,%eax,4)
	cmpl $511,%edx
	jle .L38
	# advance to the next row; repeat until the row counter reaches 0
	addl $512,%ecx
	incl %ebx
	jnz .L34
	# k++, repeat while k < m
	incl %edi
	cmpl 16(%esp),%edi
	jl .L30
.L28:
	# return 0: restore the saved registers and release the locals
	popl %ebx
	xorl %eax,%eax
	popl %esi
	popl %edi
	popl %ebp
	addl $262148,%esp
	ret
.Lfe1:
	.size	 main,.Lfe1-main
	.ident	"GCC: (GNU) pgcc-2.91.66 19990314 (egcs-1.1.2 release)"







--------------DF8856AA3EBE5425158BD648--

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019