program matmul_main ! !******************************************************************************* ! !! MAIN is the main program for a matrix multiplication performance test. ! ! ! Discussion: ! ! MATMUL is an interactive program that sets up, carries out, and times ! a matrix multiplication. ! ! At the user's command, MATMUL can use many different algorithms, ! matrix sizes, arithmetic types. ! ! Author: ! ! John Burkardt ! !******************************************************************************* ! ! Making a version for a given machine: ! ! 1) Modify MATMUL_CPU_TIMER ! ! MATMUL_CPU_TIMER is a routine to compute the current reading of ! the CPU timer in seconds. Several sets of appropiate code ! are listed in the routine, depending on the machine and compiler ! used. Uncomment the one appropriate for your machine, or ! add a new one if necessary. ! ! 2) Set the value of "MACHINE" to be the name of your machine. ! ! This occurs in routine INIT. ! ! 4) Make some special routines available for special machines: ! ! For the Cray, ! uncomment the calls to ! MXMA ! SAXPY ! SDOT ! SGEMM ! SGEMMS ! TIMEF. ! uncomment the declaration of the array WORK in routine USGEMMS. ! ! For the SGI/IRIS, ! uncomment the calls to ! SAXPY ! SDOT ! SECNDS ! SGEMM. ! !******************************************************************************* ! implicit none ! integer, parameter :: order_max = 100 ! character ( len = 20 ) command logical cshow logical flop_show integer i integer ido integer ierror integer ios integer itemp integer lchar integer lda integer lenc character ( len = 7 ) lingo logical lnshow logical lda_show character ( len = 10 ) machine logical machine_show integer n integer nhi integer ninc integer nlo integer nmult logical noshow integer nrep logical nrep_show logical nshow character ( len = 10 ) order character ( len = 10 ) order_list(order_max) integer order_num character ( len = 82 ) output logical s_eqi logical time_show ! call timestamp ( ) ! ! Initialize the data. ! call init ( command, cshow, flop_show, lda, lingo, lnshow, lda_show, & machine, machine_show, n, nhi, ninc, nlo, nmult, noshow, nrep, & nrep_show, nshow, order, order_list, order_max, order_num, time_show ) ! ! Say hello to the user. ! call hello ( machine ) do ! ! Print the prompt and read the command. ! write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Command? (Type H for help)' read ( *, '(a)', iostat = ios ) command if ( ios /= 0 ) then command = 'QUIT' end if ! ! Capitalize the command. ! call s_cap ( command ) ! ! Remove all blanks to make interpretation easier. ! call s_blank_delete ( command ) ierror = 0 ! ! "H" means print out the help message. ! if ( s_eqi ( command(1:1), 'H' ) ) then call help ! ! "LDA = " means the user wants to set lda. ! else if ( s_eqi ( command(1:4), 'LDA=' ) ) then call s_to_i ( command(5:), itemp, ierror, lchar ) if ( ierror /= 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'I did not understand your definition of LDA.' else if ( itemp <= 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'The assignment of LDA was not acceptable!' write ( *, '(a)' ) 'LDA must be positive.' else lda = itemp write ( *, '(a)' ) ' ' write ( *, '(a,i6)' ) 'LDA has been set to ', lda end if if ( lda < n ) then n = lda write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Note: Since N must be no greater than LDA,' write ( *, '(a)' ) 'MATMUL has decreased the value of N.' write ( *, '(a,i6)' ) 'N has been set to ', n end if ! ! "M" means the user wants the multiplication to be carried out. ! else if ( s_eqi ( command(1:1), 'M' ) ) then ! ! Carry out multiplication for one, or many values of N. ! n = nlo do call mult ( cshow, flop_show, lda, lingo, lnshow, lda_show, machine, & machine_show, n, noshow, nrep, nrep_show, nshow, order, order_list, & order_num, output, time_show ) call nstep ( ido, n, nhi, ninc, nlo, nmult ) if ( ido /= 1 ) then exit end if end do write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'The matrix multiplication has been carried out.' ! ! "N=" means the user wants to set the matrix size N. ! else if ( s_eqi ( command(1:2), 'N=' ) ) then call getn ( command(3:), ierror, lda, n, nhi, ninc, nlo, nmult ) if ( lda < n ) then lda = n write ( *, '(a)' ) ' ' write ( *, '(a)' ) ' Because N was changed, LDA is automatically' write ( *, '(a,i6)' ) ' increased to LDA = ', lda end if ! ! "NOSHOW" means the user wants to turn off the display of all quantities. ! else if ( s_eqi ( command, 'NOSHOW' ) ) then command = 'NOSHOW=ALL' call getsho ( command(8:), cshow, flop_show, lnshow, lda_show, .false., & machine_show, noshow, nrep_show, nshow, time_show ) ! ! "NOSHOW=" means the user wants to turn off the display of a ! particular quantity. ! else if ( s_eqi ( command(1:7), 'NOSHOW=' ) ) then call getsho ( command(8:), cshow, flop_show, lnshow, lda_show, .false., & machine_show, noshow, nrep_show, nshow, time_show ) ! ! "NREP=" sets the number of repetitions. ! else if ( s_eqi ( command(1:5), 'NREP=' ) ) then call s_to_i ( command(6:), nrep, ierror, lchar ) write ( *, '(a,i6)' ) 'The repetition factor is now NREP = ', nrep if ( nrep == 1 ) then nrep_show = .false. else nrep_show = .true. end if ! ! "ORDER=" means the user wants to set the method. ! else if ( s_eqi ( command(1:5), 'ORDER' ) ) then i = index ( command, '=' ) if ( i == 0 ) then call order_list_print ( order_list, order_num ) write ( *, '(a)' ) 'Enter your choice for the order' read ( *, '(a)' ) command call order_get ( command, ierror, order, order_list, order_num ) else command = adjustl ( command(i+1:) ) call order_get ( command, ierror, order, order_list, order_num ) end if ! ! "Q" means the user wants to quit. ! else if ( s_eqi ( command(1:1), 'Q' ) ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Type "Y" to confirm that you want to quit.' read ( *, '(a)', iostat = ios ) command if ( ios /= 0 ) then command = 'Y' end if call s_cap ( command ) if ( s_eqi ( command(1:1), 'Y' ) ) then exit end if ! ! "P" means the user wants to print out the current settings. ! else if ( s_eqi ( command(1:1), 'P' ) ) then call printr ( lda, lingo, n, nhi, ninc, nlo, nmult, nrep, order ) ! ! "SHOW" means the user wants all items to be displayed. ! else if ( s_eqi ( command, 'SHOW' ) ) then command = 'SHOW=ALL' call getsho ( command(6:), cshow, flop_show, lnshow, lda_show, .true., & machine_show, noshow, nrep_show, nshow, time_show ) ! ! "SHOW=" means the user wants a particular item displayed. ! else if ( s_eqi(command(1:5), 'SHOW=' ) ) then call getsho ( command(6:), cshow, flop_show, lnshow, lda_show, .true., & machine_show, noshow, nrep_show, nshow, time_show ) ! ! The user's input did not match any acceptable command. ! else if ( lenc > 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Your command was not recognized.' write ( *, '(a)' ) 'You typed ' // trim ( command) write ( *, '(a)' ) 'Type HELP for a list of commands.' end if end if end do write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'MATMUL:' write ( *, '(a)' ) ' Normal end of execution.' stop end subroutine ch_cap ( c ) ! !******************************************************************************* ! !! CH_CAP capitalizes a single character. ! ! ! Modified: ! ! 19 July 1998 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input/output, character C, the character to capitalize. ! implicit none ! character c integer itemp ! itemp = ichar ( c ) if ( 97 <= itemp .and. itemp <= 122 ) then c = char ( itemp - 32 ) end if return end subroutine c_matmul ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! C_MATMUL computes A = B*C using FORTRAN 90 MATMUL and complex arithmetic. ! ! ! Modified: ! ! 21 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! complex a(lda,n) complex b(lda,n) complex c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call c_set ( a, b, c, n, n ) call matmul_cpu_timer ( time1 ) a(1:n,1:n) = matmul ( b(1:n,1:n), c(1:n,1:n) ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine c_ijk ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! C_IJK computes A = B*C using index order IJK and complex arithmetic. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! complex a(lda,n) complex b(lda,n) complex c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call c_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine c_set ( a, b, c, lda, n ) ! !******************************************************************************* ! !! C_SET initializes the complex A, B and C matrices. ! ! ! Modified: ! ! 28 August 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, complex A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! implicit none ! integer lda integer n ! complex a(lda,n) complex b(lda,n) complex c(lda,n) ! a(1:n,1:n) = cmplx ( 0.0E+00, 0.0E+00 ) b(1:n,1:n) = cmplx ( 2.0E+00, 1.0E+00 ) c(1:n,1:n) = cmplx ( 1.0E+00, 1.0E+00 ) return end subroutine d_matmul ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! D_MATMUL computes A = B*C using FORTRAN 90 MATMUL and double precision arithmetic. ! ! ! Modified: ! ! 21 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! double precision a(lda,n) double precision b(lda,n) double precision c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call d_set ( a, b, c, n, n ) call matmul_cpu_timer ( time1 ) a(1:n,1:n) = matmul ( b(1:n,1:n), c(1:n,1:n) ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine d_ijk ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! D_IJK multiplies A = B*C using index order IJK and double precision. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! double precision a(lda,n) double precision b(lda,n) double precision c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call d_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine d_set ( a, b, c, lda, n ) ! !******************************************************************************* ! !! D_SET initializes the double precision A, B and C matrices. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, double precision A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! implicit none ! integer lda integer n ! double precision a(lda,n) double precision b(lda,n) double precision c(lda,n) ! a(1:n,1:n) = 0.0D+00 b(1:n,1:n) = 1.0D+00 c(1:n,1:n) = 1.0D+00 return end subroutine domethod ( lda, n, nrep, order, ttime ) ! !******************************************************************************* ! !! DOMETHOD calls a specific multiplication routine. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Input, character ( len = 10 ) ORDER, specifies the method to be used. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! integer nrep character ( len = 10 ) order logical s_eqi real ttime ! if ( s_eqi ( order, 'C_IJK' ) ) then call c_ijk ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'C_MATMUL' ) ) then call c_matmul ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'D_IJK' ) ) then call d_ijk ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'D_MATMUL' ) ) then call d_matmul ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'I_IJK' ) ) then call i_ijk ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'I_IJK_46' ) ) then call i_ijk_46 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'I_MATMUL' ) ) then call i_matmul ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'L_IJK' ) ) then call l_ijk ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_DOT_PRODUCT' ) ) then call r_dot_product ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJ' ) ) then call r_ij ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK' ) ) then call r_ijk ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_S' ) ) then call r_ijk_s ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_I2' ) ) then call r_ijk_i2 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_I4' ) ) then call r_ijk_i4 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_I8' ) ) then call r_ijk_i8 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_J4' ) ) then call r_ijk_j4 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_K4' ) ) then call r_ijk_k4 ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IJK_M' ) ) then call r_ijk_m ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_IKJ' ) ) then call r_ikj ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_JIK' ) ) then call r_jik ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_JKI' ) ) then call r_jki ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_KJI_M' ) ) then call r_kji_m ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_MATMUL' ) ) then call r_matmul ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_MXMA' ) ) then call r_mxma ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_SAXPYC' ) ) then call r_saxpyc ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_SAXPYR' ) ) then call r_saxpyr ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_SDOT' ) ) then call r_sdot ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_SGEMM' ) ) then call r_sgemm ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_SGEMMS' ) ) then call r_sgemms ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_TAXPYC' ) ) then call r_taxpyc ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_TAXPYR' ) ) then call r_taxpyr ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_TDOT' ) ) then call r_tdot ( lda, n, nrep, ttime ) else if ( s_eqi ( order, 'R_TGEMM' ) ) then call r_tgemm ( lda, n, nrep, ttime ) end if return end subroutine getn ( string, ierror, lda, n, nhi, ninc, nlo, nmult ) ! !******************************************************************************* ! !! GETN determines the problem sizes desired by the user. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = * ) STRING, a string containing the user's ! data for an "N" command. This command might have one of the ! following forms, where "n", "nlo", "nhi", "ninc" and "nmult" ! are actually numeric values: ! ! n Solve a single problem of size N. ! nlo, nhi Solve every problem from N = NLO to N = NHI. ! nlo, nhi, ninc Solve from N = NLO to N = NHI, incrementing by NINC. ! nlo, nhi, *nmult Solve from N = NLO to N = NHI, multiplying by NMULT. ! ! Output, integer IERROR, an error flag. ! 0, no error occurred. ! nonzero, an error occurred, and the operation could not be done. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Output, integer N, the new value for the number of rows and columns ! to use in the matrices. ! ! Output, integer NHI, the maximum value of N to use. ! ! Output, integer NINC, the additive increment to use, if additive ! steps are being taken. ! ! Output, integer NLO, the smallest value of N to use. ! ! Output, integer NMULT, the multiplier to use, if multiplicative ! steps are being taken. ! implicit none ! integer ierror integer imult integer itemp integer lchar integer lda integer n integer next integer nhi integer ninc integer nlo integer nmult character ( len = * ) string ! nhi = 0 ninc = 0 nmult = 1 nlo = 0 ! ! Read the first number, N or NLO. ! call s_to_i ( string, itemp, ierror, lchar ) next = lchar + 1 if ( ierror /= 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'I could not understand your definition of N.' return end if if ( itemp <= 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'This value of N is not acceptable!' write ( *, '(a)' ) 'N must be positive.' return end if n = itemp nlo = itemp nhi = itemp write ( *, '(a)' ) ' ' write ( *, '(a,i6)' ) 'N has been set to ', n if ( next > len_trim ( string ) ) then return end if ! ! Read the second number, NHI. ! call s_to_i ( string(next:), itemp, ierror, lchar ) next = next + lchar if ( ierror /= 0 ) then ! write ( *, '(a)' ) ' ' ! write ( *, '(a)' ) 'I could not understand your definition of NHI.' return else if ( itemp <= 0 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'This value of NHI is not acceptable!' write ( *, '(a)' ) 'NHI must be positive.' return else nhi = itemp write ( *, '(a)' ) ' ' write ( *, '(a,i6)' ) 'NHI has been set to ', nhi ninc = 1 end if if ( string(next:next) == '*' ) then next = next+1 imult = 1 else imult = 0 end if ! ! Read third number, ninc or nmult ! call s_to_i ( string(next:), itemp, ierror, lchar ) if ( ierror /= 0 ) then ! write ( *, '(a)' ) ' ' ! write ( *, '(a)' ) 'I could not understand your definition of NINC.' else if ( imult == 0 ) then ninc = itemp nmult = 1 write ( *, '(a)' ) ' ' write ( *, '(a,i6)' ) 'NINC has been set to ', ninc else ninc = 0 nmult = itemp write ( *, '(a)' ) ' ' write ( *, '(a,i6)' ) 'NMULT has been set to ', nmult end if end if return end subroutine getsho ( string, cshow, flop_show, lnshow, lda_show, lval, & machine_show, noshow, nrep_show, nshow, time_show ) ! !******************************************************************************* ! !! GETSHO determines what items the user wishes to print out. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character STRING*(*), a string which specifies which ! variable is to be assigned the input value of LVAL. ! ! Output, logical CSHOW, is TRUE if the multiplication method ! is to be shown. ! ! Output, logical FLOP_SHOW, is TRUE if the MegaFLOP rate is to be shown. ! ! Output, logical LNSHOW, is TRUE if the software's programming ! language, stored in LINGO, is to be shown. ! ! Output, logical LDA_SHOW, is TRUE if the variable LDA is to be shown. ! ! Input, logical LVAL, a logical value, which is to be assigned ! to one of the variables. ! ! Output, logical MACHINE_SHOW, is TRUE if the machine name is to be shown. ! ! Output, logical NOSHOW, is TRUE if the number of operations is ! to be shown. ! ! Output, logical NREP_SHOW, is TRUE if the number of repetitions is ! to be shown. ! ! Output, logical NSHOW, is TRUE if the matrix size N is to be shown. ! ! Output, logical TIME_SHOW, is TRUE if the execution time is to be shown. ! implicit none ! logical cshow logical flop_show logical lnshow logical lda_show logical lval logical machine_show logical noshow logical nrep_show logical nshow logical s_eqi character ( len = * ) string logical time_show ! if ( s_eqi ( string(1:3), 'ALL' ) ) then cshow = lval flop_show = lval lnshow = lval lda_show = lval machine_show = lval noshow = lval nrep_show = lval nshow = lval time_show = lval else if ( s_eqi ( string(1:3), 'CPU' ) ) then time_show = lval else if ( s_eqi ( string(1:8), 'LANGUAGE' ) ) then lnshow = lval else if ( s_eqi ( string(1:3), 'LDA' ) ) then lda_show = lval else if ( s_eqi ( string(1:7), 'MACHINE' ) ) then machine_show = lval else if ( s_eqi ( string(1:6), 'MFLOPS' ) ) then flop_show = lval else if ( s_eqi ( string(1:1), 'N' ) ) then nshow = lval else if ( s_eqi ( string(1:4), 'NREP' ) ) then nrep_show = lval else if ( s_eqi ( string(1:3), 'OPS' ) ) then noshow = lval else if ( s_eqi ( string(1:5), 'ORDER' ) ) then cshow = lval else if ( s_eqi ( string(1:4), 'TIME' ) ) then time_show = lval else write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'That is not a legal name!' write ( *, '(a)' ) 'Legal names are CPU, LANGUAGE, LDA, MACHINE, MFLOPS,' write ( *, '(a)' ) 'N, NREP, OPS, ORDER, and TIME.' end if return end subroutine header ( cshow, flop_show, lnshow, lda_show, machine_show, noshow, & nrep_show, nshow, output, time_show ) ! !******************************************************************************* ! !! HEADER prints out a header for the results. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, logical CSHOW, is TRUE if the multiplication method ! is to be shown. ! ! Input, logical FLOP_SHOW, is TRUE if the MegaFLOP rate is to be shown. ! ! Input, logical LNSHOW, is TRUE if the software's programming ! language, stored in LINGO, is to be shown. ! ! Input, logical LDA_SHOW, is TRUE if the variable LDA is to be shown. ! ! Input, logical MACHINE_SHOW, is TRUE if the machine name is to be shown. ! ! Input, logical NOSHOW, is TRUE if the number of operations is ! to be shown. ! ! Input, logical NREP_SHOW, is TRUE if the number of repetitions is ! to be shown. ! ! Input, logical NSHOW, is TRUE if the matrix size N is to be shown. ! ! Output, character ( len = 82 ) OUTPUT, a string containing a header for ! all the variables which are to be printed. ! ! Input, logical TIME_SHOW, is TRUE if the execution time is to be shown. ! implicit none ! logical cshow logical flop_show logical lnshow logical lda_show logical machine_show integer next logical noshow logical nrep_show logical nshow character ( len = 82 ) output logical time_show ! ! Prepare the header string. ! next = 1 if ( cshow ) then output(next:) = ' Order' next = len_trim(output) + 1 end if if ( lda_show ) then output(next:) = ' LDA' next = len_trim(output) + 1 end if if ( nshow ) then output(next:) = ' N' next = len_trim(output) + 1 end if if ( cshow ) then output(next:) = ' CPU' next = len_trim(output) + 1 end if if ( time_show ) then output(next:) = ' Secs' next = len_trim(output) + 1 end if if ( noshow ) then output(next:) = ' Ops' next = len_trim(output) + 1 end if if ( nrep_show ) then output(next:) = ' NREP' next = len_trim(output) + 1 end if if ( flop_show ) then output(next:) = ' MFLOPS' next = len_trim(output) + 1 end if if ( machine_show ) then output(next:) = ' Machine' next = len_trim(output) + 1 end if if ( lnshow ) then output(next:) = ' Language' next = len_trim(output) + 1 end if write ( *, '(a)' ) ' ' write ( *, '(a)' ) output(1:next-1) write ( *, '(a)' ) ' ' return end subroutine hello ( machine ) ! !******************************************************************************* ! !! HELLO says hello to the user. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = 10 ) MACHINE, the computer where MATMUL is running. ! implicit none ! character ( len = 10 ) machine ! write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'MATMUL' write ( *, '(a)' ) ' An interactive demonstration of the speed' write ( *, '(a)' ) ' of matrix multiplication.' write ( *, '(a)' ) ' ' write ( *, '(a)' ) ' This is version 1.20' write ( *, '(a)' ) ' Last modified on 13 January 2002.' write ( *, '(a)' ) ' ' write ( *, '(a)' ) ' This is the version for ' // trim ( machine ) return end subroutine help ! !******************************************************************************* ! !! HELP prints a list of the available commands. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! implicit none ! write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'This is the list of legal commands.' write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'H Help. List the legal commands.' write ( *, '(a)' ) 'LDA=value Set leading dimension of arrays.' write ( *, '(a)' ) 'M Multiply two matrices.' write ( *, '(a)' ) 'N=value Assigns the size of the arrays.' write ( *, '(a)' ) 'N=nlo,nhi,ninc Sets N=nlo, N=nlo+ninc, ....' write ( *, '(a)' ) 'N=nlo,nhi,*nmult Sets N=nlo, N=nlo*nmult, ....' write ( *, '(a)' ) 'NREP=nrep Sets the repetition factor.' write ( *, '(a)' ) 'ORDER=name Chooses the algorithm.' write ( *, '(a)' ) 'P Prints out current results.' write ( *, '(a)' ) 'Q Quit.' write ( *, '(a)' ) 'SHOW=name Include "name" in output.' write ( *, '(a)' ) ' "name" = ORDER, LDA, N, CPU, OPS,' write ( *, '(a)' ) ' MFLOPS, MACHINE, or LANGUAGE.' write ( *, '(a)' ) ' If "name"=ALL, all items are shown.' write ( *, '(a)' ) 'NOSHOW=name removes item from output list.' write ( *, '(a)' ) ' ' return end subroutine init ( command, cshow, flop_show, lda, lingo, lnshow, lda_show, & machine, machine_show, n, nhi, ninc, nlo, nmult, noshow, nrep, nrep_show, & nshow, order, order_list, order_max, order_num, time_show ) ! !******************************************************************************* ! !! INIT initializes data. ! ! ! Modified: ! ! 05 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, character ( len = 6 ) COMMAND, the most recent user command. ! ! Output, logical CSHOW, is TRUE if the multiplication method ! is to be shown. ! ! Output, logical FLOP_SHOW, is TRUE if the MegaFLOP rate is to be shown. ! ! Output, integer LDA, the leading dimension used for arrays. ! ! Output, character ( len = 7 ) LINGO, the language in which MATMUL is written. ! ! Output, logical LNSHOW, is TRUE if the software's programming ! language, stored in LINGO, is to be shown. ! ! Output, logical LDA_SHOW, is TRUE if the variable LDA is to be shown. ! ! Output, character ( len = 10 ) MACHINE, the computer where MATMUL is running. ! ! Output, logical MACHINE_SHOW, is TRUE if the machine name is to be shown. ! ! Output, integer N, the number of rows and columns in the matrices. ! ! Output, integer NHI, the maximum value of N to use. ! ! Output, integer NINC, the additive increment to use, if additive ! steps are being taken. ! ! Output, integer NLO, the smallest value of N to use. ! ! Output, integer NMULT, the multiplier to use, if multiplicative ! steps are being taken. ! ! Output, logical NOSHOW, is TRUE if the number of operations is ! to be shown. ! ! Output, integer NREP, the number of repetitions to carry out. ! ! Output, logical NREP_SHOW, is TRUE if the number of repetitions is ! to be shown. ! ! Output, logical NSHOW, is TRUE if the matrix size N is to be shown. ! ! Output, character ( len = 10 ) ORDER, specifies the method to be used. ! ! Output, character ( len = 10 ) ORDER_LIST(ORDER_NUM), the list of ! available methods. ! ! Output, integer ORDER_NUM, the number of available methods. ! ! Output, logical TIME_SHOW, is TRUE if the execution time is to be shown. ! implicit none ! integer order_max ! character ( len = 6 ) command logical cshow logical flop_show integer lda character ( len = 7 ) lingo logical lnshow logical lda_show character ( len = 10 ) machine logical machine_show integer n integer nhi integer ninc integer nlo integer nmult logical noshow integer nrep logical nrep_show logical nshow character ( len = 10 ) order character ( len = 10 ) order_list(order_max) integer order_num logical s_eqi logical time_show ! command = ' ' cshow = .true. flop_show = .true. lda = 16 lingo = 'F90' lnshow = .true. lda_show = .true. ! machine = 'Macintosh' ! machine = 'Mac/6881' ! machine = 'PowerMac' ! machine = 'CM-2' ! machine = 'Cray C90' ! machine = 'Cray YMP' ! machine = 'DEC Alpha' ! machine = 'DECstation' ! machine = 'IBM PC' machine = 'SGI/IRIS' ! machine = 'SUN' ! machine = 'VAX/VMS' ! machine = 'VECTOR/VAX' ! machine_show = .true. n = 16 nhi = n ninc = 0 nlo = n nmult = 1 noshow = .true. nrep = 1 nrep_show = .false. nshow = .true. order = 'R_IJK' order_num = 0 order_num = order_num + 1 order_list(order_num) = 'C_IJK' order_num = order_num + 1 order_list(order_num) = 'C_MATMUL' order_num = order_num + 1 order_list(order_num) = 'D_IJK' order_num = order_num + 1 order_list(order_num) = 'D_MATMUL' order_num = order_num + 1 order_list(order_num) = 'I_IJK' order_num = order_num + 1 order_list(order_num) = 'I_MATMUL' if ( s_eqi ( machine(1:4), 'CRAY') ) then order_num = order_num + 1 order_list(order_num) = 'I_IJK_46' end if order_num = order_num + 1 order_list(order_num) = 'L_IJK' order_num = order_num + 1 order_list(order_num) = 'R_DOT_PRODUCT' order_num = order_num + 1 order_list(order_num) = 'R_IJ' order_num = order_num + 1 order_list(order_num) = 'R_IJK' order_num = order_num + 1 order_list(order_num) = 'R_IJK_I2' order_num = order_num + 1 order_list(order_num) = 'R_IJK_I4' order_num = order_num + 1 order_list(order_num) = 'R_IJK_I8' order_num = order_num + 1 order_list(order_num) = 'R_IJK_J4' order_num = order_num + 1 order_list(order_num) = 'R_IJK_K4' if ( s_eqi ( machine(1:4), 'CRAY') ) then order_num = order_num + 1 order_list(order_num) = 'R_IJK_M' end if if ( s_eqi ( machine(1:4), 'CRAY') ) then order_num = order_num + 1 order_list(order_num) = 'R_IJK_S' end if order_num = order_num + 1 order_list(order_num) = 'R_IKJ' order_num = order_num + 1 order_list(order_num) = 'R_JIK' order_num = order_num + 1 order_list(order_num) = 'R_JKI' order_num = order_num + 1 order_list(order_num) = 'R_KIJ' order_num = order_num + 1 order_list(order_num) = 'R_KJI' if ( s_eqi ( machine, 'SGI/IRIS' ) ) then order_num = order_num + 1 order_list(order_num) = 'R_KJI_M' end if order_num = order_num + 1 order_list(order_num) = 'R_MATMUL' if ( s_eqi ( machine(1:4), 'CRAY') ) then order_num = order_num + 1 order_list(order_num) = 'R_MXMA' end if if ( s_eqi ( machine(1:4), 'CRAY') .or. s_eqi ( machine, 'SGI/IRIS' ) ) then order_num = order_num + 1 order_list(order_num) = 'R_SAXPYC' end if if ( s_eqi ( machine(1:4), 'CRAY') .or. s_eqi ( machine, 'SGI/IRIS' ) ) then order_num = order_num + 1 order_list(order_num) = 'R_SAXPYR' end if if ( s_eqi ( machine(1:4), 'CRAY') .or. s_eqi ( machine, 'SGI/IRIS' ) ) then order_num = order_num + 1 order_list(order_num) = 'R_SDOT' end if if ( s_eqi ( machine(1:4), 'CRAY') .or. s_eqi ( machine, 'SGI/IRIS' ) ) then order_num = order_num + 1 order_list(order_num) = 'R_SGEMM' end if if ( s_eqi ( machine(1:4), 'CRAY') ) then order_num = order_num + 1 order_list(order_num) = 'R_SGEMMS' end if order_num = order_num + 1 order_list(order_num) = 'R_TAXPYC' order_num = order_num + 1 order_list(order_num) = 'R_TAXPYR' order_num = order_num + 1 order_list(order_num) = 'R_TDOT' order_num = order_num + 1 order_list(order_num) = 'R_TGEMM' time_show = .true. return end subroutine i_matmul ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! I_MATMUL computes A = B*C using FORTRAN 90 MATMUL and integer arithmetic. ! ! ! Modified: ! ! 21 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! integer a(lda,n) integer b(lda,n) integer c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call i_set ( a, b, c, n, n ) call matmul_cpu_timer ( time1 ) a(1:n,1:n) = matmul ( b(1:n,1:n), c(1:n,1:n) ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine i_ijk ( lda, n, nrep, ttime ) !DIR$ integer = 64 ! ! The above line is a Cray compiler directive, which requests that ! integers be stored as 64 bit quantities, and that 64 bit integer ! arithmetic be used. ! !******************************************************************************* ! !! I_IJK multiplies A = B*C using index order IJK, using integer arithmetic. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! integer a(lda,n) integer b(lda,n) integer c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call i_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine i_ijk_46 ( lda, n, nrep, ttime ) !DIR$ integer = 46 ! ! The above line is a Cray compiler directive, which requests that ! integers be stored as 46 bit quantities, and that 46 bit integer ! arithmetic be used. ! !******************************************************************************* ! !! I_IJK_46 multiplies A = B*C using index order IJK, and 46 bit integer arithmetic. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! integer a(lda,n) integer b(lda,n) integer c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call i_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine i_set ( a, b, c, lda, n ) ! !******************************************************************************* ! !! I_SET initializes the integer A, B and C matrices. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Workspace, integer A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! implicit none ! integer lda integer n ! integer a(lda,n) integer b(lda,n) integer c(lda,n) ! a(1:n,1:n) = 0 b(1:n,1:n) = 1 c(1:n,1:n) = 1 return end subroutine l_ijk ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! L_IJK "multiplies" A = B*C using index order IJK, using logical data. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! logical a(lda,n) logical b(lda,n) logical c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call l_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) .or. ( b(i,j) .and. c(j,k) ) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine l_set ( a, b, c, lda, n ) ! !******************************************************************************* ! !! L_SET initializes the logical A, B and C matrices. ! ! ! Modified: ! ! 04 February 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, logical A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! implicit none ! integer lda integer n ! logical a(lda,n) logical b(lda,n) logical c(lda,n) ! a(1:n,1:n) = .false. b(1:n,1:n) = .true. c(1:n,1:n) = .true. return end subroutine matmul_cpu_timer ( cpu ) ! !******************************************************************************* ! !! MATMUL_CPU_TIMER computes total CPU seconds. ! ! ! Modified: ! ! 22 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, real CPU, the total CPU time, in seconds, since the ! program began running. ! implicit none ! real cpu ! call cpu_time ( cpu ) return end subroutine matmul_real_timer ( seconds ) ! !******************************************************************************* ! !! MATMUL_REAL_TIMER returns a reading of the real time clock. ! ! ! Modified: ! ! 22 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, real SECONDS, a current real time in seconds. ! implicit none ! integer clock_count integer clock_max integer clock_rate real seconds ! call system_clock ( clock_count, clock_rate, clock_max ) seconds = real ( clock_count ) / real ( clock_rate ) return end subroutine mult ( cshow, flop_show, lda, lingo, lnshow, lda_show, machine, & machine_show, n, noshow, nrep, nrep_show, nshow, order, order_list, & order_num, output, time_show ) ! !******************************************************************************* ! !! MULT carries out the matrix multiplication, using the requested method. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, logical CSHOW, is TRUE if the multiplication method ! is to be shown. ! ! Input, logical FLOP_SHOW, is TRUE if the MegaFLOP rate is to be shown. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, character ( len = 7 ) LINGO, the language in which MATMUL is ! written. ! ! Input, logical LNSHOW, is TRUE if the software's programming ! language, stored in LINGO, is to be shown. ! ! Input, logical LDA_SHOW, is TRUE if the variable LDA is to be shown. ! ! Input, character ( len = 10 ) MACHINE, the computer where MATMUL is ! running. ! ! Input, logical MACHINE_SHOW, is TRUE if the machine name is to be shown. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, logical NOSHOW, is TRUE if the number of operations is ! to be shown. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Input, logical NREP_SHOW, is TRUE if the number of repetitions is ! to be shown. ! ! Input, logical NSHOW, is TRUE if the matrix size N is to be shown. ! ! Input, character ( len = 10 ) ORDER, specifies the method to be used. ! ! Input, character ( len = 10 ) ORDER_LIST(ORDER_NUM), the list of ! available methods. ! ! Input, integer ORDER_NUM, the number of available methods. ! ! Output, character ( len = 82 ) OUTPUT, a string containing the data for ! this multiplication. ! ! Input, logical TIME_SHOW, is TRUE if the execution time is to be shown. ! implicit none ! integer order_num ! integer lda integer n ! logical cshow logical flop_show integer i character ( len = 7 ) lingo logical lnshow logical lda_show character ( len = 10 ) machine logical machine_show logical noshow integer nrep logical nrep_show logical nshow character ( len = 10 ) order character ( len = 10 ) order_list(order_num) character ( len = 82 ) output logical s_eqi logical time_show real ttime ! call header ( cshow, flop_show, lnshow, lda_show, machine_show, noshow, & nrep_show, nshow, output, time_show ) if ( s_eqi ( order, 'ALL' ) ) then do i = 1, order_num order = order_list(i) call domethod ( lda, n, nrep, order, ttime ) call report ( cshow, flop_show, lda, lda_show, lingo, lnshow, machine, & machine_show, n, noshow, nrep, nrep_show, nshow, order, output, & time_show, ttime ) end do order = 'ALL' else call domethod ( lda, n, nrep, order, ttime ) call report ( cshow, flop_show, lda, lda_show, lingo, lnshow, machine, & machine_show, n, noshow, nrep, nrep_show, nshow, order, output, & time_show, ttime ) end if return end subroutine nstep ( ido, n, nhi, ninc, nlo, nmult ) ! !******************************************************************************* ! !! NSTEP is used when a set of values of N is being generated. ! ! ! Discussion: ! ! The routine checks whether addition or multiplication is being used, ! and increases the value of N. It also checks whether the set of ! values is done, or whether the input values are inconsistent. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, integer IDO. ! ! 0, N has reached or surpassed NHI, and no further ! increase should be carried out. N has been reset to NLO. ! ! 1, N had not reached or surpassed NHI, and so N has been ! incremented by NINC, or multiplied by NMULT. ! ! 2, N had not reached or surpassed NHI, but we're never ! going to get there! Either: ! ! NINC is negative but NHI is greater than NLO, or ! ! NINC is positive but NHI is less than NLO, or ! ! NMULT is positive but NLO is greater than NHI. ! ! 3, NINC is 0 and NMULT is less than or equal to 1. ! ! Input/output, integer N, the number of rows and columns in the matrices. ! This routine modifies the value of N as appropriate. ! ! Input, integer NHI, the maximum value of N to use. ! ! Input, integer NINC, the additive increment to use, if additive ! steps are being taken. ! ! Input, integer NLO, the smallest value of N to use. ! ! Input, integer NMULT, the multiplier to use, if multiplicative ! steps are being taken. ! implicit none ! integer ido integer n integer nhi integer ninc integer nlo integer nmult ! ! If NINC is not 0, then ! if it's pointing in the right direction, then ! add NINC to N, ! set a continuation flag ! if N+NINC exceeds NHI, then ! reset N, and ! set a completion flag ! else ! set an error flag. ! if ( ninc /= 0 ) then if ( ( nlo < nhi .and. ninc > 0 ) .or. & ( nlo > nhi .and. ninc < 0 ) ) then n = n + ninc ido = 1 if ( ( n > nhi .and. nhi >= nlo ) .or. & ( n < nhi .and. nhi <= nlo ) ) then ido = 0 n = nlo end if else ido = 2 end if return end if ! ! If NMULT is greater than 1, then ! if it's pointing in the right direction, then ! multiply N by NMULT, ! set a continuation flag ! if N*NMULT exceeds NHI, then ! reset N, and ! set a completion flag ! else ! set an error flag. ! if ( nmult > 1 ) then if ( nlo < nhi ) then n = n * nmult ido = 1 if ( ( n > nhi .and. nhi >= nlo ) .or. & ( n < nhi .and. nhi <= nlo ) ) then ido = 0 n = nlo end if else ido = 2 end if return end if ! ! NINC was 0, and NMULT wasn't greater than 1. ! ido = 3 return end subroutine order_get ( string, ierror, order, order_list, order_num ) ! !******************************************************************************* ! !! ORDER_GET reads a new value of order from the user. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = * ) STRING, a string of characters, containing ! the user's choice for the matrix multiplication method to employ. ! ! Output, integer IERROR, an error flag. ! 0, no error occurred. ! nonzero, an error occurred and the operation could not be completed. ! ! Output, character ( len = 10 ) ORDER, specifies the method to be used. ! ! Input, character ( len = 10 ) ORDER_LIST(ORDER_NUM), the list of ! available methods. ! ! Input, integer ORDER_NUM, the number of available methods. ! implicit none ! integer order_num ! integer i integer ierror character ( len = 10 ) order character ( len = 10 ) order_list(order_num) logical s_eqi character ( len = * ) string ! ierror = 0 if ( s_eqi ( string, 'ALL' ) ) then order = string write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'The order has been set to '// trim ( order ) return end if do i = 1, order_num if ( s_eqi ( string, order_list(i) ) ) then order = string write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'The order has been set to ' // trim ( order ) return end if end do write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'The order you requested was not valid:' write ( *, '(a)' ) ' "' // trim ( string ) // '"' ierror = 1 return return end subroutine order_list_print ( order_list, order_num ) ! !******************************************************************************* ! !! ORDER_LIST_PRINT prints the list of choices for the algorithm. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = 10 ) ORDER_LIST(ORDER_NUM), the list of ! available methods. ! ! Input, integer ORDER_NUM), the number of available methods. ! implicit none ! integer order_num ! integer i character ( len = * ) order_list(order_num) ! write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Valid choices for the order are:' write ( *, '(a)' ) ' ' write ( *, '(5a10)' ) ( order_list(i), i = 1, order_num ) write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'or "ALL" to try them all.' return end subroutine printr ( lda, lingo, n, nhi, ninc, nlo, nmult, nrep, order ) ! !******************************************************************************* ! !! PRINTR prints out those parameters the user wants to see. ! ! ! Discussion: ! ! These parameters include: ! ! the language MATMUL is written in, ! the algorithm, ! the leading dimension, ! the maximum allowable dimension, ! the actual size of arrays, ! the number of multiplications carried out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, character ( len = 7 ) LINGO, the language in which MATMUL was written. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NHI, the maximum value of N to use. ! ! Input, integer NINC, the additive increment to use, if additive ! steps are being taken. ! ! Input, integer NLO, the smallest value of N to use. ! ! Input, integer NMULT, the multiplier to use, if multiplicative ! steps are being taken. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Input, character ( len = 10 ) ORDER, specifies the method to be used. ! implicit none ! integer lda character ( len = 7 ) lingo integer n integer nhi integer ninc integer nlo integer nmult integer nrep character ( len = 10 ) order ! write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'This version of MATMUL is written in ' // trim ( lingo ) write ( *, '(a)' ) 'The algorithm chosen is ' // trim ( order ) write ( *, '(a,i6)' ) 'The leading dimension of arrays, LDA, is ', lda write ( *, '(a,i6)' ) 'The actual size of the arrays, N, is ', n if ( nhi /= nlo ) then write ( *, '(a)' ) 'Several problem sizes will be solved in order.' write ( *, '(a,i6)' ) 'The final size of arrays, NHI, will be ', nhi end if if ( ninc /= 0 ) then write ( *, '(a,i6)' ) 'Array size will be incremented by NINC = ', ninc end if if ( nmult /= 1 ) then write ( *, '(a,i6)' ) 'Array size will be multiplied by NMULT = ', nmult end if if ( nrep /= 1 ) then write ( *, '(a,i6)' ) 'Multiplications repeated NREP = ', nrep, ' times.' end if return end subroutine r_dot_product ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_DOT_PRODUCT multiplies A = B*C using the FORTRAN90 DOT_PRODUCT function. ! ! ! Discussion: ! ! This is equivalent to the following "IKJ" code: ! ! do i = 1, n ! do k = 1, n ! do j = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Modified: ! ! 21 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer k integer nrep real sdot real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do k = 1, n a(i,k) = dot_product ( b(i,1:n), c(1:n,k) ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_matmul ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_MATMUL computes A = B*C using FORTRAN 90 MATMUL and real arithmetic. ! ! ! Modified: ! ! 21 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, n, n ) call matmul_cpu_timer ( time1 ) a(1:n,1:n) = matmul ( b(1:n,1:n), c(1:n,1:n) ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ij ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJ multiplies A = B*C using index order IJ with implicit K. ! ! ! Modified: ! ! 22 March 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n a(i,1:n) = a(i,1:n) + b(i,j) * c(j,1:n) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK multiplies A = B*C using index order IJK. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_m ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_M multiplies A = B*C using index order IJK. ! ! ! Discussion: ! ! The routine uses a Cray directive to run the triple loop using ! multitasking. The benefit of such a directive depends on the ! algorithm and the load on the machine. ! ! Except on the Cray, this routine should not be used, and in ! particular, the call to TIMEF should be commented out. ! ! Note that the Cray routine TIMEF must be called, rather than ! SECOND. TIMEF reports elapsed "real" time or "wallclock" time, ! which should go down with multitasking, whereas CPU time should ! remain roughly constant. ! ! In order for parallel processing to occur, this routine must be ! compiled on the Cray with the directive "-Zu"; moreover, the user must ! set the environment variable NCPUS to the number of processors the ! user would like. For instance, a C shell user would type: ! ! setenv NCPUS 8 ! ! while a Bourne shell user would type ! ! NCPUS = 8 ! export NCPUS ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! time1 = 0.0E+00 time2 = 0.0E+00 ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) ! ! call timef ( time1 ) ! !MIC$ do GLOBAL ! do i = 1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do ! call timef ( time2 ) ttime = ttime + ( time2 - time1 ) / 1000.0E+00 end do return end subroutine r_ijk_s ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_S multiplies A = B*C using index order IJK, and no Cray vectorization. ! ! ! Discussion: ! ! The routine uses a Cray directive to run the inner do loop WITHOUT ! vectorization. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n !DIR$ NEXTSCALAR do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_i2 ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_I2 multiplies A = B*C using index order IJK and unrolling I 2 times. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer ihi integer irep integer j integer k integer nrep integer, parameter :: nroll = 2 real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) ihi = ( n / nroll ) * nroll do i = 1, ihi, nroll do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) a(i+1,k) = a(i+1,k) + b(i+1,j) * c(j,k) end do end do end do ! ! Take care of the few cases we missed if N is not a multiple of NROLL. ! do i = ihi+1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_i4 ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_I4 multiplies A = B*C using index order IJK and unrolling I 4 times. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer ihi integer irep integer j integer k integer nrep integer, parameter :: nroll = 4 real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) ihi = ( n / nroll ) * nroll do i = 1, ihi, nroll do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) a(i+1,k) = a(i+1,k) + b(i+1,j) * c(j,k) a(i+2,k) = a(i+2,k) + b(i+2,j) * c(j,k) a(i+3,k) = a(i+3,k) + b(i+3,j) * c(j,k) end do end do end do ! ! Take care of the few cases we missed if N is not a multiple of NROLL. ! do i = ihi+1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_i8 ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_I8 multiplies A = B*C using index order IJK and unrolling I 8 times. ! ! ! Modified: ! ! 08 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer ihi integer irep integer j integer k integer nrep integer, parameter :: nroll = 8 real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) ihi = ( n / nroll ) * nroll do i = 1, ihi, nroll do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) a(i+1,k) = a(i+1,k) + b(i+1,j) * c(j,k) a(i+2,k) = a(i+2,k) + b(i+2,j) * c(j,k) a(i+3,k) = a(i+3,k) + b(i+3,j) * c(j,k) a(i+4,k) = a(i+4,k) + b(i+4,j) * c(j,k) a(i+5,k) = a(i+5,k) + b(i+5,j) * c(j,k) a(i+6,k) = a(i+6,k) + b(i+6,j) * c(j,k) a(i+7,k) = a(i+7,k) + b(i+7,j) * c(j,k) end do end do end do ! ! Take care of the few cases we missed if N is not a multiple of NROLL. ! do i = ihi+1, n do j = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_j4 ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_J4 multiplies A = B*C using index order IJK, and unrolling on J. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer jhi integer k integer nrep integer, parameter :: nroll = 4 real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) jhi = ( n / nroll ) * nroll do i = 1, n do j = 1, jhi, nroll do k = 1, n a(i,k) = a(i,k) + b(i,j ) * c(j,k) + b(i,j+1) * c(j+1,k) & + b(i,j+2) * c(j+2,k) + b(i,j+3) * c(j+3,k) end do end do end do ! ! Take care of the few cases we missed if N is not a multiple of NROLL. ! do i = 1, n do j = jhi+1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ijk_k4 ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IJK_K4 multiplies A = B*C using index order IJK and unrolling on K. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer khi integer nrep integer, parameter :: nroll = 4 real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) khi = ( n / nroll ) * nroll do i = 1, n do j = 1, n do k = 1, khi, nroll a(i,k) = a(i,k) + b(i,j) * c(j,k) a(i,k+1) = a(i,k+1) + b(i,j) * c(j,k+1) a(i,k+2) = a(i,k+2) + b(i,j) * c(j,k+2) a(i,k+3) = a(i,k+3) + b(i,j) * c(j,k+3) end do end do end do ! ! Take care of the few cases we missed if N is not a multiple of NROLL. ! do i = 1, n do j = 1, n do k = khi+1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_ikj ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_IKJ multiplies A = B*C using index order IKJ. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do k = 1, n do j = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_jik ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_JIK multiplies A = B*C using index order JIK. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do j = 1, n do i = 1, n do k = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_jki ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_JKI multiplies A = B*C using index order JKI. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Workspace, real A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do j = 1, n do k = 1, n do i = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_kij ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_KIJ multiplies A = B*C using index order KIJ. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do k = 1, n do i = 1, n do j = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_kji ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_KJI multiplies A = B*C using index order KJI. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do k = 1, n do j = 1, n do i = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_kji_m ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_KJI_M multiplies A = B*C using index order KJI and multitasking. ! ! ! Discussion: ! ! The routine uses an SGI/IRIS parallel processing directive to run the ! triple loop using multitasking. ! ! The benefit of such a directive depends on the algorithm and the ! load on the machine. ! ! Except on the SGI/IRIS, this routine should not be used, and in ! particular, the call to SECNDS should be commented out. ! ! Note that the SGI/IRIS routine SECNDS must be called, rather than ! SECOND. SECNDS reports elapsed "real" time or "wallclock" time, ! which should go down with multitasking, whereas CPU time should ! remain roughly constant. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer k integer nrep real time1 real time2 real ttime ! time1 = 0.0E+00 time2 = 0.0E+00 ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_real_timer ( time1 ) ! !$DOACROSS LOCAL(I, J, K) ! do k = 1, n do j = 1, n do i = 1, n a(i,k) = a(i,k) + b(i,j) * c(j,k) end do end do end do call matmul_real_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_mxma ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_MXMA multiplies A = B*C using optimized MXMA. ! ! ! Discussion: ! ! Since the routine MXMA is only available on the Cray, in the ! SCILIB library, the statement ! ! call mxma(...) ! ! should be commented out in versions of MATMUL that are to run ! on other machines. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer, parameter :: inca = 1 integer, parameter :: incb = 1 integer, parameter :: incc = 1 integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) ! call mxma ( b, incb, lda, c, incc, lda, a, inca, lda, n, n, n ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_saxpyc ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_SAXPYC multiplies A = B*C columnwise, using optimized SAXPY. ! ! ! Discussion: ! ! SAXPY is used to carry out the multiplication "columnwise". ! ! This is equivalent to the following "JKI" code: ! ! do j = 1, n ! do k = 1, n ! do i = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Except on the Cray and SGI/IRIS, the statement "call saxpy" below ! should be commented out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer, parameter :: inca = 1 integer, parameter :: incb = 1 integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do j = 1, n do k = 1, n call saxpy ( n, c(j,k), b(1,j), incb, a(1,k), inca ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_saxpyr ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_SAXPYR multiplies A = B*C "rowwise", using optimized SAXPY. ! ! ! Discussion: ! ! This is equivalent to the following "IJK" code: ! ! do i = 1, n ! do j = 1, n ! do k = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Except on the Cray and SGI/IRIS, the statement "call saxpy" below ! should be commented out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n call saxpy ( n, b(i,j), c(j,1), lda, a(i,1), lda ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_sdot ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_SDOT multiplies A = B*C using optimized SDOT. ! ! ! Discussion: ! ! This is equivalent to the following "IKJ" code: ! ! do i = 1, n ! do k = 1, n ! do j = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Except on the Cray or SGI/IRIS, the call to SDOT should be commented out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer k integer nrep real sdot real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do k = 1, n a(i,k) = sdot ( n, b(i,1), lda, c(1,k), 1 ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_set ( a, b, c, lda, n ) ! !******************************************************************************* ! !! R_SET initializes the real A, B and C matrices. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Output, real A(LDA,N), B(LDA,N), C(LDA,N), the three matrices ! used in the multiplication. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer j ! a(1:n,1:n) = 0.0E+00 b(1:n,1:n) = 1.0E+00 c(1:n,1:n) = 1.0E+00 return end subroutine r_sgemm ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_SGEMM multiplies A = B*C using optimized SGEMM. ! ! ! Discussion: ! ! Except on the Cray or SGI/IRIS, the call to SGEMM should be commented out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real, parameter :: alpha = 1.0E+00 real b(lda,n) real, parameter :: beta = 0.0E+00 real c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) call sgemm ( 'n', 'n', n, n, n, alpha, b, lda, c, lda, beta, a, lda ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_sgemms ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_SGEMMS multiplies A = B*C using optimized SGEMMS. ! ! ! Discussion: ! ! SGEMMS is the Cray SCILIB variant of the BLAS3 routine SGEMM. ! The difference is that SGEMMS uses Strassen's algorithm. ! ! Except on the Cray, the call to SGEMMS should be commented out. ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real, parameter :: alpha = 1.0E+00 real b(lda,n) real, parameter :: beta = 0.0E+00 real c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! real work(n,n,3) ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) ! call sgemms ( 'n', 'n', n, n, n, alpha, b, lda, c, lda, beta, a, lda, work ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_taxpyc ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_TAXPYC multiplies A = B*C columnwise, using unoptimized SAXPY. ! ! ! Discussion: ! ! This is equivalent to the following "JKI" code: ! ! do j = 1, n ! do k = 1, n ! do i = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer irep integer j integer k integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do j = 1, n do k = 1, n call taxpy ( n, c(j,k), b(1,j), 1, a(1,k), 1 ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_taxpyr ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_TAXPYR multiplies A = B*C rowwise using source code SAXPY. ! ! ! Discussion: ! ! This is equivalent to the following "IJK" code: ! ! do i = 1, n ! do j = 1, n ! do k = 1, n ! a(i,k) = a(i,k)+b(i,j)*c(j,k) ! end do ! end do ! end do ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer j integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do j = 1, n call taxpy ( n, b(i,j), c(j,1), lda, a(i,1), lda ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_tdot ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_TDOT multiplies A = B * C using source code SDOT. ! ! ! Discussion: ! ! This is equivalent to the following "IKJ" code: ! ! do i = 1, n ! do k = 1, n ! do j = 1, n ! a(i,k) = a(i,k) + b(i,j) * c(j,k) ! end do ! end do ! end do ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real b(lda,n) real c(lda,n) integer i integer irep integer k integer nrep real tdot real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) do i = 1, n do k = 1, n a(i,k) = tdot ( n, b(i,1), lda, c(1,k), 1 ) end do end do call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine r_tgemm ( lda, n, nrep, ttime ) ! !******************************************************************************* ! !! R_TGEMM multiplies A = B*C using TGEMM. ! ! ! Modified: ! ! 28 August 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! real a(lda,n) real, parameter :: alpha = 1.0E+00 real b(lda,n) real, parameter :: beta = 0.0E+00 real c(lda,n) integer irep integer nrep real time1 real time2 real ttime ! ttime = 0.0E+00 do irep = 1, nrep call r_set ( a, b, c, lda, n ) call matmul_cpu_timer ( time1 ) call tgemm ( 'n', 'n', n, n, n, alpha, b, lda, c, lda, beta, a, lda ) call matmul_cpu_timer ( time2 ) ttime = ttime + time2 - time1 end do return end subroutine report ( cshow, flop_show, lda, lda_show, lingo, lnshow, machine, & machine_show, n, noshow, nrep, nrep_show, nshow, order, output, time_show, & ttime ) ! !******************************************************************************* ! !! REPORT reports the results for each multiplication experiment. ! ! ! Modified: ! ! 06 May 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, logical CSHOW, is TRUE if the multiplication method ! is to be shown. ! ! Input, logical FLOP_SHOW, is TRUE if the MegaFLOP rate is to be shown. ! ! Input, integer LDA, the leading dimension used for arrays. ! ! Input, logical LDA_SHOW, is TRUE if the variable LDA is to be shown. ! ! Input, character ( len = 7 ) LINGO, the language in which MATMUL is written. ! ! Input, logical LNSHOW, is TRUE if the software's programming ! language, stored in LINGO, is to be shown. ! ! Input, character ( len = 10 ) MACHINE, the computer where MATMUL is running. ! ! Input, logical MACHINE_SHOW, is TRUE if the machine name is to be shown. ! ! Input, integer N, the number of rows and columns in the matrices. ! ! Input, logical NOSHOW, is TRUE if the number of operations is ! to be shown. ! ! Input, integer NREP, the number of repetitions to carry out. ! ! Input, logical NREP_SHOW, is TRUE if the number of repetitions is ! to be shown. ! ! Input, logical NSHOW, is TRUE if the matrix size N is to be shown. ! ! Input, character ( len = 10 ) ORDER, specifies the method to be used. ! ! Output, character ( len = 82 ) OUTPUT, a string containing the data for ! this multiplication. ! ! Input, logical TIME_SHOW, is TRUE if the execution time is to be shown. ! ! Output, real TTIME, an estimate of the CPU time in seconds required ! for the matrix multiplications. ! implicit none ! integer lda integer n ! logical cshow logical flop_show real ftemp integer ihi integer ilo logical lda_show character ( len = 7 ) lingo logical lnshow character ( len = 10 ) machine logical machine_show logical noshow integer nrep logical nrep_show logical nshow integer ntemp character ( len = 10 ) order character ( len = 82 ) output logical time_show real ttime ! output = ' ' ihi = 0 if ( cshow ) then ilo = ihi + 1 ihi = ilo + 9 output(ilo:ihi) = order end if if ( lda_show ) then ilo = ihi + 1 ihi = ilo + 3 write ( output(ilo:ihi), '(i4)' ) lda end if if ( nshow ) then ilo = ihi + 1 ihi = ilo + 3 write ( output(ilo:ihi), '(i4)' ) n end if if ( time_show ) then ilo = ihi + 1 ihi = ilo + 13 write ( output(ilo:ihi), '(f14.6)' ) ttime end if if ( noshow ) then ntemp = 2 * n**3 ilo = ihi + 1 ihi = ilo + 9 write ( output(ilo:ihi), '(i10)' ) ntemp end if if ( nrep_show ) then ilo = ihi + 1 ihi = ilo + 4 write ( output(ilo:ihi), '(i5)' ) nrep end if if ( flop_show ) then if ( ttime == 0.0E+00 ) then ftemp = 0.0E+00 else ftemp = real ( ntemp * nrep ) / ( 1.0E+06 * ttime ) end if ilo = ihi + 1 ihi = ilo + 9 write ( output(ilo:ihi), '(f10.4)' ) ftemp end if if ( machine_show ) then ilo = ihi + 1 ilo = ilo + 1 ihi = ilo + 9 output(ilo:ihi) = machine end if if ( lnshow ) then ilo = ihi + 1 ilo = ilo + 1 ihi = ilo + 6 output(ilo:ihi) = lingo end if if ( ihi > 0 ) then write ( *, '(a)' ) output(1:ihi) end if return end subroutine s_blank_delete ( s ) ! !******************************************************************************* ! !! S_BLANK_DELETE removes blanks from a string, left justifying the remainder. ! ! ! Comment: ! ! All TAB characters are also removed. ! ! Modified: ! ! 26 July 1998 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input/output, character ( len = * ) S, the string to be transformed. ! implicit none ! character c integer iget integer iput character ( len = * ) s character, parameter :: TAB = char ( 9 ) ! iput = 0 do iget = 1, len ( s ) c = s(iget:iget) if ( c /= ' ' .and. c /= TAB ) then iput = iput + 1 s(iput:iput) = c end if end do s(iput+1:) = ' ' return end subroutine s_cap ( string ) ! !******************************************************************************* ! !! S_CAP replaces any lowercase letters by uppercase ones in a string. ! ! ! Modified: ! ! 16 May 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input/output, character ( len = * ) STRING, the string to be transformed. ! implicit none ! character c integer i integer nchar character ( len = * ) string ! nchar = len ( string ) do i = 1, nchar c = string(i:i) call ch_cap ( c ) string(i:i) = c end do return end function s_eqi ( strng1, strng2 ) ! !******************************************************************************* ! !! S_EQI is a case insensitive comparison of two strings for equality. ! ! ! Examples: ! ! S_EQI ( 'Anjana', 'ANJANA' ) is .TRUE. ! ! Modified: ! ! 14 April 1999 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = * ) STRNG1, STRNG2, the strings to compare. ! ! Output, logical S_EQI, the result of the comparison. ! implicit none ! integer i integer len1 integer len2 integer lenc logical s_eqi character s1 character s2 character ( len = * ) strng1 character ( len = * ) strng2 ! len1 = len ( strng1 ) len2 = len ( strng2 ) lenc = min ( len1, len2 ) s_eqi = .false. do i = 1, lenc s1 = strng1(i:i) s2 = strng2(i:i) call ch_cap ( s1 ) call ch_cap ( s2 ) if ( s1 /= s2 ) then return end if end do do i = lenc + 1, len1 if ( strng1(i:i) /= ' ' ) then return end if end do do i = lenc + 1, len2 if ( strng2(i:i) /= ' ' ) then return end if end do s_eqi = .true. return end subroutine s_to_i ( s, ival, ierror, last ) ! !******************************************************************************* ! !! S_TO_I reads an integer value from a string. ! ! ! Modified: ! ! 28 June 2000 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, character ( len = * ) S, a string to be examined. ! ! Output, integer IVAL, the integer value read from the string. ! If the string is blank, then IVAL will be returned 0. ! ! Output, integer IERROR, an error flag. ! 0, no error. ! 1, an error occurred. ! ! Output, integer LAST, the last character of S used to make IVAL. ! implicit none ! character c integer i integer ierror integer isgn integer istate integer ival integer last character ( len = * ) s ! ierror = 0 istate = 0 isgn = 1 ival = 0 do i = 1, len_trim ( s ) c = s(i:i) ! ! Haven't read anything. ! if ( istate == 0 ) then if ( c == ' ' ) then else if ( c == '-' ) then istate = 1 isgn = -1 else if ( c == '+' ) then istate = 1 isgn = + 1 else if ( lle ( '0', c ) .and. lle ( c, '9' ) ) then istate = 2 ival = ichar ( c ) - ichar ( '0' ) else ierror = 1 return end if ! ! Have read the sign, expecting digits. ! else if ( istate == 1 ) then if ( c == ' ' ) then else if ( lle ( '0', c ) .and. lle ( c, '9' ) ) then istate = 2 ival = ichar ( c ) - ichar ( '0' ) else ierror = 1 return end if ! ! Have read at least one digit, expecting more. ! else if ( istate == 2 ) then if ( lle ( '0', c ) .and. lle ( c, '9' ) ) then ival = 10 * ival + ichar ( c ) - ichar ( '0' ) else ival = isgn * ival last = i - 1 return end if end if end do ! ! If we read all the characters in the string, see if we're OK. ! if ( istate == 2 ) then ival = isgn * ival last = len_trim ( s ) else ierror = 1 last = 0 end if return end subroutine taxpy ( n, sa, sx, incx, sy, incy ) ! !******************************************************************************* ! !! TAXPY is unoptimized standard BLAS routine SAXPY. ! ! ! Comments: ! ! Roughly, TAXPY adds SA * SX(I) to SY(I) for I = 1 to N. ! However, the increments INCX and INCY allow this to be done ! even when SX or SY is a row or column of a matrix. ! ! Modified: ! ! 28 August 1999 ! ! Parameters: ! ! Input, integer N, the "logical" number of items in the vectors. ! ! Input, real SA, the multiplier. ! ! Input, real SX(*), a vector, a multiple of which is to be added to SY. ! ! Input, integer INCX, the increment in SX between the successive ! elements that we will use. ! ! Input/output, real SY(*), a vector, to which is to be added SA ! times an entry of SX. ! ! Input, integer INCY, the increment in SY between the successive ! elements that we will use. ! implicit none ! integer i integer incx integer incy integer ix integer iy integer m integer n real sa real sx(*) real sy(*) ! if ( n <= 0 ) then return end if if ( sa == 0.0E+00 ) then return end if if ( incx /= 1 .or. incy /= 1 ) then if ( incx < 0 ) then ix = ( - n + 1 ) * incx + 1 else ix = 1 end if if ( incy < 0 ) then iy = ( - n + 1 ) * incy + 1 else iy = 1 end if do i = 1, n sy(iy) = sy(iy) + sa * sx(ix) ix = ix + incx iy = iy + incy end do else m = mod ( n, 4 ) do i = 1, m sy(i) = sy(i) + sa * sx(i) end do do i = m+1, n, 4 sy(i) = sy(i) + sa * sx(i) sy(i+1) = sy(i+1) + sa * sx(i+1) sy(i+2) = sy(i+2) + sa * sx(i+2) sy(i+3) = sy(i+3) + sa * sx(i+3) end do end if return end function tdot ( n, sx, incx, sy, incy ) ! !******************************************************************************* ! !! TDOT computes the inner product of two vectors. ! ! ! Discussion: ! ! TDOT is a source code version of the BLAS level 1 routine SDOT, ! which can be used to compare performance with optimized versions ! of SDOT supplied by the compiler or operating system. ! ! Modified: ! ! 28 August 1999 ! ! Parameters: ! ! Input, integer N, the "logical" number of items in the vectors. ! ! Input, real SX(*), the first vector. ! ! Input, integer INCX, the increment in SX between the successive ! elements that we will use. ! ! Input/output, real SY(*), the second vector. ! ! Input, integer INCY, the increment in SY between the successive ! elements that we will use. ! ! Output, real TDOT, the sum of the products of the appropriate ! entries of SX and SY. ! implicit none ! integer i integer incx integer incy integer ix integer iy integer m integer n real stemp real sx(*) real sy(*) real tdot ! tdot = 0.0E+00 if ( n <= 0 ) then return end if if ( incx /= 1 .or. incy /= 1 ) then if ( incx < 0 ) then ix = ( - n + 1 ) * incx + 1 else ix = 1 end if if ( incy < 0 ) then iy = ( - n + 1 ) * incy + 1 else iy = 1 end if stemp = 0.0E+00 do i = 1, n stemp = stemp + sx(ix) * sy(iy) ix = ix + incx iy = iy + incy end do tdot = stemp else m = mod ( n, 5) stemp = 0.0E+00 do i = 1, m stemp = stemp + sx(i) * sy(i) end do do i = m+1, n, 5 stemp = stemp + sx(i) * sy(i) + sx(i+1) * sy(i+1) & + sx(i+2) * sy(i+2) + sx(i+3) * sy(i+3) & + sx(i+4) * sy(i+4) end do end if tdot = stemp return end subroutine terbla ( srname, info ) ! !******************************************************************************* ! !! TERBLA is the source code for the BLAS error handler. ! ! ! Modified: ! ! 28 August 1999 ! ! Parameters: ! ! Input, character ( len = 6 ) SRNAME, the routine which called TERBLA. ! ! Input, integer INFO, the position of the invalid ! parameter in the parameter-list of the calling routine. ! implicit none ! integer info character ( len = * ) srname ! write ( *, 99999 ) srname, info stop 99999 format ( ' ** On entry to ', A6, ' parameter number ', i2, & ' had an illegal value' ) end subroutine tgemm ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, & ldc ) ! !******************************************************************************* ! !! TGEMM is a source code copy of SGEMM, a BLAS matrix * matrix routine. ! ! ! Discussion: ! ! TGEMM performs one of the matrix-matrix operations ! ! C : = ALPHA * op( A ) * op( B ) + BETA * C, ! ! where op( X ) is one of ! ! op( X ) = X or op( X ) = X', ! ! ALPHA and BETA are scalars, and A, B and C are matrices, with op( A ) ! an M by K matrix, op( B ) a K by N matrix and C an M by N matrix. ! ! Parameters: ! ! transa - character. ! On entry, transa specifies the form of op( A ) to be used in ! the matrix multiplication as follows: ! ! transa = 'N' or 'n', op( A ) = A. ! ! transa = 'T' or 't', op( A ) = A'. ! ! transa = 'C' or 'c', op( A ) = A'. ! ! Unchanged on exit. ! ! transb - character. ! On entry, transb specifies the form of op( B ) to be used in ! the matrix multiplication as follows: ! ! transa = 'N' or 'n', op( B ) = B. ! ! transa = 'T' or 't', op( B ) = B'. ! ! transa = 'C' or 'c', op( B ) = B'. ! ! Unchanged on exit. ! ! m - integer. ! On entry, m specifies the number of rows of the matrix ! op( A ) and of the matrix C. m must be at least zero. ! Unchanged on exit. ! ! N - integer. ! On entry, N specifies the number of columns of the matrix ! op( B ) and the number of columns of the matrix C. N must be ! at least zero. ! Unchanged on exit. ! ! K - integer. ! On entry, K specifies the number of columns of the matrix ! op( A ) and the number of rows of the matrix op( B ). K must ! be at least zero. ! Unchanged on exit. ! ! alpha - real . ! On entry, alpha specifies the scalar alpha. ! Unchanged on exit. ! ! A - real array of DIMENSION ( lda, ka ), where ka is ! k when transa = 'N' or 'n', and is m otherwise. ! Before entry with transa = 'N' or 'n', the leading m by k ! part of the array A must contain the matrix a, otherwise ! the leading k by m part of the array A must contain the ! matrix A. ! Unchanged on exit. ! ! lda - integer. ! On entry, lda specifies the first dimension of A as declared ! in the calling (sub) program. When transa = 'N' or 'n' then ! lda must be at least max( 1, m ), otherwise lda must be at ! least max( 1, k ). ! Unchanged on exit. ! ! B - real array of DIMENSION ( ldb, kb ), where kb is ! n when transb = 'N' or 'n', and is k otherwise. ! Before entry with transb = 'N' or 'n', the leading k by n ! part of the array B must contain the matrix B, otherwise ! the leading n by k part of the array B must contain the ! matrix B. ! Unchanged on exit. ! ! ldb - integer. ! On entry, ldb specifies the first dimension of B as declared ! in the calling (sub) program. When transb = 'N' or 'n' then ! ldb must be at least max( 1, k ), otherwise LDB must be at ! least max( 1, n ). ! Unchanged on exit. ! ! beta - real . ! On entry, beta specifies the scalar beta. When BETA is ! supplied as zero then C need not be set on input. ! Unchanged on exit. ! ! c - real array of DIMENSION ( LDC, n ). ! Before entry, the leading m by n part of the array C must ! contain the matrix C, except when beta is zero, in which ! case C need not be set on entry. ! On exit, the array C is overwritten by the m by n matrix ! ( alpha*op( A )*op( B ) + beta*C ). ! ! LDC - integer. ! On entry, LDC specifies the first dimension of C as declared ! in the calling (sub) program. LDC must be at least ! max( 1, m ). ! Unchanged on exit. ! implicit none ! integer lda integer ldb integer ldc ! real a(lda,*) real alpha real b(ldb,*) real beta real c(ldc,*) integer i integer info integer j integer k integer m integer n integer ncola logical nota logical notb integer nrowa integer nrowb logical tlsame character transa character transb ! ! Set nota and notb as true if A and B respectively are not ! transposed, and set nrowa, ncola and nrowb as the number of rows ! and columns of A and the number of rows of B respectively. ! nota = tlsame ( transa, 'N' ) notb = tlsame ( transb, 'N' ) if ( nota ) then nrowa = m ncola = k else nrowa = k ncola = m end if if ( notb ) then nrowb = k else nrowb = n end if ! ! Test the input parameters. ! info = 0 if ( ( .not.nota ) .and. & ( .not.tlsame( transa, 'T' ) ).and. & ( .not.tlsame( transa, 'C' ) ) ) then info = 1 else if ( ( .not.notb ) .and. & ( .not.tlsame( transb, 'T' ) ).and. & ( .not.tlsame( transb, 'C' ) ) ) then info = 2 else if ( m<0 ) then info = 3 else if ( n<0 ) then info = 4 else if ( k<0 ) then info = 5 else if ( lda0 ) then kx = 1 else kx = 1 - ( lenx - 1 )*incx end if if ( incy>0 ) then ky = 1 else ky = 1 - ( leny - 1 )*incy end if ! ! Start the operations. In this version the elements of A are ! accessed sequentially with one pass through A. ! ! First form y : = beta*y. ! if ( beta /= 1.0E+00 ) then if ( incy == 1 ) then if ( beta == 0.0E+00 ) then y(1:leny) = 0.0E+00 else y(1:leny) = beta*y(1:leny) end if else iy = ky if ( beta==0.0E+00 ) then do i = 1, leny y(iy) = 0.0E+00 iy = iy + incy end do else do i = 1,leny y(iy) = beta*y(iy) iy = iy + incy end do end if end if end if if ( alpha==0.0E+00 ) then return end if if ( tlsame( trans, 'N' ) ) then ! ! Form y : = alpha*A*x + y. ! jx = kx if ( incy == 1 ) then do j = 1, n if ( x( jx ) /= 0.0E+00 ) then temp = alpha * x(jx) do i = 1, m y(i) = y(i) + temp * a(i,j) end do end if jx = jx + incx end do else do j = 1, n if ( x( jx ) /= 0.0E+00 ) then temp = alpha*x( jx ) iy = ky do i = 1, m y(iy) = y(iy) + temp*a(i,j) iy = iy + incy end do end if jx = jx + incx end do end if else ! ! Form y : = alpha*A'*x + y. ! jy = ky if ( incx==1 ) then do j = 1, n temp = 0.0E+00 do i = 1, m temp = temp + a(i,j)*x(i) end do y( jy ) = y( jy ) + alpha*temp jy = jy + incy end do else do j = 1, n temp = 0.0E+00 ix = kx do i = 1,m temp = temp + a(i,j) * x(ix) ix = ix + incx end do y(jy) = y(jy) + alpha * temp jy = jy + incy end do end if end if return end subroutine timestamp ( ) ! !******************************************************************************* ! !! TIMESTAMP prints the current YMDHMS date as a time stamp. ! ! ! Example: ! ! May 31 2001 9:45:54.872 AM ! ! Modified: ! ! 31 May 2001 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! None ! implicit none ! character ( len = 8 ) ampm integer d character ( len = 8 ) date integer h integer m integer mm character ( len = 9 ), parameter, dimension(12) :: month = (/ & 'January ', 'February ', 'March ', 'April ', & 'May ', 'June ', 'July ', 'August ', & 'September', 'October ', 'November ', 'December ' /) integer n integer s character ( len = 10 ) time integer values(8) integer y character ( len = 5 ) zone ! call date_and_time ( date, time, zone, values ) y = values(1) m = values(2) d = values(3) h = values(5) n = values(6) s = values(7) mm = values(8) if ( h < 12 ) then ampm = 'AM' else if ( h == 12 ) then if ( n == 0 .and. s == 0 ) then ampm = 'Noon' else ampm = 'PM' end if else h = h - 12 if ( h < 12 ) then ampm = 'PM' else if ( h == 12 ) then if ( n == 0 .and. s == 0 ) then ampm = 'Midnight' else ampm = 'AM' end if end if end if write ( *, '(a,1x,i2,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) & trim ( month(m) ), d, y, h, ':', n, ':', s, '.', mm, trim ( ampm ) return end function tlsame ( ca, cb ) ! !******************************************************************************* ! !! TLSAME is a source code copy of BLAS LSAME, testing character equality. ! ! ! Discussion: ! ! CB is assumed to be an upper case letter. The routine returns TRUE if ! CA is either the same as CB or the equivalent lower case letter. ! ! This version of the routine is only correct for ASCII code. ! Installers must modify the routine for other character-codes. ! ! For EBCDIC systems the constant ioff must be changed to -64. ! ! Parameters: ! ! Input, character CA, CB, the characters to be compared. ! ! Output, logical TLSAME, is TRUE if CA is the same as CB, or ! if CA is the lower-case version of CB. ! implicit none ! character ca character cb integer, parameter :: ioff = 32 logical tlsame ! ! Test if the characters are equal. ! tlsame = ( ca == cb ) ! ! Test for equivalence. ! if ( .not. tlsame ) then tlsame = ichar(ca) - ioff == ichar(cb) end if return end