Main Page | Namespace List | Data Structures | File List | Namespace Members | Data Fields | Globals

sample78.cpp

Go to the documentation of this file.
00001 #include <iostream>
00002 #include <iomanip>
00003 
00004 
00005 extern "C" {
00006 #include <atlas/cblas.h>
00007 }
00008 
00009 #include <boost/numeric/ublas/matrix.hpp>
00010 #include <boost/numeric/ublas/operation.hpp>
00011 #include <boost/numeric/ublas/operation_blocked.hpp>
00012 #include <boost/numeric/ublas/io.hpp>
00013 #include <boost/timer.hpp>
00014 
00015 
00016 namespace ublas = boost::numeric::ublas;
00017 using namespace std;
00018 
00019 /* #define double float
00020    #define cblas_dgemm cblas_sgemm
00021  */
00022     BOOST_UBLAS_INLINE
00023     void
00024     cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00025                             const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00026                 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00027                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00028                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00029             int m = C.size1();
00030                 int n = C.size2();
00031                 int k = A.size2();
00032         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00033         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00034         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00035                 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
00036                                         m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00037         }
00038 
00039     BOOST_UBLAS_INLINE
00040     void
00041     cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00042                             const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00043                 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00044                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00045                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00046             int m = C.size1();
00047                 int n = C.size2();
00048                 int k = A.size2();
00049         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00050         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00051         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00052                 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 
00053                                         m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00054         }
00055 
00056     BOOST_UBLAS_INLINE
00057     void
00058     cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00059                             const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00060                 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00061                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00062                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00063             int m = C.size1();
00064                 int n = C.size2();
00065                 int k = A.size2();
00066         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00067         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00068         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00069                 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 
00070                                         m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00071         }
00072 
00073     BOOST_UBLAS_INLINE
00074     void
00075     cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00076                             const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00077                 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00078                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00079                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00080             int m = C.size1();
00081                 int n = C.size2();
00082                 int k = A.size2();
00083         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00084         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00085         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00086                 cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans, 
00087                                         m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00088         }
00089 
00090     BOOST_UBLAS_INLINE
00091     void
00092     cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00093                             const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00094                 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00095                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00096                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00097             int m = C.size1();
00098                 int n = C.size2();
00099                 int k = A.size2();
00100         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00101         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00102         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00103                 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, 
00104                                         m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00105         }
00106 
00107     BOOST_UBLAS_INLINE
00108     void
00109     cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00110                             const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00111                 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00112                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00113                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00114             int m = C.size1();
00115                 int n = C.size2();
00116                 int k = A.size2();
00117         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00118         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00119         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00120                 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, 
00121                                         m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00122         }
00123 
00124     BOOST_UBLAS_INLINE
00125     void
00126     cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00127                             const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00128                 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00129                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00130                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00131             int m = C.size1();
00132                 int n = C.size2();
00133                 int k = A.size2();
00134         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00135         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00136         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00137                 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 
00138                                         m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00139         }
00140 
00141     BOOST_UBLAS_INLINE
00142     void
00143     cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00144                             const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00145                 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00146                 // C(m,n) <- aplha A(m,k) B(k,n) + beta C(m,n)
00147                 // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
00148             int m = C.size1();
00149                 int n = C.size2();
00150                 int k = A.size2();
00151         BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00152         BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00153         BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00154                 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 
00155                                         m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00156         }
00157 
00158 void zeromatrix(ublas::matrix<double,ublas::row_major>& A) {
00159  for(size_t i = 0; i<A.size1(); i++)
00160   for(size_t j = 0; j<A.size2(); j++)
00161    A(i,j) = 0.0;
00162 }
00163 void zeromatrix(ublas::matrix<double,ublas::column_major>& A) {
00164  for(size_t i = 0; i<A.size1(); i++)
00165   for(size_t j = 0; j<A.size2(); j++)
00166    A(i,j) = 0.0;
00167 }
00168 void initmatrix(ublas::matrix<double,ublas::row_major>& A) {
00169  for(size_t i = 0; i<A.size1(); i++)
00170   for(size_t j = 0; j<A.size2(); j++)
00171    A(i,j) = 1.0*rand();
00172 }
00173 void initmatrix(ublas::matrix<double,ublas::column_major>& A) {
00174  for(size_t i = 0; i<A.size1(); i++)
00175   for(size_t j = 0; j<A.size2(); j++)
00176    A(i,j) = 1.0*rand();
00177 }
00178 
00179 #ifdef NOCHECK
00180 template <class M, class E>
00181 int equals(const M& lhs, const E& rhs)
00182 {
00183         return 2;
00184 }
00185 #endif
00186 
00187 typedef ublas::matrix<double,ublas::row_major> RT;
00188 typedef ublas::matrix<double,ublas::column_major> CT;
00189 
00190 #define mul(a,b,d) \
00191 { \
00192   zeromatrix(X); \
00193   initmatrix(A); \
00194   initmatrix(B); \
00195   t.restart(); \
00196   for (size_t a=0;a<size;a++) \
00197    for (size_t b=0;b<size;b++) \
00198     for (size_t d=0;d<size;d++) \
00199      X(r,c) += A(r,k)*B(k,c); \
00200   time = t.elapsed(); \
00201   cout << setw(8) << time << flush; \
00202   cerr << equals(X,prod(A,B))<< endl; \
00203 }
00204 
00205 #define mul2(xtype) \
00206 { \
00207   zeromatrix(X); \
00208   initmatrix(A); \
00209   initmatrix(B); \
00210   t.restart(); \
00211   X.plus_assign(prod(A,B)); \
00212   time = t.elapsed(); \
00213   cout << setw(8) << time << flush; \
00214   cerr << equals(X,prod(A,B)) << endl; \
00215 }
00216 
00217 #define mul3(xtype) \
00218 { \
00219   zeromatrix(X); \
00220   initmatrix(A); \
00221   initmatrix(B); \
00222   t.restart(); \
00223   ublas::axpy_prod(A,B,X,false); \
00224   time = t.elapsed(); \
00225   cout << setw(8) << time << flush; \
00226   cerr << equals(X,prod(A,B)) << endl; \
00227 }
00228 
00229 #define mul4(xtype) \
00230 { \
00231   zeromatrix(X); \
00232   initmatrix(A); \
00233   initmatrix(B); \
00234   t.restart(); \
00235   ublas::opb_prod(A,B,X,false); \
00236   time = t.elapsed(); \
00237   cout << setw(8) << time << flush; \
00238   cerr << equals(X,prod(A,B)) << endl; \
00239 }
00240 
00241 #define mul5(xtype) \
00242 { \
00243   zeromatrix(X); \
00244   initmatrix(A); \
00245   initmatrix(B); \
00246   t.restart(); \
00247   X.plus_assign(ublas::block_prod<xtype>(A,B,64)); \
00248   time = t.elapsed(); \
00249   cout << setw(8) << time << flush; \
00250   cerr << equals(X,prod(A,B)) << endl; \
00251 }
00252 
00253 #define mul6 \
00254 { \
00255   zeromatrix(X); \
00256   initmatrix(A); \
00257   initmatrix(B); \
00258   t.restart(); \
00259   cblas_prod(X, A, B); \
00260   time = t.elapsed(); \
00261   /* cout << setw(8) << (2.0*(double(size1)*double(size2)*double(size3))/time)/1e+6 << flush; */ \
00262   cout << setw(8) << time << flush; \
00263   cerr << equals(X,prod(A,B)) << endl; \
00264 }
00265 
00266 #define mulset(xtype,atype,btype) \
00267 { \
00268  xtype X(size1,size3); \
00269  atype A(size1,size2); \
00270  btype B(size2,size3); \
00271  mul2(xtype); \
00272  mul3(xtype); \
00273  mul4(xtype); \
00274  mul5(xtype); \
00275  mul6 ; \
00276  cout << endl; \
00277 }
00278 
00279 /*
00280  mul(r,k,c); \
00281  mul(r,c,k); \
00282  mul(k,r,c); \
00283  mul(k,c,r); \
00284  mul(c,r,k); \
00285  mul(c,k,r); \
00286 */
00287 
00288 void test(const size_t size1, const size_t size2, const size_t size3) {
00289   boost::timer t;
00290         double time;
00291   cout << "size of metrices - " << size1 << "x" << size2 << "*" << size2 << "x" << size3 << endl << endl;
00292   cout << "      prod    axpy    opb     block   atlas   rkc     rck     krc     kcr     crk     ckr" <<
00293 endl;
00294   cout << "RRR";mulset(RT,RT,RT);
00295   cout << "RRC";mulset(RT,RT,CT);
00296   cout << "RCR";mulset(RT,CT,RT);
00297   cout << "RCC";mulset(RT,CT,CT);
00298 
00299   cout << "CRR";mulset(CT,RT,RT);
00300   cout << "CRC";mulset(CT,RT,CT);
00301   cout << "CCR";mulset(CT,CT,RT);
00302   cout << "CCC";mulset(CT,CT,CT);
00303 
00304 }
00305 
00306 int  main() {
00307   test(500,499,501);
00308 }

Generated on Wed Oct 1 14:41:00 2003 for Sample Code by doxygen 1.3.2