00001 #include <iostream>
00002 #include <iomanip>
00003
00004
00005 extern "C" {
00006 #include <atlas/cblas.h>
00007 }
00008
00009 #include <boost/numeric/ublas/matrix.hpp>
00010 #include <boost/numeric/ublas/operation.hpp>
00011 #include <boost/numeric/ublas/operation_blocked.hpp>
00012 #include <boost/numeric/ublas/io.hpp>
00013 #include <boost/timer.hpp>
00014
00015
00016 namespace ublas = boost::numeric::ublas;
00017 using namespace std;
00018
00019
00020
00021
00022 BOOST_UBLAS_INLINE
00023 void
00024 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00025 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00026 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00027
00028
00029 int m = C.size1();
00030 int n = C.size2();
00031 int k = A.size2();
00032 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00033 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00034 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00035 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00036 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00037 }
00038
00039 BOOST_UBLAS_INLINE
00040 void
00041 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00042 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00043 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00044
00045
00046 int m = C.size1();
00047 int n = C.size2();
00048 int k = A.size2();
00049 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00050 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00051 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00052 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
00053 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00054 }
00055
00056 BOOST_UBLAS_INLINE
00057 void
00058 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00059 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00060 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00061
00062
00063 int m = C.size1();
00064 int n = C.size2();
00065 int k = A.size2();
00066 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00067 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00068 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00069 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
00070 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00071 }
00072
00073 BOOST_UBLAS_INLINE
00074 void
00075 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00076 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00077 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00078
00079
00080 int m = C.size1();
00081 int n = C.size2();
00082 int k = A.size2();
00083 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00084 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00085 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00086 cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans,
00087 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00088 }
00089
00090 BOOST_UBLAS_INLINE
00091 void
00092 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00093 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00094 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00095
00096
00097 int m = C.size1();
00098 int n = C.size2();
00099 int k = A.size2();
00100 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00101 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00102 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00103 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans,
00104 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00105 }
00106
00107 BOOST_UBLAS_INLINE
00108 void
00109 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00110 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00111 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00112
00113
00114 int m = C.size1();
00115 int n = C.size2();
00116 int k = A.size2();
00117 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00118 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00119 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00120 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
00121 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00122 }
00123
00124 BOOST_UBLAS_INLINE
00125 void
00126 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00127 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00128 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00129
00130
00131 int m = C.size1();
00132 int n = C.size2();
00133 int k = A.size2();
00134 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00135 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00136 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00137 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
00138 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00139 }
00140
00141 BOOST_UBLAS_INLINE
00142 void
00143 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00144 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00145 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00146
00147
00148 int m = C.size1();
00149 int n = C.size2();
00150 int k = A.size2();
00151 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00152 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00153 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00154 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
00155 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00156 }
00157
00158 void zeromatrix(ublas::matrix<double,ublas::row_major>& A) {
00159 for(size_t i = 0; i<A.size1(); i++)
00160 for(size_t j = 0; j<A.size2(); j++)
00161 A(i,j) = 0.0;
00162 }
00163 void zeromatrix(ublas::matrix<double,ublas::column_major>& A) {
00164 for(size_t i = 0; i<A.size1(); i++)
00165 for(size_t j = 0; j<A.size2(); j++)
00166 A(i,j) = 0.0;
00167 }
00168 void initmatrix(ublas::matrix<double,ublas::row_major>& A) {
00169 for(size_t i = 0; i<A.size1(); i++)
00170 for(size_t j = 0; j<A.size2(); j++)
00171 A(i,j) = 1.0*rand();
00172 }
00173 void initmatrix(ublas::matrix<double,ublas::column_major>& A) {
00174 for(size_t i = 0; i<A.size1(); i++)
00175 for(size_t j = 0; j<A.size2(); j++)
00176 A(i,j) = 1.0*rand();
00177 }
00178
00179 #ifdef NOCHECK
00180 template <class M, class E>
00181 int equals(const M& lhs, const E& rhs)
00182 {
00183 return 2;
00184 }
00185 #endif
00186
00187 typedef ublas::matrix<double,ublas::row_major> RT;
00188 typedef ublas::matrix<double,ublas::column_major> CT;
00189
00190 #define mul(a,b,d) \
00191 { \
00192 zeromatrix(X); \
00193 initmatrix(A); \
00194 initmatrix(B); \
00195 t.restart(); \
00196 for (size_t a=0;a<size;a++) \
00197 for (size_t b=0;b<size;b++) \
00198 for (size_t d=0;d<size;d++) \
00199 X(r,c) += A(r,k)*B(k,c); \
00200 time = t.elapsed(); \
00201 cout << setw(8) << time << flush; \
00202 cerr << equals(X,prod(A,B))<< endl; \
00203 }
00204
00205 #define mul2(xtype) \
00206 { \
00207 zeromatrix(X); \
00208 initmatrix(A); \
00209 initmatrix(B); \
00210 t.restart(); \
00211 X.plus_assign(prod(A,B)); \
00212 time = t.elapsed(); \
00213 cout << setw(8) << time << flush; \
00214 cerr << equals(X,prod(A,B)) << endl; \
00215 }
00216
00217 #define mul3(xtype) \
00218 { \
00219 zeromatrix(X); \
00220 initmatrix(A); \
00221 initmatrix(B); \
00222 t.restart(); \
00223 ublas::axpy_prod(A,B,X,false); \
00224 time = t.elapsed(); \
00225 cout << setw(8) << time << flush; \
00226 cerr << equals(X,prod(A,B)) << endl; \
00227 }
00228
00229 #define mul4(xtype) \
00230 { \
00231 zeromatrix(X); \
00232 initmatrix(A); \
00233 initmatrix(B); \
00234 t.restart(); \
00235 ublas::opb_prod(A,B,X,false); \
00236 time = t.elapsed(); \
00237 cout << setw(8) << time << flush; \
00238 cerr << equals(X,prod(A,B)) << endl; \
00239 }
00240
00241 #define mul5(xtype) \
00242 { \
00243 zeromatrix(X); \
00244 initmatrix(A); \
00245 initmatrix(B); \
00246 t.restart(); \
00247 X.plus_assign(ublas::block_prod<xtype>(A,B,64)); \
00248 time = t.elapsed(); \
00249 cout << setw(8) << time << flush; \
00250 cerr << equals(X,prod(A,B)) << endl; \
00251 }
00252
00253 #define mul6 \
00254 { \
00255 zeromatrix(X); \
00256 initmatrix(A); \
00257 initmatrix(B); \
00258 t.restart(); \
00259 cblas_prod(X, A, B); \
00260 time = t.elapsed(); \
00261 \
00262 cout << setw(8) << time << flush; \
00263 cerr << equals(X,prod(A,B)) << endl; \
00264 }
00265
00266 #define mulset(xtype,atype,btype) \
00267 { \
00268 xtype X(size1,size3); \
00269 atype A(size1,size2); \
00270 btype B(size2,size3); \
00271 mul2(xtype); \
00272 mul3(xtype); \
00273 mul4(xtype); \
00274 mul5(xtype); \
00275 mul6 ; \
00276 cout << endl; \
00277 }
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288 void test(const size_t size1, const size_t size2, const size_t size3) {
00289 boost::timer t;
00290 double time;
00291 cout << "size of metrices - " << size1 << "x" << size2 << "*" << size2 << "x" << size3 << endl << endl;
00292 cout << " prod axpy opb block atlas rkc rck krc kcr crk ckr" <<
00293 endl;
00294 cout << "RRR";mulset(RT,RT,RT);
00295 cout << "RRC";mulset(RT,RT,CT);
00296 cout << "RCR";mulset(RT,CT,RT);
00297 cout << "RCC";mulset(RT,CT,CT);
00298
00299 cout << "CRR";mulset(CT,RT,RT);
00300 cout << "CRC";mulset(CT,RT,CT);
00301 cout << "CCR";mulset(CT,CT,RT);
00302 cout << "CCC";mulset(CT,CT,CT);
00303
00304 }
00305
00306 int main() {
00307 test(500,499,501);
00308 }