rzai@rzai00:~/prj/HMN4QA$ CUDA_VISIBLE_DEVICES=1 python main_run.py
Using gpu device 0: GeForce GTX 1080 (CNMeM is disabled)
Go to model
Start to prepare data set
Test sample 0 : stories [22016 22015 22014 22013 22012 22011 22010 22009 22008 22007 22006 22005
22004 22003 22002 22001]
Test sample 0 : queries 22017
Test sample 0 : answers 滕俊泽
Test sample 1 : stories [22016 22015 22014 22013 22012 22011 22010 22009 22008 22007 22006 22005
22004 22003 22002 22001]
Test sample 1 : queries 22018
Test sample 1 : answers IST
Test sample 2 : stories [22016 22015 22014 22013 22012 22011 22010 22009 22008 22007 22006 22005
22004 22003 22002 22001]
Test sample 2 : queries 22019
Test sample 2 : answers 南京
batch_size: 32 max_seq_story: 16 max_sent_enc: 17
all_text_idx: (44001, 17)
vocab size: 8540
train stories (5400, 16) queries (5400,) answers (5400,)
dev stories (600, 16) queries (600,) answers (600,)
test stories (6000, 16) queries (6000,) answers (6000,)
/usr/local/lib/python2.7/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences
'must be passed as a part of non_sequences', Warning)
params_sent_mem: [B_embedding.W, A_time0.T, C_embedding0.W, C_time0.T, C_embedding1.W, C_time1.T, C_embedding2.W, C_time2.T]
params_word_mem: [B_embedding.W, A_time0.T, C_embedding0.W, C_time0.T, C_embedding1.W, C_time1.T, C_embedding2.W, W_in_to_updategate, W_hid_to_updategate, b_updategate, W_in_to_resetgate, W_hid_to_resetgate, b_resetgate, W_in_to_hidden_update, W_hid_to_hidden_update, b_hidden_update, hid_init, W_in_to_updategate, W_hid_to_updategate, b_updategate, W_in_to_resetgate, W_hid_to_resetgate, b_resetgate, W_in_to_hidden_update, W_hid_to_hidden_update, b_hidden_update, hid_init, word_mem_W_align, word_mem_U_align, word_mem_v_align]
Start to compile, the process may cost tens of minutes ...
1 #include <Python.h>
2 #include <iostream>
3 #include "theano_mod_helper.h"
4 #include "cuda_ndarray.cuh"
5 #include "cudnn.h"
6 #include "cudnn_helper.h"
7 //////////////////////
8 //// Support Code
9 //////////////////////
10
11
12 static cudnnHandle_t _handle = NULL;
13
14
15 static int
16 c_set_tensorNd(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
17
18
19 int dim = CudaNdarray_NDIM(var);
20 int strides[dim];
21 int default_str = 1;
22
23 for (int i = dim-1; i >= 0; i--)
24 {
25 if (CudaNdarray_HOST_STRIDES(var)[i])
26 strides[i] = CudaNdarray_HOST_STRIDES(var)[i];
27 else
28 strides[i] = default_str;
29 default_str = CudaNdarray_HOST_DIMS(var)[i];
30 }
31
32 cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
33 CudaNdarray_HOST_DIMS(var),
34 strides);
35 if (err != CUDNN_STATUS_SUCCESS) {
36 PyErr_Format(PyExc_RuntimeError,
37 "Could not set tensorNd descriptor: %s"
38 "dim=%d",
39 cudnnGetErrorString(err), dim);
40 return -1;
41 }
42 return 0;
43 }
44
45
46 static int
47 c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
48 if (!CudaNdarray_is_c_contiguous(var)) {
49 PyErr_SetString(PyExc_ValueError,
50 "Only contiguous filters (kernels) are supported.");
51 return -1;
52 }
53 int dim = CudaNdarray_NDIM(var);
54 cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
55 CudaNdarray_HOST_DIMS(var));
56 if (err != CUDNN_STATUS_SUCCESS) {
57 PyErr_Format(PyExc_RuntimeError,
58 "Could not set filter descriptor: %s."
59 " dims= %d",
60 cudnnGetErrorString(err), dim);
61 return -1;
62 }
63 return 0;
64 }
65
66
67
68 namespace {
69 struct __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e {
70 PyObject* __ERROR;
71
72 PyObject* storage_V3;
73 PyObject* storage_V1;
74
75 cudnnTensorDescriptor_t softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0;
76
77 cudnnTensorDescriptor_t softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0;
78
79
80 __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e() {}
81 ~__struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e(void) {
82 cleanup();
83 }
84
85 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V1) {
86 Py_XINCREF(storage_V3);
87 Py_XINCREF(storage_V1);
88 this->storage_V3 = storage_V3;
89 this->storage_V1 = storage_V1;
90
91
92
93
94 cudnnStatus_t errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0;
95
96 softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = NULL;
97 if ((errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnCreateTensorDescriptor(&softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0)) != CUDNN_STATUS_SUCCESS) {
98 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
99 ": %s", cudnnGetErrorString(errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0));
100 {
101 if (!PyErr_Occurred()) {
102 PyErr_SetString(PyExc_RuntimeError,
103 "Unexpected error in an Op's C code. "
104 "No Python exception was set.");
105 }
106 return 5;
107 }
108 }
109
110 softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = NULL;
111 if ((errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnCreateTensorDescriptor(&softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0)) != CUDNN_STATUS_SUCCESS) {
112 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
113 ": %s", cudnnGetErrorString(errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0));
114 {
115 if (!PyErr_Occurred()) {
116 PyErr_SetString(PyExc_RuntimeError,
117 "Unexpected error in an Op's C code. "
118 "No Python exception was set.");
119 }
120 return 5;
121 }
122 }
123
124 this->__ERROR = __ERROR;
125 return 0;
126 }
127 void cleanup(void) {
128 __label_1:
129
130 double __DUMMY_1;
131 __label_3:
132
133 double __DUMMY_3;
134 __label_6:
135
136 if(softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0!= NULL)
137 cudnnDestroyTensorDescriptor(softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0);
138
139 if(softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0!= NULL)
140 cudnnDestroyTensorDescriptor(softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0);
141
142 double __DUMMY_6;
143
144 Py_XDECREF(this->storage_V3);
145 Py_XDECREF(this->storage_V1);
146 }
147 int run(void) {
148 int __failure = 0;
149
150 PyObject* py_V1;
151 CudaNdarray * V1;
152 PyObject* py_V3;
153 CudaNdarray * V3;
154 {
155
156 py_V1 = PyList_GET_ITEM(storage_V1, 0);
157 {Py_XINCREF(py_V1);}
158
159 if (py_V1 == Py_None)
160 {
161 V1 = NULL;
162 }
163 else
164 {
165
166 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object,
167 // and one ref from the local scope.
168
169 if (CudaNdarray_Check(py_V1))
170 {
171 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
172 V1 = (CudaNdarray*)py_V1;
173 //std::cerr << "c_extract " << V1 << '\n';
174
175
176 if (V1->nd != 4)
177 {
178 PyErr_Format(PyExc_RuntimeError,
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
180 V1->nd);
181 V1 = NULL;
182 {
183 __failure = 2;
184 if (!PyErr_Occurred()) {
185 PyErr_SetString(PyExc_RuntimeError,
186 "Unexpected error in an Op's C code. "
187 "No Python exception was set.");
188 }
189 goto __label_2;};
190 }
191 //std::cerr << "c_extract " << V1 << " nd check passed\n";
192
193
194 assert(V1);
195 Py_INCREF(py_V1);
196 }
197 else if (py_V1 == Py_None)
198 {
199 PyErr_SetString(PyExc_TypeError,
200 "expected a CudaNdarray, not None");
201 V1 = NULL;
202 {
203 __failure = 2;
204 if (!PyErr_Occurred()) {
205 PyErr_SetString(PyExc_RuntimeError,
206 "Unexpected error in an Op's C code. "
207 "No Python exception was set.");
208 }
209 goto __label_2;};
210 }
211 else
212 {
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
215 V1 = NULL;
216 {
217 __failure = 2;
218 if (!PyErr_Occurred()) {
219 PyErr_SetString(PyExc_RuntimeError,
220 "Unexpected error in an Op's C code. "
221 "No Python exception was set.");
222 }
223 goto __label_2;};
224 }
225 //std::cerr << "c_extract done " << V1 << '\n';
226
227
228 }
229
230 {
231
232 py_V3 = PyList_GET_ITEM(storage_V3, 0);
233 {Py_XINCREF(py_V3);}
234
235 assert(py_V3->ob_refcnt >= 2); // There should be at least one ref from the container object,
236 // and one ref from the local scope.
237
238 if (CudaNdarray_Check(py_V3))
239 {
240 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
241 V3 = (CudaNdarray*)py_V3;
242 //std::cerr << "c_extract " << V3 << '\n';
243
244
245 if (V3->nd != 4)
246 {
247 PyErr_Format(PyExc_RuntimeError,
248 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 4",
249 V3->nd);
250 V3 = NULL;
251 {
252 __failure = 4;
253 if (!PyErr_Occurred()) {
254 PyErr_SetString(PyExc_RuntimeError,
255 "Unexpected error in an Op's C code. "
256 "No Python exception was set.");
257 }
258 goto __label_4;};
259 }
260 //std::cerr << "c_extract " << V3 << " nd check passed\n";
261
262
263 assert(V3);
264 Py_INCREF(py_V3);
265 }
266 else if (py_V3 == Py_None)
267 {
268 PyErr_SetString(PyExc_TypeError,
269 "expected a CudaNdarray, not None");
270 V3 = NULL;
271 {
272 __failure = 4;
273 if (!PyErr_Occurred()) {
274 PyErr_SetString(PyExc_RuntimeError,
275 "Unexpected error in an Op's C code. "
276 "No Python exception was set.");
277 }
278 goto __label_4;};
279 }
280 else
281 {
282 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
283 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray");
284 V3 = NULL;
285 {
286 __failure = 4;
287 if (!PyErr_Occurred()) {
288 PyErr_SetString(PyExc_RuntimeError,
289 "Unexpected error in an Op's C code. "
290 "No Python exception was set.");
291 }
292 goto __label_4;};
293 }
294 //std::cerr << "c_extract done " << V3 << '\n';
295
296
297 {
298 // Op class GpuDnnSoftmax
299
300 cudnnStatus_t errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0;
301 cudnnTensorFormat_t formatnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = CUDNN_TENSOR_NCHW;
302 if (0 == 1)
303 formatnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = CUDNN_TENSOR_NHWC;
304
305 cudnnSoftmaxAlgorithm_t algonode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = CUDNN_SOFTMAX_ACCURATE;
306
307 cudnnSoftmaxMode_t modenode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = CUDNN_SOFTMAX_MODE_CHANNEL;
308 if (0 == 1)
309 modenode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = CUDNN_SOFTMAX_MODE_INSTANCE;
310
311 {
312 int str0, str1, str2, str3;
313 str3 = CudaNdarray_HOST_STRIDES(V3)[3]?CudaNdarray_HOST_STRIDES(V3)[3]:1;
314 str2 = CudaNdarray_HOST_STRIDES(V3)[2]?CudaNdarray_HOST_STRIDES(V3)[2]:CudaNdarray_HOST_DIMS(V3)[3];
315 str1 = CudaNdarray_HOST_STRIDES(V3)[1]?CudaNdarray_HOST_STRIDES(V3)[1]:CudaNdarray_HOST_DIMS(V3)[2]*CudaNdarray_HOST_DIMS(V3)[3];
316 str0 = CudaNdarray_HOST_STRIDES(V3)[0]?CudaNdarray_HOST_STRIDES(V3)[0]:CudaNdarray_HOST_DIMS(V3)[2]*CudaNdarray_HOST_DIMS(V3)[3]*CudaNdarray_HOST_DIMS(V3)[1];
317 errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnSetTensor4dDescriptorEx(
318 softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0, CUDNN_DATA_FLOAT,
319 CudaNdarray_HOST_DIMS(V3)[0],
320 CudaNdarray_HOST_DIMS(V3)[1],
321 CudaNdarray_HOST_DIMS(V3)[2],
322 CudaNdarray_HOST_DIMS(V3)[3],
323 str0, str1, str2, str3
324 );
325 if (errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 != CUDNN_STATUS_SUCCESS) {
326 PyErr_Format(PyExc_RuntimeError,
327 "could not set tensor4d descriptor: %s"
328 "shapes=%d %d %d %d strides=%d %d %d %d",
329 cudnnGetErrorString(errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0),
330 CudaNdarray_HOST_DIMS(V3)[0],
331 CudaNdarray_HOST_DIMS(V3)[1],
332 CudaNdarray_HOST_DIMS(V3)[2],
333 CudaNdarray_HOST_DIMS(V3)[3],
334 str0, str1, str2, str3
335 );
336 {
337 __failure = 5;
338 if (!PyErr_Occurred()) {
339 PyErr_SetString(PyExc_RuntimeError,
340 "Unexpected error in an Op's C code. "
341 "No Python exception was set.");
342 }
343 goto __label_5;}
344 }
345 }
346
347
348 if (CudaNdarray_prep_output(&V1, 4, CudaNdarray_HOST_DIMS(V3)) != 0)
349 {
350 {
351 __failure = 5;
352 if (!PyErr_Occurred()) {
353 PyErr_SetString(PyExc_RuntimeError,
354 "Unexpected error in an Op's C code. "
355 "No Python exception was set.");
356 }
357 goto __label_5;}
358 }
359
360 {
361 int str0, str1, str2, str3;
362 str3 = CudaNdarray_HOST_STRIDES(V1)[3]?CudaNdarray_HOST_STRIDES(V1)[3]:1;
363 str2 = CudaNdarray_HOST_STRIDES(V1)[2]?CudaNdarray_HOST_STRIDES(V1)[2]:CudaNdarray_HOST_DIMS(V1)[3];
364 str1 = CudaNdarray_HOST_STRIDES(V1)[1]?CudaNdarray_HOST_STRIDES(V1)[1]:CudaNdarray_HOST_DIMS(V1)[2]*CudaNdarray_HOST_DIMS(V1)[3];
365 str0 = CudaNdarray_HOST_STRIDES(V1)[0]?CudaNdarray_HOST_STRIDES(V1)[0]:CudaNdarray_HOST_DIMS(V1)[2]*CudaNdarray_HOST_DIMS(V1)[3]*CudaNdarray_HOST_DIMS(V1)[1];
366 errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnSetTensor4dDescriptorEx(
367 softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0, CUDNN_DATA_FLOAT,
368 CudaNdarray_HOST_DIMS(V1)[0],
369 CudaNdarray_HOST_DIMS(V1)[1],
370 CudaNdarray_HOST_DIMS(V1)[2],
371 CudaNdarray_HOST_DIMS(V1)[3],
372 str0, str1, str2, str3
373 );
374 if (errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 != CUDNN_STATUS_SUCCESS) {
375 PyErr_Format(PyExc_RuntimeError,
376 "could not set tensor4d descriptor: %s"
377 "shapes=%d %d %d %d strides=%d %d %d %d",
378 cudnnGetErrorString(errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0),
379 CudaNdarray_HOST_DIMS(V1)[0],
380 CudaNdarray_HOST_DIMS(V1)[1],
381 CudaNdarray_HOST_DIMS(V1)[2],
382 CudaNdarray_HOST_DIMS(V1)[3],
383 str0, str1, str2, str3
384 );
385 {
386 __failure = 5;
387 if (!PyErr_Occurred()) {
388 PyErr_SetString(PyExc_RuntimeError,
389 "Unexpected error in an Op's C code. "
390 "No Python exception was set.");
391 }
392 goto __label_5;}
393 }
394 }
395
396
397 #ifndef CUDNN_VERSION
398 errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnSoftmaxForward(
399 _handle,
400 algonode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
401 modenode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
402 softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
403 CudaNdarray_DEV_DATA(V3),
404 softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
405 CudaNdarray_DEV_DATA(V1)
406 );
407 #else
408 {
409 const float alpha = 1.;
410 const float beta = 0.;
411 errnode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0 = cudnnSoftmaxForward(
412 _handle,
413 algonode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
414 modenode_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
415 (void*) &alpha,
416 softmax_input_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
417 CudaNdarray_DEV_DATA(V3),
418 (void*) &beta,
419 softmax_output_node_e4b7b2bc7fd903d81ee9c46ba97aa65e_0,
420 CudaNdarray_DEV_DATA(V1)
421 );
422 }
423 #endif
424 __label_5:
425
426 double __DUMMY_5;
427
428 }
429 __label_4:
430
431 //std::cerr << "cleanup " << py_V3 << " " << V3 << "\n";
432 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt));
433 if (V3)
434 {
435 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V3, (V3->ob_refcnt));
436 Py_XDECREF(V3);
437 }
438 //std::cerr << "cleanup done" << py_V3 << "\n";
439
440 {Py_XDECREF(py_V3);}
441
442 double __DUMMY_4;
443
444 }
445 __label_2:
446
447 if (!__failure) {
448
449 //std::cerr << "sync\n";
450 if (NULL == V1) {
451 // failure: sync None to storage
452 Py_XDECREF(py_V1);
453 py_V1 = Py_None;
454 Py_INCREF(py_V1);
455 }
456 else
457 {
458 if (py_V1 != (PyObject*)V1)
459 {
460 Py_XDECREF(py_V1);
461 py_V1 = (PyObject*)V1;
462 Py_INCREF(py_V1);
463 }
464 assert(py_V1->ob_refcnt);
465 }
466
467 PyObject* old = PyList_GET_ITEM(storage_V1, 0);
468 {Py_XINCREF(py_V1);}
469 PyList_SET_ITEM(storage_V1, 0, py_V1);
470 {Py_XDECREF(old);}
471 }
472
473 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n";
474 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt));
475 if (V1)
476 {
477 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt));
478 Py_XDECREF(V1);
479 }
480 //std::cerr << "cleanup done" << py_V1 << "\n";
481
482 {Py_XDECREF(py_V1);}
483
484 double __DUMMY_2;
485
486 }
487
488
489 if (__failure) {
490 // When there is a failure, this code puts the exception
491 // in __ERROR.
492 PyObject* err_type = NULL;
493 PyObject* err_msg = NULL;
494 PyObject* err_traceback = NULL;
495 PyErr_Fetch(&err_type, &err_msg, &err_traceback);
496 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
497 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
498 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
499 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
500 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
501 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
502 PyList_SET_ITEM(__ERROR, 0, err_type);
503 PyList_SET_ITEM(__ERROR, 1, err_msg);
504 PyList_SET_ITEM(__ERROR, 2, err_traceback);
505 {Py_XDECREF(old_err_type);}
506 {Py_XDECREF(old_err_msg);}
507 {Py_XDECREF(old_err_traceback);}
508 }
509 // The failure code is returned to index what code block failed.
510 return __failure;
511
512 }
513 };
514 }
515
516
517 static int __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e_executor(__struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e* self) {
518 return self->run();
519 }
520
521 static void __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e_destructor(void* executor, void* self) {
522 delete ((__struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e*)self);
523 }
524
525 //////////////////////
526 //// Functions
527 //////////////////////
528 static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
529 assert(PyTuple_Check(argtuple));
530 if (3 != PyTuple_Size(argtuple)){
531 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 3, got %i", (int)PyTuple_Size(argtuple));
532 return NULL;
533 }
534 __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e* struct_ptr = new __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e();
535 if (struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2) ) != 0) {
536 delete struct_ptr;
537 return NULL;
538 }
539 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e_executor), struct_ptr, __struct_compiled_op_e4b7b2bc7fd903d81ee9c46ba97aa65e_destructor);
540 return thunk; }
541
542 //////////////////////
543 //// Module init
544 //////////////////////
545 static PyMethodDef MyMethods[] = {
546 {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
547 {NULL, NULL, 0, NULL}
548 };
549 PyMODINIT_FUNC inite4b7b2bc7fd903d81ee9c46ba97aa65e(void){
550
551
552 {
553 cudnnStatus_t err;
554 if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
555 PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
556 cudnnGetErrorString(err));
557 #if PY_MAJOR_VERSION >= 3
558 return NULL;
559 #else
560 return;
561 #endif
562 }
563 }
564
565 (void) Py_InitModule("e4b7b2bc7fd903d81ee9c46ba97aa65e", MyMethods);
566 }
567
mod.cu(54): error: argument of type "int" is incompatible with parameter of type "cudnnTensorFormat_t"
mod.cu(55): error: argument of type "const int *" is incompatible with parameter of type "int"
mod.cu(55): error: too few arguments in function call
mod.cu(16): warning: function "c_set_tensorNd" was declared but never referenced
mod.cu(47): warning: function "c_set_filterNd" was declared but never referenced
3 errors detected in the compilation of "/tmp/tmpxft_00004094_00000000-9_mod.cpp1.ii".
['nvcc', '-shared', '-O3', '-use_fast_math', '-arch=sm_61', '-m64', '-Xcompiler', '-fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden', '-Xlinker', '-rpath,/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray', '-I/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray', '-I/usr/local/cuda-8.0/include', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-I/home/rzai/.local/lib/python2.7/site-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/gof', '-o', '/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/tmpQyHjmt/e4b7b2bc7fd903d81ee9c46ba97aa65e.so', 'mod.cu', '-L/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcuda_ndarray', '-lcudnn']
Traceback (most recent call last):
File "main_run.py", line 31, in <module>
main()
File "main_run.py", line 25, in main
hmn4qa_model = model.Model(_log_file)
File "/home/rzai/prj/HMN4QA/model.py", line 21, in __init__
self.networks = self.build_network()
File "/home/rzai/prj/HMN4QA/model.py", line 140, in build_network
updates=updates_joint)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 316, in function
output_keys=output_keys)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 523, in pfunc
output_keys=output_keys)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1526, in orig_function
defaults)
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1390, in create
input_storage=input_storage_lists)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 607, in make_thunk
output_storage=output_storage)[:3]
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 1025, in make_all
no_recycling))
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 279, in make_thunk
compute_map, no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 807, in make_thunk
no_recycling)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 733, in make_c_thunk
output_storage=node_output_storage)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1065, in make_thunk
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1007, in compile
keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1435, in cthunk_factory
key=key, lnk=self, keep_lock=keep_lock)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 1094, in module_from_key
module = lnk.compile_cmodule(location)
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1347, in compile_cmodule
preargs=preargs)
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 429, in compile_str
'for cmd', ' '.join(cmd))
Exception: ('The following error happened while compiling the node', GpuDnnSoftmax{tensor_format='bc01', mode='channel', algo='accurate'}(GpuContiguous.0), '\n', 'nvcc return status', 2, 'for cmd', 'nvcc -shared -O3 -use_fast_math -arch=sm_61 -m64 -Xcompiler -fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden -Xlinker -rpath,/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray -I/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray -I/usr/local/cuda-8.0/include -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -I/home/rzai/.local/lib/python2.7/site-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/gof -o /home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/tmpQyHjmt/e4b7b2bc7fd903d81ee9c46ba97aa65e.so mod.cu -L/home/rzai/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/cuda_ndarray -L/usr/lib -lpython2.7 -lcudart -lcublas -lcuda_ndarray -lcudnn', "[GpuDnnSoftmax{tensor_format='bc01', mode='channel', algo='accurate'}(<CudaNdarrayType(float32, (False, False, True, True))>)]")
rzai@rzai00:~/prj/HMN4QA$