Deep Neural Network Library (DNNL)
1.91.0
Performance library for Deep Learning
Go to the documentation of this file.
486 dnnl_NCw16n16c = dnnl_ABc16a16b,
487 dnnl_NCdhw16n16c = dnnl_ABcde16a16b,
488 dnnl_NChw16n16c = dnnl_ABcd16a16b,
489 dnnl_NChw32n32c = dnnl_ABcd32a32b,
492 dnnl_IOw16o16i = dnnl_BAc16a16b,
493 dnnl_IOw16i16o = dnnl_BAc16b16a,
494 dnnl_OIw16i16o = dnnl_ABc16b16a,
495 dnnl_OIw16o16i = dnnl_ABc16a16b,
496 dnnl_Oiw16o = dnnl_Abc16a,
497 dnnl_OIw4i16o4i = dnnl_ABc4b16a4b,
498 dnnl_OIw2i8o4i = dnnl_ABc2b8a4b,
499 dnnl_OIw4i4o = dnnl_ABc4b4a,
500 dnnl_OIw4o4i = dnnl_ABc4a4b,
501 dnnl_Oiw4o = dnnl_Abc4a,
502 dnnl_OIw8i16o2i = dnnl_ABc8b16a2b,
503 dnnl_OIw8i8o = dnnl_ABc8b8a,
504 dnnl_OIw8o16i2o = dnnl_ABc8a16b2a,
505 dnnl_IOw8o16i2o = dnnl_BAc8a16b2a,
506 dnnl_OIw8o8i = dnnl_ABc8a8b,
507 dnnl_Owi16o = dnnl_Acb16a,
508 dnnl_OwI16o2i = dnnl_AcB16a2b,
509 dnnl_Owi4o = dnnl_Acb4a,
510 dnnl_Owi8o = dnnl_Acb8a,
513 dnnl_IOhw16i16o = dnnl_BAcd16b16a,
514 dnnl_IOhw16o16i = dnnl_BAcd16a16b,
515 dnnl_Ohwi16o = dnnl_Acdb16a,
516 dnnl_OhwI16o2i = dnnl_AcdB16a2b,
517 dnnl_Ohwi32o = dnnl_Acdb32a,
518 dnnl_Ohwi4o = dnnl_Acdb4a,
519 dnnl_Ohwi8o = dnnl_Acdb8a,
520 dnnl_OIhw16i16o = dnnl_ABcd16b16a,
521 dnnl_OIhw16o16i = dnnl_ABcd16a16b,
522 dnnl_Oihw16o = dnnl_Abcd16a,
523 dnnl_OIhw4i16o4i = dnnl_ABcd4b16a4b,
524 dnnl_OIhw4i4o = dnnl_ABcd4b4a,
525 dnnl_OIhw4o4i = dnnl_ABcd4a4b,
526 dnnl_Oihw4o = dnnl_Abcd4a,
527 dnnl_OIhw8i16o2i = dnnl_ABcd8b16a2b,
529 dnnl_OIhw8o16i2o = dnnl_ABcd8a16b2a,
530 dnnl_OIhw2i8o4i = dnnl_ABcd2b8a4b,
531 dnnl_IOhw8o16i2o = dnnl_BAcd8a16b2a,
532 dnnl_OIhw8o8i = dnnl_ABcd8a8b,
535 dnnl_Odhwi16o = dnnl_Acdeb16a,
536 dnnl_OdhwI16o2i = dnnl_AcdeB16a2b,
537 dnnl_Odhwi4o = dnnl_Acdeb4a,
538 dnnl_Odhwi8o = dnnl_Acdeb8a,
539 dnnl_OIdhw16i16o = dnnl_ABcde16b16a,
540 dnnl_OIdhw16o16i = dnnl_ABcde16a16b,
541 dnnl_Oidhw16o = dnnl_Abcde16a,
542 dnnl_OIdhw4i4o = dnnl_ABcde4b4a,
543 dnnl_OIdhw4o4i = dnnl_ABcde4a4b,
544 dnnl_Oidhw4o = dnnl_Abcde4a,
545 dnnl_OIdhw8i16o2i = dnnl_ABcde8b16a2b,
546 dnnl_OIdhw8i8o = dnnl_ABcde8b8a,
547 dnnl_OIdhw8o16i2o = dnnl_ABcde8a16b2a,
548 dnnl_IOdhw8o16i2o = dnnl_BAcde8a16b2a,
551 dnnl_OIdhw8o8i = dnnl_ABcde8a8b,
552 dnnl_IOdhw16i16o = dnnl_BAcde16b16a,
555 dnnl_Goiw16g = dnnl_Abcd16a,
556 dnnl_Goiw8g = dnnl_Abcd8a,
557 dnnl_gIOw16o16i = dnnl_aCBd16b16c,
558 dnnl_gIOw16i16o = dnnl_aCBd16c16b,
559 dnnl_gOIw16i16o = dnnl_aBCd16c16b,
560 dnnl_gOIw16o16i = dnnl_aBCd16b16c,
562 dnnl_gOIw4i16o4i = dnnl_aBCd4c16b4c,
563 dnnl_gOIw2i8o4i = dnnl_aBCd2c8b4c,
564 dnnl_gOIw4i4o = dnnl_aBCd4c4b,
565 dnnl_gOIw4o4i = dnnl_aBCd4b4c,
567 dnnl_gOIw8i16o2i = dnnl_aBCd8c16b2c,
568 dnnl_gOIw8i8o = dnnl_aBCd8c8b,
569 dnnl_gOIw8o16i2o = dnnl_aBCd8b16c2b,
570 dnnl_gIOw8o16i2o = dnnl_aCBd8b16c2b,
571 dnnl_gOIw8o8i = dnnl_aBCd8b8c,
572 dnnl_gOwi16o = dnnl_aBdc16b,
573 dnnl_gOwI16o2i = dnnl_aBdC16b2c,
574 dnnl_gOwi4o = dnnl_aBdc4b,
575 dnnl_gOwi8o = dnnl_aBdc8b,
578 dnnl_gIOhw16i16o = dnnl_aCBde16c16b,
579 dnnl_gIOhw16o16i = dnnl_aCBde16b16c,
580 dnnl_gOhwi16o = dnnl_aBdec16b,
581 dnnl_gOhwI16o2i = dnnl_aBdeC16b2c,
582 dnnl_gOhwi32o = dnnl_aBdec32b,
583 dnnl_gOhwi4o = dnnl_aBdec4b,
584 dnnl_gOhwi8o = dnnl_aBdec8b,
585 dnnl_Goihw16g = dnnl_Abcde16a,
586 dnnl_gOIhw16i16o = dnnl_aBCde16c16b,
587 dnnl_gOIhw16o16i = dnnl_aBCde16b16c,
589 dnnl_gOIhw2i8o4i = dnnl_aBCde2c8b4c,
590 dnnl_gOIhw4i16o4i = dnnl_aBCde4c16b4c,
591 dnnl_gOIhw4i4o = dnnl_aBCde4c4b,
592 dnnl_gOIhw4o4i = dnnl_aBCde4b4c,
594 dnnl_Goihw8g = dnnl_Abcde8a,
595 dnnl_gOIhw8i16o2i = dnnl_aBCde8c16b2c,
596 dnnl_gOIhw8i8o = dnnl_aBCde8c8b,
597 dnnl_gOIhw8o16i2o = dnnl_aBCde8b16c2b,
598 dnnl_gIOhw8o16i2o = dnnl_aCBde8b16c2b,
599 dnnl_gOIhw8o8i = dnnl_aBCde8b8c,
601 dnnl_OIhw4o8i8o4i = dnnl_ABcd4a8b8a4b,
602 dnnl_OIhw2o8i8o2i = dnnl_ABcd2a8b8a2b,
603 dnnl_gOIhw4o8i8o4i = dnnl_aBCde4b8c8b4c,
604 dnnl_gOIhw2o8i8o2i = dnnl_aBCde2b8c8b2c,
607 dnnl_gIOdhw16i16o = dnnl_aCBdef16c16b,
608 dnnl_gOdhwi16o = dnnl_aBdefc16b,
609 dnnl_gOdhwI16o2i = dnnl_aBdefC16b2c,
610 dnnl_gOdhwi4o = dnnl_aBdefc4b,
611 dnnl_gOdhwi8o = dnnl_aBdefc8b,
612 dnnl_gOIdhw16i16o = dnnl_aBCdef16c16b,
613 dnnl_gOIdhw4i16o4i = dnnl_aBCdef4c16b4c,
615 dnnl_gOIdhw16o16i = dnnl_aBCdef16b16c,
617 dnnl_gOIdhw4i4o = dnnl_aBCdef4c4b,
618 dnnl_gOIdhw4o4i = dnnl_aBCdef4b4c,
620 dnnl_gOIdhw8i16o2i = dnnl_aBCdef8c16b2c,
621 dnnl_gOIdhw8i8o = dnnl_aBCdef8c8b,
622 dnnl_gOIdhw8o16i2o = dnnl_aBCdef8b16c2b,
623 dnnl_gIOdhw8o16i2o = dnnl_aCBdef8b16c2b,
624 dnnl_gOIdhw8o8i = dnnl_aBCdef8b8c,
625 dnnl_Goidhw16g = dnnl_Abcdef16a,
839 #define DNNL_MAX_NDIMS 12
843 #define DNNL_RUNTIME_DIM_VAL INT64_MIN
848 #define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)
855 } DNNL_RUNTIME_F32_VAL_REP = {0x7fc000d0};
860 #define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)
863 static const int DNNL_RUNTIME_S32_VAL_REP = INT32_MIN;
868 #define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP
922 dnnl_packed_format_undef = 0,
925 } dnnl_rnn_packed_memory_format_t;
929 #define DNNL_RNN_MAX_N_PARTS 4
933 dnnl_rnn_packed_memory_format_t format;
940 size_t offset_compensation;
947 dnnl_memory_extra_flag_none = 0x0U,
956 dnnl_memory_extra_flag_scale_adjust = 0x2U,
957 dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation = 0x4U,
966 int compensation_mask;
1036 #define DNNL_MEMORY_NONE (NULL)
1037 #define DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)
1302 float batch_norm_epsilon;
1339 float layer_norm_epsilon;
1571 typedef const struct dnnl_engine *const_dnnl_engine_t;
1687 #define DNNL_ARG_SRC_0 1
1688 #define DNNL_ARG_SRC DNNL_ARG_SRC_0
1691 #define DNNL_ARG_SRC_LAYER DNNL_ARG_SRC_0
1694 #define DNNL_ARG_FROM DNNL_ARG_SRC_0
1699 #define DNNL_ARG_SRC_1 2
1700 #define DNNL_ARG_SRC_ITER DNNL_ARG_SRC_1
1705 #define DNNL_ARG_SRC_2 3
1706 #define DNNL_ARG_SRC_ITER_C DNNL_ARG_SRC_2
1711 #define DNNL_ARG_DST_0 17
1712 #define DNNL_ARG_DST DNNL_ARG_DST_0
1715 #define DNNL_ARG_TO DNNL_ARG_DST_0
1718 #define DNNL_ARG_DST_LAYER DNNL_ARG_DST_0
1722 #define DNNL_ARG_DST_1 18
1723 #define DNNL_ARG_DST_ITER DNNL_ARG_DST_1
1728 #define DNNL_ARG_DST_2 19
1729 #define DNNL_ARG_DST_ITER_C DNNL_ARG_DST_2
1734 #define DNNL_ARG_WEIGHTS_0 33
1735 #define DNNL_ARG_WEIGHTS DNNL_ARG_WEIGHTS_0
1738 #define DNNL_ARG_SCALE_SHIFT DNNL_ARG_WEIGHTS_0
1741 #define DNNL_ARG_WEIGHTS_LAYER DNNL_ARG_WEIGHTS_0
1746 #define DNNL_ARG_WEIGHTS_1 34
1747 #define DNNL_ARG_WEIGHTS_ITER DNNL_ARG_WEIGHTS_1
1752 #define DNNL_ARG_BIAS 41
1755 #define DNNL_ARG_MEAN 49
1756 #define DNNL_ARG_VARIANCE 50
1761 #define DNNL_ARG_WORKSPACE 64
1762 #define DNNL_ARG_SCRATCHPAD 80
1766 #define DNNL_ARG_DIFF_SRC_0 129
1767 #define DNNL_ARG_DIFF_SRC DNNL_ARG_DIFF_SRC_0
1770 #define DNNL_ARG_DIFF_SRC_LAYER DNNL_ARG_DIFF_SRC_0
1775 #define DNNL_ARG_DIFF_SRC_1 130
1776 #define DNNL_ARG_DIFF_SRC_ITER DNNL_ARG_DIFF_SRC_1
1781 #define DNNL_ARG_DIFF_SRC_2 131
1782 #define DNNL_ARG_DIFF_SRC_ITER_C DNNL_ARG_DIFF_SRC_2
1787 #define DNNL_ARG_DIFF_DST_0 145
1788 #define DNNL_ARG_DIFF_DST DNNL_ARG_DIFF_DST_0
1791 #define DNNL_ARG_DIFF_DST_LAYER DNNL_ARG_DIFF_DST_0
1796 #define DNNL_ARG_DIFF_DST_1 146
1797 #define DNNL_ARG_DIFF_DST_ITER DNNL_ARG_DIFF_DST_1
1802 #define DNNL_ARG_DIFF_DST_2 147
1803 #define DNNL_ARG_DIFF_DST_ITER_C DNNL_ARG_DIFF_DST_2
1808 #define DNNL_ARG_DIFF_WEIGHTS_0 161
1809 #define DNNL_ARG_DIFF_WEIGHTS DNNL_ARG_DIFF_WEIGHTS_0
1812 #define DNNL_ARG_DIFF_SCALE_SHIFT DNNL_ARG_DIFF_WEIGHTS_0
1815 #define DNNL_ARG_DIFF_WEIGHTS_LAYER DNNL_ARG_DIFF_WEIGHTS_0
1820 #define DNNL_ARG_DIFF_WEIGHTS_1 162
1821 #define DNNL_ARG_DIFF_WEIGHTS_ITER DNNL_ARG_DIFF_WEIGHTS_1
1826 #define DNNL_ARG_DIFF_BIAS 169
1829 #define DNNL_ARG_ATTR_OUTPUT_SCALES 513
1833 #define DNNL_ARG_MULTIPLE_SRC 1024
1834 #define DNNL_ARG_MULTIPLE_DST 2048
1839 #define DNNL_ARG_ATTR_ZERO_POINTS 4096
1974 #define DNNL_RUNTIME_NONE 0u
1977 #define DNNL_RUNTIME_SEQ 1u
1980 #define DNNL_RUNTIME_OMP 2u
1983 #define DNNL_RUNTIME_TBB 4u
1986 #define DNNL_RUNTIME_OCL 256u
1989 #define DNNL_RUNTIME_SYCL 512u
1992 #define DNNL_RUNTIME_DPCPP DNNL_RUNTIME_SYCL
2001 unsigned cpu_runtime;
2002 unsigned gpu_runtime;
2006 #define DNNL_JIT_PROFILE_NONE 0u
2009 #define DNNL_JIT_PROFILE_VTUNE 1u
2012 #define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u
2015 #define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u
2019 #define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u
2022 #define DNNL_JIT_PROFILE_LINUX_PERF \
2023 (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP)
runtime estimation (seconds)
Definition: dnnl_types.h:1890
destination engine
Definition: dnnl_types.h:1902
6D tensor blocked by 2nd dimension with block size 4
Definition: dnnl_types.h:303
6D CNN weights tensor (incl. groups), an alias to dnnl_defcab
Definition: dnnl_types.h:423
The library manages scratchpad (default). The allocation policy is controlled by the DNNL_ENABLE_CONCU...
Definition: dnnl_types.h:1625
6D CNN weights tensor (incl. groups), an alias to dnnl_abcdef
Definition: dnnl_types.h:419
Internal weights format for 2x3 Winograd.
Definition: dnnl_types.h:898
2D CNN weights tensor, an alias to dnnl_ba
Definition: dnnl_types.h:382
2D CNN activations tensor, an alias to dnnl_ab
Definition: dnnl_types.h:357
32-bit signed integer.
Definition: dnnl_types.h:71
1D tensor, an alias to dnnl_a
Definition: dnnl_types.h:355
Description of tensor of packed weights for rnn.
Definition: dnnl_types.h:931
pooling descriptor
Definition: dnnl_types.h:1914
5D tensor blocked by 1st dimension with block size 8
Definition: dnnl_types.h:264
Internal weights format for 2x3 Winograd.
Definition: dnnl_types.h:899
memory consumption – extra
Definition: dnnl_types.h:1891
8-bit signed integer.
Definition: dnnl_types.h:73
prop_kind
Propagation kind.
Definition: dnnl.hpp:443
dnnl_format_tag_t
Memory format tag specification.
Definition: dnnl_types.h:163
16-bit/half-precision floating point.
Definition: dnnl_types.h:65
An inner product primitive.
Definition: dnnl_types.h:690
The operation failed because requested functionality is not implemented.
Definition: dnnl_types.h:46
permuted 5D tensor
Definition: dnnl_types.h:200
An opaque structure to describe a primitive descriptor iterator.
A batch normalization primitive.
Definition: dnnl_types.h:686
logsoftmax descriptor
Definition: dnnl_types.h:1922
struct dnnl_stream * dnnl_stream_t
An execution stream handle.
Definition: dnnl_types.h:1963
dnnl_status_t
Status values returned by the library functions.
Definition: dnnl_types.h:38
source engine
Definition: dnnl_types.h:1901
Undefined memory format, used for empty memory descriptors.
Definition: dnnl_types.h:896
Indicates the weights have an additional buffer, that depends on the compensation_mask.
Definition: dnnl_types.h:954
A softmax primitive.
Definition: dnnl_types.h:680
rnn descriptor
Definition: dnnl_types.h:1919
2D CNN activations tensor, an alias to dnnl_ba
Definition: dnnl_types.h:359
#define DNNL_MAX_NDIMS
Maximum number of dimensions a tensor can have.
Definition: dnnl_types.h:838
4D RNN states tensor in the format (num_layers, num_directions, batch, state channels).
Definition: dnnl_types.h:431
A user shall query and provide the scratchpad memory to primitives. This mode is thread-safe as long a...
Definition: dnnl_types.h:1629
permuted 6D tensor
Definition: dnnl_types.h:201
5D tensor blocked by 2nd dimension with block size 16
Definition: dnnl_types.h:266
An opaque structure to describe an engine.
Eltwise: ReLU.
Definition: dnnl_types.h:719
permuted 3D tensor
Definition: dnnl_types.h:186
A descriptor of a matrix multiplication operation.
Definition: dnnl_types.h:1501
Eltwise: abs.
Definition: dnnl_types.h:727
4D CNN weights tensor, an alias to dnnl_abcd
Definition: dnnl_types.h:392
dnnl_normalization_flags_t
Flags for batch normalization primitive.
Definition: dnnl_types.h:788
A shuffle primitive.
Definition: dnnl_types.h:668
shuffle descriptor
Definition: dnnl_types.h:1911
A descriptor of a convolution operation.
Definition: dnnl_types.h:1060
dnnl_primitive_kind_t
Kinds of primitives.
Definition: dnnl_types.h:662
dnnl_rnn_flags_t
Flags for RNN cell.
Definition: dnnl_types.h:1382
5D RNN weights tensor in the format (num_layers, num_directions, input_channels, num_gates,...
Definition: dnnl_types.h:438
Max pooling.
Definition: dnnl_types.h:752
A structure that contains an index and a memory object, and is used to pass arguments to dnnl_primiti...
Definition: dnnl_types.h:1842
dnnl_stream_flags_t
Stream flags.
Definition: dnnl_types.h:1947
dnnl_query_t
Primitive descriptor query specification.
Definition: dnnl_types.h:1881
non-standard 16-bit (bfloat16 w/ 7 bit mantissa) floating point.
Definition: dnnl_types.h:67
4D CNN activations tensor, an alias to dnnl_acdb
Definition: dnnl_types.h:371
A descriptor for an RNN operation.
Definition: dnnl_types.h:1404
dnnl_rnn_direction_t
A direction of RNN primitive execution.
Definition: dnnl_types.h:1388
permuted 5D tensor
Definition: dnnl_types.h:196
A sum primitive.
Definition: dnnl_types.h:672
5D CNN weights tensor, an alias to dnnl_abcde
Definition: dnnl_types.h:402
Backward weights propagation.
Definition: dnnl_types.h:655
plain 1D tensor
Definition: dnnl_types.h:176
const struct dnnl_stream * const_dnnl_stream_t
A constant execution stream handle.
Definition: dnnl_types.h:1965
A descriptor of an inner product operation.
Definition: dnnl_types.h:1348
GPU engine.
Definition: dnnl_types.h:1559
weights grad. memory desc
Definition: dnnl_types.h:1931
propagation kind
Definition: dnnl_types.h:1904
Eltwise: logistic.
Definition: dnnl_types.h:737
An element-wise primitive.
Definition: dnnl_types.h:678
In-order execution.
Definition: dnnl_types.h:1952
3D tensor blocked by 2nd dimension with block size 16
Definition: dnnl_types.h:209
3D CNN weights tensor, an alias to dnnl_abc
Definition: dnnl_types.h:384
Convolution algorithm (either direct or Winograd) is chosen just in time.
Definition: dnnl_types.h:713
Eltwise: square root.
Definition: dnnl_types.h:729
permuted 4D tensor
Definition: dnnl_types.h:198
Intel(R) Advanced Vector Extensions 512 for Intel(R) Xeon(R) Processor Scalable Family and Intel(R) C...
Definition: dnnl_types.h:2048
Eltwise: bounded_relu.
Definition: dnnl_types.h:733
4D CNN weights tensor, an alias to dnnl_cdba
Definition: dnnl_types.h:394
Forward data propagation (inference mode).
Definition: dnnl_types.h:645
for creating scratchpad memory
Definition: dnnl_types.h:1899
destination memory desc
Definition: dnnl_types.h:1932
resampling descriptor
Definition: dnnl_types.h:1924
inner product descriptor
Definition: dnnl_types.h:1918
Undefined RNN flags.
Definition: dnnl_types.h:1384
5D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBcde16b
Definition: dnnl_types.h:460
convolution descriptor
Definition: dnnl_types.h:1909
6D tensor blocked by 2nd dimension with block size 8
Definition: dnnl_types.h:301
permuted 4D tensor
Definition: dnnl_types.h:195
Bidirectional execution of RNN primitive with concatenation of the results.
Definition: dnnl_types.h:1395
A descriptor of a pooling operation.
Definition: dnnl_types.h:1209
permuted 2D tensor
Definition: dnnl_types.h:191
dnnl_data_type_t
Data type specification.
Definition: dnnl_types.h:61
LRN within a single channel.
Definition: dnnl_types.h:762
struct dnnl_engine * dnnl_engine_t
An engine handle.
Definition: dnnl_types.h:1566
Binary mul.
Definition: dnnl_types.h:780
4D CNN weights tensor, an alias to dnnl_bcda
Definition: dnnl_types.h:398
Undefined memory format tag.
Definition: dnnl_types.h:165
Packed weights format used in RNN.
Definition: dnnl_types.h:92
4D CNN weights tensor (incl. groups), an alias to dnnl_abcd
Definition: dnnl_types.h:411
const struct dnnl_primitive_desc_iterator * const_dnnl_primitive_desc_iterator_t
A constant primitive descriptor iterator handle.
Definition: dnnl_types.h:1588
Use scale and shift parameters.
Definition: dnnl_types.h:813
Eltwise: natural logarithm.
Definition: dnnl_types.h:748
layer normalization descriptor
Definition: dnnl_types.h:1917
4D tensor blocked by 1st and 2nd dimension with block size 8
Definition: dnnl_types.h:252
Linear Resampling Method.
Definition: dnnl_types.h:784
5D CNN weights tensor, an alias to dnnl_cdeba
Definition: dnnl_types.h:404
Forward data propagation (training mode).
Definition: dnnl_types.h:641
Eltwise: square.
Definition: dnnl_types.h:725
permuted 3D tensor
Definition: dnnl_types.h:192
Fuse with ReLU.
Definition: dnnl_types.h:826
Intel(R) Advanced Vector Extensions 512 subset for Intel(R) Xeon Phi(TM) Processors 7235,...
Definition: dnnl_types.h:2044
2D RNN statistics tensor, an alias to dnnl_ab
Definition: dnnl_types.h:361
const struct dnnl_primitive_desc * const_dnnl_primitive_desc_t
A constant primitive descriptor handle.
Definition: dnnl_types.h:1599
Weights format used in 8bit Winograd convolution.
Definition: dnnl_types.h:90
const struct dnnl_post_ops * const_dnnl_post_ops_t
A constant post operation chain handle.
Definition: dnnl_types.h:1670
Winograd convolution.
Definition: dnnl_types.h:711
5D tensor blocked by 1st dimension with block size 16
Definition: dnnl_types.h:262
4D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBcd8b
Definition: dnnl_types.h:475
dnnl_engine_kind_t
Kinds of engines.
Definition: dnnl_types.h:1553
A binary primitive.
Definition: dnnl_types.h:696
permuted 5D tensor
Definition: dnnl_types.h:199
Eltwise: hyperbolic tangent non-linearity (tanh)
Definition: dnnl_types.h:721
3D tensor blocked by 2nd dimension with block size 4
Definition: dnnl_types.h:213
plain 5D tensor
Definition: dnnl_types.h:180
3D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBc8b
Definition: dnnl_types.h:484
struct dnnl_post_ops * dnnl_post_ops_t
A post operation chain handle.
Definition: dnnl_types.h:1667
GEMM descriptor (internal)
Definition: dnnl_types.h:1920
Default order execution.
Definition: dnnl_types.h:1950
A pooling primitive.
Definition: dnnl_types.h:682
permuted 4D tensor
Definition: dnnl_types.h:189
lrn descriptor
Definition: dnnl_types.h:1915
Backward propagation (with respect to all parameters).
Definition: dnnl_types.h:651
5D CNN weights tensor (incl. groups), an alias to dnnl_acbde
Definition: dnnl_types.h:417
A descriptor of a Softmax operation.
Definition: dnnl_types.h:1179
Intel(R) Advanced Vector Extensions 512 with Intel(R) DL Boost and Bfloat16 Support for Intel(R) Xeon...
Definition: dnnl_types.h:2058
Primitive iterator passed over last primitive descriptor.
Definition: dnnl_types.h:48
An opaque structure to describe a primitive descriptor.
5D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBcde8b
Definition: dnnl_types.h:466
Average pooling (alias for dnnl_pooling_avg_exclude_padding)
Definition: dnnl_types.h:758
RNN cell.
Definition: dnnl_types.h:764
Alias for dnnl_unidirectional_left2right.
Definition: dnnl_types.h:1400
4D tensor blocked by 2nd dimension with block size 4
Definition: dnnl_types.h:235
matrix multiplication (matmul) descriptor
Definition: dnnl_types.h:1923
struct dnnl_primitive_desc * dnnl_primitive_desc_t
A primitive descriptor handle.
Definition: dnnl_types.h:1596
binary descriptor
Definition: dnnl_types.h:1921
GRU cell with linear before reset.
Definition: dnnl_types.h:776
Forward data propagation (alias for dnnl_forward_training).
Definition: dnnl_types.h:649
32-bit/single-precision floating point.
Definition: dnnl_types.h:69
permuted 6D tensor
Definition: dnnl_types.h:188
3D CNN weights tensor, an alias to dnnl_bca
Definition: dnnl_types.h:390
Use global statistics.
Definition: dnnl_types.h:800
Local response normalization (LRN) across multiple channels.
Definition: dnnl_types.h:760
An (out-of-place) concat primitive.
Definition: dnnl_types.h:670
3D RNN data tensor in the format (batch, seq_length, input channels).
Definition: dnnl_types.h:428
destination grad. memory desc
Definition: dnnl_types.h:1933
Undefined memory format kind, used for empty memory descriptors.
Definition: dnnl_types.h:81
6D tensor blocked by 2nd dimension with block size 16
Definition: dnnl_types.h:296
A layer normalization primitive.
Definition: dnnl_types.h:688
Any ISA (no restrictions)
Definition: dnnl_types.h:2027
op descriptor
Definition: dnnl_types.h:1908
struct dnnl_primitive_desc_iterator * dnnl_primitive_desc_iterator_t
A primitive descriptor iterator handle.
Definition: dnnl_types.h:1585
The operation failed due to an out-of-memory condition.
Definition: dnnl_types.h:42
int64_t dnnl_dim_t
A type to describe tensor dimension.
Definition: dnnl_types.h:870
5D CNN weights tensor, an alias to dnnl_bcdea
Definition: dnnl_types.h:408
plain 4D tensor
Definition: dnnl_types.h:179
8-bit unsigned integer.
Definition: dnnl_types.h:75
5D CNN activations tensor, an alias to dnnl_abcde
Definition: dnnl_types.h:375
workspace memory desc
Definition: dnnl_types.h:1934
Just a sentinel, not real memory format tag.
Definition: dnnl_types.h:350
deconvolution descriptor
Definition: dnnl_types.h:1910
struct dnnl_memory * dnnl_memory_t
A memory handle.
Definition: dnnl_types.h:1030
A logsoftmax primitive.
Definition: dnnl_types.h:698
Undefined memory format tag.
Definition: dnnl_types.h:168
Direct deconvolution.
Definition: dnnl_types.h:715
A reorder primitive.
Definition: dnnl_types.h:666
A descriptor of a Local Response Normalization (LRN) operation.
Definition: dnnl_types.h:1247
Default stream configuration.
Definition: dnnl_types.h:1956
A descriptor of a shuffle operation.
Definition: dnnl_types.h:1113
3D CNN weights tensor, an alias to dnnl_acb
Definition: dnnl_types.h:386
Backward data propagation.
Definition: dnnl_types.h:653
permuted 5D tensor
Definition: dnnl_types.h:190
Structure containing version information as per Semantic Versioning
Definition: dnnl_types.h:1995
A descriptor of a Batch Normalization operation.
Definition: dnnl_types.h:1278
4D RNN bias tensor in the format (num_layers, num_directions, num_gates, output_channels).
Definition: dnnl_types.h:452
dnnl_dim_t dnnl_dims_t[DNNL_MAX_NDIMS]
A type to describe tensor dimensions.
Definition: dnnl_types.h:873
A descriptor of an element-wise operation.
Definition: dnnl_types.h:1135
4D tensor blocked by 2nd dimension with block size 16
Definition: dnnl_types.h:229
Nearest Neighbor Resampling Method.
Definition: dnnl_types.h:782
An RNN primitive.
Definition: dnnl_types.h:692
number of outputs expected
Definition: dnnl_types.h:1888
Intel(R) SSE4.1.
Definition: dnnl_types.h:2030
dnnl_format_kind_t
Memory format kind.
Definition: dnnl_types.h:79
Generic description of blocked data layout for most memory formats.
Definition: dnnl_types.h:878
const struct dnnl_primitive * const_dnnl_primitive_t
A constant primitive handle.
Definition: dnnl_types.h:1683
permuted 5D tensor
Definition: dnnl_types.h:185
Intel(R) Advanced Vector Extensions 2.
Definition: dnnl_types.h:2036
Intel(R) Advanced Vector Extensions 512 with Intel(R) DL Boost Support for Intel(R) Xeon(R) Processor...
Definition: dnnl_types.h:2053
3D tensor blocked by 2nd dimension with block size 8
Definition: dnnl_types.h:220
A descriptor of a Layer Normalization operation.
Definition: dnnl_types.h:1311
Queried element is not required for given primitive.
Definition: dnnl_types.h:52
Eltwise: clip.
Definition: dnnl_types.h:750
Description of tensor of weights for winograd 2x3 convolution.
Definition: dnnl_types.h:906
Average pooling include padding.
Definition: dnnl_types.h:754
5D CNN weights tensor (incl. groups), an alias to dnnl_decab
Definition: dnnl_types.h:415
A deconvolution primitive.
Definition: dnnl_types.h:676
5D tensor blocked by 2nd dimension with block size 4
Definition: dnnl_types.h:273
Out-of-order execution.
Definition: dnnl_types.h:1954
A matrix multiplication primitive (internal).
Definition: dnnl_types.h:694
A convolution primitive.
Definition: dnnl_types.h:674
struct dnnl_primitive * dnnl_primitive_t
A primitive handle.
Definition: dnnl_types.h:1681
const struct dnnl_primitive_attr * const_dnnl_primitive_attr_t
A constant primitive descriptor attributes handle.
Definition: dnnl_types.h:1644
An opaque structure for primitive descriptor attributes.
An LRN primitive.
Definition: dnnl_types.h:684
source memory desc
Definition: dnnl_types.h:1928
dnnl_softmax_desc_t dnnl_logsoftmax_desc_t
A descriptor of a LogSoftmax operation.
Definition: dnnl_types.h:1201
#define DNNL_RNN_MAX_N_PARTS
Maximum number of parts of RNN weights tensor that require separate computation.
Definition: dnnl_types.h:928
dnnl_scratchpad_mode_t
Scratchpad mode.
Definition: dnnl_types.h:1607
Undefined data type, used for empty memory descriptors.
Definition: dnnl_types.h:63
execution engine
Definition: dnnl_types.h:1884
dnnl_wino_memory_format_t
Winograd-specific formats.
Definition: dnnl_types.h:894
softmax descriptor
Definition: dnnl_types.h:1913
A descriptor of a resampling operation.
Definition: dnnl_types.h:1523
The operation failed because of incorrect function arguments.
Definition: dnnl_types.h:44
CPU engine.
Definition: dnnl_types.h:1557
An opaque structure for a chain of post operations.
no query
Definition: dnnl_types.h:1882
Eltwise: swish.
Definition: dnnl_types.h:746
5D CNN activations tensor, an alias to dnnl_acdeb
Definition: dnnl_types.h:377
Internal weights format for 4x3 Winograd.
Definition: dnnl_types.h:902
Memory descriptor.
Definition: dnnl_types.h:976
Backward bias propagation.
Definition: dnnl_types.h:657
void * dnnl_op_desc_t
A pointer to any of the operation descriptors.
Definition: dnnl_types.h:1046
3D CNN activations tensor, an alias to dnnl_abc
Definition: dnnl_types.h:365
A matrix multiplication primitive.
Definition: dnnl_types.h:700
dnnl_cpu_isa_t
CPU instruction set flags.
Definition: dnnl_types.h:2025
stub
Definition: dnnl_types.h:1927
const struct dnnl_memory * const_dnnl_memory_t
A constant memory handle.
Definition: dnnl_types.h:1033
4D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBcd4b
Definition: dnnl_types.h:472
2D CNN weights tensor, an alias to dnnl_ab
Definition: dnnl_types.h:380
4D CNN weights tensor, an alias to dnnl_acdb
Definition: dnnl_types.h:396
permuted 4D tensor
Definition: dnnl_types.h:193
Unspecified format kind.
Definition: dnnl_types.h:84
3D RNN data tensor in the format (seq_length, batch, input channels).
Definition: dnnl_types.h:426
4D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBcd16b
Definition: dnnl_types.h:469
eltwise descriptor
Definition: dnnl_types.h:1912
struct dnnl_primitive_attr * dnnl_primitive_attr_t
A primitive descriptor attributes handle that controls primitive behavior.
Definition: dnnl_types.h:1641
permuted 3D tensor
Definition: dnnl_types.h:197
number of inputs expected
Definition: dnnl_types.h:1887
permuted 5D tensor
Definition: dnnl_types.h:187
dnnl_alg_kind_t
Kinds of algorithms.
Definition: dnnl_types.h:706
Winograd deconvolution.
Definition: dnnl_types.h:717
Intel(R) Advanced Vector Extensions 512 subset for Intel(R) Xeon Phi(TM) Processors x200 Series.
Definition: dnnl_types.h:2040
5D RNN weights tensor in the format (num_layers, num_directions, num_gates, output_channels,...
Definition: dnnl_types.h:445
The operation was successful.
Definition: dnnl_types.h:40
Eltwise: exponent.
Definition: dnnl_types.h:739
plain 6D tensor
Definition: dnnl_types.h:181
5D CNN weights tensor (incl. groups), an alias to dnnl_abcde
Definition: dnnl_types.h:413
Bidirectional execution of RNN primitive with summation of the results.
Definition: dnnl_types.h:1398
Eltwise: linear.
Definition: dnnl_types.h:731
3D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBc16b
Definition: dnnl_types.h:478
GRU cell.
Definition: dnnl_types.h:768
plain 3D tensor
Definition: dnnl_types.h:178
A descriptor of a binary operation.
Definition: dnnl_types.h:1476
dnnl_memory_extra_flags_t
Flags for memory special features.
Definition: dnnl_types.h:945
Direct convolution.
Definition: dnnl_types.h:709
source gradient memory desc
Definition: dnnl_types.h:1929
3D CNN weights tensor, an alias to dnnl_cba
Definition: dnnl_types.h:388
Forward data propagation (alias for dnnl_forward_inference).
Definition: dnnl_types.h:647
5D tensor blocked by 2nd dimension with block size 8
Definition: dnnl_types.h:283
Undefined propagation type.
Definition: dnnl_types.h:638
A tensor in a generic format described by the stride and blocking values in each dimension.
Definition: dnnl_types.h:88
primitive kind
Definition: dnnl_types.h:1885
Unidirectional execution of RNN primitive from left to right.
Definition: dnnl_types.h:1390
4D CNN weights tensor, an alias to dnnl_bacd
Definition: dnnl_types.h:400
Eltwise: parametric exponential linear unit (elu)
Definition: dnnl_types.h:723
5D CNN weights tensor, an alias to dnnl_acdeb
Definition: dnnl_types.h:406
3D CNN activations tensor, an alias to dnnl_acb
Definition: dnnl_types.h:367
3D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBc4b
Definition: dnnl_types.h:481
LSTM cell.
Definition: dnnl_types.h:766
An unspecified engine.
Definition: dnnl_types.h:1555
5D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBcde4b
Definition: dnnl_types.h:463
A resampling primitive.
Definition: dnnl_types.h:702
Internal weights format for 2x3 Winograd.
Definition: dnnl_types.h:900
Intel(R) Advanced Vector Extensions.
Definition: dnnl_types.h:2033
permuted 3D tensor
Definition: dnnl_types.h:194
dnnl_prop_kind_t
Kinds of propagation.
Definition: dnnl_types.h:635
scratchpad memory desc
Definition: dnnl_types.h:1935
4D CNN activations tensor, an alias to dnnl_abcd
Definition: dnnl_types.h:369
Eltwise: gelu.
Definition: dnnl_types.h:744
weights memory desc
Definition: dnnl_types.h:1930
batch normalization descriptor
Definition: dnnl_types.h:1916
4D CNN activations tensor, an alias to dnnl_bcda
Definition: dnnl_types.h:373
typedef const void * const_dnnl_op_desc_t
A pointer to any of the operation descriptors (constant variant).
Definition: dnnl_types.h:1048
Undefined primitive.
Definition: dnnl_types.h:664
Eltwise: soft_relu.
Definition: dnnl_types.h:735
2D RNN statistics tensor, an alias to dnnl_ba
Definition: dnnl_types.h:363
dnnl_convolution_desc_t dnnl_deconvolution_desc_t
A descriptor of a deconvolution operation.
Definition: dnnl_types.h:1105
Unidirectional execution of RNN primitive from right to left.
Definition: dnnl_types.h:1392
4D tensor blocked by 2nd dimension with block size 8
Definition: dnnl_types.h:247
plain 2D tensor
Definition: dnnl_types.h:177
(scratch) memory, additional to all inputs and outputs memory (bytes)
Definition: dnnl_types.h:1896
Primitive or engine failed on execution.
Definition: dnnl_types.h:50
6D CNN weights tensor (incl. groups), an alias to dnnl_acbdef
Definition: dnnl_types.h:421
memory desc of an execute argument
Definition: dnnl_types.h:1936
stub
Definition: dnnl_types.h:1907
Average pooling exclude padding.
Definition: dnnl_types.h:756
Binary add.
Definition: dnnl_types.h:778