activation_kernels.cu

#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"

extern "C" {
#include "activations.h"
#include "cuda.h"
}
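
/* Per-element activation functions, evaluated on the device.
 * Each activation below has a matching *_gradient_kernel further down. */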
__device__ float lhtan_activate_kernel(float x)
{
    if(x < 0) return .001f*x;
    if(x > 1) return .001f*(x-1.f) + 1.f;
    return x;
}
__device__ float lhtan_gradient_kernel(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001f;
}
__device__ float hardtan_activate_kernel(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
__device__ float linear_activate_kernel(float x){return x;}
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
__device__ float relu_activate_kernel(float x){return x*(x>0);}
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);}
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
__device__ float plse_activate_kernel(float x)
{
    if(x < -4) return .01f * (x + 4);
    if(x > 4) return .01f * (x - 4) + 1;
    return .125f*x + .5f;
}
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}
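
/* Gradients of the activations above. Several of them (logistic, loggy,
 * tanh) are written in terms of the already-activated output rather than
 * the raw input, e.g. (1-x)*x for the logistic, so callers are expected
 * to pass the activated output into gradient_array_kernel below. */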
__device__ float hardtan_gradient_kernel(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
__device__ float linear_gradient_kernel(float x){return 1;}
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
__device__ float loggy_gradient_kernel(float x)
{
    float y = (x+1)/2;
    return 2*(1-y)*y;
}
__device__ float relu_gradient_kernel(float x){return (x>0);}
__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f);}
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
__device__ float tanh_gradient_kernel(float x){return 1-x*x;}
__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
__device__ float stair_gradient_kernel(float x)
{
    if (floorf(x) == x) return 0;
    return 1;
}
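
/* Dispatch on the ACTIVATION enum from activations.h. */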
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case SELU:
            return selu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
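
/* Same dispatch for the derivatives; an unhandled activation falls through and returns 0. */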
__device__ float gradient_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient_kernel(x);
        case LOGISTIC:
            return logistic_gradient_kernel(x);
        case LOGGY:
            return loggy_gradient_kernel(x);
        case RELU:
            return relu_gradient_kernel(x);
        case ELU:
            return elu_gradient_kernel(x);
        case SELU:
            return selu_gradient_kernel(x);
        case RELIE:
            return relie_gradient_kernel(x);
        case RAMP:
            return ramp_gradient_kernel(x);
        case LEAKY:
            return leaky_gradient_kernel(x);
        case TANH:
            return tanh_gradient_kernel(x);
        case PLSE:
            return plse_gradient_kernel(x);
        case STAIR:
            return stair_gradient_kernel(x);
        case HARDTAN:
            return hardtan_gradient_kernel(x);
        case LHTAN:
            return lhtan_gradient_kernel(x);
    }
    return 0;
}
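
/* "Binary" activations treat every block of s values as two halves and
 * combine element i of the first half with element i of the second half
 * (here a plain product); the gradient kernel applies the product rule
 * to the pair. The BINARY_ACTIVATION argument is currently unused. */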
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    int i = id % s;
    int b = id / s;
    if(id < n) {
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        float de = dy[id];
        dx[b*s + i] = x2*de;
        dx[b*s + s/2 + i] = x1*de;
    }
}
  153. extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
  154. {
  155. binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
  156. check_error(cudaPeekAtLastError());
  157. }
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    int i = id % s;
    int b = id / s;
    if(id < n){
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        y[id] = x1*x2;
    }
}
  167. extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
  168. {
  169. binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
  170. check_error(cudaPeekAtLastError());
  171. }
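
/* Generic element-wise kernels: one thread per array element, either
 * applying the selected activation in place or scaling the incoming
 * deltas by the activation's derivative. */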
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) x[i] = activate_kernel(x[i], a);
}
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) delta[i] *= gradient_kernel(x[i], a);
}
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}
extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
{
    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
    check_error(cudaPeekAtLastError());
}
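
/* Usage sketch (not part of the original file): a host-side caller,
 * assuming cuda.h provides cuda_make_array()/cuda_pull_array()/cuda_free()
 * helpers as used elsewhere in this codebase:
 *
 *     float h[256];
 *     for(int i = 0; i < 256; ++i) h[i] = i/128.f - 1.f;   // sample inputs
 *     float *d = cuda_make_array(h, 256);                  // copy to device
 *     activate_array_gpu(d, 256, LEAKY);                   // in-place activation
 *     cuda_pull_array(d, h, 256);                          // copy result back
 *     cuda_free(d);
 */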