- #include "cuda_runtime.h"
- #include "curand.h"
- #include "cublas_v2.h"
- extern "C" {
- #include "activations.h"
- #include "cuda.h"
- }
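// Device-side activation functions and their gradients for each ACTIVATION
// type, plus the __global__ kernels and extern "C" host wrappers that apply
// them element-wise to arrays on the GPU.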
__device__ float lhtan_activate_kernel(float x)
{
    if(x < 0) return .001f*x;
    if(x > 1) return .001f*(x-1.f) + 1.f;
    return x;
}
__device__ float lhtan_gradient_kernel(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001f;
}
__device__ float hardtan_activate_kernel(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
__device__ float linear_activate_kernel(float x){return x;}
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
__device__ float relu_activate_kernel(float x){return x*(x>0);}
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);}
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
__device__ float plse_activate_kernel(float x)
{
    if(x < -4) return .01f * (x + 4);
    if(x > 4) return .01f * (x - 4) + 1;
    return .125f*x + .5f;
}
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}

__device__ float hardtan_gradient_kernel(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
__device__ float linear_gradient_kernel(float x){return 1;}
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
__device__ float loggy_gradient_kernel(float x)
{
    float y = (x+1)/2;
    return 2*(1-y)*y;
}
__device__ float relu_gradient_kernel(float x){return (x>0);}
__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f);}
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
__device__ float tanh_gradient_kernel(float x){return 1-x*x;}
__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
__device__ float stair_gradient_kernel(float x)
{
    if (floorf(x) == x) return 0;
    return 1;
}
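// Note: several of the gradient kernels above (LOGISTIC, LOGGY, TANH, ELU,
// SELU) are written in terms of the activation's output rather than its
// input, e.g. d/dx logistic(x) = y*(1-y) with y = logistic(x). They therefore
// assume gradient_array_gpu is called on an array that has already been run
// through activate_array_gpu.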
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case SELU:
            return selu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
__device__ float gradient_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient_kernel(x);
        case LOGISTIC:
            return logistic_gradient_kernel(x);
        case LOGGY:
            return loggy_gradient_kernel(x);
        case RELU:
            return relu_gradient_kernel(x);
        case ELU:
            return elu_gradient_kernel(x);
        case SELU:
            return selu_gradient_kernel(x);
        case RELIE:
            return relie_gradient_kernel(x);
        case RAMP:
            return ramp_gradient_kernel(x);
        case LEAKY:
            return leaky_gradient_kernel(x);
        case TANH:
            return tanh_gradient_kernel(x);
        case PLSE:
            return plse_gradient_kernel(x);
        case STAIR:
            return stair_gradient_kernel(x);
        case HARDTAN:
            return hardtan_gradient_kernel(x);
        case LHTAN:
            return lhtan_gradient_kernel(x);
    }
    return 0;
}
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id < n) {
        int i = id % s;
        int b = id / s;
        // Each thread handles one (x1, x2) pair taken from the two halves of a
        // group of size s; the product rule gives d(x1*x2)/dx1 = x2 and vice versa.
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        float de = dy[id];
        dx[b*s + i] = x2*de;
        dx[b*s + s/2 + i] = x1*de;
    }
}
extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
{
    // n counts individual elements, but each thread processes a pair, so launch n/2 threads.
    binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id < n) {
        int i = id % s;
        int b = id / s;
        // Multiplicative pairing: the output is the product of the two halves.
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        y[id] = x1*x2;
    }
}
extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
{
    binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) x[i] = activate_kernel(x[i], a);
}
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) delta[i] *= gradient_kernel(x[i], a);
}
// Applies activation a in place to the n-element GPU array x.
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}
// Scales delta by the activation gradient evaluated at x (the activated values).
extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
{
    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
    check_error(cudaPeekAtLastError());
}
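/*
 * A minimal host-side sketch of how the array wrappers might be driven. It is
 * illustrative only: cuda_make_array, cuda_pull_array, and cuda_free are
 * assumed to be the helpers declared in cuda.h, and LEAKY is one of the
 * ACTIVATION values from activations.h.
 *
 *     float host[4] = {-2.f, -.5f, .5f, 2.f};
 *     float *dev = cuda_make_array(host, 4);   // allocate and copy to the GPU
 *     activate_array_gpu(dev, 4, LEAKY);       // activate in place
 *     cuda_pull_array(dev, host, 4);           // copy the results back
 *     cuda_free(dev);
 */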