@ -81,7 +81,7 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m
*/
# endif
fprintf ( stderr , " Gaussian_yolo \n " ) ;
//fprintf(stderr, "Gaussian_yolo\n");
srand ( time ( 0 ) ) ;
return l ;
@ -140,32 +140,70 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
return b ;
}
// delta_gaussian_yolo_box: writes box-regression gradients for one predicted box into `delta`
// and returns an IoU-type score between the decoded prediction and `truth`.
//
// NOTE(review): this span looks like diff residue - superseded and replacement lines appear
// side by side (two signatures, two `iou` declarations, paired `in_exp_*` and `delta[...]`
// statements), and the lines that define temp_x / temp_y are not visible here.
// Confirm every statement against the real file before relying on these notes.
//
//   truth              - ground-truth box; its x, y, w, h fields are read below
//   x, delta           - output and gradient arrays, both addressed as index + k * stride, k = 0..7
//   biases, n          - biases[2*n] and biases[2*n+1] divide the truth width/height in the MSE branch
//   i, j, lw, lh, w, h - offsets and scale factors applied to truth when forming tx, ty, tw, th
//   scale              - multiplier folded into the temp_* factors
//   iou_normalizer     - multiplies dx, dy, dw, dh in the non-MSE branch only
//   iou_loss           - MSE selects the direct-difference branch; any other value goes through dx_box_iou
//   accumulate         - when zero, the eight delta slots are cleared before the `+=` updates
//   returns            - all_ious.iou in the MSE branch, all_ious.giou otherwise
float delta_gaussian_yolo_box ( box truth , float * x , float * biases , int n , int index , int i , int j , int lw , int lh , int w , int h , float * delta , float scale , int stride )
float delta_gaussian_yolo_box ( box truth , float * x , float * biases , int n , int index , int i , int j , int lw , int lh , int w , int h , float * delta , float scale , int stride , float iou_normalizer , IOU_LOSS iou_loss , int accumulate )
{
box pred = get_gaussian_yolo_box ( x , biases , n , index , i , j , lw , lh , w , h , stride ) ;
float iou = box_iou ( pred , truth ) ;
float tx = ( truth . x * lw - i ) ;
float ty = ( truth . y * lh - j ) ;
float tw = log ( truth . w * w / biases [ 2 * n ] ) ;
float th = log ( truth . h * h / biases [ 2 * n + 1 ] ) ;
float iou ;
// Plain IoU and GIoU are both computed up front; the branch on iou_loss decides which one is returned.
ious all_ious = { 0 } ;
all_ious . iou = box_iou ( pred , truth ) ;
all_ious . giou = box_giou ( pred , truth ) ;
// A zero predicted width/height is replaced by 1.0; this happens after the IoU values above were taken.
if ( pred . w = = 0 ) { pred . w = 1.0 ; }
if ( pred . h = = 0 ) { pred . h = 1.0 ; }
float sigma_const = 0.3 ;
float epsi = pow ( 10 , - 9 ) ;
float in_exp_x = ( tx - x [ index + 0 * stride ] ) / x [ index + 1 * stride ] ;
float dx , dy , dw , dh ;
// MSE branch: each residual is (encoded truth - raw output) for the slots at offsets 0, 2, 4, 6.
if ( iou_loss = = MSE ) {
iou = all_ious . iou ;
float tx = ( truth . x * lw - i ) ;
float ty = ( truth . y * lh - j ) ;
float tw = log ( truth . w * w / biases [ 2 * n ] ) ;
float th = log ( truth . h * h / biases [ 2 * n + 1 ] ) ;
dx = ( tx - x [ index + 0 * stride ] ) ;
dy = ( ty - x [ index + 2 * stride ] ) ;
dw = ( tw - x [ index + 4 * stride ] ) ;
dh = ( th - x [ index + 6 * stride ] ) ;
}
else
{
iou = all_ious . giou ;
// https://github.com/generalized-iou/g-darknet
// https://arxiv.org/abs/1902.09630v2
// https://giou.stanford.edu/
all_ious . dx_iou = dx_box_iou ( pred , truth , iou_loss ) ;
// jacobian^t (transpose)
// The left/right/top/bottom terms returned by dx_box_iou are recombined into x, y, w, h terms.
dx = ( all_ious . dx_iou . dl + all_ious . dx_iou . dr ) ;
dy = ( all_ious . dx_iou . dt + all_ious . dx_iou . db ) ;
dw = ( ( - 0.5 * all_ious . dx_iou . dl ) + ( 0.5 * all_ious . dx_iou . dr ) ) ;
dh = ( ( - 0.5 * all_ious . dx_iou . dt ) + ( 0.5 * all_ious . dx_iou . db ) ) ;
// normalize iou weight
dx * = iou_normalizer ;
dy * = iou_normalizer ;
dw * = iou_normalizer ;
dh * = iou_normalizer ;
}
// Each residual is divided by the value stored in the next odd slot (offsets 1, 3, 5, 7);
// presumably those slots hold the predicted standard deviations - TODO confirm.
// normal_dist_* is exp(-0.5 * in_exp^2) / (sqrt(2*pi) * (that slot + sigma_const)).
float in_exp_x = dx / x [ index + 1 * stride ] ;
float in_exp_x_2 = pow ( in_exp_x , 2 ) ;
float normal_dist_x = exp ( in_exp_x_2 * ( - 1. / 2. ) ) / ( sqrt ( M_PI * 2.0 ) * ( x [ index + 1 * stride ] + sigma_const ) ) ;
float in_exp_y = ( ty - x [ index + 2 * stride ] ) / x [ index + 3 * stride ] ;
float in_exp_y = dy / x [ index + 3 * stride ] ;
float in_exp_y_2 = pow ( in_exp_y , 2 ) ;
float normal_dist_y = exp ( in_exp_y_2 * ( - 1. / 2. ) ) / ( sqrt ( M_PI * 2.0 ) * ( x [ index + 3 * stride ] + sigma_const ) ) ;
float in_exp_w = ( tw - x [ index + 4 * stride ] ) / x [ index + 5 * stride ] ;
float in_exp_w = dw / x [ index + 5 * stride ] ;
float in_exp_w_2 = pow ( in_exp_w , 2 ) ;
float normal_dist_w = exp ( in_exp_w_2 * ( - 1. / 2. ) ) / ( sqrt ( M_PI * 2.0 ) * ( x [ index + 5 * stride ] + sigma_const ) ) ;
float in_exp_h = ( th - x [ index + 6 * stride ] ) / x [ index + 7 * stride ] ;
float in_exp_h = dh / x [ index + 7 * stride ] ;
float in_exp_h_2 = pow ( in_exp_h , 2 ) ;
float normal_dist_h = exp ( in_exp_h_2 * ( - 1. / 2. ) ) / ( sqrt ( M_PI * 2.0 ) * ( x [ index + 7 * stride ] + sigma_const ) ) ;
@ -174,15 +212,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
// temp_* = 0.5 * normal_dist / (normal_dist + epsi) * scale; epsi keeps the denominator non-zero.
float temp_w = ( 1. / 2. ) * 1. / ( normal_dist_w + epsi ) * normal_dist_w * scale ;
float temp_h = ( 1. / 2. ) * 1. / ( normal_dist_h + epsi ) * normal_dist_h * scale ;
delta [ index + 0 * stride ] = temp_x * in_exp_x * ( 1. / x [ index + 1 * stride ] ) ;
delta [ index + 2 * stride ] = temp_y * in_exp_y * ( 1. / x [ index + 3 * stride ] ) ;
delta [ index + 4 * stride ] = temp_w * in_exp_w * ( 1. / x [ index + 5 * stride ] ) ;
delta [ index + 6 * stride ] = temp_h * in_exp_h * ( 1. / x [ index + 7 * stride ] ) ;
// accumulate == 0: clear all eight slots so the `+=` updates further down start from zero.
if ( ! accumulate ) {
delta [ index + 0 * stride ] = 0 ;
delta [ index + 1 * stride ] = 0 ;
delta [ index + 2 * stride ] = 0 ;
delta [ index + 3 * stride ] = 0 ;
delta [ index + 4 * stride ] = 0 ;
delta [ index + 5 * stride ] = 0 ;
delta [ index + 6 * stride ] = 0 ;
delta [ index + 7 * stride ] = 0 ;
}
delta [ index + 1 * stride ] = temp_x * ( in_exp_x_2 / x [ index + 1 * stride ] - 1. / ( x [ index + 1 * stride ] + sigma_const ) ) ;
delta [ index + 3 * stride ] = temp_y * ( in_exp_y_2 / x [ index + 3 * stride ] - 1. / ( x [ index + 3 * stride ] + sigma_const ) ) ;
delta [ index + 5 * stride ] = temp_w * ( in_exp_w_2 / x [ index + 5 * stride ] - 1. / ( x [ index + 5 * stride ] + sigma_const ) ) ;
delta [ index + 7 * stride ] = temp_h * ( in_exp_h_2 / x [ index + 7 * stride ] - 1. / ( x [ index + 7 * stride ] + sigma_const ) ) ;
// Even slots (0, 2, 4, 6) receive temp * in_exp / (next odd slot).
delta [ index + 0 * stride ] + = temp_x * in_exp_x * ( 1. / x [ index + 1 * stride ] ) ;
delta [ index + 2 * stride ] + = temp_y * in_exp_y * ( 1. / x [ index + 3 * stride ] ) ;
delta [ index + 4 * stride ] + = temp_w * in_exp_w * ( 1. / x [ index + 5 * stride ] ) ;
delta [ index + 6 * stride ] + = temp_h * in_exp_h * ( 1. / x [ index + 7 * stride ] ) ;
// Odd slots (1, 3, 5, 7) receive temp * (in_exp^2 / slot - 1 / (slot + sigma_const)).
delta [ index + 1 * stride ] + = temp_x * ( in_exp_x_2 / x [ index + 1 * stride ] - 1. / ( x [ index + 1 * stride ] + sigma_const ) ) ;
delta [ index + 3 * stride ] + = temp_y * ( in_exp_y_2 / x [ index + 3 * stride ] - 1. / ( x [ index + 3 * stride ] + sigma_const ) ) ;
delta [ index + 5 * stride ] + = temp_w * ( in_exp_w_2 / x [ index + 5 * stride ] - 1. / ( x [ index + 5 * stride ] + sigma_const ) ) ;
delta [ index + 7 * stride ] + = temp_h * ( in_exp_h_2 / x [ index + 7 * stride ] - 1. / ( x [ index + 7 * stride ] + sigma_const ) ) ;
return iou ;
}
@ -201,6 +250,18 @@ void delta_gaussian_yolo_class(float *output, float *delta, int index, int class
}
}
/*
 * Reports whether any class score at one prediction slot clears a confidence threshold.
 *
 * output       - score array; class k is read at output[class_index + stride * k]
 * classes      - number of class entries to scan (0 yields a result of 0)
 * class_index  - offset of the first class entry
 * stride       - distance between consecutive class entries
 * objectness   - factor multiplied into each class score before the comparison
 * class_id     - accepted for interface compatibility; the check does not consult it
 * conf_thresh  - a score must be strictly greater than this to count
 *
 * Returns 1 as soon as one scaled score exceeds conf_thresh, otherwise 0.
 */
int compare_gaussian_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id, float conf_thresh)
{
    int j;

    (void)class_id; /* intentionally unused */

    /* The step must be a prefix increment: written as two separate '+' tokens it
     * is a pair of unary pluses, j stays at 0, and the loop never terminates
     * unless the first class already passes the threshold. */
    for (j = 0; j < classes; ++j) {
        float prob = objectness * output[class_index + stride * j];
        if (prob > conf_thresh) {
            return 1;
        }
    }
    return 0;
}
static int entry_gaussian_index ( layer l , int batch , int location , int entry )
{
int n = location / ( l . w * l . h ) ;
@ -254,12 +315,31 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
for ( n = 0 ; n < l . n ; + + n ) {
int box_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 0 ) ;
box pred = get_gaussian_yolo_box ( l . output , l . biases , l . mask [ n ] , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . w * l . h ) ;
float best_match_iou = 0 ;
int best_match_t = 0 ;
float best_iou = 0 ;
int best_t = 0 ;
for ( t = 0 ; t < l . max_boxes ; + + t ) {
box truth = float_to_box_stride ( state . truth + t * ( 4 + 1 ) + b * l . truths , 1 ) ;
int class_id = state . truth [ t * ( 4 + 1 ) + b * l . truths + 4 ] ;
if ( class_id > = l . classes ) {
printf ( " Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n " , class_id , l . classes , l . classes - 1 ) ;
printf ( " truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n " , truth . x , truth . y , truth . w , truth . h , class_id ) ;
getchar ( ) ;
continue ; // if label contains class_id more than number of classes in the cfg-file
}
if ( ! truth . x ) break ;
int class_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 9 ) ;
int obj_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 8 ) ;
float objectness = l . output [ obj_index ] ;
int class_id_match = compare_yolo_class ( l . output , l . classes , class_index , l . w * l . h , objectness , class_id , 0.25f ) ;
float iou = box_iou ( pred , truth ) ;
if ( iou > best_match_iou & & class_id_match = = 1 ) {
best_match_iou = iou ;
best_match_t = t ;
}
if ( iou > best_iou ) {
best_iou = iou ;
best_t = t ;
@ -267,19 +347,19 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
}
int obj_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 8 ) ;
avg_anyobj + = l . output [ obj_index ] ;
l . delta [ obj_index ] = 0 - l . output [ obj_index ] ;
if ( best_iou > l . ignore_thresh ) {
l . delta [ obj_index ] = l . cls_normalizer * ( 0 - l . output [ obj_index ] ) ;
if ( best_match_ iou > l . ignore_thresh ) {
l . delta [ obj_index ] = 0 ;
}
if ( best_iou > l . truth_thresh ) {
l . delta [ obj_index ] = 1 - l . output [ obj_index ] ;
l . delta [ obj_index ] = l . cls_normalizer * ( 1 - l . output [ obj_index ] ) ;
int class_id = state . truth [ best_t * ( 4 + 1 ) + b * l . truths + 4 ] ;
if ( l . map ) class_id = l . map [ class_id ] ;
int class_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 9 ) ;
delta_gaussian_yolo_class ( l . output , l . delta , class_index , class_id , l . classes , l . w * l . h , 0 ) ;
box truth = float_to_box_stride ( state . truth + best_t * ( 4 + 1 ) + b * l . truths , 1 ) ;
delta_gaussian_yolo_box ( truth , l . output , l . biases , l . mask [ n ] , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . delta , ( 2 - truth . w * truth . h ) , l . w * l . h ) ;
delta_gaussian_yolo_box ( truth , l . output , l . biases , l . mask [ n ] , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . delta , ( 2 - truth . w * truth . h ) , l . w * l . h , l . iou_normalizer , l . iou_loss , 1 ) ;
}
}
}
@ -308,11 +388,11 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
int mask_n = int_index ( l . mask , best_n , l . n ) ;
if ( mask_n > = 0 ) {
int box_index = entry_gaussian_index ( l , b , mask_n * l . w * l . h + j * l . w + i , 0 ) ;
float iou = delta_gaussian_yolo_box ( truth , l . output , l . biases , best_n , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . delta , ( 2 - truth . w * truth . h ) , l . w * l . h ) ;
float iou = delta_gaussian_yolo_box ( truth , l . output , l . biases , best_n , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . delta , ( 2 - truth . w * truth . h ) , l . w * l . h , l . iou_normalizer , l . iou_loss , 1 ) ;
int obj_index = entry_gaussian_index ( l , b , mask_n * l . w * l . h + j * l . w + i , 8 ) ;
avg_obj + = l . output [ obj_index ] ;
l . delta [ obj_index ] = 1 - l . output [ obj_index ] ;
l . delta [ obj_index ] = l . cls_normalizer * ( 1 - l . output [ obj_index ] ) ;
int class_id = state . truth [ t * ( 4 + 1 ) + b * l . truths + 4 ] ;
if ( l . map ) class_id = l . map [ class_id ] ;
@ -325,6 +405,64 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
if ( iou > .75 ) recall75 + = 1 ;
avg_iou + = iou ;
}
// iou_thresh
for ( n = 0 ; n < l . total ; + + n ) {
int mask_n = int_index ( l . mask , n , l . n ) ;
if ( mask_n > = 0 & & n ! = best_n & & l . iou_thresh < 1.0f ) {
box pred = { 0 } ;
pred . w = l . biases [ 2 * n ] / state . net . w ;
pred . h = l . biases [ 2 * n + 1 ] / state . net . h ;
float iou = box_iou ( pred , truth_shift ) ;
// iou, n
if ( iou > l . iou_thresh ) {
int box_index = entry_gaussian_index ( l , b , mask_n * l . w * l . h + j * l . w + i , 0 ) ;
float iou = delta_gaussian_yolo_box ( truth , l . output , l . biases , n , box_index , i , j , l . w , l . h , state . net . w , state . net . h , l . delta , ( 2 - truth . w * truth . h ) , l . w * l . h , l . iou_normalizer , l . iou_loss , 1 ) ;
int obj_index = entry_gaussian_index ( l , b , mask_n * l . w * l . h + j * l . w + i , 8 ) ;
avg_obj + = l . output [ obj_index ] ;
l . delta [ obj_index ] = l . cls_normalizer * ( 1 - l . output [ obj_index ] ) ;
int class_id = state . truth [ t * ( 4 + 1 ) + b * l . truths + 4 ] ;
if ( l . map ) class_id = l . map [ class_id ] ;
int class_index = entry_gaussian_index ( l , b , mask_n * l . w * l . h + j * l . w + i , 9 ) ;
delta_gaussian_yolo_class ( l . output , l . delta , class_index , class_id , l . classes , l . w * l . h , & avg_cat ) ;
+ + count ;
+ + class_count ;
if ( iou > .5 ) recall + = 1 ;
if ( iou > .75 ) recall75 + = 1 ;
avg_iou + = iou ;
}
}
}
}
// averages the deltas obtained by the function: delta_yolo_box()_accumulate
for ( j = 0 ; j < l . h ; + + j ) {
for ( i = 0 ; i < l . w ; + + i ) {
for ( n = 0 ; n < l . n ; + + n ) {
int box_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 0 ) ;
int class_index = entry_gaussian_index ( l , b , n * l . w * l . h + j * l . w + i , 9 ) ;
const int stride = l . w * l . h ;
int classes_in_one_box = 0 ;
for ( n = 0 ; n < l . classes ; + + n ) {
if ( l . delta [ class_index + stride * n ] > 0 ) classes_in_one_box + + ;
}
l . delta [ box_index + 0 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 1 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 2 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 3 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 4 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 5 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 6 * stride ] / = classes_in_one_box ;
l . delta [ box_index + 7 * stride ] / = classes_in_one_box ;
}
}
}
}
* ( l . cost ) = pow ( mag_array ( l . delta , l . outputs * l . batch ) , 2 ) ;