A First Look at VCNN

Overview: this post collects my notes on the VCNN framework. I can't say I fully understand it yet, although some of it is starting to make sense; I'm writing it down anyway.
https://github.com/Georgezhouzhou/vcnn_double-bladed

Notes on gen_training_data.m

addpath applications/deep_edge_aware_filters/utility/GT_filters/
addpath applications/deep_edge_aware_filters/utility/GT_filters/L0smoothing/
addpath data/
clear;
patch_dim = 64;
num_patches = 1000;
listing = dir('data/deepeaf/BSDS500/*.jpg');
fListing = dir('data/deepeaf/fImgs/*.jpg');

for m = 1 : 101
    fprintf('Extracting patch batch: %d / %d\n', m, 101);
    % extract random patches
    samples = zeros(patch_dim, patch_dim, 3, num_patches);
    labels = zeros(size(samples));
    for i = 1 : num_patches / 8  % 1000 patches per file -> 125 random draws (with repetition); each draw picks a random top-left corner
        if (mod(i,100) == 0)
            fprintf('Extracting patch: %d / %d\n', i*8, num_patches);
        end
        r_idx = random('unid', size(listing, 1));
        % the originals and the filtered images are listed in the same order,
        % so image x and its filtered version share the same index
        I = imread(strcat('data/deepeaf/BSDS500/', listing(r_idx).name));
        fI = imread(strcat('data/deepeaf/fImgs/', fListing(r_idx).name));
        orig_img_size = size(I);
        r = random('unid', orig_img_size(1) - patch_dim + 1);
        c = random('unid', orig_img_size(2) - patch_dim + 1);
        % EdgeExtract only extracts the vertical direction; the horizontal flip here
        % plus the four 90-degree rotations below cover all 8 dihedral variants of the patch
        patch = I(r:r+patch_dim-1, c:c+patch_dim-1, :);
        fpatch = fI(r:r+patch_dim-1, c:c+patch_dim-1, :);
        patchHoriFlipped = fliplr(patch);
        fpatch = fliplr(fpatch);  % flip the filtered patch as well
        idx_list = (i-1)*8+1:(i-1)*8+8;
        for idx = 1:4
            % samples stores vin, the gradients of the original patch
            % labels stores vout, the gradients of the filtered patch
            % groups of 8: slots 1~4 hold Iy, Ix, -Iy, -Ix of the original/filtered patch,
            % slots 5~8 hold Iy, Ix, -Iy, -Ix of the horizontally flipped versions;
            % entries 4 apart are mirror images of each other
            patch_rotated = im2double(imrotate(patch, (idx-1)*90));
            patch_filtered = GT_filter(patch_rotated);
            [vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
            samples(:,:,:,idx_list(idx)) = vin;
            labels(:,:,:,idx_list(idx)) = vout;

            patch_rotated = im2double(imrotate(patchHoriFlipped, (idx-1)*90));
            patch_filtered = GT_filter(patch_rotated);
            [vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
            samples(:,:,:,idx_list(idx+4)) = vin;
            labels(:,:,:,idx_list(idx+4)) = vout;
        end
    end
    samples = single(samples);
    labels = single(labels);
    % save it
    filename = strcat('data/deepeaf/certainFilter/train/patches_', num2str(m));
    save(filename, '-v7.3', 'samples', 'labels');
end
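
EdgeExtract itself is not shown in this post. Based only on the comment above (it extracts gradients, and only in the vertical direction), a rough sketch of what such a function might look like follows; this is purely an assumption and the real implementation in the repo may differ:

% Hypothetical sketch of EdgeExtract -- not the repo's actual code.
% Returns the vertical gradients of the input patch (vin) and of the
% filtered patch (vout), kept at the same size as the patch.
function [vin, vout] = EdgeExtract(patch, filteredPatch)
    vin  = zeros(size(patch));
    vout = zeros(size(filteredPatch));
    vin(1:end-1, :, :)  = diff(patch, 1, 1);          % forward difference along rows
    vout(1:end-1, :, :) = diff(filteredPatch, 1, 1);
end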

Notes on init.m

First, the relevant options are set in config:

config.GEN_OUTPUT = @gen_output_copy;

.NEW_MEM = @to_gpu (this just moves data to the GPU: gpuArray(single(x)); a small sketch follows below)

.IM2COL = @im2col_gpu

.NONLINEARITY = @relu;

.OUT_ACT = @nonlinearity_nil;

config.COST_FUN = @L2_norm;

.misc holds miscellaneous settings and other odds and ends
.misc.current_layer = 1
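
A minimal version of the to_gpu helper implied by the .NEW_MEM note above would be the following; the actual function in the repo may be written slightly differently:

function x = to_gpu(x)
    % convert to single precision and move the data onto the GPU
    x = gpuArray(single(x));
end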

Initializing the weights and computing some statistics

The first layer

r = config.weight_range;
conv_layer_c = 0;
pool_layer_c = 0;
full_layer_c = 0;
layer_num = length(config.forward_pass_scheme)-1;
config.layer_num = layer_num;
config.feature_map_sizes = {};  % 3 entries per layer; the third is the number of feature maps (the "depth"), i.e. conv_hidden_size
config.weights = {};
for idx = 1:layer_num
    if idx == 1
        ...
        if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr')
            ...
        elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
            config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
                config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r);
            if config.normalize_init_weights
                config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
            end

This part sets up the weights for the different layer types.
For the first layer, the conv_v weights are initialized with scaled random values; if normalize_init_weights is set, they are normalized as well.
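
As a concrete illustration of the snippet above, with assumed sizes (16x16 kernel, 64 hidden maps, 3 input channels, r = 0.01; none of these numbers are taken from the actual deepeaf configuration), the layer-1 weight matrix would be built like this:

r = 0.01;                               % config.weight_range (assumed value)
k = [16 16];                            % config.kernel_size(1,:) (assumed)
hidden = 64;                            % config.conv_hidden_size(1) (assumed)
chs = 3;                                % RGB input
W1 = randn(hidden, k(1)*k(2)*chs) * r;  % 64 x 768
W1 = W1 / sqrt(k(1)*k(2)*hidden);       % what normalize_init_weights does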

The other layers

For the remaining layers (everything after the first):

elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
    conv_layer_c = conv_layer_c + 1;
    config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)-config.kernel_size(conv_layer_c,1)+1 ...
        config.feature_map_sizes{idx-1}(2)-config.kernel_size(conv_layer_c,2)+1 ...
        config.conv_hidden_size(conv_layer_c)];
    config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
        config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.feature_map_sizes{idx-1}(3))*r);
    if config.normalize_init_weights
        config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
    end

As the code shows, feature_map_sizes{idx} has three entries: the first two are the spatial size, and the third is conv_hidden_size(conv_layer_c), where conv_layer_c counts the convolutional layers seen so far. The normalization code is the same as before.
The first layer is only special because its input is the image itself; every other layer derives its feature map size and other bookkeeping from the previous layer.
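
A quick check of the size formula above with assumed numbers (a 64x64 input, a 16x16 kernel followed by a 1x1 kernel; the real deepeaf kernel sizes are set in the configuration script):

prev_map = [64 64];                 % spatial size coming in
map1 = prev_map - [16 16] + 1;      % [49 49]: a conv_v layer shrinks each side by kernel-1
map2 = map1 - [1 1] + 1;            % [49 49]: a 1x1 kernel leaves the spatial size unchanged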

elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
    conv_layer_c = conv_layer_c + 1;
    if idx == layer_num
        config.weights{idx} = config.NEW_MEM(randn(config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.output_size(3), config.conv_hidden_size(conv_layer_c-1))*r);
        if config.normalize_init_weights
            config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * size(config.weights{idx}, 1));
        end
        config.GEN_OUTPUT = @gen_output_from_conv_f;
    else
        fprintf('in init(): conv_f layer in the hidden layer not supported yet.\n');
    end

Note: as the code shows, conv_f may only appear as the last layer. Also, the weights are not simply drawn from N(0,1); they are scaled by a factor, here r, i.e. config.weight_range defined in the configuration script.

With this, the network has been laid out according to config: weight initialization and normalization, plus the feature map size of each layer.

Initializing the biases

for idx = 1:layer_num-1
    config.weights{idx+layer_num} = config.NEW_MEM(zeros(config.feature_map_sizes{idx}(3), 1)+0.01);
end
if strcmp(config.forward_pass_scheme{layer_num}, 'conv_f')
    config.weights{layer_num*2} = config.NEW_MEM(zeros(size(config.weights{layer_num}, 1), 1)+0.05);
else
    config.weights{layer_num*2} = config.NEW_MEM(zeros(config.output_size(3), 1)+0.05);
end

As this shows, config.weights holds layer_num*2 entries in total. For example, deep_edge_aware has three convolutional layers; forward_pass_scheme is configured as 'conv_v', 'conv_v', 'conv_f', 'out', and layer_num = length(config.forward_pass_scheme)-1 = 4-1 = 3.
So weights{1~3} are the actual convolution kernels, weights{4~5} are the biases of the first two layers, and weights{6} is the bias of the last layer.
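
Spelled out for the deepeaf scheme, the six entries of config.weights end up laid out like this (the indices follow directly from the code above):

% config.forward_pass_scheme = {'conv_v','conv_v','conv_f','out'}, layer_num = 3
kernels     = config.weights(1:3);   % convolution kernels of layers 1..3
hidden_bias = config.weights(4:5);   % biases of layers 1 and 2, filled with 0.01
output_bias = config.weights{6};     % bias of the final conv_f layer, filled with 0.05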

prepare memory

So mem simply stands for memory: this part pre-allocates the buffers. .NEW_MEM is chosen according to whether config.device is CPU or GPU.
The first calls are reset_mem() and input_mem().

function input_mem()
    global config mem;
    mem.layer_inputs{1} = config.NEW_MEM(zeros(config.kernel_size(1, 1)*config.kernel_size(1, 2)*config.chs, ...
        (config.input_size(1)-config.kernel_size(1, 1)+1)*(config.input_size(2)-config.kernel_size(1, 2)+1)*config.batch_size));
    mem.activations{1} = config.NEW_MEM(zeros(config.feature_map_sizes{1}(3), config.feature_map_sizes{1}(1)*config.feature_map_sizes{1}(2)));
end

input_mem() allocates the buffers for the input; mem.activations{1} is zeros(feature_map_sizes{1}(3), spatial size of the first layer's feature map).
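
To make the allocation above concrete, here is the same arithmetic with assumed numbers (64x64x3 input, 16x16 first kernel, batch size 16; these are not the actual values from the deepeaf config):

k = [16 16]; chs = 3; in_sz = [64 64]; batch = 16;
rows = k(1)*k(2)*chs;                                 % 768: one unrolled kernel window per column
cols = (in_sz(1)-k(1)+1) * (in_sz(2)-k(2)+1) * batch; % 49*49*16 = 38416 window positions per batch
layer_inputs1 = zeros(rows, cols, 'single');          % shape of mem.layer_inputs{1} (im2col layout)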

Memory allocation between layers

if strcmp(config.forward_pass_scheme{2}, 'conv_v')
    conv2conv_mem(1);
end
for m = 2:layer_num
    if strfind(config.forward_pass_scheme{m}, 'conv')
        conv_mem(m);
        if strcmp(config.forward_pass_scheme{m+1}, 'out')
            conv2out_mem();
        elseif strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
            conv2conv_mem(m);
        end
    elseif strcmp(config.forward_pass_scheme{m}, 'pool')
        pool_mem(m);
        if strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
            pool2conv_mem(m);
        end
    elseif strcmp(config.forward_pass_scheme{m}, 'full')
        full_mem(m);
    end
end

This allocates the memory that connects consecutive layers. All of these *_mem functions live in the mem folder; for deep_edge_aware the relevant ones are conv2conv_mem and conv2out_mem.

building pipeline

The pipeline mostly uses the functions under the layers_adapters folder.
This is where config.pipeline_forward first appears.

config.pipeline_forward = {};
config.pipeline_forward{1} = @input2conv;
conv_layer_c = 1;
for idx = 1:layer_num
    if strfind(config.forward_pass_scheme{idx}, 'conv')
        conv_layer_c = conv_layer_c + 1;
        ...
        if strcmp(config.forward_pass_scheme{idx+1}, 'conv_v')
            config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
            if config.kernel_size(conv_layer_c, 1) == 1 && config.kernel_size(conv_layer_c, 2) == 1
                config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv1by1;
            else
                config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv;
            end
        elseif strcmp(config.forward_pass_scheme{idx+1}, 'conv_f')
            config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
            config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv_f;

This builds the forward-pass step for each layer. For example, conv2conv handles going from one convolutional layer to the next. When a kernel is 1x1 in both dimensions, a dedicated function, conv2conv1by1, is used instead; that is what handles the 1x1 second convolutional layer of deep_edge_aware.
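
The reason a 1x1 kernel gets its own adapter is that, in the column layout used here, a 1x1 convolution degenerates into a plain matrix product over the feature maps. A rough sketch (the sizes are assumptions, not the real deepeaf dimensions):

act = randn(64, 49*49, 'single');      % previous layer: 64 maps, one column per spatial position
W   = randn(64, 64, 'single');         % 1x1 kernels: output maps x input maps
b   = zeros(64, 1, 'single');
out = bsxfun(@plus, W * act, b);       % same result as convolving with 1x1 kernels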

Some follow-up configuration

config.SCALE_INPUT = @scale_input_nil;
config.SCALE_OUTPUT = @scale_output_nil;
if flag ~= 0
    return;
end
config.EXPAND_DELTA_OUT = @expand_delta_out_nil;
if strcmp(config.nonlinearity, 'relu')
    config.DERI_NONLINEARITY = @deri_relu;
...

This configures the activation functions. If the output layer has an activation function, then:

if strcmp(config.output_activation, 'softmax')
    config.DERI_OUT_ACT = @deri_softmax;
elseif strcmp(config.output_activation, 'inherit')
    config.DERI_OUT_ACT = @deri_inherit;
elseif strcmp(config.output_activation, 'nil')
    config.DERI_OUT_ACT = @deri_nonlinearity_nil;
else
    config.DERI_OUT_ACT = @deri_softmax;
end

For deep_edge_aware, however, the output activation is nil.
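
For reference, the relu configured above and its derivative amount to the following; this is a minimal sketch, and the functions in the layers folder may be written differently:

relu      = @(x) max(x, 0);            % config.NONLINEARITY
deri_relu = @(x) single(x > 0);        % config.DERI_NONLINEARITY: gradient is 1 where x > 0, else 0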

for m = 2:layer_num
    if strcmp(config.forward_pass_scheme{m}, 'conv_v')
        if strcmp(config.forward_pass_scheme{m-1}, 'pool')
            convBpool_mem(m);
        elseif strfind(config.forward_pass_scheme{m}, 'conv')
            conv_layer_id = get_conv_layer_idx_from_layer_idx(m);
            if config.kernel_size(conv_layer_id, 1) ~= 1 && config.kernel_size(conv_layer_id, 2) ~= 1
                convBconv_mem(m);
            end
        end
    end
end

Honestly, I haven't fully worked out what this loop is for. Functions with a B in the name belong to the backward pass; it seems to pre-allocate backward buffers for the connections from the second layer onward, skipping 1x1 kernels. I'll leave it at that.

building pipeline for backprop

This is the important part: backpropagation happens here.
I have to say the framework is hard to love at this point. In the forward pass every layer checks what the next layer is, and in the backward pass every layer checks what the previous layer is, so how to propagate is always decided pairwise between adjacent layers. That is quite redundant and complicated; I'll probably switch to MatConvNet in the future, this one is hard to keep up with.

config.pipeline_backprop = {};
config.pipeline_backprop{1} = @out_backprop;
for idx = layer_num+1:-1:3
    if strcmp(config.forward_pass_scheme{idx}, 'out')
        if strcmp(config.forward_pass_scheme{idx-1}, 'conv_f')
            config.EXPAND_DELTA_OUT = @expand_delta_out_for_conv_f;
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @outBconv;
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
        ...
        else
            fprintf('in init(): backprop from the output layer to the specified layer is not yet supported.\n');
        end
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
        if strcmp(config.forward_pass_scheme{idx-1}, 'conv_v')
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;
        else
            fprintf('in init(): backprop from conv_f to the specified layer is not yet supported.\n');
        end
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
        if strfind(config.forward_pass_scheme{idx-1}, 'conv')
            conv_layer_id = get_conv_layer_idx_from_layer_idx(idx);
            if config.kernel_size(conv_layer_id, 1) == 1 && config.kernel_size(conv_layer_id, 2) == 1
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;
            else
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv;
            end
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
            ...
        end

The loop above handles backprop from the output layer down to the third entry of the scheme; the remaining two layers at the bottom of the network are handled separately:

if strcmp(config.forward_pass_scheme{2}, 'conv_v') && config.kernel_size(2, 1) ~= 1 && config.kernel_size(2, 2) ~= 1
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_last;
end
if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm')
    if strcmp(config.mask_for_SR, 'true')
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask_accel;
    else
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask;
    end
elseif strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_SR;
else
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput;
end

convBconv_last covers the step from the second layer down to the first, while the very last backprop step (for deepeaf) goes from the first convolutional layer back to the input; that is why the final entry appended to config.pipeline_backprop is @convBinput.

Weight updates: adagrad

This is where .UPDATE_WEIGHTS shows up.

if strcmp(config.optimization, 'adagrad')
    config.his_grad = {};
    config.fudge_factor = 1e-6;
    if strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
        config.UPDATE_WEIGHTS = @update_weights_adagrad_SR;
        config.his_grad{1} = {};
        for m = 1:config.misc.mask_type
            config.his_grad{1}{m} = config.NEW_MEM(zeros(size(config.weights{1}{m})));
        end
        for m = 2:length(config.weights)
            config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
        end
    else
        config.UPDATE_WEIGHTS = @update_weights_adagrad;
        for m = 1:length(config.weights)
            config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
        end
    end
else
    fprintf('optimization method not supported, use adagrad as default\n');
    config.UPDATE_WEIGHTS = @update_weights_adagrad;
end

update_weights_adagrad itself looks like this:

function update_weights_adagrad()
    global config mem;
    for m = 1:length(config.weights)
        config.his_grad{m} = config.his_grad{m} + mem.grads{m} .* mem.grads{m};
        config.weights{m} = config.weights{m} - config.learning_rate * (mem.grads{m} ./ (config.fudge_factor + sqrt(config.his_grad{m})));
    end
end
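
Worked through numerically for a single weight (the values are chosen purely for illustration):

% learning_rate = 0.01, fudge_factor = 1e-6, first gradient g = 0.5, his_grad starts at 0
g   = 0.5;
his = 0 + g*g;                               % 0.25: accumulated squared gradient
w   = 1.0 - 0.01 * (g / (1e-6 + sqrt(his))); % step of ~0.01; later steps shrink as his grows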

Notes on deepeaf_training

addpath applications/deep_edge_aware_filters/
addpath applications/deep_edge_aware_filters/utility/
addpath utils/
addpath cuda/
addpath mem/
addpath layers/
addpath layers_adapters/
addpath optimization/
addpath pipeline/
addpath data/
clearvars -global config;
clearvars -global mem;
clear;
global config mem;
deepeaf_configure();  % initialize config
init(0);              % build the network from config: weight init, memory, and the forward/backward pipelines between layers

load('data/deepeaf/certainFilter/val/val_1');
perm = randperm(size(test_samples, 4));
test_samples = test_samples(:,:,:,perm);
test_labels = test_labels(:,:,:,perm);
test_samples = config.NEW_MEM(test_samples(:,:,:,1:200));
test_labels = config.NEW_MEM(test_labels(:,:,:,1:200));
test_samples = test_samples * 2;
test_labels = test_labels * 2;

count = 0;
cost_avg = 0;
epoc = 0;
points_seen = 0;
display_points = 500;
save_points = 5000;
fprintf('%s\n', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
for pass = 1:10          % 10 passes over the data
    for p = 1:100        % 100 patch files per pass
        load(strcat('data/deepeaf/certainFilter/train/patches_', num2str(p), '.mat'));
        perm = randperm(1000);
        samples = samples(:,:,:,perm);
        labels = labels(:,:,:,perm);
        train_imgs = config.NEW_MEM(samples);
        train_labels = config.NEW_MEM(labels);
        train_imgs = train_imgs * 2;
        train_labels = train_labels * 2;
        for i = 1:size(train_labels, 4) / config.batch_size
            points_seen = points_seen + config.batch_size;
            % extract in and out; out has to be center-cropped to config.output_size
            in = train_imgs(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
            out = train_labels(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
            out = out((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
                      (size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :, :);
            % operate the training pipeline
            op_train_pipe(in, out);
            % update the weights (adagrad by default)
            config.UPDATE_WEIGHTS();
            if(cost_avg == 0)
                cost_avg = config.cost;
            else
                cost_avg = (cost_avg + config.cost) / 2;
            end
            % display point
            if(mod(points_seen, display_points) == 0)
                count = count + 1;
                fprintf('%d ', count);
            end
            % save point
            if(mod(points_seen, save_points) == 0)
                fprintf('\n%s', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
                epoc = epoc + 1;
                test_cost = 0;
                for t = 1:size(test_samples, 4) / config.batch_size
                    % t_label is center-cropped to output_size as well
                    t_label = test_labels(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size);
                    t_label = config.NEW_MEM(t_label((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
                                                     (size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :));
                    % the test pipeline runs the forward pass only
                    op_test_pipe(test_samples(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size), t_label);
                    test_out = gather(mem.output);
                    test_cost = test_cost + config.cost;
                end
                test_cost = test_cost / size(test_samples, 4);
                fprintf('\nepoc %d, training avg cost: %f, test avg cost: %f\n', epoc, cost_avg, test_cost);
                save_weights(strcat('applications/deep_edge_aware_filters/results/epoc', num2str(epoc), '.mat'));
                cost_avg = 0;
            end
        end
    end
end  % for pass
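
The center crop applied to out (and t_label) above, worked through with assumed sizes; the real config.output_size comes from the configuration script, not from this post:

in_sz = 64; out_sz = 48;                   % assumed: 64x64 label patch, 48x48 network output
r0 = (in_sz - out_sz)/2 + 1;               % 9
rows = r0 : r0 + out_sz - 1;               % 9..56: the central 48x48 region is kept as the target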