Torch 7 利用已有VGG模型提取图片特征

最新推荐文章于 2026-05-07 18:15:16 发布

原创最新推荐文章于 2026-05-07 18:15:16 发布 · 8.8k 阅读

8 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#深度学习

Deep Learning 专栏收录该内容

2 篇文章

订阅专栏

这篇博客介绍了如何在Torch 7中利用VGG19模型进行图片特征提取，相较于Caffe，Torch提供了更大的灵活性。作者简化了原始代码，使得该过程更加直观，适用于提取单张图片的特征。

最近在看 Torch 的东西，感觉在深度学习的运用上，相对于 Caffe 来说更灵活。不过没有找到利用已有 Caffe 模型提取图片特征的代码，后来在网上看见了一个，它利用 GPU 来处理批量图片的特征提取，写得比较规范，但看起来比较复杂。这里我简化了代码，利用其来提取一张图片的特征，方便理解：

require 'torch'
require 'paths'
require 'xlua'
require 'nn'
require 'loadcaffe'
require 'image'
require 'optim'
require 'nn'

--require 'data'
--require 'dataset.lua'
--require 'donley.lua'
--require 'model'

-- Size tables used by the helpers below. Only entries [2] and [3] are read in
-- this file; presumably the layout is {channels, height, width} -- TODO confirm.
-- loadSize: target size of the shorter image side when rescaling in loadImage.
local loadSize   = {3, 256, 256}
-- sampleSize: size of the center patch extracted by centerCrop.
local sampleSize = {3, 224, 224}

--- Load an image from disk and rescale it so that its shorter side matches
-- loadSize while the aspect ratio is preserved.
-- @param path   path of the image file to load
-- @param scale  unused; kept so existing callers keep working
-- @return the rescaled image as a 3-channel float tensor
local function loadImage(path, scale)
   local img = image.load(path, 3, 'float')
   local height, width = img:size(2), img:size(3)

   -- Pin the shorter side to loadSize and stretch the longer side by the
   -- same ratio.
   local targetW, targetH
   if width < height then
      targetW = loadSize[2]
      targetH = loadSize[3] * height / width
   else
      targetW = loadSize[2] * width / height
      targetH = loadSize[3]
   end

   return image.scale(img, targetW, targetH)
end

--- Load an image and rescale its shorter side to loadSize, keeping the
-- aspect ratio.
-- This used to be a line-for-line copy of loadImage; it now delegates to it so
-- the resizing logic lives in a single place.
-- @param path  path of the image file to load
-- @return the rescaled image as a 3-channel float tensor
local function loadImage2(path)
   return loadImage(path)
end

-- VGG preprocessing
-- Per-channel means subtracted after the image has been scaled by 255,
-- listed in B, G, R order to match the channel swap below.
local bgr_means = {103.939,116.779,123.68}

--- Convert an image into the layout used for the VGG network in this script:
-- first and third channels swapped, values multiplied by 255, and the
-- per-channel means subtracted.
-- @param img  3-channel image tensor (left unmodified; a copy is returned)
-- @return a new tensor holding the preprocessed image
local function vggPreprocess(img)
  local bgr = img:clone()

  -- Swap channels 1 and 3; channel 2 stays where it is.
  bgr[1]:copy(img[3])
  bgr[3]:copy(img[1])

  bgr:mul(255)
  for channel, mean in ipairs(bgr_means) do
    bgr[channel]:add(-mean)
  end

  return bgr
end

--- Cut the central sampleSize patch out of an image.
-- @param input  image tensor whose dimension 2 is height and dimension 3 is width
-- @return the cropped center patch
local function centerCrop(input)
   local oH = sampleSize[2]
   local oW = sampleSize[3]
   local iW = input:size(3)
   local iH = input:size(2)
   -- Top-left corner of the patch, so that it sits in the middle of the image.
   local w1 = math.ceil((iW-oW)/2)
   local h1 = math.ceil((iH-oH)/2)
   -- The bottom edge must use the output height (oH); the previous code used
   -- oW, which was only correct because sampleSize happens to be square.
   local out = image.crop(input, w1, h1, w1+oW, h1+oH) -- center patch
   return out
end

--- Build the feature extractor from the pretrained VGG-19 Caffe model.
-- Loads the network with loadcaffe, removes its last three modules, appends
-- an L2 normalization layer and switches the network to evaluation mode.
-- @return the prepared network
function getPretrainedModel()
  local prototxtPath = 'model_weights/VGG_ILSVRC_19_layers_deploy.prototxt'
  local weightsPath = 'model_weights/VGG_ILSVRC_19_layers.caffemodel'

  -- The backend is fixed to 'nn'; an earlier version had (disabled) code for
  -- choosing the backend from an option.
  local net = loadcaffe.load(prototxtPath, weightsPath, 'nn')

  -- Remove the Softmax, class scores, and dropout layer from the original
  -- network, leaving only the ReLU-ed activations immediately prior to the
  -- classifier.
  local layers = net.modules
  for _ = 1, 3 do
    table.remove(layers)
  end

  -- L2 normalize the activations
  net:add(nn.Normalize(2))

  net:evaluate()

  return net
end

-- Disabled includes from the original project:
--paths.dofile('donkey.lua')
--paths.dofile('model.lua')

-- Driver: extract the VGG-19 feature vector of a single image and print it.
torch.setdefaulttensortype('torch.FloatTensor')

model = getPretrainedModel()

filepath = '/home/test/下载/vgg-19-feature-extractor-master/pic/G0024497.JPG'

-- Pipeline: load + rescale -> VGG preprocessing -> center crop -> forward pass.
local features = model:forward(centerCrop(vggPreprocess(loadImage(filepath))))

print(features)
-- For a Torch tensor, # presumably yields its size -- confirm against the
-- Tensor docs.
print(#features)