OpenStack源码学习笔记4

Roy

2019-12-09

这篇文章记录nova创建快照的过程。根据文档，创建快照其实是向/servers/{server_id}/action发送一个POST请求，请求体类似：

{
    "createImage" : {
        "name" : "image-name",
        "metadata": {}
    }
}

根据openstack的套路，首先找到处理这个请求的代码，在第一篇文章中我们分析了在Stein版中虚拟机的创建过程，但由于某些不可描述的原因，下面的代码是N版的，一定要注意版本问题。N版中是没有nova/api/openstack/compute/routes.py这个文件的，所有处理请求的代码都在nova/api/openstack/compute/目录中，所以我们直接看servers.py即可，相关代码如下：

# Registered as the handler for the 'createImage' server action; the
# decorator stack also declares the 202 response code, the expected error
# codes and the request-body schemas for this action.
@wsgi.response(202)
@extensions.expected_errors((400, 403, 404, 409))
@wsgi.action('createImage')
@common.check_snapshots_enabled
@validation.schema(schema_servers.create_image, '2.0', '2.0')
@validation.schema(schema_servers.create_image, '2.1')
def _action_create_image(self, req, id, body):
    """Snapshot a server instance.

    Asks the compute API to snapshot the server and answers with a 202
    response whose ``Location`` header is the URL generated for the new
    image.
    """
    # NOTE(review): code elided in this excerpt; context, instance, bdms,
    # image_name and metadata used below are presumably set up here.
    ....
    try:
        # Volume-backed instances need an extra policy check and go through
        # snapshot_volume_backed(); every other instance takes the plain
        # snapshot() path.
        if compute_utils.is_volume_backed_instance(context, instance,
                                                        bdms):
            context.can(server_policies.SERVERS %
                'create_image:allow_volume_backed')
            image = self.compute_api.snapshot_volume_backed(
                                                    context,
                                                    instance,
                                                    image_name,
                                                    extra_properties=
                                                    metadata)
        else:
            image = self.compute_api.snapshot(context,
                                                instance,
                                                image_name,
                                                extra_properties=metadata)
    # NOTE(review): the exception handlers of this try block are elided in
    # this excerpt.
    ...
    # build location of newly-created image entity
    image_id = str(image['id'])
    image_ref = glance.generate_image_url(image_id)

    # Reply with 202 and point the caller at the new image via Location.
    resp = webob.Response(status_int=202)
    resp.headers['Location'] = image_ref
    return resp

由于我的配置文件中没启用cell功能，所以代码中的self.compute_api就是nova/compute/api.py中的API()类，snapshot方法如下：

@check_instance_cell
@check_instance_state(vm_state=[
    vm_states.ACTIVE,
    vm_states.STOPPED,
    vm_states.PAUSED,
    vm_states.SUSPENDED,
])
def snapshot(self, context, instance, name, extra_properties=None):
    """Start a snapshot of ``instance`` and return the new image record.

    The image entry is created first, the instance is then flagged with
    the IMAGE_SNAPSHOT_PENDING task state, and finally the snapshot
    request is handed to the compute RPC API.

    :param context: request context, passed on to the image and RPC calls
    :param instance: nova.objects.instance.Instance object
    :param name: name of the snapshot
    :param extra_properties: dict of extra image properties to include
                             when creating the image.
    :returns: A dict containing image metadata
    """
    snapshot_image = self._create_image(
        context, instance, name, 'snapshot',
        extra_properties=extra_properties)

    # NOTE(comstud): Any changes to this method should also be made
    # to the snapshot_instance() method in nova/cells/messaging.py
    # The save only goes through when no other task state is currently set.
    instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
    instance.save(expected_task_state=[None])

    # Hand the actual snapshot work over to the compute service.
    snapshot_image_id = snapshot_image['id']
    self.compute_rpcapi.snapshot_instance(
        context, instance, snapshot_image_id)

    return snapshot_image

def _create_image(self, context, instance, name, image_type,
                    extra_properties=None):
    """Create new image entry in the image service.  This new image
    will be reserved for the compute manager to upload a snapshot
    or backup.

    :param context: security context
    :param instance: nova.objects.instance.Instance object
    :param name: string for name of the snapshot
    :param image_type: snapshot | backup
    :param extra_properties: dict of extra image properties to include

    """
    properties = {
        'instance_uuid': instance.uuid,
        'user_id': str(context.user_id),
        'image_type': image_type,
    }
    properties.update(extra_properties or {})

    image_meta = self._initialize_instance_snapshot_metadata(
        instance, name, properties)
    # if we're making a snapshot, omit the disk and container formats,
    # since the image may have been converted to another format, and the
    # original values won't be accurate.  The driver will populate these
    # with the correct values later, on image upload.
    if image_type == 'snapshot':
        image_meta.pop('disk_format', None)
        image_meta.pop('container_format', None)
    return self.image_api.create(context, image_meta)

代码中的self.image_api最终是调用glanceclient创建镜像，之前已经写过镜像上传，这里就不贴代码了，有兴趣的可以去看nova/image/glance.py。回到snapshot方法，看到compute_rpcapi就知道这里是通过rpc发送了一个异步请求（cast不等待返回结果），nova/compute/rpcapi.py中的代码不出所料：

def snapshot_instance(self, ctxt, instance, image_id):
    """Cast a ``snapshot_instance`` request to the instance's compute host.

    :param ctxt: request context forwarded with the message
    :param instance: instance to snapshot; also used to pick the RPC
                     client and the target server
    :param image_id: id of the image entry the snapshot is stored under
    """
    rpc_version = '4.0'
    instance_client = self.router.by_instance(ctxt, instance)
    target_host = _compute_host(None, instance)
    cctxt = instance_client.prepare(server=target_host,
                                    version=rpc_version)
    # cast() is used rather than a call, so no result is read back here.
    cctxt.cast(ctxt, 'snapshot_instance',
               instance=instance, image_id=image_id)

根据套路，这里应该是到nova/compute/manager.py了：

@wrap_exception()
@reverts_task_state
@wrap_instance_fault
@delete_image_on_error
def snapshot_instance(self, context, image_id, instance):
    """Move the instance into IMAGE_SNAPSHOT and run the snapshot.

    Returns early, without snapshotting, when the instance can no longer
    be found or is being deleted.

    :param context: request context
    :param image_id: id of the image entry that will hold the snapshot
    :param instance: instance to snapshot
    """
    # NOTE(review): code elided in this excerpt.
    ...
    try:
        # The save only succeeds if the instance is still in the
        # IMAGE_SNAPSHOT_PENDING task state.
        instance.task_state = task_states.IMAGE_SNAPSHOT
        instance.save(
                    expected_task_state=task_states.IMAGE_SNAPSHOT_PENDING)
    except exception.InstanceNotFound:
        # possibility instance no longer exists, no point in continuing
        LOG.debug("Instance not found, could not set state %s "
                    "for instance.",
                    task_states.IMAGE_SNAPSHOT, instance=instance)
        return
    except exception.UnexpectedDeletingTaskStateError:
        # A delete is in progress for this instance; give up quietly.
        LOG.debug("Instance being deleted, snapshot cannot continue",
                    instance=instance)
        return
    # State transition succeeded: do the actual snapshot work.
    self._snapshot_instance(context, image_id, instance,
                            task_states.IMAGE_SNAPSHOT)

def _snapshot_instance(self, context, image_id, instance,
                        expected_task_state):
    """Snapshot the instance through the virt driver and clear its task state.

    :param context: request context; an elevated copy is used from here on
    :param image_id: id of the image entry that will hold the snapshot
    :param instance: instance to snapshot
    :param expected_task_state: not referenced in the visible excerpt;
        presumably consumed by the elided code -- TODO confirm
    """
    context = context.elevated()
    # Refresh the recorded power state before handing over to the driver.
    instance.power_state = self._get_power_state(context, instance)
    try:
        # NOTE(review): code elided in this excerpt; update_task_state is
        # presumably defined here.
        ...
        self.driver.snapshot(context, instance, image_id,
                                update_task_state)
        # Snapshot finished: clear the task state, expecting the instance
        # to be in IMAGE_UPLOADING at this point.
        instance.task_state = None
        instance.save(expected_task_state=task_states.IMAGE_UPLOADING)
        self._notify_about_instance_usage(context, instance,
                                            "snapshot.end")
    # NOTE(review): the exception handlers of this try block are elided in
    # this excerpt.
       ...

然后进入driver中，这里我以使用libvirt为例，位于nova/virt/libvirt/driver.py：

def snapshot(self, context, instance, image_id, update_task_state):
        """Create snapshot from a running VM instance.

        This command only works with qemu 0.14+
        """
        # NOTE(review): code elided in this excerpt.
        ...        
        # Fetch the image entry this snapshot will be stored under.
        snapshot = self._image_api.get(context, image_id)
        # NOTE(review): code elided in this excerpt; image_format is
        # presumably determined here.
        ...
        # NOTE(bfilippov): save lvm and rbd as raw
        if image_format == 'lvm' or image_format == 'rbd':
            image_format = 'raw'

        # Metadata for the new image, built from the instance's image
        # metadata, the chosen format and the snapshot's name.
        metadata = self._create_snapshot_metadata(instance.image_meta,
                                                  instance,
                                                  image_format,
                                                  snapshot['name'])
        # NOTE(review): code elided in this excerpt; disk_path, source_type
        # and snapshot_name are presumably set up here.
        ....
        # Pick the image backend object for the instance's disk.
        snapshot_backend = self.image_backend.snapshot(instance,
                disk_path,
                image_type=source_type)
        try:
            # Report IMAGE_UPLOADING, expecting IMAGE_PENDING_UPLOAD as the
            # current task state.
            update_task_state(task_state=task_states.IMAGE_UPLOADING,
                              expected_state=task_states.IMAGE_PENDING_UPLOAD)
            # Let the backend snapshot directly and record the location it
            # returns in the image metadata.
            metadata['location'] = snapshot_backend.direct_snapshot(
                context, snapshot_name, image_format, image_id,
                instance.image_ref)
            # NOTE(review): code elided in this excerpt.
            ...
            # Push the metadata (including the location) to the image entry.
            self._image_api.update(context, image_id, metadata,
                                   purge_props=False)
        # NOTE(review): the rest of the method, including the handlers of
        # this try block, is elided in this excerpt.
        ...

这里代码太长了我做了删减，所以缩进看起来有点奇怪。经过一系列的判断、准备后，首先获取对应的存储后端，这里我以rbd为例，其他类型的可以看nova/virt/libvirt/imagebackend.py中Backend类的相关定义。然后调用Rbd类下的direct_snapshot方法:

def direct_snapshot(self, context, snapshot_name, image_format,
                    image_id, base_image_id):
    """Snapshot this RBD disk and return the ``rbd://`` URL of the result.

    A protected snapshot of the disk is taken, cloned under ``image_id``
    into the parent pool resolved from ``base_image_id``, and flattened.
    The source snapshot is cleaned up whether or not that succeeds.  The
    new image then gets a protected snapshot named ``snap``, whose URL is
    returned.

    :param context: request context used to look up the parent pool
    :param snapshot_name: name for the temporary snapshot of the disk
    :param image_format: accepted for interface compatibility; unused here
    :param image_id: name given to the cloned image
    :param base_image_id: image used to resolve the destination pool
    :returns: ``rbd://<fsid>/<pool>/<image_id>/snap``
    """
    cluster_fsid = self.driver.get_fsid()
    target_pool = self._get_parent_pool(context, base_image_id,
                                        cluster_fsid)

    # Take a protected snapshot of the disk to clone from.
    self.driver.create_snap(self.rbd_name, snapshot_name, protect=True)
    source_url = 'rbd://%(fsid)s/%(pool)s/%(image)s/%(snap)s' % {
        'fsid': cluster_fsid,
        'pool': self.pool,
        'image': self.rbd_name,
        'snap': snapshot_name,
    }
    source_location = {'url': source_url}

    try:
        self.driver.clone(source_location, image_id, dest_pool=target_pool)
        self.driver.flatten(image_id, pool=target_pool)
    finally:
        # The source snapshot is removed on success and on failure alike.
        self.cleanup_direct_snapshot(source_location)

    self.driver.create_snap(image_id, 'snap', pool=target_pool,
                            protect=True)
    result_fields = {
        'fsid': cluster_fsid,
        'pool': target_pool,
        'image': image_id,
    }
    return 'rbd://%(fsid)s/%(pool)s/%(image)s/snap' % result_fields

这个函数本质上还是一层封装，最终调用rbd驱动提供的clone、create_snap、flatten方法返回一个location给glance。

总结一下，nova创建快照时，先调用glance接口创建一条镜像记录，实际的快照操作则交给底层对应的驱动来处理，最后把驱动返回的location更新到这条镜像记录上即可。这里多说一句，如果镜像文件特别大，使用glance进行同步会特别慢，可以参考这里的思路进行优化：使用ceph提供的功能完成复制后，再新增glance中的数据即可。