Skip to content

Image Management: tar Archive to rootfs to Container

Image Management: tar Archive to rootfs to Container

Written by:

Igor Gorovyy
DevOps Engineer Lead & Senior Solutions Architect

LinkedIn


A container image is essentially a tar archive of a filesystem plus some metadata. Nothing complicated. In Sheep, an image is a directory containing a rootfs and a manifest.json.

Image structure

/var/lib/sheep/images/
  abc123/
    manifest.json    <- metadata
    rootfs/          <- filesystem
      bin/
      etc/
      usr/
      var/

manifest.json holds the JSON form of this Go struct:

// Image is the metadata record kept for one stored image. Its JSON form uses
// the keys given in the field tags.
type Image struct {
    // ID is the generated identifier; the image's directory under the image
    // store is named after it.
    ID        string    `json:"id"`
    // Name and Tag together are the human-facing reference used for lookup.
    Name      string    `json:"name"`
    Tag       string    `json:"tag"`
    // Size is the sum, in bytes, of the sizes of all non-directory entries
    // found under the rootfs when the image was imported.
    Size      int64     `json:"size"`
    // CreatedAt is the time at which the image record was created.
    CreatedAt time.Time `json:"created_at"`
    // RootFS is the path of the directory holding the unpacked filesystem.
    RootFS    string    `json:"rootfs"`
}

Two ways to get an image

1. Import -- from a tar archive

// Import creates a new image name:tag from the tar archive at tarPath.
//
// The archive is unpacked into <baseDir>/<id>/rootfs, where id is freshly
// generated, and the image's metadata is then saved under the same id. On
// success the populated *Image is returned. If the archive cannot be opened,
// the rootfs directory cannot be created, or extraction fails, an error is
// returned and the image directory is removed again.
func (im *ImageManager) Import(name, tag, tarPath string) (*Image, error) {
    // Open the archive before touching the image store, so that a bad path
    // does not leave an empty image directory behind.
    f, err := os.Open(tarPath)
    if err != nil {
        return nil, fmt.Errorf("open tar: %w", err)
    }
    defer f.Close()

    id := GenerateID()
    imageDir := filepath.Join(im.baseDir, id)
    rootfs := filepath.Join(imageDir, "rootfs")
    if err := os.MkdirAll(rootfs, 0755); err != nil {
        // MkdirAll may have created some of the parents; clean up best-effort.
        os.RemoveAll(imageDir)
        return nil, fmt.Errorf("create rootfs: %w", err)
    }

    if err := extractTar(f, rootfs); err != nil {
        // Do not keep a half-extracted image around.
        os.RemoveAll(imageDir)
        return nil, fmt.Errorf("extract tar: %w", err)
    }

    img := &Image{
        ID: id, Name: name, Tag: tag,
        CreatedAt: time.Now(), RootFS: rootfs,
    }

    // Calculate size: add up every non-directory entry under rootfs. This is
    // deliberately best-effort; entries that cannot be inspected are skipped
    // rather than failing the whole import.
    var size int64
    filepath.Walk(rootfs, func(_ string, info os.FileInfo, _ error) error {
        if info != nil && !info.IsDir() {
            size += info.Size()
        }
        return nil
    })
    img.Size = size

    // NOTE(review): the outcome of saveMetadata is not checked here. If it
    // returns an error, it should be propagated (and imageDir removed) so an
    // image without a manifest is never reported as imported — confirm its
    // signature.
    im.saveMetadata(id, img)
    return img, nil
}

Usage: sheep import myimage rootfs.tar.gz

2. Bootstrap -- from the host system

For testing, you can create a minimal image from host binaries:

// Bootstrap builds a minimal image from files found on the host.
//
// Under a freshly generated image ID it creates a skeleton directory tree,
// copies those binaries from a fixed list that exist on the host, and writes
// small hostname, hosts, and resolv.conf files into etc/.
//
// NOTE(review): none of the MkdirAll, copyFile, or WriteFile calls below is
// checked for failure, so a partially populated rootfs is not reported to the
// caller.
func (im *ImageManager) Bootstrap(name string) (*Image, error) {
    id := GenerateID()
    rootfs := filepath.Join(im.baseDir, id, "rootfs")

    // Skeleton of the root filesystem; every entry is created with mode 0755.
    dirs := []string{
        "bin", "sbin", "usr/bin", "usr/sbin", "usr/lib",
        "lib", "lib64", "etc", "dev", "proc", "sys",
        "tmp", "var", "run", "home", "root",
    }
    for _, d := range dirs {
        os.MkdirAll(filepath.Join(rootfs, d), 0755)
    }

    // Copy basic binaries from the host. A binary that cannot be stat'ed on
    // the host is skipped silently.
    // NOTE(review): only the executables themselves are copied in the code
    // shown; if they are dynamically linked they also need their shared
    // libraries inside rootfs — confirm the elided part handles that.
    binaries := []string{
        "/bin/sh", "/bin/ls", "/bin/cat",
        "/bin/echo", "/bin/ps", "/bin/mkdir", "/bin/sleep",
    }
    for _, bin := range binaries {
        if _, err := os.Stat(bin); err == nil {
            // Joining rootfs with the absolute host path yields the same
            // path relative to rootfs, e.g. <rootfs>/bin/sh.
            copyFile(bin, filepath.Join(rootfs, bin))
        }
    }

    // Minimal configs: a fixed hostname, a localhost entry, and a single
    // hard-coded DNS server (8.8.8.8).
    os.WriteFile(filepath.Join(rootfs, "etc/hostname"),
        []byte("sheep\n"), 0644)
    os.WriteFile(filepath.Join(rootfs, "etc/hosts"),
        []byte("127.0.0.1 localhost\n"), 0644)
    os.WriteFile(filepath.Join(rootfs, "etc/resolv.conf"),
        []byte("nameserver 8.8.8.8\n"), 0644)

    // ... (the rest of the function is elided in this listing; img is
    // presumably constructed there)
    return img, nil
}

Usage: sheep bootstrap minimal

Extracting a tar archive

// extractTar unpacks a tar archive, optionally gzip-compressed, from r into
// the directory dst.
//
// Directories, regular files, symlinks, and hard links are created; every
// other entry type is skipped. Extraction stops at the first failure and the
// error is returned, so dst may be left partially populated. An entry (or a
// hard-link source) whose path would resolve outside dst is rejected.
//
// Limitation: symlink targets are stored as given in the archive (absolute
// targets are legitimate inside a rootfs), so a later entry written through a
// previously extracted symlink is not confined to dst by this function.
func extractTar(r io.Reader, dst string) error {
    // Sniff the two-byte gzip magic number instead of letting gzip.NewReader
    // fail: a failed gzip.NewReader has already consumed input, and that can
    // only be undone on seekable readers.
    var magic [2]byte
    n, err := io.ReadFull(r, magic[:])
    if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
        return fmt.Errorf("read archive header: %w", err)
    }
    // Put the sniffed bytes back in front of the rest of the stream.
    var stream io.Reader = io.MultiReader(strings.NewReader(string(magic[:n])), r)
    if n == len(magic) && magic[0] == 0x1f && magic[1] == 0x8b {
        gr, err := gzip.NewReader(stream)
        if err != nil {
            return fmt.Errorf("open gzip stream: %w", err)
        }
        defer gr.Close()
        stream = gr
    }
    tr := tar.NewReader(stream)

    root := filepath.Clean(dst)
    for {
        hdr, err := tr.Next()
        if err == io.EOF {
            return nil
        }
        if err != nil {
            return err
        }

        target, err := tarEntryPath(root, hdr.Name)
        if err != nil {
            return err
        }

        switch hdr.Typeflag {
        case tar.TypeDir:
            err = os.MkdirAll(target, os.FileMode(hdr.Mode))
        case tar.TypeReg:
            err = writeTarEntry(tr, target, os.FileMode(hdr.Mode))
        case tar.TypeSymlink:
            // Archives do not always list a parent directory before its
            // children, so make sure it exists.
            if err = os.MkdirAll(filepath.Dir(target), 0755); err == nil {
                err = os.Symlink(hdr.Linkname, target)
            }
        case tar.TypeLink:
            // A hard link names another path inside the archive; resolve it
            // under root with the same containment check as the entry itself.
            var source string
            if source, err = tarEntryPath(root, hdr.Linkname); err == nil {
                if err = os.MkdirAll(filepath.Dir(target), 0755); err == nil {
                    err = os.Link(source, target)
                }
            }
        }
        if err != nil {
            return fmt.Errorf("extract %q: %w", hdr.Name, err)
        }
    }
}

// tarEntryPath joins name onto root and rejects results that fall outside
// root (for example names containing "../" components). root must be clean.
func tarEntryPath(root, name string) (string, error) {
    target := filepath.Join(root, name)
    rel, err := filepath.Rel(root, target)
    if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
        return "", fmt.Errorf("illegal path %q in archive", name)
    }
    return target, nil
}

// writeTarEntry copies the current entry's data from src into a new or
// truncated file at target, creating parent directories as needed.
func writeTarEntry(src io.Reader, target string, mode os.FileMode) error {
    if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
        return err
    }
    f, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
    if err != nil {
        return err
    }
    if _, err := io.Copy(f, src); err != nil {
        f.Close()
        return err
    }
    // Close can report a deferred write error, so its result matters.
    return f.Close()
}

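The function handles both gzip-compressed and plain tar archives. It supports directories, regular files, symlinks, and hard links; any other entry type (device nodes, FIFOs, and so on) is skipped silently.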

Whiteout files (OCI layers)

When pulling from a registry, each layer can delete files from the previous layer using whiteout files:

// In extractTarReader for OCI layers:
// An entry whose base name starts with ".wh." is treated as a whiteout
// marker rather than as content.
// NOTE(review): the OCI opaque marker ".wh..wh..opq" is not special-cased
// here; it is handled like any other whiteout, i.e. as a request to remove a
// sibling named ".wh..opq" — confirm whether opaque directories need support.
// NOTE(review): name comes straight from the archive header and is joined
// onto dst without validation in this fragment — verify that "../" components
// are rejected elsewhere before RemoveAll is reached.
name := hdr.Name
if strings.HasPrefix(filepath.Base(name), ".wh.") {
    // Whiteout: remove the corresponding file. The path is the marker's own
    // directory plus its base name with the ".wh." prefix stripped.
    target := filepath.Join(dst, filepath.Dir(name),
        strings.TrimPrefix(filepath.Base(name), ".wh."))
    // RemoveAll also deletes a directory with all its contents; its error is
    // not checked here.
    os.RemoveAll(target)
    // Move on to the next entry without processing the marker any further.
    continue
}

A file .wh.config.old means "delete config.old from the previous layer." This lets layers not only add files but also remove them.

Tag and image lookup

// Get returns the stored image whose name and tag match. An empty tag means
// "latest".
//
// Every directory under the image store is inspected in turn and the first
// match wins; directories whose metadata cannot be loaded are skipped. If no
// image matches, an error is returned: a "not found" error normally, or the
// underlying listing error when the store itself could not be read (a store
// directory that does not exist yet simply counts as empty).
func (im *ImageManager) Get(name, tag string) (*Image, error) {
    if tag == "" {
        tag = "latest"
    }

    // ReadDir can return the entries it managed to read together with an
    // error, so scan whatever came back before deciding how to report failure.
    entries, readErr := os.ReadDir(im.baseDir)
    for _, e := range entries {
        if !e.IsDir() {
            continue
        }
        img, err := im.loadMetadata(e.Name())
        if err != nil {
            // Not a usable image directory; keep looking.
            continue
        }
        if img.Name == name && img.Tag == tag {
            return img, nil
        }
    }

    if readErr != nil && !os.IsNotExist(readErr) {
        // Do not disguise an unreadable store as a missing image.
        return nil, fmt.Errorf("list images in %s: %w", im.baseDir, readErr)
    }
    return nil, fmt.Errorf("image %s:%s not found", name, tag)
}

The lookup is linear -- iterating through all directories. Slow for thousands of images, but fine for a learning project.

What's not ideal here

In Docker, images consist of layers where each layer is a diff from the previous one. This allows sharing layers between images. In Sheep, each image is a complete rootfs. Simpler, but takes more space.

Try it yourself

# Import from tar:
sudo ./sheep import myimage rootfs.tar.gz
# Bootstrap from host:
sudo ./sheep bootstrap test-img
# List images:
sudo ./sheep images

Images are ready. Next up -- Container Lifecycle: the complete state machine of a container.

Resources

Previous: NAT and iptables