Skip to main content

RGW Data Layout

RGW organizes its data into three different kinds: metadata, bucket index, and data.

Metadata

  • This stores information about users, buckets, and bucket instances (user, bucket, and bucket.instance)
  • Commands to view more information about the metadata:
# radosgw-admin metadata list
[
"account",
"bucket",
"bucket.instance",
"group",
"otp",
"roles",
"topic",
"user"
]


# radosgw-admin metadata list user
[
"dashboard",
"kj"
]


# radosgw-admin metadata list bucket.instance
[
"kjbucket:1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1",
"test:1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.1"
]



# radosgw-admin metadata get bucket:kjbucket
{
"key": "bucket:kjbucket",
"ver": {
"tag": "_m5AWN1VVZjnqa-E1NAj7OAk",
"ver": 1
},
"mtime": "2025-03-21T09:06:35.525451Z",
"data": {
"bucket": {
"name": "kjbucket",
"marker": "1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1",
"bucket_id": "1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"owner": "kj",
"creation_time": "2025-03-21T09:06:32.339318Z",
"linked": true,
"has_bucket_info": false
}
}




# radosgw-admin metadata get bucket.instance:test:1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.1
{
"key": "bucket.instance:test:1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.1",
"ver": {
"tag": "_00XBXnzgU9cr36dWeVjO2Z4",
"ver": 1
},
"mtime": "2025-03-21T09:28:13.878622Z",
"data": {
"bucket_info": {
"bucket": {
"name": "test",
"marker": "1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.1",
"bucket_id": "1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"creation_time": "2025-03-21T09:28:13.845783Z",
"owner": "kj",
"flags": 0,
"zonegroup": "fc3a7e3c-e3dc-40a7-a0b8-dbedb7c094a7",
"placement_rule": "default-placement",
"has_instance_obj": false,
"quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"num_shards": 11,
"bi_shard_hash_type": 0,
"requester_pays": false,
"has_website": false,
"swift_versioning": false,
"swift_ver_location": "",
"index_type": 0,
"mdsearch_config": [],
"reshard_status": 0,
"new_bucket_instance_id": ""
},
"attrs": [
{
"key": "user.rgw.acl",
"val": "AgJvAAAAAwIMAAAAAgAAAGtqAgAAAGtqBANXAAAAAQEAAAACAAAAa2oPAAAAAQAAAAIAAABragUDMAAAAAICBAAAAAAAAAACAAAAa2oAAAAAAAAAAAICBAAAAA8AAAACAAAAa2oAAAAAAAAAAAAAAAAAAAAA"
}
]
}
}


# radosgw-admin metadata get user:kj
{
"key": "user:kj",
"ver": {
"tag": "_4mi3AJTONyzsGI-w_SfDj8v",
"ver": 1
},
"mtime": "2025-03-21T09:06:20.256664Z",
"data": {
"user_id": "kj",
"display_name": "kj",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"subusers": [],
"keys": [
{
"user": "kj",
"access_key": "EFNY9U6JK5VW6EQHQHMM",
"secret_key": "P8taqzqGhk6keH71SxO3htotTcaZGHvCyjANzb1O",
"active": true,
"create_date": "2025-03-21T09:06:20.256386Z"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"default_storage_class": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"temp_url_keys": [],
"type": "rgw",
"mfa_ids": [],
"account_id": "",
"path": "/",
"create_date": "2025-03-21T09:06:20.253994Z",
"tags": [],
"group_ids": [],
"attrs": []
}
}

This metadata is stored in the xattrs of objects in the rgw.meta pool, under different namespaces:

# rados -p kerala.rgw.meta ls --all
root .bucket.meta.test:1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.2
root .bucket.meta.kjbucket:1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1
users.keys DYLP684TRN99E0AGE9B3
users.uid dashboard
users.uid kj
users.uid kj.buckets
root kjbucket
root test
users.keys EFNY9U6JK5VW6EQHQHMM
users.uid dashboard.buckets

Bucket Index

  • The bucket index holds a KEY-VALUE map attached to RADOS objects

  • When a bucket is created, RGW creates the bucket index objects in the rgw.buckets.index pool. (By default in newer versions the number of shards is 11, so the index is split into 11 shard objects.)

# s3cmd mb  s3://kjbucket
Bucket 's3://kjbucket/' created

# rados -p kerala.rgw.buckets.index ls
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.9
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.3
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.5
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.10
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.8
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.4
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.2
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.6
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.1
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.0
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.7
  • The index objects themselves do not contain any data. Look at the size field in the stat output of each index object:
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.9 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.3 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.5 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.10 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.8 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.4 mtime 2025-03-21T05:43:53.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.2 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.6 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.1 mtime 2025-03-21T04:06:35.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.0 mtime 2025-03-21T05:43:25.000000-0500, size 0
kerala.rgw.buckets.index/.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.7 mtime 2025-03-21T04:06:35.000000-0500, size 0
  • Each of these RADOS objects has an omap. The KEY of each omap entry is the name of an S3 object, and the VALUE holds some basic metadata of that object
For example, we have uploaded one object, 'object1', to the bucket using s3cmd

# s3cmd ls s3://kjbucket
2025-03-21 13:27 349 s3://kjbucket/object1

If we list the omap keys of the bucket index objects, we will see a KEY named 'object1' in one of the omaps

for i in $(rados -p kerala.rgw.buckets.index ls); do echo $i; rados -p kerala.rgw.buckets.index listomapkeys $i; done
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.9
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.3
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.5
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.10
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.8
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.4
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.2 object1
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.6
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.1
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.0
.dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.7

To see the VALUE for the above KEY:

# rados -p kerala.rgw.buckets.index listomapvals .dir.1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1.2
object1
value (235 bytes) :
00000000 08 03 e5 00 00 00 07 00 00 00 6f 62 6a 65 63 74 |..........object|
00000010 31 09 00 00 00 00 00 00 00 01 07 03 68 00 00 00 |1...........h...|
00000020 01 5d 01 00 00 00 00 00 00 3b 69 dd 67 fa 35 37 |.].......;i.g.57|
00000030 1d 20 00 00 00 64 36 33 65 64 30 62 61 63 64 61 |. ...d63ed0bacda|
00000040 35 66 37 37 61 38 31 66 36 65 32 38 63 62 38 34 |5f77a81f6e28cb84|
00000050 30 37 66 66 33 02 00 00 00 6b 6a 02 00 00 00 6b |07ff3....kj....k|
00000060 6a 0a 00 00 00 74 65 78 74 2f 70 6c 61 69 6e 5d |j....text/plain]|
00000070 01 00 00 00 00 00 00 00 00 00 00 08 00 00 00 53 |...............S|
00000080 54 41 4e 44 41 52 44 00 00 00 00 00 00 00 00 00 |TANDARD.........|
00000090 01 01 02 00 00 00 25 09 01 40 00 00 00 31 38 34 |......%..@...184|
000000a0 37 63 31 64 32 2d 61 34 32 39 2d 34 61 30 31 2d |7c1d2-a429-4a01-|
000000b0 61 36 35 62 2d 33 30 65 61 63 33 38 39 33 65 31 |a65b-30eac3893e1|
000000c0 65 2e 32 34 35 31 30 30 2e 31 34 35 37 39 33 30 |e.245100.1457930|
000000d0 32 38 31 37 34 36 36 31 31 37 37 34 38 00 00 00 |2817466117748...|
000000e0 00 00 00 00 00 00 00 00 00 00 00 |...........|
000000eb

You can also use the *radosgw-admin bi list* command to list the metadata stored for the S3 objects in the omaps of the bucket's index objects (here restricted to a single object with --object):
# radosgw-admin bi list --bucket kjbucket --object object1
[
{
"type": "plain",
"idx": "object1",
"entry": {
"name": "object1",
"instance": "",
"ver": {
"pool": 37,
"epoch": 9
},
"locator": "",
"exists": true,
"meta": {
"category": 1,
"size": 349,
"mtime": "2025-03-21T13:27:23.490157Z",
"etag": "d63ed0bacda5f77a81f6e28cb8407ff3",
"storage_class": "STANDARD",
"owner": "kj",
"owner_display_name": "kj",
"content_type": "text/plain",
"accounted_size": 349,
"user_data": "",
"appendable": false
},
"tag": "1847c1d2-a429-4a01-a65b-30eac3893e1e.245100.14579302817466117748",
"flags": 0,
"pending_map": [],
"versioned_epoch": 0
}
}
]


DATA

The actual S3 objects are stored in the rgw.buckets.data pool.

# rados -p kerala.rgw.buckets.data ls
1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_object1

Each RGW object may be stored in one or more RADOS objects, depending on the object size. (RGW/RADOS objects are 4MB by default. Anything larger than that will be split into multiple objects.)

For example, if we upload a 10MB file as an S3 object to the bucket, it will be split as follows:

# s3cmd put 10mb_object s3://kjbucket
upload: '10mb_object' -> 's3://kjbucket/10mb_object' [1 of 1]
10485760 of 10485760 100% in 0s 49.61 MB/s done

# rados -p kerala.rgw.buckets.data ls
1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_10mb_object
1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_2
1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_1

# stat
kerala.rgw.buckets.data/1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_10mb_object mtime 2025-03-21T08:49:43.000000-0500, size 4194304
kerala.rgw.buckets.data/1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_2 mtime 2025-03-21T08:49:43.000000-0500, size 2097152
kerala.rgw.buckets.data/1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_1 mtime 2025-03-21T08:49:43.000000-0500, size 4194304

An RGW object may comprise multiple RADOS objects, the first of which is the HEAD that contains metadata including manifest, ACLs, content type, ETag, and user-defined metadata.

# for i in $(rados -p kerala.rgw.buckets.data listxattr 1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_10mb_object); do echo $i && rados -p kerala.rgw.buckets.data getxattr 1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_10mb_object $i  | hexdump -C ; done
user.rgw.acl
00000000 02 02 6f 00 00 00 03 02 0c 00 00 00 02 00 00 00 |..o.............|
00000010 6b 6a 02 00 00 00 6b 6a 04 03 57 00 00 00 01 01 |kj....kj..W.....|
00000020 00 00 00 02 00 00 00 6b 6a 0f 00 00 00 01 00 00 |.......kj.......|
00000030 00 02 00 00 00 6b 6a 05 03 30 00 00 00 02 02 04 |.....kj..0......|
00000040 00 00 00 00 00 00 00 02 00 00 00 6b 6a 00 00 00 |...........kj...|
00000050 00 00 00 00 00 02 02 04 00 00 00 0f 00 00 00 02 |................|
00000060 00 00 00 6b 6a 00 00 00 00 00 00 00 00 00 00 00 |...kj...........|
00000070 00 00 00 00 00 0a |......|
00000076
user.rgw.content_type
00000000 61 70 70 6c 69 63 61 74 69 6f 6e 2f 6f 63 74 65 |application/octe|
00000010 74 2d 73 74 72 65 61 6d 00 0a |t-stream..|
0000001a
user.rgw.etag
00000000 66 31 63 39 36 34 35 64 62 63 31 34 65 66 64 64 |f1c9645dbc14efdd|
00000010 63 37 64 38 61 33 32 32 36 38 35 66 32 36 65 62 |c7d8a322685f26eb|
00000020 0a |.|
00000021
user.rgw.idtag
00000000 31 38 34 37 63 31 64 32 2d 61 34 32 39 2d 34 61 |1847c1d2-a429-4a|
00000010 30 31 2d 61 36 35 62 2d 33 30 65 61 63 33 38 39 |01-a65b-30eac389|
00000020 33 65 31 65 2e 32 34 35 31 30 30 2e 36 37 38 37 |3e1e.245100.6787|
00000030 37 36 39 33 34 33 39 35 37 35 39 37 38 36 33 00 |769343957597863.|
00000040 0a |.|
00000041
user.rgw.manifest
00000000 08 06 61 01 00 00 00 00 a0 00 00 00 00 00 00 00 |..a.............|
00000010 00 00 00 06 06 90 00 00 00 0a 0a 73 00 00 00 08 |...........s....|
00000020 00 00 00 6b 6a 62 75 63 6b 65 74 2d 00 00 00 31 |...kjbucket-...1|
00000030 38 34 37 63 31 64 32 2d 61 34 32 39 2d 34 61 30 |847c1d2-a429-4a0|
00000040 31 2d 61 36 35 62 2d 33 30 65 61 63 33 38 39 33 |1-a65b-30eac3893|
00000050 65 31 65 2e 31 31 35 30 37 35 2e 31 2d 00 00 00 |e1e.115075.1-...|
00000060 31 38 34 37 63 31 64 32 2d 61 34 32 39 2d 34 61 |1847c1d2-a429-4a|
00000070 30 31 2d 61 36 35 62 2d 33 30 65 61 63 33 38 39 |01-a65b-30eac389|
00000080 33 65 31 65 2e 31 31 35 30 37 35 2e 31 00 00 00 |3e1e.115075.1...|
00000090 00 00 00 00 00 00 0b 00 00 00 31 30 6d 62 5f 6f |..........10mb_o|
000000a0 62 6a 65 63 74 00 00 00 00 00 00 40 00 00 00 00 |bject......@....|
000000b0 00 00 00 40 00 00 00 00 00 21 00 00 00 2e 33 52 |...@.....!....3R|
000000c0 58 59 56 6e 55 77 7a 6c 4e 78 5a 34 62 79 69 45 |XYVnUwzlNxZ4byiE|
000000d0 36 36 50 67 62 30 35 39 33 66 79 64 48 5f 01 00 |66Pgb0593fydH_..|
000000e0 00 00 00 00 00 00 00 00 00 00 02 01 20 00 00 00 |............ ...|
000000f0 00 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 |......@.........|
*
00000110 00 00 11 00 00 00 64 65 66 61 75 6c 74 2d 70 6c |......default-pl|
00000120 61 63 65 6d 65 6e 74 11 00 00 00 64 65 66 61 75 |acement....defau|
00000130 6c 74 2d 70 6c 61 63 65 6d 65 6e 74 00 00 00 00 |lt-placement....|
00000140 02 02 21 00 00 00 04 00 00 00 6e 6f 6e 65 03 01 |..!.......none..|
00000150 12 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 |................|
00000160 00 00 00 00 00 00 00 0a |........|
00000168
user.rgw.pg_ver
00000000 0b 00 00 00 00 00 00 00 0a |.........|
00000009
user.rgw.source_zone
00000000 49 88 fa a4 0a |I....|
00000005
user.rgw.tail_tag
00000000 31 38 34 37 63 31 64 32 2d 61 34 32 39 2d 34 61 |1847c1d2-a429-4a|
00000010 30 31 2d 61 36 35 62 2d 33 30 65 61 63 33 38 39 |01-a65b-30eac389|
00000020 33 65 31 65 2e 32 34 35 31 30 30 2e 36 37 38 37 |3e1e.245100.6787|
00000030 37 36 39 33 34 33 39 35 37 35 39 37 38 36 33 00 |769343957597863.|
00000040 0a |.|
00000041
user.rgw.x-amz-meta-s3cmd-attrs
00000000 61 74 69 6d 65 3a 31 37 34 32 35 36 34 39 36 35 |atime:1742564965|
00000010 2f 63 74 69 6d 65 3a 31 37 34 32 35 36 34 39 36 |/ctime:174256496|
00000020 35 2f 67 69 64 3a 30 2f 67 6e 61 6d 65 3a 72 6f |5/gid:0/gname:ro|
00000030 6f 74 2f 6d 64 35 3a 66 31 63 39 36 34 35 64 62 |ot/md5:f1c9645db|
00000040 63 31 34 65 66 64 64 63 37 64 38 61 33 32 32 36 |c14efddc7d8a3226|
00000050 38 35 66 32 36 65 62 2f 6d 6f 64 65 3a 33 33 31 |85f26eb/mode:331|
00000060 38 38 2f 6d 74 69 6d 65 3a 31 37 34 32 35 36 34 |88/mtime:1742564|
00000070 39 36 35 2f 75 69 64 3a 30 2f 75 6e 61 6d 65 3a |965/uid:0/uname:|
00000080 72 6f 6f 74 00 0a |root..|
00000086

The manifest, which is stored as an xattr as shown above, describes how each RGW object is laid out across RADOS objects. Another easy way to see the object manifest is:

# radosgw-admin object manifest --bucket kjbucket --object 10mb_object
{
"size": 10485760,
"objects": [
{
"index": 0,
"part_id": 0,
"stripe_id": 0,
"offset": 0,
"size": 4194304,
"raw_obj": {
"pool": "kerala.rgw.buckets.data",
"oid": "1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1_10mb_object",
"loc": ""
}
},
{
"index": 1,
"part_id": 0,
"stripe_id": 1,
"offset": 4194304,
"size": 4194304,
"raw_obj": {
"pool": "kerala.rgw.buckets.data",
"oid": "1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_1",
"loc": ""
}
},
{
"index": 2,
"part_id": 0,
"stripe_id": 2,
"offset": 8388608,
"size": 4194304,
"raw_obj": {
"pool": "kerala.rgw.buckets.data",
"oid": "1847c1d2-a429-4a01-a65b-30eac3893e1e.115075.1__shadow_.3RXYVnUwzlNxZ4byiE66Pgb0593fydH_2",
"loc": ""
}
}
]
}

So how does a multipart read work?

When a read request comes in, it goes to the HEAD object first. The manifest stored as an xattr on the HEAD object holds the list of its shadow objects. RGW then reads these shadow objects and combines them with the HEAD object's data to return the complete object.