diff --git a/man/repart.d.xml b/man/repart.d.xml index a53057ffa1..d3a8f6fcea 100644 --- a/man/repart.d.xml +++ b/man/repart.d.xml @@ -341,6 +341,11 @@ and the placing algorithm restarted. By default, a minimum size constraint of 10M and no maximum size constraint is set. + If Format= is set, the minimum size is automatically raised to the minimum + file system size for the selected file system type, if known. Moreover, for ESP/XBOOTLDR + partitions formatted as vfat the minimum is raised to 100M (for file system sector sizes below 4K) or 260M (for file system sector sizes of 4K or larger) + automatically, if specified smaller. + @@ -1007,6 +1012,25 @@ + + FileSystemSectorSize= + + Controls the sector size for any file system, LUKS volume or Verity volume formatted + on this partition. Expects a power of 2 as value, and must be equal to or larger than 512. Typically + it's recommended to set the file system sector size to 4096, even on 512-byte sector disks (and in + particular for images that are only ever intended to be stored as files on disks), in order to + optimize performance. However, for compatibility with foreign operating systems or firmware it might + be advisable to use the native sector size of the backing disk. + + If unspecified and operating on a block device, defaults to the native sector size of the + device. If unspecified and operating on a disk image file, defaults to 4096. + + Regardless of what is configured here, or which default is picked, the file system sector size + is always increased to be equal to or larger than the disk sector size. + + + + diff --git a/src/repart/repart.c b/src/repart/repart.c index de138c15d3..ca211702bf 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -105,6 +105,15 @@ * filesystems will then also be compatible with sector sizes 512, 1024 and 2048. */ #define DEFAULT_FILESYSTEM_SECTOR_SIZE 4096ULL +/* Minimum sizes for the ESP depending on sector size.
What the minimum is, is severely underdocumented, but + * it appears for 4K sector size it must be 260M, and otherwise 100M. This is what Microsoft says here: + * + * https://learn.microsoft.com/en-us/windows-hardware/manufacture/desktop/configure-uefigpt-based-hard-drive-partitions?view=windows-11 + * https://learn.microsoft.com/en-us/windows-hardware/manufacture/desktop/oem-deployment-of-windows-desktop-editions-sample-scripts?view=windows-11&preserve-view=true#-createpartitions-uefitxt + */ +#define ESP_MIN_SIZE (100 * U64_MB) +#define ESP_MIN_SIZE_4K (260 * U64_MB) + #define APIVFS_TMP_DIRS_NULSTR "proc\0sys\0dev\0tmp\0run\0var/tmp\0" #define AUTOMATIC_FSTAB_HEADER_START "# Start section ↓ of automatically generated fstab by systemd-repart" @@ -409,6 +418,7 @@ typedef struct Partition { uint64_t verity_hash_block_size; char *compression; char *compression_level; + uint64_t fs_sector_size; int add_validatefs; CopyFiles *copy_files; @@ -461,7 +471,7 @@ typedef struct Context { uint64_t start, end, total; struct fdisk_context *fdisk_context; - uint64_t sector_size, grain_size, fs_sector_size; + uint64_t sector_size, grain_size, default_fs_sector_size; sd_id128_t seed; @@ -609,6 +619,7 @@ static Partition *partition_new(void) { .add_validatefs = -1, .last_percent = UINT_MAX, .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 }, + .fs_sector_size = UINT64_MAX, }; return p; @@ -724,6 +735,7 @@ static void partition_foreignize(Partition *p) { p->growfs = -1; p->verity = VERITY_OFF; p->add_validatefs = false; + p->fs_sector_size = UINT64_MAX; partition_mountpoint_free_many(p->mountpoints, p->n_mountpoints); p->mountpoints = NULL; @@ -909,6 +921,38 @@ static bool context_drop_or_foreignize_one_priority(Context *context) { return true; } +static uint64_t partition_fs_sector_size(const Context *c, const Partition *p) { + assert(c); + assert(p); + + uint64_t ss; + + if (p->fs_sector_size != UINT64_MAX) + /* Prefer explicitly configured value */ + ss = p->fs_sector_size; + else 
+ /* Otherwise follow the default sector size */ + ss = c->default_fs_sector_size; + + /* never allow the fs sector size to be picked smaller than the physical sector size */ + return MAX(ss, c->sector_size); +} + +static uint64_t partition_fstype_min_size(const Context *c, const Partition *p) { + assert(c); + assert(p); + + /* If a file system type is configured, then take it into consideration for the minimum partition + * size */ + + if (IN_SET(p->type.designator, PARTITION_ESP, PARTITION_XBOOTLDR) && streq_ptr(p->format, "vfat")) { + uint64_t ss = partition_fs_sector_size(c, p); + return ss >= 4096 ? ESP_MIN_SIZE_4K : ESP_MIN_SIZE; + } + + return minimal_size_by_fs_name(p->format); +} + static uint64_t partition_min_size(const Context *context, const Partition *p) { uint64_t sz, override_min; @@ -944,8 +988,8 @@ static uint64_t partition_min_size(const Context *context, const Partition *p) { uint64_t f; /* If we shall synthesize a file system, take minimal fs size into account (assumed to be 4K if not known) */ - f = p->format ? round_up_size(minimal_size_by_fs_name(p->format), context->grain_size) : UINT64_MAX; - d += f == UINT64_MAX ? context->grain_size : f; + f = partition_fstype_min_size(context, p); + d += f == UINT64_MAX ? 
context->grain_size : round_up_size(f, context->grain_size); } if (d > sz) @@ -1731,6 +1775,45 @@ static int config_parse_block_size( return 0; } +static int config_parse_fs_sector_size( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *fssecsz = ASSERT_PTR(data), parsed; + int r; + + assert(rvalue); + + if (isempty(rvalue)) { + *fssecsz = UINT64_MAX; + return 0; + } + + r = parse_size(rvalue, 1024, &parsed); + if (r < 0) + return log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to parse size value: %s", rvalue); + + if (!ISPOWEROF2(parsed)) + return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), + "Value not a power of 2: %s", rvalue); + + /* NB: we make no upper restriction here, since the maximum logical sector sizes file systems support + * vary greatly, and can be much larger than 4K. (That's also the reason we don't use + * parse_sector_size() here.)
*/ + + *fssecsz = parsed; + return 0; +} + static int config_parse_fstype( const char *unit, const char *filename, @@ -2483,6 +2566,7 @@ static int partition_read_definition(Partition *p, const char *path, const char { "Partition", "CompressionLevel", config_parse_string, CONFIG_PARSE_STRING_SAFE_AND_ASCII, &p->compression_level }, { "Partition", "SupplementFor", config_parse_string, 0, &p->supplement_for_name }, { "Partition", "AddValidateFS", config_parse_tristate, 0, &p->add_validatefs }, + { "Partition", "FileSystemSectorSize", config_parse_fs_sector_size, 0, &p->fs_sector_size }, {} }; _cleanup_free_ char *filename = NULL; @@ -3179,7 +3263,6 @@ static int context_load_partition_table(Context *context) { bool from_scratch = false; sd_id128_t disk_uuid; size_t n_partitions; - unsigned long secsz; uint64_t grainsz, fs_secsz = DEFAULT_FILESYSTEM_SECTOR_SIZE; int r; @@ -3256,7 +3339,7 @@ static int context_load_partition_table(Context *context) { if (S_ISREG(st.st_mode) && st.st_size == 0) { /* Use the fallback values if we have no better idea */ context->sector_size = fdisk_get_sector_size(c); - context->fs_sector_size = fs_secsz; + context->default_fs_sector_size = fs_secsz; context->grain_size = 4096; return /* from_scratch = */ true; } @@ -3280,7 +3363,7 @@ static int context_load_partition_table(Context *context) { * it for all our needs. Note that the values we use ourselves always are in bytes though, thus mean * the same thing universally. Also note that regardless what kind of sector size is in use we'll * place partitions at multiples of 4K. */ - secsz = fdisk_get_sector_size(c); + unsigned long secsz = fdisk_get_sector_size(c); /* Insist on a power of two, and that it's a multiple of 512, i.e. the traditional sector size. 
*/ if (secsz < 512 || !ISPOWEROF2(secsz)) @@ -3288,9 +3371,9 @@ static int context_load_partition_table(Context *context) { /* Use at least 4K, and ensure it's a multiple of the sector size, regardless if that is smaller or * larger */ - grainsz = secsz < 4096 ? 4096 : secsz; + grainsz = MAX(secsz, 4096U); - log_debug("Sector size of device is %lu bytes. Using filesystem sector size of %" PRIu64 " and grain size of %" PRIu64 ".", secsz, fs_secsz, grainsz); + log_debug("Sector size of device is %lu bytes. Using default filesystem sector size of %" PRIu64 " and grain size of %" PRIu64 ".", secsz, fs_secsz, grainsz); switch (arg_empty) { @@ -3539,7 +3622,7 @@ add_initial_free_area: context->end = last_lba; context->total = nsectors; context->sector_size = secsz; - context->fs_sector_size = fs_secsz; + context->default_fs_sector_size = fs_secsz; context->grain_size = grainsz; context->fdisk_context = TAKE_PTR(c); @@ -4597,7 +4680,7 @@ static int partition_encrypt(Context *context, Partition *p, PartitionTarget *ta const char *node = partition_target_path(target); struct crypt_params_luks2 luks_params = { .label = strempty(ASSERT_PTR(p)->new_label), - .sector_size = ASSERT_PTR(context)->fs_sector_size, + .sector_size = partition_fs_sector_size(context, p), .data_device = offline ? 
node : NULL, }; struct crypt_params_reencrypt reencrypt_params = { @@ -6347,10 +6430,17 @@ static int context_mkfs(Context *context) { if (r < 0) return r; - r = make_filesystem(partition_target_path(t), p->format, strempty(p->new_label), root, - p->fs_uuid, partition_mkfs_flags(p), - context->fs_sector_size, p->compression, p->compression_level, - extra_mkfs_options); + r = make_filesystem( + partition_target_path(t), + p->format, + strempty(p->new_label), + root, + p->fs_uuid, + partition_mkfs_flags(p), + partition_fs_sector_size(context, p), + p->compression, + p->compression_level, + extra_mkfs_options); if (r < 0) return r; @@ -7925,10 +8015,10 @@ static int context_update_verity_size(Context *context) { assert_se(dp = p->siblings[VERITY_DATA]); if (p->verity_data_block_size == UINT64_MAX) - p->verity_data_block_size = context->fs_sector_size; + p->verity_data_block_size = partition_fs_sector_size(context, p); if (p->verity_hash_block_size == UINT64_MAX) - p->verity_hash_block_size = context->fs_sector_size; + p->verity_hash_block_size = partition_fs_sector_size(context, p); uint64_t sz; if (dp->size_max != UINT64_MAX) { @@ -8061,16 +8151,17 @@ static int context_minimize(Context *context) { if (r < 0) return r; - r = make_filesystem(d ? d->node : temp, - p->format, - strempty(p->new_label), - root, - fs_uuid, - partition_mkfs_flags(p), - context->fs_sector_size, - p->compression, - p->compression_level, - extra_mkfs_options); + r = make_filesystem( + d ? d->node : temp, + p->format, + strempty(p->new_label), + root, + fs_uuid, + partition_mkfs_flags(p), + partition_fs_sector_size(context, p), + p->compression, + p->compression_level, + extra_mkfs_options); if (r < 0) return r; @@ -8152,16 +8243,17 @@ static int context_minimize(Context *context) { return log_error_errno(r, "Failed to make loopback device of %s: %m", temp); } - r = make_filesystem(d ? 
d->node : temp, - p->format, - strempty(p->new_label), - root, - p->fs_uuid, - partition_mkfs_flags(p), - context->fs_sector_size, - p->compression, - p->compression_level, - extra_mkfs_options); + r = make_filesystem( + d ? d->node : temp, + p->format, + strempty(p->new_label), + root, + p->fs_uuid, + partition_mkfs_flags(p), + partition_fs_sector_size(context, p), + p->compression, + p->compression_level, + extra_mkfs_options); if (r < 0) return r; diff --git a/test/units/TEST-58-REPART.sh b/test/units/TEST-58-REPART.sh index 97e3783613..aa472df99a 100755 --- a/test/units/TEST-58-REPART.sh +++ b/test/units/TEST-58-REPART.sh @@ -565,9 +565,9 @@ EOF output=$(sfdisk --dump "$imgs/zzz") - assert_in "$imgs/zzz1 : start= 2048, size= 20480, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, uuid=39107B09-615D-48FB-BA37-C663885FCE67, name=\"esp\"" "$output" - assert_in "$imgs/zzz2 : start= 22528, size= 65536, type=${root_guid}, uuid=${root_uuid}, name=\"root-${architecture}\", attrs=\"GUID:59\"" "$output" - assert_in "$imgs/zzz3 : start= 88064, size= 65536, type=${usr_guid}, uuid=${usr_uuid}, name=\"usr-${architecture}\", attrs=\"GUID:60\"" "$output" + assert_in "$imgs/zzz1 : start= 2048, size= 532480, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, uuid=39107B09-615D-48FB-BA37-C663885FCE67, name=\"esp\"" "$output" + assert_in "$imgs/zzz2 : start= 534528, size= 65536, type=${root_guid}, uuid=${root_uuid}, name=\"root-${architecture}\", attrs=\"GUID:59\"" "$output" + assert_in "$imgs/zzz3 : start= 600064, size= 65536, type=${usr_guid}, uuid=${usr_uuid}, name=\"usr-${architecture}\", attrs=\"GUID:60\"" "$output" if systemd-detect-virt --quiet --container; then echo "Skipping second part of copy blocks tests in container." 
@@ -1573,7 +1573,7 @@ EOF systemd-repart --empty=create --size=auto --dry-run=no --definitions="$defs" "$image" output=$(sfdisk -d "$image") - assert_in "${image}1 : start= 2048, size= 204800, type=${esp_guid}" "$output" + assert_in "${image}1 : start= 2048, size= 532480, type=${esp_guid}" "$output" assert_not_in "${image}2" "$output" # Disk with small ESP => ESP grows @@ -1586,12 +1586,12 @@ EOF systemd-repart --dry-run=no --definitions="$defs" "$image" output=$(sfdisk -d "$image") - assert_in "${image}1 : start= 2048, size= 204800, type=${esp_guid}" "$output" + assert_in "${image}1 : start= 2048, size= 532480, type=${esp_guid}" "$output" assert_not_in "${image}2" "$output" # Disk with small ESP that can't grow => XBOOTLDR created - truncate -s 150M "$image" + truncate -s 400M "$image" sfdisk "$image" < XBOOTLDR grows, small ESP created @@ -1614,8 +1614,8 @@ EOF systemd-repart --dry-run=no --definitions="$defs" "$image" output=$(sfdisk -d "$image") - assert_in "${image}1 : start= 2048, size= 204800, type=${xbootldr_guid}" "$output" - assert_in "${image}2 : start= 206848, size= 100312, type=${esp_guid}" "$output" + assert_in "${image}1 : start= 2048, size= 284632, type=${xbootldr_guid}" "$output" + assert_in "${image}2 : start= 286680, size= 532480, type=${esp_guid}" "$output" } OFFLINE="yes" diff --git a/test/units/TEST-87-AUX-UTILS-VM.validatefs.sh b/test/units/TEST-87-AUX-UTILS-VM.validatefs.sh index ede0d71019..86120975be 100755 --- a/test/units/TEST-87-AUX-UTILS-VM.validatefs.sh +++ b/test/units/TEST-87-AUX-UTILS-VM.validatefs.sh @@ -74,7 +74,7 @@ MountPoint=/somewhere/else Format=ext4 EOF -systemd-repart --dry-run=no --empty=create --size=256M --definitions=/tmp/validatefs-test /var/tmp/validatefs-test.raw +systemd-repart --dry-run=no --empty=create --size=410M --definitions=/tmp/validatefs-test /var/tmp/validatefs-test.raw systemd-dissect --mount --mkdir /var/tmp/validatefs-test.raw /tmp/validatefs-test.mount