Skip to content

Commit

Permalink
Merge pull request #27268 from pbehne/checkpoint_enchancements
Browse files Browse the repository at this point in the history
Checkpoint enhancements
  • Loading branch information
loganharbour authored Jun 26, 2024
2 parents 62dc981 + 6dcfe1f commit 539f904
Show file tree
Hide file tree
Showing 91 changed files with 578 additions and 5,508 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ framework/contrib/asio/
*.rd-*

# Checkpoint Files
*.cpa
*.cpa.gz
*.cpr
*.cpa-*
*.cpa.gz-*
*.cpr-*

# Ignore petsc arch
Expand Down
21 changes: 21 additions & 0 deletions framework/doc/content/source/actions/AutoCheckpointAction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AutoCheckpointAction

!syntax description /Outputs/AutoCheckpointAction

## Overview

An action that acts whenever the `[Outputs]` block exists. It adds the
[!param](/Outputs/checkpoint) and [!param](/Outputs/wall_time_checkpoint)
short-cut syntax parameters. For example, the following enables `time_step_interval`-based
checkpoints while disabling wall-time-based checkpoints.

```text
[Outputs]
checkpoint = true
wall_time_checkpoint = false
[]
```

Please refer to [syntax/Outputs/index.md] for more information.

!syntax parameters /Outputs/AutoCheckpointAction
1 change: 1 addition & 0 deletions framework/include/base/MooseApp.h
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ class MooseApp : public ConsoleStreamInterface,
* Get the OutputWarehouse objects
*/
OutputWarehouse & getOutputWarehouse();
const OutputWarehouse & getOutputWarehouse() const;

/**
* Get SystemInfo object
Expand Down
2 changes: 1 addition & 1 deletion framework/include/meshgenerators/FileMeshGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class FileMeshGenerator : public MeshGenerator
* We pick one of the following:
* - If the path just exists, use it
* - If not, but a path with the new suffix exists instead (i.e.,
* /path/to/xxxx_mesh.cpr was provided but /path/to/xxxx-mesh.cpr
* /path/to/xxxx_mesh.cpa.gz was provided but /path/to/xxxx-mesh.cpa.gz
* exists), use that path and provide a param warning via \p object
* - If not, but it is LATEST and we can find a latest checkpoint,
* use the latest checkpoint
Expand Down
14 changes: 13 additions & 1 deletion framework/include/outputs/Checkpoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
*/
enum CheckpointType : unsigned short
{
NONE,
SYSTEM_CREATED,
USER_CREATED
};
Expand Down Expand Up @@ -83,6 +82,16 @@ class Checkpoint : public FileOutput
/// Sets the autosave flag manually if the object has already been initialized.
void setAutosaveFlag(CheckpointType flag) { _checkpoint_type = flag; }

/**
* Gathers and records information used later for console output
* @return A stringstream containing the following entries:
* Wall Time Interval : interval length in seconds, if any, otherwise "Disabled"
* User Checkpoint : name of user-defined checkpoint, if any, otherwise "Disabled"
* # Checkpoints Kept : value of the 'num_files' parameter
* Execute On : value of the 'execute_on' parameter
*/
std::stringstream checkpointInfo() const;

protected:
/**
* Outputs a checkpoint file.
Expand All @@ -96,6 +105,9 @@ class Checkpoint : public FileOutput
private:
void updateCheckpointFiles(CheckpointFileNames file_struct);

/// Determines if the requested values of execute_on are valid for checkpoints
void validateExecuteOn() const;

/// Determines if this checkpoint is an autosave, and what kind of autosave it is.
CheckpointType _checkpoint_type;

Expand Down
11 changes: 10 additions & 1 deletion framework/include/utils/MooseUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,20 @@ void serialEnd(const libMesh::Parallel::Communicator & comm, bool warn = true);
*/
bool hasExtension(const std::string & filename, std::string ext, bool strip_exodus_ext = false);

/**
* Gets the extension of the passed file name.
* @param filename The filename of which to get the extension
* @param rfind When true, searches for last "." in filename. Otherwise, searches for first "."
* @return file_ext The extension of filename (does not include the leading "."). If filename has no
* extension, returns "".
*/
std::string getExtension(const std::string & filename, const bool rfind = false);

/**
* Removes any file extension from the given string s (i.e. any ".[extension]" suffix of s) and
* returns the result.
*/
std::string stripExtension(const std::string & s);
std::string stripExtension(const std::string & s, const bool rfind = false);

/**
* Function for splitting path and filename
Expand Down
73 changes: 40 additions & 33 deletions framework/src/actions/AutoCheckpointAction.C
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ AutoCheckpointAction::validParams()
{
InputParameters params = Action::validParams();

params.addClassDescription(
"Action to create shortcut syntax-specified checkpoints and automatic checkpoints.");

params.addParam<bool>("checkpoint", false, "Create checkpoint files using the default options.");
params.addParam<bool>("wall_time_checkpoint",
true,
"Enables the output of checkpoints based on elapsed wall time.");

return params;
}

Expand All @@ -25,46 +33,45 @@ AutoCheckpointAction::AutoCheckpointAction(const InputParameters & params) : Act
void
AutoCheckpointAction::act()
{
// if there's already a checkpoint object, we don't need to worry about creating a new
// checkpoint
// if there's already a checkpoint object, we don't need to worry about creating a new one
const auto checkpoints = _app.getOutputWarehouse().getOutputs<Checkpoint>();
const auto num_checkpoints = checkpoints.size();

const bool shortcut_syntax = getParam<bool>("checkpoint");

if (num_checkpoints > 1)
{
// Get most recently added Checkpoint object and error
mooseError("Multiple checkpoints are not allowed. Check the input to ensure there "
"is only one Checkpoint defined in the 'Outputs' block, including the "
"shortcut syntax 'Outputs/checkpoint=true'.");
}
checkpoints[0]->mooseError("Multiple Checkpoint objects are not allowed and there is more than "
"one Checkpoint defined in the 'Outputs' block.");
if (num_checkpoints == 1 && shortcut_syntax)
paramError("checkpoint",
"Shortcut checkpoint syntax cannot be used with another Checkpoint object in the "
"'Outputs' block");

// We don't want to set up automatic checkpoints if we are not in the master app
else if (_app.isUltimateMaster())
if (num_checkpoints == 0)
{
if (num_checkpoints == 0)
{
// If there isn't an existing checkpoint, init a new one
auto cp_params = _factory.getValidParams("Checkpoint");
cp_params.setParameters("checkpoint_type", CheckpointType::SYSTEM_CREATED);
cp_params.set<bool>("_built_by_moose") = true;
_problem->addOutput("Checkpoint", "checkpoint", cp_params);
}
// If there isn't an existing checkpoint, init a new one
auto cp_params = _factory.getValidParams("Checkpoint");

else // num_checkpoints == 1
{
// Use the existing Checkpoint object, since we only need to/should make one object the
// autosave
checkpoints[0]->setAutosaveFlag(CheckpointType::USER_CREATED);
}
cp_params.set<bool>("_built_by_moose") = true;
cp_params.set<bool>("wall_time_checkpoint") = getParam<bool>("wall_time_checkpoint");

// Check for special half transient test harness case
if (_app.testCheckpointHalfTransient())
{
// For half transient, we want to simulate a user-created checkpoint so
// time_step_interval works correctly.
const auto checkpoint = _app.getOutputWarehouse().getOutputs<Checkpoint>()[0];
checkpoint->setAutosaveFlag(CheckpointType::USER_CREATED);
checkpoint->_time_step_interval = 1;
}
// We need to keep track of what type of checkpoint we are creating. system created means the
// default value of 1 for time_step_interval is ignored.
if (!shortcut_syntax)
cp_params.set<CheckpointType>("checkpoint_type") = CheckpointType::SYSTEM_CREATED;

// We only want checkpoints in subapps if the user requests them
if (shortcut_syntax || _app.isUltimateMaster())
_problem->addOutput("Checkpoint", "checkpoint", cp_params);
}

// Check for special half transient test harness case
if (_app.testCheckpointHalfTransient() && _app.isUltimateMaster())
{
// For half transient, we want to simulate a user-created checkpoint so
// time_step_interval works correctly.
const auto checkpoint = _app.getOutputWarehouse().getOutputs<Checkpoint>()[0];
checkpoint->setAutosaveFlag(CheckpointType::USER_CREATED);
checkpoint->_time_step_interval = 1;
}
}
4 changes: 0 additions & 4 deletions framework/src/actions/CommonOutputAction.C
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ CommonOutputAction::validParams()
"xda", false, "Output the results using the default settings for XDA/XDR output (ascii)");
params.addParam<bool>(
"xdr", false, "Output the results using the default settings for XDA/XDR output (binary)");
params.addParam<bool>("checkpoint", false, "Create checkpoint files using the default options.");
params.addParam<bool>(
"gmv", false, "Output the results using the default settings for GMV output");
params.addParam<bool>(
Expand Down Expand Up @@ -211,9 +210,6 @@ CommonOutputAction::act()
if (getParam<bool>("xdr"))
create("XDR", "xdr");

if (getParam<bool>("checkpoint"))
create("Checkpoint", "checkpoint");

if (getParam<bool>("gmv"))
create("GMV", "gmv");

Expand Down
4 changes: 2 additions & 2 deletions framework/src/actions/MeshOnlyAction.C
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,11 @@ MeshOnlyAction::act()
}
}

else if (mesh_file.find(".cpr") + 4 == mesh_file.size())
else if (mesh_file.find(".cpa.gz") + 7 == mesh_file.size())
{
TIME_SECTION("act", 1, "Writing Checkpoint");

CheckpointIO io(mesh_ptr->getMesh(), true);
CheckpointIO io(mesh_ptr->getMesh(), false);
io.write(mesh_file);

// Write mesh metadata
Expand Down
17 changes: 7 additions & 10 deletions framework/src/actions/SetupMeshAction.C
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,14 @@ std::string
SetupMeshAction::modifyParamsForUseSplit(InputParameters & moose_object_params) const
{
// Get the split_file extension, if there is one, and use that to decide
// between .cpr and .cpa
// between .cpr and .cpa.gz
auto split_file = _split_file;
std::string split_file_ext;
auto pos = split_file.rfind(".");
if (pos != std::string::npos)
split_file_ext = split_file.substr(pos + 1, std::string::npos);
std::string split_file_ext = MooseUtils::getExtension(split_file);

// If split_file already has the .cpr or .cpa extension, we go with
// that, otherwise we strip off the extension and append ".cpr".
if (split_file != "" && split_file_ext != "cpr" && split_file_ext != "cpa")
split_file = MooseUtils::stripExtension(split_file) + ".cpr";
// If split_file already has the .cpr or .cpa.gz extension, we go with
// that, otherwise we strip off the extension and append ".cpa.gz".
if (split_file != "" && split_file_ext != "cpr" && split_file_ext != "cpa.gz")
split_file = MooseUtils::stripExtension(split_file) + ".cpa.gz";

if (_type != "FileMesh")
{
Expand All @@ -238,7 +235,7 @@ SetupMeshAction::modifyParamsForUseSplit(InputParameters & moose_object_params)
moose_object_params.set<MeshFileName>("file") = split_file;
else
moose_object_params.set<MeshFileName>("file") =
MooseUtils::stripExtension(moose_object_params.get<MeshFileName>("file")) + ".cpr";
MooseUtils::stripExtension(moose_object_params.get<MeshFileName>("file")) + ".cpa.gz";
}

moose_object_params.set<bool>("_is_split") = true;
Expand Down
22 changes: 8 additions & 14 deletions framework/src/actions/SplitMeshAction.C
Original file line number Diff line number Diff line change
Expand Up @@ -45,42 +45,36 @@ SplitMeshAction::act()

// Decide whether to create ASCII or binary splits based on the split_file_arg. We use the
// following rules to decide:
// 1.) No file extension -> binary
// 1.) No file extension -> ASCII + gzip
// 2.) .cpr file extension -> binary
// 3.) .cpa file extension -> ASCII
// 3.) .cpa.gz file extension -> ASCII + gzip
// 4.) Any other file extension -> mooseError

// Get the file extension without the dot.
// TODO: Maybe this should be in MooseUtils?
std::string split_file_arg_ext;
auto pos = split_file_arg.rfind(".");
if (pos != std::string::npos)
split_file_arg_ext = split_file_arg.substr(pos + 1, std::string::npos);
std::string split_file_arg_ext = MooseUtils::getExtension(split_file_arg);

// If stripExtension() returns the original string, then there is no
// file extension or the original string was empty.
bool checkpoint_binary_flag = true;
bool checkpoint_binary_flag = false;

if (split_file_arg_ext != "")
{
if (split_file_arg_ext == "cpr")
checkpoint_binary_flag = true;
else if (split_file_arg_ext == "cpa")
else if (split_file_arg_ext == "cpa.gz")
checkpoint_binary_flag = false;
else
mooseError("The argument to --split-file, ",
split_file_arg,
", must not end in a file extension other than .cpr or .cpa");
", must not end in a file extension other than .cpr or .cpa.gz");
}

// To name the split files, we start with the given mesh filename
// (if set) or the argument to --split-file, strip any existing
// extension, and then append either .cpr or .cpa depending on the
// extension, and then append either .cpr or .cpa.gz depending on the
// checkpoint_binary_flag.
auto fname = mesh->getFileName();
if (fname == "")
fname = split_file_arg;
fname = MooseUtils::stripExtension(fname) + (checkpoint_binary_flag ? ".cpr" : ".cpa");
fname = MooseUtils::stripExtension(fname) + (checkpoint_binary_flag ? ".cpr" : ".cpa.gz");

for (std::size_t i = 0; i < splits.size(); i++)
{
Expand Down
1 change: 1 addition & 0 deletions framework/src/base/Moose.C
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ associateSyntaxInner(Syntax & syntax, ActionFactory & /*action_factory*/)

registerSyntax("AddOutputAction", "Outputs/*");
registerSyntax("CommonOutputAction", "Outputs");
registerSyntax("AutoCheckpointAction", "Outputs");
syntax.registerSyntaxType("Outputs/*", "OutputName");

// Note: Preconditioner Actions will be built by this setup action
Expand Down
8 changes: 7 additions & 1 deletion framework/src/base/MooseApp.C
Original file line number Diff line number Diff line change
Expand Up @@ -1776,6 +1776,12 @@ MooseApp::getOutputWarehouse()
return _output_warehouse;
}

/// Const overload of getOutputWarehouse(): returns a read-only reference to the
/// _output_warehouse member so the warehouse can be queried through a const MooseApp.
const OutputWarehouse &
MooseApp::getOutputWarehouse() const
{
return _output_warehouse;
}

std::string
MooseApp::appNameToLibName(const std::string & app_name) const
{
Expand Down Expand Up @@ -2397,7 +2403,7 @@ MooseApp::addRelationshipManager(std::shared_ptr<RelationshipManager> new_rm)
const std::string &
MooseApp::checkpointSuffix()
{
static const std::string suffix = "-mesh.cpr";
static const std::string suffix = "-mesh.cpa.gz";
return suffix;
}

Expand Down
3 changes: 1 addition & 2 deletions framework/src/mesh/FileMesh.C
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,13 @@ FileMesh::buildMesh()
}
else
{
// Supports old suffix (xxxx_mesh.cpr -> xxxx-mesh.cpr) and LATEST
_file_name = FileMeshGenerator::deduceCheckpointPath(*this, _file_name);

// If we are reading a mesh while restarting, then we might have
// a solution file that relies on that mesh partitioning and/or
// numbering. In that case, we need to turn off repartitioning
// and renumbering, at least at first.
bool restarting = _file_name.rfind(".cpr") < _file_name.size();
bool restarting = _file_name.rfind(".cpa.gz") < _file_name.size();

const bool skip_partitioning_later = restarting && getMesh().skip_partitioning();
const bool allow_renumbering_later = restarting && getMesh().allow_renumbering();
Expand Down
Loading

0 comments on commit 539f904

Please sign in to comment.