sample_identify_tag_pipeline_metadata.json

{
  "name": "Inspect and Tag Data sources in Data Catalog",
  "description": "A pipeline that reads the data source and randomly samples the input rows to identify sensitive items using DLP content inspection. The aggregated results are then written to Data Catalog tags.",
  "parameters": [
    {
      "name": "sampleSize",
      "label": "Sample Size",
      "helpText": "The number of samples to extract for each column. (Default: 1000)",
      "isOptional": true
    },
    {
      "name": "dataCatalogEntryGroupId",
      "label": "Cloud Data Catalog Entry Group",
      "helpText": "The Entry Group Id (/projects/{projectId}/locations/{locationId}/entryGroups/{entryGroupId}) in which to create a new Entry for the inspected data source. Provide this to enable the pipeline to create a new entry in Data Catalog with the schema. (Not used for sourceType=BIGQUERY_TABLE)",
      "isOptional": true
    },
    {
      "name": "dataCatalogInspectionTagTemplateId",
      "label": "Data Catalog Tag Template Id",
      "helpText": "The Data Catalog Tag Template Id to use for creating the sensitivity tags.",
      "isOptional": true
    },
    {
      "name": "dataCatalogForcedUpdate",
      "label": "Force update Data Catalog",
      "helpText": "Force updates to Data Catalog Tags/Entry based on execution of this pipeline. (Default: false)",
      "isOptional": true
    },
    {
      "name": "reportLocation",
      "label": "GCS Location for writing output",
      "helpText": "The GCS location to write the aggregated inspection results and the data source's AVRO Schema. At least one of reportLocation or reportBigQueryTable must be specified.",
      "isOptional": true
    },
    {
      "name": "reportBigQueryTable",
      "label": "Report BigQuery Table",
      "helpText": "The BigQuery table ({projectId}:{datasetId}.{tableId}) to write the aggregated inspection results to; the table must exist. At least one of reportLocation or reportBigQueryTable must be specified.",
      "isOptional": true
    },
    {
      "name": "observableInfoTypes",
      "label": "DLP Observable InfoTypes",
      "helpText": "Provide a list of info-types to inspect the data with. Keeping EMPTY uses all DLP supported info-types.",
      "isOptional": true
    },
    {
      "name": "sourceType",
      "label": "Data source type",
      "helpText": "The data source to analyse/inspect. One of: [AVRO, PARQUET, BIGQUERY_TABLE, BIGQUERY_QUERY, JDBC_TABLE, JDBC_QUERY, CSV_FILE]"
    },
    {
      "name": "inputPattern",
      "label": "Data Source",
      "helpText": "The location of the data source: for AVRO or PARQUET, the GCS file pattern to use as input; for BIGQUERY_TABLE, a fully qualified table name in {projectId}:{datasetId}.{tableId} format; for JDBC_TABLE, the name of the table; for JDBC_QUERY, a SELECT query to run on the target."
    },
    {
      "name": "dlpRegion",
      "label": "DLP Region",
      "helpText": "The DLP region to use, default: global",
      "isOptional": true
    },
    {
      "name": "csvHeaders",
      "label": "The Headers to use for CSV file inputs",
      "helpText": "One of csvHeaders or csvFirstRowHeader is required if sourceType=CSV_FILE",
      "isOptional": true
    },
    {
      "name": "csvFirstRowHeader",
      "label": "Use CSV First Row as header",
      "helpText": "One of csvHeaders or csvFirstRowHeader is required if sourceType=CSV_FILE",
      "isOptional": true
    },
    {
      "name": "csvCharset",
      "label": "Charset to use for CSV files",
      "helpText": "default: UTF-8",
      "isOptional": true
    },
    {
      "name": "csvColumnDelimiter",
      "label": "Column delimiter for CSV",
      "helpText": "default: ','",
      "isOptional": true
    },
    {
      "name": "csvFormatType",
      "label": "CSV Format type supported by Apache Commons CSV",
      "helpText": "CSV Format type supported by Apache Commons CSV. (Default: Default)",
      "isOptional": true
    },
    {
      "name": "jdbcConnectionUrl",
      "label": "JDBC Connection URL",
      "helpText": "The Connection URL used for connecting to a SQL datasource using JDBC. (Required when sourceType=JDBC_TABLE)",
      "isOptional": true
    },
    {
      "name": "jdbcDriverClass",
      "label": "JDBC Driver class name",
      "helpText": "The JDBC driver to use for reading from SQL datasource. (Required when sourceType=JDBC_TABLE)",
      "isOptional": true
    },
    {
      "name": "jdbcFilterClause",
      "label": "JDBC Filter clause",
      "helpText": "The WHERE clause to filter records for inspection from JDBC source. (Optional, though Recommended when sourceType=JDBC_TABLE)",
      "isOptional": true
    },
    {
      "name": "jdbcUserName",
      "label": "JDBC Connection Username",
      "helpText": "The username for the database connection. (Required when sourceType=JDBC_TABLE)",
      "isOptional": true
    },
    {
      "name": "jdbcPasswordSecretsKey",
      "label": "Cloud Secret Version Id that stores JDBC password",
      "helpText": "The Cloud Secrets version that stores password for the user-name. (One of jdbcPassword or jdbcPasswordSecretsKey required when sourceType=JDBC_TABLE)",
      "isOptional": true
    },
    {
      "name": "jdbcPassword",
      "label": "Plain-text password for the JDBC user",
      "helpText": "The plain-text password for the user-name. (One of jdbcPassword or jdbcPasswordSecretsKey required when sourceType=JDBC_TABLE)",
      "isOptional": true
    }
  ]
}