# This version of the contract contains the following proposed major and minor
# changes (a 3.0.0 increment, as of December 29th, 2023). I am using the
# full-contract.yaml file to make the changes evident, but I have not yet
# propagated them into the other modules in my local branch.
# BASEMODEL
# ODCS metadata: the document kind and the version of the standard it targets.
# apiVersion is the standard version; it follows semantic versioning and was
# previously known as templateVersion.
apiVersion: '3.0.0'
kind: DataContract
# NOTE: Possible breaking change proposal.
# The sections that follow would fall under this _metadata section. It would
# inherently be sorted to the top and could hold metadata-related sub-sections.
# Example:
_metadata:
  # Domain
  datasetDomain: seller
  # Data product name
  quantumName: my business data product name
  userConsumptionMode: Analytical
  # Version (follows semantic versioning)
  version: '1.1.0'
  status: current
  uuid: '53581432-6c55-4ba2-a65f-72344a91553a'
  type: tables
  tenant: ClimatentrsInc
# NOTE: Begin non-_metadata section changes.
# Data contract metadata: what is this data contract about, and to whom/what
# domain does it apply? These fields were consolidated from v2.2.0 so that they
# reside together.
datasetDomain: seller  # Domain
quantumName: my business data product name  # Data product name
userConsumptionMode: Analytical
version: 1.1.0  # Version (follows semantic versioning)
status: current
uuid: 53581432-6c55-4ba2-a65f-72344a91553a
type: tables
tenant: ClimatentrsInc
systemInstance: instance.ClimateQuantum.org
# Quoted so the value stays a string; left plain, YAML 1.1 loaders resolve a
# "date time" scalar like this one to a native timestamp.
contractCreatedTs: '2022-11-15 02:59:43'
# Access
# NOTE: New standalone section in v3.0.0.
access:
  database: pypl-edw.pp_access_views
  password: '${env.password}'
  drivers:
    - driver: jdbc
      driverVersion: x.x.x
      driverUrl: urlToDriverVersion
  # NEW 2.1.0. Required if you want to schedule stuff; comes from DataALM.
  schedulerAppName: name_coming_from_scheduler
  server: null
  username: '${env.username}'
# Description: high-level details on the data product/quantum.
description:
  limitations: null
  purpose: 'Views built on top of the seller tables.'
  # New optional field in v3.0.0.
  notes: null
  usage: null
# Pricing, if any.
price:
  priceAmount: 9.95
  priceCurrency: 'USD'
  priceUnit: 'megabyte'
# Roles: one entry per role, with its access level and approvers.
roles:
  - role: microstrategy_user_opr
    access: read
    firstLevelApprovers: 'Reporting Manager'
    secondLevelApprovers: 'mandolorian'
  - role: bq_queryman_user_opr
    access: read
    firstLevelApprovers: 'Reporting Manager'
    secondLevelApprovers: 'na'
  - role: risk_data_access_opr
    access: read
    firstLevelApprovers: 'Reporting Manager'
    secondLevelApprovers: 'dathvador'
  - role: bq_unica_user_opr
    access: write
    firstLevelApprovers: 'Reporting Manager'
    secondLevelApprovers: 'mickey'
# Source: details about each source. Best practice is to add a source and not
# reuse source numbers.
# NOTE: New standalone section in v3.0.0.
sourceDetails:
  - source: sourcename1
    contractPath: path/to/contract/if/local
    contractUrl: urlTosSourceContract
    datasetProject: edw
    datasetName: access_views
    sourcePlatform: googleCloudPlatform
    # New in v3.0.0. This field is useful for adding color about the data
    # source.
    sourceNotes: 'Any extra source notes needed'
    sourceSystem: bigQuery
  - source: sourcename2
    contractPath: path/to/contract/if/local
    contractUrl: urlTosSourceContract
    datasetProject: mySnowflakeProject
    datasetName: my_snowflake_views
    sourcePlatform: azure
    sourceNotes: null
    sourceSystem: snowflake
  - source: sourcename3
    # Sometimes you may not have all the source details.
    contractPath: null
    # Work with source teams to build these out.
    contractUrl: null
    datasetProject: myS3LakeProject
    datasetName: my_raw_lake_files
    sourcePlatform: aws
    sourceNotes: 'Contract needed for this data source, must work with the source team'
    sourceSystem: s3
# Stakeholders: one entry per person, with their role and tenure.
# dateIn/dateOut are quoted so they stay strings; left plain, ISO dates are
# resolved to native date objects by YAML 1.1 loaders. A null dateOut or
# replacedByUsername means no value is recorded.
stakeholders:
  - username: ceastwood
    role: Data Scientist
    dateIn: '2022-08-02'
    dateOut: '2022-10-01'
    replacedByUsername: mhopper
  - username: mhopper
    role: Data Scientist
    dateIn: '2022-10-01'
    dateOut: null
    replacedByUsername: null
  - username: daustin
    role: Owner
    comment: Keeper of the grail
    dateIn: '2022-10-01'
    dateOut: null
    replacedByUsername: null
# Support: how and where to get support for this data product.
# NOTE: New standalone section in v3.0.0.
support:
  # Quoted because a plain scalar that starts with "[" is parsed as a flow
  # sequence, not a string.
  # NOTE(review): the value looks like a redaction placeholder rather than a
  # real address - confirm the intended distribution list.
  productDl: '[email protected]'
  productFeedbackUrl: null
  # Must stay quoted: an unquoted leading "#" would start a comment.
  productSlackChannel: '#product-help'
# Tags attached to the contract as a whole.
tags: [transactions]
# Dataset, schema and quality. This should usually come as the last section.
# Date- and timestamp-like values below are quoted so they load as strings
# instead of being resolved to native date/time objects by YAML 1.1 loaders.
dataset:
  - table: tbl
    # NEW in v2.1.0. Optional; default value is table name + version separated
    # by underscores, as table_1_2_0.
    physicalName: tbl_1
    priorTableName: null  # if needed
    description: Provides core payment metrics
    # NEW in v2.2.0, inspired by the column-level authoritative links.
    authoritativeDefinitions:
      - url: https://catalog.data.gov/dataset/air-quality
        type: businessDefinition
      - url: https://youtu.be/jbY1BKFj9ec
        type: videoTutorial
    tags: null
    dataGranularity: Aggregation on columns txn_ref_dt, pmt_txn_id
    columns:
      - column: txn_ref_dt
        classification: null
        clusterKeyPosition: -1
        clusterStatus: false
        criticalDataElementStatus: false
        businessName: transaction reference date
        description: null
        encryptedColumnName: null
        # NEW in v2.1.0. Optional; default value is false. Indicates whether
        # the column is primary key in the table.
        isPrimary: false
        isNullable: false
        logicalType: date
        partitionKeyPosition: 1
        partitionStatus: true
        physicalType: date
        primaryKeyPosition: -1
        sampleValues:
          - '2022-10-03'
          - '2020-01-28'
        tags: null
        transformDescription: defines the logic in business terms; logic for dummies
        # Folded scalar: the lines below are joined with single spaces into one
        # string.
        transformLogic: >-
          sel t1.txn_dt as txn_ref_dt
          from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3
          where t1.txn_dt=date-3
        transformSourceTables:
          - table_name_1
          - table_name_2
          - table_name_3
        # NOTE: Possible breaking change proposal:
        # slaProperties are now nested under the columns which they apply to
        # (consolidated to a column-level attribute in v3.0.0).
        slaProperties:
          - property: latency  # Property, see list of values in DP QoS
            value: 4
            unit: d  # d, day, days for days; y, yr, years for years
            # This would not be needed as it is the same table.column as the
            # default one.
            column: txn_ref_dt
          - property: generalAvailability
            value: '2022-05-12T09:30:10-08:00'
          - property: endOfSupport
            value: '2032-05-12T09:30:10-08:00'
          - property: endOfLife
            value: '2042-05-12T09:30:10-08:00'
          - property: retention
            value: 3
            unit: y
            column: txn_ref_dt
          - property: frequency
            value: 1
            valueExt: 1
            unit: d
            column: txn_ref_dt
          - property: timeOfAvailability
            value: '09:00-08:00'
            column: txn_ref_dt
            # Describes the importance of the SLA:
            # [regulatory|analytics|operational|...]
            driver: regulatory
          - property: timeOfAvailability
            value: '08:00-08:00'
            # NOTE(review): refers to "tab1" while this table is named "tbl" -
            # confirm which name is intended.
            column: tab1.txn_ref_dt
            driver: analytics
      - column: rcvr_id
        # NEW in v2.1.0. Optional; default value is false. Indicates whether
        # the column is primary key in the table.
        isPrimary: true
        primaryKeyPosition: 1
        businessName: receiver id
        logicalType: string
        physicalType: varchar(18)
        isNullable: false
        description: A description for column rcvr_id.
        partitionStatus: false
        partitionKeyPosition: -1
        clusterStatus: true
        clusterKeyPosition: 1
        criticalDataElementStatus: false
        tags: null
        classification: null
        encryptedColumnName: null
        slaProperties: null
      - column: rcvr_cntry_code
        # NEW in v2.1.0. Optional; default value is false. Indicates whether
        # the column is primary key in the table.
        isPrimary: false
        primaryKeyPosition: -1
        businessName: receiver country code
        logicalType: string
        physicalType: varchar(2)
        isNullable: false
        description: null
        partitionStatus: false
        partitionKeyPosition: -1
        clusterStatus: false
        clusterKeyPosition: -1
        criticalDataElementStatus: false
        tags: null
        classification: null
        authoritativeDefinitions:
          - url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
            type: businessDefinition
          - url: https://github.com/myorg/myrepo
            type: transformationImplementation
          - url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code
            type: implementation
        encryptedColumnName: rcvr_cntry_code_encrypted
        quality:
          - code: nullCheck
            customProperties:
              # Empty values are written as an explicit null.
              - property: FIELD_NAME
                value: null
              - property: COMPARE_TO
                value: null
              - property: COMPARISON_TYPE
                value: Greater than
            description: column should not contain null values
            dimension: completeness  # dropdown 7 values
            templateName: NullCheck
            toolName: Elevate
            toolRuleName: DQ.rw.tab1_2_0_0.rcvr_cntry_code.NullCheck
            type: dataQuality
            severity: error
            businessImpact: operational
            scheduleCronExpression: '0 20 * * *'
        # Written as null (it was the string "none") to match the rcvr_id
        # column.
        # NOTE(review): placed at column level like the sibling columns -
        # confirm it was not meant to sit inside the quality rule above.
        slaProperties: null
    # Table-level quality rules.
    quality:
      - code: countCheck  # Required, name of the rule
        businessImpact: operational  # Optional, NEW in v2.1.0
        description: Ensure row count is within expected volume range  # Optional
        dimension: completeness  # Optional
        # Optional, NEW in v2.1.0. Default schedule - every day 10 a.m. UTC.
        # NOTE(review): in standard five-field cron this expression fires at
        # minute 0 of hour 20, which is not 10 a.m. unless the scheduler runs
        # in a different time zone - confirm which is intended.
        scheduleCronExpression: '0 20 * * *'
        severity: error  # Optional, NEW in v2.1.0, default value is error
        templateName: CountCheck  # NEW in v2.1.0, Required
        toolName: Elevate  # Required
        # NEW in v2.1.0. Optional (available only to the users who can change
        # in source code edition).
        toolRuleName: DQ.rw.tab1.CountCheck
        # Optional, NEW in v2.1.0. Default value for a column-level check is
        # dataQuality, and for a table-level check reconciliation.
        type: reconciliation