#659 Fix record length option when record id generation is turned on.
yruslan committed Mar 25, 2024
1 parent 68f7362 commit f2c9544
Showing 3 changed files with 107 additions and 6 deletions.
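In short: for fixed-length files, the reader previously built the record header parser from the copybook-derived record size and ignored an explicit record_length option when record id generation was enabled; the first hunk below makes the user-supplied length take precedence. A minimal sketch of the affected scenario, using the options exercised by the regression test in this commit (assumes a SparkSession and spark-cobol on the classpath; path and copybook are placeholders):

    val df = spark.read
      .format("cobol")
      .option("copybook_contents", copybook)  // copybook describing 3 bytes per record
      .option("record_format", "F")           // fixed-length records
      .option("record_length", "4")           // explicit record size of 4 bytes
      .option("generate_record_id", "true")   // previously caused record_length to be ignored
      .load(path)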
@@ -17,16 +17,13 @@
package za.co.absa.cobrix.cobol.reader

import za.co.absa.cobrix.cobol.internal.Logging

import java.nio.charset.{Charset, StandardCharsets}
import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC}
import za.co.absa.cobrix.cobol.parser.headerparsers.{RecordHeaderParser, RecordHeaderParserFactory}
import za.co.absa.cobrix.cobol.parser.policies.FillerNamingPolicy
import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat.{FixedBlock, VariableBlock}
import za.co.absa.cobrix.cobol.parser.{Copybook, CopybookParser}
import za.co.absa.cobrix.cobol.reader.extractors.raw.{FixedBlockParameters, FixedBlockRawRecordExtractor, RawRecordContext, RawRecordExtractor, RawRecordExtractorFactory, TextFullRecordExtractor, TextRecordExtractor, VarOccursRecordExtractor, VariableBlockVariableRecordExtractor}
import za.co.absa.cobrix.cobol.reader.extractors.raw._
import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler
import za.co.absa.cobrix.cobol.reader.index.IndexGenerator
import za.co.absa.cobrix.cobol.reader.index.entry.SparseIndexEntry
@@ -37,6 +34,7 @@ import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator

import java.nio.charset.{Charset, StandardCharsets}
import scala.collection.immutable.HashMap
import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
@@ -317,8 +315,9 @@ class VarLenNestedReader[T: ClassTag](copybookContents: Seq[String],
}
} else {
// Fixed record length record parser
val recordSize = readerProperties.recordLength.getOrElse(cobolSchema.getRecordSize)
RecordHeaderParserFactory.createRecordHeaderParser(Constants.RhRdwFixedLength,
cobolSchema.getRecordSize,
recordSize,
readerProperties.fileStartOffset,
readerProperties.fileEndOffset,
0
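The resolved size now prefers the user-supplied record_length and falls back to the copybook-derived record size only when the option is absent. A worked sketch with the values from the regression test below (illustrative variables, not library code):

    val configuredLength: Option[Int] = Some(4)  // the "record_length" option
    val copybookSize = 3                         // derived from PIC 9(1) + PIC X(2)
    val recordSize = configuredLength.getOrElse(copybookSize)
    // recordSize == 4; before this fix the header parser always received copybookSize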
@@ -492,7 +492,7 @@ object CobolParametersParser extends Logging {
logger.warn(s"Option '$PARAM_BLOCK_LENGTH' is ignored for record format: VB")
}
if (recordFormat == FixedBlock && bdw.recordsPerBlock.nonEmpty) {
logger.warn(s"Option '$PARAM_RECORDS_PER_BLOCK' is ignored for record format: VB")
logger.warn(s"Option '$PARAM_RECORDS_PER_BLOCK' is ignored for record format: F")
}
Some(bdw)
} else {
@@ -0,0 +1,102 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.spark.cobol.source.regression

import org.scalatest.wordspec.AnyWordSpec
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase}
import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture
import za.co.absa.cobrix.spark.cobol.utils.SparkUtils

class Test22RecordLengthGenId extends AnyWordSpec with SparkTestBase with BinaryFileFixture with SimpleComparisonBase {

private implicit val logger: Logger = LoggerFactory.getLogger(this.getClass)

private val copybook =
""" 01 R.
05 A PIC 9(1).
05 B PIC X(2).
"""

val binFileContents: Array[Byte] = Array[Byte](
// "123{" in EBCDIC
0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xC0.toByte,
// "789J" in EBCDIC
0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0xD1.toByte,
// "6543" in EBCDIC
0xF6.toByte, 0xF5.toByte, 0xF4.toByte, 0xF3.toByte
)
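// 12 bytes in total: with record_length = 4 this is three records. The copybook
// maps only the first 3 bytes of each record (PIC 9(1) + PIC X(2)), so the
// 4th byte of every record is left unmapped.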

"EBCDIC files" should {
"correctly work without record it generation" in {
withTempBinFile("sign_overpunch", ".dat", binFileContents) { tmpFileName =>
val df = spark
.read
.format("cobol")
.option("copybook_contents", copybook)
.option("record_format", "F")
.option("record_length", "4")
.option("pedantic", "true")
.load(tmpFileName)

val expected = """[{"A":1,"B":"23"},{"A":7,"B":"89"},{"A":6,"B":"54"}]"""

val actual = df.toJSON.collect().mkString("[", ",", "]")

assertEqualsMultiline(actual, expected)
}
}

"correctly work with record it generation" in {
withTempBinFile("sign_overpunch", ".dat", binFileContents) { tmpFileName =>
val df = spark
.read
.format("cobol")
.option("copybook_contents", copybook)
.option("record_format", "F")
.option("record_length", "4")
.option("generate_record_id", "true")
.option("pedantic", "true")
.load(tmpFileName)

val expected = """[ {
| "File_Id" : 0,
| "Record_Id" : 0,
| "Record_Byte_Length" : 4,
| "A" : 1,
| "B" : "23"
|}, {
| "File_Id" : 0,
| "Record_Id" : 1,
| "Record_Byte_Length" : 4,
| "A" : 7,
| "B" : "89"
|}, {
| "File_Id" : 0,
| "Record_Id" : 2,
| "Record_Byte_Length" : 4,
| "A" : 6,
| "B" : "54"
|} ]""".stripMargin

val actual = SparkUtils.convertDataFrameToPrettyJSON(df)

assertEqualsMultiline(actual, expected)
}
}
}
}
