https://delta.io logo
s

Sukumar Nataraj

06/19/2023, 9:13 AM
Team, I am getting this exception in prod when creating a temp table. I usually drop this temp table and recreate it for every micro-batch in Spark Streaming. Is some race condition happening between S3 and Delta? I am worried about setting PERMISSIVE mode because it silently ignores malformed records. Has anyone experienced an error like this before? I am using Delta 2.2.0, Spark 3.3.1, and EMR 6.10.0. Thanks for your help!
Copy code
org.apache.spark.SparkException: Malformed records are detected in record parsing. Parse Mode: FAILFAST. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'.
        at org.apache.spark.sql.errors.QueryExecutionErrors$.malformedRecordsDetectedInRecordParsingError(QueryExecutionErrors.scala:1417) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.util.FailureSafeParser.parse(FailureSafeParser.scala:68) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.$anonfun$readFile$9(JsonDataSource.scala:144) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) ~[scala-library-2.12.15.jar:?]
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) ~[scala-library-2.12.15.jar:?]
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.15.jar:?]
        at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:227) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.15.jar:?]
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]
        at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:35) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.hasNext(Unknown Source) ~[?:?]
        at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:955) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.15.jar:?]
        at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:168) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.scheduler.Task.run(Task.scala:138) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1516) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_372]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_372]
        at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_372]
Caused by: org.apache.spark.sql.catalyst.util.BadRecordException: com.fasterxml.jackson.core.io.JsonEOFException: Unexpected end-of-input: expected close marker for Object (start marker at [Source: (byte[])"{"add":{"path":"site_id=76492/part-00190-15100589-5cb2-42a9-ba1d-46ca7f6fc84c.c000.snappy.parquet","partitionValues":{"site_id":"76492"},"size":26061,"modificationTime":1687106147000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10375265,\"business_entity_id\":1,\"handle\":\"SOME_VALUE\",\"initial_plan_handle\":\"free\",\"current_plan_handle\":\"free\",\"currency_code\":\"USD\",\"created_at\":\"2018-05-18T15:29:17.000Z\",\"activated_at\":\"2018-05-18T15:29:17.000Z\",\"[truncated 1733 bytes]; line: 1, column: 8])
 at [Source: (byte[])"{"add":{"path":"site_id=76492/part-00190-15100589-5cb2-42a9-ba1d-46ca7f6fc84c.c000.snappy.parquet","partitionValues":{"site_id":"76492"},"size":26061,"modificationTime":1687106147000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10375265,\"business_entity_id\":1,\"handle\":\"SOME_VALUE\",\"initial_plan_handle\":\"free\",\"current_plan_handle\":\"free\",\"currency_code\":\"USD\",\"created_at\":\"2018-05-18T15:29:17.000Z\",\"activated_at\":\"2018-05-18T15:29:17.000Z\",\"[truncated 1733 bytes]; line: 1, column: 2234]
        at org.apache.spark.sql.catalyst.json.JacksonParser.parse(JacksonParser.scala:513) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.$anonfun$readFile$7(JsonDataSource.scala:140) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.util.FailureSafeParser.parse(FailureSafeParser.scala:60) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        ... 22 more
Caused by: com.fasterxml.jackson.core.io.JsonEOFException: Unexpected end-of-input: expected close marker for Object (start marker at [Source: (byte[])"{"add":{"path":"site_id=76492/part-00190-15100589-5cb2-42a9-ba1d-46ca7f6fc84c.c000.snappy.parquet","partitionValues":{"site_id":"76492"},"size":26061,"modificationTime":1687106147000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10375265,\"business_entity_id\":1,\"handle\":\"SOME_VALUE\",\"initial_plan_handle\":\"free\",\"current_plan_handle\":\"free\",\"currency_code\":\"USD\",\"created_at\":\"2018-05-18T15:29:17.000Z\",\"activated_at\":\"2018-05-18T15:29:17.000Z\",\"[truncated 1733 bytes]; line: 1, column: 8])
 at [Source: (byte[])"{"add":{"path":"site_id=76492/part-00190-15100589-5cb2-42a9-ba1d-46ca7f6fc84c.c000.snappy.parquet","partitionValues":{"site_id":"76492"},"size":26061,"modificationTime":1687106147000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10375265,\"business_entity_id\":1,\"handle\":\"SOME_VALUE\",\"initial_plan_handle\":\"free\",\"current_plan_handle\":\"free\",\"currency_code\":\"USD\",\"created_at\":\"2018-05-18T15:29:17.000Z\",\"activated_at\":\"2018-05-18T15:29:17.000Z\",\"[truncated 1733 bytes]; line: 1, column: 2234]
        at com.fasterxml.jackson.core.base.ParserMinimalBase._reportInvalidEOF(ParserMinimalBase.java:682) ~[jackson-core-2.13.4.jar:2.13.4]
        at com.fasterxml.jackson.core.base.ParserBase._handleEOF(ParserBase.java:494) ~[jackson-core-2.13.4.jar:2.13.4]
        at com.fasterxml.jackson.core.base.ParserBase._eofAsNextChar(ParserBase.java:511) ~[jackson-core-2.13.4.jar:2.13.4]
        at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._skipWSOrEnd(UTF8StreamJsonParser.java:3039) ~[jackson-core-2.13.4.jar:2.13.4]
        at com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:756) ~[jackson-core-2.13.4.jar:2.13.4]
        at org.apache.spark.sql.catalyst.json.JacksonUtils$.nextUntil(JacksonUtils.scala:30) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser.org$apache$spark$sql$catalyst$json$JacksonParser$$convertObject(JacksonParser.scala:424) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser$$anonfun$$nestedInanonfun$makeStructRootConverter$3$1.applyOrElse(JacksonParser.scala:102) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser$$anonfun$$nestedInanonfun$makeStructRootConverter$3$1.applyOrElse(JacksonParser.scala:101) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser.parseJsonToken(JacksonParser.scala:377) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser.$anonfun$makeStructRootConverter$3(JacksonParser.scala:101) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser.$anonfun$parse$2(JacksonParser.scala:501) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2860) ~[spark-core_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.json.JacksonParser.parse(JacksonParser.scala:496) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.$anonfun$readFile$7(JsonDataSource.scala:140) ~[spark-sql_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]
        at org.apache.spark.sql.catalyst.util.FailureSafeParser.parse(FailureSafeParser.scala:60) ~[spark-catalyst_2.12-3.3.1-amzn-0.jar:3.3.1-amzn-0]