Prisdha Dharma
2018-11-06 21:55:38 UTC
Hello,
The latest Apache Drill works fine with JDBC, JSON, CSV, and simple Parquet files. However, it fails to read Parquet files with nested columns, such as the one with the following schema:
root
|-- pgid: binary (nullable = true)
|-- update: long (nullable = true)
|-- pid: integer (nullable = true)
|-- source: byte (nullable = true)
|-- creation: long (nullable = true)
|-- devices: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- os: string (nullable = true)
| | |-- kind: byte (nullable = true)
| | |-- maker: string (nullable = true)
| | |-- model: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- sdk: string (nullable = true)
|-- optout: struct (nullable = true)
| |-- status: boolean (nullable = true)
| |-- level: byte (nullable = true)
|-- inferences: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- attribute: integer (nullable = true)
| | |-- weight: byte (nullable = true)
| | |-- update: long (nullable = true)
| | |-- source: integer (nullable = true)
|-- languages: array (nullable = true)
| |-- element: string (containsNull = true)
|-- residence: struct (nullable = true)
| |-- position: struct (nullable = true)
| | |-- latitude: float (nullable = true)
| | |-- longitude: float (nullable = true)
| |-- timestamp: long (nullable = true)
| |-- address: string (nullable = true)
| |-- city: string (nullable = true)
| |-- state: string (nullable = true)
| |-- country: string (nullable = true)
| |-- zip: string (nullable = true)
| |-- ips: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- address: string (nullable = true)
| | | |-- frequency: integer (nullable = true)
| | | |-- timestamp: long (nullable = true)
| |-- tags: array (nullable = true)
| | |-- element: integer (containsNull = true)
| |-- score: byte (nullable = true)
| |-- marker: byte (nullable = true)
| |-- visits: integer (nullable = true)
| |-- works: integer (nullable = true)
| |-- worktime: long (nullable = true)
| |-- offworks: integer (nullable = true)
| |-- pastime: long (nullable = true)
| |-- source: byte (nullable = true)
| |-- update: long (nullable = true)
|-- workplace: struct (nullable = true)
| |-- position: struct (nullable = true)
| | |-- latitude: float (nullable = true)
| | |-- longitude: float (nullable = true)
| |-- timestamp: long (nullable = true)
| |-- address: string (nullable = true)
| |-- city: string (nullable = true)
| |-- state: string (nullable = true)
| |-- country: string (nullable = true)
| |-- zip: string (nullable = true)
| |-- ips: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- address: string (nullable = true)
| | | |-- frequency: integer (nullable = true)
| | | |-- timestamp: long (nullable = true)
| |-- tags: array (nullable = true)
| | |-- element: integer (containsNull = true)
| |-- score: byte (nullable = true)
| |-- marker: byte (nullable = true)
| |-- visits: integer (nullable = true)
| |-- works: integer (nullable = true)
| |-- worktime: long (nullable = true)
| |-- offworks: integer (nullable = true)
| |-- pastime: long (nullable = true)
| |-- source: byte (nullable = true)
| |-- update: long (nullable = true)
Drill fails with the following error:
0: jdbc:drill:zk=local> select * from profiles limit 10;
Error: INTERNAL_ERROR ERROR: Error in drill parquet reader (complex).
Message: Failure in setting up reader
Parquet Metadata: null
Fragment 0:0
Please, refer to logs for more information.
[Error Id: 83cf6c2d-29eb-4238-9093-f56086e492f9 on localhost:31010] (state=,code=0)
Could you please advise?
Prisdha Dharma
The latest Apache Drill works fine with JDBC, JSON, CSV, and simple Parquet files. However, it fails to read Parquet files with nested columns, such as the one with the following schema:
root
|-- pgid: binary (nullable = true)
|-- update: long (nullable = true)
|-- pid: integer (nullable = true)
|-- source: byte (nullable = true)
|-- creation: long (nullable = true)
|-- devices: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- os: string (nullable = true)
| | |-- kind: byte (nullable = true)
| | |-- maker: string (nullable = true)
| | |-- model: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- sdk: string (nullable = true)
|-- optout: struct (nullable = true)
| |-- status: boolean (nullable = true)
| |-- level: byte (nullable = true)
|-- inferences: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- attribute: integer (nullable = true)
| | |-- weight: byte (nullable = true)
| | |-- update: long (nullable = true)
| | |-- source: integer (nullable = true)
|-- languages: array (nullable = true)
| |-- element: string (containsNull = true)
|-- residence: struct (nullable = true)
| |-- position: struct (nullable = true)
| | |-- latitude: float (nullable = true)
| | |-- longitude: float (nullable = true)
| |-- timestamp: long (nullable = true)
| |-- address: string (nullable = true)
| |-- city: string (nullable = true)
| |-- state: string (nullable = true)
| |-- country: string (nullable = true)
| |-- zip: string (nullable = true)
| |-- ips: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- address: string (nullable = true)
| | | |-- frequency: integer (nullable = true)
| | | |-- timestamp: long (nullable = true)
| |-- tags: array (nullable = true)
| | |-- element: integer (containsNull = true)
| |-- score: byte (nullable = true)
| |-- marker: byte (nullable = true)
| |-- visits: integer (nullable = true)
| |-- works: integer (nullable = true)
| |-- worktime: long (nullable = true)
| |-- offworks: integer (nullable = true)
| |-- pastime: long (nullable = true)
| |-- source: byte (nullable = true)
| |-- update: long (nullable = true)
|-- workplace: struct (nullable = true)
| |-- position: struct (nullable = true)
| | |-- latitude: float (nullable = true)
| | |-- longitude: float (nullable = true)
| |-- timestamp: long (nullable = true)
| |-- address: string (nullable = true)
| |-- city: string (nullable = true)
| |-- state: string (nullable = true)
| |-- country: string (nullable = true)
| |-- zip: string (nullable = true)
| |-- ips: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- address: string (nullable = true)
| | | |-- frequency: integer (nullable = true)
| | | |-- timestamp: long (nullable = true)
| |-- tags: array (nullable = true)
| | |-- element: integer (containsNull = true)
| |-- score: byte (nullable = true)
| |-- marker: byte (nullable = true)
| |-- visits: integer (nullable = true)
| |-- works: integer (nullable = true)
| |-- worktime: long (nullable = true)
| |-- offworks: integer (nullable = true)
| |-- pastime: long (nullable = true)
| |-- source: byte (nullable = true)
| |-- update: long (nullable = true)
Drill fails with the following error:
0: jdbc:drill:zk=local> select * from profiles limit 10;
Error: INTERNAL_ERROR ERROR: Error in drill parquet reader (complex).
Message: Failure in setting up reader
Parquet Metadata: null
Fragment 0:0
Please, refer to logs for more information.
[Error Id: 83cf6c2d-29eb-4238-9093-f56086e492f9 on localhost:31010] (state=,code=0)
Could you please advise?
Prisdha Dharma