The Csv Serde is a serde that is applied above a text file. It's one way of reading a CSV / TSV format.
This SerDe:
The following example creates a TSV (Tab-separated) file.
CREATE TABLE my_table(a string, b string, ...)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
"separatorChar" = "\t",
"quoteChar" = "'",
"escapeChar" = "\\"
)
STORED AS TEXTFILE;
where:
DEFAULT_ESCAPE_CHARACTER \
DEFAULT_QUOTE_CHARACTER "
DEFAULT_SEPARATOR ,
With the tpcds customer table
CREATE EXTERNAL TABLE `customer_dat`(
`c_customer_sk` int,
`c_customer_id` char(16),
`c_current_cdemo_sk` int,
`c_current_hdemo_sk` int,
`c_current_addr_sk` int,
`c_first_shipto_date_sk` int,
`c_first_sales_date_sk` int,
`c_salutation` char(10),
`c_first_name` char(20),
`c_last_name` char(30),
`c_preferred_cust_flag` char(1),
`c_birth_day` int,
`c_birth_month` int,
`c_birth_year` int,
`c_birth_country` varchar(20),
`c_login` char(13),
`c_email_address` char(50),
`c_last_review_date_sk` int)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
"separatorChar" = "|"
)
LOCATION 'hdfs://locationToADirectory';
where:
Et voila:
FROM `customer_stg`
INSERT OVERWRITE TABLE `customer`
SELECT *;