From 81baf106dba167a81908b29ef12fdac140101f13 Mon Sep 17 00:00:00 2001 From: SelimHorri Date: Thu, 21 Jan 2021 01:21:49 +0100 Subject: [PATCH] create a dataframe, manipulate it and load it in a mysql database locally --- .classpath | 26 +++--- .gitignore | 33 +++++++ pom.xml | 98 +++++++++++---------- src/main/java/com/selimhorri/pack/Main.java | 22 +++++ src/main/resources/name_job.txt | 5 ++ 5 files changed, 123 insertions(+), 61 deletions(-) create mode 100644 .gitignore create mode 100644 src/main/resources/name_job.txt diff --git a/.classpath b/.classpath index 6e0021f..595d245 100644 --- a/.classpath +++ b/.classpath @@ -1,23 +1,10 @@ - - - - - - - - - - - - - @@ -42,6 +29,19 @@ + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..549e00a --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +HELP.md +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ diff --git a/pom.xml b/pom.xml index f787e2d..3a1556b 100644 --- a/pom.xml +++ b/pom.xml @@ -13,58 +13,62 @@ 11 11 UTF-8 - UTF-8 - 2.12 + UTF-8 + 2.11 2.3.1 - - - - org.apache.spark - spark-core_${scala.version} - ${spark.version} - - - org.apache.spark - spark-sql_${scala.version} - ${spark.version} - - - org.slf4j - slf4j-simple - - - - - org.apache.spark - spark-mllib_${scala.version} - ${spark.version} - - - org.slf4j - slf4j-log4j12 - - - org.slf4j - slf4j-simple - - - - - - junit - junit - 4.11 - test - - + + + + org.apache.spark + spark-core_${scala.version} + ${spark.version} + + + org.apache.spark + spark-sql_${scala.version} + ${spark.version} + + + org.slf4j + slf4j-simple + + + + + org.apache.spark + spark-mllib_${scala.version} + ${spark.version} + + + org.slf4j + slf4j-log4j12 + + + org.slf4j + slf4j-simple + + + + + mysql + mysql-connector-java + 8.0.22 + + + junit + junit + 4.11 + test + + - + org.springframework.boot @@ -92,15 +96,13 @@ repackage - com.selimhorri.pack.Main - - + spark-app diff --git a/src/main/java/com/selimhorri/pack/Main.java b/src/main/java/com/selimhorri/pack/Main.java index 370329a..713045a 100644 --- a/src/main/java/com/selimhorri/pack/Main.java +++ b/src/main/java/com/selimhorri/pack/Main.java @@ -1,10 +1,32 @@ package com.selimhorri.pack; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import static org.apache.spark.sql.functions.*; + +import java.util.Properties; + public class Main { public static void main(String[] args) { + final SparkSession sparkSession = new SparkSession.Builder().appName("CSV to DB").master("local").getOrCreate(); + + Dataset df = sparkSession.read().format("csv").option("header", true).load("src/main/resources/name_job.txt"); + + df = df.withColumn("fullName", concat(lit("FIRST_NAME => "), df.col("first_name"), lit(" || "), lit("LAST_NAME => "), df.col("last_name")) ); + df.show(); + + final String dbUrl = "jdbc:mysql://localhost:3306/spark_db"; + final Properties properties = new Properties(); + // properties.setProperty("driver", ""); + properties.setProperty("user", "root"); + properties.setProperty("password", ""); + df.write().mode(SaveMode.Overwrite).jdbc(dbUrl, "persons", properties); + System.out.println("====>> Loaded in the Database <<===="); } diff --git a/src/main/resources/name_job.txt b/src/main/resources/name_job.txt new file mode 100644 index 0000000..fcff170 --- /dev/null +++ b/src/main/resources/name_job.txt @@ -0,0 +1,5 @@ +first_name,last_name,job +Selim,Horri,Software Engineer +Amine,Ajimi,Student +Omar,Derouiche,Agent +Yesmine,Derouiche,Student \ No newline at end of file