Skip to content

Commit

Permalink
create a dataframe, manipulate it and load it in a mysql database loc…
Browse files Browse the repository at this point in the history
…ally
  • Loading branch information
SelimHorri committed Jan 21, 2021
1 parent 2f07ae0 commit 81baf10
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 61 deletions.
26 changes: 13 additions & 13 deletions .classpath
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
Expand All @@ -42,6 +29,19 @@
<attribute name="m2e-apt" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="target/generated-test-sources/test-annotations">
<attributes>
<attribute name="optional" value="true"/>
Expand Down
33 changes: 33 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/

### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache

### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr

### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/

### VS Code ###
.vscode/
98 changes: 50 additions & 48 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,58 +13,62 @@
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<scala.version>2.12</scala.version>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<scala.version>2.11</scala.version>
<spark.version>2.3.1</spark.version>
</properties>

<dependencies>
<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_${scala.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>

<dependencies>
<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_${scala.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.22</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>

</dependencies>

<build>

<plugins>
<plugin>
<!-- <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
Expand All @@ -81,7 +85,7 @@
</configuration>
</execution>
</executions>
</plugin>
</plugin> -->

<plugin>
<groupId>org.springframework.boot</groupId>
Expand All @@ -92,15 +96,13 @@
<goal>repackage</goal>
</goals>
<configuration>

<mainClass>com.selimhorri.pack.Main</mainClass>

</configuration>
</execution>
</executions>
</plugin>

</plugins>
<finalName>spark-app</finalName>
</build>

</project>
22 changes: 22 additions & 0 deletions src/main/java/com/selimhorri/pack/Main.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
package com.selimhorri.pack;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import static org.apache.spark.sql.functions.*;

import java.util.Properties;

public class Main {

public static void main(String[] args) {

final SparkSession sparkSession = new SparkSession.Builder().appName("CSV to DB").master("local").getOrCreate();

Dataset<Row> df = sparkSession.read().format("csv").option("header", true).load("src/main/resources/name_job.txt");

df = df.withColumn("fullName", concat(lit("FIRST_NAME => "), df.col("first_name"), lit(" || "), lit("LAST_NAME => "), df.col("last_name")) );
df.show();

final String dbUrl = "jdbc:mysql://localhost:3306/spark_db";
final Properties properties = new Properties();
// properties.setProperty("driver", "");
properties.setProperty("user", "root");
properties.setProperty("password", "");

df.write().mode(SaveMode.Overwrite).jdbc(dbUrl, "persons", properties);
System.out.println("====>> Loaded in the Database <<====");

}

Expand Down
5 changes: 5 additions & 0 deletions src/main/resources/name_job.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
first_name,last_name,job
Selim,Horri,Software Engineer
Amine,Ajimi,Student
Omar,Derouiche,Agent
Yesmine,Derouiche,Student

0 comments on commit 81baf10

Please sign in to comment.