Log in to the CVM instance of the EMR cluster and go to the /usr/local/service/ directory. The username is root, and the password is the one you set when creating the EMR cluster. Once the correct credentials are entered, you can enter the command line interface. Switch to the hadoop user and change to the Sqoop directory:

[root@172 ~]# su hadoop
[hadoop@172 ~]$ cd /usr/local/service/sqoop
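The commands below use $mysqlIP for the private IP address of your MySQL database. A minimal sketch, assuming you set it as a shell variable first (the address here is a placeholder; use your own instance's private IP):

[hadoop@172 sqoop]$ mysqlIP=192.168.0.2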
[hadoop@172 sqoop]$ mysql -h $mysqlIP -p
Enter password:
mysql> use test;
Database changed
mysql> insert into sqoop_test values(null, 'forth', now(), 'hbase');
Query OK, 1 row affected (0.00 sec)
mysql> select * from sqoop_test;
+----+--------+---------------------+---------+
| id | title  | time                | content |
+----+--------+---------------------+---------+
|  1 | first  | 2018-07-03 15:29:37 | hdfs    |
|  2 | second | 2018-07-03 15:30:57 | mr      |
|  3 | third  | 2018-07-03 15:31:07 | yarn    |
|  4 | forth  | 2018-07-03 15:39:38 | hbase   |
+----+--------+---------------------+---------+
4 rows in set (0.00 sec)
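For reference, sqoop_test is the table created in the earlier import/export steps. A schema consistent with the output above would look roughly like the following sketch (not necessarily the exact original DDL):

mysql> create table sqoop_test (id int auto_increment primary key, title varchar(32), time timestamp, content varchar(32));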
[hadoop@172 sqoop]$ bin/sqoop-import --connect jdbc:mysql://$mysqlIP/test --username root -P --table sqoop_test --check-column id --incremental append --last-value 3 --target-dir /sqoop
[hadoop@172 sqoop]$ hadoop fs -cat /sqoop/*
1,first,2018-07-03 15:29:37.0,hdfs
2,second,2018-07-03 15:30:57.0,mr
3,third,2018-07-03 15:31:07.0,yarn
4,forth,2018-07-03 15:39:38.0,hbase
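Because append mode only imports rows whose id is greater than the --last-value of 3, this run writes new part files into /sqoop alongside the files from the earlier import; you can list them with hadoop fs -ls (the exact file names depend on the number of map tasks):

[hadoop@172 sqoop]$ hadoop fs -ls /sqoop

To have Sqoop remember the last imported value between runs, the following steps save the import as a Sqoop job in the Sqoop metastore.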
<property>
    <name>sqoop.metastore.client.enable.autoconnect</name>
    <value>true</value>
</property>
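Assuming the standard layout of the Sqoop package on the node, this property is added to conf/sqoop-site.xml under the Sqoop installation directory, inside the <configuration> element:

<configuration>
    <property>
        <name>sqoop.metastore.client.enable.autoconnect</name>
        <value>true</value>
    </property>
</configuration>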
./sqoop-metastore &
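You can check that the metastore process started successfully before creating saved jobs (a generic check; the output varies by environment):

[hadoop@172 sqoop]$ ps -ef | grep sqoop-metastore | grep -v grep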
[hadoop@172 sqoop]$ bin/sqoop job --create job1 -- import --connect jdbc:mysql://$mysqlIP/test --username root -P --table sqoop_test --check-column id --incremental append --last-value 4 --target-dir /sqoop
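To confirm that the job was stored in the metastore, you can list the saved jobs and display job1's saved parameters with the standard sqoop job subcommands:

[hadoop@172 sqoop]$ bin/sqoop job --list
[hadoop@172 sqoop]$ bin/sqoop job --show job1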
mysql> insert into sqoop_test values(null, 'fifth', now(), 'hive');
Query OK, 1 row affected (0.00 sec)
mysql> select * from sqoop_test;
+----+--------+---------------------+---------+
| id | title  | time                | content |
+----+--------+---------------------+---------+
|  1 | first  | 2018-07-03 15:29:37 | hdfs    |
|  2 | second | 2018-07-03 15:30:57 | mr      |
|  3 | third  | 2018-07-03 15:31:07 | yarn    |
|  4 | forth  | 2018-07-03 15:39:38 | hbase   |
|  5 | fifth  | 2018-07-03 16:02:29 | hive    |
+----+--------+---------------------+---------+
5 rows in set (0.00 sec)
[hadoop@172 sqoop]$ bin/sqoop job --exec job1
[hadoop@172 sqoop]$ hadoop fs -cat /sqoop/*
1,first,2018-07-03 15:29:37.0,hdfs
2,second,2018-07-03 15:30:57.0,mr
3,third,2018-07-03 15:31:07.0,yarn
4,forth,2018-07-03 15:39:38.0,hbase
5,fifth,2018-07-03 16:02:29.0,hive
mysql> select max(time) from sqoop_test;
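With the data inserted so far, the latest modification time belongs to row 5, and it is the value passed to --last-value when creating the next job; the query should return something like:

+---------------------+
| max(time)           |
+---------------------+
| 2018-07-03 16:02:29 |
+---------------------+
1 row in set (0.00 sec)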
[hadoop@172 sqoop]$ bin/sqoop job --create job2 -- import --connect jdbc:mysql://$mysqlIP/test --username root -P --table sqoop_test --check-column time --incremental lastmodified --merge-key id --last-value '2018-07-03 16:02:29' --target-dir /sqoop
$mysqlIP: the private IP address of your MySQL database.
--check-column: must be a timestamp column.
--incremental: selects the lastmodified mode.
--merge-key: uses the id column as the merge key, so updated rows overwrite their old copies instead of being appended.
--last-value: the last modified time in the table that is queried. All modifications made after this time will be synced to HDFS, and the Sqoop job will automatically save and update the value each time it runs.

mysql> insert into sqoop_test values(null, 'sixth', now(), 'sqoop');
Query OK, 1 row affected (0.00 sec)
mysql> update sqoop_test set time=now(), content='spark' where id = 1;
Query OK, 1 row affected (0.00 sec)
Rows matched: 1  Changed: 1  Warnings: 0
mysql> select * from sqoop_test;
+----+--------+---------------------+---------+
| id | title  | time                | content |
+----+--------+---------------------+---------+
|  1 | first  | 2018-07-03 16:07:46 | spark   |
|  2 | second | 2018-07-03 15:30:57 | mr      |
|  3 | third  | 2018-07-03 15:31:07 | yarn    |
|  4 | forth  | 2018-07-03 15:39:38 | hbase   |
|  5 | fifth  | 2018-07-03 16:02:29 | hive    |
|  6 | sixth  | 2018-07-03 16:09:58 | sqoop   |
+----+--------+---------------------+---------+
6 rows in set (0.00 sec)
[hadoop@172 sqoop]$ bin/sqoop job --exec job2
[hadoop@172 sqoop]$ hdfs dfs -cat /sqoop/*
1,first,2018-07-03 16:07:46.0,spark
2,second,2018-07-03 15:30:57.0,mr
3,third,2018-07-03 15:31:07.0,yarn
4,forth,2018-07-03 15:39:38.0,hbase
5,fifth,2018-07-03 16:02:29.0,hive
6,sixth,2018-07-03 16:09:58.0,sqoop
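When the saved jobs are no longer needed, they can be removed from the metastore with the standard delete subcommand:

[hadoop@172 sqoop]$ bin/sqoop job --delete job1
[hadoop@172 sqoop]$ bin/sqoop job --delete job2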