Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
gwens committed Nov 25, 2016
0 parents commit 9fa121c
Show file tree
Hide file tree
Showing 59 changed files with 3,350 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.csv filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
Binary file added 9781484202722.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions CH07 SourceCode/CH07 SourceCode/2015_01_clickstream.tsv.gz
Git LFS file not shown
29 changes: 29 additions & 0 deletions CH07 SourceCode/CH07 SourceCode/clickstream_code.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- Wikipedia clickstream example: load the January 2015 dump into Hive and
-- query it through a narrower view.
CREATE DATABASE clickstream;
USE clickstream;

-- Raw clickstream rows exactly as they appear in the delimited text file.
-- NOTE(review): '09' is presumably read by Hive as the decimal byte value 9
-- (TAB), which matches the .tsv input -- confirm; '\t' is the usual spelling.
CREATE TABLE wikilogs (
    previous_id    STRING,
    current_id     STRING,
    no_occurences  INT,
    previous_title STRING,
    current_title  STRING,
    type           STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '09'
STORED AS TEXTFILE;

-- OVERWRITE replaces whatever the table already holds with this file.
LOAD DATA INPATH '/tmp/wikiclickstream/2015_01_clickstream.tsv.gz'
OVERWRITE INTO TABLE wikilogs;

-- Expose only the occurrence count and the two page titles.
CREATE VIEW wikilogs_view (
    no_occurences,
    previous_title,
    current_title
)
AS
SELECT
    no_occurences,
    previous_title,
    current_title
FROM wikilogs;

-- Highest occurrence counts first.
-- NOTE(review): SORT BY orders rows within each reducer only; if a single
-- globally ordered result is required, ORDER BY is the construct to use.
SELECT *
FROM wikilogs_view
SORT BY no_occurences DESC;


-- Same ordering, restricted to rows whose previous title is 'other-facebook'.
SELECT *
FROM wikilogs_view
WHERE previous_title = 'other-facebook'
SORT BY no_occurences DESC;


1 change: 1 addition & 0 deletions CH07 SourceCode/CH07 SourceCode/json1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_id":"5774091c3227dc593d06aff2","index":0,"guid":"3ce0d275-317d-4936-aedb-2a7844b6d54d","isActive":false,"balance":"$1,912.20","picture":"http://placehold.it/32x32","age":27,"eyeColor":"brown","name":"Fry Schmidt","gender":"male","company":"ZORK","email":"[email protected]","phone":"+1 (956) 510-2736","address":"158 Tompkins Avenue, Rosedale, New York, 2134","about":"Et mollit Lorem id sunt dolor in id aliquip nisi incididunt in elit cupidatat exercitation. Minim nulla fugiat cillum cillum exercitation velit veniam. Lorem excepteur et esse adipisicing pariatur ipsum ut excepteur. Quis adipisicing ullamco esse velit in eiusmod veniam eiusmod Lorem. Elit proident ex velit culpa excepteur deserunt laboris duis irure ipsum voluptate nostrud cupidatat. Consequat eiusmod culpa laborum culpa nulla anim et enim sunt voluptate.\r\n","registered":"2015-05-16T05:55:04 +05:00","latitude":65.239382,"longitude":31.355514,"tags":["ea","consectetur","reprehenderit","Lorem","ea","nisi","et"],"friends":[{"id":0,"name":"Gomez Richmond"},{"id":1,"name":"Deanne Mcintyre"},{"id":2,"name":"Best Dyer"}],"greeting":"Hello, Fry Schmidt! You have 8 unread messages.","favoriteFruit":"apple"}
1 change: 1 addition & 0 deletions CH07 SourceCode/CH07 SourceCode/json2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_id":"577424548ab78dc174ac0dae","index":0,"guid":"83659acc-ddb7-4194-a1d5-cd9eee5a76b5","isActive":false,"balance":"$2,041.83","picture":"http://placehold.it/32x32","age":36,"eyeColor":"brown","name":{"first":"Roseann","last":"Perry"},"company":"RAMJOB","email":"[email protected]","phone":"+1 (959) 463-3581","address":"203 Pleasant Place, Rehrersburg, Mississippi, 4612","about":"Cupidatat laboris commodo excepteur in incididunt cupidatat sit minim non laborum proident nisi. Occaecat sit eu ea irure occaecat aute amet velit do labore enim sit irure nostrud. Ad ullamco minim tempor esse. Aliquip sit culpa amet sint ea minim in qui magna minim Lorem in voluptate. Ea nulla eu nulla sunt ex. Nulla ea magna cillum esse velit amet voluptate.","registered":"Saturday, April 9, 2016 12:55 PM","latitude":"-11.748526","longitude":"160.379242","tags":["pariatur","laboris","ipsum","sint","eiusmod"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Jessica Lowe"},{"id":1,"name":"Lessie Hoffman"},{"id":2,"name":"Madeline Arnold"}],"greeting":"Hello, Roseann! You have 7 unread messages.","favoriteFruit":"banana"}
1 change: 1 addition & 0 deletions CH07 SourceCode/CH07 SourceCode/json3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_id":"5774245438f862f0b8121f41","index":5,"guid":"580ff472-9036-40b2-aa3c-9085f305d6b4","isActive":false,"balance":"$2,252.98","picture":"http://placehold.it/32x32","age":36,"eyeColor":"brown","name":{"first":"Lori","last":"Pacheco"},"company":"LUDAK","email":"[email protected]","phone":"+1 (891) 415-2253","address":"290 Rochester Avenue, Cannondale, Guam, 7856","about":"Qui fugiat nostrud qui laborum Lorem excepteur. Minim exercitation esse mollit irure fugiat eiusmod proident sit Lorem incididunt. Dolor ex ipsum tempor est eu duis exercitation. Enim ea ullamco mollit proident labore eiusmod excepteur magna Lorem anim.","registered":"Tuesday, February 10, 2015 8:07 AM","latitude":"75.805649","longitude":"138.091539","tags":["ullamco","in","voluptate","reprehenderit","sunt"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Byrd Meyers"},{"id":1,"name":"Weeks Miles"},{"id":2,"name":"Marquez Pace"}],"greeting":"Hello, Lori! You have 6 unread messages.","favoriteFruit":"banana"}
72 changes: 72 additions & 0 deletions CH07 SourceCode/CH07 SourceCode/json_code.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
-- Stage raw JSON text in a single string column so it can be picked apart
-- with get_json_object().
CREATE TABLE json_table (
    json STRING
);

LOAD DATA INPATH '/tmp/json_data/json1' INTO TABLE json_table;

-- '$' is the JSON path of the document root, so this returns the whole document.
-- (The column is named json; there is no jcol column in json_table.)
SELECT get_json_object(json_table.json, '$') FROM json_table;

-- Extract individual attributes by JSON path. Every path is a plain
-- single-quoted string literal and every call reads the json column.
SELECT
    get_json_object(json_table.json, '$.balance')      AS balance,
    get_json_object(json_table.json, '$.gender')       AS gender,
    get_json_object(json_table.json, '$.phone')        AS phone,
    get_json_object(json_table.json, '$.friends.name') AS friendname
FROM json_table;

-- Make the openx JSON SerDe available to this session.
ADD JAR /usr/local/Hive-JSON-Serde/json-serde/target/json-serde-1.3.8-SNAPSHOT-jar-with-dependencies.jar;

-- Table whose columns are filled directly from JSON attributes by the SerDe.
-- The JSON attribute "_id" is exposed as the Hive column id through the
-- SerDe mapping property, whose form is "mapping.<hive column>" = "<json key>".
CREATE TABLE json_serde_table (
    id            STRING,
    about         STRING,
    address       STRING,
    age           INT,
    balance       STRING,
    company       STRING,
    email         STRING,
    eyecolor      STRING,
    favoritefruit STRING,
    friends       ARRAY<STRUCT<id:INT, name:STRING>>,
    gender        STRING,
    greeting      STRING,
    guid          STRING,
    index         INT,
    isactive      BOOLEAN,
    latitude      DOUBLE,
    longitude     DOUBLE,
    name          STRING,
    phone         STRING,
    picture       STRING,
    registered    STRING,
    tags          ARRAY<STRING>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ( "mapping.id" = "_id" );

-- NOTE(review): LOAD DATA INPATH without LOCAL moves the source file; if an
-- earlier load already consumed /tmp/json_data/json1 it must be uploaded again.
LOAD DATA INPATH '/tmp/json_data/json1' INTO TABLE json_serde_table;

-- friends is an array of structs; friends.name yields the name of every element.
SELECT address, friends.name FROM json_serde_table;

-- Alternative definition of json_serde_table using the JSON SerDe that ships
-- with Hive (HCatalog) instead of the openx one.
-- The table name is reused, so any existing definition is dropped first;
-- otherwise CREATE TABLE fails with "table already exists".
-- NOTE(review): dropping a managed table also removes the data loaded into it.
DROP TABLE IF EXISTS json_serde_table;

CREATE TABLE json_serde_table (
    id            STRING,
    about         STRING,
    address       STRING,
    age           INT,
    balance       STRING,
    company       STRING,
    email         STRING,
    eyecolor      STRING,
    favoritefruit STRING,
    friends       ARRAY<STRUCT<id:INT, name:STRING>>,
    gender        STRING,
    greeting      STRING,
    guid          STRING,
    index         INT,
    isactive      BOOLEAN,
    latitude      DOUBLE,
    longitude     DOUBLE,
    name          STRING,
    phone         STRING,
    picture       STRING,
    registered    STRING,
    tags          ARRAY<STRING>
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE;


Git LFS file not shown
Binary file not shown.
Binary file added CH07 SourceCode/__MACOSX/CH07 SourceCode/._json1
Binary file not shown.
Binary file added CH07 SourceCode/__MACOSX/CH07 SourceCode/._json2
Binary file not shown.
Binary file added CH07 SourceCode/__MACOSX/CH07 SourceCode/._json3
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Person001.csv
Git LFS file not shown
38 changes: 38 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_EqualJoin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Equi-join example: build two small tables keyed by persid and join them.
USE census;

-- Person names. Declared as a bucketed, ORC-backed table with the
-- 'transactional' table property set.
CREATE TABLE census.personname (
    persid    INT,
    firstname STRING,
    lastname  STRING
)
CLUSTERED BY (persid) INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES('transactional' = 'true');

INSERT INTO TABLE census.personname
VALUES
    (0, 'Albert',  'Ape'),
    (1, 'Bob',     'Burger'),
    (2, 'Charlie', 'Clown'),
    (3, 'Danny',   'Drywer');


-- Post names keyed by persid, declared the same way as personname.
-- persid 10 has no row in personname, and persid 0 and 3 have no address.
CREATE TABLE census.address (
    persid   INT,
    postname STRING
)
CLUSTERED BY (persid) INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES('transactional' = 'true');

INSERT INTO TABLE census.address
VALUES
    (1,  'KA13'),
    (2,  'KA9'),
    (10, 'SW1');

-- Inner join: only persid values present in both tables are returned.
SELECT
    personname.firstname,
    personname.lastname,
    address.postname
FROM census.personname
INNER JOIN census.address
    ON personname.persid = address.persid;
9 changes: 9 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_MapJoin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Map-join example: ask Hive to hold the address table in memory while it
-- streams personname through the join.
USE census;

-- The MAPJOIN hint must name a table (or alias) that appears in the query;
-- address is not aliased here, so the hint refers to it by its table name.
-- NOTE(review): depending on configuration (hive.ignore.mapjoin.hint) Hive may
-- ignore join hints and decide on map joins itself -- confirm for your cluster.
SELECT /*+ MAPJOIN(address) */
    personname.firstname,
    personname.lastname
FROM
    census.personname
LEFT SEMI JOIN
    census.address
    ON (personname.persid = address.persid);
26 changes: 26 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_MultiJoin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- Multi-table join example: add an account table and join it together with
-- personname and address on persid.
USE census;

-- Account amounts keyed by persid. Declared as a bucketed, ORC-backed table
-- with the 'transactional' table property set.
CREATE TABLE census.account (
    persid  INT,
    bamount INT
)
CLUSTERED BY (persid) INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES('transactional' = 'true');

INSERT INTO TABLE census.account
VALUES
    (1, 12),
    (2, 9);

-- Two chained inner joins: a row is returned only when its persid exists in
-- personname, address and account.
SELECT
    personname.firstname,
    personname.lastname,
    address.postname,
    account.bamount
FROM census.personname
INNER JOIN census.address
    ON personname.persid = address.persid
INNER JOIN census.account
    ON personname.persid = account.persid;
30 changes: 30 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_OuterJoin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Outer-join examples: the same personname/address join in its LEFT, RIGHT
-- and FULL variants.
USE census;

-- LEFT: every personname row; postname is NULL where no address matches.
SELECT
    personname.firstname,
    personname.lastname,
    address.postname
FROM census.personname
LEFT OUTER JOIN census.address
    ON personname.persid = address.persid;


-- RIGHT: every address row; the name columns are NULL where no person matches.
SELECT
    personname.firstname,
    personname.lastname,
    address.postname
FROM census.personname
RIGHT OUTER JOIN census.address
    ON personname.persid = address.persid;


-- FULL: every row from both tables, NULL-padded on whichever side is missing.
SELECT
    personname.firstname,
    personname.lastname,
    address.postname
FROM census.personname
FULL OUTER JOIN census.address
    ON personname.persid = address.persid;
15 changes: 15 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonDelete.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- DELETE example: create a small table, remove one row, and show the table
-- before and after.
USE census;

-- Declared as a bucketed, ORC-backed table with the 'transactional' property
-- set. NOTE(review): presumably required for the DELETE below -- confirm for
-- your Hive version.
CREATE TABLE census.person30 (
    persid    INT,
    lastname  STRING,
    firstname STRING
)
CLUSTERED BY (persid) INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES('transactional' = 'true');

INSERT INTO TABLE census.person30
VALUES
    (0, 'A', 'B'),
    (2, 'X', 'Y');

-- Before: both rows are present.
SELECT * FROM census.person30;

-- Remove the row with persid 0.
DELETE FROM census.person30
WHERE persid = 0;

-- After: only the persid 2 row should remain.
SELECT * FROM census.person30;
11 changes: 11 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonDirectory.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Shell: list the current HDFS directory and create the export target in it.
hadoop fs -ls
hadoop fs -mkdir 'exampleoutput'
# Shell: start the Hive CLI; the lines up to "exit;" are entered at its prompt.
hive

-- Hive: write three columns of person into the directory as comma-delimited text.
USE census;
INSERT OVERWRITE DIRECTORY 'exampleoutput'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
SELECT
    persid,
    firstname,
    lastname
FROM person;
exit;

# Shell: list the export directory and print the output file 000000_0.
hadoop fs -ls 'exampleoutput'
hadoop fs -cat 'exampleoutput/000000_0'
9 changes: 9 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonHub.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- INSERT OVERWRITE vs INSERT INTO example on a key-only table.
USE census;

-- Holds nothing but the person key.
CREATE TABLE personhub (
    persid INT
);

-- OVERWRITE: the table content becomes exactly the distinct keys of person.
INSERT OVERWRITE TABLE personhub
SELECT DISTINCT persid
FROM person;

SELECT persid
FROM personhub
WHERE persid < 11;

-- INTO: the same distinct keys are appended to the rows already present.
INSERT INTO TABLE personhub
SELECT DISTINCT persid
FROM person;

SELECT persid
FROM personhub
WHERE persid < 11;

6 changes: 6 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonLike.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- CREATE TABLE ... LIKE example: copy the column definition of person into a
-- new table, show it, add two rows, and show it again.
USE census;
CREATE TABLE person40 LIKE person;
SELECT * FROM person40;

-- person (and therefore person40) is declared as (persid, lastname, firstname),
-- so each VALUES tuple lists the last name before the first name.
INSERT INTO TABLE person40 VALUES (0,'Burger','Bob'),(1,'Clown','Charlie');
SELECT * FROM person40;
11 changes: 11 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonTable.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Create the census database and its person table, then load the table from a
-- comma-delimited file on the local file system.
-- IF NOT EXISTS on both CREATE statements, together with LOAD ... OVERWRITE,
-- lets the script be re-run without failing on objects that already exist.
CREATE DATABASE IF NOT EXISTS census;
USE census;

CREATE TABLE IF NOT EXISTS person (
    persid    INT,
    lastname  STRING,
    firstname STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- OVERWRITE replaces any rows already in the table with the file's content.
LOAD DATA LOCAL INPATH 'file:///root/hive/example/person001' OVERWRITE INTO TABLE person;

SELECT persid, lastname, firstname FROM person;
16 changes: 16 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonUpdate.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- UPDATE example: create a small table, change one row, and read that row back.
USE census;

-- Declared as a bucketed, ORC-backed table with the 'transactional' property
-- set. NOTE(review): presumably required for the UPDATE below -- confirm for
-- your Hive version.
CREATE TABLE census.person20 (
    persid    INT,
    lastname  STRING,
    firstname STRING
)
CLUSTERED BY (persid) INTO 1 BUCKETS
STORED AS ORC
TBLPROPERTIES('transactional' = 'true');

-- Unqualified name resolves to census.person20 because of USE census above.
INSERT INTO TABLE person20
VALUES
    (0, 'A', 'B'),
    (2, 'X', 'Y');

-- Before: both rows as inserted.
SELECT * FROM census.person20;

-- Change the last name of the persid 0 row only.
UPDATE census.person20
SET lastname = 'SS'
WHERE persid = 0;

-- After: the updated row.
SELECT * FROM census.person20
WHERE persid = 0;
3 changes: 3 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_PersonValues.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- INSERT ... VALUES example: add the single key 0 to personhub and read it back.
USE census;

INSERT INTO TABLE personhub
VALUES (0);

SELECT persid
FROM personhub
WHERE persid = 0;
9 changes: 9 additions & 0 deletions ESM Ch 5 and 8 ExampleData/Script_SemiJoin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Semi-join example: list the persons that have at least one address row.
USE census;

-- LEFT SEMI JOIN returns columns from personname only; address is used solely
-- to decide which personname rows qualify.
SELECT
    personname.firstname,
    personname.lastname
FROM census.personname
LEFT SEMI JOIN census.address
    ON personname.persid = address.persid;
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
--#######################################################################################################################################
-- Rebuild the retrievedb database from scratch and load the three raw CSV
-- files (first names, last names, persons) into delimited text tables.
--#######################################################################################################################################
-- IF EXISTS keeps the very first run from failing when the database has not
-- been created yet; CASCADE also drops every table inside the database.
DROP DATABASE IF EXISTS retrievedb CASCADE;

--#######################################################################################################################################
CREATE DATABASE IF NOT EXISTS retrievedb;
USE retrievedb;

--#######################################################################################################################################
-- Per-table drops, kept for reference; the DROP DATABASE ... CASCADE above
-- already removes these tables.
--DROP TABLE IF EXISTS retrievedb.rawfirstname;
--DROP TABLE IF EXISTS retrievedb.rawlastname;
--DROP TABLE IF EXISTS retrievedb.rawperson;
--#######################################################################################################################################
-- First names: id, name and sex, all kept as strings.
CREATE TABLE IF NOT EXISTS retrievedb.rawfirstname (
    firstnameid STRING,
    firstname   STRING,
    sex         STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH 'file:///home/mapr/ExampleData/00rawdata/rawfirstname.csv' OVERWRITE INTO TABLE retrievedb.rawfirstname;

--#######################################################################################################################################
-- Last names: id and name.
CREATE TABLE IF NOT EXISTS retrievedb.rawlastname (
    lastnameid STRING,
    lastname   STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH 'file:///home/mapr/ExampleData/00rawdata/rawlastname.csv' OVERWRITE INTO TABLE retrievedb.rawlastname;

--#######################################################################################################################################
-- Persons: a person id plus the ids of its first and last name.
CREATE TABLE IF NOT EXISTS retrievedb.rawperson (
    persid      STRING,
    firstnameid STRING,
    lastnameid  STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH 'file:///home/mapr/ExampleData/00rawdata/rawperson.csv' OVERWRITE INTO TABLE retrievedb.rawperson;

--#######################################################################################################################################
Loading

0 comments on commit 9fa121c

Please sign in to comment.