Pig Exercise-Part IV

A. Load Customer records
-- Customer records: comma-delimited text, five columns per record.
cust = LOAD '/input/custs' USING PigStorage(',') AS (
    custid:chararray,
    firstname:chararray,
    lastname:chararray,
    age:long,
    profession:chararray
);

B. Select only 100 records
-- Keep at most 100 customer tuples. There is no preceding ORDER, so which
-- 100 tuples are kept is not specified.
-- NOTE(review): despite its name, alias 'amt' holds customer tuples.
amt = LIMIT cust 100;
-- Print the limited relation to the console.
DUMP amt;

C. Group customer records by profession
-- One tuple per distinct profession: (group, bag of matching cust tuples).
groupbyprofession = GROUP cust BY profession;

D. Count the number of customers in each profession
-- One output tuple per profession: the group key and the size of its bag.
-- Both fields are named so later statements need not rely on $0/$1
-- (the positions are unchanged, so positional references still work).
-- COUNT skips tuples whose first field (custid) is null; switch to
-- COUNT_STAR if such records must be counted as well.
countbyprofession = FOREACH groupbyprofession GENERATE
    group AS profession,
    COUNT(cust) AS customercount;
-- Print the per-profession counts to the console.
DUMP countbyprofession;

E. Load transaction records
-- Transaction records: comma-delimited text, nine columns per record.
-- custid is the column later used to group transactions per customer.
txn = LOAD '/input/txns' USING PigStorage(',') AS (
    txnid:chararray,
    date:chararray,
    custid:chararray,
    amount:double,
    category:chararray,
    product:chararray,
    city:chararray,
    state:chararray,
    type:chararray
);

F. Group transactions by customer
-- One tuple per distinct custid: (group, bag of that customer's txn tuples).
txnbycust = GROUP txn BY custid;

G. Sum total amount spent by each customer
-- Total amount per customer: the group key (custid) and the sum of the
-- amount column over that customer's bag of transactions.
-- Both fields are named so the schema is self-describing; the positions are
-- unchanged, so existing $0/$1 references keep working.
spendbycust = FOREACH txnbycust GENERATE
    group AS custid,
    SUM(txn.amount) AS totalspend;

H. Order the customers by total amount spent, highest spender first
-- Sort by the second field (the summed amount), largest total first.
custorder = ORDER spendbycust BY $1 DESC;

I. Select only top 100 customers
-- Keep the first 100 tuples of the sorted relation, i.e. the top spenders.
top100cust = LIMIT custorder 100;

J. Join the top 100 spenders with their customer details
-- Inner join on customer id: first field of top100cust against cust.custid.
-- Output columns are the top100cust fields followed by the cust fields.
top100join = JOIN top100cust BY $0, cust BY custid;
-- Show the joined schema so the field positions can be checked.
DESCRIBE top100join;

K. Select the required fields from the join for final output
-- Project the report columns with explicit names rather than bare positions.
-- In top100join, $0 is the customer id and $1 the summed amount (both from
-- top100cust); the remaining columns come from cust.
top100fields = FOREACH top100join GENERATE
    $0 AS custid,
    cust::firstname AS firstname,
    cust::lastname AS lastname,
    cust::age AS age,
    cust::profession AS profession,
    $1 AS totalspend;
-- JOIN does not preserve the earlier ORDER, so sort again as the last step
-- to make the final output run from highest to lowest spender.
top100 = ORDER top100fields BY totalspend DESC;
-- Show the schema of the final relation.
DESCRIBE top100;

L. Dump the final output
-- Print the final top-customer relation to the console.
DUMP top100;

