# Upload the two datasets and the three code archives over sftp.
# The transfer commands are passed to sftp through a here-document so this step
# also works when the file is executed as a script: listed as bare lines, the
# put commands would be handed to the local shell once sftp quits, and the
# trailing exit would end the script before any later step runs.
# NOTE(review): the user@host argument looks redacted in this copy -- replace
# it with the real login before running.
sftp -i cs4417-lab-5.pem [email protected] <<'SFTP_COMMANDS'
put starbucks-locations-sort.csv
put movies.dat
put Part1.zip
put Part2.zip
put Part3.zip
exit
SFTP_COMMANDS
# Open a shell on the remote machine, using the same key file and the same
# login string as the sftp upload.
# NOTE(review): the following commands reference the uploaded files and HDFS,
# so they are presumably meant to be entered in this remote session -- confirm.
# NOTE(review): the user@host argument looks redacted in this copy.
ssh -i cs4417-lab-5.pem [email protected]
# Stage both datasets in HDFS, one input directory per job.
# -p creates any missing parent directory and does not fail when a directory
# is already present, so one call replaces the three separate mkdir steps and
# the step can be repeated after a partial run.
hadoop fs -mkdir -p \
  /user/cloudera/inputAssignment1/Starbucks \
  /user/cloudera/inputAssignment1/Movies

# Copy each dataset from the local working directory into its input directory.
hadoop fs -copyFromLocal starbucks-locations-sort.csv /user/cloudera/inputAssignment1/Starbucks/starbucks-locations-sort.csv
hadoop fs -copyFromLocal movies.dat /user/cloudera/inputAssignment1/Movies/movies.dat

# List both input directories so the copied files can be checked by eye.
hadoop fs -ls /user/cloudera/inputAssignment1/Starbucks/
hadoop fs -ls /user/cloudera/inputAssignment1/Movies/
# Extract the three code archives in order. Each archive is attempted
# regardless of whether the previous extraction succeeded.
for archive in Part1.zip Part2.zip Part3.zip; do
  unzip "$archive"
done
# Part 1: run the streaming job over the Starbucks input, fetch its output,
# run the local query script, then remove the job output from HDFS.
cd Part1

# -files is the generic replacement for the deprecated streaming -file flag;
# generic options have to appear before the streaming-specific options.
hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.6.0-mr1-cdh5.12.0.jar \
  -files mapper.py,reducer.py \
  -mapper mapper.py \
  -reducer reducer.py \
  -input /user/cloudera/inputAssignment1/Starbucks \
  -output /user/cloudera/outputAssignment1/Starbucks

# The glob is quoted so that it reaches hadoop untouched and is matched
# against HDFS rather than being expanded by the local shell.
hadoop fs -cat '/user/cloudera/outputAssignment1/Starbucks/*'

# Merge every output file into the single local file cityInformation.
hadoop fs -getmerge '/user/cloudera/outputAssignment1/Starbucks/*' cityInformation

# NOTE(review): query.py presumably reads cityInformation -- confirm.
python query.py

# Remove the whole output directory in one recursive step. Deleting only the
# top-level files and then calling rmdir fails when a subdirectory is left
# behind, which would block the next run because the output path still exists.
hadoop fs -rm -r /user/cloudera/outputAssignment1/Starbucks

cd ..
# Part 2: purely local step -- run the indexer script, then the query script,
# from inside the Part2 directory, and return to the parent directory.
cd Part2
for script in indexer.py query.py; do
  python "$script"
done
cd ..
# Part 3: run the streaming job over the Movies input, fetch its output,
# run the local query script, then remove the job output from HDFS.
cd Part3

# -files is the generic replacement for the deprecated streaming -file flag;
# generic options have to appear before the streaming-specific options.
hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.6.0-mr1-cdh5.12.0.jar \
  -files mapper.py,reducer.py \
  -mapper mapper.py \
  -reducer reducer.py \
  -input /user/cloudera/inputAssignment1/Movies \
  -output /user/cloudera/outputAssignment1/Movies

# The glob is quoted so that it reaches hadoop untouched and is matched
# against HDFS rather than being expanded by the local shell.
hadoop fs -cat '/user/cloudera/outputAssignment1/Movies/*'

# Merge every output file into the single local file invertedIndex.
hadoop fs -getmerge '/user/cloudera/outputAssignment1/Movies/*' invertedIndex

# NOTE(review): query.py presumably reads invertedIndex -- confirm.
python query.py

# Remove the whole output directory in one recursive step. Deleting only the
# top-level files and then calling rmdir fails when a subdirectory is left
# behind, which would block the next run because the output path still exists.
hadoop fs -rm -r /user/cloudera/outputAssignment1/Movies