Hadoop Hive notes
Published by Joe Lovick on June 20th, 2017
Some notes for when I get started again.
Place the data into Hadoop (HDFS) so that it can be loaded from Hive:
# Stream the zip archive's contents to stdout (unzip -p) and write that stream
# to HDFS; the "-" source tells `hdfs dfs -put` to read from stdin.
unzip -p zipcodes.wkt.zip | hdfs dfs -put - /zipcodes/zipcodes.wkt
# Same stdin-based upload for the local all_trips file.
cat all_trips | hdfs dfs -put - /trips/all_trips
Sample connection string when running on localhost:
beeline -u "jdbc:hive2://localhost:10000/default;auth=NOSASL" -n root -p ""
Run a shell command from within Beeline:
!sh ls
Run a script from within Beeline:
!run /tmp/test.hql
Hive instructions
First, put the jar files into Hadoop (HDFS):
# Create the HDFS directory that will hold the jars. -p makes this step
# succeed when /jars already exists, so the notes can be re-run as-is.
hdfs dfs -mkdir -p /jars
# Upload every jar in the current local directory into /jars.
hdfs dfs -put *.jar /jars
Then add them to the Hive session and register the spatial functions:
-- Make the Esri geometry and spatial-SDK jars (stored in HDFS under /jars)
-- available to the current Hive session. The jars must be added before the
-- functions below are created, since the functions reference classes in them.
ADD JAR hdfs:///jars/esri-geometry-api-1.2.1.jar;
ADD JAR hdfs:///jars/spatial-sdk-hive-1.1.1-SNAPSHOT.jar;
ADD JAR hdfs:///jars/spatial-sdk-json-1.1.1-SNAPSHOT.jar;

-- Register session-scoped (temporary) functions backed by the
-- com.esri.hadoop.hive classes. Listed alphabetically; the statements are
-- independent of one another, so their order does not matter.
CREATE TEMPORARY FUNCTION ST_AsGeoJSON AS 'com.esri.hadoop.hive.ST_AsGeoJson';
CREATE TEMPORARY FUNCTION ST_AsText AS 'com.esri.hadoop.hive.ST_AsText';
CREATE TEMPORARY FUNCTION ST_Bin AS 'com.esri.hadoop.hive.ST_Bin';
CREATE TEMPORARY FUNCTION ST_BinEnvelope AS 'com.esri.hadoop.hive.ST_BinEnvelope';
CREATE TEMPORARY FUNCTION ST_Contains AS 'com.esri.hadoop.hive.ST_Contains';
CREATE TEMPORARY FUNCTION ST_GeomFromText AS 'com.esri.hadoop.hive.ST_GeomFromText';
CREATE TEMPORARY FUNCTION ST_Point AS 'com.esri.hadoop.hive.ST_Point';
CREATE TEMPORARY FUNCTION ST_Within AS 'com.esri.hadoop.hive.ST_Within';
CREATE TEMPORARY FUNCTION ST_X AS 'com.esri.hadoop.hive.ST_X';
CREATE TEMPORARY FUNCTION ST_Y AS 'com.esri.hadoop.hive.ST_Y';
Create a table bounded in latitude and longitude.
Bin trips by zip code.
Full list of all the functions here...