Edit
BigData commands
# Start the environment from ~/bin.
# NOTE(review): quickstart.sh is not shown here — presumably it boots the
# Hadoop/Hive services; confirm before relying on it.
cd ~/bin
./quickstart.sh
cd ../data

# Create every HDFS target directory up front. -p creates missing parents and
# does not fail when the directory already exists, so these steps can be
# re-run and the uploads below do not depend on another script having made them.
hdfs dfs -mkdir -p /zipcodes /trips /jars

# Stream the unzipped WKT straight into HDFS ("-" makes put read stdin),
# so no extracted copy is written to local disk.
unzip -p zipcodes.wkt.zip | hdfs dfs -put - /zipcodes/zipcodes.wkt

# all_trips is prepared separately (not produced by these commands).
hdfs dfs -put all_trips /trips/all_trips

# Upload the jars in the data directory to /jars.
hdfs dfs -put *.jar /jars

# Open the beeline session; the !run commands that follow are typed into it.
./beeline_connect.sh
# Initial setup: run the two bootstrap scripts, in this order, from inside beeline.
# NOTE(review): function-ddl.sql presumably registers the ST_* functions and
# make_trips_table.hql presumably creates the `trips` table that the queries
# in this note read from — neither script is shown here; confirm.
!run function-ddl.sql
!run make_trips_table.hql
Set up the trips_tab table and then the trip_bins table:
-- trip_table.sql
-- Rebuild trips_tab: a Parquet table with one WKT point per trip, restricted
-- to the box 40 < plat < 41, -75 < plon < -71 (bounds are exclusive).
-- NOTE(review): plon/plat are presumably longitude/latitude in degrees — confirm
-- against the definition of `trips`.
drop table if exists trips_tab;

-- Plain CREATE (no IF NOT EXISTS): the table was just dropped, so the guard
-- would only hide a failed drop by silently skipping the rebuild.
create table trips_tab
stored as parquet
as
select
    ST_AsText(ST_Point(plon, plat)) as wkt
from trips
where plat > 40 and plat < 41
  and plon > -75 and plon < -71
-- No ORDER BY: this keeps an arbitrary 20000 matching rows, so the sample
-- can differ between runs.
limit 20000;
-- Rebuild trip_bins: one row per occupied bin, holding the bin's envelope as
-- WKT (shape) and the number of trip points that fall in it (population).
-- Only points with 40 < plat < 41 and -75 < plon < -71 are counted.
drop table if exists trip_bins;

create table trip_bins
stored as parquet
as
select
    -- The bin size passed here must match the one given to ST_Bin below.
    ST_AsText(ST_BinEnvelope(0.001, binned.bin_id)) as shape,
    count(*) as population
from (
    select
        ST_Bin(0.001, ST_Point(plon, plat)) as bin_id
    from trips
    where plat > 40 and plat < 41
      and plon > -75 and plon < -71
) as binned
group by binned.bin_id;
%23%23BigData%20commands%0A%0A@%28Postach.io%29%5Bpublished%5D%0A%0A%60%60%60bash%0Acd%20%7E%0Acd%20bin%0A./quickstart.sh%0Acd%20../data%0Aunzip%20-p%20zipcodes.wkt.zip%20%7C%20hdfs%20dfs%20-put%20-%20/zipcodes/zipcodes.wkt%0A%23all_trips%20prepared%20sepately%0Acat%20all_trips%20%7C%20hdfs%20dfs%20-put%20-%20/trips/all_trips%0Ahdfs%20dfs%20-mkdir%20/jars%0Ahdfs%20dfs%20-put%20*.jar%20/jars%0A./beeline_connect.sh%0A%60%60%60%0A%60%60%60sql%0A%09%20%20%23setup%20initial%20%0A%20%20%20%20%20%20%21run%20function-ddl.sql%0A%20%20%20%20%20%20%21run%20make_trips_table.hql%0A%60%60%60%0ASetup%20Trips%20and%20then%20trip_bins%0A%60%60%60sql%0A%23%20trip_table.sql%0Adrop%20table%20if%20exists%20trips_tab%3B%0Acreate%20table%20if%20not%20exists%20trips_tab%0Astored%20as%20parquet%20as%0Aselect%20ST_AsText%28ST_Point%28plon%2Cplat%29%29%20as%20WKT%20from%20trips%0Awhere%20%28%28plat%20%3E%2040%20and%20plat%20%3C41%29%20and%20%28plon%20%3E%20-75%20and%20plon%20%3C%20-71%29%29%20%0Alimit%2020000%3B%0A%0Adrop%20table%20if%20exists%20trip_bins%3B%0Acreate%20table%20trip_bins%0Astored%20as%20parquet%20as%0Aselect%20ST_AsText%28ST_BinEnvelope%280.001%2C%20t.bin_id%29%29%20shape%2Ccount%281%29%20as%20population%20from%20%28%0Aselect%20ST_Bin%280.001%2C%20ST_Point%28plon%2Cplat%29%29%20bin_id%20from%20trips%20where%20%28%28plat%20%3E%2040%20and%20plat%20%3C41%29%20and%20%28plon%20%3E%20-75%20and%20plon%20%3C%20-71%29%29%29%20as%20t%0Agroup%20by%20t.bin_id%3B%0A%60%60%60%0A%5Bgist%20url%3D%22https%3A//gist.github.com/jlovick/28bef2161b306c7514c9ae6489a4c455%22%5D%0A%5Bgist%20url%3D%22https%3A//gist.github.com/jlovick/32d5a55a7ab7d6cb26271d7ed235b7bc%22%5D%0A%5Bgist%20url%3D%22https%3A//gist.github.com/jlovick/c3561b1ea30c18bd44fdbf3434b9e194%22%5D