832 lines
156 KiB
Plaintext
832 lines
156 KiB
Plaintext
[38;5;12m [39m[38;2;255;187;0m[1m[4mAwesome Big Data[0m
|
||
|
||
[38;5;14m[1m![0m[38;5;12mAwesome[39m[38;5;14m[1m (https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)[0m[38;5;12m (https://github.com/sindresorhus/awesome)[39m
|
||
|
||
[38;5;12mA[39m[38;5;12m [39m[38;5;12mcurated[39m[38;5;12m [39m[38;5;12mlist[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mawesome[39m[38;5;12m [39m[38;5;12mbig[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mframeworks,[39m[38;5;12m [39m[38;5;12mresources[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mother[39m[38;5;12m [39m[38;5;12mawesomeness.[39m[38;5;12m [39m[38;5;12mInspired[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;14m[1mawesome-php[0m[38;5;12m [39m[38;5;12m(https://github.com/ziadoz/awesome-php),[39m[38;5;12m [39m[38;5;14m[1mawesome-python[0m[38;5;12m [39m[38;5;12m(https://github.com/vinta/awesome-python),[39m[38;5;12m [39m[38;5;14m[1mawesome-ruby[0m[38;5;12m [39m
|
||
[38;5;12m(https://github.com/Sdogruyol/awesome-ruby),[39m[38;5;12m [39m[38;5;14m[1mhadoopecosystemtable[0m[38;5;12m [39m[38;5;12m(http://hadoopecosystemtable.github.io/)[39m[38;5;12m [39m[38;5;12m&[39m[38;5;12m [39m[38;5;14m[1mbig-data[0m[38;5;12m [39m[38;5;12m(http://usefulstuff.io/big-data/).[39m
|
||
|
||
[38;5;12mYour contributions are always welcome![39m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mAwesome Big Data[0m[38;5;12m (#awesome-big-data)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mRDBMS[0m[38;5;12m (#rdbms)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mFrameworks[0m[38;5;12m (#frameworks)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mDistributed Programming[0m[38;5;12m (#distributed-programming)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mDistributed Filesystem[0m[38;5;12m (#distributed-filesystem)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mDistributed Index[0m[38;5;12m (#distributed-index)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mDocument Data Model[0m[38;5;12m (#document-data-model)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mKey Map Data Model[0m[38;5;12m (#key-map-data-model)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mKey-value Data Model[0m[38;5;12m (#key-value-data-model)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mGraph Data Model[0m[38;5;12m (#graph-data-model)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mColumnar Databases[0m[38;5;12m (#columnar-databases)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mNewSQL Databases[0m[38;5;12m (#newsql-databases)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mTime-Series Databases[0m[38;5;12m (#time-series-databases)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mSQL-like processing[0m[38;5;12m (#sql-like-processing)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mData Ingestion[0m[38;5;12m (#data-ingestion)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mService Programming[0m[38;5;12m (#service-programming)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mScheduling[0m[38;5;12m (#scheduling)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mMachine Learning[0m[38;5;12m (#machine-learning)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mBenchmarking[0m[38;5;12m (#benchmarking)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mSecurity[0m[38;5;12m (#security)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mSystem Deployment[0m[38;5;12m (#system-deployment)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mApplications[0m[38;5;12m (#applications)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mSearch engine and framework[0m[38;5;12m (#search-engine-and-framework)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mMySQL forks and evolutions[0m[38;5;12m (#mysql-forks-and-evolutions)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mPostgreSQL forks and evolutions[0m[38;5;12m (#postgresql-forks-and-evolutions)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mMemcached forks and evolutions[0m[38;5;12m (#memcached-forks-and-evolutions)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mEmbedded Databases[0m[38;5;12m (#embedded-databases)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mBusiness Intelligence[0m[38;5;12m (#business-intelligence)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mData Visualization[0m[38;5;12m (#data-visualization)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mInternet of things and sensor data[0m[38;5;12m (#internet-of-things-and-sensor-data)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mInteresting Readings[0m[38;5;12m (#interesting-readings)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mInteresting Papers[0m[38;5;12m (#interesting-papers)[39m
|
||
[48;5;235m[38;5;249m- **2015 - 2016** (#2015---2016)[49m[39m
|
||
[48;5;235m[38;5;249m- **2013 - 2014** (#2013---2014)[49m[39m
|
||
[48;5;235m[38;5;249m- **2011 - 2012** (#2011---2012)[49m[39m
|
||
[48;5;235m[38;5;249m- **2001 - 2010** (#2001---2010)[49m[39m
|
||
[38;5;12m - [39m[38;5;14m[1mVideos[0m[38;5;12m (#videos)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mBooks[0m[38;5;12m (#books)[39m
|
||
[48;5;235m[38;5;249m - **Streaming** (#streaming)[49m[39m[48;5;235m[38;5;249m [49m[39m
|
||
[48;5;235m[38;5;249m - **Distributed systems** (#distributed-systems)[49m[39m[48;5;235m[38;5;249m [49m[39m
|
||
[48;5;235m[38;5;249m - **Graph Based approach** (#graph-based-approach)[49m[39m
|
||
[48;5;235m[38;5;249m- **Data Visualization** (#data-visualization-1)[49m[39m[48;5;235m[38;5;249m [49m[39m
|
||
[38;5;12m- [39m[38;5;14m[1mOther Awesome Lists[0m[38;5;12m (#other-awesome-lists)[39m
|
||
|
||
[38;2;255;187;0m[4mRDBMS[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMySQL[0m[38;5;12m (https://www.mysql.com/) The world's most popular open source database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPostgreSQL[0m[38;5;12m (https://www.postgresql.org/) The world's most advanced open source database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOracle Database[0m[38;5;12m (http://www.oracle.com/us/corporate/features/database-12c/index.html) - object-relational database management system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTeradata[0m[38;5;12m (http://www.teradata.com/products-and-services/teradata-database/) - high-performance MPP data warehouse platform.[39m
|
||
|
||
[38;2;255;187;0m[4mFrameworks[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBistro[0m[38;5;12m [39m[38;5;12m(https://github.com/facebook/bistro)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mgeneral-purpose[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mprocessing[39m[38;5;12m [39m[38;5;12mengine[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mboth[39m[38;5;12m [39m[38;5;12mbatch[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mstream[39m[38;5;12m [39m[38;5;12manalytics.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mnovel[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mmodel,[39m[38;5;12m [39m[38;5;12mwhich[39m[38;5;12m [39m[38;5;12mrepresents[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mvia[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3mfunctions[0m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mprocesses[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mvia[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3mcolumn[0m[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;13m[3moperations[0m[38;5;12m [39m
|
||
[38;5;12mas[39m[38;5;12m [39m[38;5;12mopposed[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mhaving[39m[38;5;12m [39m[38;5;12monly[39m[38;5;12m [39m[38;5;12mset[39m[38;5;12m [39m[38;5;12moperations[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mconventional[39m[38;5;12m [39m[38;5;12mapproaches[39m[38;5;12m [39m[38;5;12mlike[39m[38;5;12m [39m[38;5;12mMapReduce[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12mSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIBM Streams[0m
|
||
[38;5;12m (https://www.ibm.com/analytics/us/en/technology/stream-computing/) - platform for distributed processing and real-time analytics. Integrates with many of the popular technologies in the Big Data ecosystem (Kafka, HDFS, Spark, etc.)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Hadoop[0m[38;5;12m (http://hadoop.apache.org/) - framework for distributed processing. Integrates MapReduce (parallel processing), YARN (job scheduling) and HDFS (distributed file system).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTigon[0m[38;5;12m (https://github.com/caskdata/tigon) - High Throughput Real-time Stream Processing Framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPachyderm[0m[38;5;12m (http://pachyderm.io/) - Pachyderm is a data storage platform built on Docker and Kubernetes to provide reproducible data processing and analysis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPolyaxon[0m[38;5;12m (https://github.com/polyaxon/polyaxon) - A platform for reproducible and scalable machine learning and deep learning.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSmooks[0m[38;5;12m (https://github.com/smooks/smooks) - An extensible Java framework for building XML and non-XML (CSV, EDI, Java, etc...) streaming applications.[39m
|
||
|
||
[38;2;255;187;0m[4mDistributed Programming[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAddThis Hydra[0m[38;5;12m (https://github.com/addthis/hydra) - distributed data processing and storage system originally developed at AddThis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAMPLab SIMR[0m[38;5;12m (http://databricks.github.io/simr/) - run Spark on Hadoop MapReduce v1.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache APEX[0m[38;5;12m (https://apex.apache.org/) - a unified, enterprise platform for big data stream and batch processing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Beam[0m[38;5;12m (https://beam.apache.org/) - a unified model and set of language-specific SDKs for defining and executing data processing workflows.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Crunch[0m[38;5;12m (http://crunch.apache.org/) - a simple Java API for tasks like joining and data aggregation that are tedious to implement on plain MapReduce.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache DataFu[0m[38;5;12m (http://incubator.apache.org/projects/datafu.html) - collection of user-defined functions for Hadoop and Pig developed by LinkedIn.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Flink[0m[38;5;12m (http://flink.apache.org/) - high-performance runtime, and automatic program optimization.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Gearpump[0m[38;5;12m (http://gearpump.apache.org/) - real-time big data streaming engine based on Akka.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Gora[0m[38;5;12m (http://gora.apache.org/) - framework for in-memory data model and persistence.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Hama[0m[38;5;12m (http://hama.apache.org/) - BSP (Bulk Synchronous Parallel) computing framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache MapReduce[0m[38;5;12m (https://wiki.apache.org/hadoop/MapReduce/) - programming model for processing large data sets with a parallel, distributed algorithm on a cluster.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Pig[0m[38;5;12m (https://pig.apache.org/) - high level language to express data analysis programs for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache REEF[0m[38;5;12m (http://reef.apache.org/) - retainable evaluator execution framework to simplify and unify the lower layers of big data systems.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache S4[0m[38;5;12m (http://incubator.apache.org/projects/s4.html) - framework for stream processing, implementation of S4.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Spark[0m[38;5;12m (http://spark.apache.org/) - framework for in-memory cluster computing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Spark Streaming[0m[38;5;12m (https://spark.apache.org/docs/latest/streaming-programming-guide.html) - framework for stream processing, part of Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Storm[0m[38;5;12m (http://storm.apache.org) - framework for stream processing by Twitter, also available on YARN.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Samza[0m[38;5;12m (http://samza.apache.org/) - stream processing framework, based on Kafka and YARN.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Tez[0m[38;5;12m (http://tez.apache.org/) - application framework for executing a complex DAG (directed acyclic graph) of tasks, built on YARN.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Twill[0m[38;5;12m (https://incubator.apache.org/projects/twill.html) - abstraction over YARN that reduces the complexity of developing distributed applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBaidu Bigflow[0m[38;5;12m (http://bigflow.cloud/en/index.html) - an interface that allows for writing distributed computing programs providing lots of simple, flexible, powerful APIs to easily handle data of any scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCascalog[0m[38;5;12m (http://cascalog.org/) - data processing and querying library.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCheetah[0m[38;5;12m (http://vldbarc.org/pvldb/vldb2010/pvldb_vol3/I08.pdf) - High Performance, Custom Data Warehouse on Top of MapReduce.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mConcurrent Cascading[0m[38;5;12m (http://www.cascading.org/) - framework for data management/analytics on Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDamballa Parkour[0m[38;5;12m (https://github.com/damballa/parkour) - MapReduce library for Clojure.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDatasalt Pangool[0m[38;5;12m (https://github.com/datasalt/pangool) - alternative MapReduce paradigm.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDataTorrent StrAM[0m
|
||
[38;5;12m (https://www.datatorrent.com/) - real-time engine designed to enable distributed, asynchronous, real-time in-memory big-data computations in as unblocked a way as possible, with minimal overhead and impact on performance.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Corona[0m[38;5;12m (https://www.facebook.com/notes/facebook-engineering/under-the-hood-scheduling-mapreduce-jobs-more-efficiently-with-corona/10151142560538920) - Hadoop enhancement which removes single point of failure.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Peregrine[0m[38;5;12m (http://peregrine_mapreduce.bitbucket.org/) - Map Reduce framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Scuba[0m[38;5;12m (https://www.facebook.com/notes/facebook-engineering/under-the-hood-data-diving-with-scuba/10150599692628920) - distributed in-memory datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Dataflow[0m[38;5;12m (https://googledevelopers.blogspot.it/2014/06/cloud-platform-at-google-io-new-big.html) - create data pipelines to help them ingest, transform and analyze data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle MapReduce[0m[38;5;12m (https://research.google.com/archive/mapreduce.html) - map reduce framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle MillWheel[0m[38;5;12m (https://research.google.com/pubs/pub41378.html) - fault tolerant stream processing framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIBM Streams[0m[38;5;12m (https://www.ibm.com/analytics/us/en/technology/stream-computing/) - platform for distributed processing and real-time analytics. Provides toolkits for advanced analytics like geospatial, time series, etc. out of the box.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJAQL[0m[38;5;12m (https://code.google.com/p/jaql/) - declarative programming language for working with structured, semi-structured and unstructured data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKite[0m[38;5;12m (http://kitesdk.org/docs/current/) - is a set of libraries, tools, examples, and documentation focused on making it easier to build systems on top of the Hadoop ecosystem.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMetamarkets Druid[0m[38;5;12m (http://druid.io/) - framework for real-time analysis of large datasets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNetflix PigPen[0m[38;5;12m (https://github.com/Netflix/PigPen) - map-reduce for Clojure which compiles to Apache Pig.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNokia Disco[0m[38;5;12m (http://discoproject.org/) - MapReduce framework developed by Nokia.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOnyx[0m[38;5;12m (http://www.onyxplatform.org/) - Distributed computation for the cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPinterest Pinlater[0m[38;5;12m (https://medium.com/@Pinterest_Engineering/pinlater-an-asynchronous-job-execution-system-b8664cb8aa7d) - asynchronous job execution system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPydoop[0m[38;5;12m (http://crs4.github.io/pydoop/) - Python MapReduce and HDFS API for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRay[0m[38;5;12m (https://github.com/ray-project/ray) - A fast and simple framework for building and running distributed applications. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRackerlabs Blueflood[0m[38;5;12m (http://blueflood.io/) - multi-tenant distributed metric processing system[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSkale[0m[38;5;12m (https://github.com/skale-me/skale-engine) - High performance distributed data processing in NodeJS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStratosphere[0m[38;5;12m (http://stratosphere.eu/) - general purpose cluster computing framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStreamdrill[0m[38;5;12m (https://streamdrill.com/) - useful for counting activities of event streams over different time windows and finding the most active one.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mstreamsx.topology[0m[38;5;12m (https://github.com/IBMStreams/streamsx.topology) - Libraries to enable building IBM Streams application in Java, Python or Scala.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTuktu[0m[38;5;12m (https://github.com/UnderstandLingBV/Tuktu) - Easy-to-use platform for batch and streaming computation, built using Scala, Akka and Play![39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Heron[0m[38;5;12m (https://github.com/twitter/heron) - Heron is a realtime, distributed, fault-tolerant stream processing engine from Twitter replacing Storm.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Scalding[0m[38;5;12m (https://github.com/twitter/scalding) - Scala library for Map Reduce jobs, built on Cascading.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Summingbird[0m[38;5;12m (https://github.com/twitter/summingbird) - Streaming MapReduce with Scalding and Storm, by Twitter.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter TSAR[0m[38;5;12m (https://blog.twitter.com/engineering/en_us/a/2014/tsar-a-timeseries-aggregator.html) - TimeSeries AggregatoR by Twitter.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWallaroo[0m[38;5;12m (http://www.wallaroolabs.com/community) - The ultrafast and elastic data processing engine. Big or fast data - no fuss, no Java needed.[39m
|
||
|
||
[38;2;255;187;0m[4mDistributed Filesystem[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmbry[0m[38;5;12m (https://github.com/linkedin/ambry) - a distributed object store that supports storage of trillions of small immutable objects as well as billions of large objects.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache HDFS[0m[38;5;12m (http://hadoop.apache.org/) - a way to store large files across multiple machines.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Kudu[0m[38;5;12m (http://kudu.apache.org/) - Hadoop's storage layer to enable fast analytics on fast data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBeeGFS[0m[38;5;12m (https://www.beegfs.io/content/) - formerly FhGFS, parallel distributed file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCeph Filesystem[0m[38;5;12m (http://ceph.com/ceph-storage/file-system/) - software storage platform designed for scalable distributed file storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDisco DDFS[0m[38;5;12m (http://disco.readthedocs.org/en/latest/howto/ddfs.html) - distributed filesystem.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Haystack[0m[38;5;12m (https://www.facebook.com/note.php?note_id=76191543919) - object storage system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle GFS[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/gfs-sosp2003.pdf) - distributed filesystem.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Megastore[0m[38;5;12m (https://research.google.com/pubs/pub36971.html) - scalable, highly available storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGridGain[0m[38;5;12m (https://www.gridgain.com/) - GGFS, Hadoop compliant in-memory file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLustre file system[0m[38;5;12m (http://wiki.lustre.org/) - high-performance distributed filesystem.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMicrosoft Azure Data Lake Store[0m[38;5;12m (https://hadoop.apache.org/docs/current/hadoop-azure-datalake/index.html) - HDFS-compatible storage in Azure cloud[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mQuantcast File System QFS[0m[38;5;12m (https://www.quantcast.com/about-us/quantcast-file-system/) - open-source distributed file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRed Hat GlusterFS[0m[38;5;12m (http://gluster.org/) - scale-out network-attached storage file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSeaweed-FS[0m[38;5;12m (https://github.com/chrislusf/seaweedfs) - simple and highly scalable distributed file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAlluxio[0m[38;5;12m (http://www.alluxio.org/) - reliable file sharing at memory speed across cluster frameworks.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTahoe-LAFS[0m[38;5;12m (https://www.tahoe-lafs.org/trac/tahoe-lafs) - decentralized cloud storage system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBaidu File System[0m[38;5;12m (https://github.com/baidu/bfs) - distributed filesystem.[39m
|
||
|
||
[38;2;255;187;0m[4mDistributed Index[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPilosa[0m[38;5;12m (https://github.com/pilosa/pilosa) Open source distributed bitmap index that dramatically accelerates queries across multiple, massive data sets. [39m
|
||
|
||
[38;2;255;187;0m[4mDocument Data Model[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActian Versant[0m[38;5;12m (https://www.actian.com/data-management/ingres-sql-rdbms/) - commercial object-oriented database management system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCrate Data[0m[38;5;12m (https://crate.io/) - is an open source massively scalable data store. It requires zero administration.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Apollo[0m[38;5;12m (http://www.infoq.com/news/2014/06/facebook-apollo) - Facebook’s Paxos-like NoSQL database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mjumboDB[0m[38;5;12m (http://comsysto.github.io/jumbodb/) - document oriented datastore over Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Espresso[0m[38;5;12m (https://engineering.linkedin.com/data) - horizontally scalable document-oriented NoSQL data store.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMarkLogic[0m[38;5;12m (http://www.marklogic.com/) - Schema-agnostic Enterprise NoSQL database technology.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMicrosoft Azure DocumentDB[0m[38;5;12m (https://azure.microsoft.com/en-us/services/cosmos-db/) - NoSQL cloud database service with protocol support for MongoDB [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMongoDB[0m[38;5;12m (https://www.mongodb.com/) - Document-oriented database system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRavenDB[0m[38;5;12m (https://ravendb.net/) - A transactional, open-source Document Database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRethinkDB[0m[38;5;12m (https://rethinkdb.com/) - document database that supports queries like table joins and group by.[39m
|
||
|
||
[38;2;255;187;0m[4mKey Map Data Model[0m
|
||
|
||
[38;5;14m[1mNote[0m[38;5;12m:[39m[38;5;12m [39m[38;5;12mThere[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12msome[39m[38;5;12m [39m[38;5;12mterm[39m[38;5;12m [39m[38;5;12mconfusion[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mindustry,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mtwo[39m[38;5;12m [39m[38;5;12mdifferent[39m[38;5;12m [39m[38;5;12mthings[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mcalled[39m[38;5;12m [39m[38;5;12m"Columnar[39m[38;5;12m [39m[38;5;12mDatabases".[39m[38;5;12m [39m[38;5;12mSome,[39m[38;5;12m [39m[38;5;12mlisted[39m[38;5;12m [39m[38;5;12mhere,[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mdistributed,[39m[38;5;12m [39m[38;5;12mpersistent[39m[38;5;12m [39m[38;5;12mdatabases[39m[38;5;12m [39m[38;5;12mbuilt[39m[38;5;12m [39m[38;5;12maround[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12m"key-map"[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mmodel:[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mhas[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12m(possibly[39m[38;5;12m [39m
|
||
[38;5;12mcomposite)[39m[38;5;12m [39m[38;5;12mkey,[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mwhich[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mmap[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mkey-value[39m[38;5;12m [39m[38;5;12mpairs[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12massociated.[39m[38;5;12m [39m[38;5;12mIn[39m[38;5;12m [39m[38;5;12msome[39m[38;5;12m [39m[38;5;12msystems,[39m[38;5;12m [39m[38;5;12mmultiple[39m[38;5;12m [39m[38;5;12msuch[39m[38;5;12m [39m[38;5;12mvalue[39m[38;5;12m [39m[38;5;12mmaps[39m[38;5;12m [39m[38;5;12mcan[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12massociated[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mkey,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mthese[39m[38;5;12m [39m[38;5;12mmaps[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mreferred[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12m"column[39m[38;5;12m [39m[38;5;12mfamilies"[39m[38;5;12m [39m[38;5;12m(with[39m[38;5;12m [39m[38;5;12mvalue[39m[38;5;12m [39m[38;5;12mmap[39m[38;5;12m [39m[38;5;12mkeys[39m[38;5;12m [39m[38;5;12mbeing[39m[38;5;12m [39m[38;5;12mreferred[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m
|
||
[38;5;12m"columns").[39m
|
||
|
||
[38;5;12mAnother[39m[38;5;12m [39m[38;5;12mgroup[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mtechnologies[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mcan[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mcalled[39m[38;5;12m [39m[38;5;12m"columnar[39m[38;5;12m [39m[38;5;12mdatabases"[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mdistinguished[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mhow[39m[38;5;12m [39m[38;5;12mit[39m[38;5;12m [39m[38;5;12mstores[39m[38;5;12m [39m[38;5;12mdata,[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mdisk[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mmemory[39m[38;5;12m [39m[38;5;12m--[39m[38;5;12m [39m[38;5;12mrather[39m[38;5;12m [39m[38;5;12mthan[39m[38;5;12m [39m[38;5;12mstoring[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mtraditional[39m[38;5;12m [39m[38;5;12mway,[39m[38;5;12m [39m[38;5;12mwhere[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[38;5;12mcolumn[39m[38;5;12m [39m[38;5;12mvalues[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mgiven[39m[38;5;12m [39m[38;5;12mkey[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mstored[39m[38;5;12m [39m
|
||
[38;5;12mnext[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12meach[39m[38;5;12m [39m[38;5;12mother,[39m[38;5;12m [39m[38;5;12m"row[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mrow",[39m[38;5;12m [39m[38;5;12mthese[39m[38;5;12m [39m[38;5;12msystems[39m[38;5;12m [39m[38;5;12mstore[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3mcolumn[0m[38;5;12m [39m[38;5;12mvalues[39m[38;5;12m [39m[38;5;12mnext[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12meach[39m[38;5;12m [39m[38;5;12mother.[39m[38;5;12m [39m[38;5;12mSo[39m[38;5;12m [39m[38;5;12mmore[39m[38;5;12m [39m[38;5;12mwork[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mneeded[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mget[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[38;5;12mcolumns[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mgiven[39m[38;5;12m [39m[38;5;12mkey,[39m[38;5;12m [39m[38;5;12mbut[39m[38;5;12m [39m[38;5;12mless[39m[38;5;12m [39m[38;5;12mwork[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mneeded[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mget[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[38;5;12mvalues[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mgiven[39m[38;5;12m [39m[38;5;12mcolumn.[39m
|
||
|
||
[38;5;12mThe former group is referred to as "key map data model" here. The line between these and the [39m[38;5;14m[1mKey-value Data Model[0m[38;5;12m (#key-value-data-model) stores is fairly blurry.[39m
|
||
|
||
[38;5;12mThe latter, being more about the storage format than about the data model, is listed under [39m[38;5;14m[1mColumnar Databases[0m[38;5;12m (#columnar-databases).[39m
|
||
|
||
[38;5;12mYou can read more about this distinction on Prof. Daniel Abadi's blog: [39m[38;5;14m[1mDistinguishing two major types of Column Stores[0m[38;5;12m (http://dbmsmusings.blogspot.com/2010/03/distinguishing-two-major-types-of_29.html).[39m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Accumulo[0m[38;5;12m (http://accumulo.apache.org/) - distributed key/value store, built on Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Cassandra[0m[38;5;12m (http://cassandra.apache.org/) - column-oriented distributed datastore, inspired by BigTable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache HBase[0m[38;5;12m (http://hbase.apache.org/) - column-oriented distributed datastore, inspired by BigTable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBaidu Tera[0m[38;5;12m (https://github.com/baidu/tera) - an Internet-scale database, inspired by BigTable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook HydraBase[0m[38;5;12m (https://code.facebook.com/posts/321111638043166/hydrabase-the-evolution-of-hbase-facebook/) - evolution of HBase made by Facebook.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle BigTable[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/bigtable-osdi06.pdf) - column-oriented distributed datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Cloud Datastore[0m[38;5;12m (https://cloud.google.com/datastore/docs/concepts/overview) - is a fully managed, schemaless database for storing non-relational data over BigTable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHypertable[0m[38;5;12m (http://www.hypertable.org/) - column-oriented distributed datastore, inspired by BigTable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInfiniDB[0m[38;5;12m (https://github.com/infinidb/infinidb/) - is accessed through a MySQL interface and uses massively parallel processing to parallelize queries.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTephra[0m[38;5;12m (https://github.com/caskdata/tephra) - Transactions for HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Manhattan[0m[38;5;12m (https://blog.twitter.com/engineering/en_us/a/2014/manhattan-our-real-time-multi-tenant-distributed-database-for-twitter-scale.html) - real-time, multi-tenant distributed database for Twitter scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mScyllaDB[0m[38;5;12m (http://www.scylladb.com/) - column-oriented distributed datastore written in C++, totally compatible with Apache Cassandra.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mKey-value Data Model[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAerospike[0m[38;5;12m (http://www.aerospike.com/) - NoSQL flash-optimized, in-memory. Open source and "Server code in 'C' (not Java or Erlang) precisely tuned to avoid context switching and memory copies."[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon DynamoDB[0m[38;5;12m (https://aws.amazon.com/dynamodb/) - distributed key/value store, implementation of Dynamo paper.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBadger[0m[38;5;12m (https://open.dgraph.io/post/badger/) - a fast, simple, efficient, and persistent key-value store written natively in Go.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBolt[0m[38;5;12m (https://github.com/boltdb/bolt) - an embedded key-value database for Go.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBTDB[0m[38;5;12m (https://github.com/Bobris/BTDB) - Key Value Database in .Net with Object DB Layer, RPC, dynamic IL and much more[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBuntDB[0m[38;5;12m (https://github.com/tidwall/buntdb) - a fast, embeddable, in-memory key/value database for Go with custom indexing and geospatial support.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEdis[0m[38;5;12m (https://github.com/cbd/edis) - is a protocol-compatible Server replacement for Redis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mElephantDB[0m[38;5;12m (https://github.com/nathanmarz/elephantdb) - Distributed database specialized in exporting data from Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEventStore[0m[38;5;12m (https://geteventstore.com/) - distributed time series database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGhostDB[0m[38;5;12m (https://github.com/jakekgrog/GhostDB) - a distributed, in-memory, general purpose key-value data store that delivers microsecond performance at any scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraviton[0m[38;5;12m (https://github.com/deroproject/graviton) - a simple, fast, versioned, authenticated, embeddable key-value store database in pure Go(lang).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGridDB[0m[38;5;12m (https://github.com/griddb/griddb_nosql) - suitable for sensor data stored in a timeseries.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHyperDex[0m[38;5;12m (https://github.com/rescrv/HyperDex) - a scalable, next generation key-value and document store with a wide array of features, including consistency, fault tolerance and high performance.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIgnite[0m[38;5;12m (https://ignite.apache.org/index.html) - is an in-memory key-value data store providing full SQL-compliant data access that can optionally be backed by disk storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Krati[0m[38;5;12m (https://github.com/linkedin-sna/sna-page/tree/master/krati) - is a simple persistent data store with very low latency and high throughput.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Voldemort[0m[38;5;12m (http://www.project-voldemort.com/voldemort/) - distributed key/value storage system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOracle NoSQL Database[0m[38;5;12m (http://www.oracle.com/technetwork/database/database-technologies/nosqldb/overview/index.html) - distributed key-value database by Oracle Corporation.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRedis[0m[38;5;12m (https://redis.io/) - in memory key value datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRiak[0m[38;5;12m (https://github.com/basho/riak) - a decentralized datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStorehaus[0m[38;5;12m (https://github.com/twitter/storehaus) - library to work with asynchronous key value stores, by Twitter.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSummitDB[0m[38;5;12m (https://github.com/tidwall/summitdb) - an in-memory, NoSQL key/value database, with disk persistence and using the Raft consensus algorithm.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTarantool[0m[38;5;12m (https://github.com/tarantool/tarantool) - an efficient NoSQL database and a Lua application server.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTiKV[0m[38;5;12m (https://github.com/pingcap/tikv) - a distributed key-value database powered by Rust and inspired by Google Spanner and HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTile38[0m[38;5;12m (https://github.com/tidwall/tile38) - a geolocation data store, spatial index, and realtime geofence, supporting a variety of object types including latitude/longitude points, bounding boxes, XYZ tiles, Geohashes, and GeoJSON[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTreodeDB[0m[38;5;12m (https://github.com/Treode/store) - key-value store that's replicated and sharded and provides atomic multirow writes.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mGraph Data Model[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAgensGraph[0m[38;5;12m (http://www.agensgraph.com/) - a new generation multi-model graph database for the modern complex data environment.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Giraph[0m[38;5;12m (http://giraph.apache.org/) - implementation of Pregel, based on Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Spark Bagel[0m[38;5;12m (http://spark.apache.org/docs/0.7.3/bagel-programming-guide.html) - implementation of Pregel, part of Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArangoDB[0m[38;5;12m (https://www.arangodb.com/) - multi model distributed database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDGraph[0m[38;5;12m [39m[38;5;12m(https://github.com/dgraph-io/dgraph)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mscalable,[39m[38;5;12m [39m[38;5;12mdistributed,[39m[38;5;12m [39m[38;5;12mlow[39m[38;5;12m [39m[38;5;12mlatency,[39m[38;5;12m [39m[38;5;12mhigh[39m[38;5;12m [39m[38;5;12mthroughput[39m[38;5;12m [39m[38;5;12mgraph[39m[38;5;12m [39m[38;5;12mdatabase[39m[38;5;12m [39m[38;5;12maimed[39m[38;5;12m [39m[38;5;12mat[39m[38;5;12m [39m[38;5;12mproviding[39m[38;5;12m [39m[38;5;12mGoogle[39m[38;5;12m [39m[38;5;12mproduction[39m[38;5;12m [39m[38;5;12mlevel[39m[38;5;12m [39m[38;5;12mscale[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mthroughput,[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mlow[39m[38;5;12m [39m[38;5;12menough[39m[38;5;12m [39m[38;5;12mlatency[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mserving[39m[38;5;12m [39m[38;5;12mreal[39m[38;5;12m [39m[38;5;12mtime[39m[38;5;12m [39m[38;5;12muser[39m[38;5;12m [39m
|
||
[38;5;12mqueries,[39m[38;5;12m [39m[38;5;12mover[39m[38;5;12m [39m[38;5;12mterabytes[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mstructured[39m[38;5;12m [39m[38;5;12mdata.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEliasDB[0m[38;5;12m (https://github.com/krotik/eliasdb) - a lightweight graph based database that does not require any third-party libraries.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook TAO[0m[38;5;12m (https://www.facebook.com/notes/facebook-engineering/tao-the-power-of-the-graph/10151525983993920) - TAO is the distributed data store that is widely used at Facebook to store and serve the social graph.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGCHQ Gaffer[0m[38;5;12m (https://github.com/gchq/Gaffer) - Gaffer by GCHQ is a framework that makes it easy to store large-scale graphs in which the nodes and edges have statistics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Cayley[0m[38;5;12m (https://github.com/cayleygraph/cayley) - open-source graph database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Pregel[0m[38;5;12m (http://kowshik.github.io/JPregel/pregel_paper.pdf) - graph processing framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraphLab PowerGraph[0m[38;5;12m (https://turi.com/products/create/docs/) - a core C++ GraphLab API and a collection of high-performance machine learning and data mining toolkits built on top of the GraphLab API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraphX[0m[38;5;12m (https://amplab.cs.berkeley.edu/publication/graphx-grades/) - resilient Distributed Graph System on Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGremlin[0m[38;5;12m (https://github.com/tinkerpop/gremlin) - graph traversal Language.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInfovore[0m[38;5;12m (https://github.com/paulhoule/infovore) - RDF-centric Map/Reduce framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIntel GraphBuilder[0m[38;5;12m (https://01.org/graphbuilder/) - tools to construct large-scale graphs on top of Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJanusGraph[0m[38;5;12m (http://janusgraph.org) - open-source, distributed graph database[39m
|
||
[38;5;12m with multiple options for storage backends (Bigtable, HBase, Cassandra, etc.)[39m
|
||
[38;5;12m and indexing backends (Elasticsearch, Solr, Lucene).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMapGraph[0m[38;5;12m (https://www.blazegraph.com/mapgraph-technology/) - Massively Parallel Graph processing on GPUs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMicrosoft Graph Engine[0m[38;5;12m (https://github.com/Microsoft/GraphEngine) - a distributed in-memory data processing engine, underpinned by a strongly-typed in-memory key-value store and a general distributed computation engine.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNeo4j[0m[38;5;12m (https://neo4j.com/) - graph database written entirely in Java.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOrientDB[0m[38;5;12m (http://orientdb.com/) - document and graph database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPhoebus[0m[38;5;12m (https://github.com/xslogic/phoebus) - framework for large scale graph processing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTitan[0m[38;5;12m (http://thinkaurelius.github.io/titan/) - distributed graph database, built over Cassandra.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter FlockDB[0m[38;5;12m (https://github.com/twitter-archive/flockdb) - distributed graph database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNodeXL[0m[38;5;12m (https://nodexl.codeplex.com/) - A free, open-source template for Microsoft® Excel® 2007, 2010, 2013 and 2016 that makes it easy to explore network graphs.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mColumnar Databases[0m
|
||
|
||
[38;5;14m[1mNote[0m[38;5;12m: please read the note in the [39m[38;5;14m[1mKey-Map Data Model[0m[38;5;12m (#key-map-data-model) section.[39m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mColumnar Storage[0m[38;5;12m (http://the-paper-trail.org/blog/columnar-storage/) - an explanation of what columnar storage is and when you might want it.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActian Vector[0m[38;5;12m (http://www.actian.com/) - column-oriented analytic database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mClickHouse[0m[38;5;12m (https://clickhouse.yandex/) - an open-source column-oriented database management system that allows generating analytical data reports in real time.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEventQL[0m[38;5;12m (http://eventql.io/) - a distributed, column-oriented database built for large-scale event collection and analytics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMonetDB[0m[38;5;12m (https://www.monetdb.org/) - column store database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mParquet[0m[38;5;12m (http://parquet.apache.org/) - columnar storage format for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPivotal Greenplum[0m[38;5;12m (https://pivotal.io/pivotal-greenplum) - purpose-built, dedicated analytic data warehouse that offers a columnar engine as well as a traditional row-based one.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVertica[0m[38;5;12m (https://www.vertica.com/) - is designed to manage large, fast-growing volumes of data and provide very fast query performance when used for data warehouses.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSQream DB[0m[38;5;12m (http://sqream.com/) - A GPU powered big data database, designed for analytics and data warehousing, with ANSI-92 compliant SQL, suitable for data sets from 10TB to 1PB.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle BigQuery[0m[38;5;12m (https://cloud.google.com/bigquery/what-is-bigquery) - Google's cloud offering backed by their pioneering work on Dremel.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon Redshift[0m[38;5;12m (https://aws.amazon.com/redshift/) - Amazon's cloud offering, also based on a columnar datastore backend.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIndexR[0m[38;5;12m (https://github.com/shunfei/indexr) - an open-source columnar storage format for fast & realtime analytic with big data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLocustDB[0m[38;5;12m (https://github.com/cswinter/LocustDB) - an experimental analytics database aiming to set a new standard for query performance on commodity hardware. [39m
|
||
|
||
[38;2;255;187;0m[4mNewSQL Databases[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActian Ingres[0m[38;5;12m (http://www.actian.com/products/operational-databases/) - commercially supported, open-source SQL relational database management system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActorDB[0m[38;5;12m (https://github.com/biokoda/actordb) - a distributed SQL database with the scalability of a KV store, while keeping the query capabilities of a relational database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon Redshift[0m[38;5;12m (http://aws.amazon.com/redshift/) - data warehouse service, based on PostgreSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBayesDB[0m[38;5;12m (https://github.com/probcomp/BayesDB) - statistic oriented SQL database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBedrock[0m[38;5;12m (http://bedrockdb.com/) - a simple, modular, networked and distributed transaction layer built atop SQLite.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCitusDB[0m[38;5;12m (https://www.citusdata.com/) - scales out PostgreSQL through sharding and replication.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCockroach[0m[38;5;12m (https://github.com/cockroachdb/cockroach) - Scalable, Geo-Replicated, Transactional Datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mComdb2[0m[38;5;12m (https://github.com/bloomberg/comdb2) - a clustered RDBMS built on optimistic concurrency control techniques.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDatomic[0m[38;5;12m (http://www.datomic.com/) - distributed database designed to enable scalable, flexible and intelligent applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFoundationDB[0m[38;5;12m (https://foundationdb.com/) - distributed database, inspired by F1.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle F1[0m[38;5;12m (https://research.google.com/pubs/pub41344.html) - distributed SQL database built on Spanner.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Spanner[0m[38;5;12m (https://research.google.com/archive/spanner.html) - globally distributed semi-relational database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mH-Store[0m[38;5;12m (http://hstore.cs.brown.edu/) - is an experimental main-memory, parallel database management system that is optimized for on-line transaction processing (OLTP) applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHaeinsa[0m[38;5;12m (https://github.com/VCNC/haeinsa) - linearly scalable multi-row, multi-table transaction library for HBase based on Percolator.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHandlerSocket[0m[38;5;12m (https://www.percona.com/doc/percona-server/5.5/performance/handlersocket.html) - NoSQL plugin for MySQL/MariaDB.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInfiniSQL[0m[38;5;12m (http://www.infinisql.org/) - infinitely scalable RDBMS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKarelDB[0m[38;5;12m (https://github.com/rayokota/kareldb) - a relational database backed by Apache Kafka.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMap-D[0m[38;5;12m (https://www.mapd.com/) - GPU in-memory database, big data analysis and visualization platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMemSQL[0m[38;5;12m (http://www.memsql.com/) - in-memory SQL database with optimized columnar storage on flash.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNuoDB[0m[38;5;12m (http://www.nuodb.com/) - SQL/ACID compliant distributed database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOracle TimesTen in-Memory Database[0m[38;5;12m (http://www.oracle.com/technetwork/database/database-technologies/timesten/overview/index.html) - in-memory, relational database management system with persistence and recoverability.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPivotal GemFire XD[0m[38;5;12m (http://gemfirexd.docs.pivotal.io/latest/) - Low-latency, in-memory, distributed SQL data store. Provides SQL interface to in-memory table data, persistable in HDFS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSAP HANA[0m[38;5;12m (https://hana.sap.com/abouthana.html) - is an in-memory, column-oriented, relational database management system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSenseiDB[0m[38;5;12m (http://senseidb.github.io/sensei/) - distributed, realtime, semi-structured database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSky[0m[38;5;12m (http://skydb.io/) - database used for flexible, high performance analysis of behavioral data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSymmetricDS[0m[38;5;12m (http://www.symmetricds.org/) - open source software for both file and database synchronization.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTiDB[0m[38;5;12m (https://github.com/pingcap/tidb) - TiDB is a distributed SQL database. Inspired by the design of Google F1.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVoltDB[0m[38;5;12m (https://www.voltdb.com/) - claims to be fastest in-memory database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1myugabyteDB[0m[38;5;12m (https://github.com/YugaByte/yugabyte-db) - open source, high-performance, distributed SQL database compatible with PostgreSQL.[39m
|
||
|
||
[38;2;255;187;0m[4mTime-Series Databases[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAxibase Time Series Database[0m[38;5;12m (http://axibase.com/products/axibase-time-series-database/) - Integrated time series database on top of HBase with built-in visualization, rule-engine and SQL support.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChronix[0m[38;5;12m (http://chronix.io/) - a time series storage built to store time series highly compressed and for fast access times.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCube[0m[38;5;12m (http://square.github.io/cube/) - uses MongoDB to store time series data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHeroic[0m[38;5;12m (https://spotify.github.io/heroic/#!/index) - is a scalable time series database based on Cassandra and Elasticsearch.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInfluxDB[0m[38;5;12m (https://www.influxdata.com/) - a time series database with optimised IO and queries, supports pgsql and influx wire protocols.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mQuestDB[0m[38;5;12m (https://questdb.io/) - high-performance, open-source SQL database for applications in financial services, IoT, machine learning, DevOps and observability.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIronDB[0m[38;5;12m (https://www.circonus.com/irondb/) - scalable, general-purpose time series database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKairosDB[0m[38;5;12m (https://github.com/kairosdb/kairosdb) - similar to OpenTSDB but allows for Cassandra.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mM3DB[0m[38;5;12m (http://m3db.github.io/m3/m3db/) - a distributed time series database that can be used for storing realtime metrics at long retention.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNewts[0m[38;5;12m (https://opennms.github.io/newts/) - a time series database based on Apache Cassandra.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTDengine[0m[38;5;12m (https://github.com/taosdata/TDengine/) - a time series database in C utilizing unique features of IoT to improve read/write throughput and reduce space needed to store data[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOpenTSDB[0m[38;5;12m (http://opentsdb.net) - distributed time series database on top of HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPrometheus[0m[38;5;12m (https://prometheus.io/) - a time series database and service monitoring system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBeringei[0m[38;5;12m (https://github.com/facebookincubator/beringei) - Facebook's in-memory time-series database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTrailDB[0m[38;5;12m (http://traildb.io/) - an efficient tool for storing and querying series of events.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDruid[0m[38;5;12m (https://github.com/druid-io/druid/) - column-oriented distributed data store ideal for powering interactive applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRiak-TS[0m[38;5;12m (http://basho.com/products/riak-ts/) - Riak TS is the only enterprise-grade NoSQL time series database optimized specifically for IoT and Time Series data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAkumuli[0m[38;5;12m (https://github.com/akumuli/Akumuli) - Akumuli is a numeric time-series database. It can be used to capture, store and process time-series data in real-time. The word "akumuli" can be translated from Esperanto as "accumulate".[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRhombus[0m[38;5;12m (https://github.com/Pardot/Rhombus) - A time-series object store for Cassandra that handles all the complexity of building wide row indexes.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDalmatiner DB[0m[38;5;12m (https://github.com/dalmatinerdb/dalmatinerdb) - Fast distributed metrics database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBlueflood[0m[38;5;12m (https://github.com/rackerlabs/blueflood) - A distributed system designed to ingest and process time series data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTimely[0m[38;5;12m (https://github.com/NationalSecurityAgency/timely) - Timely is a time series database application that provides secure access to time series data based on Accumulo and Grafana.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSiriDB[0m[38;5;12m (https://github.com/transceptor-technology/siridb-server) - Highly-scalable, robust and fast, open source time series database with cluster functionality.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThanos[0m[38;5;12m (https://github.com/improbable-eng/thanos) - Thanos is a set of components to create a highly available metric system with unlimited storage capacity using multiple (existing) Prometheus deployments.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVictoriaMetrics[0m[38;5;12m (https://github.com/VictoriaMetrics/VictoriaMetrics) - fast, scalable and resource-effective open-source TSDB compatible with Prometheus. Single-node and cluster versions included[39m
|
||
|
||
[38;2;255;187;0m[4mSQL-like processing[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActian SQL for Hadoop[0m[38;5;12m (http://www.actian.com/analytic-database/vectorh-sql-hadoop) - high performance interactive SQL access to all Hadoop data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Drill[0m[38;5;12m (http://drill.apache.org/) - framework for interactive analysis, inspired by Dremel.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache HCatalog[0m[38;5;12m (https://cwiki.apache.org/confluence/display/Hive/HCatalog) - table and storage management layer for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Hive[0m[38;5;12m (http://hive.apache.org/) - SQL-like data warehouse system for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Calcite[0m[38;5;12m (http://calcite.apache.org/) - framework that allows efficient translation of queries involving heterogeneous and federated data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Phoenix[0m[38;5;12m (http://phoenix.apache.org/index.html) - SQL skin over HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAster Database[0m[38;5;12m (http://www.teradata.com/products-and-services/Teradata-Aster/teradata-aster-database) - SQL-like analytic processing for MapReduce.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCloudera Impala[0m[38;5;12m (https://www.cloudera.com/products/apache-hadoop/impala.html) - framework for interactive analysis, inspired by Dremel.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mConcurrent Lingual[0m[38;5;12m (http://www.cascading.org/projects/lingual/) - SQL-like query language for Cascading.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDatasalt Splout SQL[0m[38;5;12m (http://www.datasalt.com/products/splout-sql/) - full SQL query engine for big datasets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDremio[0m[38;5;12m (https://www.dremio.com/) - an open-source, SQL-like Data-as-a-Service Platform based on Apache Arrow.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook PrestoDB[0m[38;5;12m (https://prestodb.io/) - distributed SQL query engine.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle BigQuery[0m[38;5;12m (https://research.google.com/pubs/pub36632.html) - framework for interactive analysis, implementation of Dremel.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMaterialize[0m[38;5;12m (https://github.com/materializeinc/materialize) - is a streaming database for real-time applications using SQL for queries and supporting a large fraction of PostgreSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInvantive SQL[0m[38;5;12m (https://documentation.invantive.com/2017R2/invantive-sql-grammar/invantive-sql-grammar-17.30.html) - SQL engine for online and on-premise use with integrated local data replication and 70+ connectors.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPipelineDB[0m[38;5;12m (https://www.pipelinedb.com/) - an open-source relational database that runs SQL queries continuously on streams, incrementally storing results in tables.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPivotal HDB[0m[38;5;12m (https://pivotal.io/pivotal-hdb) - SQL-like data warehouse system for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRainstorDB[0m[38;5;12m (http://rainstor.com/products/rainstor-database/) - database for storing petabyte-scale volumes of structured and semi-structured data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpark Catalyst[0m[38;5;12m (https://github.com/apache/spark/tree/master/sql) - is a Query Optimization Framework for Spark and Shark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSparkSQL[0m[38;5;12m (https://databricks.com/blog/2014/03/26/spark-sql-manipulating-structured-data-using-spark-2.html) - Manipulating Structured Data Using Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSplice Machine[0m[38;5;12m (https://www.splicemachine.com/) - a full-featured SQL-on-Hadoop RDBMS with ACID transactions.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStinger[0m[38;5;12m (https://hortonworks.com/innovation/stinger/) - interactive query for Hive.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTajo[0m[38;5;12m (http://tajo.apache.org/) - distributed data warehouse system on Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTrafodion[0m[38;5;12m (https://wiki.trafodion.org/wiki/index.php/Main_Page) - enterprise-class SQL-on-HBase solution targeting big data transactional or operational workloads.[39m
|
||
|
||
[38;2;255;187;0m[4mData Ingestion[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mredpanda[0m[38;5;12m (https://vectorized.io/redpanda) - A Kafka® replacement for mission critical systems; 10x faster. Written in C++.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon Kinesis[0m[38;5;12m (https://aws.amazon.com/kinesis/) - real-time processing of streaming data at massive scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon Web Services Glue[0m[38;5;12m (https://aws.amazon.com/glue/) - serverless fully managed extract, transform, and load (ETL) service[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCensus[0m[38;5;12m (https://getcensus.com/) - A reverse ETL product that lets you sync data from your data warehouse to SaaS Applications. No engineering favors required—just SQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Chukwa[0m[38;5;12m (http://chukwa.apache.org/) - data collection system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Flume[0m[38;5;12m (http://flume.apache.org/) - service to manage large amount of log data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Kafka[0m[38;5;12m (http://kafka.apache.org/) - distributed publish-subscribe messaging system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache NiFi[0m[38;5;12m (https://nifi.apache.org/) - Apache NiFi is an integrated data logistics platform for automating the movement of data between disparate systems.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Pulsar[0m[38;5;12m (https://github.com/apache/pulsar) - a distributed pub-sub messaging platform with a very flexible messaging model and an intuitive client API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Sqoop[0m[38;5;12m (http://sqoop.apache.org/) - tool to transfer data between Hadoop and a structured datastore.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEmbulk[0m[38;5;12m (http://www.embulk.org) - open-source bulk data loader that helps data transfer between various databases, storages, file formats, and cloud services.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Scribe[0m[38;5;12m (https://github.com/facebookarchive/scribe) - streamed log data aggregator.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFluentd[0m[38;5;12m (http://www.fluentd.org) - tool to collect events and logs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGazette[0m[38;5;12m (https://github.com/gazette/core) - Distributed streaming infrastructure built on cloud storage which makes it easy to mix and match batch and streaming paradigms.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Photon[0m[38;5;12m (https://research.google.com/pubs/pub41318.html) - geographically distributed system for joining multiple continuously flowing streams of data in real-time with high scalability and low latency.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHeka[0m[38;5;12m (https://github.com/mozilla-services/heka) - open source stream processing software system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHIHO[0m[38;5;12m (https://github.com/sonalgoyal/hiho) - framework for connecting disparate data sources with Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKestrel[0m[38;5;12m (https://github.com/papertrail/kestrel) - distributed message queue system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Databus[0m[38;5;12m (https://engineering.linkedin.com/data) - stream of change capture events for a database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Kamikaze[0m[38;5;12m (https://github.com/linkedin/kamikaze) - utility package for compressing sorted integer arrays.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn White Elephant[0m[38;5;12m (https://github.com/linkedin/white-elephant) - log aggregator and dashboard.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLogstash[0m[38;5;12m (https://www.elastic.co/products/logstash) - a tool for managing events and logs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNetflix Suro[0m[38;5;12m (https://github.com/Netflix/suro) - log aggregator like Storm and Samza based on Chukwa.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPinterest Secor[0m[38;5;12m (https://github.com/pinterest/secor) - is a service implementing Kafka log persistence.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedin Gobblin[0m[38;5;12m (https://github.com/linkedin/gobblin) - LinkedIn's universal data ingestion framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSkizze[0m[38;5;12m (https://github.com/skizzehq/skizze) - sketch data store to deal with all problems around counting and sketching using probabilistic data-structures.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStreamSets Data Collector[0m[38;5;12m (https://github.com/streamsets/datacollector) - continuous big data ingest infrastructure with a simple to use IDE.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAlooma[0m[38;5;12m (https://www.alooma.com/integrations/mysql) - data pipeline as a service enabling moving data sources such as MySQL into data warehouses.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRudderStack[0m[38;5;12m (https://github.com/rudderlabs/rudder-server) - an open source customer data infrastructure (segment, mParticle alternative) written in go.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mZilla[0m[38;5;12m (https://github.com/aklivity/zilla) - An API gateway built for event-driven architectures and streaming that supports standard protocols such as HTTP, SSE, gRPC, MQTT and the native Kafka protocol.[39m
|
||
|
||
[38;2;255;187;0m[4mService Programming[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAkka Toolkit[0m[38;5;12m (http://akka.io/) - runtime for distributed and fault-tolerant event-driven applications on the JVM.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Avro[0m[38;5;12m (http://avro.apache.org/) - data serialization system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Curator[0m[38;5;12m (http://curator.apache.org/) - Java libraries for Apache ZooKeeper.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Karaf[0m[38;5;12m (http://karaf.apache.org/) - OSGi runtime that runs on top of any OSGi framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Thrift[0m[38;5;12m (http://thrift.apache.org//) - framework to build binary protocols.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Zookeeper[0m[38;5;12m (http://zookeeper.apache.org/) - centralized service for process management.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Chubby[0m[38;5;12m (https://research.google.com/archive/chubby.html) - a lock service for loosely-coupled distributed systems.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHydrosphere Mist[0m[38;5;12m (https://github.com/Hydrospheredata/mist) - a service for exposing Apache Spark analytics jobs and machine learning models as realtime, batch or reactive web services.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedin Norbert[0m[38;5;12m (https://engineering.linkedin.com/data) - cluster manager.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMara[0m[38;5;12m (https://github.com/mara/data-integration) - A lightweight opinionated ETL framework, halfway between plain scripts and Apache Airflow[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOpenMPI[0m[38;5;12m (https://www.open-mpi.org/) - message passing framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSerf[0m[38;5;12m (https://www.serf.io/) - decentralized solution for service discovery and orchestration.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpotify Luigi[0m
|
||
[38;5;12m (https://github.com/spotify/luigi) - a Python package for building complex pipelines of batch jobs. It handles dependency resolution, workflow management, visualization, handling failures, command line integration, and much more.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpring XD[0m[38;5;12m (https://github.com/spring-projects/spring-xd) - distributed and extensible system for data ingestion, real time analytics, batch processing, and data export.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Elephant Bird[0m[38;5;12m (https://github.com/twitter/elephant-bird) - libraries for working with LZOP-compressed data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Finagle[0m[38;5;12m (https://twitter.github.io/finagle/) - asynchronous network stack for the JVM.[39m
|
||
|
||
[38;2;255;187;0m[4mScheduling[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Airflow[0m[38;5;12m (https://github.com/apache/incubator-airflow) - a platform to programmatically author, schedule and monitor workflows.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Aurora[0m[38;5;12m (http://aurora.apache.org/) - is a service scheduler that runs on top of Apache Mesos.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Falcon[0m[38;5;12m (http://falcon.apache.org/) - data management framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Oozie[0m[38;5;12m (http://oozie.apache.org/) - workflow job scheduler.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAzure Data Factory[0m[38;5;12m (https://docs.microsoft.com/en-us/azure/data-factory/data-factory-introduction) - cloud-based pipeline orchestration for on-prem, cloud and HDInsight[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChronos[0m[38;5;12m (http://mesos.github.io/chronos/) - distributed and fault-tolerant scheduler.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCronicle[0m[38;5;12m (https://github.com/jhuckaby/Cronicle) - Distributed, easy to install, NodeJS based, task scheduler[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDagster[0m[38;5;12m (https://github.com/dagster-io/dagster) - a data orchestrator for machine learning, analytics, and ETL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedin Azkaban[0m[38;5;12m (https://azkaban.github.io/) - batch workflow job scheduler.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSchedoscope[0m[38;5;12m (https://github.com/ottogroup/schedoscope) - Scala DSL for agile scheduling of Hadoop jobs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSparrow[0m[38;5;12m (https://github.com/radlab/sparrow) - scheduling platform.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mMachine Learning[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAzure ML Studio[0m[38;5;12m (https://studio.azureml.net/) - Cloud-based AzureML, R, Python Machine Learning platform[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mbrain[0m[38;5;12m (https://github.com/harthur/brain) - Neural networks in JavaScript.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOryx[0m[38;5;12m (https://github.com/OryxProject/oryx) - Lambda architecture on Apache Spark, Apache Kafka for real-time large scale machine learning.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mConcurrent Pattern[0m[38;5;12m (http://www.cascading.org/projects/pattern/) - machine learning library for Cascading.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mconvnetjs[0m[38;5;12m (https://github.com/karpathy/convnetjs) - Deep Learning in Javascript. Train Convolutional Neural Networks (or ordinary ones) in your browser.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDataVec[0m[38;5;12m (https://github.com/deeplearning4j/DataVec) - A vectorization and data preprocessing library for deep learning in Java and Scala. Part of the Deeplearning4j ecosystem. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDeeplearning4j[0m
|
||
[38;5;12m (https://github.com/deeplearning4j) - Fast, open deep learning for the JVM (Java, Scala, Clojure). A neural network configuration layer powered by a C++ library. Uses Spark and Hadoop to train nets on multiple GPUs and CPUs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDecider[0m[38;5;12m (https://github.com/danielsdeleo/Decider) - Flexible and Extensible Machine Learning in Ruby.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mENCOG[0m[38;5;12m (http://www.heatonresearch.com/encog/) - machine learning framework that supports a variety of advanced algorithms, as well as support classes to normalize and process data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1metcML[0m[38;5;12m (http://www.etcml.com/) - text classification with machine learning.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEtsy Conjecture[0m[38;5;12m (https://github.com/etsy/Conjecture) - scalable Machine Learning in Scalding.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFeast[0m[38;5;12m (https://github.com/gojek/feast) - A feature store for the management, discovery, and access of machine learning features. Feast provides a consistent view of feature data for both model training and model serving.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraphLab Create[0m[38;5;12m (https://dato.com/products/create/) - A machine learning platform in Python with a broad collection of ML toolkits, data engineering, and deployment tools.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mH2O[0m[38;5;12m (https://github.com/h2oai/h2o-3/) - statistical, machine learning and math runtime with Hadoop. R and Python.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKarate Club[0m[38;5;12m (https://github.com/benedekrozemberczki/karateclub) - An unsupervised machine learning library for graph structured data. Python[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKeras[0m[38;5;12m (https://github.com/fchollet/keras) - An intuitive neural net API inspired by Torch that runs atop Theano and Tensorflow.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLambdo[0m[38;5;12m (https://github.com/johnsonc/lambdo) - Lambdo is a workflow engine which significantly simplifies the analysis process by unifying feature engineering and machine learning operations.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLittle Ball of Fur[0m[38;5;12m (https://github.com/benedekrozemberczki/littleballoffur) - A subsampling library for graph structured data. Python[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMahout[0m[38;5;12m (http://mahout.apache.org/) - An Apache-backed machine learning library for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMLbase[0m[38;5;12m (http://www.mlbase.org/) - distributed machine learning libraries for the BDAS stack.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMLPNeuralNet[0m[38;5;12m (https://github.com/nikolaypavlov/MLPNeuralNet) - Fast multilayer perceptron neural network library for iOS and Mac OS X.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mML Workspace[0m[38;5;12m (https://github.com/ml-tooling/ml-workspace) - All-in-one web-based IDE specialized for machine learning and data science.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMOA[0m[38;5;12m (http://moa.cms.waikato.ac.nz) - MOA performs big data stream mining in real time, and large scale machine learning.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMonkeyLearn[0m[38;5;12m (https://monkeylearn.com/) - Text mining made easy. Extract and classify data from text.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mND4J[0m[38;5;12m (https://github.com/deeplearning4j/nd4j) - A matrix library for the JVM. Numpy for Java. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mnupic[0m[38;5;12m (https://github.com/numenta/nupic) - Numenta Platform for Intelligent Computing: a brain-inspired machine intelligence platform, and biologically accurate neural network based on cortical learning algorithms.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPredictionIO[0m[38;5;12m (http://predictionio.incubator.apache.org/index.html) - machine learning server built on Hadoop, Mahout and Cascading.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPyTorch Geometric Temporal[0m[38;5;12m (https://github.com/benedekrozemberczki/pytorch_geometric_temporal) - a temporal extension library for PyTorch Geometric.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRL4J[0m[38;5;12m (https://github.com/deeplearning4j/rl4j) - Reinforcement learning for Java and Scala. Includes Deep-Q learning and A3C algorithms, and integrates with Open AI's Gym. Runs in the Deeplearning4j ecosystem. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSAMOA[0m[38;5;12m (http://samoa.incubator.apache.org/) - distributed streaming machine learning framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mscikit-learn[0m[38;5;12m (https://github.com/scikit-learn/scikit-learn) - scikit-learn: machine learning in Python.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mShapley[0m[38;5;12m (https://github.com/benedekrozemberczki/shapley) - A data-driven framework to quantify the value of classifiers in a machine learning ensemble. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpark MLlib[0m[38;5;12m (http://spark.apache.org/docs/0.9.0/mllib-guide.html) - a Spark implementation of some common machine learning (ML) functionality.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSibyl[0m[38;5;12m (https://users.soe.ucsc.edu/~niejiazhong/slides/chandra.pdf) - System for Large Scale Machine Learning at Google.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTensorFlow[0m[38;5;12m (https://github.com/tensorflow/tensorflow) - Library from Google for machine learning using data flow graphs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTheano[0m[38;5;12m (https://github.com/theano) - A Python-focused machine learning library supported by the University of Montreal.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTorch[0m[38;5;12m (https://github.com/torch) - A deep learning library with a Lua API, supported by NYU and Facebook.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVelox[0m[38;5;12m (https://github.com/amplab/velox-modelserver) - System for serving machine learning predictions.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVowpal Wabbit[0m[38;5;12m (https://github.com/JohnLangford/vowpal_wabbit/wiki) - learning system sponsored by Microsoft and Yahoo!.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWEKA[0m[38;5;12m (http://www.cs.waikato.ac.nz/ml/weka/) - suite of machine learning software.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBidMach[0m[38;5;12m (https://github.com/BIDData/BIDMach) - CPU and GPU-accelerated Machine Learning Library.[39m
|
||
|
||
[38;2;255;187;0m[4mBenchmarking[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Hadoop Benchmarking[0m[38;5;12m (https://issues.apache.org/jira/browse/MAPREDUCE-3561) - micro-benchmarks for testing Hadoop performance.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBerkeley SWIM Benchmark[0m[38;5;12m (https://github.com/SWIMProjectUCB/SWIM/wiki) - real-world big data workload benchmark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIntel HiBench[0m[38;5;12m (https://github.com/intel-hadoop/HiBench) - a Hadoop benchmark suite.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPUMA Benchmarking[0m[38;5;12m (https://issues.apache.org/jira/browse/MAPREDUCE-5116) - benchmark suite for MapReduce applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mYahoo Gridmix3[0m[38;5;12m (http://yahoohadoop.tumblr.com/post/98294079296/gridmix3-emulating-production-workload-for) - Hadoop cluster benchmarking from Yahoo engineering team.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDeeplearning4j Benchmarks[0m[38;5;12m (https://github.com/deeplearning4j/dl4j-benchmark)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUCSB[0m[38;5;12m (https://github.com/unum-cloud/ucsb) - extended Yahoo Cloud Serving Benchmark for NoSQL databases.[39m
|
||
|
||
[38;2;255;187;0m[4mSecurity[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Ranger[0m[38;5;12m (http://ranger.apache.org/) - Central security admin & fine-grained authorization for Hadoop[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Eagle[0m[38;5;12m (http://eagle.apache.org/) - real time monitoring solution[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Knox Gateway[0m[38;5;12m (http://knox.apache.org/) - single point of secure access for Hadoop clusters.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Sentry[0m[38;5;12m (http://incubator.apache.org/projects/sentry.html) - security module for data stored in Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBDA[0m[38;5;12m (https://github.com/kotobukki/BDA/) - The vulnerability detector for Hadoop and Spark[39m
|
||
|
||
[38;2;255;187;0m[4mSystem Deployment[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Ambari[0m[38;5;12m (http://ambari.apache.org/) - operational framework for Hadoop management.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Bigtop[0m[38;5;12m (http://bigtop.apache.org//) - system deployment framework for the Hadoop ecosystem.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Helix[0m[38;5;12m (http://helix.apache.org/) - cluster management framework.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Mesos[0m[38;5;12m (http://mesos.apache.org/) - cluster manager.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Slider[0m[38;5;12m (https://github.com/apache/incubator-slider) - is a YARN application to deploy existing distributed applications on YARN.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Whirr[0m[38;5;12m (http://whirr.apache.org/) - set of libraries for running cloud services.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache YARN[0m[38;5;12m (https://hortonworks.com/hadoop/yarn/) - Cluster manager.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBrooklyn[0m[38;5;12m (http://brooklyncentral.github.io/) - library that simplifies application deployment and management.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBuildoop[0m[38;5;12m (http://buildoop.github.io/) - Similar to Apache BigTop based on Groovy language.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCloudera HUE[0m[38;5;12m (http://gethue.com/) - web application for interacting with Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Prism[0m[38;5;12m (http://www.wired.com/2012/08/facebook-prism/) - multi-datacenter replication system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Borg[0m[38;5;12m (https://www.wired.com/2013/03/google-borg-twitter-mesos/all/) - job scheduling and monitoring system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Omega[0m[38;5;12m (https://www.youtube.com/watch?v=0ZFMlO98Jkc) - job scheduling and monitoring system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHortonworks HOYA[0m[38;5;12m (https://hortonworks.com/blog/introducing-hoya-hbase-on-yarn/) - application that can deploy HBase cluster on YARN.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKubernetes[0m[38;5;12m (https://kubernetes.io/) - a system for automating deployment, scaling, and management of containerized applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMarathon[0m[38;5;12m (https://github.com/mesosphere/marathon) - Mesos framework for long-running services.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkis[0m[38;5;12m (https://github.com/WeBankFinTech/Linkis) - Linkis helps easily connect to various back-end computation/storage engines.[39m
|
||
|
||
[38;2;255;187;0m[4mApplications[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m411[0m[38;5;12m (https://github.com/etsy/411) - a web application for alert management resulting from scheduled searches into Elasticsearch.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAdobe spindle[0m[38;5;12m (https://github.com/adobe-research/spindle) - Next-generation web analytics processing with Scala, Spark, and Parquet.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Metron[0m[38;5;12m (http://metron.apache.org/) - a platform that integrates a variety of open source big data technologies in order to offer a centralized tool for security monitoring and analysis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Nutch[0m[38;5;12m (http://nutch.apache.org/) - open source web crawler.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache OODT[0m[38;5;12m (http://oodt.apache.org/) - capturing, processing and sharing of data for NASA's scientific archives.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Tika[0m[38;5;12m (https://tika.apache.org/) - content analysis toolkit.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArgus[0m[38;5;12m (https://github.com/salesforce/Argus) - Time series monitoring and alerting platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAthenaX[0m[38;5;12m (https://github.com/uber/AthenaX) - a streaming analytics platform that enables users to run production-quality, large scale streaming analytics using Structured Query Language (SQL).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAtlas[0m[38;5;12m (https://github.com/Netflix/atlas) - a backend for managing dimensional time series data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCountly[0m[38;5;12m (https://count.ly/) - open source mobile and web analytics platform, based on Node.js & MongoDB.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDomino[0m[38;5;12m (https://www.dominodatalab.com/) - Run, scale, share, and deploy models — without any infrastructure.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEclipse BIRT[0m[38;5;12m (http://www.eclipse.org/birt/) - Eclipse-based reporting system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mElastAlert[0m[38;5;12m (https://github.com/Yelp/elastalert) - ElastAlert is a simple framework for alerting on anomalies, spikes, or other patterns of interest from data in ElasticSearch.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEventhub[0m[38;5;12m (https://github.com/Codecademy/EventHub) - open source event analytics platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHASH[0m[38;5;12m (https://hash.ai) - open source simulation and visualization platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHermes[0m[38;5;12m (https://github.com/allegro/hermes) - asynchronous message broker built on top of Kafka.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHunk[0m[38;5;12m (https://www.splunk.com/en_us/download/hunk.html) - Splunk analytics for Hadoop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mImhotep[0m[38;5;12m (http://opensource.indeedeng.io/imhotep/) - Large scale analytics platform by indeed.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIndicative[0m[38;5;12m (https://www.indicative.com/) - Web & mobile analytics tool, with data warehouse (AWS, BigQuery) integration.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJupyter[0m[38;5;12m (https://jupyter.org/) - Notebook and project application for interactive data science and scientific computing across all programming languages.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMADlib[0m[38;5;12m (http://madlib.incubator.apache.org/community/) - data-processing library of an RDBMS to analyze data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKapacitor[0m[38;5;12m (https://github.com/influxdata/kapacitor) - an open source framework for processing, monitoring, and alerting on time series data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKylin[0m[38;5;12m (http://kylin.apache.org/) - open source Distributed Analytics Engine from eBay.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPivotalR[0m[38;5;12m (https://github.com/pivotalsoftware/PivotalR) - R on Pivotal HD / HAWQ and PostgreSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRakam[0m[38;5;12m (https://github.com/rakam-io/rakam) - open-source real-time custom analytics platform powered by Postgresql, Kinesis and PrestoDB. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mQubole[0m[38;5;12m (https://www.qubole.com/) - auto-scaling Hadoop cluster, built-in data connectors.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSnappyData[0m[38;5;12m [39m[38;5;12m(https://github.com/SnappyDataInc/snappydata)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mdistributed[39m[38;5;12m [39m[38;5;12min-memory[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mstore[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mreal-time[39m[38;5;12m [39m[38;5;12moperational[39m[38;5;12m [39m[38;5;12manalytics,[39m[38;5;12m [39m[38;5;12mdelivering[39m[38;5;12m [39m[38;5;12mstream[39m[38;5;12m [39m[38;5;12manalytics,[39m[38;5;12m [39m[38;5;12mOLTP[39m[38;5;12m [39m[38;5;12m(online[39m[38;5;12m [39m[38;5;12mtransaction[39m[38;5;12m [39m[38;5;12mprocessing)[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mOLAP[39m[38;5;12m [39m[38;5;12m(online[39m[38;5;12m [39m[38;5;12manalytical[39m[38;5;12m [39m
|
||
[38;5;12mprocessing)[39m[38;5;12m [39m[38;5;12mbuilt[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mSpark[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12msingle[39m[38;5;12m [39m[38;5;12mintegrated[39m[38;5;12m [39m[38;5;12mcluster.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSnowplow[0m[38;5;12m (https://github.com/snowplow/snowplow) - enterprise-strength web and event analytics, powered by Hadoop, Kinesis, Redshift and Postgres.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSparkR[0m[38;5;12m (http://amplab-extras.github.io/SparkR-pkg/) - R frontend for Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSplunk[0m[38;5;12m (https://www.splunk.com/) - analyzer for machine-generated data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSumo Logic[0m[38;5;12m (https://www.sumologic.com/) - cloud based analyzer for machine-generated data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSubstation[0m[38;5;12m (https://github.com/brexhq/substation) - Substation is a cloud native data pipeline and transformation toolkit written in Go.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTalend[0m[38;5;12m (http://www.talend.com/products/big-data/) - unified open source environment for YARN, Hadoop, HBASE, Hive, HCatalog & Pig.[39m
|
||
|
||
[38;2;255;187;0m[4mSearch engine and framework[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Lucene[0m[38;5;12m (http://lucene.apache.org/) - Search engine library.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Solr[0m[38;5;12m (http://lucene.apache.org/solr/) - Search platform for Apache Lucene.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mElassandra[0m[38;5;12m (https://github.com/strapdata/elassandra) - is a fork of Elasticsearch modified to run on top of Apache Cassandra in a scalable and resilient peer-to-peer architecture.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mElasticSearch[0m[38;5;12m (https://www.elastic.co/) - Search and analytics engine based on Apache Lucene.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEnigma.io[0m[38;5;12m (https://www.enigma.com/) – Freemium robust web application for exploring, filtering, analyzing, searching and exporting massive datasets scraped from across the Web.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Caffeine[0m[38;5;12m (https://googleblog.blogspot.it/2010/06/our-new-search-index-caffeine.html) - continuous indexing system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Percolator[0m[38;5;12m (https://research.google.com/pubs/pub36726.html) - continuous indexing system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHBase Coprocessor[0m[38;5;12m (https://blogs.apache.org/hbase/entry/coprocessor_introduction) - implementation of Percolator, part of HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLily HBase Indexer[0m[38;5;12m (http://ngdata.github.io/hbase-indexer/) - quickly and easily search for any content stored in HBase.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Bobo[0m[38;5;12m (http://senseidb.github.io/bobo/) - is a Faceted Search implementation written purely in Java, an extension to Apache Lucene.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Cleo[0m[38;5;12m (https://github.com/linkedin/cleo) - is a flexible software library for enabling rapid development of partial, out-of-order and real-time typeahead search.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Galene[0m[38;5;12m (https://engineering.linkedin.com/search/did-you-mean-galene) - search architecture at LinkedIn.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLinkedIn Zoie[0m[38;5;12m (https://github.com/senseidb/zoie) - is a realtime search/indexing system written in Java.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMG4J[0m[38;5;12m [39m[38;5;12m(http://mg4j.di.unimi.it/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mMG4J[39m[38;5;12m [39m[38;5;12m(Managing[39m[38;5;12m [39m[38;5;12mGigabytes[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mJava)[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mfull-text[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12mengine[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mlarge[39m[38;5;12m [39m[38;5;12mdocument[39m[38;5;12m [39m[38;5;12mcollections[39m[38;5;12m [39m[38;5;12mwritten[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mJava.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mhighly[39m[38;5;12m [39m[38;5;12mcustomisable,[39m[38;5;12m [39m[38;5;12mhigh-performance[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mprovides[39m[38;5;12m [39m[38;5;12mstate-of-the-art[39m[38;5;12m [39m[38;5;12mfeatures[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m
|
||
[38;5;12mnew[39m[38;5;12m [39m[38;5;12mresearch[39m[38;5;12m [39m[38;5;12malgorithms.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSphinx Search Server[0m[38;5;12m (http://sphinxsearch.com/) - fulltext search engine.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVespa[0m[38;5;12m (http://vespa.ai/) - is an engine for low-latency computation over large data sets. It stores and indexes your data such that queries, selection and processing over the data can be performed at serving time.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook[0m[38;5;14m[1m [0m[38;5;14m[1mFaiss[0m[38;5;12m [39m[38;5;12m(https://github.com/facebookresearch/faiss)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mlibrary[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mefficient[39m[38;5;12m [39m[38;5;12msimilarity[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mclustering[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mdense[39m[38;5;12m [39m[38;5;12mvectors.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12mcontains[39m[38;5;12m [39m[38;5;12malgorithms[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12msets[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mvectors[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12many[39m[38;5;12m [39m[38;5;12msize,[39m[38;5;12m [39m[38;5;12mup[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mones[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mpossibly[39m[38;5;12m [39m[38;5;12mdo[39m[38;5;12m [39m
|
||
[38;5;12mnot[39m[38;5;12m [39m[38;5;12mfit[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mRAM.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;12mcontains[39m[38;5;12m [39m[38;5;12msupporting[39m[38;5;12m [39m[38;5;12mcode[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mevaluation[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mparameter[39m[38;5;12m [39m[38;5;12mtuning.[39m[38;5;12m [39m[38;5;12mFaiss[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mwritten[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mC++[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mcomplete[39m[38;5;12m [39m[38;5;12mwrappers[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mPython/numpy.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAnnoy[0m[38;5;12m [39m[38;5;12m(https://github.com/spotify/annoy)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mC++[39m[38;5;12m [39m[38;5;12mlibrary[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mPython[39m[38;5;12m [39m[38;5;12mbindings[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mpoints[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mspace[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mclose[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mgiven[39m[38;5;12m [39m[38;5;12mquery[39m[38;5;12m [39m[38;5;12mpoint.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;12mcreates[39m[38;5;12m [39m[38;5;12mlarge[39m[38;5;12m [39m[38;5;12mread-only[39m[38;5;12m [39m[38;5;12mfile-based[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mstructures[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mmmapped[39m[38;5;12m [39m[38;5;12minto[39m[38;5;12m [39m
|
||
[38;5;12mmemory[39m[38;5;12m [39m[38;5;12mso[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mmany[39m[38;5;12m [39m[38;5;12mprocesses[39m[38;5;12m [39m[38;5;12mmay[39m[38;5;12m [39m[38;5;12mshare[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12msame[39m[38;5;12m [39m[38;5;12mdata.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeaviate[0m[38;5;12m (https://github.com/semi-technologies/weaviate) - Weaviate is a GraphQL-based semantic search engine with built-in (word) embeddings.[39m
|
||
|
||
[38;2;255;187;0m[4mMySQL forks and evolutions[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAmazon RDS[0m[38;5;12m (https://aws.amazon.com/rds/) - MySQL databases in Amazon's cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDrizzle[0m[38;5;12m (http://www.drizzle.org/) - evolution of MySQL 6.0.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Cloud SQL[0m[38;5;12m (https://cloud.google.com/sql/docs/) - MySQL databases in Google's cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMariaDB[0m[38;5;12m (https://mariadb.org/) - enhanced, drop-in replacement for MySQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMySQL Cluster[0m[38;5;12m (https://www.mysql.com/products/cluster/) - MySQL implementation using NDB Cluster storage engine.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPercona Server[0m[38;5;12m (https://www.percona.com/software/mysql-database/percona-server) - enhanced, drop-in replacement for MySQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mProxySQL[0m[38;5;12m (https://github.com/renecannao/proxysql) - High Performance Proxy for MySQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTokuDB[0m[38;5;12m (https://www.percona.com/) - TokuDB is a storage engine for MySQL and MariaDB.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWebScaleSQL[0m[38;5;12m (http://webscalesql.org/) - is a collaboration among engineers from several companies that face similar challenges in running MySQL at scale.[39m
|
||
|
||
[38;2;255;187;0m[4mPostgreSQL forks and evolutions[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHadoopDB[0m[38;5;12m (http://db.cs.yale.edu/hadoopdb/hadoopdb.html) - hybrid of MapReduce and DBMS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIBM Netezza[0m[38;5;12m (http://www-01.ibm.com/software/data/netezza/) - high-performance data warehouse appliances.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPostgres-XL[0m[38;5;12m (http://www.postgres-xl.org/) - Scalable Open Source PostgreSQL-based Database Cluster.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRecDB[0m[38;5;12m (http://www-users.cs.umn.edu/~sarwat/RecDB/) - Open Source Recommendation Engine Built Entirely Inside PostgreSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStado[0m[38;5;12m (http://www.stormdb.com/community/stado) - open source MPP database system solely targeted at data warehousing and data mart applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mYahoo Everest[0m[38;5;12m (https://www.scribd.com/doc/3159239/70-Everest-PGCon-RT) - multi-petabyte database / MPP derived from PostgreSQL.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTimescaleDB[0m[38;5;12m (http://www.timescale.com/) - An open-source time-series database optimized for fast ingest and complex queries[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPipelineDB[0m[38;5;12m (https://www.pipelinedb.com/) - The Streaming SQL Database. An open-source relational database that runs SQL queries continuously on streams, incrementally storing results in tables[39m
|
||
|
||
[38;2;255;187;0m[4mMemcached forks and evolutions[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook McDipper[0m[38;5;12m (https://www.facebook.com/notes/facebook-engineering/mcdipper-a-key-value-cache-for-flash-storage/10151347090423920) - key/value cache for flash storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFacebook Memcached[0m[38;5;12m (https://www.facebook.com/notes/facebook-engineering/scaling-memcache-at-facebook/10151411410803920) - fork of Memcache.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwemproxy[0m[38;5;12m (https://github.com/twitter/twemproxy) - A fast, light-weight proxy for memcached and redis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Fatcache[0m[38;5;12m (https://github.com/twitter/fatcache) - key/value cache for flash storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter Twemcache[0m[38;5;12m (https://github.com/twitter/twemcache) - fork of Memcache.[39m
|
||
|
||
[38;2;255;187;0m[4mEmbedded Databases[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mActian PSQL[0m[38;5;12m (http://www.actian.com/products/operational-databases/) - ACID-compliant DBMS developed by Pervasive Software, optimized for embedding in applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBerkeleyDB[0m[38;5;12m (https://www.oracle.com/database/berkeley-db/index.html) - a software library that provides a high-performance embedded database for key/value data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHanoiDB[0m[38;5;12m (https://github.com/krestenkrab/hanoidb) - Erlang LSM BTree Storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLevelDB[0m[38;5;12m (https://github.com/google/leveldb) - a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLMDB[0m[38;5;12m (https://symas.com/mdb/) - ultra-fast, ultra-compact key-value embedded data store developed by Symas.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRocksDB[0m[38;5;12m (http://rocksdb.org/) - embeddable persistent key-value store for fast storage based on LevelDB.[39m
|
||
|
||
[38;2;255;187;0m[4mBusiness Intelligence[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBIME Analytics[0m[38;5;12m (https://www.bimeanalytics.com/?lang=en) - business intelligence platform in the cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBlazer[0m[38;5;12m (https://github.com/ankane/blazer) - business intelligence made simple.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChartio[0m[38;5;12m (https://chartio.com) - lean business intelligence platform to visualize and explore your data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCount[0m[38;5;12m (https://count.co) - notebook-based analytics and visualisation platform using SQL or drag-and-drop.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mdatapine[0m[38;5;12m (https://www.datapine.com/) - self-service business intelligence tool in the cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDekart[0m[38;5;12m (https://dekart.xyz/) - Large scale geospatial analytics for Google BigQuery based on Kepler.gl.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoodData[0m[38;5;12m (https://www.gooddata.com/) - platform for data products and embedded analytics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJaspersoft[0m[38;5;12m (https://www.jaspersoft.com/) - powerful business intelligence suite.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJedox Palo[0m[38;5;12m (https://www.jedox.com/en/) - customisable Business Intelligence platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJethrodata[0m[38;5;12m (https://jethro.io/) - Interactive Big Data Analytics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mintermix.io[0m[38;5;12m (https://intermix.io/) - Performance Monitoring for Amazon Redshift[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMetabase[0m[38;5;12m (https://github.com/metabase/metabase) - The simplest, fastest way to get business intelligence and analytics to everyone in your company.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMicrosoft[0m[38;5;12m (http://www.microsoft.com/en-us/server-cloud/solutions/business-intelligence/default.aspx) - business intelligence software and platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMicrostrategy[0m[38;5;12m (https://www.microstrategy.com/) - software platforms for business intelligence, mobile intelligence, and network applications.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNumeracy[0m[38;5;12m (https://numeracy.co/) - Fast, clean SQL client and business intelligence.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPentaho[0m[38;5;12m (http://www.pentaho.com/) - business intelligence platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mQlik[0m[38;5;12m (http://www.qlik.com/us/) - business intelligence and analytics platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRedash[0m[38;5;12m (https://redash.io/) - Open source business intelligence platform, supporting multiple data sources and planned queries.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSaiku Analytics[0m[38;5;12m (https://www.meteorite.bi/) - Open source analytics platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKnowage[0m[38;5;12m (https://www.knowage-suite.com/) - open source business intelligence platform. (former [39m[38;5;14m[1mSpagoBi[0m[38;5;12m (http://www.spagobi.org/))[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSparklineData SNAP[0m[38;5;12m (http://sparklinedata.com/) - modern B.I platform powered by Apache Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTableau[0m[38;5;12m (https://www.tableau.com/) - business intelligence platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mZoomdata[0m[38;5;12m (https://www.zoomdata.com/) - Big Data Analytics.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mData Visualization[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAirpal[0m[38;5;12m (https://github.com/airbnb/airpal) - Web UI for PrestoDB.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAnyChart[0m[38;5;12m (http://www.anychart.com) - fast, simple and flexible JavaScript (HTML5) charting library featuring pure JS API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArbor[0m[38;5;12m (https://github.com/samizdatco/arbor) - graph visualization library using web workers and jQuery.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBanana[0m[38;5;12m (https://github.com/LucidWorks/banana) - visualize logs and time-stamped data stored in Solr. Port of Kibana.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBloomery[0m[38;5;12m (https://github.com/ufukomer/bloomery) - Web UI for Impala.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBokeh[0m[38;5;12m [39m[38;5;12m(http://bokeh.pydata.org/en/latest/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mpowerful[39m[38;5;12m [39m[38;5;12mPython[39m[38;5;12m [39m[38;5;12minteractive[39m[38;5;12m [39m[38;5;12mvisualization[39m[38;5;12m [39m[38;5;12mlibrary[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mtargets[39m[38;5;12m [39m[38;5;12mmodern[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mbrowsers[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mpresentation,[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mgoal[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mproviding[39m[38;5;12m [39m[38;5;12melegant,[39m[38;5;12m [39m[38;5;12mconcise[39m[38;5;12m [39m[38;5;12mconstruction[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mnovel[39m[38;5;12m [39m[38;5;12mgraphics[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m
|
||
[38;5;12mstyle[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mD3.js,[39m[38;5;12m [39m[38;5;12mbut[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;12mdelivering[39m[38;5;12m [39m[38;5;12mthis[39m[38;5;12m [39m[38;5;12mcapability[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mhigh-performance[39m[38;5;12m [39m[38;5;12minteractivity[39m[38;5;12m [39m[38;5;12mover[39m[38;5;12m [39m[38;5;12mvery[39m[38;5;12m [39m[38;5;12mlarge[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12mstreaming[39m[38;5;12m [39m[38;5;12mdatasets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mC3[0m[38;5;12m (http://c3js.org/) - D3-based reusable chart library[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCartoDB[0m[38;5;12m (https://github.com/CartoDB/cartodb) - open-source or freemium hosting for geospatial databases with powerful front-end editing capabilities and a robust API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mchartd[0m[38;5;12m (http://chartd.co/) - responsive, retina-compatible charts with just an img tag.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChart.js[0m[38;5;12m (http://www.chartjs.org/) - open source HTML5 Charts visualizations.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChartist.js[0m[38;5;12m (https://github.com/gionkunz/chartist-js) - another open source HTML5 Charts visualization.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCrossfilter[0m[38;5;12m (http://square.github.io/crossfilter/) - JavaScript library for exploring large multivariate datasets in the browser. Works well with dc.js and d3.js.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCubism[0m[38;5;12m (https://github.com/square/cubism) - JavaScript library for time series visualization.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCytoscape[0m[38;5;12m (http://cytoscape.github.io/) - JavaScript library for visualizing complex networks.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDC.js[0m[38;5;12m (http://dc-js.github.io/dc.js/) - Dimensional charting built to work natively with crossfilter rendered using d3.js. Excellent for connecting charts/additional metadata to hover events in D3.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mD3[0m[38;5;12m (https://d3js.org/) - JavaScript library for manipulating documents.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mD3.compose[0m[38;5;12m (https://github.com/CSNW/d3.compose) - Compose complex, data-driven visualizations from reusable charts and components.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mD3Plus[0m[38;5;12m (http://d3plus.org) - A fairly robust set of reusable charts and styles for d3.js.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDash[0m[38;5;12m (https://github.com/plotly/dash) - Analytical Web Apps for Python, R, Julia, and Jupyter. Built on top of plotly, no JS required[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDekart[0m[38;5;12m (https://dekart.xyz/) - Large scale geospatial analytics for Google BigQuery based on Kepler.gl.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDevExtreme React Chart[0m[38;5;12m (https://devexpress.github.io/devextreme-reactive/react/chart/) - High-performance plugin-based React chart for Bootstrap and Material Design.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEcharts[0m[38;5;12m (https://github.com/ecomfe/echarts) - Baidu's enterprise charts.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEnvisionjs[0m[38;5;12m (https://github.com/HumbleSoftware/envisionjs) - dynamic HTML5 visualization.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFnordMetric[0m[38;5;12m (https://metrictools.org/) - write SQL queries that return SVG charts rather than tables[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFrappe Charts[0m[38;5;12m (https://frappe.io/charts) - GitHub-inspired simple and modern SVG charts for the web with zero dependencies.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFreeboard[0m[38;5;12m (https://github.com/Freeboard/freeboard) - open source real-time dashboard builder for IOT and other web mashups.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGephi[0m[38;5;12m (https://github.com/gephi/gephi) - An award-winning open-source platform for visualizing and manipulating large graphs and network connections. It's like Photoshop, but for graphs. Available for Windows and Mac OS X.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGoogle Charts[0m[38;5;12m (https://developers.google.com/chart/) - simple charting API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGrafana[0m[38;5;12m (https://grafana.com/) - graphite dashboard frontend, editor and graph composer.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraphite[0m[38;5;12m (http://graphiteapp.org/) - scalable Realtime Graphing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHighcharts[0m[38;5;12m (https://www.highcharts.com/) - simple and flexible charting API.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIPython[0m[38;5;12m (http://ipython.org/) - provides a rich architecture for interactive computing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKibana[0m[38;5;12m (https://www.elastic.co/products/kibana) - visualize logs and time-stamped data[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mLumify[0m[38;5;12m (http://lumify.io/) - open source big data analysis and visualization platform[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMatplotlib[0m[38;5;12m (https://github.com/matplotlib/matplotlib) - plotting with Python.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMetricsGraphics.js[0m[38;5;12m (https://metricsgraphicsjs.org/) - a library built on top of D3 that is optimized for time-series data[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNVD3[0m[38;5;12m (http://nvd3.org/) - chart components for d3.js.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPeity[0m[38;5;12m (https://github.com/benpickles/peity) - Progressive SVG bar, line and pie charts.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPlot.ly[0m[38;5;12m (https://plot.ly/) - Easy-to-use web service that allows for rapid creation of complex charts, from heatmaps to histograms. Upload data to create and style charts with Plotly's online spreadsheet. Fork others' plots.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPlotly.js[0m[38;5;12m (https://github.com/plotly/plotly.js) - The open source javascript graphing library that powers plotly.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRecline[0m[38;5;12m (https://github.com/okfn/recline) - simple but powerful library for building data applications in pure Javascript and HTML.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRedash[0m[38;5;12m (https://github.com/getredash/redash) - open-source platform to query and visualize data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mReCharts[0m[38;5;12m (http://recharts.org/) - A composable charting library built on React components[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mShiny[0m[38;5;12m (http://shiny.rstudio.com/) - a web application framework for R.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSigma.js[0m[38;5;12m (https://github.com/jacomyal/sigma.js) - JavaScript library dedicated to graph drawing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSuperset[0m[38;5;12m (https://github.com/apache/incubator-superset) - a data exploration platform designed to be visual, intuitive and interactive, making it easy to slice, dice and visualize data and perform analytics at the speed of thought.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mVega[0m[38;5;12m (https://github.com/vega/vega) - a visualization grammar.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mZeppelin[0m[38;5;12m (https://github.com/ZEPL/zeppelin) - a notebook-style collaborative data analysis.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mZing Charts[0m[38;5;12m (https://www.zingchart.com/) - JavaScript charting library for big data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDataSphere Studio[0m[38;5;12m (https://github.com/WeBankFinTech/DataSphereStudio) - one-stop data application development management portal.[39m
|
||
|
||
[38;2;255;187;0m[4mInternet of things and sensor data[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mApache Edgent (Incubating)[0m[38;5;12m (http://edgent.apache.org/) - a programming model and micro-kernel style runtime that can be embedded in gateways and small footprint edge devices enabling local, real-time, analytics on the edge devices.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAzure IoT Hub[0m[38;5;12m (https://azure.microsoft.com/en-us/services/iot-hub/) - Cloud-based bi-directional monitoring and messaging hub[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTempoIQ[0m[38;5;12m (https://www.tempoiq.com/) - Cloud-based sensor analytics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2lemetry[0m[38;5;12m (http://2lemetry.com/) - Platform for Internet of things.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPubnub[0m[38;5;12m (https://www.pubnub.com/) - Data stream network[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThingWorx[0m[38;5;12m (https://www.thingworx.com/) - Rapid development and connection of intelligent systems[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIFTTT[0m[38;5;12m (https://ifttt.com/) - If this then that[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mEvrythng[0m[38;5;12m (https://evrythng.com/) - Making products smart[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNetLytics[0m[38;5;12m (https://github.com/marty90/netlytics/) - Analytics platform to process network data on Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAbly[0m[38;5;12m (https://ably.com/) - Pub/sub messaging platform for IoT [39m
|
||
|
||
[38;2;255;187;0m[4mInteresting Readings[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBig Data Benchmark[0m[38;5;12m (https://amplab.cs.berkeley.edu/benchmark/) - Benchmark of Redshift, Hive, Shark, Impala and Stinger/Tez.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mNoSQL Comparison[0m
|
||
[38;5;12m (https://kkovacs.eu/cassandra-vs-mongodb-vs-couchdb-vs-redis) - Cassandra vs MongoDB vs CouchDB vs Redis vs Riak vs HBase vs Couchbase vs Neo4j vs Hypertable vs ElasticSearch vs Accumulo vs VoltDB vs Scalaris comparison.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMonitoring Kafka performance[0m[38;5;12m (https://www.datadoghq.com/blog/monitoring-kafka-performance-metrics?ref=awesome) - Guide to monitoring Apache Kafka, including native methods for metrics collection.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMonitoring Hadoop performance[0m[38;5;12m (https://www.datadoghq.com/blog/monitor-hadoop-metrics?ref=awesome) - Guide to monitoring Hadoop, with an overview of Hadoop architecture, and native methods for metrics collection.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMonitoring Cassandra performance[0m[38;5;12m (https://www.datadoghq.com/blog/how-to-monitor-cassandra-performance-metrics/?ref=awesome) - Guide to monitoring Cassandra, including native methods for metrics collection.[39m
|
||
|
||
[38;2;255;187;0m[4mInteresting Papers[0m
|
||
|
||
[38;2;255;187;0m[4m2015 - 2016[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2015[0m[38;5;12m (http://www.vldb.org/pvldb/vol8/p1804-ching.pdf) - [39m[38;5;14m[1mFacebook[0m[38;5;12m - One Trillion Edges: Graph Processing at Facebook-Scale.[39m
|
||
|
||
[38;2;255;187;0m[4m2013 - 2014[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2014[0m[38;5;12m (http://infolab.stanford.edu/~ullman/mmds/book.pdf) - [39m[38;5;14m[1mStanford[0m[38;5;12m - Mining of Massive Datasets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2013/03/eurosys13-paper83.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Presto: Distributed Machine Learning and Graph Processing with Sparse Matrices.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2013/01/dmx1.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - MLbase: A Distributed Machine-learning System.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2013/02/shark_sigmod2013.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Shark: SQL and Rich Analytics at Scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2013/05/grades-graphx_with_fonts.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - GraphX: A Resilient Distributed Graph System on Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://research.microsoft.com/pubs/200169/now-vldb.pdf) - [39m[38;5;14m[1mMicrosoft[0m[38;5;12m - Scalable Progressive Analytics on Big Data in the Cloud.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://static.druid.io/docs/druid.pdf) - [39m[38;5;14m[1mMetamarkets[0m[38;5;12m - Druid: A Real-time Analytical Data Store.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p764-rae.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Online, Asynchronous Schema Change in F1.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/41344.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - F1: A Distributed SQL Database That Scales.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p734-akidau.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - MillWheel: Fault-Tolerant Stream Processing at Internet Scale.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p767-wiener.pdf) - [39m[38;5;14m[1mFacebook[0m[38;5;12m - Scuba: Diving into Data at Facebook.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p871-curtiss.pdf) - [39m[38;5;14m[1mFacebook[0m[38;5;12m - Unicorn: A System for Searching the Social Graph.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2013[0m[38;5;12m (https://www.usenix.org/system/files/conference/nsdi13/nsdi13-final170_update.pdf) - [39m[38;5;14m[1mFacebook[0m[38;5;12m - Scaling Memcache at Facebook.[39m
|
||
|
||
[38;2;255;187;0m[4m2011 - 2012[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (http://vldb.org/pvldb/vol5/p1771_georgelee_vldb2012.pdf) - [39m[38;5;14m[1mTwitter[0m[38;5;12m - The Unified Logging Infrastructure[39m
|
||
[38;5;12mfor Data Analytics at Twitter.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2013/04/blinkdb_vldb12_demo.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Blink and It’s Done: Interactive Queries on Very Large Data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (https://www.usenix.org/system/files/login/articles/zaharia.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Fast and Interactive Analytics over Hadoop Data with Spark.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2012/03/mod482-xin1.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Shark: Fast Data Analysis Using Coarse-grained Distributed Memory.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2011[0m[38;5;12m (https://www.usenix.org/legacy/event/nsdi11/tech/full_papers/Bolosky.pdf) - [39m[38;5;14m[1mMicrosoft[0m[38;5;12m - Paxos Replicated State Machines as the Basis of a High-Performance Data Store.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (http://research.microsoft.com/pubs/178045/ppaoxs-paper29.pdf) - [39m[38;5;14m[1mMicrosoft[0m[38;5;12m - Paxos Made Parallel.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (https://arxiv.org/pdf/1203.5485.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - BlinkDB: Queries with Bounded Errors and Bounded Response Times on Very Large Data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (http://vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Processing a trillion cells per mouse click.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2012[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/spanner-osdi2012.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Spanner: Google’s Globally-Distributed Database.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2011[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2011/06/euro118-ananthanarayanan.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Scarlett: Coping with Skewed Popularity Content in MapReduce Clusters.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2011[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2011/06/Mesos-A-Platform-for-Fine-Grained-Resource-Sharing-in-the-Data-Center.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2011[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36971.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Megastore: Providing Scalable, Highly Available Storage for Interactive Services.[39m
|
||
|
||
[38;2;255;187;0m[4m2001 - 2010[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (https://www.usenix.org/legacy/event/osdi10/tech/full_papers/Beaver.pdf) - [39m[38;5;14m[1mFacebook[0m[38;5;12m - Finding a needle in Haystack: Facebook’s photo storage.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (https://amplab.cs.berkeley.edu/wp-content/uploads/2011/06/Spark-Cluster-Computing-with-Working-Sets.pdf) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Spark: Cluster Computing with Working Sets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (http://kowshik.github.io/JPregel/pregel_paper.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Pregel: A System for Large-Scale Graph Processing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36726.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Large-scale Incremental Processing Using Distributed Transactions and Notifications (the basis of Percolator and Caffeine).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36632.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Dremel: Interactive Analysis of Web-Scale Datasets.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2010[0m[38;5;12m (http://leoneu.github.io/) - [39m[38;5;14m[1mYahoo[0m[38;5;12m - S4: Distributed Stream Computing Platform.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2009[0m[38;5;12m (http://www.cs.umd.edu/~abadi/papers/hadoopdb.pdf) - HadoopDB: An Architectural Hybrid of MapReduce and DBMS Technologies for Analytical Workloads. [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2008[0m[38;5;12m (https://cwiki.apache.org/confluence/download/attachments/120729877/chukwa_cca08.pdf?version=1&modificationDate=1562667399000&api=v2) - [39m[38;5;14m[1mAMPLab[0m[38;5;12m - Chukwa: A large-scale monitoring system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2007[0m[38;5;12m (http://www.read.seas.harvard.edu/~kohler/class/cs239-w08/decandia07dynamo.pdf) - [39m[38;5;14m[1mAmazon[0m[38;5;12m - Dynamo: Amazon’s Highly Available Key-value Store.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2006[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/chubby-osdi06.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - The Chubby lock service for loosely-coupled distributed systems.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2006[0m[38;5;12m (http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//archive/bigtable-osdi06.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - Bigtable: A Distributed Storage System for Structured Data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2004[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/mapreduce-osdi04.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - MapReduce: Simplified Data Processing on Large Clusters.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m2003[0m[38;5;12m (http://static.googleusercontent.com/media/research.google.com/en//archive/gfs-sosp2003.pdf) - [39m[38;5;14m[1mGoogle[0m[38;5;12m - The Google File System.[39m
|
||
|
||
[38;2;255;187;0m[4mVideos[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpark in Motion[0m[38;5;12m (https://www.manning.com/livevideo/spark-in-motion) - Spark in Motion teaches you how to use Spark for batch and streaming data analytics.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMachine Learning, Data Science and Deep Learning with Python [0m
|
||
[38;5;12m (https://www.manning.com/livevideo/machine-learning-data-science-and-deep-learning-with-python) - LiveVideo tutorial that covers machine learning, Tensorflow, artificial intelligence, and neural networks.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mData warehouse schema design - dimensional modeling and star schema[0m[38;5;12m (https://snir.dev/talks/data-warehouse-schema-design) - Introduction to schema design for data warehouse using the star schema method.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mElasticsearch 7 and Elastic Stack[0m
|
||
[38;5;12m (https://www.manning.com/livevideo/elasticsearch-7-and-elastic-stack) - LiveVideo tutorial that covers searching, analyzing, and visualizing big data on a cluster with Elasticsearch, Logstash, Beats, Kibana, and more.[39m
|
||
|
||
[38;2;255;187;0m[4mBooks[0m
|
||
|
||
[38;2;255;187;0m[4mStreaming[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mData Science at Scale with Python and Dask[0m
|
||
[38;5;12m (https://www.manning.com/books/data-science-at-scale-with-python-and-dask) - Data Science at Scale with Python and Dask teaches you how to build distributed data projects that can handle huge amounts of data.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStreaming Data[0m[38;5;12m (https://www.manning.com/books/streaming-data) - Streaming Data introduces the concepts and requirements of streaming and real-time data systems.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStorm Applied[0m[38;5;12m (https://www.manning.com/books/storm-applied) - Storm Applied is a practical guide to using Apache Storm for the real-world tasks associated with processing and analyzing real-time data streams.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFundamentals[0m[38;5;14m[1m [0m[38;5;14m[1mof[0m[38;5;14m[1m [0m[38;5;14m[1mStream[0m[38;5;14m[1m [0m[38;5;14m[1mProcessing:[0m[38;5;14m[1m [0m[38;5;14m[1mApplication[0m[38;5;14m[1m [0m[38;5;14m[1mDesign,[0m[38;5;14m[1m [0m[38;5;14m[1mSystems,[0m[38;5;14m[1m [0m[38;5;14m[1mand[0m[38;5;14m[1m [0m[38;5;14m[1mAnalytics[0m[38;5;12m [39m
|
||
[38;5;12m(http://www.cambridge.org/us/academic/subjects/engineering/communications-and-signal-processing/fundamentals-stream-processing-application-design-systems-and-analytics)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mThis[39m[38;5;12m [39m[38;5;12mcomprehensive,[39m[38;5;12m [39m[38;5;12mhands-on[39m[38;5;12m [39m[38;5;12mguide[39m[38;5;12m [39m[38;5;12mcombining[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mfundamental[39m[38;5;12m [39m
|
||
[38;5;12mbuilding[39m[38;5;12m [39m[38;5;12mblocks[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12memerging[39m[38;5;12m [39m[38;5;12mresearch[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mstream[39m[38;5;12m [39m[38;5;12mprocessing[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mideal[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mapplication[39m[38;5;12m [39m[38;5;12mdesigners,[39m[38;5;12m [39m[38;5;12msystem[39m[38;5;12m [39m[38;5;12mbuilders,[39m[38;5;12m [39m[38;5;12manalytic[39m[38;5;12m [39m[38;5;12mdevelopers,[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mwell[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mstudents[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mresearchers[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mfield.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStream Data Processing: A Quality of Service Perspective[0m[38;5;12m (http://www.springer.com/us/book/9780387710020) - Presents a new paradigm suitable for stream and complex event processing.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUnified Log Processing[0m[38;5;12m (https://www.manning.com/books/event-streams-in-action) - Unified Log Processing is a practical guide to implementing a unified log of event streams (Kafka or Kinesis) in your business.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKafka[0m[38;5;14m[1m [0m[38;5;14m[1mStreams[0m[38;5;14m[1m [0m[38;5;14m[1min[0m[38;5;14m[1m [0m[38;5;14m[1mAction[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/kafka-streams-in-action)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mKafka[39m[38;5;12m [39m[38;5;12mStreams[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mAction[39m[38;5;12m [39m[38;5;12mteaches[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12meverything[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mneed[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mknow[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mimplement[39m[38;5;12m [39m[38;5;12mstream[39m[38;5;12m [39m[38;5;12mprocessing[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mflowing[39m[38;5;12m [39m[38;5;12minto[39m[38;5;12m [39m[38;5;12myour[39m[38;5;12m [39m[38;5;12mKafka[39m[38;5;12m [39m[38;5;12mplatform,[39m[38;5;12m [39m[38;5;12mallowing[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mto[39m
|
||
[38;5;12mfocus[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mgetting[39m[38;5;12m [39m[38;5;12mmore[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12myour[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mwithout[39m[38;5;12m [39m[38;5;12msacrificing[39m[38;5;12m [39m[38;5;12mtime[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12meffort.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBig[0m[38;5;14m[1m [0m[38;5;14m[1mData[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/big-data)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mBig[39m[38;5;12m [39m[38;5;12mData[39m[38;5;12m [39m[38;5;12mteaches[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbuild[39m[38;5;12m [39m[38;5;12mbig[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12msystems[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12marchitecture[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mtakes[39m[38;5;12m [39m[38;5;12madvantage[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mclustered[39m[38;5;12m [39m[38;5;12mhardware[39m[38;5;12m [39m[38;5;12malong[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mnew[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mdesigned[39m[38;5;12m [39m[38;5;12mspecifically[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mcapture[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12manalyze[39m[38;5;12m [39m
|
||
[38;5;12mweb-scale[39m[38;5;12m [39m[38;5;12mdata.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSpark[0m[38;5;14m[1m [0m[38;5;14m[1min[0m[38;5;14m[1m [0m[38;5;14m[1mAction[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/spark-in-action)[39m[38;5;12m [39m[38;5;12m&[39m[38;5;12m [39m[38;5;14m[1mSpark[0m[38;5;14m[1m [0m[38;5;14m[1min[0m[38;5;14m[1m [0m[38;5;14m[1mAction[0m[38;5;14m[1m [0m[38;5;14m[1m2nd[0m[38;5;14m[1m [0m[38;5;14m[1mEd.[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/spark-in-action-second-edition)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mSpark[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mAction[39m[38;5;12m [39m[38;5;12mteaches[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mtheory[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mskills[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mneed[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12meffectively[39m[38;5;12m [39m
|
||
[38;5;12mhandle[39m[38;5;12m [39m[38;5;12mbatch[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mstreaming[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mSpark.[39m[38;5;12m [39m[38;5;12mFully[39m[38;5;12m [39m[38;5;12mupdated[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mSpark[39m[38;5;12m [39m[38;5;12m2.0.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mKafka in Action[0m[38;5;12m (https://www.manning.com/books/kafka-in-action) - Kafka in Action is a fast-paced introduction to every aspect of working with Kafka you need to really reap its benefits.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFusion in Action[0m[38;5;12m (https://www.manning.com/books/fusion-in-action) - Fusion in Action teaches you to build a full-featured data analytics pipeline, including document and data search and distributed data clustering.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mReactive[0m[38;5;14m[1m [0m[38;5;14m[1mData[0m[38;5;14m[1m [0m[38;5;14m[1mHandling[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/reactive-data-handling)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mReactive[39m[38;5;12m [39m[38;5;12mData[39m[38;5;12m [39m[38;5;12mHandling[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mcollection[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mfive[39m[38;5;12m [39m[38;5;12mhand-picked[39m[38;5;12m [39m[38;5;12mchapters,[39m[38;5;12m [39m[38;5;12mselected[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mManuel[39m[38;5;12m [39m[38;5;12mBernhardt,[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mintroduce[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbuilding[39m[38;5;12m [39m[38;5;12mreactive[39m[38;5;12m [39m[38;5;12mapplications[39m[38;5;12m [39m
|
||
[38;5;12mcapable[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mhandling[39m[38;5;12m [39m[38;5;12mreal-time[39m[38;5;12m [39m[38;5;12mprocessing[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mlarge[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mloads--free[39m[38;5;12m [39m[38;5;12meBook![39m[38;5;12m [39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAzure Data Engineering[0m[38;5;12m (https://www.manning.com/books/azure-data-engineering) - A book about data engineering in general and the Azure platform specifically.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGrokking[0m[38;5;14m[1m [0m[38;5;14m[1mStreaming[0m[38;5;14m[1m [0m[38;5;14m[1mSystems[0m[38;5;12m [39m[38;5;12m(https://www.manning.com/books/grokking-streaming-systems)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mGrokking[39m[38;5;12m [39m[38;5;12mStreaming[39m[38;5;12m [39m[38;5;12mSystems[39m[38;5;12m [39m[38;5;12mhelps[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12munravel[39m[38;5;12m [39m[38;5;12mwhat[39m[38;5;12m [39m[38;5;12mstreaming[39m[38;5;12m [39m[38;5;12msystems[39m[38;5;12m [39m[38;5;12mare,[39m[38;5;12m [39m[38;5;12mhow[39m[38;5;12m [39m[38;5;12mthey[39m[38;5;12m [39m[38;5;12mwork,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mwhether[39m[38;5;12m [39m[38;5;12mthey’re[39m[38;5;12m [39m[38;5;12mright[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12myour[39m[38;5;12m [39m[38;5;12mbusiness.[39m[38;5;12m [39m[38;5;12mWritten[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbe[39m
|
||
[38;5;12mtool-agnostic,[39m[38;5;12m [39m[38;5;12myou’ll[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mable[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mapply[39m[38;5;12m [39m[38;5;12mwhat[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mlearn[39m[38;5;12m [39m[38;5;12mno[39m[38;5;12m [39m[38;5;12mmatter[39m[38;5;12m [39m[38;5;12mwhich[39m[38;5;12m [39m[38;5;12mframework[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mchoose.[39m
|
||
|
||
[38;2;255;187;0m[4mDistributed systems[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDistributed Systems for fun and profit[0m[38;5;12m (http://book.mixu.net/distsys/) – Theory of distributed systems. Includes parts about time and ordering, replication, and impossibility results.[39m
|
||
|
||
[38;2;255;187;0m[4mGraph Based approach[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGraph-Powered Machine Learning[0m[38;5;12m (https://www.manning.com/books/graph-powered-machine-learning) - By Alessandro Negro. Combines graph theory and models to improve machine learning projects.[39m
|
||
|
||
[38;2;255;187;0m[4mData Visualization[0m
|
||
[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe beauty of data visualization[0m[38;5;12m (https://www.youtube.com/watch?v=5Zg-C8AAIGg)[39m
|
||
[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDesigning Data Visualizations with Noah Iliinsky[0m[38;5;12m (https://www.youtube.com/watch?v=R-oiKt7bUU8)[39m
|
||
[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHans Rosling's 200 Countries, 200 Years, 4 Minutes[0m[38;5;12m (https://www.youtube.com/watch?v=jbkSRLYSojo)[39m
|
||
[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIce Bucket Challenge Data Visualization[0m[38;5;12m (https://www.youtube.com/watch?v=qTEchen97rQ)[39m
|
||
|
||
|
||
[38;5;12m [39m[38;2;255;187;0m[1m[4mOther Awesome Lists[0m
|
||
[38;5;12m- Other awesome lists [39m[38;5;14m[1mawesome-awesomeness[0m[38;5;12m (https://github.com/bayandin/awesome-awesomeness).[39m
|
||
[38;5;12m- Even more lists [39m[38;5;14m[1mawesome[0m[38;5;12m (https://github.com/sindresorhus/awesome).[39m
|
||
[38;5;12m- Another list? [39m[38;5;14m[1mlist[0m[38;5;12m (https://github.com/jnv/lists).[39m
|
||
[38;5;12m- WTF! [39m[38;5;14m[1mawesome-awesome-awesome[0m[38;5;12m (https://github.com/t3chnoboy/awesome-awesome-awesome).[39m
|
||
[38;5;12m- Analytics [39m[38;5;14m[1mawesome-analytics[0m[38;5;12m (https://github.com/onurakpolat/awesome-analytics).[39m
|
||
[38;5;12m- Public Datasets [39m[38;5;14m[1mawesome-public-datasets[0m[38;5;12m (https://github.com/awesomedata/awesome-public-datasets).[39m
|
||
[38;5;12m- Graph Classification [39m[38;5;14m[1mawesome-graph-classification[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-graph-classification).[39m
|
||
[38;5;12m- Network Embedding [39m[38;5;14m[1mawesome-network-embedding[0m[38;5;12m (https://github.com/chihming/awesome-network-embedding).[39m
|
||
[38;5;12m- Community Detection [39m[38;5;14m[1mawesome-community-detection[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-community-detection).[39m
|
||
[38;5;12m- Decision Tree Papers [39m[38;5;14m[1mawesome-decision-tree-papers[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-decision-tree-papers).[39m
|
||
[38;5;12m- Fraud Detection Papers [39m[38;5;14m[1mawesome-fraud-detection-papers[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-fraud-detection-papers).[39m
|
||
[38;5;12m- Gradient Boosting Papers [39m[38;5;14m[1mawesome-gradient-boosting-papers[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-gradient-boosting-papers).[39m
|
||
[38;5;12m- Monte Carlo Tree Search Papers [39m[38;5;14m[1mawesome-monte-carlo-tree-search-papers[0m[38;5;12m (https://github.com/benedekrozemberczki/awesome-monte-carlo-tree-search-papers).[39m
|
||
[38;5;12m- Kafka [39m[38;5;14m[1mawesome-kafka[0m[38;5;12m (https://github.com/monksy/awesome-kafka).[39m
|
||
[38;5;12m- [39m[38;5;14m[1mGoogle Bigtable[0m[38;5;12m (https://github.com/zrosenbauer/awesome-bigtable).[39m
|