# awesome-nlp

[![Awesome](https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg)](https://github.com/sindresorhus/awesome)

A curated list of resources dedicated to Natural Language Processing

![Awesome NLP Logo](/images/logo.jpg)

Read this in [English](./README.md), [Traditional Chinese](./README-ZH-TW.md)

_Please read the [contribution guidelines](contributing.md) before contributing. Please add your favourite NLP resource by raising a [pull request](https://github.com/keonkim/awesome-nlp/pulls)_
## Contents

- [Research Summaries and Trends](#research-summaries-and-trends)
- [Prominent NLP Research Labs](#prominent-nlp-research-labs)
- [Tutorials](#tutorials)
  - [Reading Content](#reading-content)
  - [Videos and Courses](#videos-and-online-courses)
  - [Books](#books)
- [Libraries](#libraries)
  - [Node.js](#node-js)
  - [Python](#python)
  - [C++](#c++)
  - [Java](#java)
  - [Kotlin](#kotlin)
  - [Scala](#scala)
  - [R](#R)
  - [Clojure](#clojure)
  - [Ruby](#ruby)
  - [Rust](#rust)
  - [NLP++](#NLP++)
  - [Julia](#julia)
- [Services](#services)
- [Annotation Tools](#annotation-tools)
- [Datasets](#datasets)
- [NLP in Korean](#nlp-in-korean)
- [NLP in Arabic](#nlp-in-arabic)
- [NLP in Chinese](#nlp-in-chinese)
- [NLP in German](#nlp-in-german)
- [NLP in Polish](#nlp-in-polish)
- [NLP in Spanish](#nlp-in-spanish)
- [NLP in Indic Languages](#nlp-in-indic-languages)
- [NLP in Thai](#nlp-in-thai)
- [NLP in Danish](#nlp-in-danish)
- [NLP in Vietnamese](#nlp-in-vietnamese)
- [NLP for Dutch](#nlp-for-dutch)
- [NLP in Indonesian](#nlp-in-indonesian)
- [NLP in Urdu](#nlp-in-urdu)
- [NLP in Persian](#nlp-in-persian)
- [NLP in Ukrainian](#nlp-in-ukrainian)
- [NLP in Hungarian](#nlp-in-hungarian)
- [NLP in Portuguese](#nlp-in-portuguese)
- [Other Languages](#other-languages)
- [Credits](#credits)
## Research Summaries and Trends

- [NLP-Overview](https://nlpoverview.com/) is an up-to-date overview of deep learning techniques applied to NLP, including theory, implementations, applications, and state-of-the-art results. This is a great Deep NLP introduction for researchers.
- [NLP-Progress](https://nlpprogress.com/) tracks the progress in Natural Language Processing, including the datasets and the current state-of-the-art for the most common NLP tasks
- [NLP's ImageNet moment has arrived](https://thegradient.pub/nlp-imagenet/)
- [ACL 2018 Highlights: Understanding Representation and Evaluation in More Challenging Settings](http://ruder.io/acl-2018-highlights/)
- [Four deep learning trends from ACL 2017. Part One: Linguistic Structure and Word Embeddings](https://www.abigailsee.com/2017/08/30/four-deep-learning-trends-from-acl-2017-part-1.html)
- [Four deep learning trends from ACL 2017. Part Two: Interpretability and Attention](https://www.abigailsee.com/2017/08/30/four-deep-learning-trends-from-acl-2017-part-2.html)
- [Highlights of EMNLP 2017: Exciting Datasets, Return of the Clusters, and More!](http://blog.aylien.com/highlights-emnlp-2017-exciting-datasets-return-clusters/)
- [Deep Learning for Natural Language Processing (NLP): Advancements & Trends](https://tryolabs.com/blog/2017/12/12/deep-learning-for-nlp-advancements-and-trends-in-2017/?utm_campaign=Revue%20newsletter&utm_medium=Newsletter&utm_source=The%20Wild%20Week%20in%20AI)
- [Survey of the State of the Art in Natural Language Generation](https://arxiv.org/abs/1703.09902)
## Prominent NLP Research Labs

[Back to Top](#contents)

- [The Berkeley NLP Group](http://nlp.cs.berkeley.edu/index.shtml) - Notable contributions include a tool to reconstruct long dead languages, referenced [here](https://www.bbc.com/news/science-environment-21427896), which takes corpora from 637 languages currently spoken in Asia and the Pacific and recreates their common ancestors.
- [Language Technologies Institute, Carnegie Mellon University](http://www.cs.cmu.edu/~nasmith/nlp-cl.html) - Notable projects include the [Avenue Project](http://www.cs.cmu.edu/~avenue/), a syntax-driven machine translation system for endangered languages like Quechua and Aymara, and, previously, [Noah's Ark](http://www.cs.cmu.edu/~ark/), which created [AQMAR](http://www.cs.cmu.edu/~ark/AQMAR/) to improve NLP tools for Arabic.
- [NLP research group, Columbia University](http://www1.cs.columbia.edu/nlp/index.cgi) - Responsible for creating BOLT (interactive error handling for speech translation systems) and an unnamed project to characterize laughter in dialogue.
- [The Center for Language and Speech Processing, Johns Hopkins University](http://clsp.jhu.edu/) - Recently in the news for developing speech recognition software to create a diagnostic test for Parkinson's Disease, [here](https://www.clsp.jhu.edu/2019/03/27/speech-recognition-software-and-machine-learning-tools-are-being-used-to-create-diagnostic-test-for-parkinsons-disease/#.XNFqrIkzYdU).
- [Computational Linguistics and Information Processing Group, University of Maryland](https://wiki.umiacs.umd.edu/clip/index.php/Main_Page) - Notable contributions include [Human-Computer Cooperation for Word-by-Word Question Answering](http://www.umiacs.umd.edu/~jbg/projects/IIS-1652666) and modeling the development of phonetic representations.
- [Penn Natural Language Processing, University of Pennsylvania](https://nlp.cis.upenn.edu/) - Famous for creating the [Penn Treebank](https://www.seas.upenn.edu/~pdtb/).
- [The Stanford Natural Language Processing Group](https://nlp.stanford.edu/) - One of the top NLP research labs in the world, notable for creating [Stanford CoreNLP](https://nlp.stanford.edu/software/corenlp.shtml) and their [coreference resolution system](https://nlp.stanford.edu/software/dcoref.shtml)
## Tutorials

[Back to Top](#contents)

### Reading Content

General Machine Learning

- [Machine Learning 101](https://docs.google.com/presentation/d/1kSuQyW5DTnkVaZEjGYCkfOxvzCqGEFzWBy4e9Uedd9k/edit?usp=sharing) from Google's Senior Creative Engineer explains Machine Learning for engineers and executives alike
- [AI Playbook](https://aiplaybook.a16z.com/) - a16z AI playbook is a great link to forward to your managers or content for your presentations
- [Ruder's Blog](http://ruder.io/#open) by [Sebastian Ruder](https://twitter.com/seb_ruder) for commentary on the best of NLP Research
- [How To Label Data](https://www.lighttag.io/how-to-label-data/) guide to managing larger linguistic annotation projects
- [Depends on the Definition](https://www.depends-on-the-definition.com/) collection of blog posts covering a wide array of NLP topics with detailed implementation
Introductions and Guides to NLP

- [Understand & Implement Natural Language Processing](https://www.analyticsvidhya.com/blog/2017/01/ultimate-guide-to-understand-implement-natural-language-processing-codes-in-python/)
- [NLP in Python](http://github.com/NirantK/nlp-python-deep-learning) - Collection of Github notebooks
- [Natural Language Processing: An Introduction](https://academic.oup.com/jamia/article/18/5/544/829676) - Oxford
- [Deep Learning for NLP with Pytorch](https://pytorch.org/tutorials/beginner/deep_learning_nlp_tutorial.html)
- [Hands-On NLTK Tutorial](https://github.com/hb20007/hands-on-nltk-tutorial) - NLTK Tutorials, Jupyter notebooks
- [Natural Language Processing with Python – Analyzing Text with the Natural Language Toolkit](https://www.nltk.org/book/) - An online and print book introducing NLP concepts using NLTK. The book's authors also wrote the NLTK library. (See the short NLTK sketch after this list.)
- [Train a new language model from scratch](https://huggingface.co/blog/how-to-train) - Hugging Face 🤗
- [The Super Duper NLP Repo (SDNLPR)](https://notebooks.quantumstat.com/): Collection of Colab notebooks covering a wide array of NLP task implementations.
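To give a flavor of the NLTK workflow these tutorials walk through, here is a minimal sketch (not part of the original list). It assumes NLTK is installed (`pip install nltk`); the exact resource names to download can vary slightly across NLTK versions (e.g. newer releases use `punkt_tab`), so treat the download calls as illustrative.

```python
# Minimal NLTK sketch: tokenize a sentence and tag parts of speech.
# Assumes `pip install nltk`; the downloads fetch the tokenizer/tagger data
# (resource names may differ slightly depending on your NLTK version).
import nltk

nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)

sentence = "Natural language processing turns raw text into structured data."
tokens = nltk.word_tokenize(sentence)   # ['Natural', 'language', 'processing', ...]
tagged = nltk.pos_tag(tokens)           # [('Natural', 'JJ'), ('language', 'NN'), ...]
print(tagged)
```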
Blogs and Newsletters

- [Deep Learning, NLP, and Representations](https://colah.github.io/posts/2014-07-NLP-RNNs-Representations/)
- [The Illustrated BERT, ELMo, and co. (How NLP Cracked Transfer Learning)](https://jalammar.github.io/illustrated-bert/) and [The Illustrated Transformer](https://jalammar.github.io/illustrated-transformer/)
- [Natural Language Processing](https://nlpers.blogspot.com/) by Hal Daumé III
- [arXiv: Natural Language Processing (Almost) from Scratch](https://arxiv.org/pdf/1103.0398.pdf)
- [Karpathy's The Unreasonable Effectiveness of Recurrent Neural Networks](https://karpathy.github.io/2015/05/21/rnn-effectiveness)
- [Machine Learning Mastery: Deep Learning for Natural Language Processing](https://machinelearningmastery.com/category/natural-language-processing)
- [Visual NLP Paper Summaries](https://amitness.com/categories/#nlp)
### Videos and Online Courses

[Back to Top](#contents)

- [Advanced Natural Language Processing](https://people.cs.umass.edu/~miyyer/cs685_f20/) - CS 685, UMass Amherst CS
- [Deep Natural Language Processing](https://github.com/oxford-cs-deepnlp-2017/lectures) - Lecture series from Oxford
- [Deep Learning for Natural Language Processing (cs224-n)](https://web.stanford.edu/class/cs224n/) - Richard Socher and Christopher Manning's Stanford Course
- [Neural Networks for NLP](http://phontron.com/class/nn4nlp2017/) - Carnegie Mellon Language Technology Institute
- [Deep NLP Course](https://github.com/yandexdataschool/nlp_course) by Yandex Data School, covering important ideas from text embedding to machine translation, including sequence modeling, language models and so on.
- [fast.ai Code-First Intro to Natural Language Processing](https://www.fast.ai/2019/07/08/fastai-nlp/) - This covers a blend of traditional NLP topics (including regex, SVD, naive bayes, tokenization) and recent neural network approaches (including RNNs, seq2seq, GRUs, and the Transformer), as well as addressing urgent ethical issues, such as bias and disinformation. Find the Jupyter Notebooks [here](https://github.com/fastai/course-nlp)
- [Machine Learning University - Accelerated Natural Language Processing](https://www.youtube.com/playlist?list=PL8P_Z6C4GcuWfAq8Pt6PBYlck4OprHXsw) - Lectures go from introduction to NLP and text processing to Recurrent Neural Networks and Transformers. Material can be found [here](https://github.com/aws-samples/aws-machine-learning-university-accelerated-nlp).
- [Applied Natural Language Processing](https://www.youtube.com/playlist?list=PLH-xYrxjfO2WyR3pOAB006CYMhNt4wTqp) - Lecture series from IIT Madras covering everything from the basics up to autoencoders. The GitHub notebooks for this course are also available [here](https://github.com/Ramaseshanr/anlp)
### Books

- [Speech and Language Processing](https://web.stanford.edu/~jurafsky/slp3/) - free, by Prof. Dan Jurafsky
- [Natural Language Processing](https://github.com/jacobeisenstein/gt-nlp-class) - free, NLP notes by Dr. Jacob Eisenstein at GeorgiaTech
- [NLP with PyTorch](https://github.com/joosthub/PyTorchNLPBook) - Brian & Delip Rao
- [Text Mining in R](https://www.tidytextmining.com)
- [Natural Language Processing with Python](https://www.nltk.org/book/)
- [Practical Natural Language Processing](https://www.oreilly.com/library/view/practical-natural-language/9781492054047/)
- [Natural Language Processing with Spark NLP](https://www.oreilly.com/library/view/natural-language-processing/9781492047759/)
- [Deep Learning for Natural Language Processing](https://www.manning.com/books/deep-learning-for-natural-language-processing) by Stephan Raaijmakers
- [Real-World Natural Language Processing](https://www.manning.com/books/real-world-natural-language-processing) - by Masato Hagiwara
- [Natural Language Processing in Action, Second Edition](https://www.manning.com/books/natural-language-processing-in-action-second-edition) - by Hobson Lane and Maria Dyshel
## Libraries

[Back to Top](#contents)

- **Node.js** | [Back to Top](#contents)
  - [Twitter-text](https://github.com/twitter/twitter-text) - A JavaScript implementation of Twitter's text processing library
  - [Knwl.js](https://github.com/benhmoore/Knwl.js) - A Natural Language Processor in JS
  - [Retext](https://github.com/retextjs/retext) - Extensible system for analyzing and manipulating natural language
  - [NLP Compromise](https://github.com/spencermountain/compromise) - Natural Language processing in the browser
  - [Natural](https://github.com/NaturalNode/natural) - general natural language facilities for node
  - [Poplar](https://github.com/synyi/poplar) - A web-based annotation tool for natural language processing (NLP)
  - [NLP.js](https://github.com/axa-group/nlp.js) - An NLP library for building bots
  - [node-question-answering](https://github.com/huggingface/node-question-answering) - Fast and production-ready question answering w/ DistilBERT in Node.js
- **Python** | [Back to Top](#contents)
  - [sentimental-onix](https://github.com/sloev/sentimental-onix) - Sentiment models for spaCy using ONNX
  - [TextAttack](https://github.com/QData/TextAttack) - Adversarial attacks, adversarial training, and data augmentation in NLP
  - [TextBlob](http://textblob.readthedocs.org/) - Providing a consistent API for diving into common natural language processing (NLP) tasks. Stands on the giant shoulders of [Natural Language Toolkit (NLTK)](https://www.nltk.org/) and [Pattern](https://github.com/clips/pattern), and plays nicely with both :+1:
  - [spaCy](https://github.com/explosion/spaCy) - Industrial strength NLP with Python and Cython :+1: (see the short usage sketch after this list)
  - [Speedster](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/speedster) - Automatically apply SOTA optimization techniques to achieve the maximum inference speed-up on your hardware
  - [textacy](https://github.com/chartbeat-labs/textacy) - Higher-level NLP built on spaCy
  - [gensim](https://radimrehurek.com/gensim/index.html) - Python library to conduct unsupervised semantic modelling from plain text :+1:
  - [scattertext](https://github.com/JasonKessler/scattertext) - Python library to produce d3 visualizations of how language differs between corpora
  - [GluonNLP](https://github.com/dmlc/gluon-nlp) - A deep learning toolkit for NLP, built on MXNet/Gluon, for research prototyping and industrial deployment of state-of-the-art models on a wide range of NLP tasks.
  - [AllenNLP](https://github.com/allenai/allennlp) - An NLP research library, built on PyTorch, for developing state-of-the-art deep learning models on a wide variety of linguistic tasks.
  - [PyTorch-NLP](https://github.com/PetrochukM/PyTorch-NLP) - NLP research toolkit designed to support rapid prototyping with better data loaders, word vector loaders, neural network layer representations, common NLP metrics such as BLEU
  - [Rosetta](https://github.com/columbia-applied-data-science/rosetta) - Text processing tools and wrappers (e.g. Vowpal Wabbit)
  - [PyNLPl](https://github.com/proycon/pynlpl) - Python Natural Language Processing Library. General purpose NLP library for Python; handles some specific formats like ARPA language models, Moses phrasetables, GIZA++ alignments.
  - [foliapy](https://github.com/proycon/foliapy) - Python library for working with [FoLiA](https://proycon.github.io/folia/), an XML format for linguistic annotation.
  - [PySS3](https://github.com/sergioburdisso/pyss3) - Python package that implements a novel white-box machine learning model for text classification, called SS3. Since SS3 has the ability to visually explain its rationale, this package also comes with easy-to-use interactive visualization tools ([online demos](http://tworld.io/ss3/)).
  - [jPTDP](https://github.com/datquocnguyen/jPTDP) - A toolkit for joint part-of-speech (POS) tagging and dependency parsing. jPTDP provides pre-trained models for 40+ languages.
  - [BigARTM](https://github.com/bigartm/bigartm) - a fast library for topic modelling
  - [Snips NLU](https://github.com/snipsco/snips-nlu) - A production ready library for intent parsing
  - [Chazutsu](https://github.com/chakki-works/chazutsu) - A library for downloading & parsing standard NLP research datasets
  - [Word Forms](https://github.com/gutfeeling/word_forms) - Word forms can accurately generate all possible forms of an English word
  - [Multilingual Latent Dirichlet Allocation (LDA)](https://github.com/ArtificiAI/Multilingual-Latent-Dirichlet-Allocation-LDA) - A multilingual and extensible document clustering pipeline
  - [Natural Language Toolkit (NLTK)](https://www.nltk.org/) - A library containing a wide variety of NLP functionality, supporting over 50 corpora.
  - [NLP Architect](https://github.com/NervanaSystems/nlp-architect) - A library for exploring the state-of-the-art deep learning topologies and techniques for NLP and NLU
  - [Flair](https://github.com/zalandoresearch/flair) - A very simple framework for state-of-the-art multilingual NLP built on PyTorch. Includes BERT, ELMo and Flair embeddings.
  - [Kashgari](https://github.com/BrikerMan/Kashgari) - Simple, Keras-powered multilingual NLP framework that allows you to build your models in 5 minutes for named entity recognition (NER), part-of-speech tagging (PoS) and text classification tasks. Includes BERT and word2vec embedding.
  - [FARM](https://github.com/deepset-ai/FARM) - Fast & easy transfer learning for NLP. Harvesting language models for the industry. Focus on Question Answering.
  - [Haystack](https://github.com/deepset-ai/haystack) - End-to-end Python framework for building natural language search interfaces to data. Leverages Transformers and the state of the art of NLP. Supports DPR, Elasticsearch, HuggingFace's Modelhub, and much more!
  - [Rita DSL](https://github.com/zaibacu/rita-dsl) - a DSL, loosely based on [RUTA on Apache UIMA](https://uima.apache.org/ruta.html). Lets you define language patterns (rule-based NLP) which are then translated into [spaCy](https://spacy.io/) patterns or, if you prefer something lighter with fewer features, regex patterns.
  - [Transformers](https://github.com/huggingface/transformers) - Natural Language Processing for TensorFlow 2.0 and PyTorch.
  - [Tokenizers](https://github.com/huggingface/tokenizers) - Tokenizers optimized for research and production.
  - [fairSeq](https://github.com/pytorch/fairseq) - Facebook AI Research implementations of SOTA seq2seq models in PyTorch.
  - [corex_topic](https://github.com/gregversteeg/corex_topic) - Hierarchical Topic Modeling with Minimal Domain Knowledge
  - [Sockeye](https://github.com/awslabs/sockeye) - Neural Machine Translation (NMT) toolkit that powers Amazon Translate.
  - [DL Translate](https://github.com/xhlulu/dl-translate) - A deep learning-based translation library for 50 languages, built on `transformers` and Facebook's mBART Large.
  - [Jury](https://github.com/obss/jury) - Evaluation of NLP model outputs offering various automated metrics.
  - [python-ucto](https://github.com/proycon/python-ucto) - Unicode-aware regular-expression based tokenizer for various languages. Python binding to the C++ library, supports [FoLiA format](https://proycon.github.io/folia).
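As a taste of the APIs above, here is a minimal sketch using two of the listed libraries, spaCy and 🤗 Transformers (not part of the original list). It assumes `pip install spacy transformers` plus `python -m spacy download en_core_web_sm`; the Transformers pipeline fetches a default sentiment model on first run, so the outputs shown in comments are illustrative.

```python
# Minimal sketch of two libraries from the list above (assumes the packages
# and the small English spaCy model are installed; see the note above).
import spacy
from transformers import pipeline

# spaCy: tokenization, part-of-speech tags, and named entities.
nlp = spacy.load("en_core_web_sm")
doc = nlp("Hugging Face was founded in New York City.")
print([(token.text, token.pos_) for token in doc])
print([(ent.text, ent.label_) for ent in doc.ents])   # e.g. ('New York City', 'GPE')

# Transformers: a ready-made sentiment-analysis pipeline
# (downloads a default model on first use).
classifier = pipeline("sentiment-analysis")
print(classifier("This curated list is really useful."))
```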
- **C++** | [Back to Top](#contents)
  - [InsNet](https://github.com/chncwang/InsNet) - A neural network library for building instance-dependent NLP models with padding-free dynamic batching.
  - [MIT Information Extraction Toolkit](https://github.com/mit-nlp/MITIE) - C, C++, and Python tools for named entity recognition and relation extraction
  - [CRF++](https://taku910.github.io/crfpp/) - Open source implementation of Conditional Random Fields (CRFs) for segmenting/labeling sequential data & other Natural Language Processing tasks.
  - [CRFsuite](http://www.chokkan.org/software/crfsuite/) - CRFsuite is an implementation of Conditional Random Fields (CRFs) for labeling sequential data.
  - [BLLIP Parser](https://github.com/BLLIP/bllip-parser) - BLLIP Natural Language Parser (also known as the Charniak-Johnson parser)
  - [colibri-core](https://github.com/proycon/colibri-core) - C++ library, command line tools, and Python binding for extracting and working with basic linguistic constructions such as n-grams and skipgrams in a quick and memory-efficient way.
  - [ucto](https://github.com/LanguageMachines/ucto) - Unicode-aware regular-expression based tokenizer for various languages. Tool and C++ library. Supports FoLiA format.
  - [libfolia](https://github.com/LanguageMachines/libfolia) - C++ library for the [FoLiA format](https://proycon.github.io/folia/)
  - [frog](https://github.com/LanguageMachines/frog) - Memory-based NLP suite developed for Dutch: PoS tagger, lemmatiser, dependency parser, NER, shallow parser, morphological analyzer.
  - [MeTA](https://github.com/meta-toolkit/meta) - [MeTA : ModErn Text Analysis](https://meta-toolkit.org/) is a C++ Data Sciences Toolkit that facilitates mining big text data.
  - [Mecab (Japanese)](https://taku910.github.io/mecab/)
  - [Moses](http://statmt.org/moses/)
  - [StarSpace](https://github.com/facebookresearch/StarSpace) - a library from Facebook for creating embeddings of word-level, paragraph-level, document-level and for text classification
- **Java** | [Back to Top](#contents)
  - [Stanford NLP](https://nlp.stanford.edu/software/index.shtml)
  - [OpenNLP](https://opennlp.apache.org/)
  - [NLP4J](https://emorynlp.github.io/nlp4j/)
  - [Word2vec in Java](https://deeplearning4j.org/docs/latest/deeplearning4j-nlp-word2vec)
  - [ReVerb](https://github.com/knowitall/reverb/) Web-Scale Open Information Extraction
  - [OpenRegex](https://github.com/knowitall/openregex) An efficient and flexible token-based regular expression language and engine.
  - [CogcompNLP](https://github.com/CogComp/cogcomp-nlp) - Core libraries developed by the University of Illinois' Cognitive Computation Group.
  - [MALLET](http://mallet.cs.umass.edu/) - MAchine Learning for LanguagE Toolkit - package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
  - [RDRPOSTagger](https://github.com/datquocnguyen/RDRPOSTagger) - A robust POS tagging toolkit available (in both Java & Python) together with pre-trained models for 40+ languages.
- **Kotlin** | [Back to Top](#contents)
  - [Lingua](https://github.com/pemistahl/lingua/) - A language detection library for Kotlin and Java, suitable for long and short text alike
  - [Kotidgy](https://github.com/meiblorn/kotidgy) - an index-based text data generator written in Kotlin
- **Scala** | [Back to Top](#contents)
  - [Saul](https://github.com/CogComp/saul) - Library for developing NLP systems, including built in modules like SRL, POS, etc.
  - [ATR4S](https://github.com/ispras/atr4s) - Toolkit with state-of-the-art [automatic term recognition](https://en.wikipedia.org/wiki/Terminology_extraction) methods.
  - [tm](https://github.com/ispras/tm) - Implementation of topic modeling based on regularized multilingual [PLSA](https://en.wikipedia.org/wiki/Probabilistic_latent_semantic_analysis).
  - [word2vec-scala](https://github.com/Refefer/word2vec-scala) - Scala interface to word2vec model; includes operations on vectors like word-distance and word-analogy.
  - [Epic](https://github.com/dlwh/epic) - Epic is a high performance statistical parser written in Scala, along with a framework for building complex structured prediction models.
  - [Spark NLP](https://github.com/JohnSnowLabs/spark-nlp) - Spark NLP is a natural language processing library built on top of Apache Spark ML that provides simple, performant & accurate NLP annotations for machine learning pipelines that scale easily in a distributed environment.
- **R** | [Back to Top](#contents)
  - [text2vec](https://github.com/dselivanov/text2vec) - Fast vectorization, topic modeling, distances and GloVe word embeddings in R.
  - [wordVectors](https://github.com/bmschmidt/wordVectors) - An R package for creating and exploring word2vec and other word embedding models
  - [RMallet](https://github.com/mimno/RMallet) - R package to interface with the Java machine learning tool MALLET
  - [dfr-browser](https://github.com/agoldst/dfr-browser) - Creates d3 visualizations for browsing topic models of text in a web browser.
  - [dfrtopics](https://github.com/agoldst/dfrtopics) - R package for exploring topic models of text.
  - [sentiment_classifier](https://github.com/kevincobain2000/sentiment_classifier) - Sentiment Classification using Word Sense Disambiguation and WordNet Reader
  - [jProcessing](https://github.com/kevincobain2000/jProcessing) - Japanese Natural Language Processing Libraries, with Japanese sentiment classification
  - [corporaexplorer](https://kgjerde.github.io/corporaexplorer/) - An R package for dynamic exploration of text collections
  - [tidytext](https://github.com/juliasilge/tidytext) - Text mining using tidy tools
  - [spacyr](https://github.com/quanteda/spacyr) - R wrapper to spaCy NLP
  - [CRAN Task View: Natural Language Processing](https://github.com/cran-task-views/NaturalLanguageProcessing/)
- **Clojure** | [Back to Top](#contents)
  - [Clojure-openNLP](https://github.com/dakrone/clojure-opennlp) - Natural Language Processing in Clojure (opennlp)
  - [Inflections-clj](https://github.com/r0man/inflections-clj) - Rails-like inflection library for Clojure and ClojureScript
  - [postagga](https://github.com/fekr/postagga) - A library to parse natural language in Clojure and ClojureScript
- **Ruby** | [Back to Top](#contents)
  - Kevin Dias's [A collection of Natural Language Processing (NLP) Ruby libraries, tools and software](https://github.com/diasks2/ruby-nlp)
  - [Practical Natural Language Processing done in Ruby](https://github.com/arbox/nlp-with-ruby)
- **Rust** | [Back to Top](#contents)
  - [whatlang](https://github.com/greyblake/whatlang-rs) - Natural language recognition library based on trigrams
  - [snips-nlu-rs](https://github.com/snipsco/snips-nlu-rs) - A production ready library for intent parsing
  - [rust-bert](https://github.com/guillaume-be/rust-bert) - Ready-to-use NLP pipelines and Transformer-based models
- **NLP++** | [Back to Top](#contents)
  - [VSCode Language Extension](https://marketplace.visualstudio.com/items?itemName=dehilster.nlp) - NLP++ Language Extension for VSCode
  - [nlp-engine](https://github.com/VisualText/nlp-engine) - NLP++ engine to run NLP++ code on Linux, including a full English parser
  - [VisualText](http://visualtext.org) - Homepage for the NLP++ language
  - [NLP++ Wiki](http://wiki.naturalphilosophy.org/index.php?title=NLP%2B%2B) - Wiki entry for the NLP++ language
- **Julia** | [Back to Top](#contents)
  - [CorpusLoaders](https://github.com/JuliaText/CorpusLoaders.jl) - A variety of loaders for various NLP corpora
  - [Languages](https://github.com/JuliaText/Languages.jl) - A package for working with human languages
  - [TextAnalysis](https://github.com/JuliaText/TextAnalysis.jl) - Julia package for text analysis
  - [TextModels](https://github.com/JuliaText/TextModels.jl) - Neural network based models for Natural Language Processing
  - [WordTokenizers](https://github.com/JuliaText/WordTokenizers.jl) - High performance tokenizers for natural language processing and other related tasks
  - [Word2Vec](https://github.com/JuliaText/Word2Vec.jl) - Julia interface to word2vec
## Services

NLP as API with higher level functionality such as NER, topic tagging and so on | [Back to Top](#contents)

- [Wit-ai](https://github.com/wit-ai/wit) - Natural Language Interface for apps and devices
- [IBM Watson's Natural Language Understanding](https://github.com/watson-developer-cloud/natural-language-understanding-nodejs) - API and Github demo
- [Amazon Comprehend](https://aws.amazon.com/comprehend/) - NLP and ML suite covering most common tasks like NER, tagging, and sentiment analysis (a boto3 sketch follows this list)
- [Google Cloud Natural Language API](https://cloud.google.com/natural-language/) - Syntax Analysis, NER, Sentiment Analysis, and Content tagging in at least 9 languages including English and Chinese (Simplified and Traditional).
- [ParallelDots](https://www.paralleldots.com/text-analysis-apis) - High level Text Analysis API Service ranging from Sentiment Analysis to Intent Analysis
- [Microsoft Cognitive Service](https://azure.microsoft.com/en-us/services/cognitive-services/text-analytics/)
- [TextRazor](https://www.textrazor.com/)
- [Rosette](https://www.rosette.com/)
- [Textalytic](https://www.textalytic.com) - Natural Language Processing in the Browser with sentiment analysis, named entity extraction, POS tagging, word frequencies, topic modeling, word clouds, and more
- [NLP Cloud](https://nlpcloud.io) - SpaCy NLP models (custom and pre-trained ones) served through a RESTful API for named entity recognition (NER), POS tagging, and more.
- [Cloudmersive](https://cloudmersive.com/nlp-api) - Unified and free NLP APIs that perform actions such as speech tagging, text rephrasing, language translation/detection, and sentence parsing
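Most of these services are called through an HTTP API or an SDK. As one concrete, hedged illustration (not part of the original list), here is a minimal boto3 sketch for Amazon Comprehend; it assumes `pip install boto3`, AWS credentials already configured, and a region where Comprehend is available. The region and text are illustrative only.

```python
# Minimal sketch of calling Amazon Comprehend (listed above) via boto3.
# Assumes AWS credentials and the boto3 package are set up; outputs shown
# in comments are examples, not guaranteed responses.
import boto3

comprehend = boto3.client("comprehend", region_name="us-east-1")
text = "Amazon Comprehend makes it easy to find insights in text."

sentiment = comprehend.detect_sentiment(Text=text, LanguageCode="en")
entities = comprehend.detect_entities(Text=text, LanguageCode="en")

print(sentiment["Sentiment"])                      # e.g. 'POSITIVE'
print([e["Text"] for e in entities["Entities"]])   # e.g. ['Amazon Comprehend']
```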
[38;2;255;187;0m[4mAnnotation Tools[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mGATE[0m[38;5;12m (https://gate.ac.uk/overview.html) - General Architecture and Text Engineering is 15+ years old, free and open source[39m
|
||
[38;5;12m- [39m[38;5;14m[1mAnafora[0m[38;5;12m (https://github.com/weitechen/anafora) is free and open source, web-based raw text annotation tool[39m
|
||
[38;5;12m- [39m[38;5;14m[1mbrat[0m[38;5;12m (https://brat.nlplab.org/) - brat rapid annotation tool is an online environment for collaborative text annotation[39m
|
||
[38;5;12m- [39m[38;5;14m[1mdoccano[0m[38;5;12m (https://github.com/chakki-works/doccano) - doccano is free, open-source, and provides annotation features for text classification, sequence labeling and sequence to sequence[39m
|
||
[38;5;12m- [39m[38;5;14m[1mINCEpTION[0m[38;5;12m (https://inception-project.github.io) - A semantic annotation platform offering intelligent assistance and knowledge management[39m
|
||
[38;5;12m- [39m[38;5;14m[1mtagtog[0m[38;5;12m (https://www.tagtog.net/), team-first web tool to find, create, maintain, and share datasets - costs $[39m
|
||
[38;5;12m- [39m[38;5;14m[1mprodigy[0m[38;5;12m (https://prodi.gy/) is an annotation tool powered by active learning, costs $[39m
|
||
[38;5;12m- [39m[38;5;14m[1mLightTag[0m[38;5;12m (https://lighttag.io) - Hosted and managed text annotation tool for teams, costs $[39m
|
||
[38;5;12m- [39m[38;5;14m[1mrstWeb[0m[38;5;12m (https://corpling.uis.georgetown.edu/rstweb/info/) - open source local or online tool for discourse tree annotations[39m
|
||
[38;5;12m- [39m[38;5;14m[1mGitDox[0m[38;5;12m (https://corpling.uis.georgetown.edu/gitdox/) - open source server annotation tool with GitHub version control and validation for XML data and collaborative spreadsheet grids[39m
|
||
[38;5;12m- [39m[38;5;14m[1mLabel Studio[0m[38;5;12m (https://www.heartex.ai/) - Hosted and managed text annotation tool for teams, freemium based, costs $[39m
|
||
- Datasaur (https://datasaur.ai/) - supports various NLP tasks for individuals or teams, freemium based
|
||
[38;5;12m- [39m[38;5;14m[1mKonfuzio[0m[38;5;12m (https://konfuzio.com/en/) - team-first hosted and on-prem text, image and PDF annotation tool powered by active learning, freemium based, costs $[39m
|
||
[38;5;12m- [39m[38;5;14m[1mUBIAI[0m[38;5;12m (https://ubiai.tools/) - Easy-to-use text annotation tool for teams with most comprehensive auto-annotation features. Supports NER, relations and document classification as well as OCR annotation for invoice labeling, costs $[39m
|
||
- Shoonya (https://github.com/AI4Bharat/Shoonya-Backend) - Shoonya is a free and open-source data annotation platform with a wide variety of organization- and workspace-level management features. Shoonya is data agnostic and can be used by teams to annotate data at scale with various levels of verification stages.
|
||
- Annotation Lab (https://www.johnsnowlabs.com/annotation-lab/) - Free end-to-end no-code platform for text annotation and DL model training/tuning. Out-of-the-box support for Named Entity Recognition, Classification, Relation extraction and Assertion Status Spark NLP models. Unlimited support for users, teams, projects, documents. Not FOSS.
|
||
[38;5;12m- [39m[38;5;14m[1mFLAT[0m[38;5;12m (https://github.com/proycon/flat) - FLAT is a web-based linguistic annotation environment based around the [39m[38;5;14m[1mFoLiA format[0m[38;5;12m (http://proycon.github.io/folia), a rich XML-based format for linguistic annotation. Free and open source.[39m
|
||
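Several of the tools above (brat in particular) export standoff annotations, where a .ann file holds entity spans that point back into the raw text. A minimal reader for simple text-bound ("T") lines is sketched below under that assumption; real brat projects also contain relations, events, attributes and discontinuous spans, so treat this only as a starting point.

```python
# Minimal sketch: reading entity annotations from a brat-style standoff .ann file.
# Only simple text-bound ("T") lines such as
#   T1<TAB>Person 0 10<TAB>John Smith
# are handled here.
from dataclasses import dataclass

@dataclass
class Entity:
    ann_id: str
    label: str
    start: int
    end: int
    text: str

def read_brat_entities(ann_path: str) -> list[Entity]:
    entities = []
    with open(ann_path, encoding="utf-8") as f:
        for line in f:
            if not line.startswith("T"):
                continue  # skip relations, events, attributes, notes, ...
            ann_id, type_and_span, surface = line.rstrip("\n").split("\t")
            if ";" in type_and_span:
                continue  # skip discontinuous spans in this sketch
            label, start, end = type_and_span.split()
            entities.append(Entity(ann_id, label, int(start), int(end), surface))
    return entities

# Example: entities = read_brat_entities("document.ann")
```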
|
||
|
||
[38;2;255;187;0m[4mTechniques[0m
|
||
|
||
[38;2;255;187;0m[4mText Embeddings[0m
|
||
|
||
[38;2;255;187;0m[4mWord Embeddings[0m
|
||
|
||
- Rule of thumb: fastText >> GloVe > word2vec (see the gensim loading sketch after this list)
|
||
|
||
- word2vec (https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) - implementation (https://code.google.com/archive/p/word2vec/) - explainer blog (http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/)
|
||
[38;5;12m- [39m[38;5;14m[1mglove[0m[38;5;12m (https://nlp.stanford.edu/pubs/glove.pdf) - [39m[38;5;14m[1mexplainer blog[0m[38;5;12m (https://blog.acolyer.org/2016/04/22/glove-global-vectors-for-word-representation/)[39m
|
||
[38;5;12m- fasttext - [39m[38;5;14m[1mimplementation[0m[38;5;12m (https://github.com/facebookresearch/fastText) - [39m[38;5;14m[1mpaper[0m[38;5;12m (https://arxiv.org/abs/1607.04606) - [39m[38;5;14m[1mexplainer blog[0m[38;5;12m (https://towardsdatascience.com/fasttext-under-the-hood-11efc57b2b3)[39m
|
||
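As a quick way to compare the pretrained vectors above, the sketch below loads a GloVe model through gensim's downloader and queries nearest neighbours. It assumes gensim is installed, an internet connection for the first download, and that the gensim-data model id used here is still available.

```python
# A minimal sketch of poking at pretrained word vectors with gensim
# (assumes `pip install gensim`; the first call downloads ~128 MB and caches it).
import gensim.downloader as api

glove = api.load("glove-wiki-gigaword-100")

print(glove.most_similar("king", topn=5))   # nearest neighbours
print(glove.similarity("coffee", "tea"))    # cosine similarity

# Unlike GloVe/word2vec, fastText builds vectors from character n-grams, so it
# can produce embeddings for out-of-vocabulary words -- one reason behind the
# "fastText >> GloVe > word2vec" rule of thumb above. To get that behaviour in
# gensim, load a Facebook .bin model with
# gensim.models.fasttext.load_facebook_model(path_to_bin).
```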
|
||
[38;2;255;187;0m[4mSentence and Language Model Based Word Embeddings[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
- ELMo - Deep Contextualized Word Representations (https://arxiv.org/abs/1802.05365) - PyTorch implementation (https://github.com/allenai/allennlp/blob/master/tutorials/how_to/elmo.md) - TF implementation (https://github.com/allenai/bilm-tf)
|
||
[38;5;12m- ULMFiT - [39m[38;5;14m[1mUniversal Language Model Fine-tuning for Text Classification[0m[38;5;12m (https://arxiv.org/abs/1801.06146) by Jeremy Howard and Sebastian Ruder[39m
|
||
- InferSent - Supervised Learning of Universal Sentence Representations from Natural Language Inference Data (https://arxiv.org/abs/1705.02364) by Facebook
|
||
[38;5;12m- CoVe - [39m[38;5;14m[1mLearned in Translation: Contextualized Word Vectors[0m[38;5;12m (https://arxiv.org/abs/1708.00107)[39m
|
||
- Paragraph vectors - from Distributed Representations of Sentences and Documents (https://cs.stanford.edu/~quocle/paragraph_vector.pdf). See the doc2vec tutorial at gensim (https://rare-technologies.com/doc2vec-tutorial/) and the minimal doc2vec sketch after this list
|
||
[38;5;12m- [39m[38;5;14m[1msense2vec[0m[38;5;12m (https://arxiv.org/abs/1511.06388) - on word sense disambiguation[39m
|
||
- Skip Thought Vectors (https://arxiv.org/abs/1506.06726) - sentence representation method
|
||
[38;5;12m- [39m[38;5;14m[1mAdaptive skip-gram[0m[38;5;12m (https://arxiv.org/abs/1502.07257) - similar approach, with adaptive properties[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSequence to Sequence Learning[0m[38;5;12m (https://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf) - word vectors for machine translation[39m
|
||
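The doc2vec sketch referenced in the Paragraph vectors entry above: a minimal gensim example that trains on a toy corpus and infers a vector for an unseen sentence. The corpus and hyperparameters are placeholders; real models need far more data and tuning.

```python
# Minimal doc2vec (Paragraph Vectors) sketch with gensim.
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

corpus = [
    "the cat sat on the mat",
    "dogs and cats are common pets",
    "stock markets fell sharply today",
    "investors worry about rising rates",
]
tagged = [TaggedDocument(words=doc.split(), tags=[i]) for i, doc in enumerate(corpus)]

model = Doc2Vec(tagged, vector_size=32, window=2, min_count=1, epochs=100)

# Infer a vector for an unseen sentence and find the closest training document.
vec = model.infer_vector("pets like cats".split())
print(model.dv.most_similar([vec], topn=2))
```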
|
||
[38;2;255;187;0m[4mQuestion Answering and Knowledge Extraction[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
- DrQA (https://github.com/facebookresearch/DrQA) - Open Domain Question Answering work by Facebook Research on Wikipedia data (a toy retrieve-then-read sketch follows this list)
|
||
[38;5;12m- [39m[38;5;14m[1mDocument-QA[0m[38;5;12m (https://github.com/allenai/document-qa) - Simple and Effective Multi-Paragraph Reading Comprehension by AllenAI[39m
|
||
[38;5;12m- [39m[38;5;14m[1mTemplate-Based Information Extraction without the Templates[0m[38;5;12m (https://www.usna.edu/Users/cs/nchamber/pubs/acl2011-chambers-templates.pdf)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mPrivee: An Architecture for Automatically Analyzing Web Privacy Policies[0m[38;5;12m (https://www.sebastianzimmeck.de/zimmeckAndBellovin2014Privee.pdf)[39m
|
||
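The toy sketch referenced in the DrQA entry above: open-domain QA systems typically retrieve candidate passages first and only then run a reader over them. The snippet below shows just the retrieval half with scikit-learn TF-IDF; the passages are made up, and DrQA itself uses a far larger Wikipedia index plus a neural reader.

```python
# Toy retrieve-then-read sketch: rank passages by TF-IDF similarity to a question.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

passages = [
    "Paris is the capital and most populous city of France.",
    "The Eiffel Tower was completed in 1889.",
    "Mount Everest is Earth's highest mountain above sea level.",
]
question = "What is the capital of France?"

vectorizer = TfidfVectorizer().fit(passages + [question])
passage_vecs = vectorizer.transform(passages)
question_vec = vectorizer.transform([question])

scores = cosine_similarity(question_vec, passage_vecs)[0]
best = scores.argmax()
print(f"best passage ({scores[best]:.2f}): {passages[best]}")
# A reader model (e.g. a span-extraction network) would then pull the answer
# span out of the retrieved passage.
```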
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
- nlp-datasets (https://github.com/niderhoff/nlp-datasets) - great collection of NLP datasets
|
||
[38;5;12m- [39m[38;5;14m[1mgensim-data[0m[38;5;12m (https://github.com/RaRe-Technologies/gensim-data) - Data repository for pretrained NLP models and NLP corpora.[39m
|
||
|
||
[38;2;255;187;0m[4mMultilingual NLP Frameworks[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
- UDPipe (https://github.com/ufal/udpipe) is a trainable pipeline for tokenizing, tagging, lemmatizing and parsing Universal Treebanks and other CoNLL-U files. Primarily written in C++, it offers a fast and reliable solution for multilingual NLP processing (a minimal CoNLL-U parsing sketch follows this list).
|
||
- NLP-Cube (https://github.com/adobe/NLP-Cube): Natural Language Processing Pipeline - Sentence Splitting, Tokenization, Lemmatization, Part-of-speech Tagging and Dependency Parsing. New platform, written in Python with Dynet 2.0. Offers standalone (CLI/Python bindings) and server functionality (REST API).
|
||
- UralicNLP (https://github.com/mikahama/uralicNLP) is an NLP library mainly for endangered Uralic languages such as the Sami, Mordvin, Mari and Komi languages. Some non-endangered languages such as Finnish are also supported, together with non-Uralic languages such as Swedish and Arabic. UralicNLP can do morphological analysis, generation, lemmatization and disambiguation.
|
||
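The CoNLL-U sketch referenced in the UDPipe entry above: UDPipe and NLP-Cube both emit CoNLL-U, where each token is a line of 10 tab-separated columns. The minimal parser below pulls a few of those columns out of an embedded sample; it simply skips comments, multi-word-token ranges and empty nodes rather than handling them.

```python
# Minimal sketch of reading CoNLL-U output from tools like UDPipe / NLP-Cube.
CONLLU_SAMPLE = """\
# text = Dogs bark.
1\tDogs\tdog\tNOUN\tNNS\tNumber=Plur\t2\tnsubj\t_\t_
2\tbark\tbark\tVERB\tVBP\t_\t0\troot\t_\t_
3\t.\t.\tPUNCT\t.\t_\t2\tpunct\t_\t_
"""

def parse_conllu(block: str):
    tokens = []
    for line in block.splitlines():
        if not line or line.startswith("#"):
            continue                       # comments / sentence metadata
        cols = line.split("\t")
        if "-" in cols[0] or "." in cols[0]:
            continue                       # multi-word tokens, empty nodes
        tokens.append({
            "id": int(cols[0]),
            "form": cols[1],
            "lemma": cols[2],
            "upos": cols[3],
            "head": int(cols[6]),
            "deprel": cols[7],
        })
    return tokens

for tok in parse_conllu(CONLLU_SAMPLE):
    print(tok["form"], tok["upos"], tok["deprel"])
```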
|
||
[38;2;255;187;0m[4mNLP in Korean[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
- KoNLPy (http://konlpy.org) - Python package for Korean natural language processing (usage sketched after this list).
|
||
[38;5;12m- [39m[38;5;14m[1mMecab (Korean)[0m[38;5;12m (https://eunjeon.blogspot.com/) - C++ library for Korean NLP[39m
|
||
[38;5;12m- [39m[38;5;14m[1mKoalaNLP[0m[38;5;12m (https://koalanlp.github.io/koalanlp/) - Scala library for Korean Natural Language Processing.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mKoNLP[0m[38;5;12m (https://cran.r-project.org/package=KoNLP) - R package for Korean Natural language processing[39m
|
||
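A minimal KoNLPy sketch for the entry above, assuming `pip install konlpy` and a working JVM (most KoNLPy taggers are Java-backed); the Okt tagger is used here, but the others are interchangeable.

```python
# Minimal KoNLPy sketch: morpheme segmentation and POS tagging for Korean.
from konlpy.tag import Okt

okt = Okt()
sentence = "아버지가 방에 들어가신다"   # "Father enters the room"

print(okt.morphs(sentence))   # morpheme segmentation
print(okt.pos(sentence))      # (morpheme, POS) pairs
print(okt.nouns(sentence))    # nouns only
```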
|
||
[38;2;255;187;0m[4mBlogs and Tutorials[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mdsindex's blog[0m[38;5;12m (https://dsindex.github.io/)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mKangwon University's NLP course in Korean[0m[38;5;12m (http://cs.kangwon.ac.kr/~leeck/NLP/)[39m
|
||
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mKAIST Corpus[0m[38;5;12m (http://semanticweb.kaist.ac.kr/home/index.php/KAIST_Corpus) - A corpus from the Korea Advanced Institute of Science and Technology in Korean.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mNaver Sentiment Movie Corpus in Korean[0m[38;5;12m (https://github.com/e9t/nsmc/)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mChosun Ilbo archive[0m[38;5;12m (http://srchdb1.chosun.com/pdf/i_archive/) - dataset in Korean from one of the major newspapers in South Korea, the Chosun Ilbo.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mChat data[0m[38;5;12m (https://github.com/songys/Chatbot_data) - Chatbot data in Korean[39m
|
||
[38;5;12m- [39m[38;5;14m[1mPetitions[0m[38;5;12m (https://github.com/akngs/petitions) - Collect expired petition data from the Blue House National Petition Site.[39m
|
||
- Korean Parallel corpora (https://github.com/j-min/korean-parallel-corpora) - Neural Machine Translation (NMT) dataset for Korean to French & Korean to English
|
||
[38;5;12m- [39m[38;5;14m[1mKorQuAD[0m[38;5;12m (https://korquad.github.io/) - Korean SQuAD dataset with Wiki HTML source. Mentions both v1.0 and v2.1 at the time of adding to Awesome NLP[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Arabic[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mgoarabic[0m[38;5;12m (https://github.com/01walid/goarabic) - Go package for Arabic text processing[39m
|
||
[38;5;12m- [39m[38;5;14m[1mjsastem[0m[38;5;12m (https://github.com/ejtaal/jsastem) - Javascript for Arabic stemming[39m
|
||
- PyArabic (https://pypi.org/project/PyArabic/) - Python libraries for Arabic (usage sketched after this list)
|
||
[38;5;12m- [39m[38;5;14m[1mRFTokenizer[0m[38;5;12m (https://github.com/amir-zeldes/RFTokenizer) - trainable Python segmenter for Arabic, Hebrew and Coptic[39m
|
||
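A minimal PyArabic sketch for the entry above, assuming `pip install pyarabic`; the `araby` function names used here follow PyArabic's documented module, but double-check them against the version you install.

```python
# Minimal PyArabic sketch: strip diacritics (tashkeel) and tokenize Arabic text.
import pyarabic.araby as araby

text = "اللُّغَةُ الْعَرَبِيَّةُ جَمِيلَةٌ"   # "The Arabic language is beautiful"

plain = araby.strip_tashkeel(text)   # remove short-vowel diacritics
tokens = araby.tokenize(plain)

print(plain)
print(tokens)
```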
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mMultidomain Datasets[0m[38;5;12m (https://github.com/hadyelsahar/large-arabic-sentiment-analysis-resouces) - Largest Available Multi-Domain Resources for Arabic Sentiment Analysis[39m
|
||
[38;5;12m- [39m[38;5;14m[1mLABR[0m[38;5;12m (https://github.com/mohamedadaly/labr) - LArge Arabic Book Reviews dataset[39m
|
||
[38;5;12m- [39m[38;5;14m[1mArabic Stopwords[0m[38;5;12m (https://github.com/mohataher/arabic-stop-words) - A list of Arabic stopwords from various resources[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Chinese[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
- jieba (https://github.com/fxsjy/jieba#jieba-1) - Python package for word segmentation utilities in Chinese (usage sketched after this list)
|
||
[38;5;12m- [39m[38;5;14m[1mSnowNLP[0m[38;5;12m (https://github.com/isnowfy/snownlp) - Python package for Chinese NLP[39m
|
||
[38;5;12m- [39m[38;5;14m[1mFudanNLP[0m[38;5;12m (https://github.com/FudanNLP/fnlp) - Java library for Chinese text processing[39m
|
||
[38;5;12m- [39m[38;5;14m[1mHanLP[0m[38;5;12m (https://github.com/hankcs/HanLP) - The multilingual NLP library[39m
|
||
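A minimal jieba sketch for the entry above, assuming `pip install jieba`; it shows the default and full segmentation modes plus POS tagging via `jieba.posseg`.

```python
# Minimal jieba sketch: Chinese word segmentation and POS tagging.
import jieba
import jieba.posseg as pseg

sentence = "我来到北京清华大学"

print(jieba.lcut(sentence))                            # default (accurate) mode
print(jieba.lcut(sentence, cut_all=True))              # full mode, all candidate words
print([(w, flag) for w, flag in pseg.cut(sentence)])   # word + POS tag
```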
|
||
[38;2;255;187;0m[4mAnthology[0m
|
||
[38;5;12m- [39m[38;5;14m[1mfunNLP[0m[38;5;12m (https://github.com/fighting41love/funNLP) - Collection of NLP tools and resources mainly for Chinese[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in German[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mGerman-NLP[0m[38;5;12m (https://github.com/adbar/German-NLP) - Curated list of open-access/open-source/off-the-shelf resources and tools developed with a particular focus on German[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Polish[0m
|
||
|
||
- Polish-NLP (https://github.com/ksopyla/awesome-nlp-polish) - A curated list of resources dedicated to Natural Language Processing (NLP) in Polish. Models, tools, datasets.
|
||
|
||
[38;2;255;187;0m[4mNLP in Spanish[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
- spanlp (https://github.com/jfreddypuentes/spanlp) - Python library to detect, censor and clean profanity, vulgarities, hateful words, racism, xenophobia and bullying in texts written in Spanish. It contains data from 21 Spanish-speaking countries.
|
||
|
||
[38;2;255;187;0m[4mData[0m
|
||
|
||
- Colombian Political Speeches (https://github.com/dav009/LatinamericanTextResources)
|
||
[38;5;12m- [39m[38;5;14m[1mCopenhagen Treebank[0m[38;5;12m (https://mbkromann.github.io/copenhagen-dependency-treebank/)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSpanish Billion words corpus with Word2Vec embeddings[0m[38;5;12m (https://github.com/crscardellino/sbwce)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mCompilation of Spanish Unannotated Corpora[0m[38;5;12m (https://github.com/josecannete/spanish-unannotated-corpora)[39m
|
||
|
||
[38;2;255;187;0m[4mWord and Sentence Embeddings[0m
|
||
[38;5;12m- [39m[38;5;14m[1mSpanish Word Embeddings Computed with Different Methods and from Different Corpora[0m[38;5;12m (https://github.com/dccuchile/spanish-word-embeddings)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSpanish Word Embeddings Computed from Large Corpora and Different Sizes Using fastText[0m[38;5;12m (https://github.com/BotCenter/spanishWordEmbeddings)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSpanish Sentence Embeddings Computed from Large Corpora Using sent2vec[0m[38;5;12m (https://github.com/BotCenter/spanishSent2Vec)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mBeto - BERT for Spanish[0m[38;5;12m (https://github.com/dccuchile/beto)[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mNLP in Indic languages[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mData, Corpora and Treebanks[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mHindi Dependency Treebank[0m[38;5;12m (https://ltrc.iiit.ac.in/treebank_H2014/) - A multi-representational multi-layered treebank for Hindi and Urdu[39m
|
||
[38;5;12m- [39m[38;5;14m[1mUniversal Dependencies Treebank in Hindi[0m[38;5;12m (https://universaldependencies.org/treebanks/hi_hdtb/index.html)[39m
|
||
[38;5;12m - [39m[38;5;14m[1mParallel Universal Dependencies Treebank in Hindi[0m[38;5;12m (http://universaldependencies.org/treebanks/hi_pud/index.html) - A smaller part of the above-mentioned treebank.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mISI FIRE Stopwords List (Hindi and Bangla)[0m[38;5;12m (https://www.isical.ac.in/~fire/data/)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mPeter Graham's Stopwords List[0m[38;5;12m (https://github.com/6/stopwords-json)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mNLTK Corpus[0m[38;5;12m (https://www.nltk.org/book/ch02.html) 60k Words POS Tagged, Bangla, Hindi, Marathi, Telugu[39m
|
||
[38;5;12m- [39m[38;5;14m[1mHindi Movie Reviews Dataset[0m[38;5;12m (https://github.com/goru001/nlp-for-hindi) ~1k Samples, 3 polarity classes[39m
|
||
[38;5;12m- [39m[38;5;14m[1mBBC News Hindi Dataset[0m[38;5;12m (https://github.com/NirantK/hindi2vec/releases/tag/bbc-hindi-v0.1) 4.3k Samples, 14 classes[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIIT Patna Hindi ABSA Dataset[0m[38;5;12m (https://github.com/pnisarg/ABSA) 5.4k Samples, 12 Domains, 4k aspect terms, aspect and sentence level polarity in 4 classes[39m
|
||
[38;5;12m- [39m[38;5;14m[1mBangla ABSA[0m[38;5;12m (https://github.com/AtikRahman/Bangla_Datasets_ABSA) 5.5k Samples, 2 Domains, 10 aspect terms[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIIT Patna Movie Review Sentiment Dataset[0m[38;5;12m (https://www.iitp.ac.in/~ai-nlp-ml/resources.html) 2k Samples, 3 polarity labels[39m
|
||
|
||
[38;2;255;187;0m[4mCorpora/Datasets that need a login/access can be gained via email[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mSAIL 2015[0m[38;5;12m (http://amitavadas.com/SAIL/) Twitter and Facebook labelled sentiment samples in Hindi, Bengali, Tamil, Telugu.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIIT Bombay NLP Resources[0m[38;5;12m (http://www.cfilt.iitb.ac.in/Sentiment_Analysis_Resources.html) Sentiwordnet, Movie and Tourism parallel labelled corpora, polarity labelled sense annotated corpus, Marathi polarity labelled corpus.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mTDIL-IC aggregates a lot of useful resources and provides access to otherwise gated datasets[0m[38;5;12m (https://tdil-dc.in/index.php?option=com_catalogue&task=viewTools&id=83&lang=en)[39m
|
||
|
||
[38;2;255;187;0m[4mLanguage Models and Word Embeddings[0m
|
||
|
||
- Hindi2Vec (https://nirantk.com/hindi2vec/) and nlp-for-hindi (https://github.com/goru001/nlp-for-hindi) - ULMFiT-style language models
|
||
[38;5;12m- [39m[38;5;14m[1mIIT Patna Bilingual Word Embeddings Hi-En[0m[38;5;12m (https://www.iitp.ac.in/~ai-nlp-ml/resources.html)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mFasttext word embeddings in a whole bunch of languages, trained on Common Crawl[0m[38;5;12m (https://fasttext.cc/docs/en/crawl-vectors.html)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mHindi and Bengali Word2Vec[0m[38;5;12m (https://github.com/Kyubyong/wordvectors)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mHindi and Urdu Elmo Model[0m[38;5;12m (https://github.com/HIT-SCIR/ELMoForManyLangs)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSanskrit Albert[0m[38;5;12m (https://huggingface.co/surajp/albert-base-sanskrit) Trained on Sanskrit Wikipedia and OSCAR corpus[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries and Tooling[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mMulti-Task Deep Morphological Analyzer[0m[38;5;12m (https://github.com/Saurav0074/mt-dma) Deep Network based Morphological Parser for Hindi and Urdu[39m
|
||
- Anoop Kunchukuttan's Indic NLP Library (https://github.com/anoopkunchukuttan/indic_nlp_library) - 18 languages, a whole host of features from tokenization to translation
|
||
[38;5;12m- [39m[38;5;14m[1mSivaReddy's Dependency Parser[0m[38;5;12m (http://sivareddy.in/downloads) Dependency Parser and Pos Tagger for Kannada, Hindi and Telugu. [39m[38;5;14m[1mPython3 Port[0m[38;5;12m (https://github.com/CalmDownKarm/sivareddydependencyparser)[39m
|
||
[38;5;12m- [39m[38;5;14m[1miNLTK[0m[38;5;12m (https://github.com/goru001/inltk) - A Natural Language Toolkit for Indic Languages (Indian subcontinent languages) built on top of Pytorch/Fastai, which aims to provide out of the box support for common NLP tasks.[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Thai[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
- PyThaiNLP (https://github.com/PyThaiNLP/pythainlp) - Thai NLP in a Python package (usage sketched after this list)
|
||
[38;5;12m- [39m[38;5;14m[1mJTCC[0m[38;5;12m (https://github.com/wittawatj/jtcc) - A character cluster library in Java[39m
|
||
[38;5;12m- [39m[38;5;14m[1mCutKum[0m[38;5;12m (https://github.com/pucktada/cutkum) - Word segmentation with deep learning in TensorFlow[39m
|
||
[38;5;12m- [39m[38;5;14m[1mThai Language Toolkit[0m[38;5;12m (https://pypi.python.org/pypi/tltk/) - Based on a paper by Wirote Aroonmanakun in 2002 with included dataset[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSynThai[0m[38;5;12m (https://github.com/KenjiroAI/SynThai) - Word segmentation and POS tagging using deep learning in Python[39m
|
||
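A minimal PyThaiNLP sketch for the entry above, assuming `pip install pythainlp`; the default `newmm` engine is a dictionary-based maximal-matching tokenizer, and the function names used here follow PyThaiNLP's documented API.

```python
# Minimal PyThaiNLP sketch: text normalization and Thai word segmentation.
from pythainlp import word_tokenize
from pythainlp.util import normalize

text = "ภาษาไทยไม่มีการเว้นวรรคระหว่างคำ"   # "Thai has no spaces between words"

print(word_tokenize(normalize(text)))         # default engine
print(word_tokenize(text, engine="newmm"))    # explicit engine choice
```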
|
||
[38;2;255;187;0m[4mData[0m
|
||
|
||
- Inter-BEST (https://www.nectec.or.th/corpus/index.php?league=pm) - A 5-million-word text corpus with word-segmentation annotations
|
||
- Prime Minister 29 (https://github.com/PyThaiNLP/lexicon-thai/tree/master/thai-corpus/Prime%20Minister%2029) - Dataset containing speeches of the 29th Prime Minister of Thailand
|
||
|
||
[38;2;255;187;0m[4mNLP in Danish[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mNamed Entity Recognition for Danish[0m[38;5;12m (https://github.com/ITUnlp/daner)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mDaNLP[0m[38;5;12m (https://github.com/alexandrainst/danlp) - NLP resources in Danish[39m
|
||
[38;5;12m- [39m[38;5;14m[1mAwesome Danish[0m[38;5;12m (https://github.com/fnielsen/awesome-danish) - A curated list of awesome resources for Danish language technology[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Vietnamese[0m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
|
||
- underthesea (https://github.com/undertheseanlp/underthesea) - Vietnamese NLP Toolkit (usage sketched after this list)
|
||
[38;5;12m- [39m[38;5;14m[1mvn.vitk[0m[38;5;12m (https://github.com/phuonglh/vn.vitk) - A Vietnamese Text Processing Toolkit[39m
|
||
[38;5;12m- [39m[38;5;14m[1mVnCoreNLP[0m[38;5;12m (https://github.com/vncorenlp/VnCoreNLP) - A Vietnamese natural language processing toolkit[39m
|
||
[38;5;12m- [39m[38;5;14m[1mPhoBERT[0m[38;5;12m (https://github.com/VinAIResearch/PhoBERT) - Pre-trained language models for Vietnamese[39m
|
||
[38;5;12m- [39m[38;5;14m[1mpyvi[0m[38;5;12m (https://github.com/trungtv/pyvi) - Python Vietnamese Core NLP Toolkit[39m
|
||
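A minimal underthesea sketch for the entry above, assuming `pip install underthesea`; the top-level function names used here follow the library's README, so verify them against your installed version.

```python
# Minimal underthesea sketch: Vietnamese word segmentation and POS tagging.
from underthesea import word_tokenize, pos_tag

text = "Xin chào, tôi đang học xử lý ngôn ngữ tự nhiên"

print(word_tokenize(text))                  # ["Xin chào", ",", "tôi", ...]
print(word_tokenize(text, format="text"))   # multi-syllable words joined by "_"
print(pos_tag(text))                        # (word, POS) pairs
```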
|
||
[38;2;255;187;0m[4mData[0m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mVietnamese treebank[0m[38;5;12m (https://vlsp.hpda.vn/demo/?page=resources&lang=en) - 10,000 sentences for the constituency parsing task[39m
|
||
[38;5;12m- [39m[38;5;14m[1mBKTreeBank[0m[38;5;12m (https://arxiv.org/pdf/1710.05519.pdf) - a Vietnamese Dependency Treebank[39m
|
||
[38;5;12m- [39m[38;5;14m[1mUD_Vietnamese[0m[38;5;12m (https://github.com/UniversalDependencies/UD_Vietnamese-VTB) - Vietnamese Universal Dependency Treebank[39m
|
||
- VIVOS (https://ailab.hcmus.edu.vn/vivos/) - a free Vietnamese speech corpus consisting of 15 hours of recorded speech, by AILab
|
||
[38;5;12m- [39m[38;5;14m[1mVNTQcorpus(big).txt[0m[38;5;12m (http://viet.jnlp.org/download-du-lieu-tu-vung-corpus) - 1.75 million sentences in news[39m
|
||
[38;5;12m- [39m[38;5;14m[1mViText2SQL[0m[38;5;12m (https://github.com/VinAIResearch/ViText2SQL) - A dataset for Vietnamese Text-to-SQL semantic parsing (EMNLP-2020 Findings)[39m
|
||
- EVB Corpus (https://github.com/qhungngo/EVBCorpus) - 20,000,000 words (20 million) from 15 bilingual books, 100 parallel English-Vietnamese / Vietnamese-English texts, 250 parallel law and ordinance texts, 5,000 news articles, and 2,000 film subtitles.
|
||
|
||
|
||
[38;2;255;187;0m[4mNLP for Dutch[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mpython-frog[0m[38;5;12m (https://github.com/proycon/python-frog) - Python binding to Frog, an NLP suite for Dutch. (pos tagging, lemmatisation, dependency parsing, NER)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mSimpleNLG_NL[0m[38;5;12m (https://github.com/rfdj/SimpleNLG-NL) - Dutch surface realiser used for Natural Language Generation in Dutch, based on the SimpleNLG implementation for English and French.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mAlpino[0m[38;5;12m (https://github.com/rug-compling/alpino) - Dependency parser for Dutch (also does PoS tagging and Lemmatisation).[39m
|
||
[38;5;12m- [39m[38;5;14m[1mKaldi NL[0m[38;5;12m (https://github.com/opensource-spraakherkenning-nl/Kaldi_NL) - Dutch Speech Recognition models based on [39m[38;5;14m[1mKaldi[0m[38;5;12m (http://kaldi-asr.org/).[39m
|
||
[38;5;12m- [39m[38;5;14m[1mspaCy[0m[38;5;12m (https://spacy.io/) - [39m[38;5;14m[1mDutch model[0m[38;5;12m (https://spacy.io/models/nl) available. - Industrial strength NLP with Python and Cython. [39m
|
||
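A minimal sketch of the spaCy Dutch pipeline mentioned in the last entry, assuming `pip install spacy` and `python -m spacy download nl_core_news_sm`.

```python
# Minimal spaCy sketch for Dutch: tokenization, POS, dependencies and NER.
import spacy

nlp = spacy.load("nl_core_news_sm")
doc = nlp("Amsterdam is de hoofdstad van Nederland.")

for token in doc:
    print(token.text, token.pos_, token.dep_, token.lemma_)

print([(ent.text, ent.label_) for ent in doc.ents])  # named entities
```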
|
||
|
||
[38;2;255;187;0m[4mNLP in Indonesian[0m
|
||
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
[38;5;12m- Kompas and Tempo collections at [39m[38;5;14m[1mILPS[0m[38;5;12m (http://ilps.science.uva.nl/resources/bahasa/)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mPANL10N for PoS tagging[0m[38;5;12m (http://www.panl10n.net/english/outputs/Indonesia/UI/0802/UI-1M-tagged.zip): 39K sentences and 900K word tokens[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIDN for PoS tagging[0m[38;5;12m (https://github.com/famrashel/idn-tagged-corpus): This corpus contains 10K sentences and 250K word tokens[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIndonesian Treebank[0m[38;5;12m (https://github.com/famrashel/idn-treebank) and [39m[38;5;14m[1mUniversal Dependencies-Indonesian[0m[38;5;12m (https://github.com/UniversalDependencies/UD_Indonesian-GSD)[39m
|
||
- IndoSum (https://github.com/kata-ai/indosum) for both text summarization and classification
|
||
[38;5;12m- [39m[38;5;14m[1mWordnet-Bahasa[0m[38;5;12m (http://wn-msa.sourceforge.net/) - large, free, semantic dictionary[39m
|
||
[38;5;12m- IndoBenchmark [39m[38;5;14m[1mIndoNLU[0m[38;5;12m (https://github.com/indobenchmark/indonlu) includes pre-trained language model (IndoBERT), FastText model, Indo4B corpus, and several NLU benchmark datasets[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries & Embedding[0m
|
||
[38;5;12m- Natural language toolkit [39m[38;5;14m[1mbahasa[0m[38;5;12m (https://github.com/kangfend/bahasa)[39m
|
||
[38;5;12m- [39m[38;5;14m[1mIndonesian Word Embedding[0m[38;5;12m (https://github.com/galuhsahid/indonesian-word-embedding)[39m
|
||
[38;5;12m- Pretrained [39m[38;5;14m[1mIndonesian fastText Text Embedding[0m[38;5;12m (https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.id.zip) trained on Wikipedia[39m
|
||
[38;5;12m- IndoBenchmark [39m[38;5;14m[1mIndoNLU[0m[38;5;12m (https://github.com/indobenchmark/indonlu) includes pretrained language model (IndoBERT), FastText model, Indo4B corpus, and several NLU benchmark datasets[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Urdu[0m
|
||
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
[38;5;12m- [39m[38;5;14m[1mCollection of Urdu datasets[0m[38;5;12m (https://github.com/mirfan899/Urdu) for POS, NER and NLP tasks[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
- Natural Language Processing library (https://github.com/urduhack/urduhack) for the Urdu language (🇵🇰)
|
||
|
||
[38;2;255;187;0m[4mNLP in Persian[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;2;255;187;0m[4mLibraries[0m
|
||
- Hazm (https://github.com/roshan-research/hazm) - Persian NLP Toolkit (usage sketched after this list).
|
||
[38;5;12m- [39m[38;5;14m[1mParsivar[0m[38;5;12m (https://github.com/ICTRC/Parsivar): A Language Processing Toolkit for Persian[39m
|
||
- Perke (https://github.com/AlirezaTheH/perke): Perke is a Python keyphrase extraction package for the Persian language. It provides an end-to-end keyphrase extraction pipeline in which each component can be easily modified or extended to develop new models.
|
||
[38;5;12m- [39m[38;5;14m[1mPerstem[0m[38;5;12m (https://github.com/jonsafari/perstem): Persian stemmer, morphological analyzer, transliterator, and partial part-of-speech tagger[39m
|
||
[38;5;12m- [39m[38;5;14m[1mParsiAnalyzer[0m[38;5;12m (https://github.com/NarimanN2/ParsiAnalyzer): Persian Analyzer For Elasticsearch[39m
|
||
[38;5;12m- [39m[38;5;14m[1mvirastar[0m[38;5;12m (https://github.com/aziz/virastar): Cleaning up Persian text![39m
|
||
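A minimal Hazm sketch for the entry above, assuming `pip install hazm`; it covers normalization, tokenization and lemmatization only. POS tagging and parsing additionally require Hazm's pretrained model files.

```python
# Minimal Hazm sketch: normalization, tokenization and lemmatization for Persian.
from hazm import Normalizer, Lemmatizer, word_tokenize

normalizer = Normalizer()
lemmatizer = Lemmatizer()

text = "ما به پردازش زبان فارسی علاقه داریم"   # "We are interested in Persian NLP"
normalized = normalizer.normalize(text)
tokens = word_tokenize(normalized)

print(tokens)
print([lemmatizer.lemmatize(tok) for tok in tokens])
```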
|
||
[38;2;255;187;0m[4mDatasets[0m
|
||
- Bijankhan Corpus (https://dbrg.ut.ac.ir/بیژن%E2%80%8Cخان/): The Bijankhan corpus is a tagged corpus suitable for natural language processing research on the Persian (Farsi) language. The collection is gathered from daily news and common texts, and all documents are categorized into different subjects such as political, cultural and so on. In total there are 4,300 different subjects. The collection contains about 2.6 million manually tagged words with a tag set of 40 Persian POS tags.
|
||
- Uppsala Persian Corpus (UPC) (https://sites.google.com/site/mojganserajicom/home/upc): Uppsala Persian Corpus (UPC) is a large, freely available Persian corpus. It is a modified version of the Bijankhan corpus with additional sentence segmentation and consistent tokenization, containing 2,704,028 tokens annotated with 31 part-of-speech tags. The part-of-speech tags are listed with explanations in this table (https://sites.google.com/site/mojganserajicom/home/upc/Table_tag.pdf).
|
||
- Large-Scale Colloquial Persian (http://hdl.handle.net/11234/1-3195): The Large Scale Colloquial Persian Dataset (LSCP) is hierarchically organized in a semantic taxonomy that focuses on multi-task informal Persian language understanding as a comprehensive problem. LSCP includes 120M sentences from 27M casual Persian tweets with their dependency relations in syntactic annotation, part-of-speech tags, sentiment polarity and automatic translations of the original Persian sentences into English (EN), German (DE), Czech (CS), Italian (IT) and Hindi (HI). Learn more about this project at the LSCP webpage (https://iasbs.ac.ir/~ansari/lscp/).
|
||
- ArmanPersoNERCorpus (https://github.com/HaniehP/PersianNER): The dataset includes 250,015 tokens and 7,682 Persian sentences in total. It is available in 3 folds to be used in turn as training and test sets. Each file contains one token, along with its manually annotated named-entity tag, per line. Each sentence is separated with a newline. The NER tags are in IOB format.
|
||
- FarsiYar PersianNER (https://github.com/Text-Mining/Persian-NER): The dataset includes about 25,000,000 tokens and about 1,000,000 Persian sentences in total, based on the Persian Wikipedia Corpus (https://github.com/Text-Mining/Persian-Wikipedia-Corpus). The NER tags are in IOB format. More than 1,000 volunteers contributed tag improvements to this dataset via a web panel or Android app. Updated tags are released every two weeks.
|
||
- PERLEX (http://farsbase.net/PERLEX.html): The first Persian dataset for relation extraction, an expert-translated version of the “SemEval-2010 Task 8” dataset.
|
||
- Persian Syntactic Dependency Treebank (http://dadegan.ir/catalog/perdt): This treebank is supplied free of charge for non-commercial use; commercial use requires contacting the maintainers. It contains 29,982 annotated sentences, including samples from almost all verbs of the Persian valency lexicon.
|
||
[38;5;12m- [39m[38;5;14m[1mUppsala Persian Dependency Treebank (UPDT)[0m[38;5;12m (http://stp.lingfil.uu.se/~mojgan/UPDT.html): Dependency-based syntactically annotated corpus.[39m
|
||
- Hamshahri (https://dbrg.ut.ac.ir/hamshahri/): The Hamshahri collection is a standard, reliable Persian text collection that was used at the Cross Language Evaluation Forum (CLEF) in 2008 and 2009 for the evaluation of Persian information retrieval systems.
|
||
|
||
|
||
[38;2;255;187;0m[4mNLP in Ukrainian[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mawesome-ukrainian-nlp[0m[38;5;12m (https://github.com/asivokon/awesome-ukrainian-nlp) - a curated list of Ukrainian NLP datasets, models, etc.[39m
|
||
[38;5;12m- [39m[38;5;14m[1mUkrainianLT[0m[38;5;12m (https://github.com/Helsinki-NLP/UkrainianLT) - another curated list with a focus on machine translation and speech processing[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mNLP in Hungarian[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;5;12m- [39m[38;5;14m[1mawesome-hungarian-nlp[0m[38;5;12m (https://github.com/oroszgy/awesome-hungarian-nlp): A curated list of free resources dedicated to Hungarian Natural Language Processing.[39m
|
||
|
||
[38;2;255;187;0m[4mNLP in Portuguese[0m
|
||
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
- Portuguese-nlp (https://github.com/ajdavidl/Portuguese-NLP) - a list of resources and tools developed with a focus on Portuguese.
|
||
|
||
[38;2;255;187;0m[4mOther Languages[0m
|
||
|
||
- Russian: pymorphy2 (https://github.com/kmike/pymorphy2) - a good POS tagger and morphological analyzer for Russian (usage sketched after this list)
|
||
- Asian Languages: Thai, Lao, Chinese, Japanese, and Korean ICU Tokenizer (https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-tokenizer.html) implementation in Elasticsearch
|
||
[38;5;12m- Ancient Languages: [39m[38;5;14m[1mCLTK[0m[38;5;12m (https://github.com/cltk/cltk): The Classical Language Toolkit is a Python library and collection of texts for doing NLP in ancient languages[39m
|
||
[38;5;12m- Hebrew: [39m[38;5;14m[1mNLPH_Resources[0m[38;5;12m (https://github.com/NLPH/NLPH_Resources) - A collection of papers, corpora and linguistic resources for NLP in Hebrew[39m
|
||
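A minimal pymorphy2 sketch for the Russian entry above, assuming `pip install pymorphy2`; it shows lemmatization and inflection of single words.

```python
# Minimal pymorphy2 sketch: Russian morphological analysis and inflection.
import pymorphy2

morph = pymorphy2.MorphAnalyzer()

parsed = morph.parse("стали")[0]        # most probable analysis of an ambiguous form
print(parsed.normal_form, parsed.tag)   # lemma and grammatical tag

word = morph.parse("книга")[0]
inflected = word.inflect({"plur", "gent"})   # may return None for impossible forms
print(inflected.word)                        # "книг" -- genitive plural
```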
|
||
[38;5;14m[1mBack to Top[0m[38;5;12m (#contents)[39m
|
||
|
||
[38;5;14m[1mCredits[0m[38;5;12m (./CREDITS.md) for initial curators and sources[39m
|
||
|
||
[38;2;255;187;0m[4mLicense[0m
|
||
[38;5;14m[1mLicense[0m[38;5;12m (./LICENSE) - CC0[39m
|
||
|
||
awesome-nlp on GitHub: https://github.com/keon/awesome-nlp
|