297 lines
76 KiB
Plaintext
297 lines
76 KiB
Plaintext
|
||
[38;5;12m [39m[38;2;255;187;0m[1m[4mAwesome Web Archiving [0m[38;5;14m[1m[4m![0m[38;2;255;187;0m[1m[4mAwesome[0m[38;5;14m[1m[4m (https://awesome.re/badge.svg)[0m[38;2;255;187;0m[1m[4m (https://awesome.re)[0m
|
||
|
||
[38;5;12mWeb[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mprocess[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mcollecting[39m[38;5;12m [39m[38;5;12mportions[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mWorld[39m[38;5;12m [39m[38;5;12mWide[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mensure[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12minformation[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mpreserved[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mfuture[39m[38;5;12m [39m[38;5;12mresearchers,[39m[38;5;12m [39m[38;5;12mhistorians,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mpublic.[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12marchivists[39m[38;5;12m [39m[38;5;12mtypically[39m[38;5;12m [39m[38;5;12memploy[39m[38;5;12m [39m
|
||
[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mcrawlers[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mautomated[39m[38;5;12m [39m[38;5;12mcapture[39m[38;5;12m [39m[38;5;12mdue[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mmassive[39m[38;5;12m [39m[38;5;12mscale[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mWeb.[39m[38;5;12m [39m[38;5;12mEver-evolving[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mstandards[39m[38;5;12m [39m[38;5;12mrequire[39m[38;5;12m [39m[38;5;12mcontinuous[39m[38;5;12m [39m[38;5;12mevolution[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mkeep[39m[38;5;12m [39m[38;5;12mup[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mchanges[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mtechnologies[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mensure[39m[38;5;12m [39m
|
||
[38;5;12mreliable[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mmeaningful[39m[38;5;12m [39m[38;5;12mcapture[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mreplay[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12marchived[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mpages.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mContents[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTraining/Documentation[0m[38;5;12m (#trainingdocumentation)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mResources for Web Publishers[0m[38;5;12m (#resources-for-web-publishers)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTools & Software[0m[38;5;12m (#tools--software)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAcquisition[0m[38;5;12m (#acquisition)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mReplay[0m[38;5;12m (#replay)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSearch & Discovery[0m[38;5;12m (#search--discovery)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUtilities[0m[38;5;12m (#utilities)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWARC I/O Libraries[0m[38;5;12m (#warc-io-libraries)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAnalysis[0m[38;5;12m (#analysis)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mQuality Assurance[0m[38;5;12m (#quality-assurance)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCuration[0m[38;5;12m (#curation)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCommunity Resources[0m[38;5;12m (#community-resources)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOther Awesome Lists[0m[38;5;12m (#other-awesome-lists)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBlogs and Scholarship[0m[38;5;12m (#blogs-and-scholarship)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMailing Lists[0m[38;5;12m (#mailing-lists)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSlack[0m[38;5;12m (#slack)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTwitter[0m[38;5;12m (#twitter)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb Archiving Service Providers[0m[38;5;12m (#web-archiving-service-providers)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSelf-hostable, Open Source[0m[38;5;12m (#self-hostable-open-source)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHosted, Closed Source[0m[38;5;12m (#hosted-closed-source)[39m
|
||
|
||
[38;2;255;187;0m[4mTraining/Documentation[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mIntroductions to web archiving concepts:[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWhat is a web archive?[0m[38;5;12m (https://youtu.be/ubDHY-ynWi0) - A video from [39m[38;5;14m[1mthe UK Web Archive YouTube Channel[0m[38;5;12m (https://www.youtube.com/channel/UCJukhTSw8VRj-VNTpBcqWkw)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWikipedia's List of Web Archiving Initiatives[0m[38;5;12m (https://en.wikipedia.org/wiki/List_of_Web_archiving_initiatives)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGlossary of Archive-It and Web Archiving Terms[0m[38;5;12m (https://support.archive-it.org/hc/en-us/articles/208111686-Glossary-of-Archive-It-and-Web-Archiving-Terms)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe[0m[38;5;14m[1m [0m[38;5;14m[1mWeb[0m[38;5;14m[1m [0m[38;5;14m[1mArchiving[0m[38;5;14m[1m [0m[38;5;14m[1mLifecycle[0m[38;5;14m[1m [0m[38;5;14m[1mModel[0m[38;5;12m [39m[38;5;12m(https://archive-it.org/blog/post/announcing-the-web-archiving-life-cycle-model/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mThe[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mArchiving[39m[38;5;12m [39m[38;5;12mLifecycle[39m[38;5;12m [39m[38;5;12mModel[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12mattempt[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mincorporate[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mtechnological[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m
|
||
[38;5;12mprogrammatic[39m[38;5;12m [39m[38;5;12marms[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12minto[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mframework[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mwill[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mrelevant[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12many[39m[38;5;12m [39m[38;5;12morganization[39m[38;5;12m [39m[38;5;12mseeking[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12mcontent[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mweb.[39m[38;5;12m [39m[38;5;12mArchive-It,[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mservice[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mInternet[39m[38;5;12m [39m[38;5;12mArchive,[39m[38;5;12m [39m
|
||
[38;5;12mdeveloped[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mmodel[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mits[39m[38;5;12m [39m[38;5;12mwork[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mmemory[39m[38;5;12m [39m[38;5;12minstitutions[39m[38;5;12m [39m[38;5;12maround[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mworld.[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mRetrieving and Archiving Information from Websites by Wael Eskandar and Brad Murray[0m[38;5;12m (https://kit.exposingtheinvisible.org/en/web-archive.html/)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mTraining materials:[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIIPC and DPC Training materials: module for beginners (8 sessions)[0m[38;5;12m (https://netpreserve.org/web-archiving/training-materials/)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUNT Web Archiving Course 2022[0m[38;5;12m (https://github.com/vphill/web-archiving-course)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mContinuing Education to Advance Web Archiving (CEDWARC)[0m[38;5;12m (https://cedwarc.github.io/)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mThe WARC Standard:[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mThe [39m[38;5;14m[1mwarc-specifications[0m[38;5;12m (https://iipc.github.io/warc-specifications/) community HTML version of the official specification and hub for new proposals.[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mThe [39m[38;5;14m[1mofficial ISO 28500 WARC specification homepage[0m[38;5;12m (http://bibnum.bnf.fr/WARC/).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mFor researchers using web archives:[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGLAM[0m[38;5;14m[1m [0m[38;5;14m[1mWorkbench:[0m[38;5;14m[1m [0m[38;5;14m[1mWeb[0m[38;5;14m[1m [0m[38;5;14m[1mArchives[0m[38;5;12m [39m[38;5;12m(https://glam-workbench.github.io/web-archives/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mSee[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;14m[1mthis[0m[38;5;14m[1m [0m[38;5;14m[1mrelated[0m[38;5;14m[1m [0m[38;5;14m[1mblog[0m[38;5;14m[1m [0m[38;5;14m[1mpost[0m[38;5;14m[1m [0m[38;5;14m[1mon[0m[38;5;14m[1m [0m[38;5;14m[1m'Asking[0m[38;5;14m[1m [0m[38;5;14m[1mquestions[0m[38;5;14m[1m [0m[38;5;14m[1mwith[0m[38;5;14m[1m [0m[38;5;14m[1mweb[0m[38;5;14m[1m [0m[38;5;14m[1marchives'[0m[38;5;12m [39m
|
||
[38;5;12m(https://netpreserveblog.wordpress.com/2020/05/28/asking-questions-with-web-archives/).[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchives Unleashed Toolkit documentation[0m[38;5;12m (https://aut.docs.archivesunleashed.org/)[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTutorial for Humanities researchers about how to explore Arquivo.pt[0m[38;5;12m (https://sobre.arquivo.pt/en/tutorial-for-humanities-researchers-about-how-to-use-arquivo-pt/)[39m
|
||
|
||
[38;2;255;187;0m[4mResources for Web Publishers[0m
|
||
|
||
[38;5;12mThese resources can help when working with individuals or organisations who publish on the web, and who want to make sure their site can be archived.[39m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStanford Libraries' Archivability pages[0m[38;5;12m (https://library.stanford.edu/projects/web-archiving/archivability)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mThe [39m[38;5;14m[1mArchive Ready[0m[38;5;12m (http://archiveready.com/) tool, for estimating how likely a web page will be archived successfully.[39m
|
||
|
||
|
||
[38;2;255;187;0m[4mTools & Software[0m
|
||
|
||
[38;5;12mThis[39m[38;5;12m [39m[38;5;12mlist[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12msoftware[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mintended[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbriefly[39m[38;5;12m [39m[38;5;12mdescribe[39m[38;5;12m [39m[38;5;12msome[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mmost[39m[38;5;12m [39m[38;5;12mimportant[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mwidely-used[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mrelated[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchiving.[39m[38;5;12m [39m[38;5;12mFor[39m[38;5;12m [39m[38;5;12mmore[39m[38;5;12m [39m[38;5;12mdetails,[39m[38;5;12m [39m[38;5;12mwe[39m[38;5;12m [39m[38;5;12mrecommend[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mrefer[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12m(and[39m[38;5;12m [39m[38;5;12mcontribute[39m[38;5;12m [39m[38;5;12mto!)[39m[38;5;12m [39m[38;5;12mthese[39m[38;5;12m [39m
|
||
[38;5;12mexcellent[39m[38;5;12m [39m[38;5;12mresources[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mother[39m[38;5;12m [39m[38;5;12mgroups:[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mComparison of web archiving software[0m[38;5;12m (https://github.com/archivers-space/research/tree/master/web_archiving)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAwesome Website Change Monitoring[0m[38;5;12m (https://github.com/edgi-govdata-archiving/awesome-website-change-monitoring)[39m
|
||
|
||
[38;2;255;187;0m[4mAcquisition[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchiveBox[0m[38;5;12m [39m[38;5;12m(https://github.com/pirate/ArchiveBox)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mtool[39m[38;5;12m [39m[38;5;12mwhich[39m[38;5;12m [39m[38;5;12mmaintains[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12madditive[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mRSS[39m[38;5;12m [39m[38;5;12mfeeds,[39m[38;5;12m [39m[38;5;12mbookmarks,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mlinks[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mwget,[39m[38;5;12m [39m[38;5;12mChrome[39m[38;5;12m [39m[38;5;12mheadless,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mother[39m[38;5;12m [39m[38;5;12mmethods[39m[38;5;12m [39m[38;5;12m(formerly[39m[38;5;12m [39m[48;5;235m[38;5;249mBookmark Archiver[49m[39m[38;5;12m).[39m[38;5;12m [39m
|
||
[48;2;30;30;40m[38;5;13m[3m(In[0m[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;13m[3mDevelopment)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1marchivenow[0m[38;5;12m (https://github.com/oduwsdl/archivenow) - A [39m[38;5;14m[1mPython library[0m[38;5;12m (http://ws-dl.blogspot.com/2017/02/2017-02-22-archive-now-archivenow.html) to push web resources into on-demand web archives. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchiveWeb.Page[0m[38;5;12m [39m[38;5;12m(https://archiveweb.page)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mplugin[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mChrome[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mother[39m[38;5;12m [39m[38;5;12mChromium[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mbrowsers[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mlets[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12minteractively[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mpages,[39m[38;5;12m [39m[38;5;12mreplay[39m[38;5;12m [39m[38;5;12mthem,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mexport[39m[38;5;12m [39m[38;5;12mthem[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mWARC[39m[38;5;12m [39m[38;5;12mdata.[39m[38;5;12m [39m[38;5;12mAlso[39m[38;5;12m [39m[38;5;12mavailable[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m
|
||
[38;5;12mElectron[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mdesktop[39m[38;5;12m [39m[38;5;12mapplication.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAuto[0m[38;5;14m[1m [0m[38;5;14m[1mArchiver[0m[38;5;12m [39m[38;5;12m(https://github.com/bellingcat/auto-archiver)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mPython[39m[38;5;12m [39m[38;5;12mscript[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mautomatically[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12msocial[39m[38;5;12m [39m[38;5;12mmedia[39m[38;5;12m [39m[38;5;12mposts,[39m[38;5;12m [39m[38;5;12mvideos,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mimages[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mGoogle[39m[38;5;12m [39m[38;5;12mSheets[39m[38;5;12m [39m[38;5;12mdocument.[39m[38;5;12m [39m[38;5;12mRead[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;14m[1marticle[0m[38;5;14m[1m [0m[38;5;14m[1mabout[0m[38;5;14m[1m [0m[38;5;14m[1mAuto[0m[38;5;14m[1m [0m[38;5;14m[1mArchiver[0m[38;5;14m[1m [0m
|
||
[38;5;14m[1mon[0m[38;5;14m[1m [0m[38;5;14m[1mbellingcat.com[0m[38;5;12m [39m[38;5;12m(https://www.bellingcat.com/resources/2022/09/22/preserve-vital-online-content-with-bellingcats-auto-archiver-tool/).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBrowsertrix Crawler[0m
|
||
[38;5;12m (https://github.com/webrecorder/browsertrix-crawler) - A Chrome based high-fidelity crawling system, designed to run a complex, customizable browser-based crawl in a single Docker container.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mBrozzler[0m[38;5;12m (https://github.com/internetarchive/brozzler) - A distributed web crawler (爬虫) that uses a real browser (Chrome or Chromium) to fetch pages and embedded urls and to extract links. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCairn[0m[38;5;12m (https://github.com/wabarc/cairn) - An npm package and CLI tool for saving webpages. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChronicler[0m[38;5;12m (https://github.com/CGamesPlay/chronicler) - Web browser with record and replay functionality. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mcrau[0m[38;5;12m (https://github.com/turicas/crau) - crau is the way (most) Brazilians pronounce crawl; it's the easiest command-line tool for archiving the Web and playing archives: you just need a list of URLs. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCrawl[0m[38;5;12m (https://git.autistici.org/ale/crawl) - A simple web crawler in Golang. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mcrocoite[0m[38;5;12m (https://github.com/promyloph/crocoite) - Crawl websites using headless Google Chrome/Chromium and save resources, static DOM snapshot and page screenshots to WARC files. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDiskerNet[0m[38;5;12m (https://github.com/dosyago/DiskerNet) - A non-WARC-based tool which hooks into the Chrome browser and archives everything you browse making it available for offline replay. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mF(b)arc[0m[38;5;12m [39m[38;5;12m(https://github.com/justinlittman/fbarc)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mcommandline[39m[38;5;12m [39m[38;5;12mtool[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mPython[39m[38;5;12m [39m[38;5;12mlibrary[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;14m[1mFacebook[0m[38;5;12m [39m[38;5;12m(https://www.facebook.com/)[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;14m[1mGraph[0m[38;5;14m[1m [0m[38;5;14m[1mAPI[0m[38;5;12m [39m
|
||
[38;5;12m(https://developers.facebook.com/docs/graph-api).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mfreeze-dry[0m[38;5;12m (https://github.com/WebMemex/freeze-dry) - JavaScript library to turn a page into a static, self-contained HTML document; useful for browser extensions. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mgrab-site[0m[38;5;12m (https://github.com/ArchiveTeam/grab-site) - The archivist's web crawler: WARC output, dashboard for all crawls, dynamic ignore patterns. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHeritrix[0m[38;5;12m (https://github.com/internetarchive/heritrix3/wiki) - An open source, extensible, web-scale, archival quality web crawler. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHeritrix Q&A[0m[38;5;12m (https://github.com/internetarchive/heritrix3/discussions/categories/q-a) - A discussion forum for asking questions and getting answers about using Heritrix.[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHeritrix Walkthrough[0m[38;5;12m (https://github.com/web-archive-group/heritrix-walkthrough) [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mhtml2warc[0m[38;5;12m (https://github.com/steffenfritz/html2warc) - A simple script to convert offline data into a single WARC file. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHTTrack[0m[38;5;12m (http://www.httrack.com/) - An open source website copying utility. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mmonolith[0m[38;5;12m (https://github.com/Y2Z/monolith) - CLI tool to save a web page as a single HTML file. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mObelisk[0m[38;5;12m (https://github.com/go-shiori/obelisk) - Go package and CLI tool for saving a web page as a single HTML file. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mScoop[0m[38;5;12m (https://github.com/harvard-lil/scoop) - High-fidelity, browser-based, single-page web archiving library and CLI for witnessing the web. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSingleFile[0m[38;5;12m (https://github.com/gildas-lormeau/SingleFile) - Browser extension for Firefox/Chrome and CLI tool to save a faithful copy of a complete page as a single HTML file. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSiteStory[0m[38;5;12m (http://mementoweb.github.com/SiteStory/) - A transactional archive that selectively captures and stores transactions that take place between a web client (browser) and a web server. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSocial Feed Manager[0m[38;5;12m (https://gwu-libraries.github.io/sfm-ui/) - Open source software that enables users to create social media collections from Twitter, Tumblr, Flickr, and Sina Weibo public APIs. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSquidwarc[0m[38;5;12m [39m[38;5;12m(https://github.com/N0taN3rd/Squidwarc)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mAn[39m[38;5;12m [39m[38;5;14m[1mopen[0m[38;5;14m[1m [0m[38;5;14m[1msource,[0m[38;5;14m[1m [0m[38;5;14m[1mhigh-fidelity,[0m[38;5;14m[1m [0m[38;5;14m[1mpage[0m[38;5;14m[1m [0m[38;5;14m[1minteracting[0m[38;5;12m [39m[38;5;12m(http://ws-dl.blogspot.com/2017/07/2017-07-24-replacing-heritrix-with.html)[39m[38;5;12m [39m[38;5;12marchival[39m[38;5;12m [39m[38;5;12mcrawler[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12muses[39m[38;5;12m [39m[38;5;12mChrome[39m
|
||
[38;5;12mor[39m[38;5;12m [39m[38;5;12mChrome[39m[38;5;12m [39m[38;5;12mHeadless[39m[38;5;12m [39m[38;5;12mdirectly.[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(In[0m[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;13m[3mDevelopment)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mStormCrawler[0m[38;5;12m (http://stormcrawler.net/) - A collection of resources for building low-latency, scalable web crawlers on Apache Storm. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mtwarc[0m[38;5;12m (https://github.com/docnow/twarc) - A command line tool and Python library for archiving Twitter JSON data. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWAIL[0m[38;5;12m [39m[38;5;12m(https://github.com/machawk1/wail)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mgraphical[39m[38;5;12m [39m[38;5;12muser[39m[38;5;12m [39m[38;5;12minterface[39m[38;5;12m [39m[38;5;12m(GUI)[39m[38;5;12m [39m[38;5;12matop[39m[38;5;12m [39m[38;5;12mmultiple[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mintended[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mused[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12measy[39m[38;5;12m [39m[38;5;12mway[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12manyone[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mpreserve[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mreplay[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mpages;[39m[38;5;12m [39m[38;5;14m[1mPython[0m[38;5;12m [39m
|
||
[38;5;12m(https://machawk1.github.io/wail/),[39m[38;5;12m [39m[38;5;14m[1mElectron[0m[38;5;12m [39m[38;5;12m(https://github.com/n0tan3rd/wail).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarcprox[0m[38;5;12m (https://github.com/internetarchive/warcprox) - WARC-writing MITM HTTP/S proxy. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWARCreate[0m[38;5;12m (http://matkelly.com/warcreate/) - A [39m[38;5;14m[1mGoogle Chrome[0m[38;5;12m (https://www.google.com/intl/en/chrome/browser/) extension for archiving an individual webpage or website to a WARC file. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarcworker[0m[38;5;12m (https://github.com/peterk/warcworker) - An open source, dockerized, queued, high fidelity web archiver based on Squidwarc with a simple web GUI. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWayback[0m[38;5;12m (https://github.com/wabarc/wayback) - A toolkit for snapshotting webpages to the Internet Archive, archive.today, IPFS and beyond. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWaybackpy[0m[38;5;12m (https://github.com/akamhy/waybackpy) - Wayback Machine Save, CDX and availability API interface in Python and a command-line tool. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb2Warc[0m[38;5;12m (https://github.com/helgeho/Web2Warc) - An easy-to-use and highly customizable crawler that enables anyone to create their own little Web archives (WARC/CDX). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb Curator Tool[0m[38;5;12m (https://webcuratortool.org) - Open-source workflow management for selective web archiving. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWebMemex[0m[38;5;12m (https://github.com/WebMemex) - Browser extension for Firefox and Chrome which lets you archive web pages you visit. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWebrecorder[0m[38;5;12m (https://webrecorder.io/) - Create high-fidelity, interactive recordings of any web site you browse. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWget[0m[38;5;12m (http://www.gnu.org/software/wget/) - An open source file retrieval utility that as of [39m[38;5;14m[1mversion 1.14 supports writing WARCs[0m[38;5;12m (http://www.archiveteam.org/index.php?title=Wget_with_WARC_output). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWget-lua[0m[38;5;12m (https://github.com/alard/wget-lua) - Wget with Lua extension. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWpull[0m[38;5;12m (https://github.com/chfoo/wpull) - A Wget-compatible (or remake/clone/replacement/alternative) web downloader and crawler. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
|
||
[38;2;255;187;0m[4mReplay[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInterPlanetary Wayback (ipwb)[0m[38;5;12m (https://github.com/oduwsdl/ipwb) - Web Archive (WARC) indexing and replay using [39m[38;5;14m[1mIPFS[0m[38;5;12m (https://ipfs.io/).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOpenWayback[0m[38;5;12m [39m[38;5;12m(https://github.com/iipc/openwayback/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mThe[39m[38;5;12m [39m[38;5;12mopen[39m[38;5;12m [39m[38;5;12msource[39m[38;5;12m [39m[38;5;12mproject[39m[38;5;12m [39m[38;5;12maimed[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mdevelop[39m[38;5;12m [39m[38;5;12mWayback[39m[38;5;12m [39m[38;5;12mMachine,[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mkey[39m[38;5;12m [39m[38;5;12msoftware[39m[38;5;12m [39m[38;5;12mused[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchives[39m[38;5;12m [39m[38;5;12mworldwide[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mplay[39m[38;5;12m [39m[38;5;12mback[39m[38;5;12m [39m[38;5;12marchived[39m[38;5;12m [39m[38;5;12mwebsites[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12muser's[39m[38;5;12m [39m
|
||
[38;5;12mbrowser.[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPyWb[0m[38;5;12m (https://github.com/ikreymer/pywb) - A Python (2 and 3) implementation of web archival replay tools, sometimes also known as 'Wayback Machine'. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mReconstructive[0m[38;5;12m [39m[38;5;12m(https://oduwsdl.github.io/Reconstructive/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mReconstructive[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mServiceWorker[39m[38;5;12m [39m[38;5;12mmodule[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mclient-side[39m[38;5;12m [39m[38;5;12mreconstruction[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mcomposite[39m[38;5;12m [39m[38;5;12mmementos[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mrerouting[39m[38;5;12m [39m[38;5;12mresource[39m[38;5;12m [39m[38;5;12mrequests[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mcorresponding[39m[38;5;12m [39m
|
||
[38;5;12marchived[39m[38;5;12m [39m[38;5;12mcopies[39m[38;5;12m [39m[38;5;12m(JavaScript).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mReplayWeb.Page[0m[38;5;12m (https://replayweb.page/) - A browser-based, fully client-side replay engine for both local and remote WARC files.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarc2html[0m[38;5;12m (https://github.com/iipc/warc2html) - Converts WARC files to static HTML suitable for browsing offline or rehosting.[39m
|
||
|
||
[38;2;255;187;0m[4mSearch & Discovery[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMink[0m[38;5;12m [39m[38;5;12m(https://github.com/machawk1/mink)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;14m[1mGoogle[0m[38;5;14m[1m [0m[38;5;14m[1mChrome[0m[38;5;12m [39m[38;5;12m(https://www.google.com/intl/en/chrome/)[39m[38;5;12m [39m[38;5;12mextension[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mquerying[39m[38;5;12m [39m[38;5;12mMemento[39m[38;5;12m [39m[38;5;12maggregators[39m[38;5;12m [39m[38;5;12mwhile[39m[38;5;12m [39m[38;5;12mbrowsing[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mintegrating[39m[38;5;12m [39m[38;5;12mlive-archived[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mnavigation.[39m[38;5;12m [39m
|
||
[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mplayback[0m[38;5;12m [39m[38;5;12m(https://github.com/wabarc/playback)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mtoolkit[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12msearching[39m[38;5;12m [39m[38;5;12marchived[39m[38;5;12m [39m[38;5;12mwebpages[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;14m[1mInternet[0m[38;5;14m[1m [0m[38;5;14m[1mArchive[0m[38;5;12m [39m[38;5;12m(https://web.archive.org),[39m[38;5;12m [39m[38;5;14m[1marchive.today[0m[38;5;12m [39m[38;5;12m(https://archive.today),[39m[38;5;12m [39m[38;5;14m[1mMemento[0m[38;5;12m [39m
|
||
[38;5;12m(http://timetravel.mementoweb.org)[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mbeyond.[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(In[0m[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;13m[3mDevelopment)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSecurityTrails[0m[38;5;12m (https://securitytrails.com/) - Web based archive for WHOIS and DNS records. REST API available free of charge.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTempas v1[0m[38;5;12m (http://tempas.L3S.de/v1) - Temporal web archive search based on [39m[38;5;14m[1mDelicious[0m[38;5;12m (https://en.wikipedia.org/wiki/Delicious_(website)) tags. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTempas[0m[38;5;14m[1m [0m[38;5;14m[1mv2[0m[38;5;12m [39m[38;5;12m(http://tempas.L3S.de/v2)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mTemporal[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchive[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mlinks[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12manchor[39m[38;5;12m [39m[38;5;12mtexts[39m[38;5;12m [39m[38;5;12mextracted[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mGerman[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12m1996[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12m2013[39m[38;5;12m [39m[38;5;12m(results[39m[38;5;12m [39m[38;5;12mare[39m[38;5;12m [39m[38;5;12mnot[39m[38;5;12m [39m[38;5;12mlimited[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mGerman[39m[38;5;12m [39m[38;5;12mpages,[39m[38;5;12m [39m[38;5;12me.g.,[39m[38;5;12m [39m[38;5;14m[1mObama@2005-2009[0m
|
||
[38;5;14m[1min[0m[38;5;14m[1m [0m[38;5;14m[1mTempas[0m[38;5;12m [39m[38;5;12m(http://tempas.l3s.de/v2/query?q=obama&from=2005&to=2009)).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwebarchive-discovery[0m[38;5;12m (https://github.com/ukwa/webarchive-discovery) - WARC and ARC full-text indexing and discovery tools, with a number of associated tools capable of using the index shown below. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mShine[0m[38;5;12m [39m[38;5;12m(https://github.com/ukwa/shine)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mprototype[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchives[39m[38;5;12m [39m[38;5;12mexploration[39m[38;5;12m [39m[38;5;12mUI,[39m[38;5;12m [39m[38;5;12mdeveloped[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mresearchers[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mpart[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;14m[1mBig[0m[38;5;14m[1m [0m[38;5;14m[1mUK[0m[38;5;14m[1m [0m[38;5;14m[1mDomain[0m[38;5;14m[1m [0m[38;5;14m[1mData[0m[38;5;14m[1m [0m[38;5;14m[1mfor[0m[38;5;14m[1m [0m[38;5;14m[1mthe[0m[38;5;14m[1m [0m[38;5;14m[1mArts[0m[38;5;14m[1m [0m[38;5;14m[1mand[0m[38;5;14m[1m [0m[38;5;14m[1mHumanities[0m[38;5;14m[1m [0m[38;5;14m[1mproject[0m[38;5;12m [39m
|
||
[38;5;12m(https://buddah.projects.history.ac.uk/).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSolrWayback[0m[38;5;12m [39m[38;5;12m(https://github.com/netarchivesuite/solrwayback)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;12mbackend[39m[38;5;12m [39m[38;5;12mJava[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mfrontend[39m[38;5;12m [39m[38;5;12mVUE[39m[38;5;12m [39m[38;5;12mJS[39m[38;5;12m [39m[38;5;12mproject[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mfreetext[39m[38;5;12m [39m[38;5;12msearch[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mbuilt-in[39m[38;5;12m [39m[38;5;12mplayback[39m[38;5;12m [39m[38;5;12mengine.[39m[38;5;12m [39m[38;5;12mRequires[39m[38;5;12m [39m[38;5;12mWARC[39m[38;5;12m [39m[38;5;12mfiles[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mhave[39m[38;5;12m [39m[38;5;12mbeen[39m[38;5;12m [39m[38;5;12mindexed[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m
|
||
[38;5;12mWarc-Indexer.[39m[38;5;12m [39m[38;5;12mThe[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mapplication[39m[38;5;12m [39m[38;5;12malso[39m[38;5;12m [39m[38;5;12mhas[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mwide[39m[38;5;12m [39m[38;5;12mrange[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mvisualization[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mdata[39m[38;5;12m [39m[38;5;12mexport[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mcan[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mused[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mwhole[39m[38;5;12m [39m[38;5;12mwebarchive.[39m[38;5;12m [39m[38;5;14m[1mSolrWayback[0m[38;5;14m[1m [0m[38;5;14m[1m4[0m[38;5;14m[1m [0m[38;5;14m[1mBundle[0m[38;5;14m[1m [0m[38;5;14m[1mrelease[0m[38;5;12m [39m
|
||
[38;5;12m(https://github.com/netarchivesuite/solrwayback/releases)[39m[38;5;12m [39m[38;5;12mcontains[39m[38;5;12m [39m[38;5;12mall[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12msoftware[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mdependencies[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12mout-of-the-box[39m[38;5;12m [39m[38;5;12msolution[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12measy[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12minstall.[39m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarclight[0m[38;5;12m (https://github.com/archivesunleashed/warclight) - A Project Blacklight based Rails engine that supports the discovery of web archives held in the WARC and ARC formats. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWasp[0m[38;5;12m (https://github.com/webis-de/wasp) - A fully functional prototype of a personal [39m[38;5;14m[1mweb archive and search system[0m[38;5;12m (http://ceur-ws.org/Vol-2167/paper6.pdf). [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[38;5;12m [39m[38;5;12m [39m[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;12mOther possible options for building a front-end are listed in the [39m[48;5;235m[38;5;249mwebarchive-discovery[49m[39m[38;5;12m wiki, [39m[38;5;14m[1mhere[0m[38;5;12m (https://github.com/ukwa/webarchive-discovery/wiki/Front-ends).[39m
|
||
|
||
[38;2;255;187;0m[4mUtilities[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchiveTools[0m[38;5;12m (https://github.com/recrm/ArchiveTools) - Collection of tools to extract and interact with WARC files (Python).[39m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mcdx-toolkit[0m[38;5;12m (https://pypi.org/project/cdx-toolkit/) - Library and CLI to consult cdx indexes and create WARC extractions of subsets. Abstracts away Common Crawl's unusual crawl structure. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mGo Get Crawl[0m[38;5;12m (https://github.com/karust/gogetcrawl) - Extract web archive data using [39m[38;5;14m[1mWayback Machine[0m[38;5;12m (https://web.archive.org/) and [39m[38;5;14m[1mCommon Crawl[0m[38;5;12m (https://commoncrawl.org/). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mgowarcserver[0m[38;5;12m (https://github.com/nlnwa/gowarcserver) - [39m[38;5;14m[1mBadgerDB[0m[38;5;12m (https://github.com/dgraph-io/badger)-based capture index (CDX) and WARC record server, used to index and serve WARC files (Go).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mhar2warc[0m[38;5;12m (https://github.com/webrecorder/har2warc) - Convert HTTP Archive (HAR) -> Web Archive (WARC) format (Python).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mhttpreserve.info[0m[38;5;12m [39m[38;5;12m(https://httpreserve.info/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mService[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mreturn[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mstatus[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mpage[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12msave[39m[38;5;12m [39m[38;5;12mit[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mInternet[39m[38;5;12m [39m[38;5;12mArchive.[39m[38;5;12m [39m[38;5;12mHTTPreserve[39m[38;5;12m [39m[38;5;12mincludes[39m[38;5;12m [39m[38;5;12mdisambiguation[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mwell-known[39m[38;5;12m [39m[38;5;12mshort[39m[38;5;12m [39m[38;5;12mlink[39m[38;5;12m [39m[38;5;12mservices.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12mreturns[39m[38;5;12m [39m
|
||
[38;5;12mJSON[39m[38;5;12m [39m[38;5;12mvia[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mbrowser[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12mcommand[39m[38;5;12m [39m[38;5;12mline[39m[38;5;12m [39m[38;5;12mvia[39m[38;5;12m [39m[38;5;12mCURL[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mGET.[39m[38;5;12m [39m[38;5;12mDescribes[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12msites[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mearliest[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mlatest[39m[38;5;12m [39m[38;5;12mdates[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mInternet[39m[38;5;12m [39m[38;5;12mArchive[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mdemonstrates[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mconstruction[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mRobust[39m[38;5;12m [39m[38;5;12mLinks[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mits[39m[38;5;12m [39m[38;5;12moutput[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m
|
||
[38;5;12mrange.[39m[38;5;12m [39m[38;5;12m(Golang).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHTTPreserve[0m[38;5;14m[1m [0m[38;5;14m[1mlinkstat[0m[38;5;12m [39m[38;5;12m(https://github.com/httpreserve/linkstat)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mCommand[39m[38;5;12m [39m[38;5;12mline[39m[38;5;12m [39m[38;5;12mimplementation[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;14m[1mhttpreserve.info[0m[38;5;12m [39m[38;5;12m(https://httpreserve.info)[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mdescribe[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mstatus[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12mpage.[39m[38;5;12m [39m[38;5;12mCan[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12measily[39m[38;5;12m [39m[38;5;12mscripted[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m
|
||
[38;5;12mprovides[39m[38;5;12m [39m[38;5;12mJSON[39m[38;5;12m [39m[38;5;12moutput[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12menable[39m[38;5;12m [39m[38;5;12mquerying[39m[38;5;12m [39m[38;5;12mthrough[39m[38;5;12m [39m[38;5;12mtools[39m[38;5;12m [39m[38;5;12mlike[39m[38;5;12m [39m[38;5;12mJQ.[39m[38;5;12m [39m[38;5;12mHTTPreserve[39m[38;5;12m [39m[38;5;12mLinkstat[39m[38;5;12m [39m[38;5;12mdescribes[39m[38;5;12m [39m[38;5;12mcurrent[39m[38;5;12m [39m[38;5;12mstatus,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mearliest[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mlatest[39m[38;5;12m [39m[38;5;12mlinks[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;14m[1marchive.org[0m[38;5;12m [39m[38;5;12m(https://archive.org/).[39m[38;5;12m [39m[38;5;12m(Golang).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mInternet Archive Library[0m[38;5;12m (https://github.com/jjjake/internetarchive) - A command line tool and Python library for interacting directly with [39m[38;5;14m[1marchive.org[0m[38;5;12m (https://archive.org). (Python). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mhttrack2warc[0m[38;5;12m (https://github.com/nla/httrack2warc) - Convert HTTrack archives to WARC format (Java).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMementoMap[0m[38;5;12m (https://github.com/oduwsdl/MementoMap) - A Tool to Summarize Web Archive Holdings (Python). [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mMemGator[0m[38;5;12m (https://github.com/oduwsdl/MemGator) - A Memento Aggregator CLI and Server (Golang). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mnode-cdxj[0m[38;5;12m (https://github.com/N0taN3rd/node-cdxj) - [39m[38;5;14m[1mCDXJ[0m[38;5;12m (https://github.com/oduwsdl/ORS/wiki/CDXJ) file parser (Node.js). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOutbackCDX[0m[38;5;12m [39m[38;5;12m(https://github.com/nla/outbackcdx)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mRocksDB-based[39m[38;5;12m [39m[38;5;12mcapture[39m[38;5;12m [39m[38;5;12mindex[39m[38;5;12m [39m[38;5;12m(CDX)[39m[38;5;12m [39m[38;5;12mserver[39m[38;5;12m [39m[38;5;12msupporting[39m[38;5;12m [39m[38;5;12mincremental[39m[38;5;12m [39m[38;5;12mupdates[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mcompression.[39m[38;5;12m [39m[38;5;12mCan[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mused[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mbackend[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mOpenWayback,[39m[38;5;12m [39m[38;5;12mPyWb[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;14m[1mHeritrix[0m[38;5;12m [39m
|
||
[38;5;12m(https://github.com/ukwa/ukwa-heritrix/blob/master/src/main/java/uk/bl/wap/modules/uriuniqfilters/OutbackCDXRecentlySeenUriUniqFilter.java).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mpy-wasapi-client[0m[38;5;12m (https://github.com/unt-libraries/py-wasapi-client) - Command line application to download crawls from WASAPI (Python). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe[0m[38;5;14m[1m [0m[38;5;14m[1mArchive[0m[38;5;14m[1m [0m[38;5;14m[1mBrowser[0m[38;5;12m [39m[38;5;12m(https://archivebrowser.c3.cx/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mThe[39m[38;5;12m [39m[38;5;12mArchive[39m[38;5;12m [39m[38;5;12mBrowser[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mprogram[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mlets[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mbrowse[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mcontents[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12marchives,[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mwell[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12mextract[39m[38;5;12m [39m[38;5;12mthem.[39m[38;5;12m [39m[38;5;12mIt[39m[38;5;12m [39m[38;5;12mwill[39m[38;5;12m [39m[38;5;12mlet[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mopen[39m[38;5;12m [39m[38;5;12mfiles[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12minside[39m[38;5;12m [39m[38;5;12marchives,[39m[38;5;12m [39m
|
||
[38;5;12mand[39m[38;5;12m [39m[38;5;12mlets[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mpreview[39m[38;5;12m [39m[38;5;12mthem[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12mQuick[39m[38;5;12m [39m[38;5;12mLook.[39m[38;5;12m [39m[38;5;12mWARC[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12msupported[39m[38;5;12m [39m[38;5;12m(macOS[39m[38;5;12m [39m[38;5;12monly,[39m[38;5;12m [39m[38;5;12mProprietary[39m[38;5;12m [39m[38;5;12mapp).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe Unarchiver[0m
|
||
[38;5;12m (http://unarchiver.c3.cx/unarchiver) - Program to extract the contents of many archive formats, inclusive of WARC, to a file system. Free variant of The Archive Browser (macOS only, Proprietary app).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mtikalinkextract[0m[38;5;12m [39m[38;5;12m(https://github.com/httpreserve/tikalinkextract)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mExtract[39m[38;5;12m [39m[38;5;12mhyperlinks[39m[38;5;12m [39m[38;5;12mas[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mseed[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mfolders[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mdocument[39m[38;5;12m [39m[38;5;12mtypes[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mcan[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mparsed[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mApache[39m[38;5;12m [39m[38;5;12mTika[39m[38;5;12m [39m[38;5;12m(Golang,[39m[38;5;12m [39m[38;5;12mApache[39m[38;5;12m [39m[38;5;12mTika[39m[38;5;12m [39m[38;5;12mServer).[39m[38;5;12m [39m
|
||
[48;2;30;30;40m[38;5;13m[3m(In[0m[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;13m[3mDevelopment)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwasapi-downloader[0m[38;5;12m (https://github.com/sul-dlss/wasapi-downloader) - Java command line application to download crawls from WASAPI. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarchaeology[0m[38;5;12m (https://nlnwa.github.io/warchaeology/) - Warchaeology is a collection of tools for inspecting, manipulating, deduplicating and validating WARC-files. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarcdb[0m[38;5;12m (https://github.com/florents-Tselai/warcdb) - A command line utility (Python) for importing WARC files into a SQLite database. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarcdedupe[0m[38;5;12m (https://gitlab.com/taricorp/warcdedupe) - WARC deduplication tool (and WARC library) written in Rust. (In Development)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarcPartitioner[0m[38;5;12m (https://github.com/helgeho/WarcPartitioner) - Partition (W)ARC Files by MIME Type and Year. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarcrefs[0m[38;5;12m (https://github.com/arcalex/warcrefs) - Web archive deduplication tools. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwebarchive-indexing[0m[38;5;12m (https://github.com/ikreymer/webarchive-indexing) - Tools for bulk indexing of WARC/ARC files on Hadoop, EMR or local file system.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwikiteam[0m[38;5;12m (https://github.com/WikiTeam/wikiteam) - Tools for downloading and preserving wikis. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
|
||
[38;2;255;187;0m[4mWARC I/O Libraries[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFastWARC[0m[38;5;12m (https://github.com/chatnoir-eu/chatnoir-resiliparse) - A high-performance WARC parsing library (Python).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mHadoopConcatGz[0m[38;5;12m (https://github.com/helgeho/HadoopConcatGz) - A Splitable Hadoop InputFormat for Concatenated GZIP Files (and [39m[48;5;235m[38;5;249m*.warc.gz[49m[39m[38;5;12m). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mjwarc[0m[38;5;12m (https://github.com/iipc/jwarc) - Read and write WARC files with a type safe API (Java).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mJwat[0m[38;5;12m (https://sbforge.org/display/JWAT/JWAT) - Libraries and tools for reading/writing/validating WARC/ARC/GZIP files (Java). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mnode-warc[0m[38;5;12m [39m[38;5;12m(https://github.com/N0taN3rd/node-warc)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mParse[39m[38;5;12m [39m[38;5;12mWARC[39m[38;5;12m [39m[38;5;12mfiles[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12mcreate[39m[38;5;12m [39m[38;5;12mWARC[39m[38;5;12m [39m[38;5;12mfiles[39m[38;5;12m [39m[38;5;12musing[39m[38;5;12m [39m[38;5;12meither[39m[38;5;12m [39m[38;5;14m[1mElectron[0m[38;5;12m [39m[38;5;12m(https://electron.atom.io/)[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;14m[1mchrome-remote-interface[0m[38;5;12m [39m
|
||
[38;5;12m(https://github.com/cyrus-and/chrome-remote-interface)[39m[38;5;12m [39m[38;5;12m(Node.js).[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mSparkling[0m[38;5;12m (https://github.com/internetarchive/Sparkling) - Internet Archive's Sparkling Data Processing Library. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUnwarcit[0m[38;5;12m (https://github.com/emmadickson/unwarcit) - Command line interface to unzip WARC and WACZ files (Python).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWarcat[0m[38;5;12m (https://github.com/chfoo/warcat) - Tool and library for handling Web ARChive (WARC) files (Python). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarcio[0m[38;5;12m (https://github.com/webrecorder/warcio) - Streaming WARC/ARC library for fast web archive IO (Python). [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwarctools[0m[38;5;12m (https://github.com/internetarchive/warctools) - Library to work with ARC and WARC files (Python).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mwebarchive[0m[38;5;12m (https://github.com/richardlehane/webarchive) - Golang readers for ARC and WARC webarchive formats (Golang).[39m
|
||
|
||
[38;2;255;187;0m[4mAnalysis[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchives Research Compute Hub[0m[38;5;12m (https://github.com/internetarchive/arch) - Web application for distributed compute analysis of Archive-It web archive collections. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchiveSpark[0m[38;5;12m (https://github.com/helgeho/ArchiveSpark) - An Apache Spark framework (not only) for Web Archives that enables easy data processing, extraction as well as derivation. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchives[0m[38;5;14m[1m [0m[38;5;14m[1mUnleashed[0m[38;5;14m[1m [0m[38;5;14m[1mNotebooks[0m[38;5;12m [39m[38;5;12m(https://github.com/archivesunleashed/notebooks)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mNotebooks[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mworking[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchives[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mArchives[39m[38;5;12m [39m[38;5;12mUnleashed[39m[38;5;12m [39m[38;5;12mToolkit,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mderivatives[39m[38;5;12m [39m[38;5;12mgenerated[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mArchives[39m[38;5;12m [39m[38;5;12mUnleashed[39m[38;5;12m [39m
|
||
[38;5;12mToolkit.[39m[38;5;12m [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchives Unleashed Toolkit[0m[38;5;12m (https://github.com/archivesunleashed/aut) - Archives Unleashed Toolkit (AUT) is an open-source platform for analyzing web archives with Apache Spark. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCommon Crawl Columnar Index[0m[38;5;12m (https://commoncrawl.org/tag/columnar-index/) - SQL-queryable index, with CDX info plus language classification. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCommon Crawl Web Graph[0m[38;5;12m (https://commoncrawl.org/category/web-graph/) - A host or domain-level graph of the web, with ranking information. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCommon Crawl Jupyter notebooks[0m[38;5;12m (https://github.com/commoncrawl/cc-notebooks) - A collection of notebooks using Common Crawl's various datasets. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mTweet Archives Unleashed Toolkit[0m[38;5;12m (https://github.com/archivesunleashed/twut) - An open-source toolkit for analyzing line-oriented JSON Twitter archives with Apache Spark. [39m[48;2;30;30;40m[38;5;13m[3m(In Development)[0m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb Data Commons[0m[38;5;12m (http://webdatacommons.org/) - Structured data extracted from Common Crawl. [39m[48;2;30;30;40m[38;5;13m[3m(Stable)[0m
|
||
|
||
[38;2;255;187;0m[4mQuality Assurance[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChrome Check My Links[0m[38;5;12m (https://chrome.google.com/webstore/detail/check-my-links/ojkcdipcgfaekbeaelaapakgnjflfglf) - Browser extension: a link checker with more options.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChrome link checker[0m[38;5;12m (https://chrome.google.com/webstore/detail/link-checker/aibjbgmpmnidnmagaefhmcjhadpffaoi) - Browser extension: basic link checker.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChrome link gopher[0m[38;5;12m (https://chrome.google.com/webstore/detail/bpjdkodgnbfalgghnbeggfbfjpcfamkf/publish-accepted?hl=en-US&gl=US) - Browser extension: link harvester on a page.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChrome Open Multiple URLs[0m[38;5;12m (https://chrome.google.com/webstore/detail/open-multiple-urls/oifijhaokejakekmnjmphonojcfkpbbh?hl=de) - Browser extension: opens multiple URLs and also extracts URLs from text.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mChrome Revolver[0m[38;5;12m (https://chrome.google.com/webstore/detail/revolver-tabs/dlknooajieciikpedpldejhhijacnbda) - Browser extension: switches between browser tabs.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mFlameShot[0m[38;5;12m (https://github.com/lupoDharkael/flameshot) - Screen capture and annotation on Ubuntu.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPlayOnLinux[0m[38;5;12m (https://www.playonlinux.com/en/) - For running Xenu and Notepad++ on Ubuntu.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mPlayOnMac[0m[38;5;12m (https://www.playonmac.com/en/) - For running Xenu and Notepad++ on macOS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWindows[0m[38;5;14m[1m [0m[38;5;14m[1mSnipping[0m[38;5;14m[1m [0m[38;5;14m[1mTool[0m[38;5;12m [39m[38;5;12m(https://support.microsoft.com/en-gb/help/13776/windows-use-snipping-tool-to-capture-screenshots)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mWindows[39m[38;5;12m [39m[38;5;12mbuilt-in[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mpartial[39m[38;5;12m [39m[38;5;12mscreen[39m[38;5;12m [39m[38;5;12mcapture[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mannotation.[39m[38;5;12m [39m[38;5;12mOn[39m[38;5;12m [39m[38;5;12mmacOS[39m[38;5;12m [39m[38;5;12myou[39m[38;5;12m [39m[38;5;12mcan[39m[38;5;12m [39m[38;5;12muse[39m[38;5;12m [39m
|
||
[38;5;12mCommand[39m[38;5;12m [39m[38;5;12m+[39m[38;5;12m [39m[38;5;12mShift[39m[38;5;12m [39m[38;5;12m+[39m[38;5;12m [39m[38;5;12m4[39m[38;5;12m [39m[38;5;12m(keyboard[39m[38;5;12m [39m[38;5;12mshortcut[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12mtaking[39m[38;5;12m [39m[38;5;12mpartial[39m[38;5;12m [39m[38;5;12mscreen[39m[38;5;12m [39m[38;5;12mcapture).[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWineBottler[0m[38;5;12m (http://winebottler.kronenberg.org/) - For running Xenu and Notepad++ on macOS.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mxDoTool[0m[38;5;12m (https://github.com/jordansissel/xdotool) - Click automation on Ubuntu.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mXenu[0m[38;5;12m (http://home.snafu.de/tilman/xenulink.html) - Desktop link checker for Windows.[39m
|
||
|
||
[38;2;255;187;0m[4mCuration[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mZotero[0m[38;5;14m[1m [0m[38;5;14m[1mRobust[0m[38;5;14m[1m [0m[38;5;14m[1mLinks[0m[38;5;14m[1m [0m[38;5;14m[1mExtension[0m[38;5;12m [39m[38;5;12m(https://robustlinks.mementoweb.org/zotero/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mA[39m[38;5;12m [39m[38;5;14m[1mZotero[0m[38;5;12m [39m[38;5;12m(https://www.zotero.org/)[39m[38;5;12m [39m[38;5;12mextension[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12msubmits[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mreads[39m[38;5;12m [39m[38;5;12mfrom[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchives.[39m[38;5;12m [39m[38;5;12mSource[39m[38;5;12m [39m[38;5;14m[1mon[0m[38;5;14m[1m [0m[38;5;14m[1mGitHub[0m[38;5;12m [39m
|
||
[38;5;12m(https://github.com/lanl/Zotero-Robust-Links-Extension).[39m[38;5;12m [39m[38;5;12mSupersedes[39m[38;5;12m [39m[38;5;14m[1mleonkt/zotero-memento[0m[38;5;12m [39m[38;5;12m(https://github.com/leonkt/zotero-memento).[39m
|
||
|
||
[38;2;255;187;0m[4mCommunity Resources[0m
|
||
|
||
[38;2;255;187;0m[4mOther Awesome Lists[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb Archiving Community[0m[38;5;12m (https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mAwesome Memento[0m[38;5;12m (https://github.com/machawk1/awesome-memento)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe WARC Ecosystem[0m[38;5;12m (http://www.archiveteam.org/index.php?title=The_WARC_Ecosystem)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe Web Crawl section of COPTR[0m[38;5;12m (http://coptr.digipres.org/Category:Web_Crawl)[39m
|
||
|
||
[38;2;255;187;0m[4mBlogs and Scholarship[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIIPC Blog[0m[38;5;12m (https://netpreserveblog.wordpress.com/)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWeb[0m[38;5;14m[1m [0m[38;5;14m[1mArchiving[0m[38;5;14m[1m [0m[38;5;14m[1mRoundtable[0m[38;5;12m [39m[38;5;12m(https://webarchivingrt.wordpress.com/)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;12mUnofficial[39m[38;5;12m [39m[38;5;12mblog[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mArchiving[39m[38;5;12m [39m[38;5;12mRoundtable[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;14m[1mSociety[0m[38;5;14m[1m [0m[38;5;14m[1mof[0m[38;5;14m[1m [0m[38;5;14m[1mAmerican[0m[38;5;14m[1m [0m[38;5;14m[1mArchivists[0m[38;5;12m [39m[38;5;12m(https://www2.archivists.org/)[39m[38;5;12m [39m[38;5;12mmaintained[39m[38;5;12m [39m[38;5;12mby[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mmembers[39m
|
||
[38;5;12mof[39m[38;5;12m [39m[38;5;12mthe[39m[38;5;12m [39m[38;5;12mWeb[39m[38;5;12m [39m[38;5;12mArchiving[39m[38;5;12m [39m[38;5;12mRoundtable.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mThe Web as History[0m[38;5;12m (https://www.uclpress.co.uk/products/84010) - An open-source book that provides a conceptual overview of web archiving research, as well as several case studies.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWS-DL Blog[0m[38;5;12m (https://ws-dl.blogspot.com/) - Web Science and Digital Libraries Research Group blogs about various Web archiving related topics, scholarly work, and academic trip reports.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mDSHR's Blog[0m[38;5;12m (https://blog.dshr.org/) - David Rosenthal regularly reviews and summarizes work done in the Digital Preservation field.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mUK Web Archive Blog[0m[38;5;12m (https://blogs.bl.uk/webarchive/)[39m
|
||
|
||
[38;2;255;187;0m[4mMailing Lists[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mCommon Crawl[0m[38;5;12m (https://groups.google.com/g/common-crawl)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIIPC[0m[38;5;12m (http://netpreserve.org/about-us/iipc-mailing-list/)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mOpenWayback[0m[38;5;12m (https://groups.google.com/g/openwayback-dev)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mWASAPI[0m[38;5;12m (https://groups.google.com/g/wasapi-community)[39m
|
||
|
||
[38;2;255;187;0m[4mSlack[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mIIPC Slack[0m[38;5;12m (https://iipc.slack.com/) - Ask [39m[38;5;14m[1m@netpreserve[0m[38;5;12m (https://twitter.com/NetPreserve?s=20) for access.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchives Unleashed Slack[0m[38;5;12m (https://archivesunleashed.slack.com/) - [39m[38;5;14m[1mFill out this request form[0m[38;5;12m (http://slack.archivesunleashed.org/) for access to a researcher group of people working with web archives.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchivers[0m[38;5;14m[1m [0m[38;5;14m[1mSlack[0m[38;5;12m [39m[38;5;12m(https://archivers.slack.com)[39m[38;5;12m [39m[38;5;12m-[39m[38;5;12m [39m[38;5;14m[1mInvite[0m[38;5;14m[1m [0m[38;5;14m[1myourself[0m[38;5;12m [39m[38;5;12m(https://archivers-slack.herokuapp.com/)[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12ma[39m[38;5;12m [39m[38;5;12mmulti-disciplinary[39m[38;5;12m [39m[38;5;12meffort[39m[38;5;12m [39m[38;5;12mfor[39m[38;5;12m [39m[38;5;12marchiving[39m[38;5;12m [39m[38;5;12mprojects[39m[38;5;12m [39m[38;5;12mrun[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12maffiliation[39m[38;5;12m [39m[38;5;12mwith[39m[38;5;12m [39m[38;5;14m[1mEDGI[0m[38;5;12m [39m
|
||
[38;5;12m(https://envirodatagov.org/archiving/)[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;14m[1mData[0m[38;5;14m[1m [0m[38;5;14m[1mTogether[0m[38;5;12m [39m[38;5;12m(http://datatogether.org/).[39m
|
||
|
||
[38;2;255;187;0m[4mTwitter[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m@NetPreserve[0m[38;5;12m (https://twitter.com/NetPreserve) - Official IIPC handle.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m@WebSciDL[0m[38;5;12m (https://twitter.com/WebSciDL) - ODU Web Science and Digital Libraries Research Group.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m#WebArchiving[0m[38;5;12m (https://twitter.com/search?q=%23webarchiving)[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1m#WebArchiveWednesday[0m[38;5;12m (https://twitter.com/hashtag/webarchivewednesday)[39m
|
||
|
||
[38;2;255;187;0m[4mWeb Archiving Service Providers[0m
|
||
|
||
[38;5;12mThe[39m[38;5;12m [39m[38;5;12mintention[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mwe[39m[38;5;12m [39m[38;5;12monly[39m[38;5;12m [39m[38;5;12mlist[39m[38;5;12m [39m[38;5;12mservices[39m[38;5;12m [39m[38;5;12mthat[39m[38;5;12m [39m[38;5;12mallow[39m[38;5;12m [39m[38;5;12mweb[39m[38;5;12m [39m[38;5;12marchives[39m[38;5;12m [39m[38;5;12mto[39m[38;5;12m [39m[38;5;12mbe[39m[38;5;12m [39m[38;5;12mexported[39m[38;5;12m [39m[38;5;12min[39m[38;5;12m [39m[38;5;12mstandard[39m[38;5;12m [39m[38;5;12mformats[39m[38;5;12m [39m[38;5;12m(WARC[39m[38;5;12m [39m[38;5;12mor[39m[38;5;12m [39m[38;5;12mWACZ).[39m[38;5;12m [39m[38;5;12mBut[39m[38;5;12m [39m[38;5;12mthis[39m[38;5;12m [39m[38;5;12mis[39m[38;5;12m [39m[38;5;12mnot[39m[38;5;12m [39m[38;5;12man[39m[38;5;12m [39m[38;5;12mendorsement[39m[38;5;12m [39m[38;5;12mof[39m[38;5;12m [39m[38;5;12mthese[39m[38;5;12m [39m[38;5;12mservices,[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mreaders[39m[38;5;12m [39m[38;5;12mshould[39m[38;5;12m [39m[38;5;12mcheck[39m[38;5;12m [39m[38;5;12mand[39m[38;5;12m [39m[38;5;12mevaluate[39m[38;5;12m [39m
|
||
[38;5;12mthese[39m[38;5;12m [39m[38;5;12moptions[39m[38;5;12m [39m[38;5;12mbased[39m[38;5;12m [39m[38;5;12mon[39m[38;5;12m [39m[38;5;12mtheir[39m[38;5;12m [39m[38;5;12mneeds.[39m[38;5;12m [39m
|
||
|
||
[38;2;255;187;0m[4mSelf-hostable, Open Source[0m
|
||
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mBrowsertrix Cloud[0m[48;2;30;30;40m[38;5;13m[3m (https://browsertrix.cloud/) - From [0m[48;2;30;30;40m[38;5;14m[1m[3mWebrecorder[0m[48;2;30;30;40m[38;5;13m[3m (https://webrecorder.net/), source available at https://github.com/webrecorder/browsertrix-cloud.[0m
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mConifer[0m[48;2;30;30;40m[38;5;13m[3m (https://conifer.rhizome.org/) - From [0m[48;2;30;30;40m[38;5;14m[1m[3mRhizome[0m[48;2;30;30;40m[38;5;13m[3m (https://rhizome.org/), source available at https://github.com/Rhizome-Conifer.[0m
|
||
|
||
[38;2;255;187;0m[4mHosted, Closed Source[0m
|
||
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArchive-It[0m[38;5;12m (https://archive-it.org/) - From the Internet Archive.[39m
|
||
[48;5;12m[38;5;11m⟡[49m[39m[38;5;12m [39m[38;5;14m[1mArkiwera[0m[38;5;12m (https://arkiwera.se/wp/websites/)[39m
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mHanzo[0m[48;2;30;30;40m[38;5;13m[3m (https://www.hanzo.co/chronicle)[0m
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mMirrorWeb[0m[48;2;30;30;40m[38;5;13m[3m (https://www.mirrorweb.com/solutions/capabilities/website-archiving)[0m
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mPageFreezer[0m[48;2;30;30;40m[38;5;13m[3m (https://www.pagefreezer.com/)[0m
|
||
[48;2;30;30;40m[38;5;13m[3m [0m[48;2;30;30;40m[38;5;14m[1m[3mSmarsh[0m[48;2;30;30;40m[38;5;13m[3m (https://www.smarsh.com/platform/compliance-management/web-archive)[0m
|